嘿,我的脚本出了什么问题?它返回“Bad Request”(错误请求)错误,我不知道是哪里出了问题。
from Bio import Entrez
Entrez.email = '[email protected]'
import time
def fetch(ID):
    """Request one entry from the NCBI Protein database and return the raw text.

    Args:
        ID: Identifier passed straight through to ``Entrez.efetch`` as ``id``.

    Returns:
        Whatever ``handle.read()`` yields for the response body.

    NOTE: the ``efetch`` arguments are kept exactly as posted because this
    call is the subject of the question -- it is the line the traceback
    points at (``HTTPError: Bad Request``).
    """
    handle = Entrez.efetch(db = 'Protein', id = ID, retmode = 'fasta', rettype = 'text') #<--- here
    seq = handle.read()
    # Pause after each request before the next identifier is fetched.
    time.sleep(1)
    return seq
# Identifiers to download, exactly as posted in the question.
ids = ['ATK1','Cat','Lig1']
# One fetch() call per identifier; this is the statement quoted in the traceback.
out = [fetch(id) for id in ids]
# Dump every response, in order, into a single output file.
with open('out.fasta', 'w') as f:
    f.writelines(out)
错误回溯(Traceback):
File "<ipython-input-42-0be173f176eb>", line 1, in <module>
runfile('C:/Users/MGrad/bioPythonSearch.py', wdir='C:/Users/MGrad/Dropbox/Leg')
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\Local\conda\conda\envs\my_root\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Leg/bioPythonSearch.py", line 20, in <module>
out = [fetch(id) for id in ids] # where ids is a Python list containing gene ids/accession numbers
File "C:/Users/MGrad/bioPythonSearch.py", line 20, in <listcomp>
out = [fetch(id) for id in ids] # where ids is a Python list containing gene ids/accession numbers
File "C:/Users/MGrad/bioPythonSearch.py", line 14, in fetch
handle = Entrez.efetch(db = 'Protein', id = ID, retmode = 'fasta', rettype = 'text')
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 180, in efetch
return _open(cgi, variables, post=post)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 526, in _open
raise exception
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 524, in _open
handle = _urlopen(cgi)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: Bad Request
最佳答案
`Entrez.efetch()` 需要使用确切的 ID 号。如果要查找 ATK1 之类的检索词,则需要先通过 `Entrez.esearch()` 将其解析为一个或多个 ID 号。下面是一个简单但可运行的示例:
import time
from Bio import SeqIO
from Bio import Entrez
import xml.etree.cElementTree as ElementTree
# Search terms to resolve; each one is passed to fetch() in turn.
TERMS = ['ATK1', 'Cat', 'Lig1']
# Contact address set on the Entrez module before any request is made.
Entrez.email = '[email protected]'
def fetch(term):
    """Resolve a search term to a Protein ID and download its FASTA record.

    Args:
        term: Free-text query handed to ``Entrez.esearch`` (e.g. a gene name).

    Returns:
        The single record parsed by ``SeqIO.read`` from the FASTA response.

    Raises:
        ValueError: If the search returns no ID for ``term``.
    """
    # retmax=1 just returns first result of possibly many; i.e. may be wrong, use more specific ID
    handle = Entrez.esearch(db='Protein', term=term, retmax=1)
    try:
        root = ElementTree.fromstring(handle.read())
    finally:
        # Release the connection even if the XML cannot be parsed.
        handle.close()

    # find() returns None when the IdList is empty; fail with a clear message
    # instead of an AttributeError on ``None.text``.
    id_node = root.find("IdList/Id")
    if id_node is None:
        raise ValueError("no Protein ID found for search term %r" % (term,))
    id_number = id_node.text
    print(term, '->', id_number)  # ATK1 -> 1039008188

    handle = Entrez.efetch(db='Protein', id=id_number, retmode='text', rettype='fasta')
    try:
        seq_record = SeqIO.read(handle, 'fasta')
    finally:
        handle.close()

    # Pause between terms; presumably to stay within NCBI's request rate limit.
    time.sleep(1)
    return seq_record
# Resolve and download every term up front.
out = [fetch(my_term) for my_term in TERMS]
with open('out.fasta', 'w') as f:
    # SeqIO.write accepts an iterable of records, so one call writes them all.
    SeqIO.write(out, f, 'fasta')
`Entrez.esearch()` 的结果以 XML 文档形式返回,因此我们使用 `cElementTree` 对其进行解析。此查询会有多个结果,但我们只是简单地请求了第一个——您需要通过检查多个结果或提供更具体的检索词来解决这个问题。另外,您的代码把 `retmode` 和 `rettype` 的值写反了。