import psycopg2
import logging, time
import requests as req
# libpq-style connection URI used by update_db(). The host and database name
# are placeholders ('***') that must be replaced with real values; the user is
# 'postgres' with an empty password, and the connection is tagged with
# application_name=json_parse.
DB_URI = 'postgresql://postgres:@{dbhost}:5432/{dbname}?application_name=json_parse'.format(
    dbhost='***',
    dbname='***',
)
class ConnectDB(object):
    """Context manager that opens a psycopg2 connection and always closes it.

    Usage::

        with ConnectDB(url) as conn:
            ...

    The value bound by ``as`` is the raw connection returned by
    ``psycopg2.connect``. No commit is issued on exit, so callers must commit
    explicitly before leaving the ``with`` block. Exceptions raised inside the
    block are not suppressed.
    """

    def __init__(self, url):
        """Remember the connection URI; no connection is opened yet.

        Args:
            url: Connection string passed verbatim to ``psycopg2.connect``.
        """
        self.url = url
        # Defined up front so __exit__ is safe even when __enter__ never ran.
        self.conn = None

    def __enter__(self):
        """Open the connection and return it."""
        self.conn = psycopg2.connect(self.url)
        return self.conn

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection if one is open and drop the reference to it.

        Returns None, so any exception from the ``with`` body propagates.
        """
        if self.conn is not None:
            try:
                self.conn.close()
            finally:
                # Clear the reference even if close() itself raises.
                self.conn = None
def update_db(sql, value):
    """Run ``sql`` once per parameter tuple in ``value`` and commit.

    A fresh connection to ``DB_URI`` is opened for the call and closed when it
    finishes.

    Args:
        sql: Parameterised SQL statement (``%s`` placeholders).
        value: Sequence of parameter tuples handed to ``cursor.executemany``.

    Returns:
        None.
    """
    # An empty batch has nothing to write, so skip the connection round-trip.
    # Only sized containers are checked; iterators are passed through as-is.
    if hasattr(value, '__len__') and len(value) == 0:
        return None
    with ConnectDB(DB_URI) as db_con:
        with db_con.cursor() as cur:
            cur.executemany(sql, value)
        db_con.commit()
    return None
def get_ids(total=71786370, step=10000, max_retries=3, timeout=60):
    """Page through NCBI ESearch results for the gene database and store the IDs.

    For each offset ``0, step, 2*step, ...`` below ``total`` the ESearch
    endpoint is queried for up to ``step`` IDs, and the returned ``idlist`` is
    inserted into the ``gene`` table through ``update_db``. Progress and
    failures are written to ``/home/zcy/ncbi/NCBI.log``.

    Each page gets its own budget of ``max_retries`` attempts. An attempt
    counts as failed when the request raises, the HTTP status is not 200, or
    the body cannot be parsed. After a failed attempt the function sleeps for
    one second. A page whose attempts all fail is logged as an error and
    skipped; the crawl continues with the next page.

    Args:
        total: Exclusive upper bound for the ``retstart`` offset. The default
            is presumably the gene record count when the script was written.
            TODO confirm against the ``count`` field of a live response.
        step: Page size, sent as ``retmax``. Must be a positive integer.
        max_retries: Number of attempts per page.
        timeout: Timeout in seconds for each HTTP request.

    Raises:
        Whatever ``update_db`` raises; database errors are not retried.
    """
    logging.basicConfig(filename='/home/zcy/ncbi/NCBI.log', level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(lineno)s: %(message)s')
    email = 'zzzz'
    api_key = 'zzzz'
    rooturl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    sql = 'INSERT INTO gene (gene_id) VALUES(%s);'

    for item in range(0, total, step):
        logging.info('item=%s', item)
        # The URL only depends on the page offset, so build it once per page.
        # NOTE(review): the URL, including api_key, is written to the debug log.
        url = (f'{rooturl}?db=gene&term="all"[filter]&retstart={item}&retmax={step}'
               f'&retmode=json&sort=name&usehistory=y&email={email}&api_key={api_key}')
        # The retry budget is per page. The previous shared counter was never
        # reset, so once it reached zero every later page was skipped unrequested.
        for _ in range(max_retries):
            logging.debug(url)
            try:
                rsp = req.get(url, timeout=timeout)
            except req.RequestException as exc:
                # A network error counts as a failed attempt, not a fatal one.
                logging.warning('call %s raised %s', url, exc)
            else:
                if rsp.status_code == 200:
                    try:
                        raw_ids = rsp.json().get('esearchresult', {}).get('idlist', [])
                        id_lst = [(int(i),) for i in raw_ids]
                    except ValueError:
                        # Invalid JSON or a non-numeric ID: retry the page.
                        logging.warning('call %s returned an unparsable body, rsp=%s',
                                        url, rsp.text)
                    else:
                        logging.info('call success')
                        update_db(sql, id_lst)
                        break
                else:
                    logging.warning('call %s failed, rsp=%s', url, rsp.text)
            time.sleep(1)
        else:
            # Reached only when no attempt ended in `break`.
            logging.error('retstart= %s failed', item)
if __name__ == '__main__':
get_ids()
# Tags: __, logging, item, url, self, db, postgre, insert, data
# From: https://blog.51cto.com/u_12890843/8257181