Python requests.Session 协程 下载文件
# coding: utf-8
"""Download a list of URLs concurrently with gevent greenlets and one shared requests.Session."""
from gevent import monkey
# Patch the standard library before `requests` is imported so that the
# blocking socket calls it makes yield to other greenlets.
monkey.patch_all()

from gevent.pool import Pool
import gevent
import requests
import os
import sys
import time
import urllib3

# Requests below are made with verify=False; silence the per-request warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class Downloader:
    """Fetch URLs in parallel and store each one as a file in a target directory.

    The file name is the last path segment of the URL with any query string
    removed. Files that already exist in the target directory are skipped.

    Attributes set by ``run``:
        failed: URLs that could not be downloaded after all passes.
        url_total: number of URLs handed to the last ``run`` call.
        completed_count: number of URLs that reached a final state
            (downloaded, already present, or permanently failed).
    """

    def __init__(self, pool_size, retry=3, max_rounds=3, timeout=20):
        """Create the greenlet pool and the HTTP session.

        Args:
            pool_size: maximum number of concurrent greenlets; also used as
                the connection-pool size of the HTTP adapter.
            retry: attempts per URL within one pass (also passed to the HTTP
                adapter as ``max_retries``).
            max_rounds: how many passes are made over the URLs that are still
                failing before they are reported in ``failed``. Values below 1
                are treated as 1.
            timeout: per-request timeout in seconds.
        """
        self.pool = Pool(pool_size)
        self.session = self._get_http_session(pool_size, pool_size, retry)
        self.retry = retry  # number of attempts per URL within one pass
        self.max_rounds = max_rounds
        self.timeout = timeout
        self.dir = ''
        self.failed = []
        self.url_total = 0
        self.completed_count = 0

    def _get_http_session(self, pool_connections, pool_maxsize, max_retries):
        """Return a Session whose http:// and https:// adapters share one pool configuration."""
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=pool_connections,
            pool_maxsize=pool_maxsize,
            max_retries=max_retries,
        )
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def run(self, url_list, dir=''):
        """Download every URL in ``url_list`` into ``dir`` and block until done.

        Args:
            url_list: iterable of URL strings.
            dir: target directory; created if missing. An empty string means
                the current working directory.

        After the call, ``self.failed`` lists the URLs that never succeeded.
        """
        self.dir = dir
        if self.dir:
            os.makedirs(self.dir, exist_ok=True)
        url_list = list(url_list)
        self.url_total = len(url_list)
        self.completed_count = 0
        self.failed = []  # do not carry failures over from an earlier run
        print('total ts count:', self.url_total)
        watcher = gevent.spawn(self._check_finish)
        try:
            self._download(url_list)
        except BaseException:
            # Without this the watcher would keep polling forever.
            watcher.kill()
            raise
        watcher.join()

    def _download(self, url_list):
        """Run the workers over ``url_list``, re-running only the failures.

        At most ``max_rounds`` passes are made, so a URL that can never
        succeed ends up in ``self.failed`` instead of being retried forever.
        """
        pending = list(url_list)
        for _ in range(max(1, self.max_rounds)):
            if not pending:
                break
            self.failed = []
            self.pool.map(self._worker, pending)
            pending = self.failed
        # Whatever is still failing is final; count it so that
        # completed_count reaches url_total.
        self.completed_count += len(self.failed)

    def _worker(self, url):
        """Download one URL, trying up to ``self.retry`` times.

        On success (or if the file already exists) ``completed_count`` is
        incremented; otherwise the URL is appended to ``self.failed``.
        """
        file_name = url.split('/')[-1].split('?')[0]
        file_path = os.path.join(self.dir, file_name)
        for _ in range(self.retry):
            if os.path.exists(file_path):
                print('exist:', file_name)
                self.completed_count += 1
                return
            try:
                self._fetch(url, file_path)
            except (requests.RequestException, OSError, RuntimeError):
                # Network error, bad HTTP status or write error: try again.
                continue
            print('download:', file_name)
            self.completed_count += 1
            return
        print('[FAIL]%s' % url)
        self.failed.append(url)

    def _fetch(self, url, file_path):
        """GET ``url`` and store the body at ``file_path``.

        Raises:
            RuntimeError: the response status is not OK.
            requests.RequestException: the request itself failed.
            OSError: the file could not be written.
        """
        # NOTE(review): certificate verification is disabled, as in the
        # original code; enable it if the sources are not trusted.
        response = self.session.get(url, timeout=self.timeout, verify=False)
        if not response.ok:
            raise RuntimeError('download fail')
        # Write to a temporary name first so that an interrupted write never
        # leaves a truncated file that a later attempt would treat as done.
        tmp_path = file_path + '.part'
        try:
            with open(tmp_path, 'wb') as f:
                f.write(response.content)
            os.replace(tmp_path, file_path)
        except OSError:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup; the original error is re-raised
            raise

    def _check_finish(self):
        """Poll until every URL has reached a final state."""
        while self.completed_count < self.url_total:
            time.sleep(0.01)


def main():
    """Download two sample images into ./dst_dir."""
    downloader = Downloader(50)  # number of greenlets
    url_list = [
        'https://pics1.baidu.com/feed/b999a9014c086e0610f3d6bf8bf4d6ff08d1cbf7.jpeg',
        'https://pics7.baidu.com/feed/d53f8794a4c27d1ef06a7b6195290065dfc438ca.jpeg',
    ]
    downloader.run(url_list, './dst_dir')


if __name__ == '__main__':
    main()
标签:session,协程,Python,self,list,url,Session,dir,pool From: https://www.cnblogs.com/zhaoyingjie/p/17096377.html