首页 > 其他分享 > dl_images_4.py

dl_images_4.py

时间:2023-10-06 22:27:01 浏览次数:30
标签:dl img url py list fail images import process

 

 

#!/usr/bin/env python3
import os
import sys
import pandas as pd
import requests
from requests.packages.urllib3.util import Retry
from requests.adapters import HTTPAdapter
from requests import Session
import time
import logging
from logging.handlers import RotatingFileHandler
import re
from clickhouse_driver import Client
from multiprocessing import Process
from multiprocessing import cpu_count
import multiprocessing

'''
        Read image URLs from CSV files and download the images in bulk,
        requesting each URL multiple times (with retries) when needed.
'''


def get_xg_images_url(csv_path='./xg_fail_rec.csv'):
    """Load the download list for the "xg" image set from a CSV file.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``./xg_fail_rec.csv`` relative to the current working directory.

    Returns:
        A 3-tuple of pandas Series taken from the columns ``image_url1``,
        ``license_plate2`` and ``capture_time`` (in that order).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If one of the three columns is missing from the file.
    """
    df = pd.read_csv(csv_path)
    return df['image_url1'], df['license_plate2'], df['capture_time']
def get_am_images_url(csv_path='./am_fail_rec.csv'):
    """Load the download list for the "am" image set from a CSV file.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``./am_fail_rec.csv`` relative to the current working directory.

    Returns:
        A 3-tuple of pandas Series taken from the columns ``image_url1``,
        ``license_plate2`` and ``capture_time`` (in that order).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If one of the three columns is missing from the file.
    """
    df = pd.read_csv(csv_path)
    return df['image_url1'], df['license_plate2'], df['capture_time']
def download_img(img_url, num, result_path, plateNo, ct, access_fail_c, timeout=30):
    """Download one image and store it in ``result_path``.

    The file is named ``<plateNo>_<ct>+<num>.jpg``. Every kind of failure
    (request error, non-200 status, write error) increments ``access_fail_c``
    exactly once and is logged through the root logger.

    Args:
        img_url: URL of the image to fetch.
        num: Index of this task; used in the file name and in log messages.
        result_path: Directory the image is written to (created if missing).
        plateNo: First component of the output file name.
        ct: Second component of the output file name.
        access_fail_c: Shared ``multiprocessing.Value`` failure counter.
        timeout: Timeout in seconds passed to ``requests``; without it a
            stalled server would block this worker forever.

    Raises:
        SystemExit: With status 1 when the HTTP request itself fails
            (connection error, timeout, retries exhausted, ...).
    """
    # Resolve the logger locally instead of relying on a module global that is
    # only created under the ``__main__`` guard of the parent process.
    log = logging.getLogger()

    def count_failure():
        # ``value += 1`` is a read-modify-write; hold the Value's lock so
        # concurrent worker processes do not lose updates.
        with access_fail_c.get_lock():
            access_fail_c.value += 1

    # Build an explicit target path rather than chdir()-ing the process.
    os.makedirs(result_path, exist_ok=True)
    img_name = '{}_{}+{}.jpg'.format(plateNo, ct, num)
    img_file = os.path.join(result_path, img_name)

    retries = Retry(total=10, backoff_factor=0.1, status_forcelist=[500])
    try:
        with Session() as s:
            adapter = HTTPAdapter(max_retries=retries)
            s.mount('http://', adapter)
            s.mount('https://', adapter)
            img_obj = s.get(img_url, timeout=timeout)
    except Exception:
        # Process entry point: any request failure must be counted, but
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        count_failure()
        log.error("connect fail {}  ".format(img_url), exc_info=True)
        log.info("child_process {} exited... ".format(num))
        sys.exit(1)

    if img_obj.status_code != 200:
        count_failure()
        log.error("download {} fail staus = {}".format(img_url, img_obj.status_code))
    else:
        try:
            with open(img_file, 'wb') as f:
                f.write(img_obj.content)
        except OSError:
            count_failure()
            log.error("saved fail {} fail staus = {}".format(img_url, img_obj.status_code))
        else:
            log.info("saved success {}  staus = {}".format(img_url, img_obj.status_code))
    log.info("child_process {} exited... ".format(num))
def _run_download_batch(image_urls, license_plates, capture_times, result_path, access_fail_c):
    """Download one batch of images, one child process per image.

    At most ``cpu_count()`` child processes run at the same time. The
    function returns only after every child of this batch has finished.

    Args:
        image_urls: Iterable of image URLs.
        license_plates: Iterable of file-name prefixes, aligned with
            ``image_urls``.
        capture_times: Iterable of file-name parts, aligned with
            ``image_urls``.
        result_path: Directory the images are written to (created if missing).
        access_fail_c: Shared ``multiprocessing.Value`` failure counter that
            is handed to every ``download_img`` call.
    """
    from multiprocessing.connection import wait

    log = logging.getLogger()
    max_workers = cpu_count()
    os.makedirs(result_path, exist_ok=True)

    running = []
    rows = zip(image_urls, license_plates, capture_times)
    for num, (url, plate, ct) in enumerate(rows):
        while len(running) >= max_workers:
            # Block until at least one child exits instead of spinning, then
            # rebuild the list (never remove from a list while iterating it).
            wait([p.sentinel for p in running])
            running = [p for p in running if p.is_alive()]
        worker = Process(
            target=download_img,
            args=(url, num, result_path, plate, ct, access_fail_c),
        )
        log.info("child_process {} started... ".format(num))
        worker.start()
        running.append(worker)

    for worker in running:
        worker.join()


def start_process():
    """Download both image sets and print the total number of failures.

    Reads the download lists via ``get_xg_images_url`` and
    ``get_am_images_url``, stores the images under ``fail_images/fail_xg/``
    and ``fail_images/fail_am/`` next to this script, and finally prints the
    value of the shared failure counter.
    """
    access_fail_c = multiprocessing.Value('d', 0)
    base_dir = os.path.dirname(os.path.abspath(__file__))

    # First batch: "xg" images.
    image_url, license_plate, cts = get_xg_images_url()
    print('港牌url总数: {}'.format(len(image_url)))
    _run_download_batch(
        image_url, license_plate, cts,
        base_dir + "/fail_images/fail_xg/", access_fail_c,
    )

    # Second batch: "am" images.
    image_url, license_plate, cts = get_am_images_url()
    print('澳牌url总数: {}'.format(len(image_url)))
    _run_download_batch(
        image_url, license_plate, cts,
        base_dir + "/fail_images/fail_am/", access_fail_c,
    )

    print('访问图片失败总数:{}'.format(access_fail_c.value))
if __name__ == '__main__':
    # Script entry point: send INFO-and-above records of the root logger to
    # "dl_images.log" next to this script, then run the downloads.
    log_path = os.path.dirname(os.path.abspath(__file__)) + "/dl_images.log"
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler = RotatingFileHandler(log_path)
    handler.setFormatter(formatter)

    # ``logger`` stays a module-level name: other functions in this file
    # read it as a global.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)

    start_process()

 

标签:dl,img,url,py,list,fail,images,import,process
From: https://www.cnblogs.com/lfxx/p/17745177.html

相关文章

  • dl_images_gt.py
      #!/usr/bin/envpython3importosimportsysimportdatetimeimportpandasaspdimportrequestsfromrequestsimportSessionfromrequests.packages.urllib3.utilimportRetryfromrequests.adaptersimportHTTPAdapterimporttimeimportloggingfromlo......
  • pn_recognize_fail_3.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClient"""统计佛山市市级卡口的港澳过车总数,识别率及格的总数"""if__name__=='......
  • pn_recognize_fail_YLKK.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClient"""统......
  • pn_recognize_fail_SJKK_2.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath......
  • pn_recognize_xny2.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath"""统计佛山市所有卡口的港澳过车总数,识别率"""......
  • tcc_pn_recognize_fail.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath"""统计佛山市停车场的所有卡口的过车总数"""......
  • pn_recognize_fail_SJKK_4.py
      #!/usr/bin/python3importos,statimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimport......
  • Python 元组完全指南1
    元组用于在单个变量中存储多个项目。mytuple=("apple","banana","cherry")元组是Python中的4种内置数据类型之一,用于存储数据集合,另外还有列表、集合和字典,它们都具有不同的特性和用途。元组是有序且不可更改的集合。元组使用圆括号表示。示例,创建一个元组:thistuple=......
  • area_recognize_fail.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientif__name__=='__main__':logging.basicConfig(filename=os.path.dirname......
  • count_ga_5.py
      #!/usr/bin/python3'''作用:统计港澳车的识别率,分别输出港牌和澳牌识别失败的港澳车的二次识别车牌、筛选过的时间和图片url的csv文件'''importosimportsysimportreimportpymysqlimporttimeimportdatetimeimportloggingimportpandasaspdimportre......