首页 > 其他分享 >dl_images_gt.py

dl_images_gt.py

时间:2023-10-06 22:26:35 浏览次数:32
标签:dl img url py list fail time images import

 

 

#!/usr/bin/env python3
import os
import sys
import datetime
import pandas as pd
import requests
from requests import Session
from requests.packages.urllib3.util import Retry
from requests.adapters import HTTPAdapter
import time
import logging
from logging.handlers import RotatingFileHandler
import re
from clickhouse_driver import Client
from multiprocessing import Process
from multiprocessing import cpu_count
import multiprocessing
def get_images_url():
    """Fetch image URLs and their capture times from ClickHouse.

    Runs one fixed query against ``yisa_oe.face_all`` (rows with
    ``hair_id = 1`` and ``clarity_id`` 1 or 2, at most 20000 rows) on the
    server at 127.0.0.1:9001.

    Returns:
        A ``(image_url1, capture_time)`` tuple of pandas Series. Both are
        columns of the same DataFrame, so they share one index.

    Raises:
        SystemExit: with status 1 when the client cannot be created or the
            query fails (the cause is logged with a traceback first).
    """
    # NOTE(review): host, port and password are hard-coded here; consider
    # moving the credentials out of the source file.
    try:
        cursor = Client(host='127.0.0.1', port=9001, password='Ys_gz@2022')
    except Exception:
        # Log as an error with the traceback so the cause is not lost.
        logging.exception("连接失败!")
        sys.exit(1)

    sql = "select image_url1,capture_time from yisa_oe.face_all where hair_id = 1  and (clarity_id = 1 or clarity_id = 2) limit 20000"
    try:
        results = cursor.execute(sql)
    except Exception:
        logging.exception("语句执行错误!")
        sys.exit(1)
    finally:
        # The rows are already in memory (or the query failed); either way
        # the connection is no longer needed.
        cursor.disconnect()

    data_list = []  # one [image_url1, capture_time] list per returned row
    for row in results:
        logging.info(row)
        data_list.append(list(row))

    df = pd.DataFrame(data_list, columns=['image_url1', 'capture_time'])
    return df['image_url1'], df['capture_time']
def download_img(img_url,num,result_path,ct,access_fail_c):
    """Download one image and save it as ``_<ct>_<num>.jpg`` in result_path.

    Intended to run as the target of a child process.

    Args:
        img_url: URL of the image to fetch.
        num: Sequence number of this download; used in the file name and in
            the log messages.
        result_path: Directory the image is written to. It is created if it
            does not exist.
        ct: Capture time of the image; converted with ``str()`` for the file
            name, so non-string values are accepted.
        access_fail_c: Shared ``multiprocessing.Value`` counter that is
            incremented once for every failed download or save.

    Raises:
        SystemExit: with status 1 when the HTTP request itself fails
            (connection error, timeout, retries exhausted).
    """
    # Look the root logger up here instead of relying on a module-level
    # ``logger`` global, which only exists when the file runs as a script.
    log = logging.getLogger()

    def _count_failure():
        # ``value += 1`` is a read-modify-write; without the lock, concurrent
        # workers can lose increments.
        with access_fail_c.get_lock():
            access_fail_c.value += 1

    # Build an explicit path instead of chdir()-ing into the directory, and
    # make sure the directory exists (exist_ok keeps this race-free).
    os.makedirs(result_path, exist_ok=True)
    img_path = os.path.join(result_path, '_' + str(ct) + '_' + str(num) + '.jpg')

    retries = Retry(total=10,backoff_factor=0.1,status_forcelist=[500])
    try:
        with Session() as s:
            adapter = HTTPAdapter(max_retries=retries)
            s.mount('http://', adapter)
            s.mount('https://', adapter)
            # (connect, read) timeouts so a stalled server cannot block this
            # worker forever.
            img_obj = s.get(img_url, timeout=(10, 60))
    except Exception:
        _count_failure()
        log.error("connect fail {}  ".format(img_url))
        log.info("child_process {} exited... ".format(num))
        sys.exit(1)

    # Anything other than 200 (e.g. 404) counts as a failed download.
    if img_obj.status_code != 200:
        _count_failure()
        log.error("download {} fail staus = {}".format(img_url,img_obj.status_code))
    else:
        try:
            with open(img_path,'wb') as f:
                f.write(img_obj.content)
        except OSError:
            _count_failure()
            log.error("saved fail {} fail staus = {}".format(img_url,img_obj.status_code))
        else:
            log.info("saved success {}  staus = {}".format(img_url,img_obj.status_code))
    log.info("child_process {} exited... ".format(num))
def start_process():
    """Download every queried image, one child process per image.

    Fetches the URL / capture-time columns with ``get_images_url()`` and
    starts a ``download_img`` process for each row, keeping at most
    ``cpu_count()`` children alive at once. Images go to the
    ``result_images`` directory next to this script. When all children have
    finished, the number of failed downloads is logged.
    """
    log = logging.getLogger()
    # Shared failure counter, incremented by the children.
    access_fail_c = multiprocessing.Value('d',0)
    image_url,cts = get_images_url()

    result_path = os.path.dirname(os.path.abspath(__file__)) + "/result_images/"
    os.makedirs(result_path, exist_ok=True)

    max_workers = cpu_count()
    process_list = []
    # zip() pairs the two columns by position, independent of the index
    # labels of the Series.
    for i, (url, ct) in enumerate(zip(image_url, cts)):
        # Wait for a free slot. The list is rebuilt rather than mutated while
        # iterating over it, and the loop sleeps briefly instead of spinning.
        while len(process_list) >= max_workers:
            process_list = [p for p in process_list if p.is_alive()]
            if len(process_list) >= max_workers:
                time.sleep(0.05)

        Pro = Process(target=download_img,args=(url,i,result_path,ct,access_fail_c))
        log.info("child_process {} started... ".format(i))
        Pro.start()
        process_list.append(Pro)

    for p in process_list:
        p.join()
    log.info("all child processes finished, failed = {}".format(int(access_fail_c.value)))
if __name__ == '__main__':
    # Script entry point: send all log records (DEBUG and above) from the
    # root logger to dl_images.log next to this script, then start the
    # downloads.
    # `logger` is kept as a module-level name because other code in this
    # file may look it up as a global.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    script_dir = os.path.dirname(os.path.abspath(__file__))
    # No maxBytes is passed, so this handler never rolls the file over.
    log_handler = RotatingFileHandler(script_dir + "/dl_images.log")
    log_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    logger.addHandler(log_handler)

    start_process()

 

标签:dl,img,url,py,list,fail,time,images,import
From: https://www.cnblogs.com/lfxx/p/17745179.html

相关文章

  • pn_recognize_fail_3.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClient"""统计佛山市市级卡口的港澳过车总数,识别率及格的总数"""if__name__=='......
  • pn_recognize_fail_YLKK.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClient"""统......
  • pn_recognize_fail_SJKK_2.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath......
  • pn_recognize_xny2.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath"""统计佛山市所有卡口的港澳过车总数,识别率"""......
  • tcc_pn_recognize_fail.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimportPath"""统计佛山市停车场的所有卡口的过车总数"""......
  • pn_recognize_fail_SJKK_4.py
      #!/usr/bin/python3importos,statimportsysimportreimportpymysqlimporttimefromdatetimeimporttimedeltafromdatetimeimportdatetimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientfrompathlibimport......
  • Python 元组完全指南1
    元组用于在单个变量中存储多个项目。mytuple=("apple","banana","cherry")元组是Python中的4种内置数据类型之一,用于存储数据集合,另外还有列表、集合和字典,它们都具有不同的特性和用途。元组是有序且不可更改的集合。元组使用圆括号表示。示例,创建一个元组:thistuple=......
  • area_recognize_fail.py
      #!/usr/bin/python3importosimportsysimportreimportpymysqlimporttimeimportloggingimportpandasaspdimportrequestsfromclickhouse_driverimportClientif__name__=='__main__':logging.basicConfig(filename=os.path.dirname......
  • count_ga_5.py
      #!/usr/bin/python3'''作用:统计港澳车的识别率,分别输出港牌和澳牌识别失败的港澳车的二次识别车牌、筛选过的时间和图片url的csv文件'''importosimportsysimportreimportpymysqlimporttimeimportdatetimeimportloggingimportpandasaspdimportre......
  • post_image.py
    说明:在特定的目录中通过文件名通配符筛选出图片,向某个接口发送post请求传输图片识别图片,并判断post请求返回的结果 #!/usr/bin/python#-*-coding:utf-8-*-importos,sys,glob#importtqdmimportmultiprocessing#importreimporttimeimportthreadingimportreq......