#pip install requests-html
"""Bulk wallpaper downloader for https://pic.netbian.com.

The script asks for a picture category and a page count, collects the
full-size image URL of every picture listed on those pages, and downloads
the images in parallel into ``./图片/<category>``.
"""
import os
import re
from functools import partial
from multiprocessing import Pool

import requests
from requests_html import HTMLSession
from tqdm import tqdm

BASE_URL = 'https://pic.netbian.com'
MAX_PAGES = 10          # upper bound on listing pages fetched per run
WORKER_COUNT = 10       # number of parallel download processes
REQUEST_TIMEOUT = 30    # seconds; keeps a stalled connection from hanging a worker

# Detail-page links look like https://pic.netbian.com/tupian/12345.html
_DETAIL_PAGE_RE = re.compile(r'(https://pic\.netbian\.com/tupian/[0-9]{1,9}\.html)')
# Characters that are not allowed (or are unsafe) in file names on common systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

# Category label shown in the menu -> listing path on the site.
CATEGORIES = {
    '4k风景': '/4kfengjing/',
    '4k美女': '/4kmeinv/',
    '4k游戏': '/4kyouxi/',
    '4k动漫': '/4kdongman/',
    '4k影视': '/4kyingshi/',
    '4k汽车': '/4kqiche/',
    '4k动物': '/4kdongwu/',
    '4k人物': '/4krenwu/',
    '4k美食': '/4kmeishi/',
    '4k宗教': '/4kzongjiao/',
    '4k背景': '/4kbeijing/',
    '4k手机壁纸': '/shoujibizhi/',
}

session = HTMLSession()


def get_urllist(addr):
    """Collect the full-size image URLs reachable from one listing page.

    :param addr: site-relative path of a listing page, e.g. ``/4kfengjing/``
        or ``/4kfengjing/index_2.html``.
    :return: dict mapping full image URL -> image title.
    """
    listing = session.get(f'{BASE_URL}{addr}')

    # Keep only links that point at picture detail pages; a set removes
    # duplicates and sorting makes the request order deterministic.
    detail_pages = set()
    for link in listing.html.absolute_links:
        found = _DETAIL_PAGE_RE.search(link)
        if found:
            detail_pages.add(found.group(1))

    complete_url_dict = {}
    for page_url in sorted(detail_pages):
        try:
            response = session.get(page_url)
        except requests.RequestException as err:
            # One unreachable detail page should not abort the whole run.
            print(page_url, '获取失败,跳过:', err)
            continue

        # The detail page carries a site-relative image path and its title.
        imperfect_url = response.html.xpath("//a[@id='img']/img/@src", first=True)
        title = response.html.xpath("//a[@id='img']/img/@title", first=True)
        if not imperfect_url or not title:
            # Page layout differs (or the page is an error page): nothing to download.
            continue

        complete_url_dict[f'{BASE_URL}{imperfect_url}'] = title
    return complete_url_dict


def _safe_filename(title, url):
    """Return *title* with unsafe characters replaced, falling back to the URL's base name."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', str(title)).strip()
    if cleaned:
        return cleaned
    return os.path.splitext(os.path.basename(url))[0] or 'image'


def download(url_dict, save_path):
    """Download a single image unless it is already present in *save_path*.

    :param url_dict: a ``(url, title)`` pair, i.e. one item of the dict
        returned by :func:`get_urllist` (the name is kept for compatibility).
    :param save_path: directory the image is written to as ``<title>.jpg``.
    :return: None
    """
    url = url_dict[0]
    title = url_dict[1]
    target = os.path.join(save_path, f'{_safe_filename(title, url)}.jpg')

    if os.path.exists(target):
        print(title, '已存在...跳过!')
        return

    # Fetch before opening the file so a failed request never leaves an
    # empty .jpg behind that a later run would treat as already downloaded.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print(title, '下载失败:', err)
        return

    with open(target, mode='wb') as f:
        f.write(response.content)


def main():
    """Run the interactive downloader: pick a category, pick a page count, download."""
    names = list(CATEGORIES)

    print('图片下载器'.center(50, '='))
    for index, label in enumerate(names, start=1):
        print(f'{index}.{label}')
    print(''.center(50, '='))

    # Only the int() conversions belong in the try body; the exits below must
    # not be swallowed by the handler.
    try:
        idd = int(input('请选择图片序号:')) - 1
        num = int(input('请选择下载页数:'))
    except ValueError:
        print('请输入数字!')
        raise SystemExit

    if not 0 <= idd < len(names):
        # Without this check 0 would silently select the last category and
        # large numbers would raise IndexError.
        print(f'请输入1-{len(names)}之间的序号!')
        raise SystemExit
    if num > MAX_PAGES:
        print('为确保安全,最多下载10页!!')
        num = MAX_PAGES
    if num <= 0:
        print('1<下载页数<10')
        raise SystemExit

    print('正在获取下载链接-/-/')
    name = names[idd]
    # First listing page.
    url_dict = get_urllist(CATEGORIES[name])
    # Remaining listing pages are named index_2.html, index_3.html, ...
    for page_no in range(2, num + 1):
        url_dict.update(get_urllist(f'{CATEGORIES[name]}index_{page_no}.html'))
    print('下载图片张数:', len(url_dict))

    save_path = f'./图片/{name}'
    # makedirs also creates the parent ./图片 directory on the first run.
    os.makedirs(save_path, exist_ok=True)

    print('正在下载-/-/')
    func = partial(download, save_path=save_path)
    # The context manager tears the pool down even if a worker raises;
    # list() drains every result before the block exits.
    with Pool(WORKER_COUNT) as pool:
        list(tqdm(pool.imap(func, url_dict.items()), total=len(url_dict), ncols=80))


if __name__ == '__main__':
    main()
# 标签: title, python, list, url, html, 4k, print, requests
# From: https://www.cnblogs.com/boye169/p/17293643.html