爬取指定 URL 网页中的所有图片：把 url 填上就能用了；下面的筛选条件可以根据需要自行修改。
import os
import random
import re
import time

# Captures the value of every double-quoted src="..." attribute.
# re.S lets "." match newlines, so a value that spans lines is still captured.
_SRC_PATTERN = re.compile(r'src="(.*?)"', re.S)


def Find(string):
    """Return the values of all double-quoted ``src="..."`` attributes.

    Args:
        string: Text (typically HTML) to scan.

    Returns:
        A list of the captured attribute values, in order of appearance.
        Single-quoted or unquoted ``src`` attributes are not matched.
    """
    return _SRC_PATTERN.findall(string)


# ---- Configuration: fill these in before running ----
url = ''  # page whose images should be downloaded
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
headers = {"User-Agent": user_agent}
headers2 = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 ",
}
name = 'xxxx'  # prefix of every saved file: <name><index>.jpg
save_dir = 'D:\\temp'  # directory the images are written to
request_timeout = 30  # seconds; without a timeout requests can block forever


def main():
    """Download every image referenced by ``url`` into ``save_dir``.

    Fetches the page, extracts all ``src`` values with ``Find`` and saves
    each one that is longer than 20 characters and starts with ``https://``
    as ``<name><index>.jpg``, where ``index`` is the 1-based position of
    the value in the extracted list (skipped entries still consume an
    index). Adjust the filter below to taste.

    Raises:
        SystemExit: If ``url`` has not been filled in.
        requests.HTTPError: If the page itself cannot be fetched.
    """
    # Third-party dependency; imported here so that Find() stays importable
    # (and testable) on machines without requests installed.
    import requests

    if not url:
        raise SystemExit("Set `url` to the page you want to scrape first.")

    os.chdir(save_dir)

    html_r = requests.get(url, headers=headers2, timeout=request_timeout)
    html_r.raise_for_status()

    lis = Find(html_r.text)
    print(len(lis), lis)

    for i, ele in enumerate(lis, start=1):
        if not (len(ele) > 20 and ele.startswith('https://')):
            continue
        try:
            img = requests.get(ele, headers=headers2, timeout=request_timeout)
            # Do not save an HTML error page under a .jpg name.
            img.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the whole crawl.
            print(f"skipping {ele}: {err}")
        else:
            savename = name + str(i) + '.jpg'
            with open(savename, 'wb') as w:
                w.write(img.content)
        # Pause 4-5 seconds between requests to go easy on the server.
        time.sleep(random.randrange(4, 6))


if __name__ == "__main__":
    main()
标签:网页,img,url,ele,爬取,537.36,lis,import,python3 From: https://www.cnblogs.com/smatrchen/p/16871373.html