# https://www.umei.cc/bizhitupian/meinvbizhi/index_2.htm
# This script is provided for personal learning purposes only.
#
# Scrape the umei.cc wallpaper listing page, follow each item's detail page,
# extract the full-size image URL, and save each image to the current
# directory as "<title>.jpg".

import requests
from bs4 import BeautifulSoup  # fixed: original import was truncated ("BeautifulSou")

headers = {
    'authority': 'www.umei.cc',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    # 'cookie': '__51vcke__K0KOUvCHIpTH8Vt6=81cf9431-fdbf-5504-93a6-08ac77e51820; __51vuft__K0KOUvCHIpTH8Vt6=1721268844171; __51uvsct__K0KOUvCHIpTH8Vt6=3; gxgefecookieinforecord=%2C67-317404%2C; __vtins__K0KOUvCHIpTH8Vt6=%7B%22sid%22%3A%20%223b0f6380-c11e-5ba6-af30-dcd1896efbea%22%2C%20%22vd%22%3A%205%2C%20%22stt%22%3A%20482411%2C%20%22dr%22%3A%2016586%2C%20%22expires%22%3A%201721271448467%2C%20%22ct%22%3A%201721269648467%7D',
    'referer': 'https://www.umei.cc/bizhitupian/',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
}

# NOTE: if the server answers 304, remove the caching-related headers
# ('cache-control' and the commented-out 'cookie') as the original author noted.
# Fixed: the original passed cookies=cookies to every requests.get() call,
# but `cookies` was never defined (a NameError); the cookie header above is
# commented out, so the argument is simply dropped here.
response = requests.get('https://www.umei.cc/bizhitupian/meinvbizhi/', headers=headers)
response.encoding = "utf-8"  # the page is UTF-8; without this the text decodes as mojibake

soup = BeautifulSoup(response.text, 'lxml')
photos = soup.select("div.title a")  # anchor elements that link to each wallpaper's detail page

for p in photos:
    # Build the absolute detail-page URL and grab the human-readable title.
    # NOTE(review): splitting href on "=" is a no-op for hrefs like
    # "/bizhitupian/meinvbizhi/314119.htm" (no "=" present) — kept as-is
    # to preserve the original behavior for hrefs that do contain "=".
    p_url = p['href'].split("=")[-1]
    pnurl = "https://www.umei.cc/" + p_url
    p_name = p.text
    print(pnurl, p_name)

    # The detail page contains the full-size image, e.g.:
    # <div class="big-pic"><a href="/bizhitupian/meinvbizhi/314119.htm">
    #   <img alt="" src="https://www.umei.cc/d/file/20230520/b19e....jpg"/></a></div>
    res = requests.get(url=pnurl, headers=headers)
    res.encoding = "utf-8"  # fixed: original had a stray trailing comma making a tuple
    soup = BeautifulSoup(res.text, 'lxml')

    pp = soup.select("div.big-pic a img")[-1]
    pp_url = pp['src']  # fixed: original had garbled full-width brackets around 'src'

    # Download the image bytes and save under the page title.
    # NOTE(review): p_name comes straight from the page and may contain
    # characters that are invalid in filenames — sanitize if that occurs.
    res1 = requests.get(url=pp_url, headers=headers)
    with open(f'{p_name}.jpg', 'wb') as f:  # fixed: original leaked the file handle
        f.write(res1.content)

# Source article: https://blog.csdn.net/lfsysc/article/details/140533914