import requests
import os
import re
from time import sleep
from bs4 import BeautifulSoup
# Listing page whose thumbnails are scanned for downloadable pictures.
url = 'http://www.netbian.com/mei/'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}

# Seconds to wait on each HTTP request before giving up (requests has no
# default timeout and would otherwise block forever on a stalled server).
REQUEST_TIMEOUT = 10

# The last run of digits in a file name (only non-digits may follow it).
_TRAILING_DIGITS = re.compile(r'\d+(?=\D*$)')
# Characters that cannot appear in a file name on Windows or POSIX.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# NOTE: an earlier, abandoned attempt iterated over links whose href contains
# '/desk' and saved the response of http://www.netbian.com{href} directly;
# presumably that URL is a detail page rather than an image, which is why the
# thumbnail-based approach below is used instead.


def full_size_url(thumbnail_url):
    """Derive the full-size picture URL from a thumbnail URL.

    The file name (the part after the last '/') has every 'small' removed
    and its last run of digits dropped, e.g.
    '.../0821/small003421wZvKx1692549261.jpg' -> '.../0821/003421wZvKx.jpg'.

    Only the file name is rewritten, so directory components that happen to
    contain the same digits are left intact. A file name without digits is
    returned with just 'small' removed instead of raising IndexError.
    """
    directory, slash, filename = thumbnail_url.rpartition('/')
    filename = filename.replace('small', '')
    filename = _TRAILING_DIGITS.sub('', filename, count=1)
    return directory + slash + filename


def safe_filename(title, fallback='picture'):
    """Return '<title>.jpg' with characters illegal in file names replaced.

    Illegal characters become '_'. If the title is empty or only whitespace,
    *fallback* is used as the base name instead.
    """
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', title)
    if not cleaned.strip():
        cleaned = fallback
    return cleaned + '.jpg'


def download_pictures(page_url=url, request_headers=headers):
    """Download the full-size version of every thumbnail on *page_url*.

    Each <img> whose src contains 'small' is treated as a thumbnail; the
    picture is saved in the current directory, named after the tag's alt
    text. A picture that fails to download is reported and skipped.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched.
    """
    resp = requests.get(page_url, headers=request_headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    # The script decodes the listing page as GBK rather than trusting the
    # encoding requests would guess.
    resp.encoding = 'gbk'
    soup = BeautifulSoup(resp.text, 'lxml')

    for img in soup.find_all('img'):
        # .get() tolerates <img> tags that lack a src or alt attribute.
        src = img.get('src', '')
        if 'small' not in src:
            continue
        picture_title = img.get('alt', '')
        picture_url = full_size_url(src)

        try:
            picture_resp = requests.get(
                picture_url, headers=request_headers, timeout=REQUEST_TIMEOUT
            )
            # Do not save an HTTP error page under a .jpg name.
            picture_resp.raise_for_status()
        except requests.RequestException as exc:
            print(f'图片{picture_title}下载失败: {exc}')
            continue

        with open(safe_filename(picture_title), 'wb') as f:
            f.write(picture_resp.content)
        print('图片'+picture_title+'下载完成')


if __name__ == '__main__':
    download_pictures()
# Note: redid the image download today; to be continued tomorrow.
# Tags: picture, 21, title, Python, resp, url, href, 2023, headers
# From: https://blog.51cto.com/u_2469839/7178335