一、requests获取数据
import os

import requests

# Make sure the output directory exists; exist_ok=True makes this a no-op
# when the directory is already there.
os.makedirs('02-heng', exist_ok=True)

url = "https://yys.163.com/media/picture.html"
# Well-formed desktop Chrome User-Agent, sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops early on an HTTP error instead of parsing an error page.
resp = requests.get(url=url, headers=headers, timeout=10)
resp.raise_for_status()
# If resp.text looks garbled, set resp.encoding = resp.apparent_encoding before reading it.
二、xpath解析数据,得到想要的图片链接
from lxml import etree

# Parse the downloaded HTML and collect the href of every link found under
# the first div[@class='tab-cont'].
e = etree.HTML(resp.text)
images_heng = e.xpath("//div[@class='tab-cont'][1]/div/div/div/a/@href")

# Keep only the links whose file name, minus its 4-character extension
# (e.g. ".jpg"), is exactly "2732x2048".
lst_1 = [
    link for link in images_heng
    if link.split('/')[-1][:-4] == "2732x2048"
]
三、with open('**.jpg','wb')存储数据
import time

for url in lst_1:
    # Pause before every download to keep the crawl rate low.
    time.sleep(1)
    try:
        resp = requests.get(url, headers=headers, timeout=10)
        # Fail on HTTP errors so an error page is never written out as a .jpg.
        resp.raise_for_status()
    except requests.RequestException as err:
        # One failed image should not abort the remaining downloads.
        print(f"下载失败,已跳过:{url}({err})")
        continue
    # Build the file name from the URL path between the last 'picture' and
    # the last '/', with the slashes flattened to underscores.
    file_name = url[url.rindex('picture'):url.rindex('/')].replace('/', '_') + '.jpg'
    print(f"正在保存:{file_name}壁纸")
    with open(f'02-heng/{file_name}', 'wb') as f:
        f.write(resp.content)
print("全部保存完毕")
四、其它注意事项:控制爬取速度、建立保存目录
import os
import time

# Create the save directory before downloading; exist_ok=True makes this a
# no-op when the directory already exists, so no separate existence check is needed.
os.makedirs('02-heng', exist_ok=True)
分辨率为2732x2048的高清图片(横版):
# Download the 2732x2048 (landscape) wallpapers linked from
# https://yys.163.com/media/picture.html into the local folder "02-heng".
import os
import time

import requests
from lxml import etree

PAGE_URL = "https://yys.163.com/media/picture.html"
# Well-formed desktop Chrome User-Agent, sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
REQUEST_TIMEOUT = 10  # seconds allowed per HTTP request
REQUEST_DELAY = 1  # seconds to wait before each image download


def select_by_resolution(urls, resolution):
    """Return the URLs whose file name, minus its extension, equals *resolution*.

    The last path segment is compared after dropping its final four
    characters, i.e. a 4-character extension such as ".jpg".
    """
    return [url for url in urls if url.split('/')[-1][:-4] == resolution]


def build_file_name(url):
    """Return a flat ".jpg" file name derived from *url*.

    The name is the part of the URL from the last occurrence of 'picture'
    up to (not including) the last '/', with slashes turned into underscores.

    Raises:
        ValueError: if *url* contains no 'picture' substring or no '/'.
    """
    start = url.rindex('picture')
    end = url.rindex('/')
    return url[start:end].replace('/', '_') + '.jpg'


def download_wallpapers(resolution, tab_index, save_dir):
    """Download every wallpaper of *resolution* from one tab of the picture page.

    Args:
        resolution: File-name stem to match, e.g. "2732x2048".
        tab_index: 1-based position used in the div[@class='tab-cont'] XPath.
        save_dir: Directory the images are written to (created if missing).

    Raises:
        requests.RequestException: if the index page cannot be fetched.
    """
    os.makedirs(save_dir, exist_ok=True)

    page = requests.get(PAGE_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Without the index page there is nothing to download, so fail loudly here.
    page.raise_for_status()
    links = etree.HTML(page.text).xpath(
        f"//div[@class='tab-cont'][{tab_index}]/div/div/div/a/@href"
    )

    for url in select_by_resolution(links, resolution):
        # Pause before every download to keep the crawl rate low.
        time.sleep(REQUEST_DELAY)
        try:
            image = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            # Fail on HTTP errors so an error page is never saved as a .jpg.
            image.raise_for_status()
        except requests.RequestException as err:
            # One failed image should not abort the remaining downloads.
            print(f"下载失败,已跳过:{url}({err})")
            continue
        file_name = build_file_name(url)
        print(f"正在保存:{file_name}壁纸")
        with open(os.path.join(save_dir, file_name), 'wb') as f:
            f.write(image.content)
    print("全部保存完毕")


if __name__ == "__main__":
    # Landscape wallpapers: first tab, 2732x2048.
    download_wallpapers("2732x2048", 1, "02-heng")
分辨率为1080x1920的高清图片(竖版):
# Download the 1080x1920 (portrait) wallpapers linked from
# https://yys.163.com/media/picture.html into the local folder "02-shu".
import os
import time

import requests
from lxml import etree

PAGE_URL = "https://yys.163.com/media/picture.html"
# Well-formed desktop Chrome User-Agent, sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
REQUEST_TIMEOUT = 10  # seconds allowed per HTTP request
REQUEST_DELAY = 1  # seconds to wait before each image download


def select_by_resolution(urls, resolution):
    """Return the URLs whose file name, minus its extension, equals *resolution*.

    The last path segment is compared after dropping its final four
    characters, i.e. a 4-character extension such as ".jpg".
    """
    return [url for url in urls if url.split('/')[-1][:-4] == resolution]


def build_file_name(url):
    """Return a flat ".jpg" file name derived from *url*.

    The name is the part of the URL from the last occurrence of 'picture'
    up to (not including) the last '/', with slashes turned into underscores.

    Raises:
        ValueError: if *url* contains no 'picture' substring or no '/'.
    """
    start = url.rindex('picture')
    end = url.rindex('/')
    return url[start:end].replace('/', '_') + '.jpg'


def download_wallpapers(resolution, tab_index, save_dir):
    """Download every wallpaper of *resolution* from one tab of the picture page.

    Args:
        resolution: File-name stem to match, e.g. "1080x1920".
        tab_index: 1-based position used in the div[@class='tab-cont'] XPath.
        save_dir: Directory the images are written to (created if missing).

    Raises:
        requests.RequestException: if the index page cannot be fetched.
    """
    os.makedirs(save_dir, exist_ok=True)

    page = requests.get(PAGE_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Without the index page there is nothing to download, so fail loudly here.
    page.raise_for_status()
    links = etree.HTML(page.text).xpath(
        f"//div[@class='tab-cont'][{tab_index}]/div/div/div/a/@href"
    )

    for url in select_by_resolution(links, resolution):
        # Pause before every download to keep the crawl rate low.
        time.sleep(REQUEST_DELAY)
        try:
            image = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            # Fail on HTTP errors so an error page is never saved as a .jpg.
            image.raise_for_status()
        except requests.RequestException as err:
            # One failed image should not abort the remaining downloads.
            print(f"下载失败,已跳过:{url}({err})")
            continue
        file_name = build_file_name(url)
        print(f"正在保存:{file_name}壁纸")
        with open(os.path.join(save_dir, file_name), 'wb') as f:
            f.write(image.content)
    print("全部保存完毕")


if __name__ == "__main__":
    # Portrait wallpapers: second tab, 1080x1920.
    download_wallpapers("1080x1920", 2, "02-shu")