依赖requests库和lxml库,运行此代码会爬取https://www.huya.com/g/2168上的图片,图片名字取自img标签的@alt属性,图片会保存到当前工作目录下的aaa文件夹中(代码中的路径为./aaa/),没有此文件夹则运行不成功。
import requests
from lxml import etree
# Listing page whose images are downloaded.
url = 'https://www.huya.com/g/2168'
r = requests.get(url)
data = etree.HTML(r.text)
# Select the <img class="pic"> elements of the page.
girls = data.xpath('//img[@class="pic"]')
for girl in girls:
    # xpath() returns a list of matches, so take the first item explicitly.
    # Using the list itself as the name produced files like "['xxx'].jpg".
    img_urls = girl.xpath('./@data-original')
    names = girl.xpath('./@alt')
    if not img_urls or not names:
        # Skip elements lacking either attribute instead of raising IndexError.
        continue
    img_url = img_urls[0]
    name = names[0]
    img = requests.get(img_url)
    # The ./aaa directory is not created here; open() fails if it is missing.
    with open('./aaa/%s.jpg' % name, 'wb') as jpg:
        jpg.write(img.content)
    print("<%s> 下载成功" % name)
爬取单个图片 https://anchorpost.msstatic.com/cdnimage/anchorpost/1053/af/5bc9192add9117924ab8cde0b86049_2168_1660647126.jpg ,并将其保存为当前工作目录下的1.jpg
# 1. Import the library
import requests

# 2. URL of the image to download
img_url = "https://anchorpost.msstatic.com/cdnimage/anchorpost/1053/af/5bc9192add9117924ab8cde0b86049_2168_1660647126.jpg"

# 3. Send the request with GET; the call returns a response object
response = requests.get(img_url)
# Stop on an HTTP error status so an error page is never saved as 1.jpg.
response.raise_for_status()

# 4. Read the response data (the raw bytes of the body)
print(response.content)

# 5. Save the data to ./1.jpg in binary mode
with open('./1.jpg', 'wb') as f:
    f.write(response.content)
爬取百度图片搜索到的图片,依赖tqdm和requests。图片保存到当前工作目录中,文件名为imgmaintenanceWorker_<序号>.jpg(代码中path为'./img',它与文件名前缀maintenanceWorker_直接拼接,因此图片不会保存到img文件夹内)
# -*- coding: UTF-8 -*-
import requests
import tqdm
def configs(search, page, number):
    """Build the URL and query parameters for one Baidu image-search request.

    :param search: search keyword; sent as both ``queryWord`` and ``word``.
    :param page: zero-based page index; ``pn`` is set to ``str(60 * page)``.
    :param number: value sent unchanged as the ``rn`` parameter.
    :return: a ``(url, params)`` tuple suitable for ``requests.get``.
    """
    url = 'https://image.baidu.com/search/acjson'
    params = {
        "tn": "resultjson_com",
        "logid": "11555092689241190059",
        "ipn": "rj",
        "ct": "201326592",
        "is": "",
        "fp": "result",
        "queryWord": search,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": "-1",
        "z": "",
        "ic": "0",
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": search,
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": "0",
        "istype": "2",
        "qc": "",
        "nc": "1",
        "fr": "",
        "expermode": "",
        "force": "",
        # Result offset: pages advance in steps of 60.
        "pn": str(60 * page),
        "rn": number,
        "gsm": "1e",
        "1617626956685": "",
    }
    return url, params
def loadpic(number, page):
    """Download up to ``number`` thumbnails, starting at result page ``page``.

    Reads the module-level names ``search``, ``header``, ``path`` and ``bar``,
    which must be defined before this function is called.

    :param number: how many images are still to be downloaded.
    :param page: zero-based index of the first result page to request.
    :return: None
    """
    # ``number > 0`` also ends the loop for a negative request, which the
    # original ``number == 0`` check never did.
    while number > 0:
        url, params = configs(search, page, number)
        result = requests.get(url, headers=header, params=params).json()
        # The last element of ``data`` is dropped, as the original code did
        # (presumably a trailing placeholder entry; TODO confirm).
        entries = (result.get('data') or [])[:-1]
        url_list = [data['thumbURL'] for data in entries if data.get('thumbURL')]
        if not url_list:
            # No thumbnails on this page: stop instead of requesting further
            # pages forever.
            break
        for i, thumb_url in enumerate(url_list):
            getImg(thumb_url, 60 * page + i, path)
            bar.update(1)
            number -= 1
            if number == 0:
                break
        page += 1
    print("\nfinish!")
def getImg(url, idx, path):
    """Download one image and write it to disk.

    Reads the module-level ``header`` dict for the request headers.

    :param url: URL of the image to download.
    :param idx: zero-based image index; the file is numbered ``idx + 1``.
    :param path: prefix prepended directly to the file name; no path
        separator is inserted.
    :return: None
    """
    img = requests.get(url, headers=header)
    # File name: <path>maintenanceWorker_<idx + 1>.jpg
    # The context manager closes the file even if the write fails.
    with open(path + 'maintenanceWorker_' + str(idx + 1) + '.jpg', 'wb') as file:
        file.write(img.content)
if __name__ == '__main__':
    # Fixed settings read as globals by loadpic() and getImg().
    path = './img'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
    }

    # Ask for the search keyword first, then for the number of images.
    search = input("请输入搜索内容:")
    number = int(input("请输入需求数量:"))

    # Progress bar sized to the requested number of images.
    bar = tqdm.tqdm(total=number)

    # Start downloading from the first result page.
    page = 0
    loadpic(number, page)
标签:img,虎牙,python,number,param,爬取,url,requests,page From: https://www.cnblogs.com/zhangzhang001/p/16655733.html