import os
# import the module that sends HTTP requests:
import requests
# import the HTML parsing module:
from bs4 import BeautifulSoup
# absolute path of the directory this script lives in:
base_path = os.path.dirname(os.path.abspath(__file__))
# join it with the name of the image folder:
img1_path = os.path.join(base_path, "img1")
# fetch the album listing page:
response = requests.get("http://pic.yesky.com/c/6_20491_1.shtml")
# the site serves GBK-encoded pages (the album page below is decoded the
# same way), so set the encoding before reading response.text:
response.encoding = "gbk"
# hand the response text to bs4 for parsing:
soup = BeautifulSoup(response.text, "html.parser")
# inspecting the page locates the albums inside <div class="lb_box">:
div_obj = soup.find(name="div", attrs={"class": "lb_box"})
# find all dd tags inside that div (the outer tag of each album); find_all returns a list:
list_dd = div_obj.find_all(name="dd")
# loop over each album's dd:
for dd in list_dd:
    # find the a tag inside the dd:
    a_obj = dd.find("a")
    # build the album folder path from the link text (stripped of stray whitespace):
    dir_path = os.path.join(img1_path, a_obj.text.strip())
    # create the folder if it does not exist (makedirs also creates img1 itself on the first run):
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    # follow the href to the album page:
    a_response = requests.get(a_obj.get("href"))
    # the album page is GBK-encoded as well:
    a_response.encoding = "gbk"
    # parse the album page:
    soup2 = BeautifulSoup(a_response.text, "html.parser")
    # locate the div that wraps the album's images:
    div_obj2 = soup2.find(name="div", attrs={"class": "overview"})
    img_list = div_obj2.find_all(name="img")
    # download every image in the album:
    for img in img_list:
        img_src = img.get("src")
        img_response = requests.get(img_src)
        # name the file after the last path segment of the URL:
        file_path = os.path.join(dir_path, img_src.rsplit("/", 1)[-1])
        # write the raw image bytes to disk:
        with open(file_path, "wb") as f:
            f.write(img_response.content)
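One caveat: the src value scraped this way is not guaranteed to be an absolute URL; pages often emit relative or protocol-relative paths, which requests.get cannot fetch directly. A minimal normalization sketch using urllib.parse.urljoin (the absolute_src helper is mine, not part of the original script):

from urllib.parse import urljoin

def absolute_src(page_url, src):
    # urljoin leaves absolute URLs untouched and resolves relative or
    # protocol-relative ones (e.g. "//img.yesky.com/a.jpg") against the
    # URL of the page the tag was scraped from:
    return urljoin(page_url, src)

# inside the inner loop above this would read:
#   img_src = absolute_src(a_obj.get("href"), img.get("src"))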
The result looks like this:
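As a closing note, the script sends bare requests with no headers, timeout, or status checking, so a blocked request would silently save an error page as an image. A small wrapper along these lines hardens it (the fetch name, User-Agent string, and timeout value are illustrative choices, not requirements of the site):

import requests

session = requests.Session()
# a shared Session reuses the TCP connection; some image hosts also
# refuse clients that do not send a browser-like User-Agent:
session.headers.update({"User-Agent": "Mozilla/5.0"})

def fetch(url):
    resp = session.get(url, timeout=10)
    # turn HTTP errors (403, 404, ...) into exceptions instead of
    # letting an error page be written to disk:
    resp.raise_for_status()
    return resp

Each requests.get(...) call above could then be replaced with fetch(...).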