安装 requests、beautifulsoup4 库
# 安装 requests、beautifulsoup4 库
pip install requests beautifulsoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple
完整代码
# pip install requests beautifulsoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
def download_images(url):
    """Download every image referenced by an ``<img src=...>`` tag on a page.

    The page at *url* is fetched and parsed, each ``src`` value is resolved
    against *url*, and the images are written into a ``download_images``
    directory under the current working directory (created when missing).
    Progress and per-image failures are reported with ``print``.

    Args:
        url: Address of the HTML page to scan for images.

    Returns:
        None.

    Raises:
        requests.RequestException: If the page itself cannot be fetched
            (connection error, timeout, ...). A failure on an individual
            image is reported and skipped instead of aborting the batch.
        OSError: If the target directory or an image file cannot be written.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36"
    }
    # requests never times out by default; bound the wait so an unresponsive
    # server cannot hang the whole run.
    timeout = 10

    # Send the HTTP request and fetch the page source.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch page: {url} (HTTP {response.status_code})")
        return

    # Parse the HTML content.
    soup = BeautifulSoup(response.text, "html.parser")

    # Create the directory the downloaded images are saved into.
    download_dir = os.path.join(os.getcwd(), 'download_images')
    os.makedirs(download_dir, exist_ok=True)

    # Download each image and save it into the directory.
    for index, image_link in enumerate(_collect_image_links(soup, url)):
        try:
            img_response = requests.get(image_link, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable image must not abort the remaining downloads.
            print(f"Failed to download image from: {image_link} ({exc})")
            continue
        if img_response.status_code != 200:
            print(f"Failed to download image from: {image_link}")
            continue

        img_filename = _image_filename(image_link, index)
        with open(os.path.join(download_dir, img_filename), 'wb') as img_file:
            img_file.write(img_response.content)
        print(f"Downloaded: {img_filename}")


def _collect_image_links(soup, page_url):
    """Return the unique absolute http(s) image URLs in *soup*, in page order."""
    # A dict keeps insertion order, giving an order-preserving de-duplication.
    links = {}
    for img_tag in soup.find_all('img'):
        src = img_tag.get('src')
        if not src:
            continue
        # urljoin leaves absolute URLs untouched and resolves relative ones.
        absolute = urljoin(page_url, src)
        # Skip inline "data:" images and other schemes requests cannot fetch.
        if urlparse(absolute).scheme in ("http", "https"):
            links[absolute] = None
    return list(links)


def _image_filename(image_link, index):
    """Return a file name for *image_link*, falling back to ``image_<index>``."""
    name = os.path.basename(urlparse(image_link).path)
    # A path ending in "/" (or in "." / "..") yields no usable file name;
    # opening it would target a directory instead of a file.
    if name in ("", ".", ".."):
        name = f"image_{index}"
    return name
if __name__ == "__main__":
    # Script entry point: scrape a fixed demo site. To prompt for the address
    # instead, assign url from input("Enter the URL to scrape images from: ").
    url = "http://www.vipsoft.com.cn"
    download_images(url)
标签:img,Python,image,url,html,download,requests,os,下载
From: https://www.cnblogs.com/vipsoft/p/18338040