Web-scraping lesson one for homebodies! Here comes your treat~
Without further ado, straight to the code! (In short: the script walks a list of gallery URLs, reads each gallery's page count, then visits every page and downloads its image.)
```python
# -*- encoding: utf-8 -*-
# FUNCTION: Capture beauty pictures
import requests
from bs4 import BeautifulSoup
import os
import time

url_list = ['http://www.mzitu.com/201024', 'http://www.mzitu.com/169782']  # interested beauties
headers = {
    'referer': 'https://www.mzitu.com/201024',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 '
                  'Safari/537.36'
}


def get_page_num(url):
    """Return the gallery's total page count (as a string) and the gallery name."""
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    page_num = soup.find(class_='pagenavi').find_all('a')[-2].text
    name = soup.find(class_='currentpath').text.split()[-1]
    return page_num, name  # page_num is a string


def parse_page(url):
    """
    Get the image on one page.
    :param url: page URL
    :return: image URL, image name
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    pic_url = soup.find(class_='main-image').find('img')['src']
    pic_name = soup.find(class_='main-title').text
    return pic_url, pic_name


def get_pic(pic_url, pic_name, name):
    """Download and save one image."""
    response = requests.get(pic_url, headers=headers, allow_redirects=False)
    filepath = '/home/f/crawler/Beauty/photo/' + name + '/' + pic_name + '.jpg'
    with open(filepath, 'wb') as f:
        f.write(response.content)


def main():
    for url in url_list:
        page_num, name = get_page_num(url)
        try:
            os.mkdir('/home/f/crawler/Beauty/photo/' + name)
        except FileExistsError:
            pass  # the gallery directory already exists
        for page in range(1, int(page_num) + 1):  # iterate over pages 1..page_num
            page_url = url + '/' + str(page)
            print(page_url)
            pic_url, pic_name = parse_page(page_url)
            get_pic(pic_url, pic_name, name)
            time.sleep(2)  # be polite: pause between requests


if __name__ == '__main__':
    main()
```
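One thing worth noting before you run it: `get_pic` writes whatever bytes come back, even an error page, and `pic_name` comes from page text, so it may contain characters that are illegal in file names. Below is a minimal sketch of a more defensive download helper; `get_pic_safe` and `save_dir` are names I made up for illustration, and it assumes the same `headers` and directory layout as the script above.

```python
import os
import re

import requests


def get_pic_safe(pic_url, pic_name, name, save_dir='/home/f/crawler/Beauty/photo'):
    """Hypothetical, more defensive variant of get_pic (not part of the original script)."""
    # Strip characters that are illegal in file names (pic_name comes from page text).
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', pic_name).strip()
    # makedirs with exist_ok=True replaces the try/except FileExistsError dance.
    target_dir = os.path.join(save_dir, name)
    os.makedirs(target_dir, exist_ok=True)
    response = requests.get(pic_url, headers=headers, timeout=10)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of saving an error page
    filepath = os.path.join(target_dir, safe_name + '.jpg')
    with open(filepath, 'wb') as f:
        f.write(response.content)
    return filepath
```

The `timeout` keeps a stalled connection from hanging the whole crawl, and `exist_ok=True` also removes the need for the `try`/`except FileExistsError` block in `main()`.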
Feel free to bookmark this and work through it at your own pace!
---
Follow me on WeChat: **爬虫王者**