1、分析网页,爬取美食数据
https://mip.xiachufang.com/explore/?page=2
https://mip.xiachufang.com/explore/?page=3
url = "https://mip.xiachufang.com/explore/?page={0}"
for page in range(1, 4):
resp = requests.get(url=url.format(page), headers=headers)
2、网页解析库BeautifulSoup
bs = BeautifulSoup(resp.text, 'html.parser')
recipes = bs.find('section', class_='recipes')
article = recipes.find_all('article', class_='recipe-332-horizon pos-r')
for item in article:
count +=1
recipe_name = item.find('div', class_='recipe-name bold ellipsis').text
author_name = item.find('div', class_='author-name font15 ellipsis').text
font14 = item.find('div', class_='font14').text
click_expand = item.find('a', class_='click-expand')['href']
lst_food = "https://mip.xiachufang.com" + click_expand
lst.append([count, recipe_name, author_name, font14, lst_food])
3、数据的存储
wk = openpyxl.Workbook()
sheet = wk.active
for item in lst: sheet.append(item)
wk.save('12-全民美食.xlsx')
import requests
from bs4 import BeautifulSoup
import time
import openpyxl
# Scrape the first three "explore" listing pages of mip.xiachufang.com and
# store one spreadsheet row per recipe in '12-全民美食.xlsx'.

# Listing-page URL template; {0} is replaced with the page number.
url = "https://mip.xiachufang.com/explore/?page={0}"
# NOTE(review): the User-Agent value looks garbled ("KABUL" where browsers
# normally send "KHTML", and no space before "Safari"). It is kept unchanged
# here because the site may depend on the exact value -- confirm before fixing.
headers = {
    "User-Agent": "Mozilla/5.0(Windows NT 6.1;WOW64) AppleWebKit/537.36(KABUL, like Gecko) "
                  "Chrome/86.0.4240.198Safari/537.36 "
}
# Collected rows: [index, recipe name, author name, 'font14' div text, detail URL].
lst = []
count = 0  # running number of recipes collected so far

for page in range(1, 4):
    # Pause between consecutive requests to stay polite to the server.
    # Sleeping before every page except the first avoids a useless wait
    # after the last page has been fetched.
    if page > 1:
        time.sleep(5)

    # A timeout keeps a stalled connection from hanging the script forever.
    resp = requests.get(url=url.format(page), headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
    resp.raise_for_status()

    bs = BeautifulSoup(resp.text, 'html.parser')
    recipes = bs.find('section', class_='recipes')
    if recipes is None:
        # find() returns None when nothing matches; skip the page rather
        # than crash and lose the rows already collected.
        print('page {0}: no <section class="recipes"> found, skipping'.format(page))
        continue

    article = recipes.find_all('article', class_='recipe-332-horizon pos-r')
    for item in article:
        name_tag = item.find('div', class_='recipe-name bold ellipsis')
        author_tag = item.find('div', class_='author-name font15 ellipsis')
        font14_tag = item.find('div', class_='font14')
        link_tag = item.find('a', class_='click-expand')

        # Skip cards that lack any expected element instead of raising
        # AttributeError / TypeError / KeyError part-way through the run.
        tags = (name_tag, author_tag, font14_tag, link_tag)
        if any(tag is None for tag in tags):
            continue
        click_expand = link_tag.get('href')
        if not click_expand:
            continue

        count += 1
        recipe_name = name_tag.text
        author_name = author_tag.text
        font14 = font14_tag.text
        # The href is joined to the site origin to form the detail-page URL.
        lst_food = "https://mip.xiachufang.com" + click_expand
        lst.append([count, recipe_name, author_name, font14, lst_food])

# Write every collected row to the active sheet of a new workbook.
wk = openpyxl.Workbook()
sheet = wk.active
for item in lst:
    sheet.append(item)
wk.save('12-全民美食.xlsx')