爬取页面热搜榜单
爬取汽车榜单中的热度和价格
import requests
from bs4 import BeautifulSoup
# Request headers meant to make the scraper look like a desktop Chrome session.
# NOTE(review): the Cookie below embeds what appear to be live session tokens
# (BDUSS / BDUSS_BFESS). Hard-coding credentials in source is a security risk;
# consider loading them from the environment or a local, untracked config file.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    # One adjacent string literal per cookie; Python joins them into a single
    # "name=value; name=value; ..." header value.
    "Cookie": (
        "BIDUPSID=533E2C368EEB36A3FD9D61D5E2ED701D; "
        "PSTM=1701426278; "
        "BAIDUID=4445FFBCC0EE2BDC834E4893CDAC797E:FG=1; "
        "MCITY=-%3A; "
        "BDUSS=0NYU2N1ckxyM090STRqRUlPfktkN0pJT3ZiWlo0Q2hTeXBTajVNWmtqTzRRMTltSVFBQUFBJCQAAAAAAAAAAAEAAAD9jfvxu6WyuTIzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALi2N2a4tjdmb; "
        "BDUSS_BFESS=0NYU2N1ckxyM090STRqRUlPfktkN0pJT3ZiWlo0Q2hTeXBTajVNWmtqTzRRMTltSVFBQUFBJCQAAAAAAAAAAAEAAAD9jfvxu6WyuTIzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALi2N2a4tjdmb; "
        "H_WISE_SIDS_BFESS=60274_60340_60346_60362_60360; "
        "H_WISE_SIDS=60274_60362_60360; "
        "H_PS_PSSID=60274_60470_60491_60500; "
        "BDORZ=FFFB88E999055A3F8A630C64834BD6D0; "
        "BAIDUID_BFESS=4445FFBCC0EE2BDC834E4893CDAC797E:FG=1; "
        "BA_HECTOR=a5al8k01210l0404058180a103gi9j1j9stlv1u; "
        "ZFY=egHzkrAoC4T1EKBSIVxmq:A9d2CRZeNCzn1fI8:B6JPLU:C; "
        "BDRCVFR[Ter2S3H5o_D]=mk3SLVN4HKm; "
        "delPer=0; "
        "PSINO=6"
    ),
}
# Page that is downloaded once at module load and parsed by the functions below.
url = "https://top.baidu.com/board?platform=pc&sa=pcindex_entry"
# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so a positional dict would be appended to the
# URL as a query string instead of being sent as HTTP headers.
# The timeout keeps the script from hanging indefinitely on a stalled connection.
res = requests.get(url, headers=headers, timeout=10)
# Decode the raw response bytes as UTF-8 to get the HTML text.
result = res.content.decode('utf-8')
def get_hots(result):
    """Extract the hot-search entries from the board page HTML.

    Args:
        result: HTML text of the page to parse.

    Returns:
        A list of single-entry dicts, one per entry, each mapping the entry's
        title text to its link (``{title: href}``), in document order.
        Returns an empty list when the expected list container is absent
        (for example if the page layout changed or the request was blocked).

    Side effects:
        Prints the collected list to stdout before returning it.
    """
    soup = BeautifulSoup(result, 'html.parser')
    containers = soup.find_all('div', class_="list_1EDla")
    # Only the first matching container is used; guard against a page that
    # has none instead of failing with IndexError.
    anchors = (
        containers[0].find_all('a', class_="item-wrap_2oCLZ")
        if containers
        else []
    )

    hots = []
    for anchor in anchors:
        href = anchor.get('href')
        title_divs = anchor.find_all('div', class_="c-single-text-ellipsis")
        if href is None or not title_divs:
            # Skip anchors missing a link or a title element rather than
            # aborting the whole scrape.
            continue
        hots.append({title_divs[0].text: href})

    print(hots)
    return hots
def get_car(result):
    """Extract name, heat index and price for each entry of the car board.

    Args:
        result: HTML text of the page to parse.

    Returns:
        A list of dicts with the keys ``'car_name'``, ``'热搜指数'`` and
        ``'price'``. The name is the text of the first ``<a>`` inside the
        item's ``right_1PE2e`` element; the heat index and price are the text
        of the first and last ``<div>`` found inside that same element.
        Returns an empty list when no ``theme="car"`` section is present.

    Side effects:
        Prints the matched section(s), the matched item elements and the
        final list to stdout.
    """
    soup = BeautifulSoup(result, 'html.parser')
    sections = soup.find_all('div', theme='car')
    print(sections)
    # Only the first car section is used; guard against a page that has none
    # instead of failing with IndexError.
    car_divs = (
        sections[0].find_all('div', class_="item-wrap_Z0BrP")
        if sections
        else []
    )
    print(car_divs)

    car_infos = []
    for item in car_divs:
        info_divs = item.find_all('div', class_="right_1PE2e")
        if not info_divs:
            # Item without the expected info column: nothing to extract.
            continue
        info = info_divs[0]
        name_tags = info.find_all('a')
        detail_divs = info.find_all('div')
        if not name_tags or not detail_divs:
            # Skip incomplete items rather than aborting the whole scrape.
            continue
        car_infos.append({
            'car_name': name_tags[0].text,
            # First nested div is treated as the heat index, the last one
            # as the price.
            '热搜指数': detail_divs[0].text,
            'price': detail_divs[-1].text,
        })

    print(car_infos)
    return car_infos
if __name__ == '__main__':
    # Parse both boards out of the page that was downloaded at module load.
    hots = get_hots(result)
    cars = get_car(result)

    # Car board: print every field value of every entry on its own line.
    for car_entry in cars:
        for field_value in car_entry.values():
            print(field_value)

    print('--------------热点榜单--------------')

    # Hot list: print "rank title link", with ranks starting at 1.
    for rank, entry in enumerate(hots, start=1):
        for title, link in entry.items():
            print(rank, title, link)
标签:car,爬虫,hot,print,div,divs,find,百度
From: https://www.cnblogs.com/iruan/p/18319839