"""Print the entries of Baidu's realtime hot-search board.

The script downloads the board page, selects every entry container with
XPath, and prints one line per entry.  The first entry is labelled "置顶"
(pinned); every later entry is labelled with its zero-based position.
"""
import requests
from lxml import etree

# Page that lists the realtime hot-search board.
url = 'https://top.baidu.com/board?tab=realtime'

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36'
    ),
}

# Seconds to wait for the server.  requests applies no timeout by default,
# so without this a stalled connection would hang the script.
REQUEST_TIMEOUT = 10

# The expressions compare the complete ``class`` attribute string, so they
# have to be updated whenever the page's generated class names change.
_ENTRY_XPATH = '//div[@class="category-wrap_iQLoo horizontal_1eKyQ"]'
_TITLE_XPATH = './div[@class="content_1YWBm"]/a/div[1]/text()'
# NOTE(review): div[3] / div[2] of the trend column are presumably the
# caption and the numeric value of the hot index -- confirm on the live page.
_LABEL_XPATH = './div[@class="trend_2RttY hide-icon"]/div[3]/text()'
_VALUE_XPATH = './div[@class="trend_2RttY hide-icon"]/div[2]/text()'


def _joined_text(node, xpath):
    """Return the text nodes selected by *xpath* under *node*, joined and stripped."""
    return "".join(node.xpath(xpath)).strip()


def fetch_board_html():
    """Download the board page and return its decoded text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection failures or timeouts.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page and printing nothing.
    response.raise_for_status()
    return response.text


def parse_entries(html):
    """Extract ``(title, label, value)`` string tuples from the page markup.

    A field whose XPath matches nothing becomes an empty string.
    """
    tree = etree.HTML(html)
    if tree is None:
        # Guard for markup that yields no root element (e.g. a blank body).
        return []
    return [
        (
            _joined_text(entry, _TITLE_XPATH),
            _joined_text(entry, _LABEL_XPATH),
            # The value is stripped like the other fields so surrounding
            # whitespace from the markup does not leak into the output.
            _joined_text(entry, _VALUE_XPATH),
        )
        for entry in parse_entries_nodes(tree)
    ]


def parse_entries_nodes(tree):
    """Return the entry container elements found in the parsed document."""
    return tree.xpath(_ENTRY_XPATH)


def format_entry(index, title, label, value):
    """Build the printed line for the entry at zero-based position *index*.

    Position 0 is shown as "置顶"; other positions are shown as the number.
    A separate name is used for the label so the loop counter keeps its
    integer type.
    """
    rank = "置顶" if index == 0 else index
    return f"{rank} 标题:{title} {label}:{value}"


def main():
    """Fetch the board and print one formatted line per entry."""
    for index, (title, label, value) in enumerate(parse_entries(fetch_board_html())):
        print(format_entry(index, title, label, value))


if __name__ == "__main__":
    main()
# Tags: xpath, python, text, 爬虫, data, 热榜, div, rot, class
# Source: https://www.cnblogs.com/fanpush/p/17690448.html