# Demo: parsing scraped HTML data with XPath
import requests
from lxml import etree
from fake_useragent import UserAgent
'''
Fields extracted for each search result:
company
title
price
'''
# Keyword searched on zbj.com. The same keyword is used to stitch result
# titles back together (see rebuild_title), so it is defined in one place.
KEYWORD = 'saas'


def search_url(keyword):
    """Return the zbj.com search URL for *keyword*."""
    return f'https://www.zbj.com/search/f/?kw={keyword}'


# Kept as a module-level name so code that referred to the original
# script's ``domain`` variable still finds the same URL.
domain = search_url(KEYWORD)


def rebuild_title(fragments, keyword):
    """Join the text fragments of a result title, inserting *keyword* between them.

    Presumably the page puts every occurrence of the search keyword in its
    own child element, so ``text()`` yields only the pieces around it --
    TODO confirm against the live markup. The separator must therefore be
    the keyword that was actually searched for.

    Args:
        fragments: list of strings returned by the title XPath query.
        keyword: the search keyword used to build the request URL.

    Returns:
        The fragments joined with *keyword*; an empty string for no fragments.
    """
    return keyword.join(fragments)


def main(keyword=KEYWORD):
    """Fetch the search page for *keyword* and print each result's fields.

    For every result block one line is printed containing the company
    (list of text nodes), the rebuilt title (string) and the price
    (list of text nodes).

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response.
    """
    # A random user agent is sent instead of the default requests one.
    headers = {'user-agent': UserAgent().random}

    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status stops us from parsing an error page as if it were results.
    resp = requests.get(search_url(keyword), headers=headers, timeout=10)
    resp.raise_for_status()

    # Parse the returned HTML and select one element per search result.
    html = etree.HTML(resp.text)
    divs = html.xpath('/html/body/div[6]/div/div/div[2]/div[5]/div')

    for element in divs:
        company = element.xpath('./div/div/div/a[1]/div[1]/p/text()')
        title = rebuild_title(
            element.xpath('./div/div/div/a[2]/div[2]/div[2]/p/text()'),
            keyword,
        )
        price = element.xpath('./div/div/div/a[2]/div[2]/div[1]/span[1]/text()')
        print(company, title, price)


if __name__ == '__main__':
    main()
# Tags: xpath, domain, Python, text, element, crawling, html, div
# Source: https://blog.51cto.com/mooreyxia/6002883