import requests
from lxml import etree
# Scrape the <h3> title text of each entry on one listing page and print it.

# Page to fetch (the "ershoufang" listing section of fy.58.com).
url = "https://fy.58.com/ershoufang/?PGTID=0d100000-0091-53ca-4993-576198ca62e3"
# Send a browser-like User-Agent instead of the default requests one.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}

# Issue the request and keep the response. A timeout is set so the script
# cannot hang forever on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()

# Load the page source into an lxml element tree.
etree_txt = etree.HTML(response.text)

# Collected texts, in page order.
result = []

# Locate the <div> elements that hold the individual entries.
divs = etree_txt.xpath('//*[@id="esfMain"]/section/section[3]/section[1]/section[2]/div')

for div in divs:
    # The query is relative ("./"), so it is evaluated against the current
    # div rather than the whole document.
    texts = div.xpath('./a/div[2]/div[1]/div[1]/h3/text()')
    if not texts:
        # This entry has no <h3> text at the expected path; skip it rather
        # than crash with IndexError on texts[0].
        continue
    address = texts[0].strip()  # strip() removes surrounding whitespace
    result.append(address)

# Print the results, one per line.
for i in result:
    print(i)
# Tags: xpath, 03, etree, 58, python, section, headers, result, div. Source: https://www.cnblogs.com/shuxi/p/17210819.html