import os
import time

import openpyxl
import requests
from bs4 import BeautifulSoup
def write_mysql(lst):
    """Append the scraped rows in *lst* to the Excel workbook on disk.

    NOTE: despite the legacy name, this writes to an .xlsx file, not MySQL;
    the name is kept so existing callers keep working.

    BUG FIX: the original created a brand-new workbook on every call and
    saved it to the same path, so each crawled page overwrote the previous
    one and only the last page's rows survived.  We now re-open the existing
    workbook when it is present and append to its active sheet.

    :param lst: list of rows, each row a list of cell values.
    """
    path = "1-房产交易信息.xlsx"
    if os.path.exists(path):
        # Keep the rows saved by earlier calls (earlier pages).
        wk = openpyxl.load_workbook(path)
    else:
        wk = openpyxl.Workbook()
    sheet = wk.active
    for row in lst:
        sheet.append(row)
    wk.save(path)
def parser_content(resp):
    """Parse one Lianjia listing page into rows of listing fields.

    :param resp: ``requests.Response`` for a listing page, or ``None``
        when the request failed upstream.
    :return: list of rows ``[title, positionInfo, houseInfo, followInfo,
        totalPrice, unitPrice]``; empty list when nothing could be parsed.
    """
    # BUG FIX: the original dereferenced resp.text / ul.find_all / div.text
    # unconditionally and raised AttributeError whenever the request failed,
    # the listing container was missing (anti-bot page, layout change), or
    # an <li> was an ad/promo item without the listing divs.
    if resp is None:
        return []
    bs = BeautifulSoup(resp.text, 'html.parser')
    ul = bs.find('ul', class_='sellListContent')
    if ul is None:
        # No listing container on this page — nothing to extract.
        return []
    lst = []
    for item in ul.find_all('li'):
        fields = []
        for cls in ('title', 'positionInfo', 'houseInfo', 'followInfo',
                    'totalPrice totalPrice2', 'unitPrice'):
            div = item.find('div', class_=cls)
            if div is None:
                break  # ad/promo <li> lacking listing fields — skip it
            fields.append(div.text)
        else:
            lst.append(fields)
    return lst
class LianJiaSpider():
    """Crawl the first 10 pages of Beijing second-hand housing listings."""

    def __init__(self):
        # Page URL template; format() fills in the 1-based page number.
        self.url = "https://bj.lianjia.com/ershoufang/pg{0}/"
        # BUG FIX: the header key was "User - Agent" (never matches the real
        # User-Agent header) and the value was garbled ("KABUL" instead of
        # "KHTML", stray spaces around every '/'), so the site always saw
        # the default python-requests agent.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/84.0.4147.125 Safari/537.36"
        }

    def send_request(self, url):
        """GET *url*; return the Response when ok, else None.

        The original fell through to an implicit ``None`` on a non-ok
        status with no handling by the caller; failure is now an explicit,
        handled outcome, and the request carries a timeout so a stalled
        connection cannot hang the crawl forever.
        """
        try:
            resp = requests.get(url=url, headers=self.headers, timeout=10)
        except requests.RequestException:
            return None
        # Response truthiness is status-based: ok (2xx/3xx) => truthy.
        return resp if resp else None

    def start(self):
        """Crawl pages 1-10: fetch, parse, persist, then pause between pages."""
        for page in range(1, 11):
            resp = self.send_request(self.url.format(page))
            if resp is None:
                # Request failed or was rejected — skip this page.
                time.sleep(5)
                continue
            data_list = parser_content(resp)
            write_mysql(data_list)
            time.sleep(5)  # be polite to the server / avoid rate limiting
# Script entry point: run the crawler only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    LianJiaSpider().start()