# coding: utf-8
import re

from bs4 import BeautifulSoup
import requests

# Compiled once at import time; both patterns are applied to every table row.
_TAG_RE = re.compile(r'<[^>]+>')
_SPACE_RUN_RE = re.compile(r' {2,}')


def _row_to_text(row):
    """Return one ``<tr>`` element flattened to a single line of text."""
    # Drop every pre-existing space and line break first, so that the only
    # spaces left afterwards are the separators inserted in place of tags.
    markup = str(row).replace(' ', '').replace('\r\n', '').replace('\n', '')
    text = _TAG_RE.sub(' ', markup)
    # Adjacent tags leave runs of spaces; collapse each run to one space.
    return _SPACE_RUN_RE.sub(' ', text.strip())


def main(city='zhengzhou', ymonth='202301', timeout=10):
    """Fetch one month's history page from tianqihoubao.com and print its rows.

    The page at ``/lishi/{city}/month/{ymonth}.html`` is downloaded, every
    ``<tr>`` element is flattened to plain text, and each row is printed on
    its own line.

    Args:
        city: City path segment inserted into the URL.
        ymonth: Month path segment inserted into the URL. The default
            ``'202301'`` suggests a ``YYYYMM`` format; confirm against the
            site before relying on it.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
    }

    url = f'http://www.tianqihoubao.com/lishi/{city}/month/{ymonth}.html'
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing and printing an error page.
    response.raise_for_status()

    # When the server declares no charset, requests falls back to ISO-8859-1
    # for text responses, which garbles non-Latin pages. In that case use the
    # encoding detected from the body instead.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'lxml')

    # Process the table row by row.
    for row in soup.find_all('tr'):
        print(_row_to_text(row))


if __name__ == '__main__':
    main()
# Tags: info, history, headers, url, web crawler, weather, re, import, replace
# From: https://www.cnblogs.com/ddzhen/p/18604996