Python爬取7天天气
需要的包
- requests
- BeautifulSoup(pip 安装包名为 beautifulsoup4,代码中导入名为 bs4)
- openpyxl
安装包
- 安装命令
pip install [包名]
例如:pip install requests beautifulsoup4 openpyxl
代码演示
- 第一部分:爬取天气网页,将数据写入 Excel 表格
#爬虫获取天气
#导包
import datetime;
import requests;
from bs4 import BeautifulSoup as bs;
from openpyxl import Workbook;
import re;
# Scrape the 7-day forecast page and store (date, high, low) rows in temper.xlsx.
# 101181701 in the URL is the city code for Sanmenxia.
url = 'http://www.weather.com.cn/weather/101181701.shtml'
header = {
    # User-Agent string copied from a desktop browser.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, headers=header, timeout=10)
# Show the HTTP status, then stop on an error response instead of trying to
# parse an error page as if it were the forecast.
print(response.status_code)
response.raise_for_status()
# Decode the body using the encoding detected from its content.
response.encoding = response.apparent_encoding
html_doc = response.text
soup = bs(html_doc, 'html.parser')
# One <p class="tem"> per forecast day; the high is read from its <span>
# and the low from its <i>.
weather_data_list = soup.find_all('p', class_='tem')

# Today's high is sometimes missing from the page (no <span> in the first
# entry); in that case fall back to the next day's high.
tagToday = soup.find('p', class_="tem")
try:
    temperatureHigh = tagToday.span.string
except AttributeError:
    temperatureHigh = tagToday.find_next('p', class_="tem").span.string
print(f"今天最高温度:{temperatureHigh}")

# Rows of (date string, high, low), starting with today.
tem_list = []
now = datetime.datetime.now()
# Extracts the (optionally negative, optionally decimal) number from a
# temperature string. Raw string so the backslashes reach the regex engine
# without invalid-escape warnings.
reg_digital = r'\-?\d+\.?\d*'
today_high_temper = re.findall(reg_digital, temperatureHigh)
today_low_temper = re.findall(reg_digital, tagToday.i.string)
tem_list.append(
    (now.strftime("%Y-%m-%d"), int(today_high_temper[0]), int(today_low_temper[0]))
)
print(tem_list)

# Append the following six days so the list covers seven days in total.
date = now
for weather_data in weather_data_list[1:7]:
    date = date + datetime.timedelta(days=1)
    high_digital = re.findall(reg_digital, weather_data.span.string)
    low_digital = re.findall(reg_digital, weather_data.i.string)
    tem_list.append(
        (date.strftime("%Y-%m-%d"), int(high_digital[0]), int(low_digital[0]))
    )
print(tem_list)

# Write a header row followed by one row per day, then save the workbook.
wb = Workbook()
date_high_low = wb.active
date_high_low.title = "7天日期统计分析"
date_high_low.append(["日期", "最高温", "最低温"])
for row in tem_list:
    date_high_low.append(row)
wb.save("temper.xlsx")
- 第二部分:读取excel表格,画折线图
#图表可视化
#导包
from openpyxl import load_workbook
from openpyxl.chart import LineChart, Reference;
# Read temper.xlsx, add a line chart of the temperatures, and save the result
# as line.xlsx.
wb = load_workbook("temper.xlsx")
ws = wb["7天日期统计分析"]

# Data starts at row 2 (row 1 is skipped); use the sheet's actual last row
# rather than a fixed row count so the chart matches what was written.
last_row = ws.max_row
# Column 1 supplies the category (x-axis) labels.
date_line = Reference(ws, min_col=1, min_row=2, max_row=last_row)
# Columns 2-3 supply the two plotted series.
temper_data = Reference(ws, min_col=2, max_col=3, min_row=2, max_row=last_row)

chart = LineChart()
chart.title = "7天气温统计分析"
chart.x_axis.title = "日期"
chart.y_axis.title = "温度"
chart.add_data(temper_data)
chart.set_categories(date_line)
chart.legend = None
# Turn smoothing off on every series so both lines are drawn the same way.
for series in chart.series:
    series.smooth = False

# Anchor the chart at cell D2 and write the workbook to a new file.
ws.add_chart(chart, "D2")
wb.save("line.xlsx")
最终效果
- 效果演示图