Scraping weather data from weather.com.cn with Python
Requires requests and BeautifulSoup.
The first half uses requests, mainly to fetch the page HTML with a custom header;
the second half uses BeautifulSoup, mainly to pull the content we need out of that HTML.
import requests
from bs4 import BeautifulSoup
headers={
    'User-Agent':"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26"
}
#URL. For Nanjing use http://www.weather.com.cn/weather/101190101.shtml, and the lookup code below must change accordingly
#For Changsha use http://www.weather.com.cn/weather/101250101.shtml, and again the lookup code below must change accordingly
url="http://www.weather.com.cn/weather/101200101.shtml"
#Fetch the page data
response=requests.get(url=url,headers=headers)
#Set the encoding to utf-8, otherwise the text comes out garbled (try it yourself)
response.encoding="utf-8"
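#Optional: requests can also guess the charset from the response body; this is just a
#fallback sketch in case the hard-coded utf-8 ever fails for another city's page
#response.encoding = response.apparent_encoding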
#The following block can be skipped; it just dumps the HTML to a file, which makes it easier to debug the lookups below
page_text = response.text
with open("./tianqichangsha.html","w",encoding="utf-8") as fp:
fp.write(page_text)
#-------------------------------separator---------------------------------
#Now for the Beautiful Soup part
soup=BeautifulSoup(response.text,"html.parser")
#Select today's entry; the class ends in lv1 for Nanjing and lv2 for Changsha, no idea why
attrs={
    "class":"sky skyid lv2 on"
}
data=soup.find(attrs=attrs)
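#An equivalent lookup that does not depend on the lv1/lv2 part of the class name:
#BeautifulSoup matches class_ against each CSS class individually, and "on" appears
#to be the class weather.com.cn puts on the currently selected day, so the line
#below should find the same <li>
#data=soup.find("li",class_="on")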
#Find today's high and low temperatures
attrs={
    "class":"tem"
}
data_tem=data.find(attrs=attrs)
data_maxtem=str(data_tem.span.contents)
data_mintem=str(data_tem.i.contents)
print(data_maxtem+data_mintem)
#Find the weather description
attrs={
    "class":"wea"
}
data_wea=data.find(attrs=attrs)
#Convert the contents list to a string
data_weastr=str(data_wea.contents)
print(data_weastr)
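If you want the whole seven-day forecast rather than just today's entry, a minimal sketch along the following lines should work with the same soup object. It assumes every daily <li> carries the "sky" class seen in the class string above, and that the high-temperature span can be missing in the evening (hence the guard); get_text() also avoids the list-style output that str(...contents) produces.
#Sketch: loop over all seven forecast days instead of only today's <li>
for day in soup.find_all("li", class_="sky"):
    wea = day.find(attrs={"class": "wea"}).get_text(strip=True)
    tem = day.find(attrs={"class": "tem"})
    maxtem = tem.span.get_text(strip=True) if tem.span else "--"  #high may be absent at night
    mintem = tem.i.get_text(strip=True)
    print(wea, maxtem, mintem)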