requests¶
GET requests¶
In [ ]:
import requests

response = requests.get('http://www.baidu.com')  # send a GET request

In [ ]:
response.status_code  # response status code

In [ ]:
response.url  # the URL that was requested

In [ ]:
response.headers  # response headers

In [ ]:
response.cookies  # cookies
requests.utils.dict_from_cookiejar(response.cookies)

In [ ]:
response.content  # response body as raw bytes

In [ ]:
response.text  # response body decoded to text
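A caveat on `response.text`: requests decodes `response.content` with an encoding guessed from the HTTP headers, which can produce garbled text when a page declares its charset only inside the HTML. A minimal sketch of overriding the guess (the UTF-8 value here is an assumption about the target page):

In [ ]:
import requests

response = requests.get('http://www.baidu.com')
response.encoding = 'utf-8'  # override the header-based guess; assumes the page is UTF-8
print(response.text[:200])   # first 200 characters of the decoded body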
GET requests with parameters¶
In [ ]:
# GET requests with parameters
# Method 1: write the parameters directly into the URL
import requests

response = requests.get('http://httpbin.org/get?name=Jim&age=22')
print(response.text)

In [ ]:
response.url

In [ ]:
# Method 2: put the parameters in a dict and pass it as the params argument
data = {'name': 'tom', 'age': 20}
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)

In [ ]:
# search Baidu for one keyword
data = {'wd': 'python'}
response = requests.get('http://www.baidu.com/s?', params=data)
print(response.text)

In [ ]:
# search Baidu for several keywords
words = ['python', 'java', 'c', 'matlab']
for word in words:
    data = {'wd': word}
    response = requests.get('http://www.baidu.com/s?', params=data)
    print(response.url)
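One convenience of `params` worth making explicit: requests percent-encodes the values for you, which matters as soon as a keyword contains non-ASCII characters. A small sketch (the Chinese keyword is just an illustrative input):

In [ ]:
import requests

data = {'wd': '人工智能'}
response = requests.get('http://www.baidu.com/s', params=data)
print(response.url)  # the keyword appears percent-encoded in the final URL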
POST requests¶
In [ ]:
# POST request with the post method
import requests

data = {'name': 'jim', 'age': '22'}
response = requests.post('http://httpbin.org/post', data=data)
print(response.text)

In [ ]:
response.url
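When an API expects a JSON body rather than form fields, requests can serialize a dict itself via the `json` keyword, which also sets the `Content-Type: application/json` header. A minimal sketch against the same httpbin echo service:

In [ ]:
import requests

payload = {'name': 'jim', 'age': 22}
response = requests.post('http://httpbin.org/post', json=payload)  # send a JSON body
print(response.json()['json'])  # httpbin echoes the parsed JSON back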
Extracting and sorting Lianjia listing areas and prices¶
In [ ]:
import requests
import re

url = 'https://bj.lianjia.com/zufang/'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.text

In [ ]:
response.status_code

In [ ]:
html

In [ ]:
nameregex = re.compile(r'alt="(.*?)"\n')
name = nameregex.findall(html)  # extract all listing names
arearegex = re.compile(r'([0-9.]+)㎡\n')
area = arearegex.findall(html)  # extract the areas
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
price = priceregex.findall(html)  # extract the prices

In [ ]:
len(name)

In [ ]:
len(area)

In [ ]:
len(price)

In [ ]:
import pandas as pd

datalist = []
for i in range(len(name)):
    datalist.append([name[i], float(area[i]), float(price[i])])
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')

In [ ]:
df

In [ ]:
df.info()

In [ ]:
df.sort_values('area')  # sort by area

In [ ]:
df.sort_values('price')  # sort by price

In [ ]:
# complete code
import requests
import re
import pandas as pd

url = 'https://sz.lianjia.com/zufang/'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.text

nameregex = re.compile(r'alt="(.*?)"\n')
name = nameregex.findall(html)  # extract all listing names
arearegex = re.compile(r'([0-9.]+)㎡\n')
area = arearegex.findall(html)  # extract the areas
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
price = priceregex.findall(html)  # extract the prices

datalist = []
for i in range(len(name)):
    datalist.append([name[i], float(area[i]), float(price[i])])
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
df.sort_values('area')   # sorted by area
df.sort_values('price')  # sorted by price (only this last expression is displayed)
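A fragility worth guarding against: the three `findall` lists are built independently, so a listing without a price or an ad card in the middle of the page makes their lengths drift apart and silently misaligns the positional pairing. A defensive sketch (the assertion is an addition, not part of the original page logic):

In [ ]:
# check that the three extracted lists line up before pairing them by position
assert len(name) == len(area) == len(price), (len(name), len(area), len(price))
datalist = [[n, float(a), float(p)] for n, a, p in zip(name, area, price)]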
Scraping multiple Lianjia pages¶
In [ ]:
import requests
import re
import time
import random
import pandas as pd

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
data = pd.DataFrame()
for i in range(1, 11):  # scrape 10 pages
    print('Scraping page %d' % i)
    baseurl = 'https://bj.lianjia.com/zufang/pg'
    url = baseurl + str(i) + '/#contentList'
    response = requests.get(url, headers=header)
    html = response.text
    nameregex = re.compile(r'alt="(.*?)"\n')
    name = nameregex.findall(html)  # extract all listing names
    arearegex = re.compile(r'([0-9.]+)㎡\n')
    area = arearegex.findall(html)  # extract the areas
    priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
    price = priceregex.findall(html)  # extract the prices
    datalist = []
    for j in range(len(name)):  # j, so the page counter i is not shadowed
        datalist.append([name[j], float(area[j]), float(price[j])])
    df = pd.DataFrame(datalist)
    data = pd.concat([data, df])
    time.sleep(random.randint(6, 7))  # pause between pages to stay polite
print('Done')
data.columns = ['name', 'area', 'price']
data.set_index('name').sort_values('area')

In [ ]:
time.time()

In [ ]:
time.time()
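The two `time.time()` cells above are a manual way to eyeball elapsed time; wrapping the crawl between two timestamps gives the same answer in one place. A small sketch:

In [ ]:
import time

t0 = time.time()
# ... run the crawl loop here ...
print('elapsed: %.1f s' % (time.time() - t0))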
Youdao Translate: POST request¶
In [ ]:
import requests

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
# alternative endpoint: http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
formdata = {'i': '中国',
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': '1541165120374',
            'sign': '5bbeca852044319291d932d4bfe92564',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTIME',
            'typoResult': 'false'}
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.post(url, data=formdata, headers=header)
print(response.json()['translateResult'][0][0]['tgt'])

In [ ]:
response.json()['translateResult'][0][0]['tgt']
Interactive version¶
In [ ]:
# interactive version
import requests

content = input('Enter text to translate: ')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
formdata = {'i': content,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': '1541165120374',
            'sign': '5bbeca852044319291d932d4bfe92564',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTIME',
            'typoResult': 'false'}
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.post(url, data=formdata, headers=header)
print(response.json()['translateResult'][0][0]['tgt'])

In [ ]:
# translate several inputs
import requests
import time

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
contents = ['中国', '美国', '英国', '法国']
for content in contents:
    formdata = {'i': content,
                'from': 'AUTO',
                'to': 'AUTO',
                'smartresult': 'dict',
                'client': 'fanyideskweb',
                'salt': '1541165120374',
                'sign': '5bbeca852044319291d932d4bfe92564',
                'doctype': 'json',
                'version': '2.1',
                'keyfrom': 'fanyi.web',
                'action': 'FY_BY_REALTIME',
                'typoResult': 'false'}
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
    response = requests.post(url, data=formdata, headers=header)
    print(response.json()['translateResult'][0][0]['tgt'])
    time.sleep(5)
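Since only the `i` field changes between requests, the call can also be wrapped in a reusable function. This sketch assumes `url`, `formdata`, and `header` are the objects defined in the cell above:

In [ ]:
def youdao_translate(text):
    # reuse the url, formdata, and header from the previous cell; only the input changes
    formdata['i'] = text
    response = requests.post(url, data=formdata, headers=header)
    return response.json()['translateResult'][0][0]['tgt']

youdao_translate('中国')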
Maoyan movie rankings¶
Single-page scrape¶
In [ ]:
import requests
import re
import pandas as pd

url = 'https://maoyan.com/board/4?offset=0'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.text

In [ ]:
html

In [ ]:
# re.S makes . match any character, including newlines
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
items = pattern.findall(html)

In [ ]:
items

In [ ]:
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')

In [ ]:
df
In [ ]:
url = 'https://maoyan.com/board/4?offset=0'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.content.decode('utf-8')  # decode the raw bytes explicitly

In [ ]:
html

In [ ]:
# re.S makes . match any character, including newlines
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
items = re.findall(pattern, html)

In [ ]:
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())  # strip surrounding whitespace
df

In [ ]:
df.info()

In [ ]:
df['score1']

In [ ]:
df['score'] = df['score1'] + df['score2']  # string concatenation: integer part + fractional part

In [ ]:
df.drop(['score1', 'score2'], axis=1)
In [ ]:
# complete single-page code
url = 'https://maoyan.com/board/4?offset=0'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.content.decode('utf-8')
# re.S makes . match any character, including newlines
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
items = re.findall(pattern, html)
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']
df.drop(['score1', 'score2'], axis=1)
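Note that `score1` and `score2` come out of `findall` as strings, so the `+` above is string concatenation ('9' + '.6' gives '9.6'). To sort or average the scores numerically, cast the result afterwards:

In [ ]:
df['score'] = (df['score1'] + df['score2']).astype(float)  # '9' + '.6' -> 9.6 as a number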
In [ ]:
html

In [ ]:
# extract the offset of the next page from the "next page" link
startpattern = re.compile('"?offset=(.*?)"\n >下一页')
start = re.findall(startpattern, html)[0]  # e.g. '10'

In [ ]:
start
Scraping multiple Maoyan pages¶
In [ ]:
# scraping multiple pages, method 1: a fixed page count
import requests
import re
import time
import random
import pandas as pd

df = pd.DataFrame()
for i in range(10):
    print('Scraping page %d' % i)
    baseurl = 'https://maoyan.com/board/4?offset='
    url = baseurl + str(i * 10)
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
    response = requests.get(url, headers=header)
    if response.status_code == 200:
        html = response.content.decode('utf-8')
        # re.S makes . match any character, including newlines
        pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
        items = re.findall(pattern, html)
        data = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')
        df = pd.concat([df, data])
        time.sleep(random.randint(5, 7))
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']
df = df.drop(['score1', 'score2'], axis=1)
df

In [ ]:
# scraping multiple pages, method 2 (preferred): follow the "next page" link
# complete multi-page code
import pandas as pd
import re
import time
import requests
import random

df = pd.DataFrame()
start = '0'
while True:
    try:
        print('Scraping the page at offset %d' % int(start))
        baseurl = 'https://maoyan.com/board/4?offset='
        url = baseurl + start
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
        response = requests.get(url, headers=header)
        if response.status_code == 200:
            html = response.content.decode('utf-8')
            # re.S makes . match any character, including newlines
            pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
            items = re.findall(pattern, html)
            data = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')
            df = pd.concat([df, data])
        time.sleep(random.randint(5, 7))
        startpattern = re.compile('"?offset=(.*?)"\n >下一页')
        start = re.findall(startpattern, html)[0]  # offset of the next page, e.g. '10'
        print(start)
    except IndexError:
        # findall(...)[0] raises IndexError when there is no "next page" link,
        # i.e. the last page has been reached
        break
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']
df = df.drop(['score1', 'score2'], axis=1)
df
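When hitting the same host page after page, a `requests.Session` reuses the underlying TCP connection and lets you set shared headers once. A minimal sketch of how the loop above could use one (behavior is otherwise identical):

In [ ]:
session = requests.Session()
session.headers.update(header)  # the User-Agent is now sent with every request
response = session.get('https://maoyan.com/board/4?offset=0')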
A Bite of China: extracting short reviews¶
In [ ]:
url = 'https://movie.douban.com/subject/25875034/comments?start=0&limit=20&sort=new_score&status=P'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
           'cookie': 'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # dict format
response = requests.get(url, headers=headers)
html = response.text  # page source
html

In [ ]:
reg = 'class="short">(.*?)</span>'  # pattern for the short-review text
reg = re.compile(reg, re.S)  # re.S makes . match newlines as well
data = re.findall(reg, html)
data

In [ ]:
start = '0'
baseurl = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P'
start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]
start

In [ ]:
# complete code: scrape multiple pages of reviews
import os
os.chdir(r'C:\Users\chenh\Desktop')
import requests
import re
import time
import random

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
           'cookie': 'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # dict format
start = '0'
while True:
    try:  # exception handling: stop when there is no next page
        time.sleep(random.randint(5, 10))
        url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
        response = requests.get(url, headers=headers)
        html = response.text
        reg = 'class="short">(.*?)</span>'  # pattern for the short-review text
        reg = re.compile(reg, re.S)  # re.S makes . match newlines as well
        data = re.findall(reg, html)
        start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]
        if response.status_code == 200:
            print('Fetching data at start=' + start)
            for one_data in data:
                with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:  # context manager
                    f.write(one_data + '\n')
    except Exception as e:
        print(e)
        break
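One post-processing detail: text captured straight out of HTML may still contain character entities such as `&amp;` or `&quot;`. The standard library can decode them; the sample string below is a made-up illustration:

In [ ]:
from html import unescape  # imported this way to avoid rebinding the html variable used above

unescape('好看&amp;好吃')  # returns '好看&好吃'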
In [ ]:
open

In [ ]:
import wordcloud

In [ ]:
f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()

In [ ]:
txt.decode('utf-8')

In [ ]:
txt1 = '小罐茶的导演拍的\r\n真的难看 ,花打四门讲的这么牛逼'  # a sample review string used to demo segmentation

In [ ]:
import jieba
words = jieba.lcut(txt1)  # jieba word segmentation
words

In [ ]:
newtxt = ' '.join(words)
newtxt
In [ ]:
# word-cloud analysis
import os
%matplotlib inline
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt
import jieba

f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()
words = jieba.lcut(txt.decode('utf-8'))  # a list of word strings
newtxt = ' '.join(words)
w = wordcloud.WordCloud(width=1000, height=700,
                        font_path="C:/Windows/Fonts/STLITI.ttf",
                        background_color='white')
wc = w.generate(newtxt)  # renamed from wordcloud to avoid shadowing the module
plt.imshow(wc)
plt.axis('off')

In [ ]:
# word-cloud analysis with a background-image mask
import os
%matplotlib inline
from wordcloud import STOPWORDS
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt
import jieba

f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()
words = jieba.lcut(txt.decode('utf-8'))  # segment the text into a list of words
newtxt = ' '.join(words)  # join the list back into one space-separated string
background_image = plt.imread('mask.jpg')  # the image whose shape the cloud will fill
w = wordcloud.WordCloud(width=1000, height=700,
                        mask=background_image,
                        max_words=200,
                        stopwords=STOPWORDS,
                        font_path="C:/Windows/Fonts/STHUPO.TTF",
                        background_color='white')
wc = w.generate(newtxt)
plt.imshow(wc)
plt.axis('off')
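The bundled `STOPWORDS` set contains English words only, so it does little for Chinese comments; in practice you extend it by hand. A sketch (the words listed are common Chinese function words chosen as an example):

In [ ]:
stopwords = set(STOPWORDS) | {'的', '了', '是', '我', '都', '就', '也', '很'}
w = wordcloud.WordCloud(width=1000, height=700, stopwords=stopwords,
                        font_path="C:/Windows/Fonts/STHUPO.TTF",
                        background_color='white')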
In [ ]:
# complete code: scrape the reviews, then draw the word cloud
import os
os.chdir(r'D:\CDA\File')
import requests
import re
import time
import random

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
           'cookie': 'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # dict format
start = '0'
while True:
    try:
        time.sleep(random.randint(5, 10))
        url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
        response = requests.get(url, headers=headers)
        html = response.text
        reg = 'class="short">(.*?)</span>'  # pattern for the short-review text
        reg = re.compile(reg, re.S)  # re.S makes . match newlines as well
        data = re.findall(reg, html)
        start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]
        if response.status_code == 200:
            print('Fetching data at start=' + start)
            for one_data in data:
                with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:  # context manager
                    f.write(one_data + '\n')
    except Exception as e:
        print(e)
        break

# word-cloud analysis
%matplotlib inline
import wordcloud
import matplotlib.pyplot as plt
import jieba

f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()
words = jieba.lcut(txt.decode('utf-8'))  # a list of word strings
newtxt = ' '.join(words)
w = wordcloud.WordCloud(width=1000, height=700,
                        font_path="C:/Windows/Fonts/STLITI.ttf",
                        background_color='white')
wc = w.generate(newtxt)  # avoid shadowing the wordcloud module
plt.imshow(wc)
plt.axis('off')
Getting weather forecasts with an API¶
https://dev.heweather.com/docs/api/weather¶
In [ ]:
import requests

city = 'beijing'
url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694'.format(city)
response = requests.get(url)
response.text

In [ ]:
import json
dic = json.loads(response.text)
dic['HeWeather6'][0]['daily_forecast']

In [ ]:
import json
dic = json.loads(response.text)
# max temperature for the third forecast day (index 2)
dic['HeWeather6'][0]['daily_forecast'][2]['tmp_max']

In [ ]:
# complete code
import requests
import json

city = 'beijing'
url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694'.format(city)
response = requests.get(url)
dic = json.loads(response.text)
# print the three-day forecast
for i in range(3):
    print(dic['HeWeather6'][0]['daily_forecast'][i]['date'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['cond_txt_d'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['wind_dir'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['tmp_max'])
    print('----------------')
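The API reports failures (bad key, unknown city) inside the JSON body rather than via the HTTP status code, so it is worth checking the `status` field before indexing into `daily_forecast`. A hedged sketch, assuming the S6 response layout shown above:

In [ ]:
dic = response.json()  # shorthand for json.loads(response.text)
if dic['HeWeather6'][0]['status'] != 'ok':
    print('API error:', dic['HeWeather6'][0]['status'])
else:
    print(dic['HeWeather6'][0]['daily_forecast'][0]['tmp_max'])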
Read a city list from a file and show each city's maximum temperature for the next three days¶
In [ ]:
import requests
import json
import time
import random
import pandas as pd
import os

key = 'a0ebb9fb3c2540f29065d187b0121694'
os.chdir(r'D:\CDA\File')
data = pd.read_csv('china-city-list.txt', delimiter='|')
data.columns

In [ ]:
data

In [ ]:
data[' 城市英文 ']  # the '|'-delimited file yields padded column names
In [ ]:
import requests
import json
import time
import random
import pandas as pd
import os

key = 'a0ebb9fb3c2540f29065d187b0121694'
os.chdir(r'D:\CDA\File')
data = pd.read_csv('china-city-list.txt', delimiter='|')
cities = data[' 城市英文 ']

# collect all rows in a list, then convert it to a DataFrame at the end
lst = []
for city in cities[:40]:
    url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key={}'.format(city, key)
    response = requests.get(url)
    city_dic = json.loads(response.text)
    # get the city name
    city_name = city_dic['HeWeather6'][0]['basic']['location']
    print(city_name)
    # iterate over this city's three-day forecast
    for item in city_dic['HeWeather6'][0]['daily_forecast']:
        lst.append([city_name, item['date'], item['cond_txt_d'], item['tmp_max'], item['tmp_min']])
    time.sleep(random.randint(2, 3))

# build the result table (Chinese column labels: name, date, weather, max temp, min temp)
df = pd.DataFrame(lst, columns=['名称', '日期', '天气', '最高温度', '最低温度'])
df
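The comment above says the result is written to a file, but the cell only builds the DataFrame; persisting it is one extra line. The `utf-8-sig` encoding is chosen so the Chinese column headers open cleanly in Excel, and `weather_forecast.csv` is an assumed output name:

In [ ]:
df.to_csv('weather_forecast.csv', encoding='utf-8-sig')  # hypothetical output file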
In [ ]:
city_dic
In [ ]:
df