首页 > 编程语言 > Python request

Python request

时间:2023-02-12 22:00:44浏览次数:61  
标签:re Python request df url import data response

request    

requests

   

get请求

  In [ ]:
import requests

# Issue a plain GET request for the Baidu home page.
response = requests.get("http://www.baidu.com")
  In [ ]:
response.status_code  # HTTP status code of the response
  In [ ]:
response.url  # URL that was requested
  In [ ]:
response.headers  # response headers
  In [ ]:
response.cookies  # cookies attached to the response
requests.utils.dict_from_cookiejar(response.cookies)  # the same cookies converted to a plain dict
  In [ ]:
response.content  # response body as raw bytes
  In [ ]:
response.text  # response body as text
   

带参数的get请求

  In [ ]:
# GET with parameters, approach 1: write the query string directly into the URL.
import requests

response = requests.get("http://httpbin.org/get?name=Jim&age=22")
print(response.text)
  In [ ]:
response.url
  In [ ]:
# GET with parameters, approach 2: put the parameters in a dict and pass it as
# `params`; requests builds the query string.
data = dict(name="tom", age=20)
response = requests.get("http://httpbin.org/get", params=data)
print(response.text)
  In [ ]:
# Search Baidu for a single keyword (sent as the `wd` query parameter).
data = dict(wd="python")
response = requests.get("http://www.baidu.com/s?", params=data)
print(response.text)
  In [ ]:
# Search Baidu for several keywords, one request per keyword, and show the
# final URL that each request resolved to.
words = ["python", "java", "c", "matlab"]
for word in words:
    data = dict(wd=word)
    response = requests.get("http://www.baidu.com/s?", params=data)
    print(response.url)
   

post请求

  In [ ]:
# POST request: the dict is sent as form fields via `data=`.
import requests

data = dict(name="jim", age="22")
response = requests.post("http://httpbin.org/post", data=data)
print(response.text)
  In [ ]:
response.url
   

提取链家房源面积和价格并排序

  In [ ]:
import re

import requests

# Fetch the Lianjia rental listing page, sending a browser-like User-Agent header.
url = "https://bj.lianjia.com/zufang/"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
}
response = requests.get(url, headers=header)
html = response.text
  In [ ]:
response.status_code
  In [ ]:
html
  In [ ]:
# Community names: the value of an alt="..." attribute that ends its line.
nameregex = re.compile('alt="(.*?)"\n')
name = re.findall(nameregex, html)
# Floor area: the number directly before "㎡" at the end of a line.
arearegex = re.compile('([0-9.]+)㎡\n')
area = re.findall(arearegex, html)
# Monthly rent: the number inside <em>...</em> followed by " 元/月".
priceregex = re.compile('<em>([0-9.]+)</em> 元/月')
price = re.findall(priceregex, html)
  In [ ]:
len(name)
  In [ ]:
len(area)
  In [ ]:
len(price)
  In [ ]:
import pandas as pd

# Build one [name, area, price] row per listing. zip() walks the three lists in
# lockstep and stops at the shortest, so a page on which the patterns matched a
# different number of items no longer raises IndexError part-way through.
datalist = [
    [listing_name, float(listing_area), float(listing_price)]
    for listing_name, listing_area, listing_price in zip(name, area, price)
]
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
  In [ ]:
df
  In [ ]:
df.info()
  In [ ]:
df.sort_values('area')  # sort the listings by area
  In [ ]:
df.sort_values('price')  # sort the listings by price
  In [ ]:
# Complete example: fetch one Lianjia rental listing page, extract the name,
# area and monthly rent of each listing, and sort the resulting table.
import re

import pandas as pd
import requests

url = 'https://sz.lianjia.com/zufang/'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.text

nameregex = re.compile(r'alt="(.*?)"\n')
name = nameregex.findall(html)  # all community names
arearegex = re.compile(r'([0-9.]+)㎡\n')
area = arearegex.findall(html)  # floor areas
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
price = priceregex.findall(html)  # monthly rents

# zip() pairs the three lists item by item and stops at the shortest one, so a
# mismatch in the number of matches cannot raise IndexError.
datalist = [[n, float(a), float(p)] for n, a, p in zip(name, area, price)]
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
df.sort_values('area')  # sorted by area
df.sort_values('price')  # sorted by price
   

链家爬取多页

  In [ ]:
# Crawl the first 10 Lianjia rental listing pages and combine them into one table.
import random
import re
import time

import pandas as pd  # this cell uses pd, so import it here rather than rely on an earlier cell
import requests

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
baseurl = 'https://bj.lianjia.com/zufang/pg'
columns = ['name', 'area', 'price']

# Compile the patterns once, outside the loop. The area pattern accepts a
# decimal point, matching the single-page cells; with digits only, an area such
# as "45.5㎡" would be captured as "5".
nameregex = re.compile(r'alt="(.*?)"\n')
arearegex = re.compile(r'([0-9.]+)㎡\n')
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')

frames = []
for page in range(1, 11):  # pages 1..10
    print('正在爬取第%d页' % page)
    url = baseurl + str(page) + '/#contentList'
    response = requests.get(url, headers=header)
    html = response.text
    name = nameregex.findall(html)  # community names
    area = arearegex.findall(html)  # floor areas
    price = priceregex.findall(html)  # monthly rents
    # zip() stops at the shortest list, so unequal match counts cannot raise
    # IndexError, and the page counter is no longer reused as the row index.
    datalist = [[n, float(a), float(p)] for n, a, p in zip(name, area, price)]
    frames.append(pd.DataFrame(datalist, columns=columns))
    time.sleep(random.randint(6, 7))  # pause between pages
print('爬取完毕')
data = pd.concat(frames)
data.set_index('name').sort_values('area')
  In [ ]:
time.time()
  In [ ]:
time.time()
   

有道翻译 post请求

  In [ ]:
import requests

# POST a fixed form to the Youdao translate endpoint and print the translated text.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
# http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
formdata = {
    'i': '中国',
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '1541165120374',
    'sign': '5bbeca852044319291d932d4bfe92564',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTIME',
    'typoResult': 'false',
}
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}
response = requests.post(url, data=formdata, headers=header)
# The translated text is read from translateResult[0][0]['tgt'] of the JSON reply.
print(response.json()['translateResult'][0][0]['tgt'])
  In [ ]:
response.json()['translateResult'][0][0]['tgt']
  In [ ]:
response.json()['translateResult'][0][0]['tgt']
   

界面形式

  In [ ]:
# Interactive variant: read the text to translate from the keyboard.
import requests

content = input('请输入内容:')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
formdata = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '1541165120374',
    'sign': '5bbeca852044319291d932d4bfe92564',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTIME',
    'typoResult': 'false',
}
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}
response = requests.post(url, data=formdata, headers=header)
print(response.json()['translateResult'][0][0]['tgt'])
  In [ ]:
# Translate several inputs, one POST per input, pausing between requests.
import time

import requests

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}
# Form fields that are the same for every request.
fixed_fields = {
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '1541165120374',
    'sign': '5bbeca852044319291d932d4bfe92564',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTIME',
    'typoResult': 'false',
}
contents = ['中国', '美国', '英国', '法国']
for content in contents:
    # 'i' goes first so the field order matches the single-request cells.
    formdata = {'i': content, **fixed_fields}
    response = requests.post(url, data=formdata, headers=header)
    print(response.json()['translateResult'][0][0]['tgt'])
    time.sleep(5)
   

猫眼电影排行榜

   

单页爬取

  In [ ]:
import re

import pandas as pd
import requests

# Fetch the first page (offset 0) of the Maoyan board with a browser-like User-Agent.
url = "https://maoyan.com/board/4?offset=0"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
}
response = requests.get(url, headers=header)
html = response.text
  In [ ]:
html
  In [ ]:
# One match per <dd>...</dd> entry, with seven capture groups. Adjacent string
# literals are concatenated into a single pattern; re.S lets "." match newlines too.
pattern = re.compile(
    '<dd>.*?board-index.*?>(.*?)</i>'
    '.*?data-src="(.*?)"'
    '.*?name.*?a.*?>(.*?)</a>'
    '.*?star.*?>(.*?)</p>'
    '.*?releasetime.*?>(.*?)</p>'
    '.*?integer.*?>(.*?)</i>'
    '.*?fraction.*?>(.*?)</i>'
    '.*?</dd>',
    re.S,
)
items = pattern.findall(html)
  In [ ]:
items
  In [ ]:
# One row per matched entry, indexed by the captured rank.
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2'])
df = df.set_index('rank')
  In [ ]:
df
  In [ ]:
# Same request as before, but decode the raw bytes as UTF-8 explicitly.
url = "https://maoyan.com/board/4?offset=0"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
}
response = requests.get(url, headers=header)
html = response.content.decode("utf-8")
  In [ ]:
html
  In [ ]:
# One match per <dd>...</dd> entry, with seven capture groups. Adjacent string
# literals are concatenated into a single pattern; re.S lets "." match newlines too.
pattern = re.compile(
    '<dd>.*?board-index.*?>(.*?)</i>'
    '.*?data-src="(.*?)"'
    '.*?name.*?a.*?>(.*?)</a>'
    '.*?star.*?>(.*?)</p>'
    '.*?releasetime.*?>(.*?)</p>'
    '.*?integer.*?>(.*?)</i>'
    '.*?fraction.*?>(.*?)</i>'
    '.*?</dd>',
    re.S,
)
items = re.findall(pattern, html)
  In [ ]:
# Build the table, then strip surrounding whitespace from the captured actors text.
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2'])
df = df.set_index('rank')
df['actors'] = df['actors'].map(str.strip)
df
  In [ ]:
df.info()
  In [ ]:
df['score1']
  In [ ]:
# score1 and score2 hold the two text pieces captured by the "integer" and
# "fraction" groups of the pattern; "+" joins them into a single score string.
df['score']=df['score1']+df['score2']
  In [ ]:
# Show the table without the two partial score columns (the result is not assigned).
df.drop(columns=['score1', 'score2'])
  In [ ]:
# Complete single-page example. The imports are included so the cell runs on
# its own instead of depending on earlier cells.
import re

import pandas as pd
import requests

url = 'https://maoyan.com/board/4?offset=0'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.content.decode('utf-8')
# re.S makes "." match every character, newlines included.
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
items = re.findall(pattern, html)
df = pd.DataFrame(items, columns=['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']  # join the two captured score pieces
# Keep the result, as the multi-page cells do; drop() returns a new frame.
df = df.drop(['score1', 'score2'], axis=1)
df
  In [ ]:
html
  In [ ]:
# Read the offset of the next page from the "下一页" (next page) link.
# The "?" of the query string is escaped so it is matched literally; left bare
# it was a quantifier that merely made the preceding quote optional. "\s*"
# accepts any whitespace between the closing quote and ">" rather than exactly
# one newline plus two spaces.
startpattern = re.compile(r'\?offset=(\d+)"\s*>下一页')
# The original note gives 10 as the value here. Raises IndexError when the page has no such link.
start = re.findall(startpattern, html)[0]
  In [ ]:
start
  In [ ]:      

猫眼多页爬取

  In [ ]:
# Crawl several pages, approach 1: step the offset through 0, 10, ..., 90.
import random
import re
import time  # used for the pause below; import it here rather than rely on an earlier cell

import pandas as pd
import requests

baseurl = 'https://maoyan.com/board/4?offset='
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
columns = ['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']
# Compiled once; re.S makes "." match every character, newlines included.
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)

frames = []
for i in range(10):
    print('正在爬取第%d页' % (i + 1))  # report pages as 1..10, not 0..9
    url = baseurl + str(i * 10)
    response = requests.get(url, headers=header)
    if response.status_code == 200:
        html = response.content.decode('utf-8')
        items = re.findall(pattern, html)
        frames.append(pd.DataFrame(items, columns=columns).set_index('rank'))
    # Pause after every request, including failed ones.
    time.sleep(random.randint(5, 7))

# With no successful page, fall back to an empty frame that still has the
# expected columns so the clean-up below does not raise KeyError.
df = pd.concat(frames) if frames else pd.DataFrame(columns=columns).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']  # join the two captured score pieces
df = df.drop(['score1', 'score2'], axis=1)
df
  In [ ]:
# Crawl several pages, approach 2 (preferred): follow the "下一页" (next page)
# link until there is none.
# Complete multi-page code.
import random
import re
import time

import pandas as pd
import requests

baseurl = 'https://maoyan.com/board/4?offset='
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
columns = ['rank', 'url', 'title', 'actors', 'time', 'score1', 'score2']
# Compiled once; re.S makes "." match every character, newlines included.
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)
# "?" is escaped so it matches the literal query-string "?"; "\s*" accepts any
# whitespace between the closing quote and ">".
startpattern = re.compile(r'\?offset=(\d+)"\s*>下一页')

frames = []
start = '0'
while True:
    # The offset grows by 10 per page, so convert it to a 1-based page number.
    print('正在爬取第%d页' % (int(start) // 10 + 1))
    try:
        response = requests.get(baseurl + start, headers=header)
    except requests.RequestException as exc:
        # Report network failures rather than swallowing every exception.
        print(exc)
        break
    if response.status_code != 200:
        # Stop here: carrying on would re-parse the previous page's html, find
        # the same "next" offset again, and loop forever without pausing.
        print('请求失败,状态码:%d' % response.status_code)
        break
    html = response.content.decode('utf-8')
    items = re.findall(pattern, html)
    frames.append(pd.DataFrame(items, columns=columns).set_index('rank'))
    next_offsets = re.findall(startpattern, html)
    if not next_offsets:
        break  # no "next page" link: this was the last page
    start = next_offsets[0]
    print(start)
    time.sleep(random.randint(5, 7))

# With no successful page, fall back to an empty frame that still has the
# expected columns so the clean-up below does not raise KeyError.
df = pd.concat(frames) if frames else pd.DataFrame(columns=columns).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']  # join the two captured score pieces
df = df.drop(['score1', 'score2'], axis=1)
df
  In [ ]:      

舌尖中国 短评提取

  In [ ]:
# Fetch the first page of short comments, sending a User-Agent and a cookie header.
url = 'https://movie.douban.com/subject/25875034/comments?start=0&limit=20&sort=new_score&status=P'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    'cookie': 'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.',
}
response = requests.get(url, headers=headers)
html = response.text  # page source
html
  In [ ]:
# Capture the text inside each class="short" span; re.S lets "." span newlines.
reg = re.compile('class="short">(.*?)</span>', re.S)
data = reg.findall(html)
data
  In [ ]:
# Build the URL for a given start value, then read the next start value from
# the "后页" (next page) link of the page already held in `html`.
start = '0'
baseurl = 'https://movie.douban.com/subject/25875034/comments?start={}&limit=20&sort=new_score&status=P'.format(start)
next_link = r'<a href="\?start=(\d+)&amp;limit=20&amp;sort=new_score&amp;status=P&amp;percent_type=" data-page="" class="next">后页 ></a>'
start = re.findall(next_link, html, re.S)[0]
start
  In [ ]:
# Complete code: crawl every page of short comments and append them to a text file.
import os
import random
import re
import time

import requests

os.chdir(r'C:\Users\chenh\Desktop')
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36','cookie':'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}
# Both patterns are compiled once; re.S lets "." span newlines.
reg = re.compile('class="short">(.*?)</span>', re.S)
next_reg = re.compile(r'<a href="\?start=(\d+)&amp;limit=20&amp;sort=new_score&amp;status=P&amp;percent_type=" data-page="" class="next">后页 ></a>', re.S)

start = '0'
while True:
    time.sleep(random.randint(5, 10))
    url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
    try:
        response = requests.get(url, headers=headers)
    except requests.RequestException as e:
        print(e)
        break
    if response.status_code != 200:
        print('请求失败,状态码:%d' % response.status_code)
        break
    html = response.text
    data = reg.findall(html)
    # Log and save the current page BEFORE looking for the next link. Before,
    # the lookup raised on the last page and its comments were never written,
    # and the log line showed the next page's start value.
    print('正在获取start为' + start + '的数据')
    with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:  # opened once per page
        f.writelines(one_data + '\n' for one_data in data)
    next_start = next_reg.findall(html)
    if not next_start:
        break  # no "next page" link: finished
    start = next_start[0]
  In [ ]:
open
 
  In [ ]:
import wordcloud
  In [ ]:
# Read the saved comments as raw bytes; the with-block closes the file even if
# the read fails.
with open("豆瓣舌尖中国.txt", "rb") as f:
    txt = f.read()
  In [ ]:
txt.decode('utf-8')
  In [ ]:
# Short sample string used to try out the tokenizer in the next cell.
txt1='小罐茶的导演拍的\r\n真的难看 ,花打四门讲的这么牛逼'
  In [ ]:
import jieba  # imported here: the only other import is in a later cell, so this cell raised NameError when run in order

words = jieba.lcut(txt1)  # split the sample text into a list of words
words
  In [ ]:
# Join the tokens back into one space-separated string.
separator = ' '
newtxt = separator.join(words)
newtxt
  In [ ]:
#词云图分析
import os
%matplotlib inline
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt 
import jieba
f=open("豆瓣舌尖中国.txt","rb")
txt=f.read()
f.close()
words  = jieba.lcut(txt)#列表,列表里面的元素为字符串
newtxt= ' '.join(words)
w=wordcloud.WordCloud(width=1000,height=700,font_path="C:/Windows/Fonts/STLITI.ttf",background_color = 'white')
wordcloud=w.generate(newtxt)
plt.imshow(wordcloud)
plt.axis('off')
  In [ ]:
txt.decode('utf-8')
  In [ ]:
#词云图分析,加载背景图片
import os
%matplotlib inline
from wordcloud import STOPWORDS
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt 
import jieba
f=open("豆瓣舌尖中国.txt","rb")
txt=f.read()
words  = jieba.lcut(txt)#对字符串进行分词,输出为列表
newtxt= ' '.join(words)#将列表转换为字符串
backgroud_Image = plt.imread('mask.jpg')
w=wordcloud.WordCloud(width=1000,height=700,mask=backgroud_Image,max_words=200,stopwords=STOPWORDS,font_path="C:/Windows/Fonts/STHUPO.TTF",background_color = 'white')
wordcloud=w.generate(newtxt)
plt.imshow(wordcloud)
plt.axis('off')
  In [ ]:
#完整代码,词云图分析
import os
os.chdir(r'D:\CDA\File')
import requests
import re
import time
import random
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36','cookie':'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}#字典格式
start = '0'
while True:
    try:
        time.sleep(random.randint(5, 10))
        url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
        response = requests.get(url, headers=headers)
        html=response.text
        reg = 'class="short">(.*?)</span>'#进行匹配
        reg = re.compile(reg,re.S)#匹配包括换行符在内的所有
        data = re.findall(reg, html)
        start = re.findall(r'<a href="\?start=(\d+)&amp;limit=20&amp;sort=new_score&amp;status=P&amp;percent_type=" data-page="" class="next">后页 ></a>',html, re.S)[0]
        if response.status_code == 200:
            print('正在获取start为' + start + '的数据')
            for one_data in data:
                with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:#上下文管理
                    f.write(one_data + '\n')
    except Exception as e:
        print(e)
        break

#词云图分析
import os
%matplotlib inline
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt 
import jieba
f=open("豆瓣舌尖中国.txt","rb")
txt=f.read()
f.close()
words  = jieba.lcut(txt)#列表,列表里面的元素为字符串
newtxt= ' '.join(words)
w=wordcloud.WordCloud(width=1000,height=700,font_path="C:/Windows/Fonts/STLITI.ttf",background_color = 'white')
wordcloud=w.generate(newtxt)
plt.imshow(wordcloud)
plt.axis('off')
   

用API获取天气预报

https://dev.heweather.com/docs/api/weather

  In [ ]:
import requests

# Request the forecast for one city; the city is substituted into the
# `location` query parameter of the URL.
city = "beijing"
url_template = "https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694"
url = url_template.format(city)
response = requests.get(url)
response.text
  In [ ]:
import json

# Parse the JSON body and show the daily_forecast list of the first HeWeather6 entry.
dic = json.loads(response.text)
first_entry = dic["HeWeather6"][0]
first_entry["daily_forecast"]
  In [ ]:
import json
dic = json.loads(response.text)
# Max temperature (tmp_max) of the entry at index 2 of daily_forecast.
# NOTE(review): the original comment read "today's max temperature"; if the
# entries are ordered from today onward, today would be index 0 — confirm
# which one is intended.
dic['HeWeather6'][0]['daily_forecast'][2]['tmp_max']
  In [ ]:
# Complete code: fetch the forecast for one city and print four fields for
# each of the first three forecast entries.
import json

import requests

city = "beijing"
url = "https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694".format(city)
response = requests.get(url)
dic = json.loads(response.text)

# Print the selected fields, one per line, with a separator after each entry.
forecast = dic["HeWeather6"][0]["daily_forecast"]
for i in range(3):
    day = forecast[i]
    for field in ("date", "cond_txt_d", "wind_dir", "tmp_max"):
        print(day[field])
    print("----------------")
  In [ ]:      

从文件中读入城市列表,显示每个城市的未来三天的最高温度

  In [ ]:
import json
import os
import random
import time

import pandas as pd
import requests

key = "a0ebb9fb3c2540f29065d187b0121694"
os.chdir(r"D:\CDA\File")
# The city list is a "|"-delimited text file; show the column names it yields.
data = pd.read_csv("china-city-list.txt", delimiter="|")
data.columns
  In [ ]:
data
  In [ ]:
data[' 城市英文            ']
  In [ ]:     In [ ]:     In [ ]:
# Read the city list from a file and collect the forecast of the first 40
# cities into one DataFrame.
import json
import os
import random
import time

import pandas as pd
import requests

key = 'a0ebb9fb3c2540f29065d187b0121694'
os.chdir(r'D:\CDA\File')
data = pd.read_csv('china-city-list.txt', delimiter='|')
cities = data[' 城市英文            ']
# Rows for every city, turned into a DataFrame at the end.
lst = []
for city in cities[:40]:
    # The column name itself is space-padded, so the values presumably are too;
    # strip them before they go into the request.
    city = str(city).strip()
    # `params` lets requests build and URL-encode the query string.
    response = requests.get(
        'https://free-api.heweather.com/s6/weather/forecast',
        params={'location': city, 'key': key},
    )
    try:
        city_dic = json.loads(response.text)
        result = city_dic['HeWeather6'][0]
        city_name = result['basic']['location']  # city name reported by the API
        # One row per forecast entry of this city.
        rows = [
            [city_name, item['date'], item['cond_txt_d'], item['tmp_max'], item['tmp_min']]
            for item in result['daily_forecast']
        ]
    except (ValueError, KeyError, IndexError) as exc:
        # A reply without the expected fields (or invalid JSON) skips this
        # city; previously it aborted the whole run.
        print('跳过 {}: {!r}'.format(city, exc))
    else:
        print(city_name)
        lst.extend(rows)
    time.sleep(random.randint(2, 3))
# Collect the rows into a DataFrame (nothing is written to a file here).
df = pd.DataFrame(lst, columns=['名称', '日期', '天气', '最高温度', '最低温度'])
df
  In [ ]:
city_dic
  In [ ]:
df
  In [ ]:  

标签:re,Python,request,df,url,import,data,response
From: https://www.cnblogs.com/thankcat/p/17114819.html

相关文章

  • Python如何创建有声读物
    本文分享一下利用Python的GTTS模块将PDF文本转换为音频,从而将PDF书籍转换为有声读物的脚本:#CreateAudiobooks#pipinstallgTTS#pipinstallPyPDF2fromPyPDF2i......
  • Python飞机大战游戏
    #导入模块顺序:官方标准模块、第三方模块、应用程序模块importrandomimportpygameSCREEN_RECT=pygame.Rect(0,0,480,700)FRAME_PER_SEC=60CREATE_ENEMY_EV......
  • cnblog_fastapi 中的 schemas 和 models 的区别 - python 后端实战经验分享 - Segment
    pythonfastapischema和model的区别ToavoidconfusionbetweentheSQLAlchemymodelsandthePydanticmodels,wewillhavethefilemodel......来自fastapi......
  • Python黑客编程之Bp字典生成插件
    描述编写一款burpsuite插件,用于从浏览的网页中抓取特定文字,生成字典给Intruder使用代码注册插件创建JMenuItem菜单,在target站点中右键触发回调函数wordlist_menuw......
  • python优缺点分析11
    学--就如同你即将看到的一样,Python极其容易上手。前面已经提到了,Python有极其简单的语法。​ 免费、开源--Python是FLOSS(自由/开放源码软件)之一。简单地说,你可以自......
  • 1行Python代码去除图片水印,网友:一干二净!
    大家好,这里是程序员晚枫。最近小明在开淘宝店(店名:爱吃火锅的小明),需要给自己的原创图片加水印,于是我上次给她开发了增加水印的功能:图片加水印,保护原创图片,一行Python代码搞......
  • 大爽Python入门教程 2-7 *拓展实践,对比与思考
    大爽Python入门公开课教案点击查看教程总目录本文偏难。推荐等第一二三四章上完后,回过来拓展阅读。基础情景思考假设有这样一张成绩表最左边的一列是名字,起名麻......
  • 大爽Python入门教程 2-6 拓展练习
    大爽Python入门公开课教案点击查看教程总目录方位输出第一章有一个思考题,方位变换:小明同学站在平原上,面朝北方,向左转51次之后(每次只转90度),小明面朝哪里?小明转过......
  • Python面向对象---类的基本使用
     ✅作者简介:热爱科研的算法开发者,Python、Matlab项目可交流、沟通、学习。 ......
  • 记 node 中 http.request.on() 方法来源
    node官方文档通篇没有明确说明on方法的来源,但是很多地方又都在使用,抱着刨根究底的心态查资料得知:在Node.js中,所有的流都是继承自stream.Readable类或它的子类的。......