1.requests模块基本使用
1.1 使用requests发送get请求
import requests

# `res` is the response object: requests wraps the HTTP response (status
# code, response headers, response body, ...) in a single Python object.
res = requests.get('https://www.cnblogs.com/Hao12345/p/17661461.html')
print(res.text)  # res.text -> the response body decoded to str
1.2 get请求携带参数
import requests

# `params` is URL-encoded and appended to the URL as the query string.
res = requests.get(
    'https://www.cnblogs.com/Hao12345/p/17661461.html',
    params={'name': "ydh", "age": 19},
)
# res.url: https://www.cnblogs.com/Hao12345/p/17661461.html?name=ydh&age=19
print(res.url)
1.3 编码和解码
from urllib.parse import quote,unquote
# quote   -> percent-encode a string (UTF-8 by default)
# unquote -> decode a percent-encoded string back to text
decoded = unquote('%E5%B8%85%E5%93%A5')
print(decoded)  # 帅哥
encoded = quote("帅哥")
print(encoded)  # %E5%B8%85%E5%93%A5
1.4 get请求携带请求头
import requests

# Commonly used request headers:
#   User-Agent: the client type
#   Referer:    URL of the previous page
#   Cookie:     data stored on the local client
user_agent = "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36 Edg/118.0.0.0"
headers = {"User-Agent": user_agent}

# Send the GET request with the custom headers and dump the response body.
res = requests.get('https://dig.chouti.com/', headers=headers)
print(res.text)
1.5 使用post请求携带数据获取cookies登录
import requests

# Send a POST request carrying the login form.
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F",
}
# NOTE(review): the username looks like a redaction placeholder, and the
# captcha value is presumably single-use -- substitute real values before
# running. Avoid hard-coding real credentials in source files.
data = {
    'username': "[email protected]",
    "password": "hao09148079",
    "captcha": " kv6n",
    "remember": " 1",
    "ref": "http://www.aa7a.cn/",
    "act": "act_login",
}
res = requests.post('http://www.aa7a.cn/user.php', data=data, headers=headers)

# The cookies set by the login response are available on the response object.
cookies = res.cookies

# Pass those cookies along with a follow-up GET request.
res_get = requests.get('http://www.aa7a.cn/', cookies=cookies)
print(res_get.text)
1.6 携带cookie的两种方式:
import requests

# Method 1: send the cookie as a raw "Cookie" request header.
# NOTE(review): this cookie string embeds session tokens copied from a
# browser; they presumably expire -- replace with fresh values before running.
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Cookie": "deviceId=web.eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiIyNGZkMzY0MS04Zjg2LTRiM2ItOGNhMC0zMTQ2MjhjYjk2YTEiLCJleHBpcmUiOiIxNzAwOTg4ODIwNTcxIn0.8JFzEL_xSn2HtSbVG54g_e6mOsNaEouO2ESk6rWuHVU; Hm_lvt_03b2668f8e8699e91d479d62bc7630f1=1698396823; __snaker__id=t6eAqJjjUPoZmCI4; gdxidpyhxdE=0fqbq3Vt78ByCEtiN%2F6v%2FA%2F4XTCsqP55A%2Bq2iA%5Cqx0%2BTDTPfR7xEY84cNJ9Ac5KMBMMvLz8KZU7GeV2qH74nrEjS5PwzAdwoSaEh4biwmzwEsr%5CcYzDADVEfCA2e4Y3VRrS4Yy54B%2Fevlavi%2B8%5CDPd%5CqAh6M8i7Yno7Q%2B64ekcSqBaRl%3A1698397753185; YD00000980905869%3AWM_NI=%2FBZjrPmHDbdrQvsBw2R1hZRyhG10%2BaBM5lsXpkVV99G7AeVr48aG37t4Go2%2FKYLTQg7gzg0jPL7k4uMF5b2RMFc3jgDzHhm3qVICve9XTmZv8RJTqjP2RvUlp8BMYU%2FMS1U%3D; YD00000980905869%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eed1e944a5f0f890e821b3eb8aa7d14b838a8facd839afe79b84f979f295a5b9d92af0fea7c3b92aed9e968bf15caf988c83e77a93aabda3f63a92efa8bbd173ac99ffd1aa6b8293fb8df254b4b59ab7bc3385e98f86d57af8e8afa8b23db8ec8990b125898784b5d46df5b9b688b3629494f98cb77f8f9afb83b15a8f9aadd4e96d88a68889f039b38a87a4f56ffb889c8ed97efc8e86b5f46294ee9e97d868b4e88eaedc5fed939dd2d837e2a3; YD00000980905869%3AWM_TID=46M6MDMPrOdFRRFVABbUmisUY2sjLhGJ; token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJjdHVfNjk4Mzk2OTE2MDgiLCJleHBpcmUiOiIxNzAwOTg4OTE2MjM5In0.WzH1t9AAoy2Q91YhERM4AU1YZqMf6TP_N4FigkRwNko; Hm_lpvt_03b2668f8e8699e91d479d62bc7630f1=1698396920",
}
data = {"linkId": "40419683"}
res = requests.post('https://dig.chouti.com/link/vote', headers=headers, data=data)
print(res.text)

# Method 2: pass a cookie jar through the `cookies=` argument.
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F",
}
# NOTE(review): the username looks like a redaction placeholder -- substitute
# a real account before running, and avoid hard-coding real credentials.
data = {
    'username': "[email protected]",
    "password": "hao09148079",
    "captcha": " kv6n",
    "remember": " 1",
    "ref": "http://www.aa7a.cn/",
    "act": "act_login",
}
res = requests.post('http://www.aa7a.cn/user.php', data=data, headers=headers)

# The cookies set by the login response are available on the response object.
cookies = res.cookies

# Pass those cookies along with a follow-up GET request.
res_get = requests.get('http://www.aa7a.cn/', cookies=cookies)
print(res_get.text)
2.requests模块的进阶用法:
2.1 自动携带cookie 的session对象
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F",
}
# NOTE(review): the username looks like a redaction placeholder -- substitute
# a real account before running, and avoid hard-coding real credentials.
data = {
    'username': "[email protected]",
    "password": "hao09148079",
    "captcha": " kv6n",
    "remember": " 1",
    "ref": "http://www.aa7a.cn/",
    "act": "act_login",
}

# Send the requests through a Session: it persists cookies between requests,
# so the follow-up GET needs no explicit `cookies=` argument. Using it as a
# context manager closes the session when the block exits.
with requests.Session() as session:
    res = session.post('http://www.aa7a.cn/user.php', data=data, headers=headers)
    res_get = session.get('http://www.aa7a.cn/')

print(res_get.text)
2.2 响应Response
import requests

res = requests.get('http://www.aa7a.cn/')

print(res.text)         # response body as str, decoded using res.encoding
print(res.content)      # response body as raw bytes
print(res.cookies)      # cookies sent back by the server (a cookie jar)
print(res.status_code)  # HTTP status code of the response
print(res.headers)      # response headers
print(res.cookies.get_dict())  # cookies as a plain dict
print(res.cookies.items())     # cookies as a list of (name, value) pairs
print(res.url)          # final URL of the response
print(res.history)      # earlier responses, if the request was redirected
print(res.encoding)     # encoding used to decode res.text (not the page source)
3.爬虫小案例:
3.1 爬取图片
import requests

res = requests.get('https://ts4.cn.mm.bing.net/th?id=OIP-C.WOiZvWcRE0EhFei1CzT_twHaNK&w=187&h=333&c=8&rs=1&qlt=90&o=6&dpr=1.3&pid=3.1&rm=2')
# Fail on an HTTP error status instead of saving an error page as the image.
res.raise_for_status()

# res.content is the raw bytes of the body, so the file is opened in binary mode.
with open('美女.png', 'wb') as f:
    f.write(res.content)
3.2 爬取肯德基餐厅地址
import json

import requests

# The form body is already URL-encoded, so it is sent as a plain string
# together with a matching Content-Type header.
data = 'cname=&pid=&keyword=%E5%91%A8%E6%B5%A6&pageIndex=1&pageSize=10'
# NOTE(review): the Cookie value was copied from a browser session and has
# presumably expired -- confirm whether the endpoint needs it at all.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'route-cell=ksa; ASP.NET_SessionId=sir55y2z3ppkk0zdgq5pbxkj; VOLCALB=839681b35f197b4ed33d4bc5335bdf66|1702364346|1702364224',
    'Referer': 'http://www.kfc.com.cn/kfccda/storelist/index.aspx',
}
res = requests.post(
    'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword',
    data=data,
    headers=headers,
)
# Fail on an HTTP error status instead of trying to parse an error page as JSON.
res.raise_for_status()

kfc = json.loads(res.text)
# Fall back to an empty list so a response without 'Table1' prints nothing
# instead of raising a TypeError when iterating None.
stores = kfc.get('Table1') or []
for store in stores:
    print('餐厅名称:%s,餐厅地址:%s' % (store.get('storeName'), store.get('addressDetail')))
标签:基本,cookies,get,res,www,模块,print,requests From: https://www.cnblogs.com/Hao12345/p/17790380.html