The requests library
The requests library provides methods for sending HTTP requests and handling the responses.
Installation
pip install requests
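To check that the installation worked, import the package and print its version (a quick sanity check):
import requests
print(requests.__version__)  # version string of the installed package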
GET requests
import requests
url = 'https://www.baidu.com/'  # target URL
params = {'key1': 'value1', 'key2': 'value2'}  # query parameters
# send a GET request
response = requests.get(url, params=params)
print(response.url)   # final URL including the query string: https://www.baidu.com/?key1=value1&key2=value2
print(response.text)  # response body as text
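Beyond .text, the response object also exposes the status code, the headers, and a JSON helper. A minimal sketch; https://httpbin.org/get is used here only as a stand-in test endpoint that echoes the request back as JSON:
import requests
response = requests.get('https://httpbin.org/get', params={'key1': 'value1'})
print(response.status_code)              # HTTP status code, e.g. 200
print(response.headers['Content-Type'])  # response headers behave like a dict
data = response.json()                   # parse a JSON response body into Python objects
print(data['args'])                      # httpbin echoes the query parameters back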
POST requests
import requests
url = 'https://www.baidu.com/'  # target URL
data = {'key': 'value'}  # form data
# send a POST request
response = requests.post(url, data=data)
print(response.text)  # print the response body as text
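If the server expects a JSON body instead of form data, requests can serialize it via the json= argument, which also sets the Content-Type header. A minimal sketch, again using https://httpbin.org/post as a stand-in endpoint:
import requests
payload = {'key': 'value'}
# json= serializes the dict to JSON and sets Content-Type: application/json
response = requests.post('https://httpbin.org/post', json=payload)
print(response.json()['json'])  # httpbin echoes the parsed JSON body back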
Custom request headers
import requests
url = 'https://www.baidu.com/'
# custom request headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0'
}
response = requests.get(url, headers=headers)
print(response.text)
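To confirm which headers were actually sent, inspect the prepared request attached to the response. A small sketch; the 'my-client/1.0' value is just an illustrative placeholder:
import requests
headers = {'User-Agent': 'my-client/1.0'}  # illustrative placeholder value
response = requests.get('https://www.baidu.com/', headers=headers)
print(response.request.headers['User-Agent'])  # headers of the request that was actually sent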
Exception handling
import requests
from requests.exceptions import RequestException
try:
    response = requests.get('https://www.baidu.com/')
    response.raise_for_status()  # raises HTTPError if the status code is 4xx or 5xx
    print(response.text)
except RequestException as e:
    print(e)
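In practice it also helps to set a timeout and catch the more specific exception classes. A minimal sketch:
import requests
from requests.exceptions import HTTPError, Timeout, ConnectionError
try:
    # timeout is in seconds; without it a hung server can block the program indefinitely
    response = requests.get('https://www.baidu.com/', timeout=5)
    response.raise_for_status()
    print(response.text)
except Timeout:
    print('the request timed out')
except ConnectionError:
    print('network problem (DNS failure, refused connection, ...)')
except HTTPError as e:
    print(f'bad HTTP status: {e}')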
Douban example
import requests
# request URL
url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&'
# request headers
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/58.0.3029.110 Safari/537.36'
}
# page range
start_page = int(input('Start page: '))
end_page = int(input('End page: '))
# request the data page by page
for page in range(start_page, end_page + 1):
    params = {
        'start': (page - 1) * 20,  # offset of the first item on this page
        'limit': 20                # items per page
    }
    # fetch one page of data
    resp = requests.get(url=url, headers=headers, params=params)
    resp.encoding = 'utf-8'
    # save the JSON text to a file
    with open(f'douban{page}.json', 'w', encoding='utf-8') as fp:
        fp.write(resp.text)
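Each saved file can then be loaded back with the standard json module. A small sketch, assuming the endpoint returns a JSON array of movie entries (field names inside each entry are not assumed here):
import json
with open('douban1.json', encoding='utf-8') as fp:
    movies = json.load(fp)  # a list of movie dicts
print(f'{len(movies)} movies in this file')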
From: https://www.cnblogs.com/noahze/p/18284301