# 1. 导包 (imports)
import requests
import json
# 2. 初步获取数据 (fetch the first page)
# Build the query for the KFC store-list endpoint and fetch the first page.
url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?"
op = ["cname", "pid", "keyword"]  # query operations supported by the API
cname = input("请输入需要查询的城市: ")
pid = ""  # "31" marks stores with Wi-Fi; empty string means no filter
kw = input("餐厅关键字: ")
pi = "1"   # page index, 1-based, sent as a string
ps = "10"  # page size: ten records per page
params = {
    "op": op[2],  # search by keyword
    "cname": cname,
    "pid": pid,
    "keyword": kw,
    "pageIndex": pi,
    "pageSize": ps,
}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 "
                  "Safari/537.36 Edg/106.0.1370.42"
}
# The original passed `params` as the second positional argument of
# requests.post, which is actually the form body (`data`); name it
# explicitly, and fail fast on HTTP-level errors instead of trying to
# parse an error page as JSON.
rsp = requests.post(url, data=params, headers=headers)
rsp.raise_for_status()
# The server answers with Content-Type: text/plain; charset=utf-8,
# but the body is JSON, so .json() still applies.
dict_data = rsp.json()
# 3. 获取所有数据 (fetch the remaining pages)
# Page through the API until it returns an empty "Table1", merging each
# page's rows into the first page's result.
while True:  # the original `flag` was never set to False — only `break` exits
    # Advance to the next page; pageIndex is sent as a string.
    pi = str(int(pi) + 1)
    params["pageIndex"] = pi
    rsp = requests.post(url, data=params, headers=headers)
    page = rsp.json()  # parse once per iteration instead of twice
    # An empty "Table1" means we ran past the last page of results.
    if not page["Table1"]:
        break
    dict_data["Table1"].extend(page["Table1"])
# 4. 持久化存储 (persist to disk)
# Persist the merged result as pretty-printed, UTF-8 JSON (Chinese text
# is kept readable by disabling ASCII escaping).
serialized = json.dumps(dict_data, indent=4, ensure_ascii=False)
with open("v_me_50.json", 'w', encoding="utf-8") as out_file:
    out_file.write(serialized)
# 标签: 肯德基, 爬虫, headers, json, cname, params, 餐厅, pi, rsp
# From: https://www.cnblogs.com/khrushchefox/p/16834144.html