import pandas as pd
import requests
def fetch_page_data(page_num):
    """Fetch one page of the product list from the Baidu jzapi endpoint.

    Args:
        page_num: 1-based page number to request.

    Returns:
        The raw ``requests.Response``; callers read ``.json()["data"]``.
    """
    # requests requires a dict (or CookieJar) here -- the original placeholder
    # string would raise a TypeError inside requests.cookiejar_from_dict.
    # Paste your browser cookies as key/value pairs before running.
    cookies = {}  # TODO: 替换浏览器cookie, e.g. {"BDUSS": "..."}
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json;charset=UTF-8",
        "Origin": "https://isite.baidu.com",
        "Pragma": "no-cache",
        "Referer": "https://isite.baidu.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        # Single-quoted so the embedded double quotes don't break the string
        # (the original lines were a SyntaxError).
        "sec-ch-ua": '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
    }
    params = {
        "reqid": "浏览器的reqid",  # TODO: copy from the browser's devtools request
    }
    json_data = {
        "dataType": 0,
        "userId": "自己的数据",  # TODO: fill in from your own account
        "siteId": "自己的数据",  # TODO: fill in from your own account
        "pagination": {
            "pageNum": page_num,
            "pageSize": 12,  # server-side page size; keep in sync with calculate_total_pages
        },
        "uniqId": "自己的数据",  # TODO: fill in from your own account
        "selectCateId": None,  # None = all categories
    }
    # timeout prevents the script from hanging forever on a stalled connection
    response = requests.post(
        "https://jzapi.baidu.com/mini/reset/pcsite/getProductList",
        params=params,
        cookies=cookies,
        headers=headers,
        json=json_data,
        timeout=30,
    )
    return response
def get_category_name_by_id(categories, category_id):
    """Return the ``name`` of the category whose ``id`` matches, else ``None``.

    Args:
        categories: iterable of dicts, each with ``"id"`` and ``"name"`` keys.
        category_id: the id to look up.
    """
    return next(
        (entry["name"] for entry in categories if entry["id"] == category_id),
        None,
    )
def calculate_total_pages(total_items, items_per_page=12):
    """Return how many pages are needed to hold ``total_items`` items.

    Equivalent to ``ceil(total_items / items_per_page)`` for non-negative
    integers; a partially filled last page counts as a full page.
    """
    full_pages, leftover = divmod(total_items, items_per_page)
    return full_pages + (1 if leftover else 0)
def main():
    """Download every product page from the site and export to 商品导出.xlsx."""
    short_titles = []
    long_titles = []
    categories = []
    image_4_3 = []
    image_1_1 = []
    text_details = []
    image_details = []
    rich_text_details = []
    tags = []

    # The first request both discovers the total item count and already
    # contains page 1, so page 1 is not fetched twice (the original code
    # requested it once for totalNum and again inside the loop).  Parse the
    # JSON once per page instead of on every access.
    first_page = fetch_page_data(1).json()["data"]
    total_pages = calculate_total_pages(first_page["totalNum"])

    for page_num in range(1, total_pages + 1):
        page = first_page if page_num == 1 else fetch_page_data(page_num).json()["data"]
        cate_list = page["cateList"]
        for product in page["list"]:
            # The API exposes a single title field; reuse it for both columns.
            short_titles.append(product["productTitle"])
            long_titles.append(product["productTitle"])
            categories.append(get_category_name_by_id(cate_list, product["categoryId"]))
            # image[0] holds the 4:3 variants, image[1] the 1:1 variants;
            # multiple URLs are joined with ';' for the spreadsheet cell.
            image_4_3.append(";".join(img["url"] for img in product["image"][0]["value"]))
            image_1_1.append(";".join(img["url"] for img in product["image"][1]["value"]))
            text_details.append("")
            image_details.append(product["content"])
            rich_text_details.append("")
            tags.append(";".join(product["productExt"]["tags"]))

    # NOTE(review): "产品详情-图片" receives the always-empty rich_text_details
    # while "产品详情-图文" receives product["content"] -- this mirrors the
    # original column mapping; confirm the two columns are not accidentally
    # swapped before relying on the export.
    table = {
        "产品短标题": short_titles,
        "产品长标题": long_titles,
        "产品分组": categories,
        "图片 - 横版图4:3": image_4_3,
        "图片 - 方形图1:1": image_1_1,
        "产品详情-文本": text_details,
        "产品详情-图片": rich_text_details,
        "产品详情-图文": image_details,
        "卖点标签": tags,
    }
    pd.DataFrame(data=table).to_excel("商品导出.xlsx", index=False)
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 标签: 木鱼, 批量, 上传, image, json, details, data, page, append
# From: https://www.cnblogs.com/lwm3/p/18349858