import requests
import pandas as pd
import random
from time import sleep
import json
def shuzhilian(keyword, *, output_path=r'D:\桌面\shuzhilian.csv', first_page=1, last_page=19):
    """Scrape the paginated article list and append one CSV row per article.

    For every page in ``first_page..last_page`` (inclusive) the article-list
    JSON endpoint is fetched, each article's title / detail link / creation
    time / description is extracted, printed, and appended to ``output_path``.

    Args:
        keyword: Label stored in the first CSV column of every row. It is NOT
            sent to the remote API; it only tags the exported rows.
        output_path: CSV file to append to (no header row is written).
        first_page: First page number to request.
        last_page: Last page number to request (inclusive).

    Raises:
        requests.exceptions.RequestException: on network failure, timeout,
            a non-2xx HTTP status, or a response body that is not JSON.
        KeyError: if the JSON payload lacks the expected ``data.data`` list
            or an article lacks one of the expected fields.
    """
    # Pool of User-Agent strings to pick from; built once, not per page.
    user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    ]
    columns = ['keyword', 'title', 'href', 'retime', 'intro']

    for page in range(first_page, last_page + 1):
        list_url = f'https://www.17hongtu.cn/third_Party/Build/getArticle?id=340&page={page}&cate_id=0'
        request_headers = {"User-Agent": random.choice(user_agents)}

        # A timeout keeps a stalled connection from hanging the whole run, and
        # raise_for_status surfaces HTTP errors before JSON decoding is tried.
        response = requests.get(url=list_url, headers=request_headers, timeout=10)
        response.raise_for_status()
        articles = response.json()['data']['data']

        rows = []
        for item in articles:
            title = item['name']
            article_id = item['id']
            # The link must embed the article's own id (the original code
            # interpolated the builtin `id` function here by mistake).
            href = f'http://www.unionbigdata.com/news/detail/news-9682-{article_id}-1.html'
            print(f'{title}的网址为{href}')
            rows.append({
                'keyword': keyword,
                'title': title,
                'href': href,
                'retime': item['create_time'],
                'intro': item['description'],
            })

        if rows:
            # One append per page instead of reopening the file per article.
            # dtype=object keeps each value as received (no numeric coercion
            # when a column mixes types across rows).
            page_frame = pd.DataFrame(rows, columns=columns, dtype=object)
            page_frame.to_csv(output_path, mode='a', header=False, index=False, encoding='utf_8_sig')

        # Brief pause between page requests to avoid hammering the server.
        sleep(.3)
# Run the scraper; the company name is recorded in the `keyword` column of each row.
shuzhilian(keyword='成都数之联科技股份有限公司')
# 说明:代码仅供学习参考使用,请勿用于任何非法用途,否则自行承担法律责任。
# 标签:数之联, headers, title, 爬取, item, href, import, 官网, id
# From: https://www.cnblogs.com/ysnote/p/17180331.html