前言
国资央企招聘平台汇集了众多国企、央企的招聘信息。本文写一个脚本,用来爬取平台上各个岗位的招聘信息。
接口
抓包时发现,这些岗位信息都是通过接口返回的,因此可以直接请求这个接口来爬取数据。
编写代码:
'''
爬取国资央企招聘平台上的招聘信息。
https://cujiuye.iguopin.com/
'''
import requests
class get_guoqi_job():
    """Fetch and print job postings from the iguopin job-list API.

    Instances are callable. ``get_guoqi_job()()`` runs the default search and
    ``get_guoqi_job()('销售')`` searches for a custom keyword.
    """

    def __init__(self, timeout=10):
        """Store the endpoint, the request headers and the network timeout.

        Args:
            timeout: Seconds to wait for the API before giving up. Without a
                timeout ``requests`` can block forever on a stalled server.
        """
        self.url = "https://gp-api.iguopin.com/api/jobs/v1/list"
        self.headers = {
            "Content-Type": "application/json;charset=UTF-8",
            "Accept": "application/json, text/plain, */*",
            "Device": "pc",
            "Subsite": "cujiuye",
            "Version": "5.0.0",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
        }
        self.timeout = timeout

    def __call__(self, keyword='网络安全 信息安全', job_num=200, page=1, session=None):
        """Query one page of postings, print them, and return them as dicts.

        Args:
            keyword: Search keyword(s) sent to the API.
            job_num: Page size. The original author's note says at most 200
                postings are fetched per call; when 200 or more come back,
                call again with ``page=2``.
            page: 1-based page number sent to the API.
            session: Optional object exposing a ``requests``-compatible
                ``post`` method (e.g. a ``requests.Session``) so connections
                can be reused across pages. Defaults to the ``requests``
                module itself.

        Returns:
            A list with one dict per posting, in the order the API returned
            them.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status
                (when the default ``requests`` client is used).
            KeyError: If the response body lacks the ``data``/``list`` keys
                or a posting lacks one of the expected fields.
        """
        payload = {
            "page": page,
            "page_size": job_num,
            "keyword": keyword,
            # NOTE(review): presumably the "campus recruitment" job-nature
            # code, judging by the banner printed below -- confirm.
            "nature": ["115xW5oQ"]
        }
        client = requests if session is None else session
        response = client.post(
            self.url, json=payload, headers=self.headers, timeout=self.timeout
        )
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()

        # NOTE(review): `or []` guards against a null list -- confirm whether
        # the API ever returns one.
        records = response.json()['data']['list'] or []
        print(f'-------------有{len(records)}个相关{keyword}岗位正在校招-------------')

        jobs = []
        for num, record in enumerate(records, start=1):
            job = self._build_job(record)
            self._print_job(job, num)
            jobs.append(job)
        return jobs

    @staticmethod
    def _build_job(record):
        """Convert one raw API posting into the flat dict returned to callers."""
        # A posting may carry no district at all; fall back to an empty
        # location instead of raising IndexError on `[0]`.
        districts = record['district_list'] or []
        area_cn = districts[0]['area_cn'] if districts else ''
        # Tolerate a missing/null company_info object.
        nature_cn = (record.get('company_info') or {}).get('nature_cn') or ''
        return {
            '岗位': record['job_name'],
            '工作地点': area_cn,
            '薪资': f"{record['min_wage']}-{record['max_wage']}",
            '学历': record['education_cn'],
            '公司信息': record['company_name'] + ' ' + nature_cn,
            # The trailing colon in this key is kept as-is: existing callers
            # may already look the link up under this exact key.
            '岗位链接:': 'https://www.iguopin.com/job/detail?id=' + str(record['job_id']),
            '招聘时间': record['start_time'] + '——' + record['end_time'],
            '职位要求': record['contents']
        }

    @staticmethod
    def _print_job(job, num):
        """Print one posting followed by a separator carrying its 1-based index."""
        print('岗位:', job['岗位'], '工作地点:', job['工作地点'], '薪资:', job['薪资'], job['学历'])
        print('公司:', job['公司信息'])
        print('岗位链接:', job['岗位链接:'])
        print('招聘时间:', job['招聘时间'])
        print('职位要求:', job['职位要求'])
        print(f'------------------------------------------------------{num}')
if __name__ == '__main__':
    # Script entry point: run the default keyword search and print the results.
    crawler = get_guoqi_job()
    crawler()
效果:
网页爬虫
暂时不做。
标签:__,name,央企,company,招聘,爬取,job,data,cn From: https://www.cnblogs.com/iruan/p/18323973