整体思路:
首先请求 https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9053 这个地址,从返回的 JS 文件中解析出目前国内所有车站的代码以及对应的城市名称
利用Python对该url进行处理
def get_respose(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response, or None on failure.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``requests`` response with its encoding forced to UTF-8, or
        ``None`` when the request fails or the server answers with an HTTP
        error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive None instead of an exception.
        # Only request-related errors are swallowed, so KeyboardInterrupt
        # and programming errors still propagate.
        return None
    r.encoding = 'utf-8'
    return r
def get_station_name_list(city_name='上海'):
    """Return every ordered pair of station codes belonging to one city.

    Downloads the station list from ``NAME_URL``, takes the first
    single-quoted string in the response, and splits it into '@'-separated
    records of '|'-separated fields. Field 2 is used as the station code and
    field 7 as the city name.

    Args:
        city_name: City whose stations are collected. Defaults to '上海'.

    Returns:
        A list of 2-tuples ``(code_a, code_b)`` covering all ordered pairs of
        distinct station codes for the city. The list is empty when the
        download fails, the payload cannot be located, or no station matches.
    """
    r = get_respose(NAME_URL)
    if r is None:
        # get_respose signals a failed download with None.
        return []

    match = re.search(r'\'(.*?)\'', r.text)
    if match is None:
        return []

    # The first character is dropped before splitting; presumably the payload
    # starts with '@', which would otherwise yield an empty first record.
    records = match.group(1)[1:].split('@')

    city_codes = set()
    for record in records:
        fields = record.split('|')
        if len(fields) < 8:
            # Skip malformed records rather than fail on a missing field.
            continue
        if fields[7] == city_name:
            city_codes.add(fields[2])

    print(city_codes)
    # Sort so the resulting pairs come out in the same order on every run;
    # set iteration order for strings is not stable across interpreter runs.
    return list(itertools.permutations(sorted(city_codes), 2))
上述这段代码可以按照自己的需要进行调整,获取指定城市的所有站点代码,然后再进行两两站点排列组合,进而获取站点与站点之间所有可能出现的组合。
通过研究观察发现对应站点之间的数据都存储在铁路客户服务中心
再次构造函数对这个URL进行解析,以获取对应车次的列车编码
def spider_station(permutations):
infos = list()
current_date = datetime.now()
previous_date = current_date - timedelta(days=-1)
base_date = previous_date.strftime('%Y-%m-%d')
headers = {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Cookie": "_uab_collina=172827966864159075648991; JSESSIONID=8A54FAB2EAC4967564A7F83EDD7AB79B; _jc_save_czxxcx_toStation=%u5317%u4EAC%2CBJP; _jc_save_wfdc_flag=dc; _jc_save_czxxcx_fromDate=2024-10-08; _jc_save_fromStation=%u5317%u4EAC%2CBJP; _jc_save_toStation=%u4E0A%u6D77%2CSHH; _jc_save_fromDate=2024-10-07; _jc_save_toDate=2024-10-07; JSESSIONID=; CLIENT=wxgzh; tfstk=gqa-V1fu8tXuCpA28psmxHQpg3CcsyFzl8P6-vDkA-eY15JoA0iIhSGtnzmnPuqK9RVSFHqlapwYE-qnEY0SODw3eDmSqbYKA7DvrejrKeMIH8TnqJ4L0eFaTzfr-_uza2uCIObip7Pr8S0ZtHzJlZGaGeTWRNAhWhZAIObGouXX72WgEtYfcKhEhbTSVe1xcfGnRpM7dxij1fvSR2wCMmGsO39BP3gXGbkjd2gQdSMO03HeNvLLYY5yIXIejeY3kbn-peDWJvzWSck_vxLCCrhJdANSHeTIEbXmnSFleFPmzP2ts8bXyRFQ_kiYJK6tLrFbPkN2e9G4A80jEVsWM44zNliQ59-QB4eKfzivO6FuNY3xGP1Hbx4xEJa-c1Ytxqaif4Zc0TmnyfeQz8dpRRN0szo4JOpjLucZlXUF1HHKAgWaSPKCjdDtt31AMH-EVjP8NctRuGPZXjHGiSxeYmcqMAfAMH-EVjlxIsVDYHomg; _jc_save_fromStation=%u5317%u4EAC%2CBJP; guidesStatus=off; highContrastMode=defaltMode; cursorStatus=off; _jc_save_toStation=%u4E0A%u6D77%2CSHH; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerpassport=921174282.50215.0000; _jc_save_toDate=2024-11-22; _jc_save_fromDate=2024-11-23; BIGipServerotn=1859715338.50210.0000",
"Host": "kyfw.12306.cn",
"If-Modified-Since": '0',
"Referer": "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
'User-Agent': 'Mozilla/5.0 (Windows NT 10
标签:info,city,Python,list,爬取,station,12306,save,jc
From: https://blog.csdn.net/liiukangkang/article/details/144029978