#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Crawl whois.ipip.net for a country's AS list and dump it as JSON files.

For the configured country code the script fetches the country page,
collects every link that carries a ``title`` attribute (presumably one
link per ASN -- verify against the live page), then visits the page of
each collected entry and records the link texts found inside its
``.table-responsive`` element (presumably the announced IP prefixes).
The result is written to disk in chunks.
"""
import json
import os
import sys

import bs4
import requests


class HTML_OBJ:
    """Scraper for the AS / IP-prefix listings of a single country.

    Attributes:
        url: Base URL of the site being crawled.
        country: Country code appended to ``<url>countries/``.
        output_dir: Directory the JSON chunk files are written to.
        timeout: Per-request timeout in seconds.
        As_dic: Mapping ``link text -> {'descriptions': title}`` filled by
            :meth:`get_as` (and enriched with ``'ipsub'`` by
            :meth:`get_json`).
        IPsub_list: Accumulator that :meth:`get_ipsub` appends to.
        headers: HTTP headers sent with every request.
    """

    def __init__(self, country=None, output_dir='/root/python/crawler',
                 timeout=30):
        """Store the crawl configuration.

        Args:
            country: Country code used to build the country page URL.
            output_dir: Where :meth:`get_json` writes its files. Defaults
                to the path that used to be hard-coded.
            timeout: Seconds to wait for each HTTP request before giving
                up, so a stalled connection cannot hang the crawl.
        """
        self.url = 'https://whois.ipip.net/'
        self.country = country
        self.output_dir = output_dir
        self.timeout = timeout
        self.As_dic = {}
        self.IPsub_list = []
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/83.0.4103.97 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;'
                      'q=0.9,image/webp,image/apng,*/*;'
                      'q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        }

    def _fetch_soup(self, url):
        """Download *url* and return it parsed with the ``lxml`` parser."""
        resp = requests.get(url=url, headers=self.headers,
                            timeout=self.timeout)
        return bs4.BeautifulSoup(resp.text, 'lxml')

    def _dump(self, data, name):
        """Write *data* as JSON to ``<output_dir>/<name>.json``.

        The file is opened in ``'w'`` mode: appending a second JSON
        document to an existing file would make it unparseable.
        """
        path = os.path.join(self.output_dir, '%s.json' % name)
        with open(path, 'w', newline='\n', encoding='utf-8') as fh:
            fh.write(json.dumps(data, indent=1))

    def get_as(self):
        """Collect the titled links of the country page.

        Every ``<a>`` element with a non-empty ``title`` attribute is
        stored in ``self.As_dic`` as
        ``{link_text: {'descriptions': title}}``.

        Returns:
            ``self.As_dic`` (the same dict object, updated in place).
        """
        soup = self._fetch_soup(self.url + 'countries/' + self.country)
        for link in soup.select('a'):
            title = link.get('title')
            if title:
                self.As_dic[link.text] = {'descriptions': title}
        return self.As_dic

    def get_ipsub(self, ASN):
        """Append the link texts of one entry's page to ``self.IPsub_list``.

        Args:
            ASN: Path segment appended to the base URL (a key of the dict
                returned by :meth:`get_as`).

        Returns:
            ``self.IPsub_list``. The list is *not* cleared first; callers
            that want per-entry results must reset it themselves, as
            :meth:`get_json` does.
        """
        soup = self._fetch_soup(self.url + ASN)
        table = soup.select_one('.table-responsive')
        # select_one returns None when the page has no such element (error
        # page, entry without a table); treat that as "nothing found"
        # instead of crashing the whole crawl with an AttributeError.
        if table is not None:
            self.IPsub_list.extend(link.text for link in table.select('a'))
        return self.IPsub_list

    def get_json(self, chunk_size=100):
        """Crawl every entry of the country and write the results to disk.

        Entries are written in files of ``chunk_size`` entries each, named
        ``<country>1.json``, ``<country>2.json``, ...; whatever remains at
        the end (possibly an empty object) goes to ``<country>last.json``.
        A percentage is printed to stdout and overwritten in place while
        the crawl runs.

        Args:
            chunk_size: Number of entries per output file.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError('chunk_size must be at least 1')
        # Create the target directory up front so a missing directory is
        # reported before the (long) crawl rather than after it.
        os.makedirs(self.output_dir, exist_ok=True)

        write, flush = sys.stdout.write, sys.stdout.flush
        as_dic = self.get_as()
        total = len(as_dic)
        batch = {}
        chunk_no = 1
        for index, asn in enumerate(as_dic):
            self.get_ipsub(asn)
            entry = as_dic[asn]
            entry['ipsub'] = self.IPsub_list
            # Start a fresh accumulator for the next entry.
            self.IPsub_list = []
            batch[asn] = entry

            # '%' presentation multiplies by 100, so the value shown is a
            # real percentage; index + 1 lets the last entry reach 100%.
            show = '{:.3%}'.format((index + 1) / total)
            write(show)
            flush()
            # Backspaces move the cursor back so the next value overwrites
            # this one.
            write('\x08' * len(show))

            if len(batch) >= chunk_size:
                self._dump(batch, self.country + str(chunk_no))
                batch = {}
                chunk_no += 1

        print('>>>>>>finish')
        self._dump(batch, self.country + 'last')


if __name__ == '__main__':
    HTML_OBJ(country='KR').get_json()
{"AS2500": { "descriptions": "AS2500 - WIDE-BB - WIDE Project, JP", "ipsub": [ "", "", "", "", "", "", "", "" ] }, "AS2501": { "descriptions": "AS2501 - UTnet - The University of Tokyo, JP", "ipsub": [ "", "", "", "", "", "", "", "" ] }, "AS2504": { "descriptions": "AS2504 - NCA5 - Kyoto University, JP", "ipsub": [ "", "", "", "", "", "" ] },
... }
标签: 24, get, url, 0.0, self, 下雨, dic, IP地址, 备用
From: https://www.cnblogs.com/darkchen/p/16753301.html