本质上是对百度搜索 “site:” 语法的自动化查询:逐个搜索各域名后缀,并记录每个后缀的收录结果数量。
# Automates Baidu "site:<suffix>" searches for a set of domain suffixes.
#
# For every suffix the script types the query into Baidu's search box, reads
# the result-count banner, and records:
#   * dir_ -- suffix -> banner text (or 0 when no usable count was found)
#   * d_   -- suffixes whose count is below 100, or that had no count at all
# Finally the per-suffix results are written to '2.xlsx'.
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Domain suffixes to probe.
# NOTE(review): the original listing elided the middle of this set with a
# "...." placeholder (a syntax error); fill in the remaining suffixes here.
a = {'org.do', 'org.ua'}

dir_ = {}
d_ = []

driver = webdriver.Chrome()
try:
    driver.get("https://www.baidu.com/")
    for aa in a:
        # Re-locate the search box on every pass: submitting a query can
        # re-render the page, which would leave a cached element stale.
        p_input = driver.find_element(By.ID, 'kw')
        # Clear the previous query before typing the next one.
        p_input.clear()
        p_input.send_keys('site:{}'.format(aa))
        p_btn = driver.find_element(By.ID, 'su')
        p_btn.click()
        # Give the results page time to load before reading the banner.
        time.sleep(2)
        try:
            text = driver.find_element(
                By.XPATH, '//*[@id="content_left"]/div[1]/div/p[1]/b'
            ).text
            # findall returns a list of digit runs; joining them drops any
            # separators so the banner's number can be parsed as one integer.
            number = re.findall(r"\d+", text)
            count = int(''.join(number))
        except (NoSuchElementException, ValueError):
            # No count banner on the page, or a banner without digits:
            # treat the suffix as having no indexed results.
            d_.append(aa)
            dir_[aa] = 0
        else:
            if count < 100:
                d_.append(aa)
            # NOTE(review): indentation was lost in the original paste; this
            # assumes the banner text is recorded for every suffix, not only
            # those under 100 -- confirm.
            dir_[aa] = text
finally:
    # quit() ends the whole WebDriver session (and the driver process),
    # even when a query above raised.
    driver.quit()

print(d_)
print(dir_)
pd.DataFrame(dir_, index=[0]).to_excel('2.xlsx', index=False)
标签:aa,后缀,selenium,driver,爬虫,text,import,id,dir From: https://www.cnblogs.com/chrysanthemum/p/17439883.html