import random
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By


def request_zy(url):
    """Resolve *url* through any redirects and return the final URL.

    Baidu result links point at a redirect service; fetching the link and
    reading ``response.url`` yields the real target address.
    """
    # timeout keeps a single dead link from hanging the whole crawl
    response = requests.get(url=url, timeout=10)
    return response.url


def main():
    """Page through Baidu site-search results, appending each resolved
    result URL (one per line) to 有色技术.txt."""
    driver = webdriver.Chrome()
    url = 'https://www.baidu.com/s?wd=site%3Awww.china-mcc.com&pn=10&oq=site%3Awww.china-mcc.com&ct=2097152&tn=baiduhome_pg&ie=utf-8&si=www.china-mcc.com&rsv_idx=2&rsv_pq=b12974be0001e1e7&rsv_t=0ec8iP0K%2B50KOidEPlwNMg4d5yDJA8wmDCnRC3C2o8p3fd5Ts70J7kpf0GI3EDJPK5N8&gpc=stf%3D1684080000%2C1685462400%7Cstftype%3D2&tfflag=1&bs=site%3Awww.china-mcc.com&rsv_jmp=fail'
    try:
        # Load the first results page ONCE. The original called
        # driver.get(url) inside the loop, which reset pagination every
        # iteration and re-scraped the same page; navigation is now driven
        # purely by clicking the "next page" link at the bottom of the loop.
        driver.get(url)
        for i in range(2, 10000):
            # Randomized delay to avoid hammering Baidu / tripping rate limits.
            time.sleep(random.randint(5, 8))
            tree = etree.HTML(driver.page_source)
            second_list = tree.xpath('//div[@id="content_left"]//h3//a//@href')
            for second in second_list:
                try:
                    zy = request_zy(second)
                except requests.RequestException:
                    # Best effort: skip links that fail to resolve instead of
                    # aborting the entire crawl.
                    continue
                # Context manager closes the handle (original leaked it via
                # bare open(...).write(...)).
                with open('有色技术.txt', 'a', encoding='utf-8') as f:
                    f.write(zy + '\n')
                print(f'{zy}正在写入中')
            print('写入完成')
            print(f'第{i}页----------')
            # NOTE(review): assumes the second a.n element is "next page" —
            # confirm against Baidu's current result markup.
            driver.find_element(by=By.XPATH, value='//a[@class="n"][2]').click()
            time.sleep(random.randint(5, 8))
    finally:
        # quit() always runs, even if the next-page link is missing or a
        # page load raises; it ends the whole session, so the original's
        # extra close() call was redundant.
        driver.quit()


if __name__ == '__main__':
    main()
# 标签:url,driver,回收,网址,second,zy,import,com,百度 From: https://www.cnblogs.com/wolvies/p/17445395.html