1.selenium的无头模式,后期有需要可直接提取
# Run Chrome without a visible browser window (headless mode).
# `webdriver` must be imported explicitly; importing Options alone does not
# bring it into scope.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Build the Chrome options that switch on headless operation.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# 1. Launch the browser. The options object is passed through `options=`;
# the older `chrome_options=` keyword is deprecated and rejected by current
# Selenium 4 releases.
driver = webdriver.Chrome(options=chrome_options)
2.随机UA
# faker module: used here to produce a random User-Agent string
from faker import Factory
# ----------------------------------- generate one random UA -------------------------
# Fact is the object returned by Factory.create(); its user_agent() method
# supplies the value stored in ua.
Fact =Factory.create()
# ua is assigned once here; call Fact.user_agent() again for a different value.
ua = Fact.user_agent()
3.线程池
import requests
from time import sleep
import time
from multiprocessing.dummy import Pool
# faker module: used here to produce a random User-Agent string
from faker import Factory
# ----------------------------------- generate one random UA -------------------------
# Fact is the object returned by Factory.create(); its user_agent() method
# supplies the value stored in ua.
Fact =Factory.create()
# NOTE: ua is assigned once at module level, so every request that reads it
# sends the same User-Agent value for the lifetime of this run.
ua = Fact.user_agent()
# --------------------------------------
def collect(url, timeout=10):
    """Send a GET request to *url* and print the HTTP status code.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its pool worker
            indefinitely.

    Raises:
        Any exception raised by ``requests.get`` (including a timeout) is
        not caught here and propagates to the caller.
    """
    headers = {
        "User-Agent": ua,  # module-level UA string, generated once
    }
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    print(resp.status_code)
if __name__ == '__main__':
    # Demo driver: request the same page 50 times through a pool of four
    # workers and print how long the whole batch took, in seconds.
    #
    # perf_counter() is monotonic and keeps sub-second resolution; truncating
    # two time.time() readings to int before subtracting can be off by up to
    # one second.
    started = time.perf_counter()
    url_list = ['http://www.baidu.com'] * 50
    # multiprocessing.dummy.Pool has the Pool API but is backed by threads,
    # not processes. `processes` caps the number of worker threads.
    # map() blocks until every URL has been handled, so leaving the `with`
    # block (which terminates the pool) cannot cut off unfinished work, and
    # the pool is still released if a worker raises.
    with Pool(processes=4) as pool:
        pool.map(collect, url_list)
    elapsed = time.perf_counter() - started
    print(f"{elapsed:.2f}")
标签:chrome,线程,25selenium,time,import,ua,options,随机
From: https://www.cnblogs.com/socoo-/p/16988365.html