这一篇主要解决一个问题:有些页面需要在浏览器中不断向下滚动,才会加载并显示更多的内容。
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
# Scrape article titles and links from a section of thepaper.cn whose list
# only shows more items after the page is scrolled down.
# Result: thepaper_dict maps each article title to its URL.
thepaper_dict = {}
web = webdriver.Chrome(service=Service('chromedriver.exe'))
try:
    # Element lookups retry for up to 10 seconds before raising.
    web.implicitly_wait(10)
    web.get('https://www.thepaper.cn')
    time.sleep(5)
    web.maximize_window()
    time.sleep(5)
    # Click the fifth entry of the navigation menu.
    web.find_element(By.XPATH, '//*[@id="navMenu"]/ul/li[5]/a').click()
    time.sleep(10)
    # Scroll down repeatedly to collect as much content as possible.
    # A fixed absolute offset (scrollTo(0, 3500)) only moves the viewport on
    # the first pass; scrolling to the current document height keeps reaching
    # the bottom on every pass (presumably the page appends more items each
    # time the bottom is reached -- verify against the live site).
    for _ in range(5):
        web.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        # Give the page time to load the next batch before scrolling again.
        time.sleep(5)
    tes_list = web.find_elements(By.CLASS_NAME, "small_toplink__GmZhY")
    for card in tes_list:
        title = card.find_element(By.TAG_NAME, 'h2').text
        url = card.find_element(By.TAG_NAME, 'a').get_attribute('href')
        # Entries with the same title overwrite each other, as before.
        thepaper_dict[title] = url
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even if a step above raised; close() only closed the window.
    web.quit()
标签:web,webdriver,Selenium,爬取,实例,sleep,time,import,find
From: https://www.cnblogs.com/yigehulu/p/17982757