# Demo: read DOM properties of a located node through Selenium's
# WebElement.get_property(). `driver` and `By` must already be in scope.

# Dump the HTML source of the page currently loaded in the driver.
print(driver.page_source)

# Locate the node whose id is "info".
elem = driver.find_element(By.ID, "info")

# outerHTML: the node's own markup, including its tag.
htm_dat = elem.get_property("outerHTML")
print('获取节点的html源码:', htm_dat)

htm_name = elem.get_property("nodeName")
print('节点名称:', htm_name)

htm_type = elem.get_property("nodeType")
print('节点类型:', htm_type)

htm_ght = elem.get_property("clientHeight")
print('节点实际高度:', htm_ght)

htm_dth = elem.get_property("clientWidth")
print('节点实际宽度:', htm_dth)

# Properties that resolve to another element can be chained with a second
# get_property() call.
htm_node_name = elem.get_property("parentNode").get_property("nodeName")
print('该节点的父节点.名称:', htm_node_name)

# nextSibling is null when the node is the last child of its parent; guard
# so the lookup prints None instead of raising AttributeError.
# NOTE(review): nextSibling may also be a whitespace text node rather than an
# element; if "next element" is what is wanted, nextElementSibling is
# probably the right property -- confirm against the target page.
next_node = elem.get_property("nextSibling")
htm_next_htm = None if next_node is None else next_node.get_property("outerHTML")
print('该节点的相邻的下一个节点.源码:', htm_next_htm)
from selenium.webdriver import Firefox, FirefoxOptions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


class ContentPageParser:
    """Collect the ``src`` of every ``img[class="showimg"]`` on one page.

    Usage: construct with the page URL, call
    ``visit_content_page_with_firefox()``, then read the collected URLs
    with ``get_img_src()``.
    """

    # CSS selector for the images to collect. It is an exact match on the
    # class attribute value, so elements with additional classes are skipped.
    _IMG_SELECTOR = 'img[class="showimg"]'

    def __init__(self, content_page_url) -> None:
        """Remember the page URL and start with an empty result list.

        Args:
            content_page_url: URL of the page whose image URLs are scraped.
        """
        self.content_url = content_page_url
        # Image URLs gathered so far; filled by
        # visit_content_page_with_firefox().
        self.img_src = []

    def visit_content_page_with_firefox(self):
        """Open the page in headless Firefox and record the image URLs.

        Waits up to 10 seconds (polling once per second) for a matching
        image to become visible, then appends the ``src`` attribute of every
        matching image to ``self.img_src``.

        Errors raised while loading or scraping the page (including the wait
        timing out) are printed rather than propagated, so the method is
        best-effort. A failure to start Firefox itself does propagate. The
        browser session is always shut down before returning.
        """
        option = FirefoxOptions()
        # Request headless mode through the command-line flag. The
        # `headless` property setter was deprecated and later removed in
        # Selenium 4, where assigning to it silently has no effect.
        option.add_argument("-headless")
        self.driver = Firefox(options=option)
        try:
            self.driver.get(self.content_url)
            locator = (By.CSS_SELECTOR, self._IMG_SELECTOR)
            # Explicit wait: at most 10 s, polling every 1 s, until a
            # matching image is visible.
            wait = WebDriverWait(self.driver, timeout=10, poll_frequency=1)
            wait.until(EC.visibility_of_element_located(locator))
            imgs = self.driver.find_elements(*locator)
            self.img_src.extend(img.get_attribute('src') for img in imgs)
        except Exception as e:
            print(repr(e))
        finally:
            # quit() ends the whole WebDriver session and its driver
            # process; close() would only close the current window.
            self.driver.quit()

    def get_img_src(self):
        """Return the list of image URLs collected so far."""
        return self.img_src


if __name__ == '__main__':
    content_parser = ContentPageParser('https://xxx/content_48495.html')
    content_parser.visit_content_page_with_firefox()
    img = content_parser.get_img_src()
    print(img)
# 标签:img,get,self,保存,content,HTML,htm,print,代码
# From: https://www.cnblogs.com/QQ-77Ly/p/17673872.html