Selenium 是一种 Web 自动化测试工具,可以控制浏览器并执行各种网页操作。
准备
首先,下载谷歌浏览器驱动(ChromeDriver),下载地址:
http://npm.taobao.org/mirrors/chromedriver/
查看自己的谷歌浏览器版本,选择与该版本号最接近的驱动版本下载。
示例
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
 * Selenium example: opens a poem page in Chrome, reads the poem's title and
 * body from the DOM, and saves both to a UTF-8 text file.
 *
 * <p>Created by fzg, 2022/6/15 10:45.
 */
@SpringBootTest
public class ServerWebTests {

    /**
     * Scrapes the poem title and content and writes them to
     * {@code poetry.txt} (title on the first line, content after it).
     *
     * @throws IOException if the output directory or file cannot be created or written
     */
    @Test
    public void getContent() throws IOException {
        System.setProperty("webdriver.chrome.driver", "D:/Program Files/chrome-driver/chromedriver.exe");

        String title;
        String text;
        // The driver is created locally instead of being injected: the test always
        // builds its own instance, so an @Autowired field would only be overwritten.
        ChromeDriver driver = new ChromeDriver(new ChromeOptions());
        try {
            driver.get("https://so.gushiwen.cn/shiwenv_45c396367f59.aspx");
            System.out.println("fzg1===>" + driver.getTitle());

            // Poem title (looked up once and reused).
            title = driver.findElement(By.xpath("//*[@id=\"sonsyuanwen\"]/div[1]/h1")).getText();
            System.out.println("fzg===>" + title);

            // Poem content.
            text = driver.findElement(By.id("contson45c396367f59")).getText();
            System.out.println(text);
        } finally {
            // quit() ends the whole WebDriver session; running it in finally
            // guarantees the browser is released even when a lookup fails.
            driver.quit();
        }

        Path path = Paths.get("C:\\Users\\Asus\\Desktop\\temp\\poetry.txt");
        Path parent = path.getParent();
        if (parent != null) {
            // Make sure the target directory exists before opening the file.
            Files.createDirectories(parent);
        }
        // Single open (create/truncate) instead of a truncate pass followed by an append pass.
        try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) {
            writer.write(title);
            writer.write("\n");
            writer.write(text);
        }
    }
}
利用 Selenium 还可以爬取网页元素,例如下面的示例会获取页面中的图片下载链接并把图片保存到本地:
import lombok.extern.slf4j.Slf4j;
import org.junit.Test;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import java.util.UUID;
/**
* create by fzg
* 2022/11/29 14:11
*/
@Slf4j
@SpringBootTest
public class SeleniumTest {
@Autowired
private ChromeDriver driver;
@Test
public void getImages(){
System.setProperty("webdriver.chrome.driver", "D:/Program Files/chrome-driver/chromedriver.exe");
ChromeOptions chromeOptions = new ChromeOptions();
driver = new ChromeDriver(chromeOptions);
driver.get("https://picsum.photos/images");
List<WebElement> elements = driver.findElements(By.className("download-url"));
log.info("大小:" + elements.size());
for (WebElement element : elements) {
String imgUrl = element.getAttribute("href");
downImage(imgUrl, UUID.randomUUID().toString());
}
driver.close();
}
public void downImage(String imageUrl,String fileName) {
String file = "E:\\pictures\\java-repile-images";
File files = new File(file);
if (!files.exists()) {
files.mkdirs();
}
InputStream is;
FileOutputStream out;
try {
URL url = new URL(imageUrl);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
is = connection.getInputStream();
// 创建文件
File fileOfImg = new File(file + "/" + fileName + ".jpg");
out = new FileOutputStream(fileOfImg);
int i = 0;
while ((i = is.read()) != -1) {
out.write(i);
}
is.close();
out.close();
log.info(fileName + "下载成功");
} catch (MalformedURLException e) {
log.info("图片地址解析失败");
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
标签:java,selenium,driver,import,org,openqa
From: https://www.cnblogs.com/Fantasyfzg/p/16935587.html