1.建数据库product 表和product_data表
CREATE TABLE product ( id INT AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255) NOT NULL, url VARCHAR(255) NOT NULL ); CREATE TABLE product_data ( id INT AUTO_INCREMENT PRIMARY KEY, product_id INT, price VARCHAR(255), sale INT, FOREIGN KEY (product_id) REFERENCES product(id) );
2.插入maven
<dependencies> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.47</version> </dependency> <dependency> <groupId>com.google.code.gson</groupId> <artifactId>gson</artifactId> <version>2.8.8</version> </dependency> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.14.3</version> </dependency> </dependencies>
3.新建类WebCrawler
public class WebCrawler { public static void main(String[] args) { String url = "https://www.leadongshop.com/products.html"; try { // Connect to MySQL database Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/webcrawler?useSSL=true&requireSSL=true&verifyServerCertificate=false&characterEncoding=UTF-8", "root", "990522"); // Parse HTML from the URL Document doc = Jsoup.connect(url).get(); // Select product elements Elements products = doc.select("div.bodyList > a"); // Iterate over each product for (Element product : products) { // Extract product name and URL String name = product.select("div.name").text(); String url1 = product.attr("href"); String productUrl = "https://www.leadongshop.com" + url1; String data = product.select("div.price").text(); String[] s = data.split(" "); String price = s[0]; String sale1 = s[1]; int sale = Integer.parseInt(sale1); // Insert product into 'product' table String insertProductQuery = "INSERT INTO product (name, url) VALUES (?, ?)"; PreparedStatement insertProductStmt = conn.prepareStatement(insertProductQuery); insertProductStmt.setString(1, name); insertProductStmt.setString(2, productUrl); insertProductStmt.executeUpdate(); insertProductStmt.close(); String insertProductQuery1 = "INSERT INTO product_data (price,sale) VALUES (?, ?)"; PreparedStatement insertProductStmt1 = conn.prepareStatement(insertProductQuery1); insertProductStmt1.setString(1, price); insertProductStmt1.setInt(2, sale); insertProductStmt1.executeUpdate(); insertProductStmt1.close(); } conn.close(); } catch (IOException | SQLException e) { e.printStackTrace(); }
}
}
4.数据库中查询。通过SQL查询出商品列表并按销量从高到低、价格从高到低排序
SELECT p.name, pd.price, pd.sale FROM product p JOIN product_data pd ON p.id = pd.product_id ORDER BY pd.sale DESC;
标签:product,name,网站,price,爬虫,爬取,data,id,String From: https://www.cnblogs.com/zjmdeblog/p/18151421