今天需要把北京物业公司的名称和电话整理出来,发现爱帮网上的数据比较多,自己一个一个地去摘取太麻烦了,于是写了一段代码来完成抓取,并自动写入 Excel 文档。主要用的是 Jsoup 和 jxl 这两个库,很方便。
代码如下:
package com.bes.st.buz.website;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.bes.core.StringUtil;
import com.bes.st.rw.WriteFacade;
/**
 * Scrapes business listings (name, address, telephone) from aibang.com search
 * result pages with Jsoup and writes them into an Excel workbook through
 * {@link WriteFacade}.
 */
public class FetchUrl {

    /** Directory used by {@link #writeFetchData(List)} for the generated workbook. */
    private static final String DEFAULT_OUTPUT_DIR = "c:/tmp";

    /** File name used by {@link #writeFetchData(List)} for the generated workbook. */
    private static final String DEFAULT_OUTPUT_FILE = "tt.xls";

    /** Sheet name passed to {@link WriteFacade#writeExcel}. */
    private static final String SHEET_NAME = "物业公司";

    /** Value passed to Jsoup's {@code timeout(...)} when fetching a page. */
    private static final int FETCH_TIMEOUT = 10000;

    /**
     * Downloads one result page and extracts every listing found in a
     * {@code div.aside} element.
     *
     * <p>Within each {@code div.aside} the name is the {@code title} attribute of
     * {@code a.title}, the telephone is the text of {@code span.biztel}, and the
     * address is the text of the first {@code div.part1 p} (empty when there is
     * none). Entries whose name or telephone is empty are skipped.
     *
     * <p>Any failure (network error, invalid URL, ...) is reported on
     * {@code System.err} and the listings collected so far are returned, so one
     * bad page does not abort a multi-page run.
     *
     * @param url address of the page to fetch
     * @return the extracted listings; never {@code null}, possibly empty
     */
    public static List<WebsiteBean> fetchData(final String url) {
        List<WebsiteBean> result = new ArrayList<WebsiteBean>();
        try {
            Document doc = Jsoup.connect(url).timeout(FETCH_TIMEOUT).get();
            Elements entries = doc.select("div.aside");
            for (int i = 0; i < entries.size(); i++) {
                Elements titleLinks = entries.get(i).select("a.title");
                Elements addressParagraphs = entries.get(i).select("div.part1 p");
                Elements telSpans = entries.get(i).select("span.biztel");

                // StringUtil.nvl is a project helper; presumably it maps null to "".
                String name = StringUtil.nvl(titleLinks.attr("title"));
                String tel = StringUtil.nvl(telSpans.text());
                if (name.length() == 0 || tel.length() == 0) {
                    continue;
                }

                WebsiteBean bean = new WebsiteBean();
                bean.setName(name);
                // Only the first paragraph carries the address; later ones are ignored.
                bean.setAddress(addressParagraphs.isEmpty() ? "" : addressParagraphs.get(0).text());
                bean.setTel(tel);
                result.add(bean);
            }
        } catch (Exception e) {
            // Best effort: say which page failed and keep what was parsed so far.
            System.err.println("Failed to fetch listings from " + url);
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Fetches every URL in {@code urls} and writes all listings to the default
     * workbook {@code c:/tmp/tt.xls}.
     *
     * @param urls pages to scrape; {@code null} is treated as an empty list
     */
    public static void writeFetchData(List<String> urls) {
        writeFetchData(urls, new File(DEFAULT_OUTPUT_DIR, DEFAULT_OUTPUT_FILE));
    }

    /**
     * Fetches every URL in {@code urls} and writes all listings to {@code target}.
     *
     * <p>The parent directory of {@code target} is created when missing, and an
     * existing file is overwritten. Failures are reported on {@code System.err}
     * rather than thrown.
     *
     * @param urls   pages to scrape; {@code null} is treated as an empty list
     * @param target workbook file to create or overwrite; must not be {@code null}
     */
    public static void writeFetchData(List<String> urls, File target) {
        List<WebsiteBean> result = new ArrayList<WebsiteBean>();
        if (urls != null) {
            for (String url : urls) {
                result.addAll(fetchData(url));
            }
        }

        // FileOutputStream does not create missing directories, so do it up front.
        File dir = target.getParentFile();
        if (dir != null && !dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            System.err.println("Cannot create output directory " + dir);
            return;
        }

        OutputStream out = null;
        try {
            // Opening the stream creates the file or truncates an existing one,
            // so no explicit delete()/createNewFile() is needed.
            out = new FileOutputStream(target);
            WriteFacade.writeExcel(out, SHEET_NAME, result);
        } catch (Exception ex) {
            System.err.println("Failed to write " + target);
            ex.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (Exception ex) {
                    // A failed close can mean a truncated workbook; do not hide it.
                    System.err.println("Failed to close " + target);
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Scrapes the first result page plus pages 2 to 19 of the Beijing
     * property-management search and writes them to the default workbook.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> urls = new ArrayList<String>();
        // The first page uses a different query format from the paged URLs below.
        urls.add("http://www.aibang.com/?addr=%E5%85%A8%E5%B8%82&what=%E7%89%A9%E4%B8%9A&area=bizsearch2&cmd=noscript&script=false&city=%E5%8C%97%E4%BA%AC");
        for (int i = 2; i < 20; i++) {
            urls.add("http://www.aibang.com/?area=bizsearch2&cmd=bigmap&city=%E5%8C%97%E4%BA%AC&a=&q=%E7%89%A9%E4%B8%9A&as=5000&ufcate=&rc=1&zone=&quan=&fm=&p=" + i);
        }
        writeFetchData(urls);
    }
}
标签:String,E5%,excel,bean,文档,urls,new,网页内容,import
From: https://blog.51cto.com/u_15458282/5875431