pom文件:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the excelutil command-line tool.
  Produces a single runnable jar (all dependencies included) whose entry
  point is com.xie.ExcelUtil.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>excelutil</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- Apache POI: reading .xls workbooks -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <!-- Apache POI: reading .xlsx workbooks -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <!--
          ExcelUtil imports org.apache.commons.io.FileUtils, so commons-io is
          declared explicitly. Build plugins belong under <build><plugins>;
          listing maven-assembly-plugin here would bundle the plugin and its
          own dependencies into the jar-with-dependencies.
        -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <!-- Pinned so the build does not depend on Maven's built-in default version -->
                <version>3.1.0</version>
                <configuration>
                    <!-- Keep the artifact name excelutil-1.0-SNAPSHOT.jar (no "-jar-with-dependencies" suffix) -->
                    <appendAssemblyId>false</appendAssemblyId>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- Class containing the main method used as the jar entry point -->
                            <mainClass>com.xie.ExcelUtil</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <!-- "single" replaces the legacy "assembly" goal -->
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
java代码:
查看代码
package com.xie;
import org.apache.commons.io.FileUtils;
import org.apache.poi.ss.usermodel.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Command-line tool that collects mobile phone numbers from Excel workbooks.
 *
 * <p>Every regular file below the input directory is opened as a workbook. From the
 * first sheet, columns W and X (zero-based indexes 22 and 23) of every row from the
 * fourth row onward are read, split on {@code ;}, and the values that look like
 * 11-digit numbers starting with {@code 1} are appended to an intermediate text file.
 * That file is then de-duplicated with {@link DistinctFileUtil} and deleted.
 */
public class ExcelUtil {
    /** Rows with a zero-based index below this value are skipped. */
    private static final int FIRST_DATA_ROW = 3;

    /** Zero-based indexes of the columns that hold phone numbers ("W" and "X"). */
    private static final int[] PHONE_COLUMNS = {22, 23};

    /** Separator between several phone numbers stored in one cell. */
    private static final String PHONE_SEPARATOR = ";";

    /** Compiled once; the same expression the original passed to String.matches. */
    private static final java.util.regex.Pattern PHONE_PATTERN =
            java.util.regex.Pattern.compile("^1[0-9]{10}$");

    //java -classpath \path_to_jars\*.jar excelutil-1.0-SNAPSHOT.jar com.xie.ExcelUtil
    /**
     * Entry point.
     *
     * @param args {@code args[0]} directory containing the workbooks,
     *             {@code args[1]} intermediate file receiving every extracted number,
     *             {@code args[2]} final file receiving the de-duplicated numbers
     */
    public static void main(String[] args) {
        if (args == null || args.length < 3) {
            System.err.println(
                    "Usage: ExcelUtil <excelDirectory> <intermediateFile> <distinctOutputFile>");
            System.exit(1);
            return;
        }
        String parentPath = args[0];
        String savePhonePath = args[1];
        String savePhonePathDistinct = args[2];

        List<Path> fileNames = null;
        try (Stream<Path> paths = Files.walk(Paths.get(parentPath))) {
            fileNames = paths.filter(Files::isRegularFile).collect(Collectors.toList());
            fileNames.forEach(path -> insertInfo(path.toString(), savePhonePath));
            System.out.println("写入完成。。。。。。。");
        } catch (IOException e) {
            e.printStackTrace();
        }

        // De-duplicate the intermediate file, then remove it.
        if (fileNames != null && !fileNames.isEmpty()) {
            int splitSize = fileNames.size() * 5;
            File[] files = DistinctFileUtil.splitFile(savePhonePath, splitSize);
            DistinctFileUtil.distinct(files, savePhonePathDistinct, splitSize);
            File savePhoneFile = new File(savePhonePath);
            savePhoneFile.delete();
        }
    }

    /**
     * Extracts the phone numbers of one workbook and appends them to {@code savePhonePath}.
     *
     * <p>Failures are reported on standard error and the file is skipped, so that one
     * unreadable or non-Excel file does not stop the whole run.
     *
     * @param excelPath     path of the workbook to read
     * @param savePhonePath text file the numbers are appended to
     */
    public static void insertInfo(String excelPath, String savePhonePath) {
        Set<String> phoneStrSet = new LinkedHashSet<>();
        try (FileInputStream fis = new FileInputStream(new File(excelPath));
             Workbook workbook = WorkbookFactory.create(fis)) {
            Sheet sheet = workbook.getSheetAt(0);
            for (Row row : sheet) {
                if (row.getRowNum() < FIRST_DATA_ROW) {
                    continue;
                }
                // Each column is handled on its own so a missing cell in one
                // column does not discard the numbers stored in the other.
                for (int column : PHONE_COLUMNS) {
                    collectPhones(readCellText(row.getCell(column)), phoneStrSet);
                }
            }
            insertInfo(phoneStrSet, savePhonePath);
        } catch (Exception e) {
            // Best effort: report and continue with the next file.
            System.err.println("Skipped " + excelPath + ": " + e);
        }
    }

    /**
     * Tells whether {@code str} is an 11-digit number starting with {@code 1}.
     *
     * @param str candidate value; may be {@code null}
     * @return {@code true} only if the whole string matches {@code ^1[0-9]{10}$}
     */
    public final static boolean isNumeric(String str) {
        if (str == null || str.trim().isEmpty()) {
            return false;
        }
        return PHONE_PATTERN.matcher(str).matches();
    }

    /**
     * Appends every value of {@code phoneStrSet} accepted by {@link #isNumeric(String)}
     * to {@code savePhonePath}, one per line terminated by {@code \r\n}. The file is
     * created if it does not exist.
     *
     * @param phoneStrSet   candidate values
     * @param savePhonePath text file to append to
     * @throws IOException if the file cannot be opened or written
     */
    public static void insertInfo(Set<String> phoneStrSet, String savePhonePath) throws IOException {
        System.out.println("写入中。。。。。。。");
        // Opening in append mode creates the file when it is missing;
        // try-with-resources flushes and closes it even if a write fails.
        try (BufferedWriter output = new BufferedWriter(new FileWriter(new File(savePhonePath), true))) {
            for (String phoneStr : phoneStrSet) {
                if (isNumeric(phoneStr)) {
                    output.write(phoneStr + "\r\n");
                }
            }
        }
    }

    /** Returns the cell content as text; numeric cells are rendered without exponent. */
    private static String readCellText(Cell cell) {
        if (cell == null) {
            return "";
        }
        try {
            String text = cell.getStringCellValue();
            return text == null ? "" : text;
        } catch (IllegalStateException notText) {
            // Not a text cell: phone numbers are frequently stored as numbers.
        }
        try {
            return new java.math.BigDecimal(cell.getNumericCellValue()).toPlainString();
        } catch (IllegalStateException | NumberFormatException notNumeric) {
            return "";
        }
    }

    /** Splits {@code cellText} on the separator and adds each trimmed token to {@code target}. */
    private static void collectPhones(String cellText, Set<String> target) {
        if (cellText == null || cellText.trim().isEmpty()) {
            return;
        }
        for (String token : cellText.split(PHONE_SEPARATOR)) {
            target.add(token.trim());
        }
    }
}
查看代码
package com.xie;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
/**
 * Removes duplicate lines from a text file in two steps: the lines are first
 * distributed over several small files by hash code, so equal lines always end up in
 * the same small file; each small file is then de-duplicated in memory and appended
 * to the result file.
 */
public class DistinctFileUtil {
    /** Name of the folder, created next to the source file, that holds the small files. */
    private static final String TEMP_FOLDER_NAME = "test";

    /**
     * Distributes the non-blank, trimmed lines of {@code targetFile} over
     * {@code splitSize} small files named {@code 0.txt .. (splitSize-1).txt}.
     * The small file of a line is chosen by {@code |hashCode % splitSize|}.
     * I/O failures are printed and the small files written so far are still returned.
     *
     * @param targetFile path of the file to de-duplicate
     * @param splitSize  number of small files to create; must be positive
     * @return the small files, in index order
     * @throws IllegalArgumentException if {@code splitSize} is not positive
     */
    public static File[] splitFile(String targetFile, int splitSize) {
        if (splitSize <= 0) {
            throw new IllegalArgumentException("splitSize must be positive: " + splitSize);
        }
        File source = new File(targetFile);
        // getParent() is null for a bare file name; going through the absolute
        // path keeps the temp folder next to the source file in that case too.
        File tempFolder = new File(source.getAbsoluteFile().getParentFile(), TEMP_FOLDER_NAME);
        if (!tempFolder.exists()) {
            tempFolder.mkdirs();
        }

        File[] littleFiles = new File[splitSize];
        for (int i = 0; i < splitSize; i++) {
            littleFiles[i] = new File(tempFolder.getAbsolutePath() + File.separator + i + ".txt");
            // Drop leftovers of an earlier run so they cannot leak into the result.
            if (littleFiles[i].exists()) {
                littleFiles[i].delete();
            }
        }

        PrintWriter[] writers = new PrintWriter[splitSize];
        try {
            for (int i = 0; i < splitSize; i++) {
                writers[i] = new PrintWriter(littleFiles[i]);
            }
            try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String value = line.trim();
                    if (value.isEmpty()) {
                        continue;
                    }
                    // Equal strings share a hash code, hence the same small file.
                    int index = Math.abs(value.hashCode() % splitSize);
                    writers[index].println(value);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            for (PrintWriter writer : writers) {
                if (writer != null) {
                    writer.close();
                }
            }
        }
        return littleFiles;
    }

    /**
     * De-duplicates each small file and writes the surviving lines to
     * {@code distinctFilePath}, replacing any existing content. Every small file is
     * deleted afterwards, whether or not the merge succeeded.
     *
     * @param littleFiles      small files produced by {@link #splitFile(String, int)}
     * @param distinctFilePath path of the result file
     * @param splitSize        number of small files to process
     */
    public static void distinct(File[] littleFiles, String distinctFilePath, int splitSize) {
        int count = Math.min(splitSize, littleFiles.length);
        // PrintWriter(File) creates the file or truncates an existing one.
        try (PrintWriter out = new PrintWriter(new File(distinctFilePath))) {
            Set<String> unique = new HashSet<String>();
            for (int i = 0; i < count; i++) {
                File part = littleFiles[i];
                if (part == null || !part.exists()) {
                    continue;
                }
                System.out.println("开始对小文件:" + part.getName() + "去重");
                // The reader is closed before the next small file is opened.
                try (BufferedReader reader = new BufferedReader(new FileReader(part))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (!line.isEmpty()) {
                            unique.add(line);
                        }
                    }
                }
                for (String value : unique) {
                    out.println(value);
                }
                unique.clear();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Remove the temporary small files once merging is over.
            for (int i = 0; i < count; i++) {
                if (littleFiles[i] != null && littleFiles[i].exists()) {
                    littleFiles[i].delete();
                }
            }
        }
    }

    /**
     * Stand-alone entry point.
     *
     * @param args optional: {@code args[0]} source file, {@code args[1]} result file,
     *             {@code args[2]} number of small files; the previous hard-coded
     *             values are used for whatever is omitted
     * @throws IOException kept for compatibility with the original signature
     */
    public static void main(String[] args) throws IOException {
        String source = args.length > 0 ? args[0] : "G://test/bigfile.txt";
        String target = args.length > 1 ? args[1] : "G://test/bigfile-distinct.txt";
        int splitSize = args.length > 2 ? Integer.parseInt(args[2]) : 20;
        File[] files = splitFile(source, splitSize);
        distinct(files, target, splitSize);
    }
}
执行命令:
@echo off
rem Launches the phone-number extractor using the Java runtime bundled under .\Ajre.

rem Switch the console code page to UTF-8.
chcp 65001

rem Build a timestamp used as the output file name prefix.
rem NOTE(review): the substring offsets assume a specific %date% / %time% layout,
rem which depends on the Windows regional settings - confirm on the target machine.
set dateStr=%date:~5,2%%date:~8,2%%time:~0,2%%time:~3,2%%time:~6,2%
rem Remove blanks (a single-digit hour is padded with a space).
set "dateStr=%dateStr: =%"

rem Arguments: input directory, intermediate file, de-duplicated result file.
rem Paths are quoted so a working directory containing spaces is passed intact.
".\Ajre\bin\java" -jar ".\Ajre\excelutil-1.0-SNAPSHOT.jar" "%cd%\excel原始数据" "%cd%\%dateStr%o-phone.txt" "%cd%\%dateStr%-phone.txt"
@pause
标签:java,String,io,main,jar,File,new,import From: https://www.cnblogs.com/mask-xiexie/p/17555889.html