private static final int THREAD_COUNT = 4; // Number of worker threads; also the number of pieces the input file is split into.
private static final int BUFFER_SIZE = 1024; // Length of each buffer used when reading and copying file data (1024 bytes = 1 KiB for byte buffers).
/**
 * Converts a text file from one character encoding to another using
 * {@code THREAD_COUNT} worker tasks.
 *
 * <p>The input is divided into {@code THREAD_COUNT} consecutive byte ranges. Each range is
 * transcoded by a {@link FileReadTask} into its own temporary file; once every task has
 * finished, the temporary files are appended to {@code outputFile} in range order and deleted.
 *
 * <p>Range boundaries are moved forward to the byte following the next line feed (0x0A), so
 * a multi-byte character is never cut in half between two tasks. This relies on the byte
 * 0x0A never being part of a multi-byte sequence, which holds for ASCII-compatible encodings
 * such as UTF-8, GBK and ISO-8859-1. When the source charset does not encode {@code '\n'} as
 * the single byte 0x0A (for example UTF-16), the whole file is handed to one task instead.
 *
 * @param inputFile     file to read, e.g. {@code new File("E:/02code/web/test.txt")}
 * @param sourceCharset name of the encoding of {@code inputFile}, e.g. {@code "GBK"}
 * @param outputFile    file to write (overwritten if it exists),
 *                      e.g. {@code new File("E:/02code/web/tes1t.txt")}
 * @param targetCharset name of the encoding to write, e.g. {@code "UTF-8"}
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws IOException          if the input cannot be scanned or the output cannot be written
 * @throws ExecutionException   if a task fails; the cause is the exception thrown by the task
 */
private static void conversionFileEncodingFormat(File inputFile, String sourceCharset, File outputFile, String targetCharset) throws InterruptedException, IOException, ExecutionException {
    long[] boundaries = chunkBoundaries(inputFile, inputFile.length(), sourceCharset);
    ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
    List<File> tempFiles = new ArrayList<>();
    try {
        List<Future<File>> futures = new ArrayList<>();
        for (int i = 0; i < THREAD_COUNT; i++) {
            futures.add(executor.submit(new FileReadTask(inputFile, boundaries[i], boundaries[i + 1], i, sourceCharset, targetCharset)));
        }
        executor.shutdown();

        // Wait for every task before opening (and truncating) the output file. Future.get()
        // blocks until its task is done, so no separate awaitTermination call is needed.
        // All futures are drained even after a failure so that the temporary files of the
        // tasks that did succeed can still be cleaned up.
        ExecutionException firstFailure = null;
        for (Future<File> future : futures) {
            try {
                tempFiles.add(future.get());
            } catch (ExecutionException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }

        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outputFile))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (File tempFile : tempFiles) {
                try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(tempFile))) {
                    int bytesRead;
                    while ((bytesRead = bis.read(buffer)) != -1) {
                        bos.write(buffer, 0, bytesRead);
                    }
                }
            }
        }
    } finally {
        // Always release the worker threads and remove the temporary files, even on failure.
        executor.shutdownNow();
        for (File tempFile : tempFiles) {
            tempFile.delete(); // best effort; nothing useful can be done if it fails
        }
    }
}

/**
 * Computes the {@code THREAD_COUNT + 1} byte offsets that delimit the ranges given to the tasks.
 * Range {@code i} is {@code [result[i], result[i + 1])}; offsets never decrease, so a range may be empty.
 */
private static long[] chunkBoundaries(File inputFile, long fileSize, String sourceCharset) throws IOException {
    long[] boundaries = new long[THREAD_COUNT + 1];
    // Default layout: the first range covers the whole file and the others are empty.
    for (int i = 1; i <= THREAD_COUNT; i++) {
        boundaries[i] = fileSize;
    }
    long chunkSize = fileSize / THREAD_COUNT;
    // Files shorter than THREAD_COUNT bytes (including missing files, whose length is 0) are
    // not worth splitting; any problem opening them is left for the worker task to report.
    if (chunkSize == 0 || !lineFeedIsSingleByte(sourceCharset)) {
        return boundaries;
    }
    try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) {
        for (int i = 1; i < THREAD_COUNT; i++) {
            // Never start before the previous boundary, which may have been pushed past i * chunkSize.
            long candidate = Math.max(i * chunkSize, boundaries[i - 1]);
            boundaries[i] = nextLineStart(raf, candidate, fileSize);
        }
    }
    return boundaries;
}

/** Returns whether the named charset exists and encodes {@code '\n'} as the single byte 0x0A. */
private static boolean lineFeedIsSingleByte(String charsetName) {
    if (charsetName == null) {
        return false;
    }
    try {
        byte[] encoded = "\n".getBytes(charsetName);
        return encoded.length == 1 && encoded[0] == '\n';
    } catch (java.io.UnsupportedEncodingException | IllegalArgumentException e) {
        // Unknown or illegal charset name: do not split; the worker task reports the error.
        return false;
    }
}

/**
 * Returns the smallest offset {@code >= position} that directly follows a line-feed byte,
 * or {@code fileSize} when no line feed is found before the end of the file.
 */
private static long nextLineStart(RandomAccessFile raf, long position, long fileSize) throws IOException {
    if (position <= 0) {
        return 0;
    }
    if (position >= fileSize) {
        return fileSize;
    }
    // Start one byte early: if that byte is a line feed, position is already a line start.
    long offset = position - 1;
    raf.seek(offset);
    byte[] buffer = new byte[BUFFER_SIZE];
    int count;
    while ((count = raf.read(buffer)) != -1) {
        for (int i = 0; i < count; i++) {
            if (buffer[i] == '\n') {
                return Math.min(offset + i + 1, fileSize);
            }
        }
        offset += count;
    }
    return fileSize;
}
/**
 * Task that transcodes the byte range {@code [start, end)} of a file from
 * {@code sourceCharset} to {@code targetCharset} and writes the result to a temporary file
 * named {@code temp_<index>.txt} in the current working directory.
 *
 * <p>The range is decoded as one continuous stream, so a multi-byte character that happens
 * to straddle an internal read-buffer boundary is decoded correctly. The caller is
 * responsible for choosing {@code start} and {@code end} on character boundaries and for
 * deleting the returned file.
 */
static class FileReadTask implements Callable<File> {
    private final File inputFile;
    private final long start;
    private final long end;
    private final int index;
    /**
     * Name of the charset the input bytes are encoded in.
     */
    private final String sourceCharset;
    /**
     * Name of the charset the temporary file is written in.
     */
    private final String targetCharset;

    /**
     * @param inputFile     file to read from
     * @param start         offset of the first byte to read (inclusive)
     * @param end           offset at which reading stops (exclusive)
     * @param index         number used to build the temporary file name
     * @param sourceCharset name of the charset of {@code inputFile}
     * @param targetCharset name of the charset to write
     */
    public FileReadTask(File inputFile, long start, long end, int index, String sourceCharset, String targetCharset) {
        this.inputFile = inputFile;
        this.start = start;
        this.end = end;
        this.index = index;
        this.sourceCharset = sourceCharset;
        this.targetCharset = targetCharset;
    }

    /**
     * Transcodes the configured byte range into the temporary file.
     *
     * @return the temporary file holding the transcoded text
     * @throws Exception if the input cannot be read, the temporary file cannot be written,
     *                   or a charset name is not supported
     */
    @Override
    public File call() throws Exception {
        File tempFile = new File("temp_" + index + ".txt");
        try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r");
             java.io.Reader reader = new java.io.InputStreamReader(new RangeInputStream(raf, start, end), sourceCharset)) {
            // The output stream is its own resource so it is closed even when the
            // OutputStreamWriter constructor rejects the target charset name.
            try (FileOutputStream out = new FileOutputStream(tempFile);
                 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, targetCharset))) {
                char[] buffer = new char[BUFFER_SIZE];
                int charsRead;
                while ((charsRead = reader.read(buffer)) != -1) {
                    writer.write(buffer, 0, charsRead);
                }
            } catch (IOException | RuntimeException e) {
                // The caller never learns the path of a failed task's file, so remove the
                // partial output here (best effort) before propagating the error.
                tempFile.delete();
                throw e;
            }
        }
        return tempFile;
    }

    /**
     * Read-only view of the bytes {@code [start, end)} of an already opened file. It reports
     * end-of-stream at {@code end} even when the underlying file is longer. Closing this
     * stream does not close the underlying {@link RandomAccessFile}.
     */
    private static final class RangeInputStream extends java.io.InputStream {
        private final RandomAccessFile file;
        private long remaining;

        RangeInputStream(RandomAccessFile file, long start, long end) throws IOException {
            this.file = file;
            this.remaining = end - start;
            file.seek(start);
        }

        @Override
        public int read() throws IOException {
            if (remaining <= 0) {
                return -1;
            }
            int value = file.read();
            if (value != -1) {
                remaining--;
            }
            return value;
        }

        @Override
        public int read(byte[] target, int offset, int length) throws IOException {
            if (length == 0) {
                return 0;
            }
            if (remaining <= 0) {
                return -1;
            }
            int count = file.read(target, offset, (int) Math.min(length, remaining));
            if (count > 0) {
                remaining -= count;
            }
            return count;
        }
    }
}
// Tags: java, String, sourceCharset, targetCharset, file format, private, new, multithreading, inputFile
// From: https://blog.51cto.com/u_11288266/11971646