题目:
存在这样一个场景:在内存紧够存储1GB数据的情况下,如何把一个10G文件进行排序后输出,10G的文件每行数据为Long型的正整数,请代码实现;
题目分析
题目核心在于以下两点:
1、内存仅能够存储1GB,即不能把所有数据在内存中排序后再输出;
2、源文件是无需的,需要先进行排序;
3、输出后仍未一个文件;
实现思想
通过上述简单分析,可以看出,需要通过分治的方式,分批读取文件,可以按照1GB文件读取,读取之后排序,然后输出到part文件,然后再遍历排序后的10个文件,按照堆排序思想,取第一个元素,即10个文件第一个元素,哪个最小,哪个先输出,然后最小的哪个值来自哪个文件,这个文件再进行遍历,当所有的文件均遍历完成后,再排序输出完成,
实现代码
import java.io.*;
import java.util.*;
public class FileSort {
public static final String ROOT_PATH = "D:/temp/test";
public static final String SAMPLE_ORI_BIG_FILE = "ori.txt";
public static final String SAMPLE_RESULT_BIG_FILE = "result.txt";
/**
* 模拟生成一个大文件,包含Long型的整数数据
*/
public static void testData() throws IOException {
String filePath = ROOT_PATH + File.separator + SAMPLE_ORI_BIG_FILE;
Random random = new Random();
if(!new File(filePath).exists()){
new File(filePath).createNewFile();
int count = 1000000;
try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(filePath)))) {
while (count-- > 0) {
long l = Math.abs(random.nextLong());
writer.write(String.valueOf(l));
writer.newLine();
}
}
}
}
public static void main(String[] args) throws IOException {
//0、生成模拟数据
testData();
//1、读取源文件,按照固定大小排序并输出文件
String filePath = ROOT_PATH + File.separator + SAMPLE_ORI_BIG_FILE;
splitFileAndSorts(filePath,100000);
//2、10个文件遍历读取,放到堆中,取最小的数据出到文件中,输出的元素,读取下一条,然后再比较,以此往复
List<BufferedReader> readerList = new ArrayList<>(10);
for(int i = 1;i<=10;i++){
String partFilePath = filePath + ".part" + i;
BufferedReader reader = new BufferedReader(new FileReader(new File(partFilePath)));
readerList.add(reader);
}
String resultFilePath = ROOT_PATH + File.separator + SAMPLE_RESULT_BIG_FILE;
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(resultFilePath)));
Boolean flag = Boolean.FALSE;
Set<Integer> skipIndex = new HashSet<>();
Map<Integer,Long> indexLongMap = new HashMap<Integer,Long>();
int index = 0;
while (!flag){
for(int i = 0;i<readerList.size();i++){
if(!skipIndex.contains(i)) {
if(null == indexLongMap.get(i)) {
BufferedReader reader = readerList.get(i);
String str = reader.readLine();
if (str != null) {
Long s = Long.parseLong(str);
indexLongMap.put(i, s);
} else {
skipIndex.add(i);
}
}
}
}
if(skipIndex.size() == 10){
flag = Boolean.TRUE;
}else {
Long min = 0L;
for (Integer i : indexLongMap.keySet()) {
Long s = indexLongMap.get(i);
if (min <= s) {
min = s;
index = i;
}
}
writer.write(String.valueOf(min));
writer.newLine();
indexLongMap.remove(index);
}
}
writer.flush();
}
/**
*
* @param sourceFilePath
* @param nums
*/
public static void splitFileAndSorts(String sourceFilePath, int nums) throws IOException {
List<Long> sortedList = new ArrayList<>(nums);
int i = 1;
String s = null;
try (BufferedReader reader = new BufferedReader(new FileReader(new File(sourceFilePath)))) {
while ((s = reader.readLine()) != null) {
sortedList.add(Long.parseLong(s));
if (sortedList.size() >= nums) {
Collections.sort(sortedList);
String partFilePath = sourceFilePath + ".part" + i;
try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(partFilePath)))) {
for (Long l : sortedList) {
writer.write(String.valueOf(l));
writer.newLine();
}
}
sortedList = new ArrayList<>(nums);
i++;
}
}
}
}
}