初稿见:https://www.cnblogs.com/mindzone/p/18362194
一、新增需求
在原稿题库之后,还需要生成一份纯题目 + 纯答案
答案放在开头,题目里面去掉答案
在检查题型时还发现部分题目的答案前缀略有区别(例如以“参考答案:”而不是“答案:”开头),
所以在判断某个段落是否为答案时,需要同时兼容这两种写法
二、关于老版本支持
旧版 Word(97-2003 的 .doc 格式)需要额外引入 poi-scratchpad 依赖才能解析
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>5.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>5.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>5.0.0</version> </dependency>
需要导入的资源:
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range;
三、工具类实现
package cn.cloud9.word; import com.alibaba.druid.util.StringUtils; import lombok.*; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.xwpf.usermodel.XWPFDocument; import java.io.File; import java.io.FileInputStream; import java.util.*; import java.util.stream.Collectors; public class ExamUtil { private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "参考答案:"); private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");; private static final String NUMBER_REGEXP = "^[1-9]\\d*"; private static final String SPLIT_IDENTIFY = "\\."; @Data @AllArgsConstructor @NoArgsConstructor @Builder @ToString public static final class RoughItem { public int serial; public String exCode; public String content; } @Data @AllArgsConstructor @NoArgsConstructor @Builder @ToString public static final class ExamItem { public String no; public String title; public String type; public String answer; public String explain; } @SneakyThrows public static XWPFDocument getWordFileDocxType(String path) { FileInputStream fileInputStream = new FileInputStream(path); XWPFDocument xwpfDocument = new XWPFDocument(fileInputStream); fileInputStream.close(); return xwpfDocument; } @SneakyThrows public static HWPFDocument getWordFileDocType(String path) { FileInputStream fileInputStream = new FileInputStream(path); HWPFDocument hwpfDocument = new HWPFDocument(fileInputStream); fileInputStream.close(); return hwpfDocument; } @SneakyThrows public static void main(String[] args) { int examCount = 0; String exCode = ""; List<RoughItem> roughItems = new ArrayList<>(); CharacterProperties props = new CharacterProperties(); props.setFontSize(32); String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 
800)中级保育师增加题库 .doc"; String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 800)中级保育师增加题库 " + new Date().getTime() + ".doc"; HWPFDocument wordFile = getWordFileDocType(filePath); Range range = wordFile.getRange(); int numParagraphs = range.numParagraphs(); for (int i = 0; i < numParagraphs; i++) { Paragraph paragraph = range.getParagraph(i); String text = paragraph.text(); if (StringUtils.isEmpty(text)) continue; /* 按点号分割字符串 */ String[] split = text.split(SPLIT_IDENTIFY); /* 首个字符串是否匹配数值序号 */ boolean isExamNo = split[0].matches(NUMBER_REGEXP); /* 是否为答案 */ boolean isAnswer = text.startsWith(ANSWER_PREFIX.get(0)) || text.startsWith(ANSWER_PREFIX.get(1)); /* 是否为选项 */ boolean isOptions = OPTIONS.contains(split[0]); /* 当判断为题目序列时,迭代计数变量,是一道新的题目 */ if (isExamNo) { ++ examCount; exCode = split[0]; ExamUtil.RoughItem roughItem = ExamUtil.RoughItem.builder() .serial(examCount) .content(text) .exCode(exCode) .build() ; roughItems.add(roughItem); } else if (isAnswer || isOptions) { /* 反之不是题目序列,而是选项,答案,解析时,保存起来 */ RoughItem roughItem = RoughItem.builder() .serial(examCount) .content(text) .exCode(exCode) .build() ; roughItems.add(roughItem); } /* 答案部分是一个完整段落,所以对其删除即可 */ if (isAnswer) paragraph.delete(); } List<ExamItem> examItems = new ArrayList<>(); /* 收集完成后使用序列进行分组处理 */ Map<Integer, List<RoughItem>> listMap = roughItems.stream().collect(Collectors.groupingBy(RoughItem::getSerial)); listMap.forEach((k, v) -> { /* 第一项一定是题目 */ RoughItem titleItem = v.get(0); String content = titleItem.getContent(); content = content.replaceAll("\r", ""); /* 处理集合得到答案和解析,解析不一定存在,所以orElse设置空串默认值 */ String answer = v.stream() .map(RoughItem::getContent) .filter(xContent -> xContent.startsWith(ANSWER_PREFIX.get(0)) || xContent.startsWith(ANSWER_PREFIX.get(1))) .map(x -> x.replaceAll(ANSWER_PREFIX.get(1), "").replaceAll(ANSWER_PREFIX.get(0), "")) .findFirst() .orElse(""); answer = answer.replaceAll("\r", ""); /* 包装成题目对象后给调用者消费 */ ExamItem build 
= ExamItem .builder() .no(titleItem.getExCode()) .title(content) .type(null) .answer(answer) .explain(null) .build(); examItems.add(build); }); examItems.forEach(System.out::println); /* 创建一行para,写N个答案在一行中 rowSize = N */ int examTotal = examItems.size(); int rowSize = 10; boolean isComplete = examTotal % rowSize == 0; int totalRow = examTotal / rowSize; totalRow = isComplete ? totalRow : totalRow + 1; /* 因为用的是insertBefore方式插入,所以需要反着翻页写入 */ for (int currentRow = totalRow; currentRow >= 1; currentRow--) { int begin = (currentRow - 1) * rowSize; int end = (currentRow * rowSize) - 1; StringBuilder rowText = new StringBuilder(); for (int exIdx = begin; exIdx <= end; exIdx++) { if (exIdx < 0) break; else if (exIdx >= examTotal) break; ExamItem examItem = examItems.get(exIdx); String no = examItem.getNo(); String answer = examItem.getAnswer(); rowText.append(no).append(".").append(answer).append(" "); } rowText.append("\r"); CharacterRun characterRun = range.insertBefore(rowText.toString()); } wordFile.write(new File(newFilePath)); } }
四、答案嵌套在题目里的处理
答案嵌套在选项或题目中,需要再写逻辑判断
为了处理这种类型的题库文档,单开了一个新的工具类处理
细节部分看代码实现就行
package cn.cloud9.word; import com.alibaba.druid.util.StringUtils; import lombok.*; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.xwpf.usermodel.XWPFDocument; import java.io.File; import java.io.FileInputStream; import java.util.*; import java.util.stream.Collectors; public class ExamUtil2 { // private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "参考答案:"); private static final List<String> ANSWER_IDENT = Arrays.asList("(正确答案)", "【正确答案】"); private static final List<String> ANSWER_IDENT2 = Arrays.asList("×", "√"); private static final List<String> ANSWER_IDENT3 = Arrays.asList("A", "B", "C", "D", "E", "F", "G");; private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");; private static final List<String> OPTIONS2 = Arrays.asList("A、", "B、", "C、", "D、", "E、", "F、", "G、"); private static final String NUMBER_REGEXP = "^[1-9]\\d*"; private static final String SPLIT_IDENTIFY = "\\."; @Data @AllArgsConstructor @NoArgsConstructor @Builder @ToString public static final class RoughItem { public int serial; public String exCode; public String content; } @Data @AllArgsConstructor @NoArgsConstructor @Builder @ToString public static final class ExamItem { public String no; public String title; public String type; public String answer; public String explain; } @SneakyThrows public static XWPFDocument getWordFileDocxType(String path) { FileInputStream fileInputStream = new FileInputStream(path); XWPFDocument xwpfDocument = new XWPFDocument(fileInputStream); fileInputStream.close(); return xwpfDocument; } @SneakyThrows public static HWPFDocument getWordFileDocType(String path) { FileInputStream fileInputStream = new FileInputStream(path); HWPFDocument hwpfDocument = new HWPFDocument(fileInputStream); 
fileInputStream.close(); return hwpfDocument; } @SneakyThrows public static void main(String[] args) { int examCount = 0; String exCode = ""; List<RoughItem> roughItems = new ArrayList<>(); CharacterProperties props = new CharacterProperties(); props.setFontSize(32); String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高级保育师理论题库增加.doc"; String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高级保育师理论题库增加- " + new Date().getTime() + ".doc"; HWPFDocument wordFile = getWordFileDocType(filePath); Range range = wordFile.getRange(); int numParagraphs = range.numParagraphs(); for (int i = 0; i < numParagraphs; i++) { Paragraph paragraph = range.getParagraph(i); String text = paragraph.text(); if (StringUtils.isEmpty(text)) continue; /* 按点号分割字符串 */ String[] split = text.split(SPLIT_IDENTIFY); /* 首个字符串是否匹配数值序号 */ boolean isExamNo = split[0].matches(NUMBER_REGEXP); /* 是否为选项 */ boolean isOptions = OPTIONS.contains(split[0]) || OPTIONS2.stream().anyMatch(text::contains); /* 是否为答案 */ boolean rightOption = ANSWER_IDENT.stream().anyMatch(text::contains) && isOptions; /* 答案在选项中 */ boolean rightOption2 = ANSWER_IDENT2.stream().anyMatch(text::contains) && isExamNo; /* 答案填放在题目里面 */ boolean rightOption3 = ANSWER_IDENT3.stream().anyMatch(text::contains) && isExamNo; /* 答案填放在题目里面 */ boolean isAnswer = rightOption || rightOption2 || rightOption3; /* 当判断为题目序列时,迭代计数变量,是一道新的题目 */ if (isExamNo) { ++ examCount; exCode = split[0]; ExamUtil2.RoughItem roughItem = ExamUtil2.RoughItem.builder() .serial(examCount) .content(text) .exCode(exCode) .build() ; roughItems.add(roughItem); } if (isAnswer) { String correctOption = ""; if (rightOption) { for (String answer : ANSWER_IDENT) text = text.replaceAll(answer, ""); paragraph.replaceText(text, false); correctOption = String.valueOf(text.charAt(0)); } if (rightOption2) { correctOption = text.contains(ANSWER_IDENT2.get(0)) ? 
ANSWER_IDENT2.get(0) : ANSWER_IDENT2.get(1); for (String answer : ANSWER_IDENT2) text = text.replaceAll(answer, ""); paragraph.replaceText(text, false); } if (rightOption3) { for (String option : ANSWER_IDENT3) { if (text.contains(option)) { correctOption = option; text = text.replaceAll(option, ""); break; } } paragraph.replaceText(text, false); } RoughItem roughItem = RoughItem.builder() .serial(examCount) .content(correctOption) .exCode(exCode) .build() ; roughItems.add(roughItem); } } List<ExamItem> examItems = new ArrayList<>(); /* 收集完成后使用序列进行分组处理 */ Map<Integer, List<RoughItem>> listMap = roughItems.stream().collect(Collectors.groupingBy(RoughItem::getSerial)); listMap.forEach((k, v) -> { if (v.size() == 1) return; /* 第一项一定是题目 */ RoughItem titleItem = v.get(0); String content = titleItem.getContent(); content = content.replaceAll("\r", ""); /* 处理集合得到答案和解析,解析不一定存在,所以orElse设置空串默认值 */ String answer = v.get(1).content; answer = answer.replaceAll("\r", ""); /* 包装成题目对象后给调用者消费 */ ExamItem build = ExamItem .builder() .no(titleItem.getExCode()) .title(content) .type(null) .answer(answer) .explain(null) .build(); examItems.add(build); }); examItems.forEach(System.out::println); /* 创建一行para,写10个答案上来 */ int examTotal = examItems.size(); int rowSize = 10; boolean isComplete = examTotal % rowSize == 0; int totalRow = examTotal / rowSize; totalRow = isComplete ? 
totalRow : totalRow + 1; for (int currentRow = totalRow; currentRow >= 1; currentRow--) { int begin = (currentRow - 1) * rowSize; int end = (currentRow * rowSize) - 1; StringBuilder rowText = new StringBuilder(); for (int exIdx = begin; exIdx <= end; exIdx++) { if (exIdx < 0) break; else if (exIdx >= examTotal) break; ExamItem examItem = examItems.get(exIdx); String no = examItem.getNo(); String answer = examItem.getAnswer(); rowText.append(no).append(".").append(answer).append(" "); } rowText.append("\r"); CharacterRun characterRun = range.insertBefore(rowText.toString()); } wordFile.write(new File(newFilePath)); } }
标签:Java,String,text,poi,static,import,Word,public,题库 From: https://www.cnblogs.com/mindzone/p/18403308