一、HTML转PDF
将 HTML 转换成 PDF 之前,需要先在前端把页面处理规范;如果 HTML 不规范,或包含 &nbsp; 等特殊字符实体,可能导致转换失败。
1.1 Maven引入依赖
<!-- html转pdf -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>3.0.2</version>
</dependency>
<!-- 中文字体支持 -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>font-asian</artifactId>
<version>7.1.16</version>
</dependency>
<!-- 预处理html-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
1.2 编写工具类
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.styledxmlparser.css.media.MediaDeviceDescription;
import com.itextpdf.styledxmlparser.css.media.MediaType;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Converts an HTML file on disk into a PDF file using iText html2pdf.
 *
 * <p>The HTML is pre-processed before conversion: all {@code <script>} elements are removed
 * and every named character reference (for example {@code &nbsp;}) is stripped from the markup.
 */
@Slf4j
public class HtmlConvertPdfUtil {

    /**
     * Matches named HTML character references such as {@code &nbsp;}.
     * Compiled once because the pattern never changes between calls.
     */
    private static final Pattern NAMED_ENTITY =
            Pattern.compile("\\&[a-zA-Z]{1,10};", Pattern.CASE_INSENSITIVE);

    /** Location of the font registered for rendering Chinese text. */
    private static final String CHINESE_FONT_PATH = "/static/fonts/SimHei.ttf";

    /**
     * Converts the HTML file at {@code filePath} into a PDF written to {@code pdfPath}.
     * An existing file at {@code pdfPath} is replaced.
     *
     * @param filePath path of the source HTML file, read as UTF-8
     * @param pdfPath  path of the PDF file to create
     * @throws IOException if the HTML cannot be read, is blank after pre-processing,
     *                     or the PDF cannot be written
     */
    public static void convertToPdf(String filePath, String pdfPath) throws IOException {
        log.info("{}:进行PDF转换",filePath);

        // Pre-process the page: drop every <script> element, then strip named entities.
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");
        doc.select("script").remove();
        InputStream htmlStream = getStringStream(stripNamedEntities(doc.outerHtml()));
        if (htmlStream == null) {
            // getStringStream yields null for blank content; fail with a clear message
            // instead of handing a null stream to the converter.
            throw new IOException("HTML content is empty after pre-processing: " + filePath);
        }

        // Remove a stale output file first so the PDF is always created from scratch.
        File target = new File(pdfPath);
        if (target.isFile() && !target.delete()) {
            log.warn("Could not delete existing PDF before conversion: {}", pdfPath);
        }

        // Register the font used for Chinese text.
        ConverterProperties properties = new ConverterProperties();
        FontProvider fontProvider = new FontProvider();
        // NOTE(review): this is resolved as a plain path, not looked up on the classpath —
        // confirm the font really lives at this location in the deployment.
        if (!fontProvider.addFont(CHINESE_FONT_PATH)) {
            log.warn("Font could not be registered, Chinese text may not render: {}", CHINESE_FONT_PATH);
        }
        properties.setFontProvider(fontProvider);
        properties.setMediaDeviceDescription(new MediaDeviceDescription(MediaType.PRINT));
        properties.setImmediateFlush(true);

        // try-with-resources closes document, writer and file stream (in that order)
        // even when the conversion throws.
        try (OutputStream out = new FileOutputStream(pdfPath);
             PdfWriter pdfWriter = new PdfWriter(out);
             PdfDocument pdfDocument = new PdfDocument(pdfWriter)) {
            // NOTE(review): the original comment said "A4" but the code has always used A0;
            // A0 is kept to preserve the current output — confirm which size is intended.
            pdfDocument.setDefaultPageSize(PageSize.A0);
            HtmlConverter.convertToPdf(htmlStream, pdfDocument, properties);
        }
        log.info("{}:PDF转换成功",filePath);
    }

    /**
     * Removes every named character reference (such as {@code &nbsp;}) from the given HTML.
     *
     * @param html the HTML markup, must not be {@code null}
     * @return the markup with all matches of {@link #NAMED_ENTITY} deleted
     */
    private static String stripNamedEntities(String html) {
        return NAMED_ENTITY.matcher(html).replaceAll("");
    }

    /**
     * Wraps a string in an in-memory UTF-8 input stream.
     *
     * @param sInputString the text to expose as a stream
     * @return a stream over the UTF-8 bytes of the text, or {@code null} when the text is
     *         {@code null} or contains only whitespace
     */
    public static InputStream getStringStream(String sInputString) {
        if (sInputString == null || sInputString.trim().isEmpty()) {
            return null;
        }
        return new ByteArrayInputStream(sInputString.getBytes(StandardCharsets.UTF_8));
    }
}
二、图片转PDF
2.1 Maven引入依赖
<!-- 图片转pdf-->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>kernel</artifactId>
<version>7.1.16</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>layout</artifactId>
<version>7.1.16</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>io</artifactId>
<version>7.1.16</version>
</dependency>
2.2 编写工具类
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.property.HorizontalAlignment;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.io.FileOutputStream;
@Slf4j
public class ImageConvertPdfUtil {
// 将图片列表转换为 PDF
public static void convertToPdf(String filePath, String pdfPath) {
log.info("{}:进行PDF转换",filePath);
try {
//若存在该文件先删除,再创建
File file = new File(pdfPath);
if(file.isFile() && file.exists()){
file.delete();
}
// 创建 PDF 文档
PdfWriter writer = new PdfWriter(new FileOutputStream(pdfPath));
PdfDocument pdfDocument = new PdfDocument(writer);
Document document = new Document(pdfDocument);
// 获取pdf页面尺寸
PageSize pageSize = pdfDocument.getDefaultPageSize();
float pageWidth = pageSize.getWidth();
float pageHeight = pageSize.getHeight();
// 将图片添加到 PDF 文档
addImageToPdf(document, filePath, pageWidth, pageHeight);
// 关闭文档
document.close();
} catch (Exception e) {
log.error("convertToPdf-Error:{}",e.getMessage());
}
log.info("{}:PDF转换成功",filePath);
}
// 将图片添加到 PDF 文档
private static void addImageToPdf(Document document, String imagePath, float pageWidth, float pageHeight) {
try {
// 创建图片对象并设置等比例缩放和水平居中
Image image = new Image(ImageDataFactory.create(imagePath));
image.setAutoScale(true);
image.setHorizontalAlignment(HorizontalAlignment.CENTER);
// 获取图片的原始尺寸
float width = image.getImageWidth();
float height = image.getImageHeight();
// 计算缩放比例
float scale = Math.min(pageWidth / width, pageHeight / height);
// 设置图片的缩放比例
image.scaleToFit(width * scale, height * scale);
// 添加图片到 PDF 文档
document.add(image);
} catch (Exception e) {
log.error("addImageToPdf-Error:{}",e.getMessage());
}
}
}
三、Word转PDF
3.1 Maven引入依赖
如果该依赖无法通过 Maven 自动下载,可以先在网上下载现成的 jar 包并手动引入项目,再把下面的版本号改成所下载 jar 包的版本。
<!-- word转pdf-->
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>15.8.0</version>
</dependency>
3.2 编写工具类
import com.aspose.words.License;
import lombok.extern.slf4j.Slf4j;
import java.io.*;
@Slf4j
public class WordConvertPdfUtil {
public static void convertDocxToPdf(String filePath, String pdfPath){
log.info("{}:进行PDF转换",filePath);
if (!isWordLicense()) {
return null;
}
try {
//若存在输出的pdf文件先删除,再创建
File file = new File(pdfPath);
if(file.isFile() && file.exists()){
file.delete();
}
com.aspose.words.Document document = new com.aspose.words.Document(filePath);
document.save(pdfPath);
} catch (Exception e) {
log.error("convertDocxToPdf-Error:{}",e.getMessage());
}
log.info("{}:PDF转换成功",filePath);
}
/**
* @Description: 验证aspose.word组件是否授权:无授权的文件有水印和试用标记
*/
public static boolean isWordLicense() {
boolean result = false;
try {
//导入许可
String licensexml = "<License>\n" + "<Data>\n" + "<Products>\n"
+ "<Product>Aspose.Total for Java</Product>\n" + "<Product>Aspose.Words for Java</Product>\n"
+ "</Products>\n" + "<EditionType>Enterprise</EditionType>\n"
+ "<SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
+ "<LicenseExpiry>20991231</LicenseExpiry>\n"
+ "<SerialNumber>23dcc79f-44ec-4a23-be3a-03c1632404e9</SerialNumber>\n" + "</Data>\n"
+ "<Signature>\n"
+ "sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=\n"
+ "</Signature>\n" + "</License>";
InputStream inputStream = new ByteArrayInputStream(licensexml.getBytes());
com.aspose.words.License license = new com.aspose.words.License();
license.setLicense(inputStream);
result = true;
} catch (Exception e) {
log.error("error:{}",e.getMessage());
}
return result;
}
}
参考资料:https://blog.csdn.net/WayneLee0809/article/details/112788783
https://cloud.tencent.com/developer/article/1639923