HTML、图片以及word转换成pdf

标签：PDF word String import itextpdf HTML new pdf com

一、HTML转PDF

对于Html转换成PDF，首先需要前端处理好页面；如果Html不规范，或存在 `&nbsp;` 等特殊字符实体，可能导致转换失败。

1.1 Maven引入依赖

        <!-- html转pdf  -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>html2pdf</artifactId>
            <version>3.0.2</version>
        </dependency>
        <!-- 中文字体支持 -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>font-asian</artifactId>
            <version>7.1.16</version>
        </dependency>

<!--        预处理html-->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>

1.2 编写工具类

import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.styledxmlparser.css.media.MediaDeviceDescription;
import com.itextpdf.styledxmlparser.css.media.MediaType;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts an HTML file on disk into a PDF file using jsoup (pre-processing)
 * and iText html2pdf (rendering).
 *
 * <p>Pre-processing removes every {@code <script>} element and strips named
 * character entities such as {@code &nbsp;} from the serialized markup before
 * it is handed to the converter.
 */
@Slf4j
public class HtmlConvertPdfUtil {

    /**
     * Matches named character entities ({@code &} + 1..10 ASCII letters + {@code ;}).
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern NAMED_ENTITY =
            Pattern.compile("\\&[a-zA-Z]{1,10};", Pattern.CASE_INSENSITIVE);

    /**
     * Renders the HTML file at {@code filePath} into a PDF written to {@code pdfPath}.
     *
     * @param filePath path of the source HTML file; it is parsed as UTF-8
     * @param pdfPath  path of the PDF to create; an existing regular file is replaced
     * @throws IOException if the source cannot be read, the target cannot be written,
     *                     or nothing is left to convert after pre-processing
     */
    public static void convertToPdf(String filePath, String pdfPath) throws IOException {
        log.info("{}:进行PDF转换",filePath);

        // Pre-process the page: parse it and drop all <script> elements.
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");
        doc.select("script").remove();

        // Strip named entities directly on the string. The previous implementation
        // encoded with the platform charset and decoded as UTF-8, which corrupted
        // non-ASCII text on JVMs whose default charset is not UTF-8.
        String cleanedHtml = stripNamedEntities(doc.outerHtml());
        InputStream htmlStream = getStringStream(cleanedHtml);
        if (htmlStream == null) {
            throw new IOException("HTML content is empty after pre-processing: " + filePath);
        }

        // Remove a stale output file first so the PDF is always created fresh.
        File target = new File(pdfPath);
        if (target.isFile() && !target.delete()) {
            log.warn("Could not delete existing PDF before conversion: {}", pdfPath);
        }

        // try-with-resources closes the document before the writer (reverse
        // declaration order) and also releases both when conversion throws.
        try (PdfWriter pdfWriter = new PdfWriter(new FileOutputStream(pdfPath));
             PdfDocument pdfDocument = new PdfDocument(pdfWriter)) {

            // NOTE(review): the original comment said "A4" but the code selects A0.
            // A0 is kept so the output does not change - confirm which is intended.
            pdfDocument.setDefaultPageSize(PageSize.A0);

            // Register a font with CJK glyphs so Chinese text can be rendered.
            ConverterProperties properties = new ConverterProperties();
            FontProvider fontProvider = new FontProvider();
            if (!fontProvider.addFont("/static/fonts/SimHei.ttf")) {
                log.warn("Font could not be registered: {}", "/static/fonts/SimHei.ttf");
            }
            properties.setFontProvider(fontProvider);
            properties.setMediaDeviceDescription(new MediaDeviceDescription(MediaType.PRINT));

            // Flush content to the writer as soon as it is laid out.
            properties.setImmediateFlush(true);

            HtmlConverter.convertToPdf(htmlStream, pdfDocument, properties);
        }
        log.info("{}:PDF转换成功",filePath);
    }

    /**
     * Removes every named character entity (for example {@code &nbsp;}) from the markup.
     *
     * @param html serialized HTML
     * @return {@code html} with all matches of {@link #NAMED_ENTITY} deleted
     */
    private static String stripNamedEntities(String html) {
        return NAMED_ENTITY.matcher(html).replaceAll("");
    }

    /**
     * Wraps a string in a UTF-8 encoded input stream.
     *
     * @param sInputString text to expose as a stream
     * @return a stream over the UTF-8 bytes of {@code sInputString}, or {@code null}
     *         when the argument is {@code null} or contains only whitespace
     */
    public static InputStream getStringStream(String sInputString) {
        if (sInputString == null || sInputString.trim().isEmpty()) {
            return null;
        }
        return new ByteArrayInputStream(sInputString.getBytes(StandardCharsets.UTF_8));
    }

}

二、图片转PDF

2.1 Maven引入依赖

<!--        图片转pdf-->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>kernel</artifactId>
            <version>7.1.16</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>layout</artifactId>
            <version>7.1.16</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>io</artifactId>
            <version>7.1.16</version>
        </dependency>

2.2 编写工具类

import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.property.HorizontalAlignment;
import lombok.extern.slf4j.Slf4j;


import java.io.File;
import java.io.FileOutputStream;

@Slf4j
public class ImageConvertPdfUtil {

    // 将图片列表转换为 PDF
    public static void convertToPdf(String filePath, String pdfPath) {
        log.info("{}:进行PDF转换",filePath);
        try {
            //若存在该文件先删除，再创建
            File file = new File(pdfPath);
            if(file.isFile() && file.exists()){
                file.delete();
            }

            // 创建 PDF 文档
            PdfWriter writer = new PdfWriter(new FileOutputStream(pdfPath));
            PdfDocument pdfDocument = new PdfDocument(writer);
            Document document = new Document(pdfDocument);

            // 获取pdf页面尺寸
            PageSize pageSize = pdfDocument.getDefaultPageSize();
            float pageWidth = pageSize.getWidth();
            float pageHeight = pageSize.getHeight();

            // 将图片添加到 PDF 文档
            addImageToPdf(document, filePath, pageWidth, pageHeight);

            // 关闭文档
            document.close();
        } catch (Exception e) {
            log.error("convertToPdf-Error:{}",e.getMessage());
        }
        log.info("{}:PDF转换成功",filePath);
    }

    // 将图片添加到 PDF 文档
    private static void addImageToPdf(Document document, String imagePath, float pageWidth, float pageHeight) {
        try {
            // 创建图片对象并设置等比例缩放和水平居中
            Image image = new Image(ImageDataFactory.create(imagePath));
            image.setAutoScale(true);
            image.setHorizontalAlignment(HorizontalAlignment.CENTER);


            // 获取图片的原始尺寸
            float width = image.getImageWidth();
            float height = image.getImageHeight();

            // 计算缩放比例
            float scale = Math.min(pageWidth / width, pageHeight / height);

            // 设置图片的缩放比例
            image.scaleToFit(width * scale, height * scale);

            // 添加图片到 PDF 文档
            document.add(image);

        } catch (Exception e) {
            log.error("addImageToPdf-Error:{}",e.getMessage());
        }
    }
}

三、Word转PDF

3.1 Maven引入依赖

如果该依赖无法从Maven仓库下载，可以手动下载对应的jar包并安装到本地仓库，再把下面的版本号替换为实际使用的版本。

<!--        word转pdf-->
        <dependency>
            <groupId>com.aspose</groupId>
            <artifactId>aspose-words</artifactId>
            <version>15.8.0</version>
        </dependency>

3.2 编写工具类

import com.aspose.words.License;
import lombok.extern.slf4j.Slf4j;


import java.io.*;

@Slf4j
public class WordConvertPdfUtil {
    public static void convertDocxToPdf(String filePath, String pdfPath){
        log.info("{}:进行PDF转换",filePath);
		if (!isWordLicense()) {
			return null;
		}
        try {
            //若存在输出的pdf文件先删除，再创建
            File file = new File(pdfPath);
            if(file.isFile() && file.exists()){
                file.delete();
            }
            com.aspose.words.Document document = new com.aspose.words.Document(filePath);
            document.save(pdfPath);
        } catch (Exception e) {
            log.error("convertDocxToPdf-Error:{}",e.getMessage());
        }
        log.info("{}:PDF转换成功",filePath);
    }
	
	/**
	 * @Description: 验证aspose.word组件是否授权：无授权的文件有水印和试用标记
	 */
	public static boolean isWordLicense() {
		boolean result = false;
		try {
		//导入许可
			String licensexml = "<License>\n" + "<Data>\n" + "<Products>\n"
					+ "<Product>Aspose.Total for Java</Product>\n" + "<Product>Aspose.Words for Java</Product>\n"
					+ "</Products>\n" + "<EditionType>Enterprise</EditionType>\n"
					+ "<SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
					+ "<LicenseExpiry>20991231</LicenseExpiry>\n"
					+ "<SerialNumber>23dcc79f-44ec-4a23-be3a-03c1632404e9</SerialNumber>\n" + "</Data>\n"
					+ "<Signature>\n"
					+ "sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=\n"
					+ "</Signature>\n" + "</License>";
			InputStream inputStream = new ByteArrayInputStream(licensexml.getBytes());
			com.aspose.words.License license = new com.aspose.words.License();
			license.setLicense(inputStream);
			result = true;
		} catch (Exception e) {
			log.error("error:{}",e.getMessage());
		}
		return result;
	}

}

参考资料：https://blog.csdn.net/WayneLee0809/article/details/112788783
https://cloud.tencent.com/developer/article/1639923

标签：PDF,word,String,import,itextpdf,HTML,new,pdf,com
From： https://www.cnblogs.com/Aying216/p/18098381

HTML、图片以及word转换成pdf

一、HTML转PDF

1.1 Maven引入依赖

1.2 编写工具类

2.1 Maven引入依赖

2.2 编写工具类

3.1 Maven引入依赖

3.2 编写工具类

相关文章

赞助商

阅读排行