Java文件处理系列之：word转pdf-永久免费的源码丞旭猿-丞旭猿

日常开发中，word转pdf是较为常见的操作，尤其是前端上传word文档后需要在页面预览的场景。前端直接预览word需要特殊处理；如果由后端先把word转为pdf再预览，就会简单得多。

效果预览：

原始word文件.docx

转换之后的pdf文件.pdf

接下来就分享实测过的实现方式。

环境：JDK 11、Spring Boot 2.3.7.RELEASE、Windows 10、Maven

第一步，Maven 依赖配置，主要导入一些工具包

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.4</version>
    </dependency>
    <dependency>
        <groupId>com.deepoove</groupId>
        <artifactId>poi-tl</artifactId>
        <version>1.10.2</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>4.1.2</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itextpdf</artifactId>
        <version>5.5.13.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf.tool</groupId>
        <artifactId>xmlworker</artifactId>
        <version>5.5.13.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itext-asian</artifactId>
        <version>5.2.0</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>html2pdf</artifactId>
        <version>4.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.14.3</version>
    </dependency>
</dependencies>

第二步，service 业务层构造

package com.yalin.cn.fileutil.word.service;

import java.io.InputStream;
import java.io.OutputStream;

/**
 * Service that converts Word (docx) documents to PDF.
 *
 * <p>Created 2021-05-08 16:31:47.
 *
 * @author lyl
 */
public interface IWordConvertPdfService {

    /**
     * Converts a docx file on disk to a PDF file on disk.
     *
     * @param sourcePath path of the Word file to read
     * @param targetPath path of the PDF file to write
     * @param imageDir   temporary directory for the images contained in the Word file
     * @return boolean result of the conversion
     */
    boolean convert(String sourcePath, String targetPath, String imageDir);

    /**
     * Converts a docx stream to a PDF file on disk.
     *
     * @param in         stream of the Word file
     * @param targetPath path of the PDF file to write
     * @param imageDir   temporary directory for the images contained in the Word file
     * @return boolean result of the conversion
     */
    boolean convert(InputStream in, String targetPath, String imageDir);

    /**
     * Converts a docx stream to a PDF stream.
     *
     * @param in       stream of the Word file
     * @param out      stream the PDF is written to
     * @param imageDir temporary directory for the images contained in the Word file
     * @return boolean result of the conversion
     * @throws Exception declared by the contract; thrown by implementations on failure
     */
    boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception;
}

第三步，service impl 业务实现层构造

package com.yalin.cn.fileutil.word.service.impl;

import com.yalin.cn.fileutil.util.OfficeUtil;
import com.yalin.cn.fileutil.word.service.IWordConvertPdfService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;

/**
 * Converts Word (docx) documents to PDF by delegating to {@link OfficeUtil}.
 *
 * <p>Created 2021-05-08 16:31:47.
 *
 * @author lyl
 */
@Service
@Slf4j
public class WordConvertPdfServiceImpl implements IWordConvertPdfService {

    /**
     * Converts a docx file on disk to a PDF file on disk.
     *
     * @param sourcePath path of the Word file to read
     * @param targetPath path of the PDF file to write
     * @param imageDir   temporary directory for the images contained in the Word file
     * @return {@code true} if the conversion succeeded; {@code false} if it failed
     *         (the failure is logged, not rethrown)
     */
    @Override
    public boolean convert(String sourcePath, String targetPath, String imageDir) {
        try (InputStream inputStream = Files.newInputStream(Paths.get(sourcePath));
             OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(inputStream, outputStream, imageDir);
        } catch (Exception e) {
            // A trailing Throwable is logged with its stack trace; no "{}" placeholder is needed.
            log.error("convert(String, String, String)异常：", e);
        }
        return false;
    }

    /**
     * Converts a docx stream to a PDF file on disk.
     *
     * @param in         stream of the Word file
     * @param targetPath path of the PDF file to write
     * @param imageDir   temporary directory for the images contained in the Word file
     * @return {@code true} if the conversion succeeded; {@code false} if it failed
     *         (the failure is logged, not rethrown)
     */
    @Override
    public boolean convert(InputStream in, String targetPath, String imageDir) {
        try (OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(in, outputStream, imageDir);
        } catch (Exception e) {
            log.error("convert(InputStream, String, String)异常：", e);
        }
        return false;
    }

    /**
     * Converts a docx stream to a PDF stream.
     *
     * @param in       stream of the Word file; must not be {@code null}
     * @param out      stream the PDF is written to; must not be {@code null}
     * @param imageDir temporary directory for the images contained in the Word file
     * @return the result reported by {@link OfficeUtil#docxConvertPdf}; {@code false} if the
     *         call itself throws
     * @throws Exception if {@code in} or {@code out} is {@code null}
     */
    @Override
    public boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception {
        if (Objects.isNull(in)) {
            throw new Exception("模板文件流为null！");
        }
        if (Objects.isNull(out)) {
            throw new Exception("目标文件流为null！");
        }
        try {
            // Propagate the utility's own verdict: it reports failure by returning false,
            // so unconditionally returning true here would hide failed conversions.
            return OfficeUtil.docxConvertPdf(in, out, imageDir);
        } catch (Exception e) {
            log.error("convert(InputStream, OutputStream, String)异常：", e);
        }
        return false;
    }
}

第四步，真正实现转换的工具类

package com.yalin.cn.fileutil.util;

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.yalin.cn.fileutil.font.AutoFontFactory;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Word (docx) to PDF conversion helpers: docx -> XHTML -> normalized XHTML -> PDF.
 *
 * <p>Created 2021-04-23 11:09:51.
 *
 * @author lyl
 */
public class OfficeUtil {

    private static final Logger LOGGER = Logger.getLogger(OfficeUtil.class.getName());

    /**
     * Converts a docx stream to HTML.
     *
     * <p>The input stream is closed before this method returns, whether or not the
     * conversion succeeds.
     *
     * @param in       docx stream
     * @param imageDir directory where images of the docx are stored; when {@code null} the
     *                 converter's default options are used
     * @return the generated HTML, decoded as UTF-8
     * @throws Exception wrapping any failure raised while loading or converting the document
     */
    public static String docx2Html(InputStream in, String imageDir) throws Exception {
        // try-with-resources also closes the XWPFDocument, which was previously never released.
        try (InputStream source = in;
             XWPFDocument document = new XWPFDocument(source);
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            XHTMLOptions options = XHTMLOptions.create();
            if (Objects.nonNull(imageDir)) {
                // Extract embedded images into imageDir and point the generated URIs at it.
                options.setExtractor(new FileImageExtractor(new File(imageDir)));
                options.URIResolver(new BasicURIResolver(imageDir));
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);
            }
            XHTMLConverter.getInstance().convert(document, baos, options);
            return new String(baos.toByteArray(), StandardCharsets.UTF_8);
        } catch (Exception e) {
            throw new Exception(e);
        }
    }

    /**
     * Normalizes HTML with jsoup so that it can be parsed as XHTML.
     *
     * <p>Inline styles that mention {@code width} are cleared on the document node and on
     * every {@code div}; the output uses XML syntax (closed tags) and XHTML entity escaping.
     *
     * @param html HTML content
     * @return normalized HTML
     */
    private static String formatHtml(String html) {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        // Drop width declarations (the original author's note: widths that are too large).
        clearWidthStyle(doc);
        Elements divs = doc.select("div");
        for (Element div : divs) {
            clearWidthStyle(div);
        }
        // Make jsoup emit closed tags.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        return doc.html();
    }

    /** Empties the {@code style} attribute of {@code element} when it mentions {@code width}. */
    private static void clearWidthStyle(Element element) {
        String style = element.attr("style");
        if (StringUtils.isNotEmpty(style) && style.contains("width")) {
            element.attr("style", "");
        }
    }

    /**
     * Renders HTML to PDF on an A4 page.
     *
     * @param html HTML content (expected to be well-formed XHTML)
     * @param out  stream the PDF is written to; not closed by this method
     * @throws Exception wrapping any failure raised while building the PDF
     */
    public static void htmlToPdf(String html, OutputStream out) throws Exception {
        Document document = new Document(PageSize.A4);
        try (ByteArrayInputStream bais =
                     new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8))) {
            PdfWriter writer = PdfWriter.getInstance(document, out);
            document.open();
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, bais,
                    StandardCharsets.UTF_8, new AutoFontProvider());
        } catch (Exception e) {
            throw new Exception(e);
        } finally {
            document.close();
        }
    }

    /**
     * Converts a docx stream to a PDF stream.
     *
     * @param in       docx stream
     * @param out      PDF stream
     * @param imageDir directory where images of the docx are stored
     * @return {@code true} if the conversion succeeded, {@code false} if any step failed
     *         (the failure is logged)
     */
    public static boolean docxConvertPdf(InputStream in, OutputStream out, String imageDir) {
        try {
            String docxHtml = formatHtml(docx2Html(in, imageDir));
            htmlToPdf(docxHtml, out);
            return true;
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "docx to pdf conversion failed", e);
            return false;
        }
    }

    /** Font provider that serves every request with the base font from {@link AutoFontFactory}. */
    private static final class AutoFontProvider implements FontProvider {

        @Override
        public boolean isRegistered(String fontName) {
            return false;
        }

        @Override
        public Font getFont(String fontName, String encoding, boolean embedded,
                            float size, int style, BaseColor baseColor) {
            try {
                BaseFont bf = AutoFontFactory.getBaseFont();
                return new Font(bf, size, style, baseColor);
            } catch (Exception e) {
                // Fail with the real cause instead of handing a null Font back to the parser.
                throw new IllegalStateException("Unable to load the PDF base font", e);
            }
        }
    }
}

备注：OfficeUtil 中的 AutoFontFactory 是自定义的字体工厂类。由于 Linux 环境下可能缺少某些中文字体，生成的 PDF 会出现中文乱码。解决方案之一，是从 Windows 字体库中复制一个字体文件（如 simsun.ttc），放到项目的 resources 目录下（如 font/simsun.ttc），然后在代码中引用即可。

package com.yalin.cn.fileutil.font;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.BaseFont;

import java.io.IOException;

/**
 * Font factory for PDF generation.
 *
 * <p>Created 2022-01-17 15:38:29.
 *
 * @author lyl
 */
public class AutoFontFactory {

    /**
     * Returns the base font used when rendering PDFs.
     *
     * <p>Option 1 (active): a font shipped with the application, referenced as
     * {@code /font/simsun.ttc,0}; the font file must be present.
     *
     * <p>Alternatives noted by the original author:
     * <ul>
     *   <li>Option 2: a font installed on the local machine, e.g.
     *       {@code BaseFont.createFont("C:/Windows/Fonts/seguisym.ttf", BaseFont.IDENTITY_H,
     *       BaseFont.EMBEDDED)}; the font must exist locally.</li>
     *   <li>Option 3: the iTextAsian jar, e.g.
     *       {@code BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED)};
     *       only that jar is needed.</li>
     * </ul>
     *
     * @return the base font
     * @throws IOException       if the font cannot be read
     * @throws DocumentException if the font is invalid
     */
    public static BaseFont getBaseFont() throws IOException, DocumentException {
        return BaseFont.createFont("/font/simsun.ttc,0", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    }
}

测试类

@TestvoidwordConvertPdf() {StringbasePath ="C:\\Users\\lyl\\Desktop\\";StringsourcePath = basePath +"原始word文件.docx";StringtargetPath = basePath +"转换之后的pdf文件.pdf";StringimagePath = basePath +"img"+ File.separator;
        WordConvertPdfServiceImpl tt =newWordConvertPdfServiceImpl();booleanflag = tt.convert(sourcePath, targetPath, imagePath);
        System.out.println(flag);
    }

声明：本文部分素材转载自互联网，如有侵权立即删除。

1. 本站所有资源来源于用户上传和网络，如有侵权请邮件联系站长！邮箱：cxysz1@tom.com
2. 分享目的仅供大家学习和交流，您必须在下载后24小时内删除！
3. 不得使用于非法商业用途，不得违反国家法律。否则后果自负！
4. 本站提供的源码、模板、插件等等其他资源，都不包含技术服务请大家谅解！
5. 如有链接无法下载、失效或广告，请联系管理员处理！
6. 本站资源售价只是赞助，收取费用仅维持本站的日常运营所需！
7. 如遇到加密压缩包，请使用WINRAR解压,如遇到无法解压的请联系管理员！
8. 精力有限，不少源码未能详细测试（解密），不能分辨部分源码是病毒还是误报，所以没有进行任何修改，大家使用前请进行甄别
丞旭猿论坛

THE END

行业资讯