Java 将 Word 文件转换为 HTML 字符串并返回

1、需求：解析前端上传的 Word 文件，生成 HTML 字符串并返回给前端展示。Word 中的图片可以忽略、不需要显示，因此这段代码去掉了解析图片的部分。

package com.lieni.core.util;

import java.io.ByteArrayOutputStream;

import java.io.IOException;

import java.io.InputStream;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.springframework.web.multipart.MultipartFile;

import org.w3c.dom.Document;

import com.itextpdf.text.log.Logger;

import com.itextpdf.text.log.LoggerFactory;

/**
 * Converts uploaded Word documents ({@code .docx} and {@code .doc}) into HTML
 * strings. Neither conversion configures any image handling (no XHTML options
 * and no pictures manager are supplied).
 *
 * Created by LTmei on 2018/10/10 10:00
 */
public class Word2HtmlUtil {

    /** logger */
    private static final Logger logger = LoggerFactory.getLogger(Word2HtmlUtil.class);

    /**
     * Encoding used to serialize the HTML of legacy {@code .doc} files; the
     * same value is used to decode the serialized bytes back into a String.
     */
    private static final String HTML_ENCODING = "utf-8";

    /**
     * Converts an uploaded Word 2007+ ({@code .docx}) file to an XHTML string.
     *
     * @param file the uploaded file; its original filename must end with
     *             {@code .docx} (compared case-insensitively)
     * @return the generated markup, or {@code null} when the upload is
     *         missing/empty or does not have a {@code .docx} filename
     * @throws IOException if the upload cannot be read or converted
     */
    public static String Word2007ToHtml(MultipartFile file) throws IOException {
        if (isMissing(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file.getOriginalFilename(), ".docx")) {
            logger.error("Enter only MS Office 2007+ files");
            return null;
        }

        // try-with-resources guarantees the upload stream is released even
        // when parsing or conversion throws.
        try (InputStream in = file.getInputStream();
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            // 1) Load the Word document into an XWPFDocument.
            XWPFDocument document = new XWPFDocument(in);

            // 2) Render it into the in-memory buffer (no converter options).
            XHTMLConverter.getInstance().convert(document, baos, null);

            // NOTE(review): decoded with the platform default charset, as
            // before; confirm which encoding XHTMLConverter writes and pin it
            // here if it is fixed.
            return baos.toString();
        }
    }

    /**
     * Converts an uploaded Word 97-2003 ({@code .doc}) file to an HTML string.
     *
     * @param file the uploaded file; its original filename must end with
     *             {@code .doc} (compared case-insensitively)
     * @return the generated markup, or {@code null} when the upload is
     *         missing/empty or does not have a {@code .doc} filename
     * @throws IOException                  if the upload cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the DOM cannot be serialized
     */
    public static String Word2003ToHtml(MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (isMissing(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file.getOriginalFilename(), ".doc")) {
            logger.error("Enter only MS Office 2003 files");
            return null;
        }

        try (InputStream input = file.getInputStream();
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            // Parse the Word document into an HTML DOM tree.
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            // Serialize the DOM tree as indented HTML into the buffer.
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

            // Decode with the encoding the serializer was told to write;
            // relying on the platform default garbles non-ASCII text on JVMs
            // whose default charset is not UTF-8.
            return baos.toString(HTML_ENCODING);
        }
    }

    /** Returns true when there is no upload or it carries no content. */
    private static boolean isMissing(MultipartFile file) {
        return file == null || file.isEmpty() || file.getSize() <= 0;
    }

    /**
     * Returns true when {@code filename} is non-null and ends with
     * {@code extension}, ignoring case.
     */
    private static boolean hasExtension(String filename, String extension) {
        if (filename == null || filename.length() < extension.length()) {
            return false;
        }
        int start = filename.length() - extension.length();
        return filename.regionMatches(true, start, extension, 0, extension.length());
    }

}