<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.28</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.16</version>
</dependency>
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.http.HttpUtil;
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Objects;
import javax.imageio.ImageIO;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
* pdf转图片/图片Base64工具类
*/
@Slf4j
public class Pdf2ImageUtil {
/**
* pdf文件转换成jpg图片字节数组
*
* @param pdfBytes 要转换的pdf文件的字节数组
*/
private static byte[] pdf2Image(byte[] pdfBytes) throws IOException {
PDDocument doc = null;
ByteArrayOutputStream baos = null;
try {
doc = PDDocument.load(pdfBytes);
int pageCount = doc.getNumberOfPages();
log.info("PDF转图片流,总页数:{}", pageCount);
PDFRenderer pdfRenderer = new PDFRenderer(doc);
// 不知道图片的宽和高,所以先定义个null
BufferedImage pdfImage = null;
// pdf有多少页
int y = 0;
List<BufferedImage> list = new ArrayList<>(pageCount);
// 所有页高度综合
int totalHeight = 0;
if (pageCount > 0) {
for (int i = 0; i < pageCount; i++) {
// 每页pdf内容 注意:图片设置100dpi, 设置太高容易OOM并且转不了几页
BufferedImage bim = pdfRenderer.renderImageWithDPI(i, 100, ImageType.RGB);
totalHeight += bim.getHeight();
list.add(bim);
}
}
for (BufferedImage bim : list) {
// 如果是第一页需要初始化 BufferedImage
if (Objects.isNull(pdfImage)) {
// 创建一个总高、总宽 的图片缓冲区
pdfImage = new BufferedImage(bim.getWidth(), totalHeight, BufferedImage.TYPE_INT_RGB);
}
// 将每页pdf画到总的pdfImage上,x坐标=0,y坐标=之前所有页的高度和,属于向下偏移
pdfImage.getGraphics().drawImage(bim, 0, y, null);
y += bim.getHeight();
}
if (pdfImage != null) {
baos = new ByteArrayOutputStream();
ImageIO.write(pdfImage, "jpg", baos);
baos.flush();
byte[] imageInByte = baos.toByteArray();
log.info("PDF转图片流成功");
return imageInByte;
}
return null;
} catch (Exception e) {
log.error("PDF转图片流失败:{}", e.getMessage());
e.printStackTrace();
} finally {
if (ObjectUtil.isNotNull(baos)) {
baos.close();
}
if (ObjectUtil.isNotNull(doc)) {
doc.close();
}
}
return null;
}
/**
* pdf转图片Base64编码
*
* @param filePath pdf路径
* @return 图片Base64编码
* @throws IOException IO异常
*/
public static String image2Base64(String filePath) throws IOException {
// 获取云存储图片链接的字节
byte[] pdfBytes = HttpUtil.createGet(filePath).execute().bodyBytes();
// 获取本地图片的字节, 方便本地测试
// byte[] pdfBytes = Files.readAllBytes(Paths.get(filePath));
byte[] imageBytes = Pdf2ImageUtil.pdf2Image(pdfBytes);
return getFileContentAsBase64Urlencoded(imageBytes);
}
/**
* 获取文件base64编码
*
* @param imageBytes 图片字节数组
* @return base64编码信息,不带文件头
*/
private static String getFileContentAsBase64(byte[] imageBytes) {
return Base64.getEncoder().encodeToString(imageBytes);
}
/**
* 获取文件base64 UrlEncode编码
*
* @param imageBytes 图片字节数组
* @return base64编码信息,不带文件头
* @throws IOException IO异常
*/
private static String getFileContentAsBase64Urlencoded(byte[] imageBytes) throws IOException {
return URLEncoder.encode(getFileContentAsBase64(imageBytes), "utf-8");
}
}