一、将 doc 转为 pdf
1、install 依赖
pip install pywin32
2、直接调用win32com接口打开文件,另存为pdf。SaveAs中的参数17代表另存为pdf格式(即wdFormatPDF),完成后关闭文件,关闭word。
def doc2pdf(self):
    """Export the Word document at ``self.docPath`` to ``self.pdfPath`` as PDF.

    Word is driven through the ``Dispatch`` COM entry point. Any failure
    while opening or saving is printed as a traceback instead of being
    raised (best effort), and ``self.checkFile`` is always called afterwards
    to report whether the PDF exists.
    """
    word = None
    doc = None
    try:
        word = Dispatch("Word.Application")
        doc = word.Documents.Open(self.docPath, ReadOnly=1)
        # 17 is wdFormatPDF in the format table that follows this snippet.
        doc.SaveAs(self.pdfPath, 17)
    except Exception:
        # Report and carry on; KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
    finally:
        # Release only what was actually acquired: if Dispatch() or Open()
        # failed, the corresponding handle was never created.
        try:
            if doc is not None:
                doc.Close()
        finally:
            # Quit Word even when closing the document fails, so that no
            # background Word process is left behind.
            if word is not None:
                word.Quit()
    self.checkFile(self.pdfPath, 'pdf')
以下为文件格式对应表
wdFormatDocument = 0 wdFormatDocument97 = 0 wdFormatDocumentDefault = 16 wdFormatDOSText = 4 wdFormatDOSTextLineBreaks = 5 wdFormatEncodedText = 7 wdFormatFilteredHTML = 10 wdFormatFlatXML = 19 wdFormatFlatXMLMacroEnabled = 20 wdFormatFlatXMLTemplate = 21 wdFormatFlatXMLTemplateMacroEnabled = 22 wdFormatHTML = 8 wdFormatPDF = 17 wdFormatRTF = 6 wdFormatTemplate = 1 wdFormatTemplate97 = 1 wdFormatText = 2 wdFormatTextLineBreaks = 3 wdFormatUnicodeText = 7 wdFormatWebArchive = 9 wdFormatXML = 11 wdFormatXMLDocument = 12 wdFormatXMLDocumentMacroEnabled = 13 wdFormatXMLTemplate = 14 wdFormatXMLTemplateMacroEnabled = 15 wdFormatXPS = 18
二、将pdf转为图片
1、install 依赖
1.1、pip install pdf2image
1.2、Windows安装配置poppler
Windows用户必须为Windows安装poppler (http://blog.alivate.com.au/poppler-windows/),然后将bin/文件夹添加到PATH(开始>输入env>编辑系统环境变量>环境变量...>系统变量>Path)
安装完poppler需重启系统后生效。
2、将pdf转为图片
def pdf2image(self):
    """Render each page of ``self.pdfPath`` to a JPEG file.

    Images are written to ``self.imgFold`` (``self.fileFold`` joined with
    ``self.fileName``) and named ``0.jpg``, ``1.jpg``, ... in page order.
    ``self.checkFile`` is then called on the last image written. When the
    conversion yields no pages, a message is printed instead.
    """
    # Create the image folder; exist_ok avoids the check-then-create race.
    self.imgFold = os.path.join(self.fileFold, self.fileName)
    os.makedirs(self.imgFold, exist_ok=True)

    # Save every page as an image.
    last_path = None
    for index, page in enumerate(convert_from_path(self.pdfPath)):
        last_path = os.path.join(self.imgFold, str(index) + '.jpg')
        page.save(last_path, 'JPEG')

    if last_path is None:
        # Nothing was rendered, so there is no "last image" to verify.
        print('no pages converted from {}'.format(self.pdfPath))
        return
    self.checkFile(last_path, 'last img')
三、直接将word转为图片
方法:结合1,2
代码如下:
import os
import traceback
from win32com.client import Dispatch
from pdf2image import convert_from_path


class Word2Pdf2Img():
    """Convert a Word document to PDF, then render the PDF pages to JPEGs.

    The PDF is written next to the source document with the same base name.
    The images go into a sibling folder named after that base name.
    """

    def __init__(self, docPath):
        """Derive all working paths from the Word document path *docPath*."""
        # Initialise paths. Word resolves relative paths against its own
        # working directory, not Python's, so hand it an absolute path.
        self.docPath = os.path.abspath(docPath)
        # splitext drops only the final extension, so "a.v2.docx" keeps the
        # base name "a.v2" instead of being truncated to "a".
        self.fileName = os.path.splitext(os.path.basename(self.docPath))[0]
        self.fileFold = os.path.dirname(self.docPath)
        self.pdfPath = os.path.join(self.fileFold, self.fileName + '.pdf')

    @staticmethod
    def checkFile(filePath, fileType=''):
        """Print whether *filePath* exists, labelling it with *fileType*."""
        if os.path.isfile(filePath):
            print ('file {} existed!'.format(fileType))
        else:
            print ('file {} not existed!'.format(fileType))

    def doc2pdf(self):
        """Export ``self.docPath`` to ``self.pdfPath`` through Word's COM API.

        Failures while opening or saving are printed as a traceback rather
        than raised. ``checkFile`` always reports on the PDF afterwards.
        """
        word = None
        doc = None
        try:
            word = Dispatch("Word.Application")
            doc = word.Documents.Open(self.docPath, ReadOnly=1)
            # 17 is wdFormatPDF in Word's WdSaveFormat enumeration.
            doc.SaveAs(self.pdfPath, 17)
        except Exception:
            # Report and carry on; KeyboardInterrupt/SystemExit propagate.
            traceback.print_exc()
        finally:
            # Release only what was actually acquired: if Dispatch() or
            # Open() failed, the corresponding handle was never created.
            try:
                if doc is not None:
                    doc.Close()
            finally:
                # Quit Word even when closing the document fails, so that
                # no background Word process is left behind.
                if word is not None:
                    word.Quit()
        self.checkFile(self.pdfPath, 'pdf')

    def pdf2image(self):
        """Render each page of ``self.pdfPath`` to ``<imgFold>/<index>.jpg``.

        ``checkFile`` is called on the last image written. When the
        conversion yields no pages, a message is printed instead.
        """
        # Create the image folder; exist_ok avoids a check-then-create race.
        self.imgFold = os.path.join(self.fileFold, self.fileName)
        os.makedirs(self.imgFold, exist_ok=True)

        # Save every page as an image.
        last_path = None
        for index, page in enumerate(convert_from_path(self.pdfPath)):
            last_path = os.path.join(self.imgFold, str(index) + '.jpg')
            page.save(last_path, 'JPEG')

        if last_path is None:
            # Nothing was rendered, so there is no "last image" to verify.
            print('no pages converted from {}'.format(self.pdfPath))
            return
        self.checkFile(last_path, 'last img')

    def doc2image(self):
        """Run the full pipeline: Word document to PDF, then PDF to images."""
        self.doc2pdf()
        self.pdf2image()