1、读取PDF格式的发票,Sub ReadPDFInvoiceInfo()
Sub ReadPDFInvoiceInfo()
Dim tempFolder As String
Dim Table As Object
Dim Cell As Object
Dim wordContent As String
Dim Shape As Object
Dim textBoxRange As Object
On Error Resume Next
With Usf_ProgressBar
.LbProgressBar.Caption = "正在读取发票信息,请稍候......" & Chr(10) & currInvoiceFile
.Show 0
End With
'关闭所有word进程
Call CloseAllWordInstances
tempFolder = "c:\temp\"
If Dir(tempFolder, vbDirectory) = "" Then
MkDir tempFolder
End If
'创建 Acrobat 应用程序对象
Set acrobatApp = CreateObject("AcroExch.App")
'创建 Acrobat AVDoc 对象
Set AcrobatAVDoc = CreateObject("AcroExch.AVDoc")
'打开选择的 PDF 文件
AcrobatAVDoc.Open currInvoiceFile, ""
acrobatApp.Hide
Set AcrobatPDDoc = AcrobatAVDoc.GetPDDoc
Set jsObj = AcrobatPDDoc.getjsobject
'创建 Word 应用程序对象
Set WordApp = CreateObject("Word.Application")
'关闭安全提示
WordApp.Application.AutomationSecurity = msoAutomationSecurityForceDisable
'将 PDF 转换为 Word
WordFilePath = tempFolder & Format(Time, "hhmmss") & ".docx"
jsObj.SaveAs WordFilePath, "com.adobe.acrobat.docx"
'关闭和清理 Acrobat
AcrobatAVDoc.Close 1
acrobatApp.Exit
'打开转换后的 Word 文件
Set WordDoc = WordApp.Documents.Open(WordFilePath)
'将 Word 文档内容连接成一个字符串
For Each Shape In WordDoc.Shapes
If Shape.Type = msoTextBox Then ' 检查对象是否为文本框
Debug.Print "Text Box Name: " & Shape.Name
'输出文本框中的文本内容
Set textBoxRange = Shape.TextFrame.TextRange
wordContent = wordContent & textBoxRange.Text
End If
Next
For Each paragraph In WordDoc.Paragraphs
wordContent = wordContent & paragraph.Range.Text
Next
For Each Table In WordDoc.Tables
For Each Cell In Table.Range.Cells
wordContent = wordContent & Cell.Range.Text
Next
Next
Sheet3.Range("A1") = wordContent
'替换特殊字符
wordContent = Replace(wordContent, Chr(12), "")
wordContent = Replace(wordContent, Chr(13), "")
wordContent = Replace(wordContent, Chr(14), "")
wordContent = Replace(wordContent, " ", "")
wordContent = Replace(wordContent, " ", "")
wordContent = Replace(wordContent, "", "")
wordContent = Replace(wordContent, ":", ":")
wordContent = Replace(wordContent, ChrW(165), "¥")
wordContent = Replace(wordContent, "(", "(")
wordContent = Replace(wordContent, ")", ")")
'使用正则表达式提取信息
Set regEx = CreateObject("VBScript.RegExp")
regEx.Pattern = ".*?代码:(\d{12}).*?号码:(\d{8}).*?(\d{4}年\d+月\d+日).*?验码:(\d{20}).*?称:(.*?)纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10})"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
InvoiceCode = Match(0).submatches(0)
InvoiceNo = Match(0).submatches(1)
invoiceDate = Match(0).submatches(2)
BuyerName = Match(0).submatches(4)
SellerName = Match(0).submatches(6)
SellerTaxID = Match(0).submatches(7)
Else
regEx.Pattern = ".*?号码:(\d{20}).*?(\d{4}年\d+月\d+日).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10})"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
InvoiceCode = Left(Match(0).submatches(0), 12)
InvoiceNo = Right(Match(0).submatches(0), 8)
invoiceDate = Match(0).submatches(1)
BuyerName = Match(0).submatches(2)
SellerName = Match(0).submatches(4)
SellerTaxID = Match(0).submatches(5)
Else
regEx.Pattern = ".*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?号码:(\d{20}).*?(\d{4}年\d+月\d+日)"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
BuyerName = Match(0).submatches(0)
SellerName = Match(0).submatches(2)
SellerTaxID = Match(0).submatches(3)
InvoiceCode = Left(Match(0).submatches(4), 12)
InvoiceNo = Right(Match(0).submatches(4), 8)
invoiceDate = Match(0).submatches(5)
End If
End If
End If
regEx.Pattern = ".货*?务名称(.*?)合"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
ItemName = Match(0).submatches(0)
Else
regEx.Pattern = "项目名称(.*?)合"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
ItemName = Match(0).submatches(0)
End If
End If
regEx.Pattern = "¥(\d+.\d{2})"
regEx.Global = True
Set Match = regEx.Execute(wordContent)
If Match.Count = 3 Then
Amount = Match(0).submatches(0)
TaxAmount = Match(1).submatches(0)
ElseIf Match.Count = 2 Then
Amount = Match(0).submatches(0)
TaxAmount = 0
Else
regEx.Pattern = ".单*?额(\d+.\d{2}).*?税额(\d+.\d{2})"
Set Match = regEx.Execute(wordContent)
If Match.Count > 0 Then
Amount = Match(0).submatches(0)
TaxAmount = Match(0).submatches(1)
End If
End If
If invoiceDate <> "" Then '判断一下有没有数据
k = InStr(BuyerName, "密码区")
If k > 0 Then
BuyerName = Left(BuyerName, k - 1)
End If
End If
'恢复安全提示
WordApp.Application.AutomationSecurity = msoAutomationSecurityByUI
'关闭和清理 Word
WordDoc.Close False
WordApp.Quit
Set WordDoc = Nothing
Set WordApp = Nothing
'删除临时生成的 Word 文件
'Kill WordFilePath
DeleteDirectory tempFolder
Unload Usf_ProgressBar
End Sub
2、读取OFD格式的发票,Sub ReadOFDInvoiceInfo()
Sub ReadOFDInvoiceInfo()
Dim rarPath As String
Dim rarCmd As String
Dim Result As Long
Dim destFolder As String
Dim tempFolder As String
Dim SelectedNode As Object
Dim NewDOMD As Object
On Error Resume Next
'解压OFD文件
rarPath = GetRARPath
If rarPath = "" Then
MsgBox "没有安装RAR压缩程序!"
Exit Sub
End If
tempFolder = "c:\temp\"
If Dir(tempFolder, vbDirectory) = "" Then
MkDir tempFolder
End If
destFolder = tempFolder & Format(Time, "hhmmss") & "\"
rarCmd = rarPath & " X " & currInvoiceFile & " " & destFolder
Result = shell(rarCmd, vbHide)
Sleep 1000 ' Delay for 2 seconds (2000 milliseconds)
'等待解压完成
Do Until Dir(destFolder & "Doc_0\Attachs\original_invoice.xml") <> "" _
Or Dir(destFolder & "Doc_0\Pages\Page_0\Content.xml") <> ""
DoEvents
Loop
'提取XML信息
Set NewDOMD = CreateObject("MSXML2.DOMDocument")
If Dir(destFolder & "Doc_0\Attachs\original_invoice.xml") <> "" Then
'增值税普通电子发票
NewDOMD.Load destFolder & "Doc_0\Attachs\original_invoice.xml"
Set xmld = NewDOMD.DocumentElement.SelectSingleNode("//eInvoice")
Set SelectedNode = xmld.SelectSingleNode("//fp:InvoiceCode")
InvoiceCode = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:InvoiceNo")
InvoiceNo = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:IssueDate")
invoiceDate = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:BuyerName")
BuyerName = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:SellerName")
SellerName = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:SellerTaxID")
SellerTaxID = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:TaxInclusiveTotalAmount")
Amount = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:TaxTotalAmount")
TaxAmount = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//fp:GoodsInfo/fp:Item")
ItemName = SelectedNode.Text
ElseIf Dir(destFolder & "Doc_0\Pages\Page_0\Content.xml") <> "" Then
'数电发票
NewDOMD.Load destFolder & "Doc_0\Pages\Page_0\Content.xml"
Set xmld = NewDOMD.DocumentElement.SelectSingleNode("//ofd:Page")
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6922']/ofd:TextCode")
InvoiceCode = Left(SelectedNode.Text, 12)
InvoiceNo = Right(SelectedNode.Text, 8)
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6923']/ofd:TextCode")
invoiceDate = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6924']/ofd:TextCode")
BuyerName = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6927']/ofd:TextCode")
SellerName = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6928']/ofd:TextCode")
SellerTaxID = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6942']/ofd:TextCode")
Amount = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6943']/ofd:TextCode")
TaxAmount = SelectedNode.Text
Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6939']/ofd:TextCode")
ItemName = SelectedNode.Text
End If
'清理解压后的文件
Set xmld = Nothing
Set NewDOMD = Nothing
DeleteDirectory tempFolder
End Sub