1、读取PDF格式的发票,Sub ReadPDFInvoiceInfo()

Sub ReadPDFInvoiceInfo()
    Dim tempFolder As String
    Dim Table As Object
    Dim Cell As Object
    Dim wordContent As String
    Dim Shape As Object
    Dim textBoxRange As Object
    On Error Resume Next
    With Usf_ProgressBar
       .LbProgressBar.Caption = "正在读取发票信息,请稍候......" & Chr(10) & currInvoiceFile
       .Show 0
    End With
    '关闭所有word进程
    Call CloseAllWordInstances
    tempFolder = "c:\temp\"
    If Dir(tempFolder, vbDirectory) = "" Then
        MkDir tempFolder
    End If
        '创建 Acrobat 应用程序对象
        Set acrobatApp = CreateObject("AcroExch.App")
        '创建 Acrobat AVDoc 对象
        Set AcrobatAVDoc = CreateObject("AcroExch.AVDoc")
        '打开选择的 PDF 文件
        AcrobatAVDoc.Open currInvoiceFile, ""
        acrobatApp.Hide
        Set AcrobatPDDoc = AcrobatAVDoc.GetPDDoc
        Set jsObj = AcrobatPDDoc.getjsobject
        '创建 Word 应用程序对象
        Set WordApp = CreateObject("Word.Application")
       '关闭安全提示
       WordApp.Application.AutomationSecurity = msoAutomationSecurityForceDisable
        '将 PDF 转换为 Word
        WordFilePath = tempFolder & Format(Time, "hhmmss") & ".docx"
        jsObj.SaveAs WordFilePath, "com.adobe.acrobat.docx"
        '关闭和清理 Acrobat
        AcrobatAVDoc.Close 1
        acrobatApp.Exit
        '打开转换后的 Word 文件
        Set WordDoc = WordApp.Documents.Open(WordFilePath)
        '将 Word 文档内容连接成一个字符串
        For Each Shape In WordDoc.Shapes
            If Shape.Type = msoTextBox Then ' 检查对象是否为文本框
                Debug.Print "Text Box Name: " & Shape.Name
                '输出文本框中的文本内容
                Set textBoxRange = Shape.TextFrame.TextRange
                wordContent = wordContent & textBoxRange.Text
            End If
        Next
        For Each paragraph In WordDoc.Paragraphs
            wordContent = wordContent & paragraph.Range.Text
        Next
        For Each Table In WordDoc.Tables
            For Each Cell In Table.Range.Cells
                wordContent = wordContent & Cell.Range.Text
            Next
        Next
        Sheet3.Range("A1") = wordContent
        '替换特殊字符
        wordContent = Replace(wordContent, Chr(12), "")
        wordContent = Replace(wordContent, Chr(13), "")
        wordContent = Replace(wordContent, Chr(14), "")
        wordContent = Replace(wordContent, " ", "")
        wordContent = Replace(wordContent, " ", "")
        wordContent = Replace(wordContent, "", "")
        wordContent = Replace(wordContent, ":", ":")
        wordContent = Replace(wordContent, ChrW(165), "¥")
        wordContent = Replace(wordContent, "(", "(")
        wordContent = Replace(wordContent, ")", ")")
        '使用正则表达式提取信息
        Set regEx = CreateObject("VBScript.RegExp")
        regEx.Pattern = ".*?代码:(\d{12}).*?号码:(\d{8}).*?(\d{4}年\d+月\d+日).*?验码:(\d{20}).*?称:(.*?)纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10})"
        Set Match = regEx.Execute(wordContent)
        If Match.Count > 0 Then
            InvoiceCode = Match(0).submatches(0)
            InvoiceNo = Match(0).submatches(1)
            invoiceDate = Match(0).submatches(2)
            BuyerName = Match(0).submatches(4)
            SellerName = Match(0).submatches(6)
            SellerTaxID = Match(0).submatches(7)
        Else
            regEx.Pattern = ".*?号码:(\d{20}).*?(\d{4}年\d+月\d+日).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10})"
            Set Match = regEx.Execute(wordContent)
            If Match.Count > 0 Then
                InvoiceCode = Left(Match(0).submatches(0), 12)
                InvoiceNo = Right(Match(0).submatches(0), 8)
                invoiceDate = Match(0).submatches(1)
                BuyerName = Match(0).submatches(2)
                SellerName = Match(0).submatches(4)
                SellerTaxID = Match(0).submatches(5)
            Else
                regEx.Pattern = ".*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?称:(.*?)统.*?纳.*?号:([0-9A-Z]{2}\d{6}[0-9A-Z]{10}).*?号码:(\d{20}).*?(\d{4}年\d+月\d+日)"
                Set Match = regEx.Execute(wordContent)
                If Match.Count > 0 Then
                     BuyerName = Match(0).submatches(0)
                     SellerName = Match(0).submatches(2)
                     SellerTaxID = Match(0).submatches(3)
                     InvoiceCode = Left(Match(0).submatches(4), 12)
                     InvoiceNo = Right(Match(0).submatches(4), 8)
                     invoiceDate = Match(0).submatches(5)
                End If
            End If
        End If
        regEx.Pattern = ".货*?务名称(.*?)合"
        Set Match = regEx.Execute(wordContent)
        If Match.Count > 0 Then
            ItemName = Match(0).submatches(0)
        Else
            regEx.Pattern = "项目名称(.*?)合"
            Set Match = regEx.Execute(wordContent)
            If Match.Count > 0 Then
                ItemName = Match(0).submatches(0)
            End If
        End If
        regEx.Pattern = "¥(\d+.\d{2})"
        regEx.Global = True
        Set Match = regEx.Execute(wordContent)
        If Match.Count = 3 Then
            Amount = Match(0).submatches(0)
            TaxAmount = Match(1).submatches(0)
        ElseIf Match.Count = 2 Then
            Amount = Match(0).submatches(0)
            TaxAmount = 0
        Else
            regEx.Pattern = ".单*?额(\d+.\d{2}).*?税额(\d+.\d{2})"
            Set Match = regEx.Execute(wordContent)
            If Match.Count > 0 Then
                Amount = Match(0).submatches(0)
                TaxAmount = Match(0).submatches(1)
            End If
        End If
        If invoiceDate <> "" Then   '判断一下有没有数据
            k = InStr(BuyerName, "密码区")
            If k > 0 Then
                BuyerName = Left(BuyerName, k - 1)
            End If
        End If
        '恢复安全提示
        WordApp.Application.AutomationSecurity = msoAutomationSecurityByUI
        '关闭和清理 Word
        WordDoc.Close False
        WordApp.Quit
        Set WordDoc = Nothing
        Set WordApp = Nothing
        '删除临时生成的 Word 文件
        'Kill WordFilePath
        DeleteDirectory tempFolder
    Unload Usf_ProgressBar
End Sub

 2、读取OFD格式的发票,Sub ReadOFDInvoiceInfo()


Sub ReadOFDInvoiceInfo()
    Dim rarPath As String
    Dim rarCmd As String
    Dim Result As Long
    Dim destFolder As String
    Dim tempFolder As String
    Dim SelectedNode As Object
    Dim NewDOMD As Object
    On Error Resume Next
    '解压OFD文件
    rarPath = GetRARPath
    If rarPath = "" Then
        MsgBox "没有安装RAR压缩程序!"
        Exit Sub
    End If
    tempFolder = "c:\temp\"
    If Dir(tempFolder, vbDirectory) = "" Then
        MkDir tempFolder
    End If
    destFolder = tempFolder & Format(Time, "hhmmss") & "\"
    rarCmd = rarPath & " X " & currInvoiceFile & " " & destFolder
    Result = shell(rarCmd, vbHide)
    Sleep 1000 ' Delay for 2 seconds (2000 milliseconds)
    '等待解压完成
    Do Until Dir(destFolder & "Doc_0\Attachs\original_invoice.xml") <> "" _
    Or Dir(destFolder & "Doc_0\Pages\Page_0\Content.xml") <> ""
        DoEvents
    Loop
    '提取XML信息
    Set NewDOMD = CreateObject("MSXML2.DOMDocument")
    If Dir(destFolder & "Doc_0\Attachs\original_invoice.xml") <> "" Then
        '增值税普通电子发票
        NewDOMD.Load destFolder & "Doc_0\Attachs\original_invoice.xml"
        Set xmld = NewDOMD.DocumentElement.SelectSingleNode("//eInvoice")
        Set SelectedNode = xmld.SelectSingleNode("//fp:InvoiceCode")
        InvoiceCode = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:InvoiceNo")
        InvoiceNo = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:IssueDate")
        invoiceDate = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:BuyerName")
        BuyerName = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:SellerName")
        SellerName = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:SellerTaxID")
        SellerTaxID = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:TaxInclusiveTotalAmount")
        Amount = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:TaxTotalAmount")
        TaxAmount = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//fp:GoodsInfo/fp:Item")
        ItemName = SelectedNode.Text
    ElseIf Dir(destFolder & "Doc_0\Pages\Page_0\Content.xml") <> "" Then
        '数电发票
        NewDOMD.Load destFolder & "Doc_0\Pages\Page_0\Content.xml"
        Set xmld = NewDOMD.DocumentElement.SelectSingleNode("//ofd:Page")
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6922']/ofd:TextCode")
        InvoiceCode = Left(SelectedNode.Text, 12)
        InvoiceNo = Right(SelectedNode.Text, 8)
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6923']/ofd:TextCode")
        invoiceDate = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6924']/ofd:TextCode")
        BuyerName = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6927']/ofd:TextCode")
        SellerName = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6928']/ofd:TextCode")
        SellerTaxID = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6942']/ofd:TextCode")
        Amount = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6943']/ofd:TextCode")
        TaxAmount = SelectedNode.Text
        Set SelectedNode = xmld.SelectSingleNode("//ofd:TextObject[@ID='6939']/ofd:TextCode")
        ItemName = SelectedNode.Text
    End If
    '清理解压后的文件
    Set xmld = Nothing
    Set NewDOMD = Nothing
    DeleteDirectory tempFolder
End Sub
09-12 09:56