在pdf文件中搜索字符串,然后将信息复制到word / excel / txt文件中?

时间:2019-06-18 09:27:02

标签: excel vba pdf ms-word word-vba

我正在尝试打开pdf文件并搜索字符串或子字符串,以便到达我需要的页面,然后将该信息(而不是整个页面,只是其中的一部分)复制到该页面中。 Word文件(或者我可以将信息存储在txt文件或excel中,然后获取它)。

我希望它足够清楚。我是VBA的新手,不知道该怎么做。我在互联网上搜索,没有发现任何有用的信息。我也使用Adobe Reader DC。

1 个答案:

答案 0 :(得分:1)

此外,您将需要安装Adobe Acrobat才能使用VBA扫描PDF文件。我不知道要花多少钱,但这不是免费的。如果需要免费选项,请将所有PDF文件转换为Word文件,然后对它们进行扫描。

Sub ConvertToWord()
   Dim MyObj As Object, MySource As Object, file As Variant
   file = Dir("C:\Users\Excel\Desktop\test\" & "*.pdf") 'pdf path
   Do While (file <> "")
   ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\"
          Documents.Open FileName:=file, ConfirmConversions:=False, ReadOnly:= _
        False, AddToRecentFiles:=False, PasswordDocument:="", PasswordTemplate:= _
        "", Revert:=False, WritePasswordDocument:="", WritePasswordTemplate:="", _
        Format:=wdOpenFormatAuto, XMLTransform:=""
    ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\" 'path for saving word
    ActiveDocument.SaveAs2 FileName:=Replace(file, ".pdf", ".docx"), FileFormat:=wdFormatXMLDocument _
        , LockComments:=False, Password:="", AddToRecentFiles:=True, _
        WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
         SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
        False, CompatibilityMode:=15
    ActiveDocument.Close
     file = Dir
   Loop
End Sub

然后,在Excel中运行以下代码。

Sub OpenAndReadWordDoc()

Rows("2:1000000").Select
Range(Selection, Selection.End(xlDown)).Select
Selection.ClearContents
Range("A1").Select

    ' assumes that the previous procedure has been executed
    Dim oWordApp As Word.Application
    Dim oWordDoc As Word.Document
    Dim blnStart As Boolean
    Dim r As Long
    Dim sFolder As String
    Dim strFilePattern As String
    Dim strFileName As String
    Dim sFileName As String
    Dim ws As Worksheet
    Dim c As Long
    Dim n As Long
    Dim iCount As Long
    Dim strSearch As String

    '~~> Establish an Word application object
    On Error Resume Next
    Set oWordApp = GetObject(, "Word.Application")
    If Err Then
        Set oWordApp = CreateObject("Word.Application")
        ' We started Word for this macro
        blnStart = True
    End If
    On Error GoTo ErrHandler

    Set ws = ActiveSheet
    r = 1 ' startrow for the copied text from the Word document
    ' Last column
    n = ws.Range("A1").End(xlToRight).Column

    sFolder = "C:\Users\Excel\Desktop\test\"

    '~~> This is the extension you want to go in for
    strFilePattern = "*.doc*"
    '~~> Loop through the folder to get the word files
    strFileName = Dir(sFolder & strFilePattern)
    Do Until strFileName = ""
        sFileName = sFolder & strFileName

        '~~> Open the word doc
        Set oWordDoc = oWordApp.Documents.Open(sFileName)
        ' Increase row number
        r = r + 1
        ' Enter file name in column A
        ws.Cells(r, 1).Value = sFileName

        ActiveCell.Offset(1, 0).Select
        ActiveSheet.Hyperlinks.Add Anchor:=Sheets("Sheet1").Range("A" & r), Address:=sFileName, _
        SubAddress:="A" & r, TextToDisplay:=sFileName

        ' Loop through the columns
        For c = 2 To n
            If oWordDoc.Content.Find.Execute(FindText:=Trim(ws.Cells(1, c).Value), _
                    MatchWholeWord:=True, MatchCase:=False) Then

                    strSearch = ws.Cells(1, c).Value
                    iCount = 0

                    With ActiveDocument.Content.Find
                        .Text = strSearch
                        .Format = False
                        .Wrap = wdFindStop
                        Do While .Execute
                            iCount = iCount + 1
                        Loop
                    End With

            ws.Cells(r, c).Value = iCount
            End If
        Next c
        oWordDoc.Close SaveChanges:=False

        '~~> Find next file
        strFileName = Dir
    Loop

ExitHandler:
    On Error Resume Next
    ' close the Word application
    Set oWordDoc = Nothing
    If blnStart Then
        ' We started Word, so we close it
        oWordApp.Quit
    End If
    Set oWordApp = Nothing
    Exit Sub

ErrHandler:
    MsgBox Err.Description, vbExclamation
    Resume ExitHandler
End Sub

Function GetDirectory(path)
   GetDirectory = Left(path, InStrRev(path, "\"))
End Function

enter image description here