从PDF中提取文本 - Excel VBA

时间:2017-09-15 13:12:45

标签: excel excel-vba vba

我有为我编写的代码,它适用于我想要查找的四个条目,但有没有办法可以添加另一个字段值来搜索,比如ID?我试图改变我认为可行的代码,但是我遇到了错误。我从来没有编写过这么复杂的东西,所以我不完全理解作者在每个部分做了什么。

Option Explicit

Public Sub ExtractFieldValues()

  Const CONSTLAST   As Long = 1
  Const CONSTFIRST  As Long = 2
  Const CONSTMIDDLE As Long = 3
  Const CONSTRANK   As Long = 4
  Const TABLEONE     As String = "Table 1"
  Const FIELDVALUES As String = "FieldValues"
  Const LAST_FIRST_MIDDLE As String = "last first middle"
  Const FIELDNAMES        As String = LAST_FIRST_MIDDLE & " rank"
  Const NUMRECORDS As Long = 5 '6

  Dim ƒ As Excel.WorksheetFunction: Set ƒ = Excel.WorksheetFunction
  Dim ¡ As Long
  Dim lrd As Long
  Dim nextRowOutput As Long
  Dim arngFoundCells(CONSTLAST To CONSTRANK) As Range
  Dim varFoundCell As Variant
  Dim lngFirstFoundRow As Long
  Dim lngNextFoundRow As Long
  Dim rngNextFindStart As Range
  Dim dictFields As Object
  Dim astrFieldNames() As String
  Dim astrSplitValues() As String
  Dim strFoundValue As String
  Dim lngFieldCount As Long


Set dictFields = CreateObject("Scripting.Dictionary")
dictFields.CompareMode = vbTextCompare

  With Worksheets
    On Error Resume Next
    .Add(After:=.Item(.count)).name = FIELDVALUES
    On Error GoTo 0
    Application.DisplayAlerts = False
      If .Item(.count).name <> FIELDVALUES Then
        .Item(.count).Delete
        .Item(FIELDVALUES).UsedRange.Clear
      End If
    Application.DisplayAlerts = True
    .Item(TABLEONE).Activate
  End With

  astrFieldNames = Split(" " & FIELDNAMES, " ") ' Force index zero to a blank -> treat as base 1
  Set dictFields = CreateObject("Scripting.Dictionary")
  For ¡ = CONSTLAST To CONSTRANK
    dictFields.Add astrFieldNames(¡), ""
  Next ¡
  lrd _
  = Cells _
  .find _
      ( _
      What:="*" _
      , After:=Cells(1) _
      , LookIn:=xlFormulas _
      , Lookat:=xlPart _
      , SearchOrder:=xlByRows _
      , SearchDirection:=xlPrevious _
      ) _
 .Row

  With Range(Rows(1), Rows(lrd))

    For ¡ = CONSTLAST To CONSTRANK
      Set arngFoundCells(¡) = .find(What:=astrFieldNames(¡), After:=Cells(1))
    Next ¡
    lngFirstFoundRow _
= ƒ.Min _
    ( _
      arngFoundCells(CONSTLAST).Row _
    , arngFoundCells(CONSTFIRST).Row _
    , arngFoundCells(CONSTMIDDLE).Row _
    )
nextRowOutput = 1

Do
  For ¡ = CONSTLAST To CONSTRANK
'        Debug.Print arngFoundCells(¡).Address; " ";
        dictFields.Item(astrFieldNames(¡)) = ""
  Next ¡
'      Debug.Print
      Select Case True
        Case arngFoundCells(CONSTFIRST).Row = arngFoundCells(CONSTMIDDLE).Row:
          If arngFoundCells(CONSTRANK).Row <> arngFoundCells(CONSTFIRST).Row     Then
        Set arngFoundCells(CONSTRANK) = arngFoundCells(CONSTFIRST)
      End If
      For Each varFoundCell In arngFoundCells
        strFoundValue = ƒ.Trim(Replace(varFoundCell.Value2, vbLf, " ")) & " "
        If strFoundValue Like "[']*" Then strFoundValue = Mid$(strFoundValue, 2)
        If LCase$(strFoundValue) Like astrFieldNames(CONSTLAST) & " " Then
          strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
        End If
        If LCase$(strFoundValue) Like LAST_FIRST_MIDDLE & "*" _
        And Len(strFoundValue) - Len(Replace(strFoundValue, " ", "")) < 5 _
        Then
          strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
        End If
        astrSplitValues = Split(" " & strFoundValue, " ") ' Force index zero to a blank -> treat as base 1
        lngFieldCount = Int(UBound(astrSplitValues) / 2)
        For ¡ = 1 To lngFieldCount
          dictFields.Item(LCase(astrSplitValues(¡))) = astrSplitValues(¡ + lngFieldCount)
        Next ¡
      Next varFoundCell
    Case Else
      Debug.Print "  SKIPPED: ";
      For ¡ = CONSTLAST To CONSTRANK
        Debug.Print arngFoundCells(¡).Address; " ";
      Next ¡
      Debug.Print
      For ¡ = CONSTLAST To CONSTRANK
        Debug.Print "    "; ƒ.Trim(arngFoundCells(¡).Value2)
      Next ¡
      Debug.Print
  End Select
  Sheets(FIELDVALUES).Columns(1).Cells(nextRowOutput).Resize(4).Value _
  = ƒ.Transpose(dictFields.Items)
  nextRowOutput = nextRowOutput + NUMRECORDS
  Set rngNextFindStart = Rows(arngFoundCells(CONSTFIRST).Row + 2).Cells(1)
  For ¡ = CONSTLAST To CONSTRANK
    Set arngFoundCells(¡) = .find(What:=astrFieldNames(¡), After:=rngNextFindStart)
  Next ¡
  lngNextFoundRow _
  = ƒ.Min _
      ( _
        arngFoundCells(CONSTLAST).Row _
      , arngFoundCells(CONSTFIRST).Row _
      , arngFoundCells(CONSTMIDDLE).Row _
      )
Loop While lngNextFoundRow <> lngFirstFoundRow

  End With

End Sub

0 个答案:

没有答案