Excel VBA PDF转换文本提取

时间:2017-09-22 14:18:09

标签: excel vba excel-vba

我正在使用为我编写的一些代码,而我在使用Department值时遇到了问题。我不希望它只发出一个现在正在做的单词,而且我不知道在哪里放置它以便除了一个单词之外它不会删除所有文本。这是代码:

(其中一行不会将其更改为代码视图,我不确定如何解决此问题)

'===============================================================
Private Enum i_
    ž__NONE = 0
  ID
  Last
  First
  Middle
  Rank
  Department
    ž__
    ž__FIRST = ž__NONE + 1
    ž__LAST = ž__ - 1
End Enum

Public Sub EditedVersion_ExtractFieldValues()



  Const l_Table_1     As String = "Table 1"
  Const l_FieldValues As String = "FieldValues"
  Const l_last_first_middle As String = "last first middle"
  Const s_FieldNames        As String = "id " & l_last_first_middle & " rank" & " department"
  Const n_OutputRowsPerRecord As Long = 7

  Dim ƒ As Excel.WorksheetFunction: Set ƒ = Excel.WorksheetFunction
  Dim ¡ As Long
  Dim dictFields As Object


 Set dictFields = CreateObject("Scripting.Dictionary")
 dictFields.CompareMode = vbTextCompare


  With Worksheets
    On Error Resume Next
    .Add(After:=.Item(.count)).name = l_FieldValues
    On Error GoTo 0
    Application.DisplayAlerts = False
      If .Item(.count).name <> l_FieldValues Then
        .Item(.count).Delete
        .Item(l_FieldValues).UsedRange.Clear
      End If
      .Item(l_FieldValues).Columns(1).NumberFormat = "@"
    Application.DisplayAlerts = True
    .Item(l_Table_1).Activate
  End With

  Dim astrFieldNames() As String
  astrFieldNames = Split(" " & s_FieldNames, " ") ' Force index zero to a blank -> treat as base 1


  With dictFields
    .CompareMode = vbTextCompare
    For ¡ = i_.ž__FIRST To i_.ž__LAST
      dictFields.Add astrFieldNames(¡), ""
    Next ¡
  End With
  Dim lngLastUsedRow As Long
  lngLastUsedRow _
  = Cells _
 .find _
      ( _
        What:="*" _
      , After:=Cells(1) _
      , LookIn:=xlFormulas _
      , Lookat:=xlPart _
      , SearchOrder:=xlByRows _
      , SearchDirection:=xlPrevious _
      ) _
     .Row

  With Range(Rows(1), Rows(lngLastUsedRow))

    Dim arngFoundCells(i_.ž__FIRST To i_.ž__LAST) As Range
    For ¡ = i_.ž__FIRST To i_.ž__LAST
      Set arngFoundCells(¡) = .find(What:=astrFieldNames(¡),  After:=Cells(1))
    Next ¡
    Dim lngFirstFoundRow As Long
    lngFirstFoundRow _
    = ƒ.Min _
        ( _
          arngFoundCells(i_.Last).Row _
        , arngFoundCells(i_.First).Row _
        , arngFoundCells(i_.Middle).Row _
        )
    Dim lngOuputSheetNextRow As Long
    lngOuputSheetNextRow = 1

    Dim varFoundCell As Variant
    Dim lngNextFoundRow As Long
    Dim rngNextFindStart As Range
    Dim astrSplitValues() As String
    Dim strFoundValue As String
    Dim lngFieldCount As Long
    Do
      For ¡ = i_.ž__FIRST To i_.ž__LAST
        'Debug.Print arngFoundCells(¡).Address; " ";
        dictFields.Item(astrFieldNames(¡)) = ""
      Next ¡
      Debug.Print
      Select Case True
        Case arngFoundCells(i_.First).Row = arngFoundCells(i_.Middle).Row:
      ' Edge case: missing rank (found rank is for next employee) -> copy first to rank (simplifies following code)
          'If varFoundCels(
          If arngFoundCells(i_.Rank).Row <> arngFoundCells(i_.First).Row Then
            Set arngFoundCells(i_.Rank) = arngFoundCells(i_.First)
          End If

          For Each varFoundCell In arngFoundCells
            If Not varFoundCell Like astrFieldNames(i_.Department) Then
            strFoundValue = ƒ.Trim(Replace(Replace(varFoundCell.Value2, vbLf, " "), ":", "")) & " "
            If strFoundValue Like "[']*" Then strFoundValue =     Mid$(strFoundValue, 2)
            ' ID field: only retain the first word of value
            End If
            If LCase$(strFoundValue) Like astrFieldNames(i_.ID) & "*" Then
              strFoundValue = Left$(strFoundValue, InStr(InStr(strFoundValue, " ") + 1, strFoundValue, " "))
            End If
            ' Department field: only retain the first word of value'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''THIS IS THE DEPARTMENT FIELD
            If (varFoundCell) Like astrFieldNames(i_.Department) & " " Then
              strFoundValue = Trim(varFoundCell.Value)
            End If
            ' Edge case: no last name value in merged cell -> assume value is in first cell of following row
            Debug.Print LCase$(strFoundValue)
            If LCase$(strFoundValue) Like astrFieldNames(i_.Last) & " " Then
              strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
            End If
            ' Edge case: Field names only in row -> assume field values are on the following row
            If LCase$(strFoundValue) Like l_last_first_middle & "*" _
            And Len(strFoundValue) - Len(Replace(strFoundValue, " ", "")) < 5 _
            Then
              strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
            End If
            astrSplitValues = Split(" " & strFoundValue, " ") ' Force index zero to a blank -> treat as base 1
            ' Array contains one/some/all field names first and then the values (with a possible extra blank value)
            lngFieldCount = Int(UBound(astrSplitValues) / 2)
            For ¡ = 1 To lngFieldCount
              dictFields.Item(astrSplitValues(¡)) = astrSplitValues(¡ + lngFieldCount)
            Next ¡


          Next varFoundCell


           'Only allow the id to be on the previous row
          If arngFoundCells(i_.ID).Row <> arngFoundCells(i_.First).Row - 1 Then
            dictFields.Item(astrFieldNames(i_.ID)) = 0
          End If
        Case Else
          Debug.Print "  SKIPPED: ";
          For ¡ = i_.ž__FIRST To i_.ž__LAST
            'Debug.Print arngFoundCells(¡).Address; " ";
          Next ¡
          Debug.Print
          For ¡ = i_.ž__FIRST To i_.ž__LAST
            Debug.Print "    "; ƒ.Trim(arngFoundCells(¡).Value2)
          Next ¡
          Debug.Print
      End Select
Sheets(l_FieldValues).Columns(1).Cells(lngOuputSheetNextRow).Resize(n_OutputRowPerRecord - 1).Value = ƒ.Transpose(dictFields.Items)
      lngOuputSheetNextRow = lngOuputSheetNextRow + n_OutputRowsPerRecord
      Set rngNextFindStart = Rows(arngFoundCells(i_.First).Row + 2).Cells(1)
      For ¡ = i_.ž__FIRST To i_.ž__LAST
        Set arngFoundCells(¡) = .find(What:=astrFieldNames(¡),         
After:=rngNextFindStart)
      Next ¡
      lngNextFoundRow _
  = ƒ.Min _
      ( _
        arngFoundCells(i_.Last).Row _
      , arngFoundCells(i_.First).Row _
      , arngFoundCells(i_.Middle).Row _
      )
    Loop While lngNextFoundRow <> lngFirstFoundRow
  End With

End Sub

0 个答案:

没有答案