查找包含“ _”的文本字符串

时间:2018-12-31 14:52:03

标签: vba ms-word

我正在寻找使用文本函数来查找包含下划线的字符串的正确语法。我有一个非常大的文档,其中包含许多带下划线的表名。它们的格式为abc_defabc_def_ghiabc_def_ghi_jkl等。

我尝试了各种组合,但无法正常工作。以下是我的最新迭代。

Sub ExtractTablesToNewDocument()
'=========================
'Macro created 2008 by Lene Fredborg, DocTools - www.thedoctools.com
'THIS MACRO IS COPYRIGHT. YOU ARE WELCOME TO USE THE MACRO BUT YOU MUST KEEP THE LINE ABOVE.
'YOU ARE NOT ALLOWED TO PUBLISH THE MACRO AS YOUR OWN, IN WHOLE OR IN PART.
'=========================
'The macro creates a new document,
'finds all words consisting of 3 or more uppercase letters
'in the active document and inserts the words
'in column 1 of a 3-column table in the new document
'Each acronym is added only once
'Use column 2 for definitions
'Page number of first occurrence is added by the macro in column 3

'Minor adjustments are made to the styles used
'You may need to change the style settings and table layout to fit your needs
'=========================

Dim oDoc_Source As Document
Dim oDoc_Target As Document
Dim strListSep As String
Dim strAcronym As String
Dim oTable As Table
Dim oRange As Range
Dim n As Long
Dim strAllFound As String
Dim Title As String
Dim Msg As String

Title = "Extract Acronyms to New Document"

'Show msg - stop if user does not click Yes
Msg = "This macro finds all words consisting of 3 or more " & _
    "uppercase letters and extracts the words to a table " & _
    "in a new document where you can add definitions." & vbCr & vbCr & _
    "Do you want to continue?"

If MsgBox(Msg, vbYesNo + vbQuestion, Title) <> vbYes Then
    Exit Sub
End If

Application.ScreenUpdating = False

'Find the list separator from international settings
'May be a comma or semicolon depending on the country
strListSep = Application.International(wdListSeparator)

'Start a string to be used for storing names of acronyms found
strAllFound = "#"

Set oDoc_Source = ActiveDocument

'Create new document for acronyms
Set oDoc_Target = Documents.Add

With oDoc_Target
    'Make sure document is empty
    .Range = ""

    'Insert info in header - change date format as you wish
    .PageSetup.TopMargin = CentimetersToPoints(3)
    .Sections(1).Headers(wdHeaderFooterPrimary).Range.Text = _
        "Acronyms extracted from: " & oDoc_Source.FullName & vbCr & _
        "Created by: " & Application.UserName & vbCr & _
        "Creation date: " & Format(Date, "MMMM d, yyyy")

    'Adjust the Normal style and Header style
    With .Styles(wdStyleNormal)
        .Font.Name = "Arial"
        .Font.Size = 10
        .ParagraphFormat.LeftIndent = 0
        .ParagraphFormat.SpaceAfter = 6
    End With

    With .Styles(wdStyleHeader)
        .Font.Size = 8
        .ParagraphFormat.SpaceAfter = 0
    End With

    'Insert a table with room for acronym and definition
    Set oTable = .Tables.Add(Range:=.Range, NumRows:=2, NumColumns:=3)
    With oTable
        'Format the table a bit
        'Insert headings
        .Range.Style = wdStyleNormal
        .AllowAutoFit = False

        .Cell(1, 1).Range.Text = "Acronym"
        .Cell(1, 2).Range.Text = "Definition"
        .Cell(1, 3).Range.Text = "Page"
        'Set row as heading row
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Font.Bold = True
        .PreferredWidthType = wdPreferredWidthPercent
        .Columns(1).PreferredWidth = 20
        .Columns(2).PreferredWidth = 70
        .Columns(3).PreferredWidth = 10
    End With
End With

With oDoc_Source
    Set oRange = .Range

    n = 1 'used to count below

    With oRange.Find
        'Use wildcard search to find strings consisting of 3 or more uppercase letters
        'Set the search conditions
        'NOTE: If you want to find acronyms with e.g. 2 or more letters,
        'change 3 to 2 in the line below
        .Text = "<*>[_]<*>"
        .Forward = True
        .Wrap = wdFindStop
        .Format = False
        .MatchCase = False
        .MatchWildcards = True

        'Perform the search
        Do While .Execute
            'Continue while found
            strAcronym = oRange
            'Insert in target doc

            'If strAcronym is already in strAllFound, do not add again
            If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then
                'Add new row in table from second acronym
                If n > 1 Then oTable.Rows.Add
                'Was not found before
                strAllFound = strAllFound & strAcronym & "#"

                'Insert in column 1 in oTable
                'Compensate for heading row
                With oTable
                    .Cell(n + 1, 1).Range.Text = strAcronym
                    'Insert page number in column 3
                    .Cell(n + 1, 3).Range.Text = oRange.Information(wdActiveEndPageNumber)
                End With

                n = n + 1
            End If
        Loop
    End With
End With

'Sort the acronyms alphabetically - skip if only 1 found
If n > 2 Then
    With Selection
        .Sort ExcludeHeader:=True, FieldNumber:="Column 1", SortFieldType _
            :=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending

        'Go to start of document
        .HomeKey (wdStory)
    End With
End If

Application.ScreenUpdating = True

'If no acronyms found, show msg and close new document without saving
'Else keep open
If n = 1 Then
    Msg = "No acronyms found."
    oDoc_Target.Close savechanges:=wdDoNotSaveChanges
Else
    Msg = "Finished extracting " & n - 1 & " acronymn(s) to a new document."
End If

MsgBox Msg, vbOKOnly, Title

'Clean up
Set oRange = Nothing
Set oDoc_Source = Nothing
Set oDoc_Target = Nothing
Set oTable = Nothing

End Sub

3 个答案:

答案 0 :(得分:1)

尝试:

.Text = "[! ]@_[! ]{1,}"

这将查找包含任何下划线的字符串,包括那些下划线开头或结尾的字符串。

答案 1 :(得分:0)

尝试:

Sub AcronymLister()
Application.ScreenUpdating = False
Dim StrTmp As String, StrAcronyms As String, i As Long, j As Long, k As Long, Rng As Range, Tbl As Table
StrAcronyms = "Acronym" & vbTab & "Page" & vbCr
With ActiveDocument
  With .Range
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .MatchWildcards = True
      .Wrap = wdFindStop
      .Text = "[! ^13^t^11]@_[! ^13^t^11]{1,}"
      .Replacement.Text = ""
      .Execute
    End With
    Do While .Find.Found = True
      If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
      End If
      If Len(.Text) = 0 Then .End = .Paragraphs(1).Range.Next.Start
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
  End With
  With .Range
    Set Rng = .Characters.Last
    With Rng
      If .Characters.First.Previous <> vbCr Then .InsertAfter vbCr
      .InsertAfter Chr(12)
      .Collapse wdCollapseEnd
      .Style = "Normal"
      .Text = StrAcronyms
      Set Tbl = .ConvertToTable(Separator:=vbTab, NumRows:=.Paragraphs.Count, NumColumns:=2)
      With Tbl
        .Columns.AutoFit
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Style = "Strong"
        .Rows.Alignment = wdAlignRowCenter
      End With
      .Collapse wdCollapseStart
    End With
  End With
End With
Set Rng = Nothing: Set Tbl = Nothing
Application.ScreenUpdating = True
End Sub

注意:上面的代码将缩写表放在当前文档的末尾。我已经修改了查找表达式,以排除制表符,段落分隔符和换行符。

答案 2 :(得分:0)

Sub AcronymLister()
Application.ScreenUpdating = False
Dim StrTmp As String, StrAcronyms As String, i As Long, j As Long, k As Long, Rng As Range, Tbl As Table
StrAcronyms = "Acronym" & vbTab & "Page" & vbCr
With ActiveDocument
  With .Range
    With .Find
      .ClearFormatting
      .Replacement.ClearFormatting
      .MatchWildcards = True
      .Wrap = wdFindStop
      .Text = "[! ^13^t^11]@_[! ^13^t^11]{1,}"
      .Replacement.Text = ""
      .Execute
    End With
    Do While .Find.Found = True
      If InStr(.Text, "_") = 0 Then
        If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
        Else
          .End = .Paragraphs(1).Range.Next.Start
        End If
      End If
     If InStr(.Text, "_") > 0 Then
        If InStr(1, StrAcronyms, .Text, vbBinaryCompare) = 0 Then
          StrAcronyms = StrAcronyms & .Text & vbTab & .Information(wdActiveEndAdjustedPageNumber) & vbCr
        Else
          .End = .Paragraphs(1).Range.Next.Start
        End If
      End If
      .Collapse wdCollapseEnd
      .Find.Execute
    Loop
  End With
  With .Range
    Set Rng = .Characters.Last
    With Rng
      If .Characters.First.Previous <> vbCr Then .InsertAfter vbCr
      .InsertAfter Chr(12)
      .Collapse wdCollapseEnd
      .Style = "Normal"
      .Text = StrAcronyms
      Set Tbl = .ConvertToTable(Separator:=vbTab, NumRows:=.Paragraphs.Count, NumColumns:=2)
      With Tbl
        .Columns.AutoFit
        .Rows(1).HeadingFormat = True
        .Rows(1).Range.Style = "Strong"
        .Rows.Alignment = wdAlignRowCenter
      End With
      .Collapse wdCollapseStart
    End With
  End With
End With
Set Rng = Nothing: Set Tbl = Nothing
Application.ScreenUpdating = True
End Sub
@macropod我添加了一个循环来处理零长度的字符串,并且代码似乎正在工作。我知道这不是最有效的方法,您有什么改进建议吗?再次感谢您为我提供帮助,我非常感谢您的帮助和知识。