在VBA的正则表达式中使用命名组

时间:2016-03-07 18:01:35

标签: regex vba excel-vba excel

有没有办法在VBA中使用带有正则表达式的命名组?

我想写一个与文件名中的日期匹配的Excel VBA Sub,并按指定的数量递减这些日期。我需要能够区分dd / mm和mm / dd格式 - 以及其他不规则性 - 并且使用这样的命名组可以解决问题:

(?:<month>\d\d)(?:<day>\d\d)

如有任何建议,不胜感激。

3 个答案:

答案 0 :(得分:3)

不,VBScript regular expressions中没有命名组。

VBScript使用与JScript相同的正则表达式引擎,因此它与JavaScript正则表达式兼容,后者也没有命名组。

你必须使用未命名的组,并在执行正则表达式之后,按照这些组在表达式中出现的顺序通过索引来检索它们。

通常,dd / mm和mm / dd无法自动区分,因为有些有效日期两种解读都成立(例如01/04既可能是1月4日,也可能是4月1日)。我不认为你能用正则表达式解决这个问题。

答案 1 :(得分:1)

这里是我今天制作的使用VBA的命名组的实现。希望这对其他人有用!:

'Description:
'  An implementation of Regex which includes Named Groups
'  and caching, implemented in VBA on top of a late-bound VBScript.RegExp.
'Example:
'  Dim match as Object
'  set match = RegexMatch("01/01/2019","(?<month>\d\d)\/(?<day>\d\d)\/(?<year>\d\d\d\d)")
'  debug.print match("day") & "/" & match("month") & "/" & match("year")
'Parameters:
'  haystack = the text to search
'  pattern  = the regex; named groups use the (?<name>...) syntax
'  options  = optional string of single-letter flags (see below)
'Options:
'  "i" = IgnoreCase
'  "g" = Global (only the first match is reported either way)
'Return value:
'  Nothing when haystack does not match pattern; otherwise
'  a dictionary object with the following keys:
'    0 = Whole match
'    1,2,3,... = Submatch 1,2,3,...
'    "Count" stores the number of capturing groups
'    "<<NAME>>" stores the match of a specified name
'Limitations:
'  A "(" inside a character class (e.g. "[(]") is still counted as a group.
Function RegexMatch(ByVal haystack As String, ByVal pattern As String, Optional ByVal options As String) As Object
  'Cache compiled regexes and their group lists between calls
  Static CachedRegex As Object
  Static CachedNames As Object
  If CachedRegex Is Nothing Then Set CachedRegex = CreateObject("Scripting.Dictionary")
  If CachedNames Is Nothing Then Set CachedNames = CreateObject("Scripting.Dictionary")

  'Meta-regex used to detect capturing groups and named capturing groups.
  'The negative lookahead skips (?: (?= and (?! which do not capture.
  Static NamedRegexp As Object
  If NamedRegexp Is Nothing Then
    Set NamedRegexp = CreateObject("VBScript.RegExp")
    NamedRegexp.pattern = "\((?!\?[:=!])(?:\?\<(.*?)\>)?"
    NamedRegexp.Global = True
  End If

  'The same key is used for the existence check, the store and the fetch,
  'so that a pattern compiled once is really reused.
  Dim cacheKey As String
  cacheKey = options & ")" & pattern

  'If cached pattern doesn't exist, create it.
  '.Exists is used because reading a missing key would silently add it.
  If Not CachedRegex.Exists(cacheKey) Then

    'Neutralise escaped backslashes and escaped parentheses so that
    'they are not mistaken for group openers
    Dim testPattern As String
    testPattern = pattern
    testPattern = Replace(testPattern, "\\", "asdasd")
    testPattern = Replace(testPattern, "\(", "asdasd")

    'Store names for optimisation
    Set CachedNames(cacheKey) = NamedRegexp.Execute(testPattern)

    'Create new VBA valid pattern: strip the ?<name> part of each group
    Dim newPattern As String
    newPattern = NamedRegexp.Replace(pattern, "(")

    'Create regexp from new pattern
    Dim oRegexp As Object
    Set oRegexp = CreateObject("VBScript.RegExp")
    oRegexp.pattern = newPattern

    'Set regex options (read from the "options" argument)
    Dim optIndex As Integer
    For optIndex = 1 To Len(options)
        Select Case Mid(options, optIndex, 1)
            Case "i"
                oRegexp.ignoreCase = True
            Case "g"
                oRegexp.Global = True
        End Select
    Next

    'Store regex for optimisation
    Set CachedRegex(cacheKey) = oRegexp
  End If

  'Get matches object
  Dim oMatches As Object
  Set oMatches = CachedRegex(cacheKey).Execute(haystack)

  'No match: return Nothing instead of failing on oMatches(0)
  If oMatches.Count = 0 Then
    Set RegexMatch = Nothing
    Exit Function
  End If

  'Get names object
  Dim CName As Object
  Set CName = CachedNames(cacheKey)

  'Create dictionary to return
  Dim oRet As Object
  Set oRet = CreateObject("Scripting.Dictionary")

  'Fill dictionary with names and indexes
  '0 = Whole match
  '1,2,3,... = Submatch 1,2,3,...
  '"Count" stores the number of capturing groups
  '"<<NAME>>" stores the match of a specified name
  Dim i As Integer
  For i = 1 To CName.Count
    oRet(i) = oMatches(0).Submatches(i - 1)
    If Not IsEmpty(CName(i - 1).Submatches(0)) Then oRet(CName(i - 1).Submatches(0)) = oMatches(0).Submatches(i - 1)
  Next i
  oRet(0) = oMatches(0)
  oRet("Count") = CName.Count
  Set RegexMatch = oRet
End Function

答案 2 :(得分:0)

感谢@Sancarn的代码!

出于某些原因,我已对其进行了修订。我所做的更改记录在代码中:

' Demo/test driver for 'RegexMatch'.
' It converts a date from 'mm/dd/yyyy' to 'dd.mm.yyyy' and prints every value twice,
' once fetched by key and once fetched by position, so both access paths can be compared:
'  - named groups by their real name: 'Item("group name")', or by position: 'Items(group number)'.
'  - unnamed groups by their number-generated name: 'Item("1")', or by position: 'Items(1)'.
'  - the whole match and the group count by their generated names "98" and "99",
'    or by position as the last two entries.
' The sample pattern also contains:
'  - a non-capturing group '(?:y)?', which is not listed in the result.
'  - a left parenthesis inside a character class '([x(])?', which does not disturb the group numbering.
' Take notice of:
'  - the small difference between 'Item' (lookup by key) and 'Items' (lookup by position).
'  - the quotes in 'Item("number of an unnamed group")'.
Sub TestRegexMatch()

  Const SAMPLE_TEXT As String = "01/23/2019z"
  Const SAMPLE_PATTERN As String = "(?<month>\d\d)\/([x(])?(?<day>\d\d)\/(?:y)?(?<year>\d\d\d\d)(z)?"

  Dim Groups As Scripting.Dictionary
  Dim DateByName As String
  Dim DateByPosition As String

  Set Groups = RegexMatch(SAMPLE_TEXT, SAMPLE_PATTERN)

  ' Reordered date, by group name and by position.
  DateByName = Groups.Item("day") & "." & Groups.Item("month") & "." & Groups.Item("year")
  DateByPosition = Groups.Items(2) & "." & Groups.Items(0) & "." & Groups.Items(3)
  Debug.Print DateByName & " vs. " & DateByPosition

  ' The two unnamed groups, by generated name and by position.
  Debug.Print "'" & Groups.Item("1") & "'" & ", '" & Groups.Item("4") & "'" & _
              " vs. " & _
              "'" & Groups.Items(1) & "', '" & Groups.Items(4) & "'"

  ' Whole match (second to last entry).
  Debug.Print Groups.Item("98") & " vs. " & Groups.Items(Groups.Count - 2)

  ' Number of groups (last entry).
  Debug.Print Groups.Item("99") & " vs. " & Groups.Items(Groups.Count - 1)

End Sub

' An implementation of regex which includes named groups and caching implemented in VBA.
' Early binding is used, so both of these libraries must be referenced
' (in VBA-editor: Tools -> References):
'  - 'Microsoft VBScript Regular Expressions 5.5'
'  - 'Microsoft Scripting Runtime' (for Scripting.Dictionary)
' Parameters:
'  - haystack: the string the regex is applied on.
'  - originalPattern: the regex pattern with or without named groups.
'    The group naming has to follow .net regex syntax: '(?<group name>group content)'.
'    Group names may contain the following characters: a-z, A-Z, 0-9, _ (underscore).
'    Group names must not be an empty string.
'  - options: a string that may contain:
'     - 'i' (the regex will work case-insensitive)
'     - 'g' (the regex will work globally; only the first match is reported either way)
'     - 'm' (the regex will work in multi-line mode)
'    or any combination of these.
' Returned value: Nothing if haystack does not match the pattern, otherwise
' a Scripting.Dictionary object with the following entries:
'  - Item "group name" for named groups, item "0", "1" ... (zero-based group number as string)
'    for unnamed groups; by position: Items(0), Items(1) ...,
'    following the convention of VBScript_RegExp_55.SubMatches collection, which is 0-based.
'  - Item Match.Count - 2 or "98" for the whole match, assuming that the number of groups is below.
'  - Item Match.Count - 1 or "99" for number of groups/submatches.
' Changes compared to the original version:
'  - Handles non-capturing and positive and negative lookahead groups.
'  - Handles left parenthesis inside a character class.
'  - Named groups do not count twice.
'    E.g. in the original version the second named group occupies items 3 and 4 of the returned
'    dictionary, in this revised version only item 1 (item 0 is the first named group).
'  - Additional 'm' option.
'  - Fixed fetching cached regexes.
'  - Early binding.
'  - Some code cleaning.
'  - Returns Nothing instead of raising a run-time error when there is no match.
' For an example take a look at the 'TestRegexMatch' procedure above.
Function RegexMatch(ByVal haystack As String, ByVal originalPattern As String, Optional ByVal options As String) As Scripting.Dictionary

  Dim CacheKey As String
  Dim GroupsPattern As String
  Dim RealPattern As String
  Dim RealRegExp As VBScript_RegExp_55.RegExp
  Dim RealMatches As VBScript_RegExp_55.MatchCollection
  Dim ReturnData As Scripting.Dictionary
  Dim GroupNames As VBScript_RegExp_55.MatchCollection
  Dim Ctr As Integer

  ' Cache regexes and group names for optimisation.
  Static CachedRegExps As Scripting.Dictionary
  Static CachedGroupNames As Scripting.Dictionary
  ' Group 'meta'-regex used to detect named and unnamed capturing groups.
  Static GroupsRegExp As VBScript_RegExp_55.RegExp

  If CachedRegExps Is Nothing Then Set CachedRegExps = New Scripting.Dictionary
  If CachedGroupNames Is Nothing Then Set CachedGroupNames = New Scripting.Dictionary

  If GroupsRegExp Is Nothing Then
    Set GroupsRegExp = New VBScript_RegExp_55.RegExp
    ' Matches a '(' that opens a capturing group, plus an optional '?<name>'.
    ' The negative lookahead rejects '(?:', '(?=', '(?!' and a '(' that is followed
    ' by a ']' without any bracket in between (i.e. sits inside a character class).
    GroupsRegExp.Pattern = "\((?!(?:\?:|\?=|\?!|[^\]\[]*?\]))(?:\?<([a-zA-Z0-9_]+?)>)?"
    GroupsRegExp.Global = True
  End If

  ' Options are part of the key: the same pattern with other options is another regex.
  CacheKey = "(" & options & ")" & originalPattern

  ' If the pattern isn't cached, create it.
  If Not CachedRegExps.Exists(CacheKey) Then

    ' Prepare the pattern for retrieving named and unnamed groups:
    ' escaped backslashes, parentheses and brackets are replaced by a neutral character.
    GroupsPattern = Replace(Replace(Replace(Replace(originalPattern, "\\", "X"), "\(", "X"), "\[", "X"), "\]", "X")

    ' Store group names for optimisation.
    CachedGroupNames.Add CacheKey, GroupsRegExp.Execute(GroupsPattern)

    ' Create new VBScript regex valid pattern and set regex for this pattern.
    RealPattern = GroupsRegExp.Replace(originalPattern, "(")
    Set RealRegExp = New VBScript_RegExp_55.RegExp
    RealRegExp.Pattern = RealPattern

    ' Set regex options.
    For Ctr = 1 To Len(options)
      Select Case Mid(options, Ctr, 1)
        Case "i"
          RealRegExp.IgnoreCase = True
        Case "g"
          RealRegExp.Global = True
        Case "m"
          RealRegExp.MultiLine = True
      End Select
    Next

    ' Store this regex for optimisation.
    CachedRegExps.Add CacheKey, RealRegExp
  End If

  ' Get matches.
  Set RealMatches = CachedRegExps.Item(CacheKey).Execute(haystack)

  ' No match: there is nothing to index into, so report it as Nothing.
  If RealMatches.Count = 0 Then
    Set RegexMatch = Nothing
    Exit Function
  End If

  ' Get group names.
  Set GroupNames = CachedGroupNames.Item(CacheKey)

  ' Create dictionary to return.
  Set ReturnData = New Scripting.Dictionary

  ' Fill dictionary with names and indexes as described in the remarks introducing this procedure.
  For Ctr = 1 To GroupNames.Count
    If IsEmpty(GroupNames(Ctr - 1).SubMatches(0)) Then
      ' Unnamed group: the key is its zero-based number as a string.
      ReturnData.Add CStr(Ctr - 1), RealMatches(0).SubMatches(Ctr - 1)
    Else
      ReturnData.Add GroupNames(Ctr - 1).SubMatches(0), RealMatches(0).SubMatches(Ctr - 1)
    End If
  Next
  ReturnData.Add "98", RealMatches.Item(0)
  ReturnData.Add "99", GroupNames.Count

  ' Return the result.
  Set RegexMatch = ReturnData

End Function

作为进一步的改进,这段代码可以作为一个类模块的基础,用来替代VBScript正则表达式。