Microsoft Word 2010 VBA:使用正则表达式查找和替换

时间:2015-06-04 02:56:21

标签: regex replace ms-word word-vba pci-compliance

我试图让我们的员工快速掩盖旧文档中信用卡数据的中间8位数字。我想用一个宏来使用正则表达式进行查找和替换是他们最快的方法。屏蔽是PCI DSS的要求,可能有数百或数千个文档来屏蔽数据。

下面的代码基本可用,但它还会识别并修改长度超过标准信用卡 16 位的数字串,我不知道如何阻止这类误报。(此问题已由 nhahtdh 解决)

下面的代码适用于 Visa、MasterCard 和 AmEx 卡(上述误报问题已修正)。不过,还可以通过为更多卡类型添加匹配模式,并支持更多用于分隔长数字的常用字符来改进它。

以下代码有效,但可以改进。任何人都可以帮助改善这一点:

  • 包括Luhn算法检查
  • 包括其他常用数字分隔符(还有哪些常用?)
  • 包括其他流行卡品牌
  • 停止查找并替换误报(例如,应排除44445555666677778)。由nhahtdh解决
    Sub PCI_mask_card_numbers()
'
' Searches the active document for numbers that look like Visa, MasterCard and
' AmEx credit card PANs and masks the middle eight digits with "x", keeping the
' leading group and the last four digits.
'
' Handled layouts:
'   * 16 digits unbroken, first digit 4 (Visa) or 5 (MasterCard)
'   * 15 digits unbroken, first digit 3 (AmEx)
'   * 16 digits in groups of four separated by ":", ".", " " or "-"
'     (first digit 4 or 5)
'
' All patterns use Word wildcard syntax; "<" and ">" anchor the match to word
' boundaries so that longer digit runs (e.g. 17 digits) are not touched.
'
' The number of masked cards is reported as the difference between the count
' of mask-like strings found after and before the replacements.
'
    Dim Counter As Long
    Dim Preexisting As Long
    Dim Msg As String
    Dim Title As String
    Dim Separator As Variant
    Dim FirstDigit As Variant

' Let the user know what's about to happen
    Msg = "The macro will now attempt to mask all the credit card numbers it can identify.  e.g. 4444555566667777 will become 4444xxxxxxxx7777"
    Title = "PCI DSS - Credit Card Masking"
    MsgBox Msg, vbInformation, Title

' Count how many things already look like masked PANs so the final tally is correct
    Preexisting = PCI_CountMaskLikeText()

' ########  Start masking PANs  ###################################################

' Visa - 16 digits straight
    PCI_ReplaceAllWildcard "<([4][0-9]{3})([0-9]{4})([0-9]{4})([0-9]{4})>", "\1xxxxxxxx\4"

' MasterCard - 16 digits straight
    PCI_ReplaceAllWildcard "<([5][0-9]{3})([0-9]{4})([0-9]{4})([0-9]{4})>", "\1xxxxxxxx\4"

' AmEx - 15 digits straight
    PCI_ReplaceAllWildcard "<([3][0-9]{2})([0-9]{4})([0-9]{4})([0-9]{4})>", "\1xxxxxxxx\4"

' Visa and MasterCard - PAN broken up into groups of four by a separator.
' The Visa and MasterCard patterns are disjoint (different first digit), so the
' order in which they run does not matter.
    For Each Separator In Array(":", ".", " ", "-")
        For Each FirstDigit In Array("4", "5")
            PCI_ReplaceAllWildcard _
                "<([" & FirstDigit & "][0-9]{3})(" & Separator & "[0-9]{4}" & Separator & "[0-9]{4}" & Separator & ")([0-9]{4})>", _
                "\1" & Separator & "xxxx" & Separator & "xxxx" & Separator & "\3"
        Next FirstDigit
    Next Separator

' ########  Done masking PANs  ###################################################

' Leave the cursor at the top of the document
    Selection.HomeKey Unit:=wdStory

' Count how many changes were done: new masks less previous mask-like data
    Counter = PCI_CountMaskLikeText() - Preexisting

' Let the user know the job is done
    Msg = "The macro has masked " & Str$(Counter) & " credit cards. Check the results and save the file if the changes are correct. If there are issues with the masking changes, do not save the file and consult the IT team."
    Title = "PCI DSS - Credit Card Masking." & Str$(Counter) & " cards masked"
    MsgBox Msg, vbInformation, Title
End Sub

' Runs one wildcard find-and-replace over the whole document.
'   FindPattern - Word wildcard pattern to search for
'   ReplaceWith - replacement text (may reference groups as \1, \2, ...)
Private Sub PCI_ReplaceAllWildcard(ByVal FindPattern As String, ByVal ReplaceWith As String)
    Selection.HomeKey Unit:=wdStory
    With Selection.Find
        .ClearFormatting
        .Replacement.ClearFormatting
        .Text = FindPattern
        .Replacement.Text = ReplaceWith
        .Forward = True
        ' wdFindContinue (not wdFindAsk) so no pass ever prompts the user mid-run
        .Wrap = wdFindContinue
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
        .Execute Replace:=wdReplaceAll
    End With
End Sub

' Returns the number of strings in the document that look like masked PANs.
' A separator-broken mask ("xxxx?xxxx") contributes two whole-word "xxxx" hits,
' so those hits are halved; an unbroken mask contributes one "xxxxxxxx" hit.
Private Function PCI_CountMaskLikeText() As Long
    Dim BrokenHits As Long
    Dim SolidHits As Long

    BrokenHits = PCI_CountOccurrences("xxxx", True)
    SolidHits = PCI_CountOccurrences("xxxxxxxx", False)

    ' Integer division: "/" assigned to a Long rounds half to even (1/2 -> 0,
    ' 3/2 -> 2), which would skew the before/after difference when the number
    ' of pre-existing "xxxx" hits is odd.
    PCI_CountMaskLikeText = (BrokenHits \ 2) + SolidHits
End Function

' Counts literal occurrences of Needle in the main document story.
'   Needle    - plain text to look for (not a wildcard pattern)
'   WholeWord - True to count only whole-word matches
Private Function PCI_CountOccurrences(ByVal Needle As String, ByVal WholeWord As Boolean) As Long
    Dim SearchRange As Range
    Dim Hits As Long

    Set SearchRange = ActiveDocument.Content
    With SearchRange.Find
        .ClearFormatting
        ' Options are passed explicitly so the count does not depend on
        ' whatever Find settings happen to be in effect (the masking passes
        ' switch wildcards on); wdFindStop ends the loop at the end of the story.
        Do While .Execute(FindText:=Needle, MatchWholeWord:=WholeWord, _
                          MatchWildcards:=False, Forward:=True, _
                          Wrap:=wdFindStop, Format:=False)
            Hits = Hits + 1
        Loop
    End With

    PCI_CountOccurrences = Hits
End Function

1 个答案:

答案 0 :(得分:1)

由于您使用的似乎是 Word 通配符语法,可以用 < 断言单词的开头,用 > 断言单词的结尾,从而防止模式在其前后紧邻字母或数字时仍然匹配(从一些简单的测试来看,这似乎是有效的)。

以下面的模式为例:

"([4][0-9]{3})(-[0-9]{4}-[0-9]{4}-)([0-9]{4})"

将其修改为:

"<([4][0-9]{3})(-[0-9]{4}-[0-9]{4}-)([0-9]{4})>"