在文件上查找正则表达式需要很长时间

时间:2013-09-27 09:20:24

标签: regex vbscript readfile scom

我正在开发一个VBS脚本,用于查找包含在文件中的3个正则表达式。我已经有了自己的脚本,可以很好地查找,但执行时需要很长时间。

正常表达的3个案例是:

Case 1
    Begin pattern:

          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +Mail Stats: Received\([0-9]+\) Sent\(0\) In Queue\([1-9][0-9]*\) In Work\([0-9]+\) Max Sessions\([0-9]+\) Errors\([0-9]+\) Invalid Msgs\([0-9]+\) Tracing\((Yes|No)\)$

     End patern:
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +Mail Stats: Received\([0-9]+\) Sent\([1-9][0-9]*\) In Queue\([0-9]+\) In Work\([0-9]+\) Max Sessions\([0-9]+\) Errors\([0-9]+\) Invalid Msgs\([0-9]+\) Tracing\((Yes|No)\)$


Case 2
     Begin pattern:
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Unable to write to \(pdm_mail_nxd\) for [1-9][0-9]* seconds\.
     End pattern (1):
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Successful write to \(pdm_mail_nxd\) occurred\. Error cleared after [0-9]+ seconds\.
     End pattern (2):
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +STARTUP of pdm_mail_nxd

Case 3
     Begin pattern:
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Too long in error state - dropping connection to SOCKET_PORT\(0x[0-9A-F]+\) description = socket port port_name = pdm_mail_nxd
     End pattern:
          ^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +STARTUP of pdm_mail_nxd

......这是我的剧本:

Dim oAPI, oBag
Dim objFso, objFile, objTextFile, obj_Carpeta
Dim str_Path, strFile, stdlog, fileName, text, i, j, LineCount, sline, strlog, fLines
Dim strMatch_Start1, strMatch_End1, strMatch_Start2, strMatch_End2A, strMatch_End2B, strMatch_Start3, strMatch_End3
Dim exp_Start1, exp_Start2, exp_Start3, exp_End1, exp_End2A, exp_End2B, exp_End3
Dim case1, case2, case3

str_Path = "E:\CA\Service Desk Manager\log2"

'CASE 1
exp_Start1 = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +Mail Stats: Received\([0-9]+\) Sent\(0\) In Queue\([1-9][0-9]*\) In Work\([0-9]+\) Max Sessions\([0-9]+\) Errors\([0-9]+\) Invalid Msgs\([0-9]+\) Tracing\((Yes|No)\)$"
exp_End1   = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +Mail Stats: Received\([0-9]+\) Sent\([1-9][0-9]*\) In Queue\([0-9]+\) In Work\([0-9]+\) Max Sessions\([0-9]+\) Errors\([0-9]+\) Invalid Msgs\([0-9]+\) Tracing\((Yes|No)\)$"
'CASE 2
exp_Start2 = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Unable to write to \(pdm_mail_nxd\) for [1-9][0-9]* seconds\."
exp_End2A  = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Successful write to \(pdm_mail_nxd\) occurred\. Error cleared after [0-9]+ seconds\." 
exp_End2B  = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +STARTUP of pdm_mail_nxd"
'CASE 3
exp_Start3 = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +slump_nxd +[0-9]+ +SIGNIFICANT +socket_port\.c +[0-9]+ +Too long in error state - dropping connection to SOCKET_PORT\(0x[0-9A-F]+\) description = socket port port_name = pdm_mail_nxd"
exp_End3   = "^[0-9]{2}/[0-9]{2} +[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{2} +[a-zA-Z0-9_\.]+ +pdm_mail_nxd +[0-9]+ +SIGNIFICANT +pdm_mail_nxd\.c +[0-9]+ +STARTUP of pdm_mail_nxd"

Set oAPI = CreateObject("MOM.ScriptAPI")
Set oBag = oAPI.CreatePropertyBag()

Set objFso = CreateObject("Scripting.FileSystemObject")
Set obj_Carpeta = objFso.GetFolder(str_Path)

'Check valid path
If not objFso.FolderExists(str_Path) Then
    WScript.Echo "Error: " & str_Path & " -- not found."
    'Call oAPI.LogScriptEvent("mail_queue SDM",9551,0, "Process pdm_mail_nxd: path not found" )
    Set objFso = Nothing
    WScript.Quit 2
End If

'Call oAPI.LogScriptEvent("mail_queue SDM",9552,0, "init monitor" )

 For i = 0 to 8 
    For Each objFile In obj_Carpeta.Files
        'Check if it has files    
        stdlog = "stdlog." &i
        fileName = objFile.Name
        case1 = "False" 
        case2 = "False"
        case3 = "False"
        WScript.echo fileName
        WScript.echo stdlog
        If fileName = stdlog Then
            'Have file
            text = "" 
            i = i+1 
            strFile = str_Path & "\" & fileName
            WScript.echo strFile
            'Set objTextFile = objFile.OpenAsTextStream(ForReading)   
            text = objFile.OpenAsTextStream(ForReading).ReadAll
            WScript.echo "Reading File ..."
            'Split by lines, put into an array
            fLines = Split(text,vbCrLf)
            'Use UBound to count the lines
            LineCount = UBound(fLines)
            Set objFile = Nothing 'Cleanup
            WScript.echo "File read...   " & LineCount  & " lines" & vbCrLf


            If (InStr(1,text,"pdm_mail_nxd") > 0) Then
                'exp_Start1, exp_End1, exp_End2B, exp_End3
                WScript.echo "WORD pdm_mail_nxd FOUND!!" 
                For j = 0 to LineCount
                    sline = fLines(j)

                    'Match expression
                    strMatch_Start1 = TestRegExp (exp_Start1 , sline)
                    strMatch_End1 = TestRegExp (exp_End1 , sline)
                    'strMatch_End2B = TestRegExp (exp_End2B , sline)
                    'strMatch_End3 = TestRegExp (exp_End3 , sline)

                    If strMatch_Start1  = "True" Then

                        WScript.echo "------>   CASe 1 open"    
                        case1 = "True"      
                        strlog = sline
                        WScript.echo strlog
                        'j = j+1
                    Else

                        If strMatch_End1  = "True" Then 
                            case1 = "False" 
                        End If
                        case2 = "False"
                        case3 = "False"
                    End If
                Next
            End If

            If (InStr(1,text,"slump_nxd") > 0) Then
                'strMatch_Start2, strMatch_End2A, exp_Start3
                WScript.echo "WORD pdm_mail_nxd FOUND!!"    & vbCrLf
                For j = 0 to LineCount
                    sline = fLines(j)   
                    'Match expression
                    strMatch_Start2 = TestRegExp (exp_Start2 , sline)
                    strMatch_End2A = TestRegExp (exp_End2A , sline)
                    strMatch_Start3 = TestRegExp (exp_Start3 , sline)
                    If strMatch_Start2  = "True" Then

                        WScript.echo "-----> CASe 2 open"   
                        case2 = "True"
                        strlog = sline
                        WScript.echo strlog                 
                        'j = j+1
                    ElseIf strMatch_Start3  = "True" Then

                        WScript.echo "----> CASe 3 open"    
                        case3 = "True"                      
                        strlog = sline
                        WScript.echo strlog
                        'j = j+1
                    Else

                        case1 = "False" 
                        'case2 = "False"
                        'case3 = "False"
                    End If
                Next
            End If      
        End if
        WScript.echo "***************"
    Next
Next

If ((case1 = "True") or (case2 = "True") or (case3 = "True")) Then
    Call oAPI.LogScriptEvent("mail_queue SDM",9552,0, "DELAY -->  " & StdlogValue )
    Call oBag.AddValue("Status","Demora en envio de correo")
    Call oBag.AddValue("StdlogValue",strlog)
Else
    Call oAPI.LogScriptEvent("mail_queue SDM",9552,0, "NO DELAY")
    Call oBag.AddValue("Status","Ok")
End If

Function TestRegExp (myPattern , myString)
   Dim objRegExp, objMatch, colMatches, RetStr

   ' Create a regular expression object
   Set objRegExp = CreateObject("VBScript.RegExp")  
   objRegExp.IgnoreCase = True ' Set Case Insensitivity
   objRegExp.Global = True 'Set global applicability
   'Set the pattern by using the Pattern property
   objRegExp.Pattern = myPattern

   'Test whether the String can be compared
    If objRegExp.Test(myString) = True Then
       'Get the matches
        Set colMatches = objRegExp.Execute(myString)  ' Execute search
        For Each objMatch In colMatches   ' Iterate Matches collection
          RetStr = RetStr & objMatch.Value 
        Next
        If IsNull(RetStr) then
            RetStr = "False"
        Else
            RetStr = "True"
        End if
   Else
    ' String Matching Failed
     RetStr = "False"
   End If
   TestRegExp = RetStr

End Function    

'Call oAPI.LogScriptEvent("mail_queue SDM",9552,0, "End monitor" )
oAPI.Return(oBag)

'Cleanup objects 
Set objFSO = Nothing 
Set objFile = Nothing 
Set strlog = Nothing
WScript.Quit()

谁能告诉我我做错了什么?任何解决方案或建议,以使其更快?

1 个答案:

答案 0 :(得分:2)

性能问题的原因可能是您要多次解析每个文件。数组处理和多个正则表达式执行可能也无济于事。尝试这样的事情:

re_start = "..."
re_end   = "..."

Set re = New RegExp
re.Pattern = re_start & "\r\n([\s\S]*?)" & re_end
re.Global  = True

For Each f In objFso.GetFolder(str_Path).Files
  txt = f.OpenAsTextStream.ReadAll
  For Each m In re.Execute(txt)
    WScript.Echo m.SubMatches(0)
  Next
Next

在这种情况下,表达式re_startre_end不得以^$开头/结尾,并且其中的所有组都必须成为非捕获组( (?:...)代替(...)),例如:

"[0-9]{2}/... Tracing\((?:Yes|No)\)"

而不是

"^[0-9]{2}/... Tracing\((Yes|No)\)$"