作为对此问题的跟进:Pattern matching in VBS
我想把那里的模式匹配方法用到我流程的第一步中。我原先的做法是:先把全部 1,000 个页面的 innerHTML 转储到一个文本文件里,然后用上面链接中的方法在该文本文件中做模式匹配。只转储少量页面的 innerHTML 时这样做没有问题,但把我需要的所有页面都处理完之后,生成的文本文件超过了 1GB!对这个文件做模式匹配时,脚本直接挂起了(即使让它运行一整夜也没有结果)。
于是我的新思路是:在第一步中直接对 innerHTML 做模式匹配,这样只需要把所需的 URL 写入文本文件。下面是我目前的代码——运行时没有报错,但文本文件里始终没有写入任何内容。我哪里做错了?
我认为问题出在这里:
' Extracts every "LicenseDetail..." fragment from the page currently loaded in
' the IE automation object and appends them, one per line, to
' C:\AgentURLsRaw.txt.
Dim objRegExp, colMatches, objMatch

Set objRegExp = New RegExp
objRegExp.IgnoreCase = True
objRegExp.Global = True
' Stop at either quote character: the original pattern only excluded the
' apostrophe, so a double-quoted attribute value would not end the match.
objRegExp.Pattern = "LicenseDetail[^'""]+"

strHTML = ie.document.body.innerhtml
Set colMatches = objRegExp.Execute(strHTML)

' The pattern contains no capture group, so the SubMatches collection is empty
' and SubMatches(0) raises an error; the matched text itself is in .Value.
If colMatches.Count > 0 Then
    ' Open the output file once (8 = ForAppending, True = create if missing)
    ' instead of once per match.
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set ts = fso.OpenTextFile("C:\AgentURLsRaw.txt", 8, True)
    For Each objMatch In colMatches
        ' WriteLine keeps each URL on its own line.
        ts.WriteLine objMatch.Value
    Next
    ts.Close
End If
完整代码:
' Walks the myfloridalicense.com search results with Internet Explorer.
' For every county code from 12 to 80 it submits the search form (board "25",
' license type "2501", 50 records per page), steps through the result pages and
' appends each "LicenseDetail..." fragment found in the page HTML to
' C:\AgentURLsRaw.txt, one per line.
Const OUTPUT_FILE = "C:\AgentURLsRaw.txt"
Const FOR_APPENDING = 8

Dim i, objRegExp, colMatches, objMatch

Set objWshShell = Wscript.CreateObject("Wscript.Shell")
Set IE = CreateObject("internetexplorer.application")
Set fso = CreateObject("Scripting.FileSystemObject")
IE.Visible = True

' The pattern never changes, so the RegExp object is created once.
Set objRegExp = New RegExp
objRegExp.IgnoreCase = True
objRegExp.Global = True
' Stop at either quote character so a double-quoted attribute ends the match.
objRegExp.Pattern = "LicenseDetail[^'""]+"

' The original looped "12 To 81" and exited at 81, i.e. 12..80 inclusive.
For ii = 12 To 80
    IE.Navigate "https://www.myfloridalicense.com/wl11.asp?Mode=1&SID=&brd=&typ="
    Do Until IE.ReadyState = 4: WScript.sleep 15: Loop

    ' Selecting the board requires a form round-trip before the dependent
    ' drop-downs can be set.
    IE.Document.getElementsByTagName("select")("Board").Value = "25"
    Set Form = IE.Document.Forms("reportForm")
    Form.hDDChange.Value = "Y"
    Form.Submit
    Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.sleep 10: Loop

    IE.Document.getElementsByTagName("select")("LicenseType").Value = "2501"
    IE.Document.getElementsByTagName("select")("County").Value = ii
    IE.Document.getElementsByTagName("select")("RecsPerPage").Value = "50"

    For Each btn In IE.Document.getElementsByTagName("input")
        If btn.type = "image" Then btn.Click()
        Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
    Next
    Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop

    pg = IE.Document.getElementsByName("hTotalPages")(0).Value

    ' NOTE(review): this runs pg + 1 times, one more than the reported page
    ' count - kept as in the original; confirm whether that is intended.
    i = 0
    Do While i < pg + 1
        i = i + 1
        On Error Resume Next

        ' Errors are still tolerated, but reported instead of silently ignored.
        Err.Clear
        strHTML = IE.Document.body.innerHTML
        Set colMatches = objRegExp.Execute(strHTML)
        If Err.Number <> 0 Then
            WScript.Echo "Page " & i & " of county " & ii & " could not be read: " & Err.Description
            Err.Clear
        ElseIf colMatches.Count > 0 Then
            Set ts = fso.OpenTextFile(OUTPUT_FILE, FOR_APPENDING, True)
            For Each objMatch In colMatches
                ' The pattern has no capture group, so SubMatches(0) fails;
                ' the matched text is the Match object's Value.
                ts.WriteLine objMatch.Value
            Next
            ts.Close
        End If

        Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop
        For Each btn In IE.Document.getElementsByTagName("input")
            If btn.name = "SearchForward" And btn.value = "Search" Then btn.Click()
        Next

        ' Let the click start navigating, then wait for the next page so the
        ' following iteration does not read the previous page's HTML.
        WScript.sleep 100
        Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
        Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop
    Loop
Next

' DONE without quotes is an undefined variable and echoes an empty string.
WScript.Echo "DONE"
谢谢!
答案 0 :(得分:1)
问题解决了。下面是我的最终代码,希望对其他人也有帮助:
' Walks the myfloridalicense.com search results with Internet Explorer.
' For every county code from 12 to 80 it submits the search form (board "25",
' license type "2501", 50 records per page), steps through the result pages,
' collects each "LicenseDetail..." fragment via fGetMatches and appends it as
' an absolute URL to C:\AgentURLsRaw.txt, one per line.
Const OUTPUT_FILE = "C:\AgentURLsRaw.txt"
Const FOR_APPENDING = 8
Const DETAIL_PREFIX = "LicenseDetail.asp?SID=&id="
Const SITE_ROOT = "https://www.myfloridalicense.com/"

Dim i, hasMatches, pageText

Set objWshShell = Wscript.CreateObject("Wscript.Shell")
Set IE = CreateObject("internetexplorer.application")
Set fso = CreateObject("Scripting.FileSystemObject")
IE.Visible = True

' The original looped "12 To 81" and exited at 81, i.e. 12..80 inclusive.
For ii = 12 To 80
    IE.Navigate "https://www.myfloridalicense.com/wl11.asp?Mode=1&SID=&brd=&typ="
    Do Until IE.ReadyState = 4: WScript.sleep 15: Loop

    ' Selecting the board requires a form round-trip before the dependent
    ' drop-downs can be set.
    IE.Document.getElementsByTagName("select")("Board").Value = "25"
    Set Form = IE.Document.Forms("reportForm")
    Form.hDDChange.Value = "Y"
    Form.Submit
    Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
    Do Until IE.Document.ReadyState = "complete": WScript.sleep 10: Loop

    IE.Document.getElementsByTagName("select")("LicenseType").Value = "2501"
    IE.Document.getElementsByTagName("select")("County").Value = ii
    IE.Document.getElementsByTagName("select")("RecsPerPage").Value = "50"

    For Each btn In IE.Document.getElementsByTagName("input")
        If btn.type = "image" Then btn.Click()
        Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
    Next
    Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop

    pg = IE.Document.getElementsByName("hTotalPages")(0).Value

    ' NOTE(review): this runs pg + 1 times, one more than the reported page
    ' count - kept as in the original; confirm whether that is intended.
    i = 0
    Do While i < pg + 1
        i = i + 1
        On Error Resume Next

        strPattern = "LicenseDetail[^""]+"
        strTestString = IE.Document.body.innerHTML
        arrAllMatches = fGetMatches(strPattern, strTestString)

        ' fGetMatches signals "no match" with a one-element array holding "".
        ' Testing UBound <> 0 alone also discarded pages with exactly one
        ' real match, so the single element is inspected as well.
        hasMatches = False
        If IsArray(arrAllMatches) Then
            If UBound(arrAllMatches) > 0 Then
                hasMatches = True
            ElseIf UBound(arrAllMatches) = 0 Then
                hasMatches = (arrAllMatches(0) <> "")
            End If
        End If

        If hasMatches Then
            ' Fix up this page's URLs in memory before appending. Re-reading
            ' and rewriting the whole file on every page grows quadratically
            ' and re-prefixes lines that were already made absolute.
            pageText = Join(arrAllMatches, vbCrLf)
            ' innerHTML may return "&" as "&amp;" - decode it so the prefix
            ' below matches and the URL is usable (assumption; harmless when
            ' the text is already decoded).
            pageText = Replace(pageText, "&amp;", "&")
            pageText = Replace(pageText, DETAIL_PREFIX, SITE_ROOT & DETAIL_PREFIX)

            Set ts = fso.OpenTextFile(OUTPUT_FILE, FOR_APPENDING, True)
            ts.WriteLine pageText
            ts.Close
        Else
            WScript.Echo "-- None Found --"
        End If

        Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop
        For Each btn In IE.Document.getElementsByTagName("input")
            If btn.name = "SearchForward" And btn.value = "Search" Then btn.Click()
        Next

        ' Let the click start navigating, then wait for the next page so the
        ' following iteration does not read the previous page's HTML.
        WScript.sleep 100
        Do Until IE.ReadyState = 4: WScript.sleep 15: Loop
        Do Until IE.Document.ReadyState = "complete": WScript.sleep 15: Loop
    Loop
Next

' DONE without quotes is an undefined variable and echoes an empty string.
WScript.Echo "DONE"
' Returns all matches of a regular expression in a string as an array.
'
' sPattern - regular expression pattern (VBScript RegExp syntax).
' sStr     - text to search.
'
' The search is case-insensitive and global. The result is a zero-based array
' with the text of each match in order of appearance. When nothing matches, a
' one-element array containing an empty string is returned.
Function fGetMatches(sPattern, sStr)
    Dim regEx, colMatches, result(), idx

    Set regEx = New RegExp
    regEx.Pattern = sPattern
    regEx.IgnoreCase = True
    regEx.Global = True
    Set colMatches = regEx.Execute(sStr)

    If colMatches.Count = 0 Then
        ' Kept for callers that rely on this "no match" sentinel.
        fGetMatches = Array("")
        Exit Function
    End If

    ' Fill the array directly rather than joining the matches with a marker
    ' character and splitting again: that broke when a match contained the
    ' marker or when the script file's encoding altered the marker.
    ReDim result(colMatches.Count - 1)
    For idx = 0 To colMatches.Count - 1
        result(idx) = colMatches(idx).Value
    Next

    fGetMatches = result
End Function