我在 VBA 中使用 ServerXMLHTTP 请求编写了一个脚本,以便能够使用 proxy 并为请求设置 timeout 参数。运行脚本时,它看起来可以正常工作,但问题是:尝试完第一个代理之后脚本就卡住了。我希望它能一直运行,直到没有可用的代理为止。我加入 While .readyState < 4: DoEvents: Wend 这一行,只是为了避免脚本冻结。这样使用代理能正常工作,对吗?
这是我尝试过的:
' Sends the same GET request through each proxy in proxyList, one after another,
' and prints the text of the first element matching ".summary .question-hyperlink"
' in the response, or a "failed with" line when no such element is found.
' NOTE(review): this is the version that hangs after the first proxy; see the
' comments on the wait loop below.
Sub MakeProxiedRequests()
Dim Http As New ServerXMLHTTP60, Html As New HTMLDocument
Dim elem As Object, proxyList As Variant, oProxy As Variant
' Candidate proxies in "host:port" form, tried in the order listed.
proxyList = Array( _
"191.96.42.184:3129", _
"138.197.108.5:3128", _
"35.245.145.147:8080", _
"173.46.67.172:58517", _
"191.96.42.82:3129", _
"157.55.201.224:8080", _
"67.205.172.239:3128", _
"191.96.42.106:3129" _
)
For Each oProxy In proxyList
Debug.Print "trying with: " & oProxy
With Http
' Third argument True opens the request asynchronously, so .send returns
' without waiting for the response.
.Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", True
.setRequestHeader "User-Agent", "Mozilla/5.0"
' 2 selects an explicitly named proxy server (SXH_PROXY_SET_PROXY).
.setProxy 2, oProxy
' Arguments are resolve, connect, send and receive timeouts in milliseconds.
.setTimeouts 600000, 600000, 15000, 15000 'I don't know the ideal timeout parameters
' From here until On Error GoTo 0 every runtime error is silently skipped.
On Error Resume Next
.send
' This loop has no exit condition other than readyState reaching 4 and no
' elapsed-time limit of its own, so it spins for as long as the request stays
' below state 4.
While .readyState < 4: DoEvents: Wend 'to let not freeze the script
Html.body.innerHTML = .responseText
' If this Set raises an error it is skipped, and elem keeps whatever it held
' from an earlier iteration because it is never reset inside the loop.
Set elem = Html.querySelectorAll(".summary .question-hyperlink")
On Error GoTo 0
End With
' Error handling is back to default here: elem.Length raises a runtime error
' if elem is still Nothing.
If elem.Length > 0 Then
Debug.Print elem(0).innerText
Else:
Debug.Print "failed with: " & oProxy
End If
Next oProxy
End Sub
如何让脚本一直运行,直到所有代理都用尽为止?
答案 0 :(得分:2)
一种可行的方法是跟踪每个请求的总耗时并对其加以限制,同时也检查是否出现了运行时错误。
' Sends the same GET request through each proxy in aProxyList, one after another,
' and prints the text of the first element matching ".summary .question-hyperlink"
' in the response, or "Failed with: <proxy>" when that proxy did not deliver it.
'
' A proxy counts as failed when:
'   * any runtime error is raised while preparing, sending or reading the request,
'   * the request does not reach readyState 4 within Timeout, or
'   * the response contains no matching element.
' A failure never stops the run; the loop always moves on to the next proxy.
Sub MakeProxiedRequests()
    Const Timeout = "0:00:15"           ' overall per-proxy time limit (h:mm:ss)
    Dim oHttp As New ServerXMLHTTP60
    Dim oHtml As New HTMLDocument
    Dim oElem As Object
    Dim aProxyList
    Dim sProxy
    Dim t As Date
    Dim bFailed As Boolean
    Dim lState As Long
    Dim sBody As String

    aProxyList = Array( _
        "191.96.42.184:3129", _
        "138.197.108.5:3128", _
        "35.245.145.147:8080", _
        "173.46.67.172:58517", _
        "191.96.42.82:3129", _
        "157.55.201.224:8080", _
        "67.205.172.239:3128", _
        "191.96.42.106:3129" _
    )

    For Each sProxy In aProxyList
        Debug.Print "Trying with: " & sProxy

        ' Start every attempt from a clean state so nothing leaks in from the
        ' previous proxy.
        Set oElem = Nothing
        bFailed = False
        sBody = vbNullString

        ' Everything that talks to the proxy runs under Resume Next, so that a
        ' dead or misbehaving proxy marks this attempt as failed instead of
        ' halting the whole procedure.
        On Error Resume Next
        Err.Clear
        With oHttp
            .Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", True
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            .setProxy 2, sProxy
            .setTimeouts 60000, 60000, 60000, 60000
            .send
            ' Err is not reset by successful statements, so this single check
            ' covers .Open through .send.
            bFailed = (Err.Number <> 0)

            t = Now() + TimeValue(Timeout)
            Do Until bFailed
                ' Read the state into a local first: an error raised inside an
                ' If condition under Resume Next could otherwise fall through
                ' into the Then branch and be mistaken for completion.
                lState = 0
                lState = .readyState
                If Err.Number <> 0 Then
                    bFailed = True
                ElseIf lState = 4 Then
                    Exit Do
                ElseIf Now() > t Then
                    bFailed = True
                Else
                    DoEvents            ' keep the host application responsive
                End If
            Loop

            If bFailed Then
                ' Cancel whatever is still pending before the object is reused.
                .abort
            Else
                ' Reading the response raises if the request itself failed;
                ' that is caught here as well.
                sBody = .responseText
                oHtml.body.innerHTML = sBody
                Set oElem = oHtml.querySelectorAll(".summary .question-hyperlink")
                If Err.Number <> 0 Then
                    bFailed = True
                ElseIf oElem Is Nothing Then
                    bFailed = True
                Else
                    bFailed = (oElem.Length = 0)
                End If
            End If
        End With
        Err.Clear
        On Error GoTo 0

        If Not bFailed Then
            Debug.Print oElem(0).innerText
        Else
            Debug.Print "Failed with: " & sProxy
        End If
    Next
End Sub