无法让我的脚本在循环结束之前持续处理错误

时间:2019-03-17 12:10:18

标签: vba web-scraping error-handling proxy

我用 VBA 编写了一个脚本,用于在通过代理发出请求后抓取页面上显示的 IP 地址。我在脚本中使用了代理列表中的若干代理进行测试(目前这些代理可能一个都无法工作)。

  

但是,我想要实现的是,当请求失败时,以下脚本将打印该错误消息并继续处理下一个请求,否则它将解析该站点的ip地址并继续进行直到循环耗尽

到目前为止,我的尝试(请假定 proxyList 中的代理都是可用的):

' Requests https://www.myip.com/ through each proxy in proxyList and, when no
' error is detected, writes the proxy and the text of the page's "#ip" element
' to columns 1 and 2 of the next worksheet row.
Sub ValidateProxies()
    Dim Http As New ServerXMLHTTP60, elem As Object, S$
    Dim proxyList As Variant, oProxy As Variant

    ' Square-bracket notation evaluates the array constant into a Variant array
    ' of "host:port" strings.
    proxyList = [{"98.163.59.8:8080","134.209.115.223:3128","191.101.233.198:3129","198.177.126.218:80","35.185.201.225:8080"}]

    For Each oProxy In proxyList
        ' Suppress run-time errors while the request is sent through the proxy.
        On Error Resume Next
        With Http
            ' Third argument False = synchronous request; .send blocks until done.
            .Open "GET", "https://www.myip.com/", False
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            ' 2 presumably corresponds to SXH_PROXY_SET_PROXY (use the named proxy) - confirm.
            .setProxy 2, oProxy
            .send
        End With
        ' NOTE(review): the VBA docs state that Err.Clear is called automatically
        ' when any On Error statement executes, so the Err.Number test below
        ' presumably never sees an error raised by the request above - confirm.
        On Error GoTo 0

        If Err.Number <> 0 Then
            Debug.Print "Encountered an error"

        Else:
            ' Parse the response body in a throw-away HTML document.
            With New HTMLDocument
                .body.innerHTML = Http.responseText
                Set elem = .querySelector("#ip")
                ' R is not declared in this procedure (implicit variable unless it
                ' is declared elsewhere); it is used as the output row counter.
                R = R + 1: Cells(R, 1) = oProxy
                ' elem is dereferenced without a Nothing check.
                Cells(R, 2) = elem.innerText
            End With
        End If
    Next oProxy
End Sub

当出现错误时,如何让脚本打印该错误并继续运行,直到循环结束?

2 个答案:

答案 0 :(得分:2)

这里是带有异步请求池并将状态和错误记录到工作表的示例。它使用free-proxy-list.net中的代理列表。

Option Explicit

' Downloads the proxy table from free-proxy-list.net, then checks every proxy
' by sending an asynchronous GET to https://api.myip.com/ through it.
'
' Output (first sheet of ThisWorkbook, which is cleared first):
'   column 1 - proxy as "ip:port"
'   column 2 - response text, "Status <code> / <text>" for non-2xx answers,
'              "Error <number> / <description>" or "Timeout"
'
' Up to PoolCapacity requests are kept in flight at once; a request that has
' produced no result after ReqTimeout seconds is aborted and reported as a timeout.
Sub TestProxy()

    Const PoolCapacity = 50     ' maximum number of simultaneous requests
    Const ReqTimeout = 15       ' seconds to wait for a single request

    Dim sResp
    Dim aProxyList
    Dim oMatch
    Dim oWS
    Dim lIndex
    Dim ocPool
    Dim i
    Dim sResult
    Dim oReq

    ' Parsing proxy list from free-proxy-list.net
    With CreateObject("MSXML2.ServerXMLHTTP.6.0")
        .Open "GET", "https://free-proxy-list.net/", True
        .SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
        .Send
        ' Keep the UI responsive while the asynchronous request completes
        Do Until .ReadyState = 4: DoEvents: Loop
        sResp = .ResponseText
    End With
    With CreateObject("VBScript.RegExp")
        .Global = True
        ' Two adjacent table cells: dotted IPv4 address, then numeric port
        .Pattern = "<td[^>]*>(\d+\.\d+\.\d+\.\d+)<\/td><td[^>]*>(\d+)<\/td>"
        aProxyList = Array()
        For Each oMatch In .Execute(sResp)
            ReDim Preserve aProxyList(UBound(aProxyList) + 1)
            aProxyList(UBound(aProxyList)) = oMatch.SubMatches(0) & ":" & oMatch.SubMatches(1)
        Next
    End With
    ' Proxy checking with api.myip.com requests
    Set oWS = ThisWorkbook.Sheets(1)
    oWS.Cells.Delete
    ' Each pool item is Array(request object, start time, output row)
    Set ocPool = New Collection
    lIndex = 0
    Do
        ' Check pool for completed requests (backwards, because items are removed)
        For i = ocPool.Count To 1 Step -1
            sResult = ""
            On Error Resume Next
            With ocPool(i)(0)
                Select Case True
                    Case .ReadyState < 4
                        ' Still pending - leave sResult empty
                    Case .Status \ 100 <> 2
                        sResult = "Status " & .Status & " / " & .StatusText
                    Case Else
                        sResult = .ResponseText
                End Select
            End With
            If Err.Number <> 0 Then
                sResult = "Error " & Err.Number & " / " & Err.Description
            ElseIf sResult = "" Then
                ' Only a request that has not produced a result can time out;
                ' a finished request keeps its real result even if polled late
                If (Now - ocPool(i)(1)) * 86400 > ReqTimeout Then sResult = "Timeout"
            End If
            On Error GoTo 0
            If sResult <> "" Then
                oWS.Cells(ocPool(i)(2), 2).Value = sResult
                ' Cancel the request so a timed-out one does not stay in flight;
                ' errors from aborting are not interesting
                On Error Resume Next
                ocPool(i)(0).abort
                On Error GoTo 0
                ocPool.Remove i
            End If
            DoEvents
        Next
        ' Add new request to pool
        If ocPool.Count < PoolCapacity And lIndex <= UBound(aProxyList) Then
            Set oReq = CreateObject("MSXML2.ServerXMLHTTP.6.0")
            With oWS.Cells(lIndex + 1, 1)
                .Value = aProxyList(lIndex)
                ' Selecting a cell is only a progress indicator, and Range.Select
                ' fails when the sheet is not the active one
                If oWS Is ActiveSheet Then .Select
            End With
            With oReq
                .Open "GET", "https://api.myip.com/", True
                .SetProxy 2, aProxyList(lIndex)
                .SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
                .Send
            End With
            ocPool.Add Array( _
                oReq, _
                Now, _
                lIndex + 1 _
            )
            lIndex = lIndex + 1
            DoEvents
        End If
    Loop While ocPool.Count > 0
    MsgBox "Completed"

End Sub

答案 1 :(得分:1)

这将打印遇到的所有错误;您应该根据 Err.Number 对不同的错误做针对性处理。

Option Explicit
' Requests https://www.myip.com/ through each proxy of a fixed list.
'
' For every proxy whose request succeeds and whose response contains an element
' with id "ip", one row is appended to the active sheet: column 1 = proxy,
' column 2 = text of that element. For every other proxy a message is printed
' to the Immediate window, and processing continues with the next proxy.
Public Sub ValidateProxies()
    Dim http As New ServerXMLHTTP60, elem As Object
    Dim proxyList As Variant, oProxy As Variant, r As Long
    Dim html As HTMLDocument

    Set html = New HTMLDocument
    proxyList = [{"98.163.59.8:8080","134.209.115.223:3128","191.101.233.198:3129","198.177.126.218:80","35.185.201.225:8080"}]

    ' One handler for the whole loop; it stays armed because the handler
    ' leaves through Resume
    On Error GoTo errhand

    For Each oProxy In proxyList
        With http
            .Open "GET", "https://www.myip.com/", False
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            .SetProxy 2, oProxy
            .send
        End With

        html.body.innerHTML = http.responseText
        Set elem = html.querySelector("#ip")

        If elem Is Nothing Then
            Debug.Print "Encountered an error: no #ip element in the response", oProxy
        Else
            ' Advance the row only once both values are known, so a failure
            ' never leaves a half-filled row behind
            r = r + 1
            ActiveSheet.Cells(r, 1) = oProxy
            ActiveSheet.Cells(r, 2) = elem.innerText
        End If
nextProxy:
    Next oProxy
    Exit Sub

errhand:
    Debug.Print "Encountered an error " & Err.Description, oProxy
    ' Resume at the end of the loop body rather than at the statement after the
    ' failing one: Resume Next would go on to parse the previous (or missing)
    ' response for this proxy. Resume also clears Err.
    Resume nextProxy

End Sub