周期性单击链接时,我的脚本抛出错误

时间:2018-07-06 10:00:37

标签: vba excel-vba web-scraping internet-explorer-11 excel

我已经在vba中结合IE编写了一个脚本,以对连接到网页每个配置文件的一些javascript链接进行点击。我的脚本可以完美地单击第一个链接,但是在第二次迭代中单击下一个链接时,会引发permission denied错误。每个个人资料上都有有效的链接,因此我不能将这些链接用作导航。如何修改我的脚本以便周期性地单击链接?

这是我的脚本:

Sub ClickLinks()
    Const Url As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
    Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, I&

    With IE
        .Visible = True
        .navigate Url
        While .Busy = True Or .readyState < 4: DoEvents: Wend
        Set Htmldoc = .document
    End With

    With Htmldoc.querySelectorAll("#main table tr a")
        For I = 0 To .Length - 1
            .Item(I).Click  'in second iteration this line throws permission denied error
            Application.Wait Now + TimeValue("00:00:03")
        Next I
    End With
End Sub

1 个答案:

答案 0 :(得分:1)

使用XHR请求。下面执行初始GET请求以检索所有职员ID。然后,它循环循环为每个ID发出POST请求的ID。为了显示访问量,我从每个页面检索了员工的电子邮件地址。

Option Explicit
Public Sub GetInfo()
    Dim objHTTP As Object, URL As String, html As New HTMLDocument, i As Long, sBody As String
    Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
    URL = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=details"

    With objHTTP
        .Open "GET", "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic", False
        .send
        html.body.innerHTML = .responseText
        Dim staffIDs As Object
        Set staffIDs = html.querySelectorAll("input[name=employeeID]")

        For i = 0 To staffIDs.Length - 1
            sBody = "employeeID=" & staffIDs(i).getAttribute("value")
            .SetTimeouts 10000, 10000, 10000, 10000
            .Open "POST", URL, False
            .setRequestHeader "User-Agent", "User-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
            .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
            On Error Resume Next
            .send (sBody)
            If Err.Number = 0 Then
                If .Status = "200" Then
                    html.body.innerHTML = .responseText
                Else
                    Debug.Print "HTTP " & .Status & " " & .statusText
                    Exit Sub
                End If
            Else
                Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
                Exit Sub
            End If
            On Error GoTo 0
            Debug.Print html.querySelector("td a").innerText
        Next i
    End With
End Sub

目标网页上的示例视图:

sample view


页面上的示例代码打印输出:

enter image description here


基于笨拙的时间等待刷新,然后导航回到登录页面,因此可以提交下一个表单。这需要改进和重新排序。

Option Explicit
Public Sub ClickLinks2()
    Const URL As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
    Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, i&

    With IE
        .Visible = True
        .navigate URL
        While .Busy = True Or .readyState < 4: DoEvents: Wend
        Set Htmldoc = .document

        Dim numEmployees As Long, a As Object
        numEmployees = Htmldoc.querySelectorAll("a.names").Length

        For i = 1 To 3                           'numEmployees   (1-792)
            While .Busy = True Or .readyState < 4: DoEvents: Wend
            .navigate URL
            Application.Wait Now + TimeSerial(0, 0, 5)
            .document.parentWindow.execScript "document.form" & i & ".submit();" ''javascript:document.form1.submit(); ''<== Adapted this
        Next i
    End With
End Sub