Excel VBA拉网站数据

时间:2018-07-27 12:18:45

标签: excel vba web-scraping automation pull

我想通过输入卷号“ 217449”将网站“ http://result.biselahore.com/”中的所有数据提取到Excel Sheet中。输入“卷号”后,它将进入带有详细主题标记的结果卡页面。

要从下一页获取主题标记并将其粘贴到excel上,以下代码无法正常工作,并且给出错误号91,“未设置块变量的对象变量”。

这是我的完整代码:

Sub WData()

Do Until ActiveCell.Value = "100000"

Dim IE As New InternetExplorer

Dim DOCS As HTMLDocument

Dim str, str1, str2, str3, str4, str5 As String

IE.navigate "http://result.biselahore.com/"

IE.Visible = True

Do

DoEvents

Loop Until IE.readyState = READYSTATE_COMPLETE

IE.document.getElementById("rollNum").Value = ActiveCell.Value

IE.document.forms(0).submit

Do While IE.Busy

DoEvents

Loop

Set DOCS = IE.document

Do While DOCS.readyState <> "complete"

DoEvents

Loop

str = IE.document.getElementsByTagName("td")(4).innerText

str1 = IE.document.getElementsByTagName("td")(7).innerText

str2 = IE.document.getElementsByTagName("td")(9).innerText

str3 = IE.document.getElementsByTagName("td")(20).innerText

str4 = IE.document.getElementsByTagName("td")(23).innerText

str5 = IE.document.getElementsByTagName("td")(25).innerText

str6 = IE.document.getElementsByTagName("td")(27).innerText

str7 = IE.document.getElementsByTagName("td")(37).innerText

str8 = IE.document.getElementsByTagName("td")(38).innerText

str9 = IE.document.getElementsByTagName("td")(42).innerText

str10 = IE.document.getElementsByTagName("td")(43).innerText

str11 = IE.document.getElementsByTagName("td")(47).innerText

str12 = IE.document.getElementsByTagName("td")(48).innerText

str13 = IE.document.getElementsByTagName("td")(52).innerText

str14 = IE.document.getElementsByTagName("td")(53).innerText

str15 = IE.document.getElementsByTagName("td")(57).innerText

str16 = IE.document.getElementsByTagName("td")(58).innerText

str17 = IE.document.getElementsByTagName("td")(62).innerText

str18 = IE.document.getElementsByTagName("td")(63).innerText

str19 = IE.document.getElementsByTagName("td")(71).innerText

Dim lastrow As Integer

lastrow = Worksheets(1).Range("b" & Worksheets(1).Rows.Count).End(xlUp).Row + 1

Cells(lastrow, 2).Value = Trim(str)

Cells(lastrow, 3).Value = Trim(str1)

Cells(lastrow, 4).Value = Trim(str2)

Cells(lastrow, 5).Value = Trim(str3)

Cells(lastrow, 6).Value = Trim(str4)

Cells(lastrow, 7).Value = Trim(str5)

Cells(lastrow, 8).Value = Trim(str6)

Cells(lastrow, 9).Value = Trim(str7)

Cells(lastrow, 10).Value = Trim(str8)

Cells(lastrow, 11).Value = Trim(str9)

Cells(lastrow, 12).Value = Trim(str10)

Cells(lastrow, 13).Value = Trim(str11)

Cells(lastrow, 14).Value = Trim(str12)

Cells(lastrow, 15).Value = Trim(str13)

Cells(lastrow, 16).Value = Trim(str14)

Cells(lastrow, 17).Value = Trim(str15)

Cells(lastrow, 18).Value = Trim(str16)

Cells(lastrow, 19).Value = Trim(str17)

Cells(lastrow, 20).Value = Trim(str18)

Cells(lastrow, 21).Value = Trim(str19)

IE.Quit

Set IE = Nothing

Selection.Offset(1, 0).Select

Loop

End Sub

我想要的输出:

Subject Marks   Subject    Marks    Subject     Marks  Subject     Marks

URDU    68  62  ENGLISH     75  70  ISLAMIAT    50 49 MATHEMATICS   75 75 

PHYSICS 60  59  CHEMISTRY   60  60  BIOLOGY     58 59 

1 个答案:

答案 0 :(得分:0)

网络“表格”一团糟。我跳过了具有“合并单元格”的2个标题。

我添加了一个循环检查,直到由@PeterAlbert设置了具有超时功能的表,以在设置的时间后退出循环,以停止无限循环。

Option Explicit
Public Sub GetInfo()
    Dim IE As New InternetExplorer, html As HTMLDocument, hTable As Object, ele As Object
    With IE
        .Visible = True
        .navigate "http://result.biselahore.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        .document.querySelector("#rollNum").innerText = 217449
        .document.forms(0).submit
        Dim dblStart As Double
        Dim tmp As Long

        Const cDblMaxTimeInSeconds As Double = 5 '<==Second to wait until timeout

        dblStart = Timer

        While .Busy Or .readyState < 4: DoEvents: Wend
        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementsByTagName("table")(1)
            On Error GoTo 0
            If TimerDiff(dblStart, Timer) > cDblMaxTimeInSeconds Then Exit Do
        Loop While hTable Is Nothing
        Dim list As Object, list2 As Object
        Set list = hTable.getElementsByTagName("tr")
        Dim i As Long, j As Long, r As Long, c As Long
        Application.ScreenUpdating = False
        For i = 13 To list.Length - 1
            Set list2 = list.item(i).getElementsByTagName("td")
            r = r + 1: c = 0
            For j = 0 To list2.Length - 1
                c = c + 1
                Cells(r, c) = list2.item(j).innerText
            Next j
        Next i
        Application.ScreenUpdating = True
    End With
End Sub

Public Function TimerDiff(ByVal dblTimerStart As Double, ByVal dblTimerEnd As Double) As Double
    Dim dblTemp As Double
    dblTemp = dblTimerEnd - dblTimerStart
    If dblTemp < -43200 Then 'half a day
        dblTemp = dblTemp + 86400
    End If
    TimerDiff = dblTemp
End Function

版本2(使用上面的计时器功能)

Public Sub GetInfo()
    Dim IE As New InternetExplorer, html As HTMLDocument, hTable As Object, ele As Object
    With IE
        .Visible = True
        .navigate "http://result.biselahore.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        .document.querySelector("#rollNum").innerText = 217449
        .document.forms(0).submit
        Dim dblStart As Double, tmp As Long, clipboard As Object

        Const cDblMaxTimeInSeconds As Double = 5 '<==Second to wait until timeout

        dblStart = Timer

        While .Busy Or .readyState < 4: DoEvents: Wend
        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementsByTagName("table")(1)
            On Error GoTo 0
            If TimerDiff(dblStart, Timer) > cDblMaxTimeInSeconds Then Exit Do
        Loop While hTable Is Nothing

        Application.ScreenUpdating = False
        Set clipboard = New MSForms.DataObject
        clipboard.SetText hTable.outerHTML
        clipboard.PutInClipboard
        ActiveSheet.Cells(1, 1).PasteSpecial
        Application.ScreenUpdating = True
    End With
End Sub