通过Excel VBA进行网页抓取

时间:2020-10-04 14:43:14

标签: excel vba web-scraping

我想从https://finviz.com/quote.ashx?t=amzn中的表中抓取最后一行(以'Recom'开头)

不幸的是,在代码Children(x)的输出部分中,我的代码错误出现:对象变量或未设置块变量。

您能帮助我理解为什么即使尊重html层次结构也导致代码错误吗?

谢谢!

Dim y As Long
Dim x As String
Dim lastrow As Long
Dim elem As Object, tRow As Object, S$, R&



lastrow = Sheets("Table5").usedrange.Row - 1 + Sheets("Table5").usedrange.Rows.Count

For y = 11 To lastrow Step 2

x = Sheets("Table5").Range("A" & y).Value
If x = "" Then
Exit Sub
Else
'On Error Resume Next

With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://finviz.com/quote.ashx?t=" & x, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
        .send
        S = .responseText
    End With
    
    With CreateObject("HTMLFile")
        .write S
        
        For Each elem In .getElementsByTagName("tr")
        
        If InStr(elem.innerText, "Recom") > 0 Then

        Sheets("Table5").Range("AD" & y) = elem.Children(1).innerText
        Sheets("Table5").Range("AE" & y) = elem.Children(3).innerText
        
        End If
        Next elem
        
    End With
    
End If
Next y

1 个答案:

答案 0 :(得分:1)

好的,这似乎很容易解决。您需要查找以tr开头的RecomLike运算符所做的与我尝试说的完全相同

Sub FetchContent()
    Const Url$ = "https://finviz.com/quote.ashx?t=amzn"
    Dim elem As Object, tRow As Object, S$, R&

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
        .Send
        S = .responseText
    End With

    With CreateObject("HTMLFile")
        .write S
        For Each elem In .getElementsByTagName("tr")
            If elem.innerText Like "Recom*" Then
                R = R + 1: Cells(R, 1) = elem.Children(0).innerText
                Cells(R, 2) = elem.Children(1).innerText
                Cells(R, 3) = elem.Children(2).innerText
                Cells(R, 4) = elem.Children(3).innerText
                Cells(R, 5) = elem.Children(4).innerText
                Cells(R, 6) = elem.Children(5).innerText
                Cells(R, 7) = elem.Children(6).innerText
                Exit For
            End If
        Next elem
    End With
End Sub