如果四个项目中缺少一个项目,则刮刀会省略整个线索

时间:2017-01-25 13:18:09

标签: vba web-scraping

我做了一个运行良好的刮刀,但同时从中提取信息 如果它看到任何项目中缺少四个,那么它会跳过一定的线索 领先并继续下一个领先。

该过程使其无论如何都会提取其余信息 缺少一两件物品

Option Explicit
Const url As String = "http://www.yellowpages.com"
Const pageurl As String = "http://www.yellowpages.com/search?search_terms=Coffee%20Shops&geo_location_terms=AZ&page="
Sub yptest()
Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument, hmm As New HTMLDocument
Dim topics As Object, topic As Object, posts As Object, post As Object, lng As Object, mng As Object, nng As Object
Dim parse As Object, par As Object, pas As Object, pae As Object, sas As Object, jas As Object
Dim x As Long, zz As String, ss As String, y As Long, qas As Object, i As Long, t As Long

x = 2

for t = 1 to 10

http.Open "GET", pageurl & t, False
http.send
html.body.innerHTML = http.responseText

Set topics = html.getElementsByClassName("info")

For y = 0 To topics.Length - 1
Set posts = topics(y).getElementsByTagName("a")(0)
zz = posts.getAttribute("href")
ss = url & Mid(zz, InStr(zz, ":") + 1)

    http.Open "GET", ss, False
    http.send
    hmm.body.innerHTML = http.responseText

Set parse = hmm.getElementsByClassName("sales-info")
Set sas = hmm.getElementsByClassName("address")
Set qas = hmm.getElementsByClassName("phone")
Set jas = hmm.getElementsByClassName("email-business")

    If parse.Length > 0 And sas.Length > 0 And qas.Length > 0 And jas.Length > 0 Then
        For i = 0 To parse.Length - 1
        Cells(x, 1) = parse(i).innerText
        Cells(x, 2) = sas(i).innerText
        Cells(x, 3) = qas(i).innerText
        Cells(x, 4) = jas(i).getAttribute("href")

        x = x + 1
        Next i
    End If
Next y
next t
End Sub

1 个答案:

答案 0 :(得分:1)

只需更改

If parse.Length > 0 And sas.Length > 0 And qas.Length > 0 And jas.Length > 0 Then
    For i = 0 To parse.Length - 1
    Cells(x, 1) = parse(i).innerText
    Cells(x, 2) = sas(i).innerText
    Cells(x, 3) = qas(i).innerText
    Cells(x, 4) = jas(i).getAttribute("href")

    x = x + 1
    Next i
End If

If Parse.Length > 0 Then
    For i = 0 To Parse.Length - 1
        Cells(i + 1, 1) = Parse(i).innerText
    Next i
End If
If sas.Length > 0 Then
    For i = 0 To sas.Length - 1
        Cells(i + 1, 2) = sas(i).innerText
    Next i
End If
If qas.Length > 0 Then
    For i = 0 To qas.Length - 1
        Cells(i + 1, 3) = qas(i).innerText
    Next i
End If
If jas.Length > 0 Then
    For i = 0 To jas.Length - 1
        Cells(i + 1, 4) = jas(i).getAttribute("href")
    Next i
End If