我做了一个运行良好的刮刀,但同时从中提取信息 如果它看到任何项目中缺少四个,那么它会跳过一定的线索 领先并继续下一个领先。
该过程使其无论如何都会提取其余信息 缺少一两件物品
Option Explicit
Const url As String = "http://www.yellowpages.com"
Const pageurl As String = "http://www.yellowpages.com/search?search_terms=Coffee%20Shops&geo_location_terms=AZ&page="
Sub yptest()
Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument, hmm As New HTMLDocument
Dim topics As Object, topic As Object, posts As Object, post As Object, lng As Object, mng As Object, nng As Object
Dim parse As Object, par As Object, pas As Object, pae As Object, sas As Object, jas As Object
Dim x As Long, zz As String, ss As String, y As Long, qas As Object, i As Long, t As Long
x = 2
for t = 1 to 10
http.Open "GET", pageurl & t, False
http.send
html.body.innerHTML = http.responseText
Set topics = html.getElementsByClassName("info")
For y = 0 To topics.Length - 1
Set posts = topics(y).getElementsByTagName("a")(0)
zz = posts.getAttribute("href")
ss = url & Mid(zz, InStr(zz, ":") + 1)
http.Open "GET", ss, False
http.send
hmm.body.innerHTML = http.responseText
Set parse = hmm.getElementsByClassName("sales-info")
Set sas = hmm.getElementsByClassName("address")
Set qas = hmm.getElementsByClassName("phone")
Set jas = hmm.getElementsByClassName("email-business")
If parse.Length > 0 And sas.Length > 0 And qas.Length > 0 And jas.Length > 0 Then
For i = 0 To parse.Length - 1
Cells(x, 1) = parse(i).innerText
Cells(x, 2) = sas(i).innerText
Cells(x, 3) = qas(i).innerText
Cells(x, 4) = jas(i).getAttribute("href")
x = x + 1
Next i
End If
Next y
next t
End Sub
答案 0 :(得分:1)
只需更改
If parse.Length > 0 And sas.Length > 0 And qas.Length > 0 And jas.Length > 0 Then
For i = 0 To parse.Length - 1
Cells(x, 1) = parse(i).innerText
Cells(x, 2) = sas(i).innerText
Cells(x, 3) = qas(i).innerText
Cells(x, 4) = jas(i).getAttribute("href")
x = x + 1
Next i
End If
要
If Parse.Length > 0 Then
For i = 0 To Parse.Length - 1
Cells(i + 1, 1) = Parse(i).innerText
Next i
End If
If sas.Length > 0 Then
For i = 0 To sas.Length - 1
Cells(i + 1, 2) = sas(i).innerText
Next i
End If
If qas.Length > 0 Then
For i = 0 To qas.Length - 1
Cells(i + 1, 3) = qas(i).innerText
Next i
End If
If jas.Length > 0 Then
For i = 0 To jas.Length - 1
Cells(i + 1, 4) = jas(i).getAttribute("href")
Next i
End If