下面的宏应该填充网站上的搜索参数并从结果中抓取数据 - 131页,通常每页5行数据。为了测试我注释掉了循环,所以它只做了1页。
以下数据列填写在Excel工作表中:公司名称,联系人姓名,联系电子邮件,位置,电话(BH),移动,服务。
我只有Phone(BH)和Mobile才有问题。原因是公司有时只有电话(BH)或手机,而不是两者。
我循环的方式(5个公司名称,然后5个联系人姓名等)要求检测所有数据,否则我不知道该数据应该在哪一行。
Option Explicit
Private Sub CommandButton1_Click()
Dim htmlele As IHTMLElement
Dim Link As String
Dim ie As Object
Dim page As Integer
Dim companyresult As Long, nameresult As Long, emailresult As Long, locationresult As Long, servicesresult As Long
Application.ScreenUpdating = False
Link = "http://bdav.org.au/find"
Set ie = CreateObject("InternetExplorer.Application")
ie.Navigate Link
'ie.Visible = True
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "postcode" Then htmlele.Value = "3000"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "dist" Then htmlele.Value = "99"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Type = "submit" Then htmlele.Click
Next htmlele
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
'Do
page = page + 1
Link = "http://bdav.org.au/find/" & page
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
With ThisWorkbook.Worksheets("results")
For Each htmlele In ie.Document.getElementsByTagName("h2")
companyresult = companyresult + 1
.Range("a" & companyresult + 1).Value = htmlele.innerHTML
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
nameresult = nameresult + 1
.Range("b" & nameresult + 1).Value = htmlele.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
emailresult = emailresult + 1
.Range("c" & emailresult + 1).Value = Mid(htmlele.href, 8)
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("label")
If htmlele.innerHTML = "Location:" Then
locationresult = locationresult + 1
.Range("d" & locationresult + 1).Value = htmlele.NextSibling.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("label")
If htmlele.innerHTML = "Services:" Then
servicesresult = servicesresult + 1
If Right(htmlele.NextSibling.innerHTML, 4) = "<br>" Then
htmlele.NextSibling.innerHTML = Left(htmlele.NextSibling.innerHTML, Len(htmlele.NextSibling.innerHTML) - 4)
End If
.Range("g" & servicesresult + 1).Value = Replace(htmlele.NextSibling.innerHTML, "<br>", vbLf)
End If
Next htmlele
End With
'Loop
ie.Quit
Set ie = Nothing
Application.ScreenUpdating = True
End Sub
答案 0 :(得分:0)
考虑继续并解析 Phone 和 Mobile 数据(假设它们存在于页面上),然后运行条件错误句柄以说明潜在的不存在。具体来说,对于未设置的对象变量,错误句柄将使用Resume Next
(在问题行后继续)Error 91
。
至于对齐行,尝试使用不同的迭代器,phoneresult = phoneresult + 1
如果随机丢失则不可靠。因此,使用servicesresult
或任何始终存在的节点都是电话或移动的兄弟节点。以下不是确切的代码,但包含在内以证明概念。
Private Sub CommandButton1_Click()
On Error Goto ErrHandle
'...
For Each htmlele In ie.Document.getElementsByTagName("Phone")
If htmlele.innerHTML = "Phone (BH):" Then
'...other needed code
.Range("h" & servicesresult + 1).Value = htmlele.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("Mobile")
If htmlele.innerHTML = "Mobile:" Then
'...other needed code
.Range("i" & servicesresult + 1).Value = htmlele.innerHTML
End If
Next htmlele
'...
Exit Sub
ErrHandle:
' MISSING NODE ERROR
If Err.Number = 91 Then
Resume Next
' ALL OTHER ERRORS
Else:
MsgBox Err.Number & " - " & Err.Description
Exit Sub
End If
End Sub
答案 1 :(得分:0)
我找到了一种比我之前尝试过的方法更清晰的解决方案,只有1个循环通过标签标签元素集合。我根据位置标签外观取一个行号(这个标签总是出现在公司)。如果缺少公司的任何其他标签,则不会将其放在此行中。
Option Explicit
Private Sub CommandButton1_Click()
Dim htmlele As IHTMLElement
Dim Link As String
Dim ie As Object
Dim page As Integer
Dim companyresult As Long, nameresult As Long, emailresult As Long, labelresult As Long
Application.ScreenUpdating = False
Link = "http://bdav.org.au/find"
Set ie = CreateObject("InternetExplorer.Application")
ie.Navigate Link
'ie.Visible = True
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "postcode" Then htmlele.Value = "3000"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "dist" Then htmlele.Value = "99"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Type = "submit" Then htmlele.Click
Next htmlele
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
Do
page = page + 1
Link = "http://bdav.org.au/find/" & page
ie.Navigate Link
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
With ThisWorkbook.Worksheets("results")
For Each htmlele In ie.Document.getElementsByTagName("h2")
companyresult = companyresult + 1
.Range("a" & companyresult + 1).Value = htmlele.innerHTML
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
nameresult = nameresult + 1
.Range("b" & nameresult + 1).Value = htmlele.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
emailresult = emailresult + 1
.Range("c" & emailresult + 1).Value = Mid(htmlele.href, 8)
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("label")
If htmlele.innerHTML = "Location:" Then
labelresult = labelresult + 1
.Range("d" & labelresult + 1).Value = htmlele.NextSibling.innerHTML
ElseIf htmlele.innerHTML = "Phone (BH):" Then
.Range("e" & labelresult + 1).Value = htmlele.NextSibling.innerHTML
ElseIf htmlele.innerHTML = "Mobile:" Then
.Range("f" & labelresult + 1).Value = htmlele.NextSibling.innerHTML
ElseIf htmlele.innerHTML = "Services:" Then
If Right(htmlele.NextSibling.innerHTML, 4) = "<br>" Then
htmlele.NextSibling.innerHTML = Left(htmlele.NextSibling.innerHTML, Len(htmlele.NextSibling.innerHTML) - 4)
End If
.Range("g" & labelresult + 1).Value = Replace(htmlele.NextSibling.innerHTML, "<br>", vbLf)
End If
Next htmlele
End With
Loop
ie.Quit
Set ie = Nothing
Application.ScreenUpdating = True
End Sub