我已使用以下代码成功将电子邮件中的数据提取到Excel中:
Set ElementCol = html.getElementsByTagName("a")
For Each Link In ElementCol
If InStr(Link, "mailto:") Then
Cells(MyRow, MyCol).Value = Link
Cells(MyRow, MyCol).Select
MyCol = MyCol + 1
End If
Next 'email address loop
这与:
但是,我不能使用类似的代码来输入名称:
我尝试过getElementById()
和getElementsByClassName
,并且尝试将html.
更改为doc.
和Text.
,因为我认为问题可能是电子邮件地址是超链接,所以html.
是合适的,但是名称是纯文本,所以我应该使用其他名称?我收到“对象必需”错误。
编辑:感谢您的意见。下面是我的完整代码。我将在英国下议院网站上搜集MP的详细信息。该URL有一个随机数,因此我将URL强制使用,尝试从1到5000的每个数字。我突出显示了无效部分(当我尝试此处建议的新代码时,该部分仍然无效)示例URL:{ {3}}
Sub scrapeHyperlinksWebsite()
Dim ie As InternetExplorer
Dim html As HTMLDocument
Dim Link As Object
Dim ElementCol As Object
Dim erow As Long
Dim MyURL As String
Dim MyRow, MyCol As Integer
Dim Constituency As String
Dim coll As IHTMLElementCollection ' suggestion from Stackoverflow
Application.ScreenUpdating = True
'clear existing data
Call clearRows
Set ie = New InternetExplorer
ie.Visible = False
'initiate variable
URL_Number = 0
'static elements:
URL_Str1 = "https://members.parliament.uk/member/"
URL_Str2 = "/contact"
'Begin loop down page
For MyRow = 2 To 5000
URL_Number = URL_Number + 1
MyCol = 1
MyURL = URL_Str1 & URL_Number & URL_Str2
ie.navigate MyURL
Do While ie.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading website…"
DoEvents
Loop
Set html = ie.document
Range("L2") = html.DocumentElement.innerHTML
'Get MP email addresses
MyCol = 1
Set ElementCol = html.getElementsByTagName("a")
For Each Link In ElementCol 'find emails in each site
If InStr(Link, "mailto:") Then
Cells(MyRow, MyCol).Value = Link
Cells(MyRow, MyCol).Select
MyCol = MyCol + 1
End If
Next 'email address loop
MyCol = 3
'Get MP phone numbers
Set ElementCol = html.getElementsByTagName("a")
For Each Link In ElementCol 'find emails in each site
If InStr(Link, "tel:") Then
Cells(MyRow, MyCol).Value = Link
Cells(MyRow, MyCol).Select
MyCol = MyCol + 1
End If
Next 'email address loop
'Get Constituency - This section is not working
' MyCol = 5
'
' Set coll = html.getElementsByTagName("h1")
'
' For Each Link In coll 'find emails in each site
'
' If InStr(Link, "Contact") Then
'
' Cells(MyRow, MyCol).Value = coll
'
' Cells(MyRow, MyCol).Select
'
' MyCol = MyCol + 1
'
' End If
'
' Next 'email address loop
'
'above section is not working
Next ' main loop
Set ie = Nothing
Application.StatusBar = ""
'Tidy data
Cells.Select
With Selection
.VerticalAlignment = xlBottom
.Orientation = 0
.AddIndent = False
.IndentLevel = 0
.ShrinkToFit = False
.ReadingOrder = xlContext
.MergeCells = False
End With
With Selection
.VerticalAlignment = xlCenter
.Orientation = 0
.AddIndent = False
.IndentLevel = 0
.ShrinkToFit = False
.ReadingOrder = xlContext
.MergeCells = False
End With
Selection.RowHeight = 15
Range("D1").Select
End Sub
Sub clearRows()
Range("A2:M50000").Select
Selection.ClearContents
Range("A2").Select
End Sub
答案 0 :(得分:0)
The code to get the H1s works fine. There is no contact there, and so you can try the below:
MyCol = 5
Set coll = html.getElementsByTagName("h1")
For Each Link In coll 'find emails in each site
If InStr(Link, "Contact") Then
Cells(MyRow, MyCol).Value = Link.innerText
MyCol = MyCol + 1
End If
Next Link 'MP Name Loop