我正试图在https://biz.yahoo.com/p/s_conameu.html网站内获取各个部门(基本资料,综合企业等)的链接。
我有以下代码,但由于此网站不使用ID来设置这些链接的内容,我不知道如何实际找到链接。此外,链接实际上只是部分。我知道我需要添加什么来使它们变满(在html文件的部分链接前添加https://biz.yahoo.com/p/。我看到的第一个链接出现在html的第238行,但我不知道如何实际上搜索并找到这个,因为这些部门的名称将来可能会发生变化。我试图让它不可知,它总能搜索这个列表并拉出所有链接。
以下是显示链接的html快照(从第236行开始):
nowrap
bgcolor=ffffee><a
href=1conameu.html><font
face=arial
size=-1>Basic Materials</a></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>-0.13</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>293348.2B</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>17.42</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>6.50</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>4.12</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>69.76</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>3.09</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>1.35</font></td><td
align=right
bgcolor=ffffff><font
face=arial
size=-1>6.48</font></td></tr><tr><td
nowrap
bgcolor=ffffee><a
href=2conameu.html><font
face=arial
size=-1>Conglomerates</a></td><td
以下是我抓住网站并获取内容的代码。
Public Sub clicklick()
Dim internet As Object
Dim html As HTMLDocument
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Dim i As Integer
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = False
URL = "https://biz.yahoo.com/p/s_conameu.html"
internet.Navigate URL
While internet.Busy
DoEvents
Wend
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getElementById("res") // This does not work (obviously, but not sure how to really search). Returns nothing.
Set header_links = div_result.getelementsbytagname("h3") //This fails because div_result has nothing.
MsgBox html.DocumentElement.innerHTML
MsgBox div_result
SPws.Cells.ClearContents
For Each h In header_links
Set link = h.ChildNodes.Item(0)
SPws.Cells(Range("A" & Rows.count).End(xlUp).row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
提前感谢您的帮助。我希望我能做我想做的事情,所以我可以自动进行一些搜索。
谢谢你, 艾伦答案 0 :(得分:1)
你可以这样做。它将拉出页面上的所有链接,然后您可以打印出单元格中的值。
x = 1
For Each link In internet.document.Links
Cells(x, 1) = link
'or you can add your prefix URL and link at the same time
Cells(x, 1) = "https://biz.yahoo.com/p/" & link
x = x + 1
Next
答案 1 :(得分:0)
这可能适合你。
Sub Macro1()
Range("A1").Select
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;https://biz.yahoo.com/p/s_conameu.html", Destination:=Range("$A$1"))
.CommandType = 0
.Name = "s_conameu_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "4"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End Sub