我在网上找到了这段代码,该代码可以从页面中提取HTML表并将其转储到工作表中。在遇到需要导入的当前页面之前,我没有任何问题。它有多个表
我试过让它检查不同的类名,以查看拉出了什么,但是它总是错误地抓住了这些第一项。我知道那是什么,我只是缺乏调整知识。
Sub getVTAdata()
' Declare all of our variables
Dim ie As Object
Dim btn As Object
Dim tRows As Object
Dim temp As Object
Dim Table As Object
Dim tHead As Object
Dim tCells As Object
Dim np As Variant
Dim numPages As String
Dim pos As Integer
Dim rNum As Integer
Dim cNum As Integer
Dim h
Dim r
Dim c
Dim url As String
ThisWorkbook.Worksheets("UpdateTemp").Select
url = "https://www.canada.ca/en/treasury-board-secretariat/services/terms-conditions-employment/isolated-posts-government-housing/vacation-travel-assistance-april-2019.html"
' Create Internet Explorer object
Set ie = CreateObject("InternetExplorer.Application")
' Make it invisible
ie.Visible = False
' Navigate to the webpage
ie.navigate url
' Wait while the page is loading
While ie.Busy
DoEvents
Wend
' Wait an additional 3 seconds for good measure
Application.Wait DateAdd("s", 3, Now)
' Create variables to track the row and column to output the
' text on our spreadsheet
rNum = 1
cNum = 1
' Looking at the source code we see that the table we want to
' scrape data from is in class called "page-content new-list-styles"
' so we can use the getelementsbyclass name method to get that
Set Table = ie.document.getelementsbyclassname("mwsgeneric-base-html parbase section")
' Again, Table is now a collection, so to refer to the first
' item in the collection, we use (0) We want to pull each row in
' that section, so we can use the getelementsbytagname method
' and use the table row tag of "tr"
Set tRows = Table(0).getelementsbytagname("tr")
' First we can get the column headings which use the "th" tag
Set tHead = Table(0).getelementsbytagname("th")
' Loop through each column heading
For Each h In tHead
' Output the contents of the cell to the spreadsheet
Worksheets("UpdateTemp").Cells(rNum, cNum).Value = h.innertext
' Increase the cNum value so the next time around the data will
' output to the column to the right
cNum = cNum + 1
Next
' Move on to the next row before pulling the data and reset the
' column back to 1
rNum = rNum + 1
cNum = 1
' Loop through each row in the table
For Each r In tRows
' Within each row, pull each cell by using the
' getelementsbytagname method and use the table tag "td"
Set tCells = r.getelementsbytagname("td")
' Loop through each cell of the row
For Each c In tCells
' Output the contents of the cell to the spreadsheet
Worksheets("UpdateTemp").Cells(rNum, cNum).Value = c.innertext
' Increase the cNum value so the next time around the data will
' output to the column to the right
cNum = cNum + 1
Next
' When we switch to the next row of the table, increase the rNum
' value so we go to the next row of our spreadsheet, and also
' reset back to column number 1
rNum = rNum + 1
cNum = 1
Next
' Wait while the page updates
While ie.Busy
DoEvents
Wend
' Quit the internet explorer application
ie.Quit
' Clear the ie object. This probably isn't necessary, but helps
' clean things up
Set ie = Nothing
End Sub
它正在拉动表,但是发生的是,它们在每一行的第一项上使用了标签,因此它将在第一行而不是第一列中添加所有标签。
标题|第1列,第1列|第2行,第1列|第3行,第1列
第1行第2列
第2行第2列
第3行第2列
第4行,第2列
应该发生的是,所有行都与每个位置名称对齐。
标题
第1行第1列|第1列,第2列
第2行第1列|第2列,第2列
第3行第1列|第3行,第2列
第4行第1列|第4行,第2列