在 [Link](Scraping table from local HTML with unicode characters) 这个帖子中,QHarr 帮助我从本地 HTML 文件中抓取了一个表。我在此 Link
有一个 HTML 文件,然后我使用了相同的代码,并对变量 "startTableNumber"、"endTableNumber" 和 "numColumns" 进行了一些修改:
' Loads a local HTML file as UTF-8 text, parses it with MSHTML, and writes the
' innerText of every second <table> element in the index range
' startTableNumber..endTableNumber to the worksheet, nine cells per row,
' starting at row 2, column 1.
Public Sub Test()
    Dim fStream As ADODB.Stream, html As HTMLDocument

    Set html = New HTMLDocument
    Set fStream = New ADODB.Stream

    ' Read the file through ADODB.Stream with an explicit UTF-8 charset.
    With fStream
        .Charset = "UTF-8"
        .Open
        .LoadFromFile "C:\Users\Future\Desktop\Sample 2.html"
        html.body.innerHTML = .ReadText
        .Close
    End With

    Dim hTables As Object, startTableNumber As Long, i As Long, r As Long, c As Long
    ' Each variable needs its own "As Long"; without it endTableNumber would
    ' silently be declared as a Variant.
    Dim counter As Long, endTableNumber As Long, numColumns As Long
    Dim lastAvailable As Long

    startTableNumber = 91
    endTableNumber = 509
    ' NOTE(review): numColumns is assigned but never read; the wrap below is
    ' hard-coded to start a new row on every 10th table visited.
    numColumns = 14

    Set hTables = html.getElementsByTagName("table")

    ' Never index past the last table that actually exists in the document;
    ' otherwise hTables(i) yields no element and .innerText raises a run-time error.
    lastAvailable = hTables.Length - 1
    If endTableNumber > lastAvailable Then endTableNumber = lastAvailable

    r = 2: c = 1
    For i = startTableNumber To endTableNumber Step 2
        counter = counter + 1
        ' On the 10th table visited, move to the first column of the next row.
        If counter = 10 Then
            c = 1: r = r + 1: counter = 1
        End If
        Cells(r, c) = hTables(i).innerText
        c = c + 1
    Next
End Sub
但是我想把表中的数据进一步拆分到各个单元格中,并且想找到一种灵活的方法,使代码无需手动赋值即可自动确定这些变量。我希望能找到使用 Selenium 的解决方案,也希望不要被投反对票。我已尽力把问题说清楚。谢谢。
答案 0 :(得分:1)
因此,正如我在评论中所说,您需要研究数据在后面的 table 标签中是如何排列的,并建立一个映射以得到正确的列顺序。下面的代码会把这些表格写出到工作表。正如我之前也提到的,这种做法并不稳健;只有这个思路或许可以套用到其他文档。
在您的情况下,您不会从文件中读取 HTML,而是会使用
Set tables = driver.FindElementsByCss("table[width='100%'] table:first-child")
然后您将使用 For Each 遍历集合中的 Web 元素,并根据需要调整语法,例如使用 .Text 而不是 .innerText。由于 Selenium 对 webElements 的索引方式不同,可能还需要针对 Selenium 做一些其他调整,但您需要做的所有事情从下面的代码中都应该显而易见。
VBA:
Option Explicit
' Parses a local UTF-8 HTML file and writes one worksheet row per record to
' "Sheet2" of this workbook.
'
' The nested tables matched by the CSS selector are read in groups: a record
' starts every RECORD_STRIDE tables, and within a record every second table is
' read (13 in total). The mappings array gives, for the j-th table read, the
' 0-based output column its text is placed in. Column 0 ("Notes") is left empty.
Public Sub ParseInfo()
    Const FIRST_TABLE As Long = 89      ' index of the first table of the first record
    Const LAST_TABLE As Long = 504      ' upper bound for a record's starting index
    Const RECORD_STRIDE As Long = 26    ' distance between the starts of two records

    Dim html As HTMLDocument, tables As Object, ws As Worksheet, i As Long
    Dim fStream As ADODB.Stream

    Set ws = ThisWorkbook.Worksheets("Sheet2")
    Set html = New HTMLDocument
    Set fStream = New ADODB.Stream

    ' Read the file through ADODB.Stream with an explicit UTF-8 charset.
    With fStream
        .Charset = "UTF-8"
        .Open
        .LoadFromFile "C:\Users\User\Desktop\test.html"
        html.body.innerHTML = .ReadText
        .Close
    End With

    Set tables = html.querySelectorAll("table[width='100%'] table:first-child")

    ' Explicitly typed; a bare "Dim rowCounter" would be a Variant.
    Dim rowCounter As Long: rowCounter = 2
    Dim mappings(), j As Long, headers(), arr(13)
    Dim lastNeeded As Long

    headers = Array("Notes", "Type", "Enrollment status", "Governorate of birth", "Year", "Month", "Day", "Date of Birth", "Religion", _
                    "Nationality", "Student Name", "National Number", "Student Code", "M")
    mappings = Array(3, 8, 9, 12, 11, 10, 2, 7, 1, 6, 5, 4, 13)

    ' Header row goes in row 1.
    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    For i = FIRST_TABLE To LAST_TABLE Step RECORD_STRIDE
        ' Stop cleanly if the document does not contain a complete record here;
        ' indexing past the end of the node list would make .innerText fail.
        lastNeeded = i + 2 * UBound(mappings)
        If lastNeeded > tables.Length - 1 Then Exit For

        arr(0) = vbNullString
        For j = 0 To UBound(mappings)
            arr(mappings(j)) = tables.item(2 * j + i).innerText
        Next

        ' Rows are written at rowCounter + 1, so the first record lands on row 3.
        ws.Cells(rowCounter + 1, 1).Resize(1, UBound(arr) + 1) = arr
        rowCounter = rowCounter + 1
    Next
End Sub