我已成功导航到包含要从中提取数据的表的页面。让我从一开始就勇往直前,这是我第一次尝试这样的事情,而我为实现这一目标感到非常高兴。我导航到想要的网页,更新了用户名,密码,然后导航到包含我感兴趣的表的页面。
我现在正尝试从表中提取数据,并且遇到以下错误,请参见下文:
我的代码如下:
'==============================================================
'
Public IE As New SHDocVw.InternetExplorer
'==============================================================
' HTML DOCUMENT
'
Public HTMLDoc As MSHTML.HTMLDocument
'==============================================================
' BUTTON COLLECTION
'
Public HTMLButtons As MSHTML.IHTMLElementCollection
Public HTMLButton As MSHTML.IHTMLElement
'==============================================================
' ATTRIBUTE COLLECTION
'
Public HTMLAs As MSHTML.IHTMLElementCollection3
Public HTMLA As MSHTML.IHTMLElement3
'==============================================================
' TABLE COLLECTION
'
Public HTMLTables As MSHTML.IHTMLElementCollection
Public HTMLTable As MSHTML.IHTMLElement
'==============================================================
' TABLE ELEMENTS
'
Public TableBody As MSHTML.IHTMLElementCollection2
Public TableRows As MSHTML.IHTMLElementCollection3
Public TableCell As MSHTML.IHTMLElementCollection4
'==============================================================
Public RowNum As Long
Public ColNum As Long
'==============================================================
'
'
Public Sub TableCollection()
Worksheets.Add
RowNum = 1
Set TableBody = HTMLDoc.getElementsByTagName("tbody")
Set TableRows = HTMLDoc.getElementsByTagName("tr")
Set TableCell = HTMLDoc.getElementsByTagName("td")
For Each TableRows In TableBody
ColNum = 1
For Each TableCell In TableRows
Cells(RowNum, ColNum).Value = TableCell.innerText
ColNum = ColNum + 1
Next TableCell
RowNum = RowNum + 1
Next TableRows
End Sub
“ ================================================ ======================
下面是我要抓取的表的标题和元素之一。 我已将网址替换为WEBADDRESS
<html><head>
<title>
Transaction SpreadSheet for the Current Month to date - April 2020</title>
</head>
<body>
<style>
td { font-family:arial,verdana,sans-serif;font-size:12px;color:#000000;line-height:16px;}
</style>
<table cellpadding="2">
<tbody>
<tr>
<td>
<b>Date</b>
</td>
<td>
<b>Reference</b>
</td>
<td>
<b>Item</b>
</td>
<td>
<b>Particulars</b>
</td>
<td>
<b>Buyer</b>
</td>
<td>
<b>Order Id</b>
</td>
<td>
<b>Note</b>
</td>
<td>
<b>Transaction Amount</b>
</td>
</tr>
<tr>
<td>
04 Apr 2020</td>
<td>
239137532</td>
<td>
<a href="https://WEBADDRESS">461619577</a></td>
<td>
Success Fee</td>
<td>
<a title="User profile for Joe" href="WEBADDRESS">RoySch2510</a>
</td>
<td>
<a href="https://WEBADDRESS" rel="nofollow,noindex">17314294</a>
</td>
<td>
</td>
<td>
-62.55</td>
</tr>
<tr>
请告知我在做什么错
好,这是我的所有代码,希望能对您有所帮助:
Option Explicit
Public Sub GetHTMLDocument()
'===========================================================================
' ESTABLISH PUBLIC VARIABLES
'
Call PublicHTMLVariables
'===========================================================================
' NAVIGATE TO IE
'
Call NavigateToIE("https://old.bidorbuy.co.za/jsp/login/UserLogin.jsp")
'===========================================================================
' LOGIN
'
Call LoginToWebsite("JoeCam9517", "********")
'===========================================================================
' NAVIGATE TO 1st PAGE
'
Call NavigateToFirstPage
'===========================================================================
' NAVIGATE TO ACCOUNT HISTORY PAGE
'
Call NavigateToAccountsPage
'===========================================================================
' CHANGE THE DATE RANGE FOR TRANSACTION SELECTION
'
'Call ChangeDateRange
'===========================================================================
' NAVIGATE TO ACCOUNT TABLE PAGE
'
Call NavigateToTablesPage
'===========================================================================
' COLLECT TABLE ELEMENTS TO WORKSHEET
'
Call TableCollection
'===========================================================================
MsgBox "Pause"
' MORE CODE STILL TO BE DEVELOPED
End Sub
公共变量
Option Explicit
'==============================================================
'
Public IE As New SHDocVw.InternetExplorer
'==============================================================
' HTML DOCUMENT
'
Public HTMLDoc As MSHTML.HTMLDocument
'==============================================================
' HTML ELEMENTS
'
Public HTMLInput As MSHTML.IHTMLElement
Public FromDay As MSHTML.IHTMLElement
Public FromYearMonth As MSHTML.IHTMLElement
Public ToDay As MSHTML.IHTMLElement
'==============================================================
' BUTTON COLLECTION
'
Public HTMLButtons As MSHTML.IHTMLElementCollection
Public HTMLButton As MSHTML.IHTMLElement
'==============================================================
' ATTRIBUTE COLLECTION
'
Public HTMLAs As MSHTML.IHTMLElementCollection3
Public HTMLA As MSHTML.IHTMLElement3
'==============================================================
' TABLE COLLECTION
'
Public HTMLTable As MSHTML.IHTMLElement
Public HTMLTableRows As MSHTML.IHTMLElementCollection
Public HTMLTableCells As MSHTML.IHTMLElementCollection
'==============================================================
' DATE ELEMENTS
'
Public ToYearMonth As MSHTML.IHTMLElement
'==============================================================
' TABLE ELEMENTS
'
'Public TableBody As MSHTML.IHTMLElementCollection2
'Public TableRows As MSHTML.IHTMLElementCollection3
'Public TableCell As MSHTML.IHTMLElementCollection4
'==============================================================
Public H As Integer
Public RowNum As Long
Public ColNum As Long
'==============================================================
Public Sub PublicHTMLVariables()
End Sub
导航到网页
Option Explicit
Public Sub NavigateToIE(Destination As String)
IE.Visible = True
IE.Navigate Destination
Do Until IE.ReadyState = 4
DoEvents
Loop
End Sub
准备登录
Option Explicit
Public Sub LoginToWebsite(UserID As String, PassWord As String)
Set HTMLDoc = IE.Document
Set HTMLInput = HTMLDoc.getElementById("username")
HTMLInput.Value = UserID
Set HTMLInput = HTMLDoc.getElementById("password")
HTMLInput.Value = PassWord
End Sub
导航到第一页
Option Explicit
'===========================================================================
'
'
Public Sub NavigateToFirstPage()
Set HTMLButtons = HTMLdoc.getElementsByTagName("button")
HTMLButtons(3).Click
Do While IE.ReadyState = 4: DoEvents: Loop
Do Until IE.ReadyState = 4: DoEvents: Loop
End Sub
导航到帐户历史记录页面
Option Explicit
'===========================================================================
' NAVIGATE TO ACCOUNT HISTORY PAGE
'
Public Sub NavigateToAccountsPage()
H = 0
Set HTMLAs = HTMLdoc.getElementsByTagName("a")
For Each HTMLA In HTMLAs
If HTMLA.href = "https://old.bidorbuy.co.za/jsp/fee/UserAccount.jsp" Then
GoTo ButtonFound
End If
H = H + 1
Next HTMLA
ButtonFound:
HTMLAs(H).Click
Do While IE.ReadyState = 4: DoEvents: Loop
Do Until IE.ReadyState = 4: DoEvents: Loop
End Sub
更改日期范围-不起作用-我要在晚些时候寻求帮助
导航到表格页面
Option Explicit
'=========================================================================
'
' NAVIGATE TO ACCOUNT TABLE PAGE
'
Public Sub NavigateToTablesPage()
Set HTMLButtons = HTMLdoc.getElementsByName("DetailSubmit")
HTMLButtons(1).Click
End Sub
这个程序使我感到困扰
Option Explicit
'===========================================================================
'
'
Public Sub TableCollection()
Worksheets.Add
Dim HTMLdoc As New HTMLDocument
Dim trow As Object
Dim tcel As Object
Dim rowNum As Long
Dim colNum As Long
rowNum = 1
For Each trow In HTMLdoc.getElementsByTagName("tbody")(0).getElementsByTagName("tr")
colNum = 1
For Each tcel In trow.getElementsByTagName("td")
Cells(rowNum, colNum).Value = tcel.innerText
colNum = colNum + 1
Next tcel
rowNum = rowNum + 1
Next trow
End Sub
'Set HTMLTable = HTMLDoc.getElementsByTagName("body")
'Set HTMLTableRows = HTMLdoc.getElementsByTagName("tr")
'Set HTMLTableCells = HTMLdoc.getElementsByTagName("td")
'For Each HTMLTableCells In HTMLTableRows
'Debug.Print HTMLTableRows.innerText
'Next HTMLTableCells
' ColNum = 1
' For Each TableCell In TableRows
' Cells(RowNum, ColNum).Value = TableCell.innerText
' ColNum = ColNum + 1
' Next TableCell
'RowNum = RowNum + 1
'Next TableRows
我知道有很多其他人的代码需要仔细阅读,但是我确实尝试编写我的代码时认为其他人可能必须对其进行编辑。另外,我很抱歉,我没有遵循正常的约定,但是当我看到变量以小写字母开头,然后在中途遇到大写字母时,这让我感到烦恼,对不起:-)
我开始怀疑问题在于表的构造方式,这可能吗?
我想对所有尝试解决我的问题的人表示感谢,但我仍然坚持同样的结果。使用上面的代码,我进入了这张表: 然后我得到这个错误。
正如您从注释掉的代码中看到的那样,我尝试了几种不同的编码选项,但我一直遇到错误。
答案 0 :(得分:1)
我已经写了一些函数来读取任何HTML表。尝试使用它。 HTMLTab作为该函数的参数当然必须是HTMLTable / IHTMLTable Object。 :)
Function ReadTable(HTMLTab) As Variant
Dim myTable() As Variant
rLen = HTMLTab.Rows.Length
CLen = HTMLTab.Cells.Length / rLen
ReDim myTable(0 To rLen - 1, 0 To CLen - 1)
For Each myRow In HTMLTab.Rows
j = 0
For Each myCell In myRow.Cells
myTable(i, j) = myCell.outerText
j = j + 1
Next myCell
i = i + 1
Next myRow
ReadTable = myTable
End Function