VBA抓取tr onclick

时间:2019-01-28 01:20:23

标签: javascript html excel vba web-scraping

下面是HTML/JavaScript代码:我想抓取tr标签的onclick属性中的ID。如何读取onClick属性并取得其末尾的ID?下面的代码来自一个需要登录的动态网站表格,所有行都带有超链接,并按odd和even两种类交替排列,如下所示。感谢您的帮助,谢谢!

<tr class='odd' onmouseover='mov(this)' onMouseOut="mou(this, 'odd')" onClick='location.href="custlist.php?rptname=Report&custm_id=**13857**"'> 
<td align='right'>&nbsp;1.&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;Company&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;blah blah&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp; bbb&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,084,771.10&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,060,787.10&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,203,000.00&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;30,233.90&nbsp;</td>
<td nowrap style='text-align:left;'>&nbsp;&nbsp;</td>
</tr>
<tr  class='even'  onmouseover='mov(this)' onMouseOut="mou(this, 'even')" onClick='location.href="custlist.php?rptname=report&custm_id=22012"'>
<td align='right'>&nbsp;2.&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;T3 bbhj &nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;hhht &nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;720,260.00&nbsp;</td>


<tr>

* 25行之后,页面会出现如下的分隔(表头)行代码:

            <tr class='header'>

  <th width='20px'>&nbsp;</th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=1&desc=ASC&perpage=ALL' class='sorter'>
              CUSTOMER</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=2&desc=ASC&perpage=ALL' class='sorter'>
              CUSTOMER AGENT</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=3&desc=ASC&perpage=ALL' class='sorter'>
              ACCT MANAGERS</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=4&desc=ASC&perpage=ALL' class='sorter'>
              TOTAL</a>
          &nbsp;<img src='images/up_arrow.gif'>&nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=5&desc=ASC&perpage=ALL' class='sorter'>
              BALANCE</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=6&desc=ASC&perpage=ALL' class='sorter'>
              CREDIT LIMIT</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=7&desc=ASC&perpage=ALL' class='sorter'>
              CREDIT AVAILABLE</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=8&desc=ASC&perpage=ALL' class='sorter'>
              NOTES</a>
          &nbsp;


      </th>

到目前为止,我的VBA代码:

' Logs in to the brokerage site, opens the customer list report, pastes the
' report table's HTML into the active sheet at A1, and then writes the custm_id
' taken from each row's onclick handler into column J, starting at J2.
'
' Uses early-bound names (InternetExplorer, READYSTATE_COMPLETE, DataObject), so
' the project must already reference the libraries that declare them.
Sub GetCreditLimit()

    Dim ieApp As Object
    Dim ieDoc As Object
    Dim ieTable As Object
    Dim clip As DataObject
    Dim idRows As Object
    Dim onclickText As String
    Dim i As Long
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    'make sure the browser gets closed even when a step below fails
    On Error GoTo CleanUp

    'create a new instance of ie
    Set ieApp = New InternetExplorer

    'you don't need this, but it's good for debugging
    ieApp.Visible = True

    'assume we're not logged in and just go directly to the login page
    ieApp.Navigate "https://brokerage.suntecktts.com/agents/login"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    Set ieDoc = ieApp.Document

    'fill in the login form - View Source from your browser to get the control names
    With ieDoc.forms(0)
        .agent_login.Value = "username"
        .agent_password.Value = "password"
        .submit
    End With
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'now that we're in, go to the page we want
    ieApp.Navigate "https://brokerage.suntecktts.com/fats/custlist.php?srid=8897&page=1&perpage=ALL"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'get the table based on the table's id
    Set ieDoc = ieApp.Document
    Set ieTable = ieDoc.all.Item("general-report-wrapper")

    'copy the table's html to the clipboard and paste to the sheet
    If Not ieTable Is Nothing Then
        Set clip = New DataObject
        clip.SetText "<html>" & ieTable.outerHTML & "</html>"
        clip.PutInClipboard

        ActiveSheet.Range("A1").Select
        ActiveSheet.PasteSpecial "Unicode Text"
        Rows("1:1").Select
    End If

    'the onclick handler lives on the <tr>, not on its <td> cells, so select the
    'rows directly; the selector keeps only rows whose onclick contains "custm_id="
    Set idRows = ieDoc.querySelectorAll("tr[onclick*='custm_id=']")

    For i = 0 To idRows.Length - 1
        onclickText = idRows.Item(i).getAttribute("onclick")

        'onclick looks like: location.href="custlist.php?rptname=Report&custm_id=13857"
        'keep what sits between "custm_id=" and the closing double quote
        ActiveSheet.Range("J2").Offset(i, 0).Value = _
            Split(Split(onclickText, "custm_id=")(1), Chr$(34))(0)
    Next i

CleanUp:
    'remember any failure before the cleanup calls below run
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description

    'close 'er up
    If Not ieApp Is Nothing Then ieApp.Quit
    Set ieApp = Nothing

    'surface the original failure to the caller once the browser is closed
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc

End Sub

2 个答案:

答案 0 :(得分:0)

您的HTML似乎格式不正确。对于格式正确的HTML,您可以组合使用CSS选择器来定位元素:用类选择器匹配类为odd的tr元素,再配合带"包含"(*)修饰符的属性=值选择器,规定其onclick属性值必须包含子字符串custm_id。然后提取onclick属性值字符串,并使用Split取得ID。这里假定通过Internet Explorer访问页面:

Split

所有ID

' Print the custm_id of the first odd row whose onclick handler mentions custm_id.
' Expects `ie` to be an Internet Explorer instance that has finished loading the page.
Dim firstRow As Object
Dim onclickText As String

Set firstRow = ie.document.querySelector("tr.odd[onClick*='custm_id=']")

' querySelector yields Nothing when no row matches, so guard before dereferencing
If firstRow Is Nothing Then
    Debug.Print "No tr.odd row with custm_id in its onclick was found"
Else
    onclickText = firstRow.getAttribute("onclick")
    ' keep what sits between "custm_id=" and the closing double quote (Chr$(34))
    Debug.Print Split(Split(onclickText, "custm_id=")(1), Chr$(34))(0)
End If

答案 1 :(得分:0)

我知道了...请参阅下面的代码

' Logs in to the brokerage site, opens the customer list report, pastes the
' report table's HTML into the active sheet at A1, and then, for every row whose
' onclick handler contains "custm_id=", writes the full onclick text into
' column J (starting at J2) and prints the onclick text and the extracted
' custm_id to the Immediate window.
'
' Uses early-bound names (InternetExplorer, READYSTATE_COMPLETE, DataObject), so
' the project must already reference the libraries that declare them.
Sub GetCredit()

    Dim ieApp As InternetExplorer
    Dim ieDoc As Object
    Dim ieTable As Object
    Dim clip As DataObject
    Dim ids As Object
    Dim onclickText As String
    Dim i As Long
    Dim LR As Long
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    'make sure the browser gets closed even when a step below fails
    On Error GoTo CleanUp

    Set ieApp = New InternetExplorer
    ieApp.Visible = True
    ieApp.Navigate "https://brokerage.suntecktts.com/agents/login"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    Set ieDoc = ieApp.Document

    'fill in the login form - View Source from your browser to get the control names
    With ieDoc.forms(0)
        .agent_login.Value = "username" 'id
        .agent_password.Value = "password" 'password
        .submit
    End With
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    ieApp.Navigate "https://brokerage.suntecktts.com/fats/custlist.php?srid=3691&page=1&perpage=ALL"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'get the table based on the table's id
    Set ieDoc = ieApp.Document
    Set ieTable = ieDoc.all.Item("general-report-wrapper")

    'copy the table's html to the clipboard and paste to the sheet
    If Not ieTable Is Nothing Then
        Set clip = New DataObject
        clip.SetText "<html>" & ieTable.outerHTML & "</html>"
        clip.PutInClipboard

        ActiveSheet.Range("A1").Select
        ActiveSheet.PasteSpecial "Unicode Text"
        Rows("1:1").Select
    End If

    'fill J2 down to the last used row; skipped when the used range is too short,
    'because AutoFill needs a destination larger than its single source cell
    LR = ActiveSheet.UsedRange.Rows.count
    If LR > 2 Then
        Range("J2").AutoFill Destination:=Range("J2:J" & LR)
    End If

    'select the rows themselves: the onclick handler is an attribute of each <tr>
    Set ids = ieDoc.querySelectorAll("tr[onClick*='custm_id=']")

    For i = 0 To ids.Length - 1
        onclickText = ids.Item(i).getAttribute("onclick")
        Debug.Print onclickText

        'the id sits between "custm_id=" and the closing double quote (Chr$(34))
        Debug.Print Split(Split(onclickText, "custm_id=")(1), Chr$(34))(0)

        'one row per match, straight down column J
        ActiveSheet.Range("J2").Offset(i, 0).Value = onclickText
    Next i

CleanUp:
    'remember any failure before the cleanup calls below run
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description

    'close 'er up
    If Not ieApp Is Nothing Then ieApp.Quit
    Set ieApp = Nothing

    'surface the original failure to the caller once the browser is closed
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc

End Sub