我需要在此网页上获取具体值。如何将这些值添加到Excel中?
http://www.securities-administrators.ca/nrs/nrsIndvSearchResults.aspx?mode=QS&type=I&indv=david+hill
下面是结果的HTML内容片段,我需要值" BOOTH,David(Arthur)"例如,要导入Excel。
<div>
<table border="1" cellspacing="0" class="gridview_style" id="ctl00_bodyContent_gvIndividuals" rules="all" style="width:100%;border-collapse:collapse;">
<tr>
<th scope="col" style="width:50%;"><span id="ctl00_bodyContent_gvIndividuals_ctl01_lbl_ind">Name</span></th>
<th scope="col" style="width:50%;">Firm(s)</th>
</tr>
<tr>
<td class="articleText">
<a href="javascript:__doPostBack('ctl00$bodyContent$gvIndividuals$ctl02$lbtnIndDetail','')" id="ctl00_bodyContent_gvIndividuals_ctl02_lbtnIndDetail">BOOTH, David (Arthur)</a>
</td>
<td class="articleText"><span id="ctl00_bodyContent_gvIndividuals_ctl02_lblFirmName">SCOTIA CAPITAL INC. / SCOTIA CAPITAUX INC.</span></td>
</tr>
<tr>
<td class="articleText">
<a href="javascript:__doPostBack('ctl00$bodyContent$gvIndividuals$ctl03$lbtnIndDetail','')" id="ctl00_bodyContent_gvIndividuals_ctl03_lbtnIndDetail">HILL, David (Adam)</a>
</td>
<td class="articleText"><span id="ctl00_bodyContent_gvIndividuals_ctl03_lblFirmName">RBC DIRECT INVESTING INC./RBC PLACEMENTS EN DIRECT INC.</span></td>
</tr>
<tr>
<td class="articleText">
<a href="javascript:__doPostBack('ctl00$bodyContent$gvIndividuals$ctl04$lbtnIndDetail','')" id="ctl00_bodyContent_gvIndividuals_ctl04_lbtnIndDetail">HILL, David (Lowell)</a>
</td>
<td class="articleText"><span id="ctl00_bodyContent_gvIndividuals_ctl04_lblFirmName">STATE FARM INVESTOR SERVICES (CANADA) CO.</span></td>
</tr>
</table>
</div>
答案 0 :(得分:0)
以下示例显示如何通过XHR和RegEx从网页检索必要的数据:
Option Explicit
Sub Test()
Dim QueryString As String
Dim PageURL As String
Dim PageContent As String
Dim Match As Variant
Dim a() As Variant
QueryString = "david hill"
PageURL = "http://www.securities-administrators.ca/nrs/nrsIndvSearchResults.aspx?mode=QS&type=I&indv=" & EncodeUriComponent(QueryString)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", PageURL, False
.send
PageContent = .responseText
End With
With CreateObject("Scripting.Dictionary")
For Each Match In RegExMatches(PageContent, "<a id=""ctl00_bodyContent_gvIndividuals_ctl\d{2}_lbtnIndDetail""[^>]*>([^<]*)")
.Item(.Count) = Match.SubMatches(0)
Next
a = WorksheetFunction.Transpose(.Items())
End With
Output2DArray Sheets(1).Cells(1, 1), a
End Sub
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function RegExMatches(sText, sPattern, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True)
With CreateObject("VBScript.RegExp")
.Global = bGlobal
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = sPattern
Set RegExMatches = .Execute(sText)
End With
End Function
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub