我有一些HTML源代码,我从网站上获取选项报价。 (请参阅下文)
在tr中提取各种文本值并根据执行价格存储在集合中的最佳方式是什么(在这种情况下,4700中间有4700
)有些人建议使用正则表达式,而其他人建议使用html解析器。我在VBA这样做,所以最好的方式是什么?
<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>
<td><a href="javascript:popup1('','','','','CE')"><img src="/images/print3.gif"></a>
</td>-->
<td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','CE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>
<td class="ylwbg"> 2,935,500</td>
<td class="ylwbg"> 27,550</td>
<td class="ylwbg"> 12,458</td>
<td class="ylwbg"> 23.79</td>
<!-- End-->
<td class="ylwbg">
<a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=25JAN2012" target="_blank"> 139.25</a>
</td>
<!--*Net Change*-->
<td class="ylwbg" Style="color:Red;"> -7.35</td>
<td class="ylwbg"> 200</td>
<td class="ylwbg"> 139.15</td>
<td class="ylwbg"> 142.45</td>
<td class="ylwbg"> 200</td>
<td class="grybg"><a href="/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"><b>4700.00</b></a></td>
<td class="nobg"> 1,300</td>
<td class="nobg"> 76.00</td>
<td class="nobg"> 79.00</td>
<td class="nobg"> 1,350</td>
<!--*Net Change*-->
<td class="nobg" Style="color:Red;"> -1.55</td>
<td class="nobg">
<!-- <a href="javascript:popup1('NIFTY','OPTIDX','25JAN2012','4700.00','PE')"> 76.00</a> -->
<a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=PE&expiry=25JAN2012" target="_blank"> 76.00</a>
</td>
<td class="nobg"> 26.33</td>
<td class="nobg"> 32,772</td>
<td class="nobg"> 103,700</td>
<td class="nobg"> 5,123,300</td>
<td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','PE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>
<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>
<td><a href="javascript:popup1('','','','','PE')"><img src="/images/print3.gif"></a></td>-->
</tr>
答案 0 :(得分:2)
经过一番摆弄后,我使用
派生了一个正则表达式/ VBA解决方案strSite
以适应)查看源HTML以查找正则表达式
调用选项有一个共同的起始和结束字符串,用于分隔10个值,但有三个不同的字符串
<td class="ylwbg">
X </td>
Style
>
前面有X
(和其他文字)
<a href text
X </a>
正则表达式
.Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
提取所有需要的字符串,但稍后需要在字符串5上进一步工作
Put 选项以<td class="nobg"
开头,因此很难通过获得积分1-3的正则表达式提取这些选项
实际代码
Sub GetTxt()
Dim objXmlHTTP As Object
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim strResponse As String
Dim strSite As String
Dim lngCnt As Long
Dim strTemp As String
Dim X(1 To 20, 1 To 10)
X(1, 1) = "OI"
X(1, 2) = "Chng in vol"
X(1, 3) = "Volume"
X(1, 4) = "IV"
X(1, 5) = "LTP"
X(1, 6) = "Net Chg"
X(1, 7) = "Bid Qty"
X(1, 8) = "Bid Price"
X(1, 9) = "Ask Price"
X(1, 10) = "Ask Qnty"
Set objXmlHTTP = CreateObject("MSXML2.XMLHTTP")
strSite = "http://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"
On Error GoTo ErrHandler
With objXmlHTTP
.Open "GET", strSite, False
.Send
If .Status = 200 Then strResponse = .ResponseText
End With
On Error GoTo 0
Set objRegex = CreateObject("vbscript.regexp")
With objRegex
'*cleaning regex* to remove all spaces
.Pattern = "[\xA0\s]+"
.Global = True
strResponse = .Replace(strResponse, vbNullString)
.Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
If .Test(strResponse) Then
lngCnt = 20
Set objRegMC = .Execute(strResponse)
For Each objRegM In objRegMC
lngCnt = lngCnt + 1
If Right$(objRegM.submatches(2), 2) <> "a>" Then
X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = objRegM.submatches(2)
Else
'Get submatches of the form <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=23FEB2012" target="_blank"> 206.40</a>
strTemp = Val(Right(objRegM.submatches(2), Len(objRegM.submatches(2)) - InStrRev(objRegM.submatches(2), """") - 1))
X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = strTemp
End If
Next
Else
MsgBox "Parsing unsuccessful", vbCritical
End If
End With
Set objRegex = Nothing
Set objXmlHTTP = Nothing
[a1].Resize(UBound(X, 1), UBound(X, 2)) = X
Exit Sub
ErrHandler:
MsgBox "Site not accessible"
If Not objXmlHTTP Is Nothing Then Set objXmlHTTP = Nothing
End Sub