使用vb.net解析/获取股票交易数据的网页数据

时间:2014-08-21 16:34:39

标签: html vb.net web-scraping html-table

我想用 VB.NET 抓取下面这个网页中的表格数据,并把它写入 MySQL 服务器。请给出 VB.NET 代码方面的建议。我对 HTML 很不熟悉,希望通过学习这个具体的例子,以后能够自己把同样的方法应用到其他网站。

该表格有 6 列,大约 20 行(行数可能有变化)。另外,根据该股票交易数据的多少,结果可能会分成多个页面。

抱歉,我已经尽力整理格式了。原始网页 -> http://boc.quotepower.com/web/bochk/stocks_mktTransactions.jsp?stock=931&lotsize=100&searchType=2&turnover=500000&rangeType=1&begin_hour=9&begin_min=30&end_hour=16&end_min=0&lang=zh_TW&domain=BOCHK&rand=-433965158&lastLevel1Name=nav_stocks&lastStock=01300&x=37&y=10

不过,这些数据会在每天 00:00(GMT+8)之前被清空。

    <table width ="100%" border="0" cellspacing="0" cellpadding="0" class="data">
    <tr>
        <td  align="left" class="col">交易時間</td>
        <td  align="right" class="col">交易價格</td>
        <td  align="right" class="col">成交量</td>
        <td  align="right" class="col">成交額</td>
        <td  align="right" class="col">交易量(手)</td>
        <td  align="center" class="col">高於/低於市價</td>
    </tr>
    <tr><td align="left" class="r1">15:58:50</td><td align="right" class="r1">0.82</td><td align="right" class="r1">650K</td><td align="right" class="r1">53.30萬</td><td align="right" class="r1">130</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>



        <tr><td align="left" class="r0">15:58:27</td><td align="right" class="r0">0.83</td><td align="right" class="r0">945K</td><td align="right" class="r0">78.44萬</td><td align="right" class="r0">189</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">15:58:27</td><td align="right" class="r1">0.82</td><td align="right" class="r1">1.96M</td><td align="right" class="r1">160.31萬</td><td align="right" class="r1">391</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr><tr><td align="left" class="r0">15:57:37</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1M</td><td align="right" class="r0">82萬</td><td align="right" class="r0">200</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">15:56:19</td><td align="right" class="r1">0.82</td><td align="right" class="r1">850K</td><td align="right" class="r1">69.70萬</td><td align="right" class="r1">170</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">15:56:17</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1M</td><td align="right" class="r0">82萬</td><td align="right" class="r0">200</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">15:55:23</td><td align="right" class="r1">0.82</td><td align="right" class="r1">1M</td><td align="right" class="r1">82萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">15:38:32</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1.20M</td><td align="right" class="r0">98.40萬</td><td align="right" class="r0">240</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">15:22:54</td><td align="right" class="r1">0.81</td><td align="right" class="r1">820K</td><td align="right" class="r1">66.42萬</td><td align="right" class="r1">164</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">14:41:21</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1.68M</td><td align="right" class="r0">137.76萬</td><td align="right" class="r0">336</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">14:07:45</td><td align="right" class="r1">0.82</td><td align="right" class="r1">1M</td><td align="right" class="r1">82萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">13:55:13</td><td align="right" class="r0">0.82</td><td align="right" class="r0">645K</td><td align="right" class="r0">52.89萬</td><td align="right" class="r0">129</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">13:55:12</td><td align="right" class="r1">0.82</td><td align="right" class="r1">5M</td><td align="right" class="r1">410萬</td><td align="right" class="r1">1000</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">13:54:58</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1M</td><td align="right" class="r0">82萬</td><td align="right" class="r0">200</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">13:54:11</td><td align="right" class="r1">0.82</td><td align="right" class="r1">1M</td><td align="right" class="r1">82萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">13:20:13</td><td align="right" class="r0">0.83</td><td align="right" class="r0">1.22M</td><td align="right" class="r0">100.84萬</td><td align="right" class="r0">243</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">13:20:03</td><td align="right" class="r1">0.83</td><td align="right" class="r1">1.50M</td><td align="right" class="r1">124.50萬</td><td align="right" class="r1">300</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">13:14:39</td><td align="right" class="r0">0.83</td><td align="right" class="r0">5.85M</td><td align="right" class="r0">485.55萬</td><td align="right" class="r0">1170</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">13:10:05</td><td align="right" class="r1">0.83</td><td align="right" class="r1">1M</td><td align="right" class="r1">83萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">13:00:37</td><td align="right" class="r0">0.82</td><td align="right" class="r0">685K</td><td align="right" class="r0">56.17萬</td><td align="right" class="r0">137</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">11:11:02</td><td align="right" class="r1">0.82</td><td align="right" class="r1">755K</td><td align="right" class="r1">61.91萬</td><td align="right" class="r1">151</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">10:36:34</td><td align="right" class="r0">0.83</td><td align="right" class="r0">1.70M</td><td align="right" class="r0">141.10萬</td><td align="right" class="r0">340</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">10:28:40</td><td align="right" class="r1">0.83</td><td align="right" class="r1">1M</td><td align="right" class="r1">83萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">10:25:50</td><td align="right" class="r0">0.83</td><td align="right" class="r0">800K</td><td align="right" class="r0">66.40萬</td><td align="right" class="r0">160</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">10:23:36</td><td align="right" class="r1">0.83</td><td align="right" class="r1">1M</td><td align="right" class="r1">83萬</td><td align="right" class="r1">200</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">10:19:05</td><td align="right" class="r0">0.83</td><td align="right" class="r0">1.50M</td><td align="right" class="r0">124.50萬</td><td align="right" class="r0">300</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r1">10:07:19</td><td align="right" class="r1">0.83</td><td align="right" class="r1">700K</td><td align="right" class="r1">58.10萬</td><td align="right" class="r1">140</td><td align="center" class="r1"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r0">10:03:18</td><td align="right" class="r0">0.83</td><td align="right" class="r0">645K</td><td align="right" class="r0">53.54萬</td><td align="right" class="r0">129</td><td align="center" class="r0"><img src="images/arrow_down.gif"/></td></tr>
<tr><td align="left" class="r1">09:58:59</td><td align="right" class="r1">0.83</td><td align="right" class="r1">4.57M</td><td align="right" class="r1">379.31萬</td><td align="right" class="r1">914</td><td align="center" class="r1"><img src="images/arrow_up.gif"/></td></tr>
<tr><td align="left" class="r0">09:56:36</td><td align="right" class="r0">0.82</td><td align="right" class="r0">1.19M</td><td align="right" class="r0">97.58萬</td><td align="right" class="r0">238</td><td align="center" class="r0"><img src="images/arrow_up.gif"/></td></tr>

    </table>

1 个答案:

答案 0 :(得分:0)

1)下载页面

[ VB.NET ]

' We need these namespaces
Imports System
Imports System.Text
Imports System.Net

''' <summary>
''' Code-behind for a page that downloads the raw HTML of a user-supplied
''' URL and shows it in a text box.
''' </summary>
Partial Class _Default
  Inherits System.Web.UI.Page

  ''' <summary>
  ''' Click handler for btnGetHTML: downloads the page whose address is in
  ''' txtURL and writes its markup into txtPageHTML.
  ''' </summary>
  ''' <param name="sender">Control that raised the event.</param>
  ''' <param name="e">Event data (unused).</param>
  Protected Sub btnGetHTML_Click(ByVal sender As Object, ByVal e As System.EventArgs) Handles btnGetHTML.Click
    ' Ignore empty or whitespace-only input instead of handing it to WebClient.
    Dim url As String = txtURL.Text.Trim()
    If url = "" Then
      Return
    End If

    ' WebClient is IDisposable; Using releases it even when the download throws.
    Using MyWebClient As New WebClient()
      ' Read web page HTML to byte array
      Dim PageHTMLBytes() As Byte = MyWebClient.DownloadData(url)

      ' Convert result from byte array to string and display it in txtPageHTML.
      ' Decoding is fixed to UTF-8; a page served in another charset would
      ' need a different Encoding here.
      Dim oUTF8 As UTF8Encoding = New UTF8Encoding()
      txtPageHTML.Text = oUTF8.GetString(PageHTMLBytes)
    End Using
  End Sub

End Class

2)解析其内容

Imports mshtml

''' <summary>
''' Loads the given markup into an in-memory MSHTML document, copies the
''' inner text of every anchor element into that element's title
''' attribute, and returns the resulting body markup.
''' </summary>
''' <param name="htmlToParse">HTML source to load.</param>
''' <returns>The innerHTML of the document body after the titles are set.</returns>
Function parseMyHtml(ByVal htmlToParse$) As String
    ' Build the document from the supplied markup.
    Dim doc As IHTMLDocument2 = New HTMLDocumentClass()
    doc.write(htmlToParse)
    doc.close()

    ' Narrow every element under <body> down to the <a> tags.
    Dim bodyElements As IHTMLElementCollection = doc.body.all
    Dim anchors As IHTMLElementCollection = bodyElements.tags("a")

    For Each anchor As IHTMLElement In anchors
        anchor.title = anchor.innerText
    Next

    Return doc.body.innerHTML
End Function

3)写入MySQL

''' <summary>
''' Inserts one row (stock, price, time) into the quotes table.
''' </summary>
''' <param name="stock_param">Value bound to the @stock parameter.</param>
''' <param name="price_param">Value bound to the @price parameter.</param>
''' <param name="time_param">Value bound to the @time parameter.</param>
''' <returns>
''' True when the INSERT executed without throwing; False when opening the
''' connection or executing the statement failed.
''' </returns>
Public Function InsertQuote(stock_param, price_param, time_param) As Boolean

    ' SQLConnection and connectionString are declared outside this function.
    SQLConnection = New MySqlConnection()

    Try
        ' Opening inside the Try so a connection failure is reported through
        ' the Boolean result like any other insert failure.
        SQLConnection.ConnectionString = connectionString
        SQLConnection.Open()

        Using sqlCommand As New MySqlCommand()
            sqlCommand.Connection = SQLConnection
            ' The VALUES clause is required: without it the statement is not
            ' valid SQL and the parameters added below are never referenced.
            sqlCommand.CommandText = "insert into quotes (stock, price, time) values (@stock, @price, @time)"
            sqlCommand.Parameters.AddWithValue("@stock", stock_param)
            sqlCommand.Parameters.AddWithValue("@price", price_param)
            sqlCommand.Parameters.AddWithValue("@time", time_param)
            sqlCommand.ExecuteNonQuery()
        End Using

        Return True

    Catch ex As Exception
        ' Show the message before returning; placed after Return it never ran.
        MsgBox("Error occured: Could not insert record")
        Return False

    Finally
        ' Always release the connection, on success and on failure.
        SQLConnection.Close()
    End Try

End Function