VB.NET: the proper way to thread this application

Time: 2016-03-23 17:20:12

Tags: vb.net multithreading

My application is (for the most part) a web scraper that stores information in a database. So far I have two classes:

  1. clsSpyder - this basically wraps up the scraping process
  2. clsDB - this handles any database work

My test routine goes through all the URLs, scrapes the content and pushes it into the database. It is sequential and very simple, but I would like to have, say, N threads running these processes (scrape and store). My sequential code is:

    Private Sub Button4_Click(sender As Object, e As EventArgs) Handles Button4.Click
    
    
        'Grab List
        Dim tDS As New DataSet
        Dim tDB As New clsTermsDB
        Dim tSpyder As New clsAGDSpyder
        Dim sResult As New TermsRuns
    
        'Grab a list of all URLS
        tDS = tDB.GetTermsList(1)
    
        Try
    
            For Each Row As DataRow In tDS.Tables(0).Rows
    
                rtbList.AppendText(Row("url_toBeCollected") & vbCrLf)
                sResult = tSpyder.SpiderPage(Row("url_toBeCollected"))
    
                'If nothing is found, do not store
                If sResult.html <> "" And sResult.text <> "" Then
                    tDB.InsertScrape(Now(), sResult.html, sResult.text, Row("url_uid"), 1)
                End If
    
            Next
    
            Exit Sub
    
        Catch ex As Exception
            MessageBox.Show(ex.Message)
        End Try
    End Sub
    

With that in mind, and noting that I pass variables to the SpiderPage and InsertScrape methods... how can I implement threading? It must be simple, but I feel like I have been googling and experimenting for days without success :(
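
For reference, here is a minimal sketch of the kind of thing I am aiming for: a hypothetical ScrapeAllParallel sub using Parallel.ForEach with a capped number of workers (untested; it assumes a separate clsAGDSpyder and clsTermsDB can be created per iteration, and it marshals the RichTextBox update back onto the UI thread):

    'Minimal sketch, not tested. Requires Imports System.Linq and Imports System.Threading.Tasks.
    Private Sub ScrapeAllParallel(maxThreads As Integer)

        Dim tDB As New clsTermsDB
        Dim tDS As DataSet = tDB.GetTermsList(1)

        'Snapshot the rows so each worker gets its own DataRow
        Dim rows = tDS.Tables(0).Rows.Cast(Of DataRow)().ToList()

        Dim options As New ParallelOptions With {.MaxDegreeOfParallelism = maxThreads}

        Parallel.ForEach(rows, options,
            Sub(Row)
                'Assumption: clsAGDSpyder / clsTermsDB are cheap to create per worker
                Dim spyder As New clsAGDSpyder
                Dim db As New clsTermsDB

                Dim sResult As TermsRuns = spyder.SpiderPage(CStr(Row("url_toBeCollected")))

                'If nothing is found, do not store
                If sResult.html <> "" AndAlso sResult.text <> "" Then
                    db.InsertScrape(Now(), sResult.html, sResult.text, CInt(Row("url_uid")), 1)
                End If

                'UI controls must only be touched on the UI thread
                Dim updateUi As Action = Sub() rtbList.AppendText(CStr(Row("url_toBeCollected")) & vbCrLf)
                rtbList.BeginInvoke(updateUi)
            End Sub)
    End Sub

If something like this is called straight from the button click, Parallel.ForEach still blocks the calling (UI) thread until every page is done, so I assume it would also need to be wrapped in Task.Run (or an Async handler).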

*** ADDED: the SpiderPage method:

        Public Function SpiderPage(PageURL As String) As TermsRuns
        Dim webget As New HtmlWeb
        Dim node As HtmlNode
        Dim doc As New HtmlDocument
        Dim docNOHTML As HtmlDocument
    
        Dim uri As New Uri(PageURL)
        Dim wc As HttpWebRequest = DirectCast(WebRequest.Create(uri.AbsoluteUri), HttpWebRequest)
        Dim wcStream As Stream
    
    
        wc.AllowAutoRedirect = True
        wc.MaximumAutomaticRedirections = 3
    
        'Set Headers
        wc.UserAgent = "Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4"
        wc.Headers.Add("REMOTE_ADDR", "66.83.101.5")
        wc.Headers.Add("HTTP_REFERER", "66.83.101.5")
    
    
        'Set HTMLAgility Kit Useragent Spoofing (not needed, I don't think)
        webget.UserAgent = "Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4"
    
        'Certificate stuff
        wc.UseDefaultCredentials = True
        wc.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials
        ServicePointManager.ServerCertificateValidationCallback = AddressOf AcceptAllCertifications
    
        'Create Cookie Jar
        Dim CookieJar As New CookieContainer
        wc.CookieContainer = CookieJar
    
        'Keep Alive Settings
        wc.KeepAlive = True
        wc.Timeout = &H7530 '30,000 ms (30 seconds)
    
        'Read the web page
        Dim wr As HttpWebResponse = Nothing
        Try
    
            wr = DirectCast(wc.GetResponse(), HttpWebResponse)
            wcStream = wr.GetResponseStream()
    
            doc.Load(wcStream)
    
            'Remove HTML from the document
            docNOHTML = RemoveUnWantedTags(doc)
    
            'Grab only the content inside the <body> tag
            node = docNOHTML.DocumentNode.SelectSingleNode("//body")
    
            'Output
            SpiderPage = New TermsRuns
            SpiderPage.html = node.InnerHtml
            SpiderPage.text = node.InnerText
            Return SpiderPage
    
        Catch ex As Exception
            'Something goes here when scraping returns an error
            SpiderPage = New TermsRuns
            SpiderPage.html = ""
            SpiderPage.text = ""

        Finally
            'Release the response so the connection goes back to the pool
            If wr IsNot Nothing Then wr.Close()
        End Try
    
    
    End Function
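
If it matters for the threading question: SpiderPage builds its own HttpWebRequest and HtmlDocument on every call, so I think concurrent calls are mostly independent. My understanding, though, is that .NET throttles clients to two concurrent connections per host by default, and that ServicePointManager.ServerCertificateValidationCallback is process-wide anyway, so setting it inside SpiderPage affects all threads. I assume something like this would be needed once at startup (not verified):

    'Assumption: raise the default per-host connection limit before starting N workers
    ServicePointManager.DefaultConnectionLimit = 16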
    

*** ADDED: InsertScrape:

    Public Function InsertScrape(scrape_ts As DateTime, scrape_html As String, scrape_text As String, url_id As Integer, tas_id As Integer) As Boolean
        Dim myCommand As MySqlClient.MySqlCommand
    
    
        Try
    
            'Set Connection String
            myConn.ConnectionString = myConnectionString
    
            'Push Command to Client Object
            myCommand = New MySqlClient.MySqlCommand
            myCommand.Connection = myConn
            myCommand.CommandText = "spInsertScrape"
            myCommand.CommandType = CommandType.StoredProcedure
            myCommand.Parameters.AddWithValue("@scrape_ts", scrape_ts)
            myCommand.Parameters("@scrape_ts").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@scrape_html", scrape_html)
            myCommand.Parameters("@scrape_html").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@scrape_text", scrape_text)
            myCommand.Parameters("@scrape_text").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@url_id", url_id)
            myCommand.Parameters("@url_id").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@tas_id", tas_id)
            myCommand.Parameters("@tas_id").Direction = ParameterDirection.Input
    
            'Open Connection
            myConn.Open()
            myCommand.ExecuteNonQuery()
    
    
            'Close Connection
            myConn.Close()
    
            InsertScrape = True
    
        Catch ex As Exception
            'Put Message Here
            InsertScrape = False
            MessageBox.Show(ex.Message)
        Finally
            'Make sure the connection is released even if the insert fails
            If myConn.State <> ConnectionState.Closed Then myConn.Close()
        End Try
    End Function
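
I also suspect the class-level myConn would be a problem once several threads call InsertScrape at the same time, since they would all be sharing one MySqlConnection. A minimal sketch of a per-call connection (a hypothetical InsertScrapeThreadSafe, assuming the same myConnectionString and spInsertScrape stored procedure as above):

    'Sketch only: each call opens and disposes its own connection, so concurrent calls don't collide
    Public Function InsertScrapeThreadSafe(scrape_ts As DateTime, scrape_html As String,
                                           scrape_text As String, url_id As Integer,
                                           tas_id As Integer) As Boolean
        Try
            Using conn As New MySqlClient.MySqlConnection(myConnectionString)
                Using cmd As New MySqlClient.MySqlCommand("spInsertScrape", conn)
                    cmd.CommandType = CommandType.StoredProcedure
                    cmd.Parameters.AddWithValue("@scrape_ts", scrape_ts)
                    cmd.Parameters.AddWithValue("@scrape_html", scrape_html)
                    cmd.Parameters.AddWithValue("@scrape_text", scrape_text)
                    cmd.Parameters.AddWithValue("@url_id", url_id)
                    cmd.Parameters.AddWithValue("@tas_id", tas_id)

                    conn.Open()
                    cmd.ExecuteNonQuery()
                End Using
            End Using

            Return True

        Catch ex As Exception
            'Avoid MessageBox on worker threads; log or collect errors instead
            Return False
        End Try
    End Function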
    

Thanks in advance.

0 Answers:

No answers yet.