所以,我有一个wpf表单,它发送到一个站点,解析html,并返回一个强类型的'href'值列表。 (是的,这是我自己的网站)
我正在利用backgroundworker来释放UI的挂断,并呈现正在运行的进度条。
虽然它只对网站的第一页有用,但如果我决定递归网站,进度条会挂起,而递归就会发生,那么一旦递归完成,进度条就会恢复生机。
你能告诉我这里做错了什么吗?并且可以指导我使用进度条正确使用所述后台工作程序......基本上,进度条应该在执行任务时运行,但我假设基于代码,这确实不是这种情况。
以下是完成此操作的窗口的代码隐藏:
Imports System.Threading.Tasks
Imports System.Threading
Class MainWindow
Private _previousCursor As Cursor = Mouse.OverrideCursor
Private _Spider As New Spider.SpiderIt
Private _Worker As New ComponentModel.BackgroundWorker
Private _RunCount As Integer = 0
Private Sub MainWindow_Loaded(sender As Object, e As System.Windows.RoutedEventArgs) Handles Me.Loaded
Me.workProgress.Visibility = Windows.Visibility.Hidden
_Worker.WorkerReportsProgress = True
_Worker.WorkerSupportsCancellation = True
AddHandler _Worker.DoWork, New System.ComponentModel.DoWorkEventHandler(AddressOf Spider)
AddHandler _Worker.ProgressChanged, New System.ComponentModel.ProgressChangedEventHandler(AddressOf worker_ProgressChanged)
AddHandler _Worker.RunWorkerCompleted, New System.ComponentModel.RunWorkerCompletedEventHandler(AddressOf worker_RunWorkerCompleted)
Me.SiteParse.Focus()
End Sub
Private Sub SiteParseKeyDown(sender As System.Object, e As System.Windows.Input.KeyEventArgs)
If (e.Key = Key.Return) Then
Me.btnParseAll.IsEnabled = False
Me.btnParseSelected.IsEnabled = False
Me.SiteParse.IsEnabled = False
Mouse.OverrideCursor = Cursors.Wait
Me.workProgress.Visibility = Windows.Visibility.Visible
_Worker.RunWorkerAsync(New Typing() With {.Url = SiteParse.Text, .Recurse = Recurse.IsChecked})
End If
End Sub
Private Sub btnParseAll_Click(sender As Object, e As System.Windows.RoutedEventArgs) Handles btnParseAll.Click
Me.btnParseAll.IsEnabled = False
Me.btnParseSelected.IsEnabled = False
Me.SiteParse.IsEnabled = False
Dim _TL As New List(Of DGTyping)
Using New WaitCursor
For Each Item In DG_SiteLinks.Items
_TL.Add(New DGTyping() With {
.SiteUrl = Item.SiteUrl,
.SiteTitle = Item.SiteTitle
})
Next
End Using
Dim _T As New ParseLinks(Me, _TL)
_T.ShowDialog()
End Sub
Private Sub btnParseSelected_Click(sender As Object, e As System.Windows.RoutedEventArgs) Handles btnParseSelected.Click
Me.btnParseAll.IsEnabled = False
Me.btnParseSelected.IsEnabled = False
Me.SiteParse.IsEnabled = False
Dim _TL As New List(Of DGTyping)
Using New WaitCursor
For Each Item In DG_SiteLinks.SelectedItems
_TL.Add(New DGTyping() With {
.SiteUrl = Item.SiteUrl,
.SiteTitle = Item.SiteTitle
})
Next
End Using
Dim _T As New ParseLinks(Me, _TL)
_T.ShowDialog()
End Sub
#Region "Get Site Links"
Private Sub Spider(sender As Object, e As System.ComponentModel.DoWorkEventArgs)
'Do the work here, but need to get the value of SiteParse first
With _Spider
.UrlToParse = DirectCast(e.Argument.Url, String)
.ShouldRecurse = DirectCast(e.Argument.Recurse, Boolean)
.RecurseLevels = 20
.SpiderIt(_Worker)
End With
End Sub
Private Sub worker_ProgressChanged(sender As Object, e As System.ComponentModel.ProgressChangedEventArgs)
workProgress.Value = e.ProgressPercentage
End Sub
Private Sub worker_RunWorkerCompleted(sender As Object, e As System.ComponentModel.RunWorkerCompletedEventArgs)
EndRest()
Dim _IL As List(Of Spider.Typing.InternalLinks)
_IL = _Spider.InternalLinks()
Dim _TL As New List(Of DGTyping)
For Each item In _IL
_TL.Add(New DGTyping() With {
.SiteUrl = item.Url,
.SiteTitle = If(item.Title.Length > 0, item.Title, item.Content)
})
Next
_IL.Clear()
Me.DG_SiteLinks.ItemsSource = _TL
EndSync()
End Sub
Private Sub BrowseSite(sender As Object, e As RoutedEventArgs)
Dim _URL As String = DirectCast(sender, TextBlock).Text
Dim _T As New Browser(_URL)
_T.ShowDialog()
End Sub
Private Sub Window_Closing(sender As Object, e As System.ComponentModel.CancelEventArgs)
If _Worker IsNot Nothing Then
If _Worker.IsBusy Then
_Worker.CancelAsync()
End If
End If
End Sub
Private Sub EndSync()
_Worker.CancelAsync()
_Worker.Dispose()
_Spider.Dispose()
End Sub
Private Sub EndRest()
workProgress.Value = 0
workProgress.Visibility = Windows.Visibility.Hidden
Me.btnParseAll.IsEnabled = True
Me.btnParseSelected.IsEnabled = True
Me.SiteParse.IsEnabled = True
Mouse.OverrideCursor = _previousCursor
End Sub
Partial Public Class Typing
Public Property Url As String
Public Property Recurse As Boolean
End Class
Partial Public Class DGTyping
Public Property SiteUrl As String
Public Property SiteTitle As String
End Class
#End Region
End Class
.SpiderIt()走出指定的站点,将html抓取为HDocument(SuperstarCoders LinqToHtml),解析内部链接,并将它们抛出到强类型列表中。这是在一个单独的类程序集中完成的,并且表现完美。
SpiderIt方法并包含类:
Imports Superstar.Html.Linq
Imports System.Threading.Tasks
Public Class SpiderIt
Implements IDisposable
#Region "Public Properties"
''' <summary>
''' Specify the initial URL to parse
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public Property UrlToParse As String
''' <summary>
''' Should this recurse the internal links of the site
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public Property ShouldRecurse As Boolean = False
''' <summary>
''' Specify the number of levels to recurse
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public Property RecurseLevels As Long = 0
''' <summary>
''' Returns a message from the SpiderIt method
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property Message() As String
Get
Return _Msg
End Get
End Property
''' <summary>
''' Returns a strongly typed list of internal links
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property InternalLinks() As List(Of Typing.InternalLinks)
Get
Return _InternalLinkList
End Get
End Property
''' <summary>
''' Returns a strongly typed list of external links
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ExternalLinks() As List(Of Typing.ExternalLinks)
Get
Return _ExternalLinkList
End Get
End Property
#End Region
#Region "Internal Properties"
Private disposedValue As Boolean
Private _Msg As String
Private _Ctr As Long = 0
Private _InternalLinkList As New List(Of Typing.InternalLinks)
Private _ExternalLinkList As New List(Of Typing.ExternalLinks)
Private _DLer As New Downloader
Private _RCt As Long = 0
#End Region
#Region "Public Methods"
''' <summary>
''' Parse with the specified values
''' </summary>
''' <returns>Boolean</returns>
''' <remarks>Returns true or false, based on if it has completed, as well as a message
''' Spits out 2 strongly typed lists. Both internal and external URLs
''' </remarks>
Public Function SpiderIt(ByVal _Worker) As Boolean
For i As Integer = 1 To 99
_Worker.ReportProgress(i)
System.Threading.Thread.Sleep(50)
Next
_Worker.ReportProgress(100)
Dim _Doc As HDocument = _DLer.DownloadHDoc(UrlToParse)
With _Doc
If _Doc Is Nothing Then
_Msg = "There is no document to parse."
Return False
Else
Try
Dim _AL = .Descendants("a")
'Parse the internal links
ParseLinks(_AL)
_Msg = "Internal Link List Built"
Return True
Catch ex As Exception
_Msg = ex.Message
Return False
End Try
End If
End With
End Function
#End Region
#Region "Internal Methods"
#Region "Spider Helpers"
Private Sub ParseLinks(ByVal _AL As IEnumerable(Of HElement))
Try
Dim _Link As String, _D As HDocument
For i As Long = 0 To _AL.Count - 1
If _AL(i).Attribute("href") IsNot Nothing AndAlso Not (_AL(i).Attribute("href").Value.Contains("//") OrElse
_AL(i).Attribute("href").Value.Contains("http://") OrElse
_AL(i).Attribute("href").Value.Contains("https://") OrElse
_AL(i).Attribute("href").Value.Contains("ftp://") OrElse
_AL(i).Attribute("href").Value.Contains("mailto:") OrElse
_AL(i).Attribute("href").Value.Contains("#")) Then
_Link = UrlToParse & "/" & _AL(i).Attribute("href").Value
If Not (_InternalLinkList.Any(Function(x) x.Url = _Link.Replace("//", "/").Replace("http:/", "http://").Replace("https:/", "https://"))) Then
AddInternalLinks(_Link.Replace("//", "/").Replace("http:/", "http://").Replace("https:/", "https://"),
If(_AL(i).Attribute("target") Is Nothing,
String.Empty,
_AL(i).Attribute("target").Value),
_AL(i).Value,
If(_AL(i).Attribute("title") Is Nothing,
String.Empty,
_AL(i).Attribute("title").Value))
If ShouldRecurse Then
_RCt += 1
If _RCt <= RecurseLevels Then
_D = _DLer.DownloadHDoc(_Link)
ParseLinks(_D.Descendants("a"))
End If
End If
End If
Else
_Link = _AL(i).Attribute("href").Value
If Not (_ExternalLinkList.Any(Function(x) x.Url = _Link)) Then
AddExternalLinks(_Link,
If(_AL(i).Attribute("target") Is Nothing,
String.Empty,
_AL(i).Attribute("target").Value),
_AL(i).Value,
If(_AL(i).Attribute("title") Is Nothing,
String.Empty,
_AL(i).Attribute("title").Value))
End If
End If
Next
Catch ex As Exception
_Msg += ex.StackTrace
End Try
End Sub
Private Sub AddExternalLinks(ByVal _Link As String, ByVal _Target As String, ByVal _Content As String, ByVal _Title As String)
Try
_ExternalLinkList.Add(New Typing.ExternalLinks With {
.Url = _Link,
.Content = _Content,
.Target = _Target,
.Title = _Title
})
Catch ex As Exception
_Msg += ex.StackTrace
End Try
End Sub
Private Sub AddInternalLinks(ByVal _Link As String, ByVal _Target As String, ByVal _Content As String, ByVal _Title As String)
Try
_InternalLinkList.Add(New Typing.InternalLinks With {
.Url = _Link,
.Content = _Content,
.Target = _Target,
.Title = _Title
})
Catch ex As Exception
_Msg += ex.StackTrace
End Try
End Sub
#End Region
#Region "IDisposable Support"
Protected Overridable Sub Dispose(disposing As Boolean)
If Not Me.disposedValue Then
If disposing Then
End If
_Msg = String.Empty
_InternalLinkList.Clear()
_ExternalLinkList.Clear()
_DLer.Dispose()
End If
Me.disposedValue = True
End Sub
Public Sub Dispose() Implements IDisposable.Dispose
Dispose(True)
GC.SuppressFinalize(Me)
End Sub
#End Region
#End Region
End Class
答案 0 :(得分:1)
我发布这个作为答案'因为评论文字太长了。
也许我错过了一些东西(以这种格式阅读代码很痛苦)。
您的计数从1到99,并且每50毫秒报告一次进度。在两者之间似乎没有发生任何事情,我的意思是工作负荷会增加一些真正的延迟。然后你报告100%,然后它似乎实际上加载文件和解析,这需要一段时间,我猜。
你不应该在ParseLinks()方法的某个地方抛出一个ReportProgress()。当然,您必须能够计算要解析的节点数,这样您才能以一定的速度报告进度,这与工作完成时的100%进度相吻合。
编写另一个递归方法,只提前计算节点数(应该很快),然后使用该数字,你将知道要传递给ReportProgress()的值(你应该再次调用ParseLinks() )所以你将有稳定的进步,达到100%。 (显然你必须将对BackgroundWorker的引用传递给ParseLinks()to)
可能很难,但没人说这很容易:D。
干杯。