我正在写一个.net网络蜘蛛。虽然它在我的一个网站(大约20页)上工作得很好,但它会在我管理的另一个网站(大约500页)上发生System.StackOverflowException炸弹。
我正在win7 64bit i3笔记本电脑上开发这款产品,配备8g内存,128g hyperx ssd,无交换文件。
我的问题是......我是否因为没有交换文件而抛出此异常?
cpu使用率(vs2010调试过程)仅达到34%左右,只有74-75m内存使用率。
如果是这种情况,我怎样才能确保它不会发生?
这没有递归。
代码:
Imports System.Reflection
Imports System.Net
Imports Superstar.Html.Linq
Public Class Downloader
Implements IDisposable
''' <summary>
''' Get the returned downloaded string
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnString As String
Get
Return _StrReturn
End Get
End Property
Private Property _StrReturn As String
''' <summary>
''' Get the returned downloaded byte array
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnBytes As Byte()
Get
Return _FSReturn
End Get
End Property
Private Property _FSReturn As Byte()
Private Property _UserAgent As String = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13"
Private Property DataReceived As Boolean = False
''' <summary>
''' Download a string, but do not block the calling thread
''' </summary>
''' <param name="_Path"></param>
''' <remarks></remarks>
Public Sub DownloadString(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
SetAllowUnsafeHeaderParsing20()
Using wc As New Net.WebClient()
With wc
Dim _ct As Long = 0
DataReceived = False
.Headers.Add("user-agent", _UserAgent)
.DownloadStringAsync(New System.Uri(_Path))
AddHandler .DownloadStringCompleted, AddressOf StringDownloaded
Do While Not DataReceived
If _Worker IsNot Nothing Then
_ct += 1
ReportProgress(_ct, _Worker)
End If
Loop
End With
End Using
End Sub
''' <summary>
''' Download a file, but do not block the calling thread
''' </summary>
''' <param name="_Path"></param>
''' <remarks></remarks>
Public Sub DownloadFile(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
SetAllowUnsafeHeaderParsing20()
Using wc As New Net.WebClient()
With wc
Dim _ct As Long = 0
DataReceived = False
.Headers.Add("user-agent", _UserAgent)
.DownloadDataAsync(New System.Uri(_Path))
AddHandler .DownloadDataCompleted, AddressOf FileStreamDownload
Do While Not DataReceived
If _Worker IsNot Nothing Then
_ct += 1
ReportProgress(_ct, _Worker)
End If
Loop
End With
End Using
End Sub
''' <summary>
''' Download a parsable HDocument, for using HtmlToLinq
''' </summary>
''' <param name="_Path"></param>
''' <returns></returns>
''' <remarks></remarks>
Public Function DownloadHDoc(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As HDocument
Try
'
'
'
'
'
'
'StackOverFlowException Occurring Here!
DownloadString(_Path, _Worker)
Return HDocument.Parse(_StrReturn)
Catch soex As StackOverflowException
'put some logging in here, with the path attempted
Return Nothing
Catch ex As Exception
SetAllowUnsafeHeaderParsing20()
Return HDocument.Load(_Path)
End Try
End Function
#Region "Internals"
Private Sub SetAllowUnsafeHeaderParsing20()
Dim a As New System.Net.Configuration.SettingsSection
Dim aNetAssembly As System.Reflection.Assembly = Assembly.GetAssembly(a.GetType)
Dim aSettingsType As Type = aNetAssembly.GetType("System.Net.Configuration.SettingsSectionInternal")
Dim args As Object() = Nothing
Dim anInstance As Object = aSettingsType.InvokeMember("Section", BindingFlags.Static Or BindingFlags.GetProperty Or BindingFlags.NonPublic, Nothing, Nothing, args)
Dim aUseUnsafeHeaderParsing As FieldInfo = aSettingsType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic Or BindingFlags.Instance)
aUseUnsafeHeaderParsing.SetValue(anInstance, True)
End Sub
Private Sub FileStreamDownload(ByVal sender As Object, ByVal e As DownloadDataCompletedEventArgs)
If e.Cancelled = False AndAlso e.Error Is Nothing Then
DataReceived = True
_FSReturn = DirectCast(e.Result, Byte())
Else
_FSReturn = Nothing
End If
End Sub
Private Sub StringDownloaded(ByVal sender As Object, ByVal e As DownloadStringCompletedEventArgs)
If e.Cancelled = False AndAlso e.Error Is Nothing Then
DataReceived = True
_StrReturn = DirectCast(e.Result, String)
Else
_StrReturn = String.Empty
End If
End Sub
#End Region
#Region "IDisposable Support"
Private disposedValue As Boolean ' To detect redundant calls
' IDisposable
Protected Overridable Sub Dispose(disposing As Boolean)
If Not Me.disposedValue Then
If disposing Then
End If
_StrReturn = String.Empty
_FSReturn = Nothing
End If
Me.disposedValue = True
End Sub
Public Sub Dispose() Implements IDisposable.Dispose
Dispose(True)
GC.SuppressFinalize(Me)
End Sub
#End Region
End Class
调用stackoverflow的代码
Private Function PopulateSEOList(Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As List(Of Typing.SEO)
Dim _L = LinkList, _Ct As Long = 0
Dim _NL As New List(Of Typing.SEO)
Dim _EL As Typing.SEO.Elements = Nothing
Dim _Doc As HDocument = Nothing, _Keywords As String = String.Empty, _Description As String = String.Empty, _Content As HElement = Nothing
For i As Long = 0 To _L.Count - 1
Try
_Ct += 1
Using _HDoc As New Downloader
With _HDoc
_Doc = .DownloadHDoc(_L(i).SiteUrl)
End With
End Using
Tasks.Parallel.Invoke(Sub()
'Keywords
For Each Item In _Doc.Descendants("meta")
If Item.Attribute("name") = "keywords" Then
_Keywords = Item.Attribute("content").Value
'Exit For
End If
Next
End Sub,
Sub()
'Description
For Each Item In _Doc.Descendants("meta")
If Item.Attribute("name") = "description" Then
_Description = Item.Attribute("content").Value
'Exit For
End If
Next
End Sub,
Sub()
If _Doc.Descendants("body") IsNot Nothing Then
_Content = _Doc.Descendants("body").FirstOrDefault
End If
End Sub,
Sub()
_EL = New Typing.SEO.Elements() With {
.H1 = If(_Doc.Descendants("h1") IsNot Nothing, (From n In _Doc.Descendants("h1").AsParallel()
Select n.Value).ToList(), Nothing),
.H2 = If(_Doc.Descendants("h2") IsNot Nothing, (From n In _Doc.Descendants("h2").AsParallel()
Select n.Value).ToList(), Nothing),
.H3 = If(_Doc.Descendants("h3") IsNot Nothing, (From n In _Doc.Descendants("h3").AsParallel()
Select n.Value).ToList(), Nothing),
.H4 = If(_Doc.Descendants("h4") IsNot Nothing, (From n In _Doc.Descendants("h4").AsParallel()
Select n.Value).ToList(), Nothing),
.H5 = If(_Doc.Descendants("h5") IsNot Nothing, (From n In _Doc.Descendants("h5").AsParallel()
Select n.Value).ToList(), Nothing),
.H6 = If(_Doc.Descendants("h6") IsNot Nothing, (From n In _Doc.Descendants("h6").AsParallel()
Select n.Value).ToList(), Nothing),
.UL = If(_Doc.Descendants("ul") IsNot Nothing, (From n In _Doc.Descendants("ul").AsParallel()
Select n.Value).ToList(), Nothing),
.OL = If(_Doc.Descendants("ol") IsNot Nothing, (From n In _Doc.Descendants("ol").AsParallel()
Select n.Value).ToList(), Nothing),
.STRONG = If(_Doc.Descendants("strong") IsNot Nothing OrElse _Doc.Descendants("b") IsNot Nothing,
(From n In _Doc.Descendants("strong").AsParallel()
Select n.Value).Union(From n In _Doc.Descendants("b").AsParallel()
Select n.Value).ToList(), Nothing),
.BLOCKQUOTE = If(_Doc.Descendants("blockquote") IsNot Nothing, (From n In _Doc.Descendants("blockquote").AsParallel()
Select n.Value).ToList(), Nothing),
.EM = If(_Doc.Descendants("em") IsNot Nothing OrElse _Doc.Descendants("i") IsNot Nothing,
(From n In _Doc.Descendants("em").AsParallel()
Select n.Value).Union(From n In _Doc.Descendants("i").AsParallel()
Select n.Value).ToList(), Nothing),
.A = If(_Doc.Descendants("a") IsNot Nothing, (From n In _Doc.Descendants("a").AsParallel()
Select New Typing.SEO.Elements.Links() With {
.Content = n.Value,
.Title = If(n.Attribute("title") IsNot Nothing,
n.Attribute("title").Value,
Nothing),
.Target = If(n.Attribute("target") IsNot Nothing,
n.Attribute("target").Value,
Nothing),
.Rel = If(n.Attribute("rel") IsNot Nothing,
n.Attribute("rel").Value,
Nothing),
.Href = If(n.Attribute("href") IsNot Nothing,
n.Attribute("href").Value,
Nothing)
}).ToList(), Nothing),
.IMG = If(_Doc.Descendants("img") IsNot Nothing,
(From n In _Doc.Descendants("img").AsParallel()
Select New Typing.SEO.Elements.Images() With {
.Alt = If(n.Attribute("alt") IsNot Nothing,
n.Attribute("alt").Value,
Nothing),
.Source = If(n.Attribute("src") IsNot Nothing,
n.Attribute("src").Value,
Nothing),
.Title = If(n.Attribute("title") IsNot Nothing,
n.Attribute("title").Value,
Nothing)
}).ToList(),
Nothing)
}
End Sub)
_NL.Add(New Typing.SEO() With {
.Link = _L(i).SiteUrl,
.Title = _Doc.Descendants("title").First().Value,
.Keywords = _Keywords,
.Description = _Description,
.Content = _Content,
.ContentElements = _EL
})
_L.RemoveAt(i)
_EL = Nothing : _Doc = Nothing
ReportProgress((_Ct / _L.Count) * 100, _Worker)
Catch ex As Exception
'Put logging in here
End Try
Next
Return _NL
End Function
答案 0 :(得分:1)
您可能知道,此错误很可能是由于代码中的错误导致递归算法中的无限循环。虽然你说你不使用递归,但你可能会无意中发生递归。
找出导致它的原因的最简单方法是附加调试器,配置Visual Studio以中断异常,并在应用程序中触发错误。
当错误发生且调试器中断时,请查看调用堆栈 - 希望您能看到问题所在。
答案 1 :(得分:1)
我的猜测可能是软件问题。 StackOverflowException通常在递归算法出现问题时发生(尽管您提到不使用递归算法)。另一个常见原因是属性实现或相等比较中的错误。 例如:
public string Name
{
set
{
Name = value;
}
}