好的我有一个重大问题。
我的软件使用了极高量的内存。我正在使用大量页面来源的HtmlAgilityPack.HtmlDocument
个对象。
然而,所有对象都在静态函数中使用,HtmlAgilityPack.HtmlDocument
不是IDisposable
那么我需要将每个变量显式设置为null吗?
即使它们在静态函数内部?
例如,我是否需要在
下面的函数末尾将这些变量设置为null我问的变量:lstDrwList
?或者因为它在里面会自动处理?
我应该明确地调用垃圾收集器吗?
C#.net 4.5 WPF应用程序
private static void func_CheckWaitingToProcessPages(Object state)
{
ParallelOptions myOptions = new ParallelOptions();
myOptions.MaxDegreeOfParallelism = PublicSettings.ir_How_Many_Tasks_For_Per_Pages_Process;
List<DataRow> lstDrwList = new List<DataRow>();
using (DataTable dtMyTable = DbConnection.db_Select_DataTable(srSelectTopProcessPagesQuery))
{
foreach (DataRow drw in dtMyTable.Rows)
{
lstDrwList.Add(drw);
}
}
Parallel.ForEach(lstDrwList, myOptions, drw =>
{
process_Given_Page(drw);
});
}
问题是找到问题是如何修复
这里发生的问题在10秒内我使用了visual studio profiler
此处导致此巨大内存泄漏问题的完整类
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace doktora_tez_projesi_crawler_program
{
public static class PagesProcessor
{
private static Timer _timer;
private static int howManySeconds = 10;
public static void func_StartCrawlingWaitingUrls()
{
PublicStaticFunctions.AddMsgToEvents("Checking waiting to process crawled urls process started every " + howManySeconds + " seconds!");
_timer = new Timer(func_CheckWaitingToProcessPages, null, PublicSettings.irTimers_Delayed_Start_MiliSeconds, howManySeconds * 1000);
}
private static string srSelectTopProcessPagesQuery = " select top 100 cl_IdUrl,cl_RooSiteId,cl_CrawlSource,cl_CrawlOrgUrl from tblCrawlUrls " +
" where cl_PageProcessed=0 and cl_TotalCrawlTimes > 0 " +
" order by cl_LastProcessDate asc";
private static void func_CheckWaitingToProcessPages(Object state)
{
ParallelOptions myOptions = new ParallelOptions();
myOptions.MaxDegreeOfParallelism = PublicSettings.ir_How_Many_Tasks_For_Per_Pages_Process;
List<DataRow> lstDrwList = new List<DataRow>();
using (DataTable dtMyTable = DbConnection.db_Select_DataTable(srSelectTopProcessPagesQuery))
{
foreach (DataRow drw in dtMyTable.Rows)
{
lstDrwList.Add(drw);
}
}
Parallel.ForEach(lstDrwList, myOptions, drw =>
{
process_Given_Page(drw);
});
}
private class csProductFeatures
{
public string srProductRootSiteId = "null", srProductTitle = "null", srProductCode = "null", srProductImageLink = "null";
public string srProductDetailedExplanation = "null", srProductFeatures = "null", srCrawledOrgUrl = "null", srProductIdCode = "null";
public bool blPossibleProductPage = false, blFreeCargo = false, blProductPage = true;
public List<string> lstProductCategories = new List<string>();
public int irProductPrice = 0;
public List<csProductComments> lstProductComments = new List<csProductComments>();
public List<KeyValuePair<string, string>> lstProductFeatures = new List<KeyValuePair<string, string>>();
}
private class csProductComments
{
public string srCommentTitle = "null", srCommentPros = "null", srCommentCons = "null";
public int irCommentScore = 0; //0 = negative 5=full star
}
private static void process_Given_Page(DataRow drw)
{
csProductFeatures temp_ProductFeatures = new csProductFeatures();
temp_ProductFeatures.srProductRootSiteId = drw["cl_RooSiteId"].ToString();
temp_ProductFeatures.srCrawledOrgUrl = drw["cl_CrawlOrgUrl"].ToString();
HtmlDocument hdMyDoc = new HtmlDocument();//nulled
hdMyDoc.LoadHtml(drw["cl_CrawlSource"].ToString());
bool blBreakLoop = false;
foreach (var vrVariable in PublicVariables.dicRootSites[temp_ProductFeatures.srProductRootSiteId].lstRootSiteIdentifiers)
{
if (vrVariable.srHtmlObjectType != "link")
{
HtmlNodeCollection hdNodes;
if (vrVariable.blSelectMultipleNodes == false)
hdNodes = hdMyDoc.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
vrVariable.srHtmlObjectTypeIdentifier, vrVariable.srHtmlObjectTypeName));
else
hdNodes = hdMyDoc.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']//{3}", vrVariable.srHtmlObjectType,
vrVariable.srHtmlObjectTypeIdentifier, vrVariable.srHtmlObjectTypeName, vrVariable.srHtmlSubIdentifierType));
if (hdNodes == null && vrVariable.srIndetifierType == "ProductTitle")
{
blBreakLoop = true;
temp_ProductFeatures.blProductPage = false;
continue;
}
if (blBreakLoop == true)
break;
if (hdNodes == null)
continue;
string sr_Node_Required_Val = "null";
if (hdNodes[0].InnerText != null)
sr_Node_Required_Val = hdNodes[0].InnerText;
string srLinkVal = "null";
if (vrVariable.srHtmlObjectType == "a" && hdNodes[0].Attributes != null)
{
if (hdNodes[0].Attributes["href"] != null)
{
srLinkVal = PublicStaticFunctions.Return_Absolute_Url(hdNodes[0].Attributes["href"].Value, temp_ProductFeatures.srCrawledOrgUrl);
}
}
if (vrVariable.blGetValue == true)
{
if (hdNodes[0].Attributes != null)
if (hdNodes[0].Attributes["value"] != null)
sr_Node_Required_Val = hdNodes[0].Attributes["value"].Value;
}
sr_Node_Required_Val = sr_Node_Required_Val.Trim();
switch (vrVariable.srIndetifierType)
{
case "ProductPage":
temp_ProductFeatures.blPossibleProductPage = true;
break;
case "ProductTitle":
temp_ProductFeatures.srProductTitle = sr_Node_Required_Val;
break;
case "ProductCode":
temp_ProductFeatures.srProductCode = sr_Node_Required_Val;
break;
case "ProductCargo":
temp_ProductFeatures.blFreeCargo = true;
break;
case "ProductCategories":
temp_ProductFeatures.lstProductCategories = func_Return_Product_Categories(hdNodes);
break;
case "ProductPrice":
temp_ProductFeatures.irProductPrice = func_Return_Product_Price(sr_Node_Required_Val, temp_ProductFeatures.srProductRootSiteId);
break;
case "ProductImage":
temp_ProductFeatures.srProductImageLink = srLinkVal;
break;
case "ProductIdCode":
temp_ProductFeatures.srProductIdCode = sr_Node_Required_Val;
break;
}
}
if (vrVariable.srHtmlObjectType == "link")
{
string srLinkToFetch = vrVariable.srHtmlObjectTypeIdentifier;
if (vrVariable.blUsesProductIdCode == true)
{
srLinkToFetch = string.Format(srLinkToFetch, temp_ProductFeatures.srProductIdCode);
}
string srFetchResult = CrawlGivenUrl.func_fetch_Page(srLinkToFetch);
string srResultToAssign = "null";
if (srFetchResult == PublicSettings.srCrawlFailedMessage)
{
srResultToAssign = srFetchResult;
}
else
{
HtmlDocument temp_HdDocument = new HtmlDocument();//nulled
temp_HdDocument.LoadHtml(srFetchResult);
if (temp_HdDocument.DocumentNode != null)
if (temp_HdDocument.DocumentNode.InnerText != null)
srResultToAssign = temp_HdDocument.DocumentNode.InnerText;
temp_HdDocument = null;
}
switch (vrVariable.srIndetifierType)
{
case "ProductExplanation":
temp_ProductFeatures.srProductDetailedExplanation = srResultToAssign;
break;
case "ProductFeatures":
temp_ProductFeatures.lstProductFeatures = func_Return_Product_Features(temp_ProductFeatures.srProductRootSiteId, srFetchResult, temp_ProductFeatures.srCrawledOrgUrl);
break;
}
}
}
if (temp_ProductFeatures.blProductPage == true)
{
string asdas = "";
}
hdMyDoc = null;
}
private static List<string> func_Return_Product_Categories(HtmlNodeCollection hdNodeCollection)
{
List<string> lstCategories = new List<string> { };
foreach (HtmlNode hdNode in hdNodeCollection)
{
if (hdNode.InnerText != null)
{
lstCategories.Add(hdNode.InnerText);
}
}
return lstCategories;
}
private static int func_Return_Product_Price(string srPriceText, string srRootSiteId)
{
int irPrice = 0;
srPriceText = srPriceText.Replace(PublicVariables.dicRootSites[srRootSiteId].srPriceDelimeter, "");
if (srPriceText.Contains(PublicVariables.dicRootSites[srRootSiteId].srPriceIgnoreDelimeter) == true)
{
srPriceText = srPriceText.Substring(0, srPriceText.IndexOf(PublicVariables.dicRootSites[srRootSiteId].srPriceIgnoreDelimeter));
}
Int32.TryParse(srPriceText, out irPrice);
return irPrice;
}
private static List<KeyValuePair<string, string>> func_Return_Product_Features(string srRootSiteId, string srPageSource, string srCrawlUrl)
{
List<KeyValuePair<string, string>> lstFoundFeatures = new List<KeyValuePair<string, string>>();
if (srPageSource == PublicSettings.srCrawlFailedMessage)
return lstFoundFeatures;
HtmlDocument temp_HdDocument = new HtmlDocument();//nulled
temp_HdDocument.LoadHtml(srPageSource);
List<string> lstFeatureTitles = new List<string>();
List<string> lstFeatureDescriptions = new List<string>();
foreach (var vrVariable in PublicVariables.dicRootSites[srRootSiteId].lstRootSitesFeaturesIdentifiers)
{
if (vrVariable.blPerFeatureIdentifier == true)
{
HtmlNodeCollection hdNodes = temp_HdDocument.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
vrVariable.srHtmlObjectIdentifier, vrVariable.srHtmlObjectIdentifierName));
if (hdNodes != null)
foreach (var vrNewVariable in PublicVariables.dicRootSites[srRootSiteId].lstRootSitesFeaturesIdentifiers)
{
if (vrNewVariable.blPerFeatureIdentifier == false)
{
foreach (HtmlNode hdTempNode in hdNodes)
{
var vrTempNewNode = hdTempNode.SelectSingleNode(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
vrVariable.srHtmlObjectIdentifier, vrVariable.srHtmlObjectIdentifierName));
if (vrTempNewNode != null)
if (vrTempNewNode.InnerText != null)
{
string srNodeFeature = vrTempNewNode.InnerText.Trim();
switch (vrVariable.srWhichFeatureIdentifier)
{
case "FeatureTitle":
lstFeatureTitles.Add(srNodeFeature);
break;
case "FeatureDescription":
lstFeatureDescriptions.Add(srNodeFeature);
break;
}
}
}
}
}
break;
}
}
temp_HdDocument = null;
if (lstFeatureDescriptions.Count != lstFeatureTitles.Count)
{
ErrorLogger.LogError("found features count not equal to features description count crawled url: " + srCrawlUrl);
return lstFoundFeatures;
}
for (int i = 0; i < lstFeatureDescriptions.Count; i++)
{
KeyValuePair<string, string> myKeyValPair = new KeyValuePair<string, string>(lstFeatureTitles[i], lstFeatureDescriptions[i]);
lstFoundFeatures.Add(myKeyValPair);
}
return lstFoundFeatures;
}
}
}
答案 0 :(得分:0)
不,您不需要在static
和instance
方法中将变量设置为空。方法内部的变量(甚至在static
方法内部)位于方法的stack
空间,因此通常它们将在方法执行结束时超出范围,并且将作为垃圾收集的目标。通常明确地调用garbage collector
并不是一个好习惯。