是否有必要在静态函数内部署对象?

时间:2014-08-18 00:58:57

标签: c# wpf garbage-collection html-agility-pack idisposable

好的我有一个重大问题。

我的软件使用了极高量的内存。我正在使用大量页面来源的HtmlAgilityPack.HtmlDocument个对象。

然而,所有对象都在静态函数中使用,HtmlAgilityPack.HtmlDocument不是IDisposable

那么我需要将每个变量显式设置为null吗?

即使它们在静态函数内部?

例如,我是否需要在

下面的函数末尾将这些变量设置为null

我问的变量:lstDrwList?或者因为它在里面会自动处理?

我应该明确地调用垃圾收集器吗?

C#.net 4.5 WPF应用程序

private static void func_CheckWaitingToProcessPages(Object state)
{
    ParallelOptions myOptions = new ParallelOptions();
    myOptions.MaxDegreeOfParallelism = PublicSettings.ir_How_Many_Tasks_For_Per_Pages_Process;

    List<DataRow> lstDrwList = new List<DataRow>();

    using (DataTable dtMyTable = DbConnection.db_Select_DataTable(srSelectTopProcessPagesQuery))
    {
        foreach (DataRow drw in dtMyTable.Rows)
        {
            lstDrwList.Add(drw);
        }
    }

    Parallel.ForEach(lstDrwList, myOptions, drw =>
      {
          process_Given_Page(drw);
      });
}

问题是找到问题是如何修复

这里发生的问题在10秒内我使用了visual studio profiler

enter image description here

此处导致此巨大内存泄漏问题的完整类

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;


namespace doktora_tez_projesi_crawler_program
{
    public static class PagesProcessor
    {
        private static Timer _timer;
        private static int howManySeconds = 10;
        public static void func_StartCrawlingWaitingUrls()
        {
            PublicStaticFunctions.AddMsgToEvents("Checking waiting to process crawled urls process started every " + howManySeconds + " seconds!");
            _timer = new Timer(func_CheckWaitingToProcessPages, null, PublicSettings.irTimers_Delayed_Start_MiliSeconds, howManySeconds * 1000);
        }

        private static string srSelectTopProcessPagesQuery = " select top 100 cl_IdUrl,cl_RooSiteId,cl_CrawlSource,cl_CrawlOrgUrl from tblCrawlUrls " +
                                                             " where cl_PageProcessed=0 and cl_TotalCrawlTimes > 0 " +
                                                             " order by cl_LastProcessDate asc";

        private static void func_CheckWaitingToProcessPages(Object state)
        {
            ParallelOptions myOptions = new ParallelOptions();
            myOptions.MaxDegreeOfParallelism = PublicSettings.ir_How_Many_Tasks_For_Per_Pages_Process;

            List<DataRow> lstDrwList = new List<DataRow>();

            using (DataTable dtMyTable = DbConnection.db_Select_DataTable(srSelectTopProcessPagesQuery))
            {
                foreach (DataRow drw in dtMyTable.Rows)
                {
                    lstDrwList.Add(drw);
                }
            }

            Parallel.ForEach(lstDrwList, myOptions, drw =>
              {
                  process_Given_Page(drw);
              });
        }

        private class csProductFeatures
        {
            public string srProductRootSiteId = "null", srProductTitle = "null", srProductCode = "null", srProductImageLink = "null";
            public string srProductDetailedExplanation = "null", srProductFeatures = "null", srCrawledOrgUrl = "null", srProductIdCode = "null";
            public bool blPossibleProductPage = false, blFreeCargo = false, blProductPage = true;
            public List<string> lstProductCategories = new List<string>();
            public int irProductPrice = 0;
            public List<csProductComments> lstProductComments = new List<csProductComments>();
            public List<KeyValuePair<string, string>> lstProductFeatures = new List<KeyValuePair<string, string>>();
        }

        private class csProductComments
        {
            public string srCommentTitle = "null", srCommentPros = "null", srCommentCons = "null";
            public int irCommentScore = 0; //0 = negative 5=full star
        }

        private static void process_Given_Page(DataRow drw)
        {
            csProductFeatures temp_ProductFeatures = new csProductFeatures();
            temp_ProductFeatures.srProductRootSiteId = drw["cl_RooSiteId"].ToString();
            temp_ProductFeatures.srCrawledOrgUrl = drw["cl_CrawlOrgUrl"].ToString();

            HtmlDocument hdMyDoc = new HtmlDocument();//nulled
            hdMyDoc.LoadHtml(drw["cl_CrawlSource"].ToString());

            bool blBreakLoop = false;

            foreach (var vrVariable in PublicVariables.dicRootSites[temp_ProductFeatures.srProductRootSiteId].lstRootSiteIdentifiers)
            {
                if (vrVariable.srHtmlObjectType != "link")
                {
                    HtmlNodeCollection hdNodes;

                    if (vrVariable.blSelectMultipleNodes == false)
                        hdNodes = hdMyDoc.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
                                 vrVariable.srHtmlObjectTypeIdentifier, vrVariable.srHtmlObjectTypeName));
                    else
                        hdNodes = hdMyDoc.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']//{3}", vrVariable.srHtmlObjectType,
                          vrVariable.srHtmlObjectTypeIdentifier, vrVariable.srHtmlObjectTypeName, vrVariable.srHtmlSubIdentifierType));

                    if (hdNodes == null && vrVariable.srIndetifierType == "ProductTitle")
                    {
                        blBreakLoop = true;
                        temp_ProductFeatures.blProductPage = false;
                        continue;
                    }

                    if (blBreakLoop == true)
                        break;

                    if (hdNodes == null)
                        continue;

                    string sr_Node_Required_Val = "null";
                    if (hdNodes[0].InnerText != null)
                        sr_Node_Required_Val = hdNodes[0].InnerText;

                    string srLinkVal = "null";

                    if (vrVariable.srHtmlObjectType == "a" && hdNodes[0].Attributes != null)
                    {
                        if (hdNodes[0].Attributes["href"] != null)
                        {
                            srLinkVal = PublicStaticFunctions.Return_Absolute_Url(hdNodes[0].Attributes["href"].Value, temp_ProductFeatures.srCrawledOrgUrl);
                        }
                    }

                    if (vrVariable.blGetValue == true)
                    {
                        if (hdNodes[0].Attributes != null)
                            if (hdNodes[0].Attributes["value"] != null)
                                sr_Node_Required_Val = hdNodes[0].Attributes["value"].Value;
                    }

                    sr_Node_Required_Val = sr_Node_Required_Val.Trim();

                    switch (vrVariable.srIndetifierType)
                    {
                        case "ProductPage":
                            temp_ProductFeatures.blPossibleProductPage = true;
                            break;
                        case "ProductTitle":
                            temp_ProductFeatures.srProductTitle = sr_Node_Required_Val;
                            break;
                        case "ProductCode":
                            temp_ProductFeatures.srProductCode = sr_Node_Required_Val;
                            break;
                        case "ProductCargo":
                            temp_ProductFeatures.blFreeCargo = true;
                            break;
                        case "ProductCategories":
                            temp_ProductFeatures.lstProductCategories = func_Return_Product_Categories(hdNodes);
                            break;
                        case "ProductPrice":
                            temp_ProductFeatures.irProductPrice = func_Return_Product_Price(sr_Node_Required_Val, temp_ProductFeatures.srProductRootSiteId);
                            break;
                        case "ProductImage":
                            temp_ProductFeatures.srProductImageLink = srLinkVal;
                            break;
                        case "ProductIdCode":
                            temp_ProductFeatures.srProductIdCode = sr_Node_Required_Val;
                            break;
                    }
                }

                if (vrVariable.srHtmlObjectType == "link")
                {
                    string srLinkToFetch = vrVariable.srHtmlObjectTypeIdentifier;

                    if (vrVariable.blUsesProductIdCode == true)
                    {
                        srLinkToFetch = string.Format(srLinkToFetch, temp_ProductFeatures.srProductIdCode);
                    }

                    string srFetchResult = CrawlGivenUrl.func_fetch_Page(srLinkToFetch);

                    string srResultToAssign = "null";

                    if (srFetchResult == PublicSettings.srCrawlFailedMessage)
                    {
                        srResultToAssign = srFetchResult;
                    }
                    else
                    {
                        HtmlDocument temp_HdDocument = new HtmlDocument();//nulled
                        temp_HdDocument.LoadHtml(srFetchResult);
                        if (temp_HdDocument.DocumentNode != null)
                            if (temp_HdDocument.DocumentNode.InnerText != null)
                                srResultToAssign = temp_HdDocument.DocumentNode.InnerText;
                        temp_HdDocument = null;
                    }

                    switch (vrVariable.srIndetifierType)
                    {
                        case "ProductExplanation":
                            temp_ProductFeatures.srProductDetailedExplanation = srResultToAssign;
                            break;
                        case "ProductFeatures":
                            temp_ProductFeatures.lstProductFeatures = func_Return_Product_Features(temp_ProductFeatures.srProductRootSiteId, srFetchResult, temp_ProductFeatures.srCrawledOrgUrl);
                            break;
                    }
                }
            }

            if (temp_ProductFeatures.blProductPage == true)
            {
                string asdas = "";
            }

            hdMyDoc = null;
        }

        private static List<string> func_Return_Product_Categories(HtmlNodeCollection hdNodeCollection)
        {
            List<string> lstCategories = new List<string> { };
            foreach (HtmlNode hdNode in hdNodeCollection)
            {
                if (hdNode.InnerText != null)
                {
                    lstCategories.Add(hdNode.InnerText);
                }
            }

            return lstCategories;
        }

        private static int func_Return_Product_Price(string srPriceText, string srRootSiteId)
        {
            int irPrice = 0;
            srPriceText = srPriceText.Replace(PublicVariables.dicRootSites[srRootSiteId].srPriceDelimeter, "");
            if (srPriceText.Contains(PublicVariables.dicRootSites[srRootSiteId].srPriceIgnoreDelimeter) == true)
            {
                srPriceText = srPriceText.Substring(0, srPriceText.IndexOf(PublicVariables.dicRootSites[srRootSiteId].srPriceIgnoreDelimeter));
            }

            Int32.TryParse(srPriceText, out irPrice);
            return irPrice;
        }

        private static List<KeyValuePair<string, string>> func_Return_Product_Features(string srRootSiteId, string srPageSource, string srCrawlUrl)
        {
            List<KeyValuePair<string, string>> lstFoundFeatures = new List<KeyValuePair<string, string>>();

            if (srPageSource == PublicSettings.srCrawlFailedMessage)
                return lstFoundFeatures;

            HtmlDocument temp_HdDocument = new HtmlDocument();//nulled
            temp_HdDocument.LoadHtml(srPageSource);

            List<string> lstFeatureTitles = new List<string>();
            List<string> lstFeatureDescriptions = new List<string>();

            foreach (var vrVariable in PublicVariables.dicRootSites[srRootSiteId].lstRootSitesFeaturesIdentifiers)
            {
                if (vrVariable.blPerFeatureIdentifier == true)
                {
                    HtmlNodeCollection hdNodes = temp_HdDocument.DocumentNode.SelectNodes(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
                                 vrVariable.srHtmlObjectIdentifier, vrVariable.srHtmlObjectIdentifierName));

                    if (hdNodes != null)
                        foreach (var vrNewVariable in PublicVariables.dicRootSites[srRootSiteId].lstRootSitesFeaturesIdentifiers)
                        {
                            if (vrNewVariable.blPerFeatureIdentifier == false)
                            {
                                foreach (HtmlNode hdTempNode in hdNodes)
                                {
                                    var vrTempNewNode = hdTempNode.SelectSingleNode(string.Format("//{0}[@{1}='{2}']", vrVariable.srHtmlObjectType,
                                     vrVariable.srHtmlObjectIdentifier, vrVariable.srHtmlObjectIdentifierName));
                                    if (vrTempNewNode != null)
                                        if (vrTempNewNode.InnerText != null)
                                        {
                                            string srNodeFeature = vrTempNewNode.InnerText.Trim();
                                            switch (vrVariable.srWhichFeatureIdentifier)
                                            {
                                                case "FeatureTitle":
                                                    lstFeatureTitles.Add(srNodeFeature);
                                                    break;
                                                case "FeatureDescription":
                                                    lstFeatureDescriptions.Add(srNodeFeature);
                                                    break;
                                            }
                                        }
                                }
                            }
                        }
                    break;
                }
            }

            temp_HdDocument = null;

            if (lstFeatureDescriptions.Count != lstFeatureTitles.Count)
            {
                ErrorLogger.LogError("found features count not equal to features description count crawled url: " + srCrawlUrl);
                return lstFoundFeatures;
            }

            for (int i = 0; i < lstFeatureDescriptions.Count; i++)
            {
                KeyValuePair<string, string> myKeyValPair = new KeyValuePair<string, string>(lstFeatureTitles[i], lstFeatureDescriptions[i]);
                lstFoundFeatures.Add(myKeyValPair);
            }

            return lstFoundFeatures;
        }

    }
}

1 个答案:

答案 0 :(得分:0)

不,您不需要在staticinstance方法中将变量设置为空。方法内部的变量(甚至在static方法内部)位于方法的stack空间,因此通常它们将在方法执行结束时超出范围,并且将作为垃圾收集的目标。通常明确地调用garbage collector并不是一个好习惯。