LINQ 查询的性能问题

时间:2014-10-03 15:05:26

标签: c# performance linq

我有一个函数,它从目录中获取文件列表,然后在文件名中搜索与一个 List 中各项匹配的内容。它的性能很糟糕。

这是该函数:

/// <summary>
/// Scans <paramref name="serverDirectory"/> (including all subdirectories) and reports which entries of
/// <paramref name="permitNumbers"/> appear as the leading characters of a file name.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for; each is compared with the start of every file name.</param>
/// <param name="serverDirectory">Root directory whose files are listed recursively.</param>
/// <param name="type">Selects the compared prefix length: 5 characters for Well and DrillerLog, 14 for RasterLog.</param>
/// <returns>
/// One fileStatus per matching file (third constructor argument 1), followed by one per entry of
/// permitNumbers that had no match (third argument 0, empty file name). The list is empty when the
/// directory does not exist, when no files are found, when permitNumbers is empty, or when type is
/// not one of the three handled values.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
    {
        XmlConfigurator.Configure();
        log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
        List<fileStatus> results = new List<fileStatus>();
        DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
        if (dirInfo.Exists)
        {
            // GET LIST OF ALL FILES IN DIRECTORY
            string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);

            // Note: this joins every full path into a single string before logging.
            log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));


            if (files.Length > 0 && permitNumbers.Count > 0)
            {
                log.Debug("Checking for matching files");
                // CHECK FOR MATCHING FILES
                switch (type)
                {
                    case fileType.Well:

                        // Deferred query: nothing runs until it is enumerated.
                        // f.Substring(f.LastIndexOf("\\") + 1) is the text after the last backslash (the file name);
                        // it is recomputed in every clause. permitNumbers is a List, so Contains is a linear
                        // scan performed once per file.
                        var matchingFiles = (from f in files
                                             where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
                                             where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
                                             select new fileStatus(fileType.Well, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));


                        // Still deferred: a projection layered on top of matchingFiles.
                        var permitNumbersWithMatches = (from x in matchingFiles
                                                       select x.PermitNumber);

                        // Contains on the deferred sequence re-enumerates matchingFiles (re-running the
                        // file scan above) for each permit number tested here.
                        var nonMatchingFiles = (from p in permitNumbers
                                                where !permitNumbersWithMatches.Contains(p)
                                                select new fileStatus(fileType.Well, p, 0, string.Empty));

                        // AddRange enumerates the queries; this is where they actually execute.
                        results.AddRange(matchingFiles);
                        results.AddRange(nonMatchingFiles);

                        break;
                    case fileType.DrillerLog:
                        // Same matching as the Well case (5-character prefix), tagged as DrillerLog.
                        // The variables declared with var in the first section are reused here.
                        matchingFiles = (from f in files
                                         where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
                                         where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
                                         select new fileStatus(fileType.DrillerLog, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));

                        permitNumbersWithMatches = (from x in matchingFiles
                                                       select x.PermitNumber);

                        nonMatchingFiles = (from p in permitNumbers
                                                where !permitNumbersWithMatches.Contains(p)
                                            select new fileStatus(fileType.DrillerLog, p, 0, string.Empty));


                        results.AddRange(matchingFiles);
                        results.AddRange(nonMatchingFiles);

                        break;
                    case fileType.RasterLog:

                        // Same pattern again, but the compared prefix is 14 characters long.
                        matchingFiles = (from f in files
                                         where f.Substring(f.LastIndexOf("\\") + 1).Length > 13
                                         where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 14))
                                         select new fileStatus(fileType.RasterLog, f.Substring(f.LastIndexOf("\\") + 1, 14), 1, f.Substring(f.LastIndexOf("\\") + 1)));

                        permitNumbersWithMatches = (from x in matchingFiles
                                                       select x.PermitNumber);

                        nonMatchingFiles = (from p in permitNumbers
                                                where !permitNumbersWithMatches.Contains(p)
                                            select new fileStatus(fileType.RasterLog, p, 0, string.Empty));



                        results.AddRange(matchingFiles);
                        results.AddRange(nonMatchingFiles);
                        break;
                    default:
                        // Any other type produces no results.
                        break;
                }
                log.Debug("Done checking for matching files");
            }
        }
        return results;

    }

一旦执行到为“matchingFiles”赋值的 LINQ 查询,程序就会挂起。“permitNumbers”的数量很大(例如 5000 个),“files”中的文件数量也很大。

我有什么办法可以加快速度吗?

参考下面给出的建议,我把函数修改成了如下形式,现在性能已符合预期。非常感谢你们! =)

/// <summary>
/// Scans <paramref name="serverDirectory"/> (including all subdirectories) and reports which entries of
/// <paramref name="permitNumbers"/> appear as the leading characters of a file name.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for; duplicates are collapsed. Must not be null.</param>
/// <param name="serverDirectory">Root directory whose files are listed recursively.</param>
/// <param name="type">Selects the compared prefix length: 5 characters for Well and DrillerLog, 14 for RasterLog.</param>
/// <returns>
/// One fileStatus per distinct matching file name (third constructor argument 1), followed by one per
/// distinct permit number that had no match (third argument 0, empty file name). The list is empty when
/// the directory does not exist, when no files are found, when permitNumbers is empty, or when type is
/// not one of the three handled values.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
    {
        // Number of leading file-name characters compared against the permit numbers.
        const int shortPrefixLength = 5;    // Well and DrillerLog
        const int rasterPrefixLength = 14;  // RasterLog

        // Hash set gives constant-time membership tests instead of a linear List scan per file.
        HashSet<string> numbers = new HashSet<string>(permitNumbers);
        XmlConfigurator.Configure();
        log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
        List<fileStatus> results = new List<fileStatus>();
        DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
        if (!dirInfo.Exists)
        {
            return results;
        }

        // GET LIST OF ALL FILES IN DIRECTORY
        string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
        // Only the file name is compared; identical names in different subdirectories collapse to one entry.
        HashSet<string> fileNames = new HashSet<string>(files.Select(f => Path.GetFileName(f)));

        log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));

        if (fileNames.Count == 0 || numbers.Count == 0)
        {
            return results;
        }

        log.Debug("Checking for matching files");

        // The three handled types differ only in how many leading characters are compared.
        int prefixLength;
        switch (type)
        {
            case fileType.Well:
            case fileType.DrillerLog:
                prefixLength = shortPrefixLength;
                break;
            case fileType.RasterLog:
                prefixLength = rasterPrefixLength;
                break;
            default:
                // Any other type produces no results.
                prefixLength = 0;
                break;
        }

        if (prefixLength > 0)
        {
            // CHECK FOR MATCHING FILES
            // Single pass: each match is constructed exactly once and its permit number is remembered,
            // so the files are never re-scanned when the unmatched numbers are computed below.
            HashSet<string> matchedNumbers = new HashSet<string>();
            foreach (string fileName in fileNames)
            {
                if (fileName.Length < prefixLength)
                {
                    continue;
                }

                string prefix = fileName.Substring(0, prefixLength);
                if (!numbers.Contains(prefix))
                {
                    continue;
                }

                fileStatus match = new fileStatus(type, prefix, 1, fileName);
                matchedNumbers.Add(match.PermitNumber);
                results.Add(match);
            }

            // Every requested number that never matched a file is reported after the matches.
            foreach (string number in numbers)
            {
                if (!matchedNumbers.Contains(number))
                {
                    results.Add(new fileStatus(type, number, 0, string.Empty));
                }
            }
        }

        log.Debug("Done checking for matching files");
        return results;

    }

2 个答案:

答案 0 :(得分:2)

您创建了一个查询 matchingFiles;每次迭代它时,它都会遍历您所有的文件、以多种方式处理这些文件名,并对您的号码集合执行线性搜索。然后,您又针对每一个许可证号码执行一次这个查询(这需要反复从磁盘读取大量数据;如果数据量大到足以冲掉缓存,代价会非常高),并在其结果上做线性搜索。这导致渐近复杂度为 O(N^2 * M),其中 N 是许可证号码的数量,M 是文件的数量。这……非常糟糕。

这里的关键是避免:1)进行线性搜索;2)多次迭代复杂查询,尤其要避免针对另一个序列中的每一项都把它重新迭代一遍。

对于#1,只需将permitNumbers设为HashSet<string>而不是列表,然后检查其中是否包含项目将成为O(1)操作。

对于#2,用一个只需要把源序列迭代一次的操作来替换您的第三个查询:

// Set difference: the permit numbers that do not occur among the matched ones.
// Except enumerates permitNumbersWithMatches once instead of once per permit number.
var unmatchedNumbers = permitNumbers.Except(permitNumbersWithMatches);
var nonMatchingFiles = from p in unmatchedNumbers
                       select new fileStatus(fileType.Well, p, 0, string.Empty);

答案 1 :(得分:1)

我会用一次 Path.GetFileName(f) 调用来取代所有重复的 f.Substring(f.LastIndexOf("\\") + 1) 调用。

例如

// Extract each file name once instead of repeating Substring/LastIndexOf in every clause.
var fileNames = files.Select(f => Path.GetFileName(f));

// Keep names of at least 5 characters whose first 5 characters are one of the permit numbers.
var matchingFiles = (from fname in fileNames
                     where fname.Length > 4
                     where permitNumbers.Contains(fname.Substring(0, 5))
                     select new fileStatus(fileType.Well, fname.Substring(0, 5), 1, fname));