我有一个函数,它从目录中获取文件列表,然后在文件名中查找与 List 中条目匹配的项。它的性能很糟糕。
这是该函数:
/// <summary>
/// Scans <paramref name="serverDirectory"/> (including sub-directories) and reports, per permit number,
/// whether a file whose name starts with that permit number exists.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for in the leading characters of each file name.</param>
/// <param name="serverDirectory">Root directory to search.</param>
/// <param name="type">Selects the prefix length compared against the permit numbers: 5 characters for Well and DrillerLog, 14 for RasterLog.</param>
/// <returns>
/// One fileStatus (third argument 1, with the file name) per matching file, followed by one fileStatus
/// (third argument 0, empty file name) per permit number that had no match. The list is empty when the
/// directory does not exist, when there are no files or no permit numbers, or when the type is not handled.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
XmlConfigurator.Configure();
log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
List<fileStatus> results = new List<fileStatus>();
DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
if (dirInfo.Exists)
{
// Get the full path of every file under the directory, recursing into sub-directories.
string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
if (files.Length > 0 && permitNumbers.Count > 0)
{
log.Debug("Checking for matching files");
// Check for matching files. The three cases below differ only in the fileType passed to
// fileStatus and in the number of leading file-name characters compared (5, 5 and 14).
switch (type)
{
case fileType.Well:
// f.Substring(f.LastIndexOf("\\") + 1) is the text after the last backslash, i.e. the file name.
// permitNumbers is a List, so each Contains call is a linear scan.
// NOTE: this query is deferred; it runs again every time it is enumerated.
var matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.Well, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
// Also deferred: enumerating this re-runs matchingFiles.
var permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
// Contains is evaluated once per permit number, so the whole matchingFiles query
// is re-executed for each permit number when this query is enumerated.
var nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.Well, p, 0, string.Empty));
// AddRange is what actually enumerates (executes) the queries above.
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.DrillerLog:
// Same logic as the Well case (5-character prefix); reuses the variables declared there.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.DrillerLog, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.DrillerLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.RasterLog:
// Same logic again, but the compared prefix is 14 characters long.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 13
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 14))
select new fileStatus(fileType.RasterLog, f.Substring(f.LastIndexOf("\\") + 1, 14), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.RasterLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
default:
// Any other type produces no results.
break;
}
log.Debug("Done checking for matching files");
}
}
return results;
}
一旦执行到为 matchingFiles 赋值的 LINQ 查询,程序就会挂起。permitNumbers 的数量很大(约 5000 个),files 中的文件数量也很大。
我有什么办法可以加快速度吗?
根据下面给出的建议,我把函数修改成了如下的样子,现在性能已符合预期。非常感谢你们! =)
/// <summary>
/// Scans <paramref name="serverDirectory"/> (including sub-directories) and reports, per permit number,
/// whether a file whose name starts with that permit number exists.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for; duplicates are collapsed by the hash set.</param>
/// <param name="serverDirectory">Root directory to search.</param>
/// <param name="type">Selects the prefix length compared against the permit numbers: 5 characters for Well and DrillerLog, 14 for RasterLog.</param>
/// <returns>
/// One fileStatus (third argument 1, with the file name) per distinct matching file name, followed by one
/// fileStatus (third argument 0, empty file name) per permit number that had no match. The list is empty
/// when the directory does not exist, when there are no files or no permit numbers, or when the type is
/// not handled.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
    // A hash set makes each membership test O(1) instead of a linear scan of the list.
    HashSet<string> numbers = new HashSet<string>(permitNumbers);
    XmlConfigurator.Configure();
    log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));

    List<fileStatus> results = new List<fileStatus>();
    DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
    if (!dirInfo.Exists)
    {
        return results;
    }

    // Get every file under the directory (recursively) and keep only the distinct file names.
    string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
    HashSet<string> fileNames = new HashSet<string>(files.Select(f => Path.GetFileName(f)));
    log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
    if (fileNames.Count == 0 || numbers.Count == 0)
    {
        return results;
    }

    log.Debug("Checking for matching files");

    // The file types differ only in how many leading file-name characters hold the permit number.
    int prefixLength;
    switch (type)
    {
        case fileType.Well:
        case fileType.DrillerLog:
            prefixLength = 5;
            break;
        case fileType.RasterLog:
            prefixLength = 14;
            break;
        default:
            // Unhandled type: produce no results, as before.
            prefixLength = 0;
            break;
    }

    if (prefixLength > 0)
    {
        // Materialize once: the query would otherwise run (and construct every fileStatus) twice,
        // once for AddRange and once more when Except enumerates the matched permit numbers.
        List<fileStatus> matchingFiles = fileNames
            .Where(name => name.Length >= prefixLength && numbers.Contains(name.Substring(0, prefixLength)))
            .Select(name => new fileStatus(type, name.Substring(0, prefixLength), 1, name))
            .ToList();

        // Except builds a set from the matched numbers, so this is a single pass over each sequence.
        IEnumerable<fileStatus> nonMatchingFiles = numbers
            .Except(matchingFiles.Select(match => match.PermitNumber))
            .Select(permit => new fileStatus(type, permit, 0, string.Empty));

        results.AddRange(matchingFiles);
        results.AddRange(nonMatchingFiles);
    }

    log.Debug("Done checking for matching files");
    return results;
}
答案 0 :(得分:2)
您创建了一个查询 matchingFiles,它在被迭代时会遍历您拥有的所有文件,以多种方式处理它们,并对您的许可证号集合进行线性搜索。然后,您又针对每个许可证号重新执行这个查询(这需要反复从磁盘读取大量数据,如果数据量大到足以冲垮缓存,代价会非常高),并在其结果中进行线性搜索。这导致渐近复杂度为 O(N^2 * M),其中 N 是许可证号的数量,M 是文件的数量。这……非常糟糕。
这里的关键是避免:1)进行线性搜索;2)多次迭代复杂的查询,尤其要避免针对另一个序列中的每一项都把它迭代一遍。
对于 #1,只需将 permitNumbers 改为 HashSet<string> 而不是 List,这样检查其中是否包含某一项就成为 O(1) 操作。
对于 #2,用一个只需迭代源序列一次的操作来替换您的第三个查询:
// Set difference: permit numbers that never appear among the matched ones, each wrapped as a "not found" status.
var nonMatchingFiles = from p in permitNumbers.Except(permitNumbersWithMatches)
                       select new fileStatus(fileType.Well, p, 0, string.Empty);
答案 1 :(得分:1)
我会用一次 Path.GetFileName(f) 调用来取代所有重复的 f.Substring(f.LastIndexOf("\\") + 1) 调用。
例如
// Strip the directory part once per path instead of repeating Substring/LastIndexOf in every clause.
var fileNames = files.Select(f => Path.GetFileName(f));
// The first 5 characters of the file name are compared against the permit numbers.
var matchingFiles = (from fname in fileNames
                     where fname.Length > 4
                     where permitNumbers.Contains(fname.Substring(0, 5))
                     select new fileStatus(fileType.Well, fname.Substring(0, 5), 1, fname));