我有一个预定义的正则表达式模式字符串列表(大约 7000 个正则表达式模式,用于对相似类型的消息进行分组)。
现在我有两个列表:一个是正则表达式模式(regex patterns)列表,另一个是实际消息(real messages)列表,其中的消息包含一些变量名称。
我需要对所有类似的消息进行分组,并显示这些分组后的消息。目前我遍历 7000 个正则表达式模式,以便对 1000 条消息中的类似项进行分组,这需要 m×n 次迭代才能找到正确的分组。
为减少处理时间,我会从消息列表中删除已匹配的项,例如下一轮只需处理 1000 −(上一轮迭代中已匹配的项数)条消息。
处理这两个列表花费的时间太长。为了缩短时间,我按消息类别类型对它们进行分组,并在并行任务中分别处理。
// Bucket the knowledge-base patterns by their ErrorType label in a single pass;
// each per-type list keeps the relative order of distinctKBErrors.
var kbErrorsByType = distinctKBErrors.ToLookup(kbEntry => kbEntry.ErrorType);
List<KBError> warningKBErrors = kbErrorsByType["Warning"].ToList();
List<KBError> fatalKBErrors = kbErrorsByType["Fatal"].ToList();
List<KBError> severeKBErrors = kbErrorsByType["Severe"].ToList();
List<KBError> cbeccErrorKBErrors = kbErrorsByType["Error"].ToList();

// Drop every message whose type is NOT selected in processingErrorType,
// so only the messages that should be processed remain in `errors`.
errors.RemoveAll(candidate => !processingErrorType.HasFlag(candidate.ErrorType));

// Bucket the remaining messages by their ErrorType value.
var errorsByType = errors.ToLookup(message => message.ErrorType);
List<Error> warningErrors = errorsByType[ErrorType.Warning].ToList();
List<Error> fatalErrors = errorsByType[ErrorType.Fatal].ToList();
List<Error> severeErrors = errorsByType[ErrorType.Severe].ToList();
List<Error> cbeccErrors = errorsByType[ErrorType.Error].ToList();
之后,将这些消息拆分成项数相等的子集,并在并行任务中处理。
// One lock shared by every invocation of FindDistinctErrorMessages. The delegate is
// run from several tasks at once and all of them mutate the same `errors` list, so a
// lock object created per invocation cannot protect that list.
object sharedErrorsLock = new object();

// Groups the messages in filteredErros by the knowledge-base pattern they match.
//   filteredKBErros - patterns to try; each KBError.ErrorMessage is used as a regex pattern.
//   filteredErros   - the subset of messages to test against those patterns.
// Returns one Error per pattern that matched at least one message of this subset:
// the matched message itself when there is exactly one match, otherwise a new Error
// whose Errors list holds all matches. Every matched message is also removed from the
// captured `errors` list, so what is left in `errors` afterwards is the unmatched rest.
Func<List<KBError>, List<Error>, List<Error>> FindDistinctErrorMessages = (filteredKBErros, filteredErros) =>
{
    // Grouped results of this invocation only; written exclusively under sharedErrorsLock.
    List<Error> groupedErrors = new List<Error>();
    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    sw.Start();
    Parallel.For(0, filteredKBErros.Count,
        // Thread-local accumulator: pattern -> messages of this subset that match it.
        () => new Dictionary<KBError, List<Error>>(),
        (x, loopState, kpErrorResult) =>
        {
            KBError kbError = filteredKBErros[(int)x];
            // Build the Regex once per pattern instead of calling the static
            // Regex.IsMatch(input, pattern, options) for every message: the static
            // overload depends on a small shared cache that thousands of distinct
            // patterns overflow, so each call would parse the pattern again.
            Regex pattern = new Regex(kbError.ErrorMessage, System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace);
            List<Error> matches = filteredErros.Where(error => pattern.IsMatch(error.ErrorMessage)).ToList();
            // Patterns without any match are never reported, so do not store them.
            if (matches.Count > 0)
            {
                kpErrorResult.Add(kbError, matches);
            }
            return kpErrorResult;
        },
        (kpErrorResult) =>
        {
            // Runs once per worker thread. Everything here touches state shared with
            // other threads and other tasks, so it is serialized on the shared lock.
            lock (sharedErrorsLock)
            {
                foreach (KeyValuePair<KBError, List<Error>> errorResult in kpErrorResult)
                {
                    List<Error> matches = errorResult.Value;
                    Error error;
                    if (matches.Count == 1)
                    {
                        // A single match is reported as itself rather than wrapped.
                        error = matches[0];
                    }
                    else
                    {
                        // Several matches: report a container that represents the group.
                        error = new Error();
                        error.ErrorMessage = matches[0].ErrorMessage;
                        error.Errors = matches;
                        error.ErrorType = matches[0].ErrorType;
                    }
                    error.ErrorCount = matches.Count;
                    error.ErrorCode = errorResult.Key.ErrorCode;
                    AddErrorResolutionMessage(error, errorResult.Key);
                    error.ErrorMessagePattern = errorResult.Key.ErrorMessage;
                    // Collect the grouped entry as this invocation's result. It must not
                    // be appended to `errors`, which only tracks unmatched messages.
                    groupedErrors.Add(error);
                    matches.ForEach(err => errors.Remove(err));
                }
            }
        }
    );
    sw.Stop();
    System.Diagnostics.Debug.WriteLine(string.Format("Completed in {0} seconds", sw.Elapsed.TotalSeconds));
    return groupedErrors;
};
// Run FilterKBList (defined elsewhere) on each per-type pattern list together with
// the messages of the same type, in the order warning, severe, fatal, error.
List<KBError> filteredWarningKBList = FilterKBList(warningKBErrors, warningErrors);
List<KBError> filteredSevereKBList = FilterKBList(severeKBErrors, severeErrors);
List<KBError> filteredFatalKBList = FilterKBList(fatalKBErrors, fatalErrors);
List<KBError> filteredcbeccErrorsKBList = FilterKBList(cbeccErrorKBErrors, cbeccErrors);
List<Task<List<Error>>> tasks = new List<Task<List<Error>>>();

// Queues one grouping task per subset of typedErrors, provided there are messages of
// that type and the type is selected in processingErrorType (or everything is selected).
Action<ErrorType, List<Error>, List<KBError>> queueGroupingTasks = (errorType, typedErrors, typedKBErrors) =>
{
    if (typedErrors.Count == 0)
    {
        return;
    }
    if (!processingErrorType.HasFlag(errorType) && processingErrorType != ErrorType.All)
    {
        return;
    }
    // 1 for fewer than ten messages, otherwise a tenth of the message count; how the
    // value is interpreted is up to the Split extension, which is defined elsewhere.
    int equalCounts = typedErrors.Count < 10 ? 1 : typedErrors.Count / 10;
    foreach (IEnumerable<Error> subSet in typedErrors.Split(equalCounts))
    {
        // Per-iteration copy so each task closes over its own subset.
        IEnumerable<Error> chunk = subSet;
        tasks.Add(Task.Run<List<Error>>(() => FindDistinctErrorMessages(typedKBErrors, chunk.ToList()), CancellationToken.None));
    }
};

queueGroupingTasks(ErrorType.Warning, warningErrors, filteredWarningKBList);
queueGroupingTasks(ErrorType.Severe, severeErrors, filteredSevereKBList);
queueGroupingTasks(ErrorType.Fatal, fatalErrors, filteredFatalKBList);
queueGroupingTasks(ErrorType.Error, cbeccErrors, filteredcbeccErrorsKBList);
启动这些任务后,它们需要很长时间才能完成。等待这些任务的 wait 语句(Task.WaitAll)会使应用程序进入挂起(无响应)状态。
// Block the calling thread until every grouping task has finished, then gather
// everything the tasks returned.
Task.WaitAll(tasks.ToArray());
List<Error> collected = new List<Error>();
foreach (var finishedTask in tasks)
{
    collected.AddRange(finishedTask.Result);
}

// Different subsets can report the same pattern; fold those entries into the first
// entry seen for that pattern.
var groupsByPattern = collected.Distinct().GroupBy(res => res.ErrorMessagePattern).ToList();
foreach (var patternGroup in groupsByPattern)
{
    Error error = patternGroup.First();
    // The total is computed before the other members' counts are reset below.
    error.ErrorCount = patternGroup.Sum(member => member.ErrorCount);
    if (patternGroup.Count() > 1)
    {
        foreach (Error other in patternGroup.Where(member => member != error).ToList())
        {
            if (error.Errors == null)
            {
                error.Errors = new List<Error>();
            }
            other.ErrorCount = 1;
            // NOTE(review): only the children of `other` are moved over; when `other`
            // has no children it is counted in ErrorCount but not listed in
            // error.Errors. Confirm this is intended.
            if (other.Errors != null && other.Errors.Count > 0)
            {
                error.Errors.AddRange(other.Errors);
                other.Errors = null;
            }
        }
    }
    distinctErrors.Add(error);
}

// Report every entry still present in `errors` individually, passing no knowledge-base
// entry to AddErrorResolutionMessage, and log those whose resolution is "Not Found".
foreach (Error error in errors)
{
    error.ErrorCount = 1;
    AddErrorResolutionMessage(error, null);
    distinctErrors.Add(error);
    if (error.PossibleResolution == "Not Found")
    {
        logMessage.AppendLine(error.ErrorMessage);
    }
}
是否有更好的方法或算法来缩短处理这些列表的时间,并降低处理 m×n 个元素的时间复杂度?