//=========================================================================
/// <summary>
/// This is the main function.
/// Converts the string to a list of string patterns,
/// cleans that list of any redundancies,
/// and gets the repeated items and their repeated numbers.
/// </summary>
/// <param name="source">Original string that was generated</param>
/// <param name="repeatedNumber">Receives how many items were found (the size of the returned list)</param>
/// <param name="sequenceLength">The user input used to create the string patterns</param>
/// <returns>The list of repeated items and their repeated numbers</returns>
static List<MyDataClass> GetConsecutiveChars(string source, out int repeatedNumber, int sequenceLength)
{
    // Earlier single-statement attempts (regex / LINQ), kept commented out for reference:
    //var matchList = Regex.Matches(source, "([a-zA-Z0-9\\+\\-\\*\\&\\^\\%\\$\\#\\@\\!])\\1{" + (sequenceLength - 1) + "}").Cast<Match>()
    // .Select(m => m.Value)
    // .ToArray();
    ////var matchList = Regex.Matches(source, "([a-zA-Z0-9])\\1{" + (sequenceLength - 1) + "}").Cast<Match>()
    //// .Select(m => m.Value)
    //// .ToArray();
    //var result2 = source.GroupBy(c => c).Where(c => c.Count() >1).Select(c => new { charName = c.Key, charCount = c.Count() });

    // Divide the source into pieces (helper defined elsewhere in the file).
    List<string> pieces = DistributedStringToArray(source, sequenceLength);

    // Drop duplicated pieces so each distinct pattern is checked only once.
    List<string> distinctPieces = pieces.Distinct().ToList();

    // Check which pieces are repeated in the source (helper defined elsewhere in the file).
    List<MyDataClass> result = FillListWtihRepeatedItems(source, distinctPieces);

    // Report the number of repeated items; List<T>.Count avoids the LINQ Count() call.
    repeatedNumber = result.Count;

    // Return the list of repeated items and their repeated numbers.
    return result;
}
我的问题: 我可以在一个正则表达式语句或linq语句中完成所有这些吗?
我试过,但没有成功。我尝试过的正则表达式语句和LINQ语句已在上面的代码中注释掉了。
请告诉我。
我已将当前的程序上传到 https://dotnetfiddle.net/qO7PvS#run-results
示例:
statement is : [I like to know little]
结果:
k = 2
o = 2
i = 2
li = 2
....
也就是说,要统计语句中重复出现2次的字符或单词。
答案 0 :(得分:1)
要获得连续的字符,您需要使用所谓的named back reference。它的工作原理如下:
(?<char>\w)\k<char>
例如,以下代码将找到连续的 b:
// The named group "char" captures a single 'b'; \k<char>+ then requires that same
// captured character to follow one or more times, so the match is a run of two or more b's.
string input = "aaaaabbbbbbccccc";
Regex consecutiveB = new Regex(@"(?<char>b)\k<char>+");
Match match = consecutiveB.Match(input);
输出:bbbbbb
答案 1 :(得分:0)
可能不是最有效的,但如果我在LINQPad中尝试这个:
// Counts every substring of `source` (all lengths, all start positions) and keeps
// those that occur more than once and contain no whitespace.
// Bounds fix: a substring of length l can start at indices 0 .. source.Length - l,
// i.e. source.Length - l + 1 positions. The earlier "- l - 1" skipped the last two
// start positions of every length, so trailing characters were never counted.
// For "I like to know little" this now reports l = 3 and e = 2, where the
// truncated version reported l = 2 and omitted e.
// The outer range now starts from a count of source.Length, so an empty string
// yields an empty result instead of throwing.
Enumerable.Range(1, source.Length)
    .SelectMany(l => Enumerable.Range(0, source.Length - l + 1)
        .Select(i => source.Substring(i, l)))
    .GroupBy(s => s)
    .Where(g => g.Count() > 1 && !g.Key.Any(char.IsWhiteSpace))
    .Select(c => new { c.Key, Count = c.Count() })
得到的结果是:
Key Count
l 2
i 2
k 2
t 3
o 2
li 2