我和我的同事正在尝试编写一个单词计数器,用于从指定路径的文件中找出 5 个最常见的单词,并将它们输出到控制台。到目前为止,我们只写出了一段代码,它能搜索用户输入的单词并统计其出现的次数。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace wordcounter_2._0
{
    /// <summary>
    /// Console tool that counts how often a keyword occurs in a text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Prompts for a keyword and a file path, reads the file, and prints how many
        /// times the keyword pattern matches across the whitespace-separated tokens
        /// of the file content (case-insensitive).
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Bitte geben sie eine Keywort ein.");
            string patternWord = Console.ReadLine();
            if (patternWord == null)
            {
                // Standard input was closed; there is nothing to search for.
                return;
            }

            Console.WriteLine("Bitte geben sie einen Pfad ein.");

            // Keep asking until a file could actually be read. Previously the file
            // content was read into a variable that went out of scope immediately,
            // and the count was taken from another console line instead.
            string text = null;
            while (text == null)
            {
                string Pfad = Console.ReadLine();
                if (Pfad == null)
                {
                    // Standard input was closed; avoid looping forever.
                    return;
                }

                try
                {
                    // 'using' guarantees the reader (and the file handle) is released.
                    using (StreamReader MyReader = new StreamReader(Pfad))
                    {
                        text = MyReader.ReadToEnd();
                    }
                }
                catch (Exception)
                {
                    // Any failure to open or read the file is reported as an invalid path.
                    Console.WriteLine("Bitte geben sie einen gültigen Pfad ein.");
                }
            }

            // Escape the keyword so characters such as '+' or '(' are matched literally
            // instead of being interpreted as regex syntax (or making the pattern invalid).
            string pattern = @"(?:\b\w+\ \s|\S)*" + Regex.Escape(patternWord) + @"(?:\b\w+\b\ \s|\S)?";
            Regex rx = new Regex(pattern, RegexOptions.IgnoreCase);

            int count = 0;
            foreach (string token in text.Split())
            {
                count += rx.Matches(token).Count;
            }

            // Plain concatenation: the keyword must not become part of a composite
            // format string, otherwise '{' or '}' in it would throw a FormatException.
            Console.WriteLine("Das Wort " + patternWord + " kommt " + count + " mal vor.");
            Console.ReadLine();
        }
    }
}
答案 0 :(得分:5)
class Program
{
    /// <summary>
    /// Prints the five most frequent space-separated tokens of a sample sentence,
    /// then waits for Enter. Tokens are compared exactly, so punctuation attached
    /// to a word makes it a different token.
    /// </summary>
    static void Main(string[] args)
    {
        string words = "I felt happy because I saw the others were happy and because I knew I should feel happy, but I wasn’t really happy.";
        string[] separators = new string[] { " " };
        string[] splitWords = words.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        // Group identical tokens, rank the groups by size (largest first; ties keep
        // first-seen order), keep the top five and project each group to its token.
        var topFive = splitWords
            .GroupBy(token => token)
            .OrderByDescending(group => group.Count())
            .Take(5)
            .Select(group => group.Key);

        foreach (string token in topFive)
        {
            // To show the frequency as well, project group.Count() alongside the key.
            Console.WriteLine("Value: " + token);
        }

        Console.ReadLine();
    }
}
这应该可以满足您的需求。
答案 1 :(得分:0)
这里是基于正则表达式的解决方案
// Regex-based variant: extract every run of non-whitespace characters that sits
// between two word boundaries, then print the five most frequent ones.
string regWords = "I felt happy because I saw the others were happy and because I knew I should feel happy, but I wasn’t really happy.";
var regMatches = Regex.Matches(regWords, "\\b(?<word>[^\\s]+?)\\b");
// Cast<Match>() keeps this compiling on runtimes where MatchCollection only
// implements the non-generic IEnumerable (e.g. .NET Framework); it is a no-op
// conversion on newer runtimes.
var regCommonWords = regMatches.Cast<Match>()
    .GroupBy(x => x.Groups["word"].Value)
    .OrderByDescending(x => x.Count())
    .Take(5)
    .Select(x => x.Key);
foreach (var x in regCommonWords)
{
    Console.WriteLine("Value: " + x);
}
要了解正则表达式的功能,请看以下内容:
https://regex101.com/r/OTBN5V/1
请注意,此解决方案比通过拆分字符串实现的非正则表达式解决方案要慢。
https://dotnetfiddle.net/E4GDrj
上面的链接将本方案与 vinothvs 的答案进行了速度对比。
警告:
我的解决方案没有将“wasn’t”视为一个单词,而是将“wasn”和“t”视为两个单独的单词;我还没有找到令人满意的方法来解决这个问题。