using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
namespace WordFreq
{
/// <summary>
/// Console utility that counts how often each word occurs in a text file,
/// ignoring punctuation, digits, common English stopwords and very short tokens.
/// The ten most frequent words are printed to the console and the complete
/// word/count table is written to an output file.
/// </summary>
class Program
{
    // Locations used when no command-line arguments are supplied.
    private const string DefaultInputPath = "C:\\Reviews.txt";
    private const string DefaultOutputPath = "C:\\output.txt";

    // Number of entries shown in the console summary.
    private const int TopTermCount = 10;

    // Tokens shorter than this many characters are not counted.
    private const int MinimumWordLength = 2;

    // Characters deleted from the text before it is split into words.
    // Apostrophes are deliberately kept so contractions such as "don't"
    // can still be matched against the stopword list.
    private static readonly HashSet<char> StripChars = new HashSet<char>
    {
        ';', ',', '.', '-', '_', '^', '(', ')', '[', ']', '<', '>',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
    };

    // Whitespace is treated as a word boundary rather than being deleted;
    // deleting line breaks would glue the last word of one line to the
    // first word of the next.
    private static readonly char[] WordSeparators = { ' ', '\n', '\t', '\r' };

    // Common English words excluded from the frequency table.
    private static readonly HashSet<string> Stopwords = new HashSet<string>
    {
        "a", "about", "above", "after", "again",
        "against", "all", "am", "an", "and", "any",
        "are", "aren't", "as", "at", "be", "because",
        "been", "before", "being", "below", "between",
        "both", "but", "by", "can't", "cannot",
        "could", "couldn't", "did", "didn't", "do",
        "does", "doesn't", "doing", "don't", "down",
        "during", "each", "few", "for", "from",
        "further", "had", "hadn't", "has", "hasn't",
        "have", "haven't", "having", "he", "he'd",
        "he'll", "he's", "her", "here", "here's",
        "hers", "herself", "him", "himself", "his",
        "how", "how's", "i", "i'd", "i'll", "i'm",
        "i've", "if", "in", "into", "is", "isn't",
        "it", "it's", "its", "itself", "let's",
        "me", "more", "most", "mustn't", "my",
        "myself", "no", "nor", "not", "of", "off",
        "on", "once", "only", "or", "other", "ought",
        "our", "ours", "ourselves", "out", "over",
        "own", "same", "shan't", "she", "she'd",
        "she'll", "she's", "should", "shouldn't", "so",
        "some", "such", "than", "that", "that's",
        "the", "their", "theirs", "them", "themselves",
        "then", "there", "there's", "these", "they",
        "they'd", "they'll", "they're", "they've",
        "this", "those", "through", "to", "too",
        "under", "until", "up", "very", "was",
        "wasn't", "we", "we'd", "we'll", "we're",
        "we've", "were", "weren't", "what", "what's",
        "when", "when's", "where", "where's",
        "which", "while", "who", "who's", "whom",
        "why", "why's", "with", "won't", "would",
        "wouldn't", "you", "you'd", "you'll", "you're",
        "you've", "your", "yours", "yourself",
        "yourselves"
    };

    /// <summary>
    /// Program entry point.
    /// </summary>
    /// <param name="args">
    /// Optional. <c>args[0]</c> is the input text file (default <c>C:\Reviews.txt</c>);
    /// <c>args[1]</c> is the output file (default <c>C:\output.txt</c>).
    /// </param>
    static void Main(string[] args)
    {
        string inputPath = (args != null && args.Length > 0) ? args[0] : DefaultInputPath;
        string outputPath = (args != null && args.Length > 1) ? args[1] : DefaultOutputPath;

        try
        {
            // Invariant lower-casing keeps stopword matching independent of the
            // machine's culture (e.g. Turkish maps 'I' to a dotless 'i').
            string fullReview = File.ReadAllText(inputPath).ToLowerInvariant();

            Dictionary<string, int> counts = CountWords(fullReview);

            // Keep the ranking in a list: enumeration order of a Dictionary is
            // not guaranteed, so re-packing sorted pairs into one may lose the order.
            List<KeyValuePair<string, int>> ranked =
                counts.OrderByDescending(entry => entry.Value).ToList();

            Console.WriteLine("---- Most Frequent Terms in the File: " + inputPath + " ----");
            Console.WriteLine();

            int rank = 1;
            foreach (KeyValuePair<string, int> entry in ranked.Take(TopTermCount))
            {
                Console.WriteLine(rank + "\t" + entry.Key + "\t" + entry.Value);
                rank++;
            }

            // Write every counted word (not only the top ones) as "word:count".
            using (StreamWriter writer = new StreamWriter(outputPath))
            {
                foreach (KeyValuePair<string, int> entry in counts)
                {
                    writer.WriteLine("{0}:{1}", entry.Key, entry.Value);
                }
            }
        }
        catch (UnauthorizedAccessException ex)
        {
            // Writing directly to a drive root such as C:\ commonly requires
            // elevated privileges; point the user at the path override.
            Console.Error.WriteLine("Access denied: " + ex.Message);
            Console.Error.WriteLine(
                "Pass a writable output path as the second argument, e.g. C:\\Temp\\output.txt.");
            Environment.ExitCode = 1;
        }
        catch (IOException ex)
        {
            // Covers a missing input file or directory and other read/write failures.
            Console.Error.WriteLine("I/O error: " + ex.Message);
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Builds a word-frequency table for <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Lower-cased input text.</param>
    /// <returns>
    /// A map from each word to its number of occurrences. Stopwords and tokens
    /// shorter than <see cref="MinimumWordLength"/> characters are omitted.
    /// </returns>
    private static Dictionary<string, int> CountWords(string text)
    {
        // Drop punctuation and digits in a single pass.
        StringBuilder cleaned = new StringBuilder(text.Length);
        foreach (char c in text)
        {
            if (!StripChars.Contains(c))
            {
                cleaned.Append(c);
            }
        }

        string[] tokens = cleaned.ToString()
            .Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);

        Dictionary<string, int> counts = new Dictionary<string, int>();
        foreach (string token in tokens)
        {
            // Hash-set lookup replaces the repeated List.Contains/Remove scan.
            if (token.Length < MinimumWordLength || Stopwords.Contains(token))
            {
                continue;
            }

            int seen;
            counts.TryGetValue(token, out seen); // seen stays 0 for a new word
            counts[token] = seen + 1;
        }

        return counts;
    }
}
} // End of namespace
答案 0 :(得分:1)
首先请看一下Remus在评论中建议的链接。
其次,你的代码本身应该没有问题,这里唯一可能出错的地方是你试图直接写入 C:\ 根目录。请尝试把输出文件放在 C:\Temp\ 之类的目录下,这样应该就可以了。
请查看here以获取有关其原因的更多信息。
答案 1 :(得分:0)
C:\temp 目录允许我读取和写入文件。