我遇到了一个很棘手的问题。我的任务是先将输入文本拆分为句子,再将每个句子拆分为单词。代码如下:
using System.Collections.Generic;
using System.Linq;
namespace TextAnalysis
{
/// <summary>
/// Splits a text into sentences and each sentence into lower-cased words.
/// </summary>
/// <remarks>
/// NOTE(review): this is the version that fails the delimiter test. The helper is
/// handed the whole array of sentence fragments instead of a single fragment, so
/// every returned list contains the words of the entire text.
/// </remarks>
static class SentencesParserTask
{
/// <summary>
/// Cuts <paramref name="text"/> on '.', '!', '?', ';', ':', '(' and ')' and collects
/// one word list per fragment, skipping word lists that turn out empty.
/// </summary>
/// <param name="text">Text to parse.</param>
/// <returns>The non-empty word lists, in fragment order.</returns>
public static List<List<string>> ParseSentences(string text)
{
var sentencesList = new List<List<string>>();
var splittedText = text.Split('.', '!', '?', ';', ':', '(', ')');
// One slot per fragment. The empty lists created here are all overwritten by the
// assignment in the second loop.
List<string>[] mas = new List<string>[splittedText.Length];
for (int i = 0; i < splittedText.Length; i++)
{
mas[i] = new List<string>();
}
for (int j = 0; j < splittedText.Length; j++)
{
// NOTE(review): the entire splittedText array is passed, not splittedText[j].
// The helper loops over every fragment, so each mas[j] holds the words of the
// whole text and the same full word list is added once per fragment.
mas[j]= GetWordsOutOfTheSentence(splittedText);
bool isEmpty = !(mas[j]).Any();
if(!isEmpty)
sentencesList.Add(mas[j]);
}
return sentencesList;
}
/// <summary>
/// Gathers the lower-cased words of every fragment in
/// <paramref name="splittedText"/> into one flat list.
/// </summary>
/// <param name="splittedText">Sentence fragments to scan.</param>
/// <returns>All non-empty words from all fragments, in order of appearance.</returns>
private static List<string> GetWordsOutOfTheSentence(string[] splittedText)
{
var wordList = new List<string>();
foreach (var sentence in splittedText)
{
// Characters treated as word separators; note that of the digits only '1' is listed.
var wordsArray = sentence.Split('^', '#', '$', '-', '+', '1', '=', ' ', '\t', '\n', '\r',',');
for (int i = 0; i < wordsArray.Length; i++)
{
// Adjacent separators produce empty entries; skip them.
if (wordsArray[i] != string.Empty)
{
var fineWord = wordsArray[i];
wordList.Add(fineWord.ToLower());
}
}
}
return wordList;
}
}
}
主要问题在于以下测试:
1) 失败:TextAnalysis.SentencesParser_Tests.CorrectlyParse_SentenceDelimiters
输入文字:[a.b!c?d:e; f(g)h; i]
句子#0错误
预期是具有 1 个元素的列表,实际是具有 9 个元素的列表;值在索引 [1] 处不同
额外:< "b", "c", "d"... >
我的代码不断把新单词追加到同一个列表中,然后把这个列表添加到主列表里。我该怎么办?
答案 0 :(得分:0)
如其中一条评论所述,您把整个 splittedText 变量传给了 GetWordsOutOfTheSentence,而不是只传入当前这一个句子。这意味着您传入的是包含 9 个句子的数组,而不是单个句子。正如评论中所建议的,您的代码应改为只传入对应的那个句子。
/// <summary>
/// Splits <paramref name="text"/> into sentences and each sentence into words.
/// </summary>
/// <remarks>
/// Relies on a <c>GetWordsOutOfTheSentence</c> helper that accepts a single sentence
/// (a <c>string</c>) and returns its words.
/// </remarks>
/// <param name="text">Text to parse.</param>
/// <returns>One word list per sentence that contains at least one word.</returns>
public static List<List<string>> ParseSentences(string text)
{
    var sentencesList = new List<List<string>>();
    var splittedText = text.Split('.', '!', '?', ';', ':', '(', ')');
    foreach (var sentence in splittedText)
    {
        // Before (bug): GetWordsOutOfTheSentence(splittedText) passed the entire array,
        // so every sentence received the words of the whole text.
        // After (fix): pass only the sentence currently being processed.
        List<string> words = GetWordsOutOfTheSentence(sentence);
        if (words.Count > 0)
        {
            sentencesList.Add(words);
        }
    }
    return sentencesList;
}
答案 1 :(得分:0)
实际上,我只是额外使用了一个列表就解决了这个问题。谢谢大家,你们太棒了!
using System.Collections.Generic;
using System.Linq;
namespace TextAnalysis
{
    /// <summary>
    /// Splits a text into sentences and each sentence into lower-cased words.
    /// </summary>
    static class SentencesParserTask
    {
        // Characters that end a sentence.
        private static readonly char[] SentenceDelimiters = { '.', '!', '?', ';', ':', '(', ')' };

        // Characters that separate words inside a sentence.
        private static readonly char[] WordDelimiters =
        {
            '^', '#', '$', '-', '+', '1', '=', ' ', '\t', '\n', '\r', ','
        };

        /// <summary>
        /// Parses <paramref name="text"/> into a list of sentences, each represented
        /// by the list of its lower-cased words.
        /// </summary>
        /// <param name="text">Text to parse.</param>
        /// <returns>
        /// One word list per sentence, in order of appearance. Sentences that contain
        /// no words are omitted.
        /// </returns>
        public static List<List<string>> ParseSentences(string text)
        {
            var sentencesList = new List<List<string>>();
            foreach (var sentence in text.Split(SentenceDelimiters))
            {
                // A fresh list per sentence keeps the words of different sentences apart.
                // Consecutive delimiters yield empty entries, which are filtered out.
                var words = sentence
                    .Split(WordDelimiters)
                    .Where(word => word.Length > 0)
                    .Select(word => word.ToLower())
                    .ToList();

                if (words.Count > 0)
                {
                    sentencesList.Add(words);
                }
            }
            return sentencesList;
        }
    }
}