如何获得最短/最长的发布列表

时间:2013-05-08 08:33:30

标签: c# winforms information-retrieval

我在这里写了Class InvertedIndexTable { }

public interface IInvertedIndex
{
    int IndexSize(string path);
    void Load(string path);
}
class InvertedIndexTable : IInvertedIndex
{
     Dictionary<string, List<string>> index = new Dictionary<string, List<string>>();
     CreateMatrix r = new CreateMatrix(); // an object of another class contains stopwords{A,AN,...}
                                          // and also contains RemoveStopword() method
     public HashSet<string> DistincTerms = new HashSet<string>();
     public List<string> filesCollection = new List<string>();
     public int IndexSize(string pa)
     {
         Load(pa);
         return index.Count;
     }
     public void Load(string path)
      {
          string[] filePaths = Directory.GetFiles(Path.GetFullPath(path));
          foreach (string file in filePaths)
          {
              string contents = File.ReadAllText(file);
              contents = RemoveNonAlphaChars(contents);
              String[] tokensCollection = r.RemoveStopsWords(contents.ToUpper().Split(' '));
              foreach (string token in tokensCollection)
              {
                  if (!r.booleanOperator.Contains(token) && !DistincTerms.Contains(token))
                  {
                      DistincTerms.Add(token);
                  }
              }
          }
          Frequenty(filePaths);
      }
     public void Frequenty(string[] path1)
      {
        foreach (string d in DistincTerms)
        {
            foreach (string f in path1)
            {
                if (File.ReadAllText(f).Contains(d))
                {
                    filesCollection.Add(f);
                }

            }
            index.Add(d, filesCollection);
          }
      }
     private string RemoveNonAlphaChars(string content)
      {
          StringBuilder sb = new StringBuilder();

          foreach (char c in content.ToCharArray())
          {
              if (char.IsLetter(c) || char.IsSeparator(c))
              {
                  sb.Append(c);
              }
          }
          return sb.ToString();
      }
     public  string GetSmallestPosting(string p)
      {
          List<int> numbers = new List<int>();
          if (index != null)
          {
              foreach( KeyValuePair<string,List<string>> i in index)
              {
                  string content= i.Value.ToString();
                  String[] itemsList = content.ToUpper().Split(' ');
                  numbers.Add(itemsList.Length); 
              }

              return numbers.Min().ToString();
          }
          return null;
      }
     public string GetLongestPosting(string p)
      {
          List<int> numbers = new List<int>();
          if (index != null)
          {
              foreach (KeyValuePair<string, List<string>> i in index)
              {

                  string content = i.Value.ToString();
                  String[] itemsList = content.ToUpper().Split(' ');
                  numbers.Add(itemsList.Count());
              }
              return numbers.Max().ToString(); 
          }
          return null;
      }
}

我打算准备button6,向我展示Class InvertedIndexTable { }最小和最长的帖子列表,以及Dictionary<string,List<string>> index的KeyValuePair数量。 它没有任何错误和异常,但问题是:DictionaryPairsNumbers的返回值是正确的,但MinSizePosting和MaxSizePosting的返回值是错误的,代码总是为它们返回值“1”。为什么?怎么回事?

我为button6编写的代码就在这里:

    `  InvertedIndexTable i = new InvertedIndexTabe(); 
    private void button5_Click(object sender, EventArgs e)
    {
     MessageBox.Show("DictionaryPairsNumbers: " + i.IndexSize(textBox1.Text)+"\n\rMaxSizePosting: " + i.GetLongestPosting(textBox1.Text)+"\n\rMinSizePosting: "+ i.GetSmallestPosting(textBox1.Text));
    }
    `

拜托,如果有任何方法可以达到预期的结果,请告诉我。 我需要的结果是List<string>中最短和最长Dictionary index的大小我以为我为GetSmallestPosting()GetLongestPosting()方法编写了正确的代码,但似乎我错了,请告诉我我这两种方法有什么问题?为什么他们返回相同的值,总是???为什么这个值是“1”,总是???

顺便说一下,GetSmallestPosting()找到List<string>的最短Dictionary<string,List<string>> indexGetLongestPosting()找到最长的{{1}}。

谢谢你的时间。

2 个答案:

答案 0 :(得分:0)

您可以使用Linq执行此操作。

向InvertedIndex类添加两个新方法。

最小遍历字典中的所有键(X)值(列表)对,并返回具有最小项目数的列表。 Max 恰恰相反。

public List<T> GetSmallestPosting()
{
    if(_Index!=null)
       return  _Index.Values.First(v => v.Count == _Index.Min(kv => kv.Value.Count)).ToList();

    return null;
}

public List<T> GetLongestPosting()
{
    if(_Index!=null)
      return   _Index.Values.First(v => v.Count == _Index.Max(kv => kv.Value.Count)).ToList();

    return null;
}

答案 1 :(得分:0)

首先我改变了Frequenty() { }方法,它必须像这样

public void Frequenty(string[] path1)
      {
         List<string> filesCollection = new List<string>();
         for (int i = 0; i < DistincTerms.Count(); i++ )
         {
             string d = DistincTerms.ElementAt(i);
             foreach (string f in path1)
             {
                 string c = File.ReadAllText(f);
                 c = r.RemoveNonAlphaChars(c);
                 String[] T = r.RemoveStopsWords(c.ToUpper().Split(' '));
                 foreach (string term in T)
                 {
                     if (term.Equals(d) && !filesCollection.Contains(f))
                     {
                         filesCollection.Add(f);
                     }
                 }

             }
             countor.Add(filesCollection.Count);
             index.Add(d, countor);
             filesCollection.Clear();
         }

现在,我已经将两种方法都改为GetSmallest / LongestPostingList:

 public string GetSmallestPosting(string p)
      {
          if (index != null)
          {
            return countor.Min().ToString();
          }
          return null;
      }
     public string GetLongestPosting(string p)
      {
          if (index != null)
          {
              return countor.Max().ToString();
          }
          return null;
      }

它有效。我测试了它。