如何计算总和?

时间:2011-03-14 12:20:04

标签: java

// For each input file abc<a>.txt, prints the count, term frequency, IDF and
// TF/IDF of every word in array2.
// NOTE(review): `filename` is used as the loop bound, so it presumably holds
// the number of files rather than a name - confirm against its declaration.
for (a = 0; a < filename; a++) {

        try {
            System.out
                    .println(" _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _  ");
            System.out.println("\n");
            System.out.println("The word inputted : " + word2);
            File file = new File(
                    "C:\\Users\\user\\fypworkspace\\TextRenderer\\abc" + a
                            + ".txt");
            System.out.println(" _________________");

            System.out.print("| File = abc" + a + ".txt | \t\t \n");

            for (int i = 0; i < array2.length; i++) {

                totalCount = 0;
                wordCount = 0;

                // The file is re-read from the start for every word, so each
                // pass opens its own Scanner. Close it when the pass ends so
                // file handles are not leaked across words and files.
                Scanner s = new Scanner(file);
                try {
                    while (s.hasNext()) {
                        totalCount++;
                        if (s.next().equals(array2[i]))
                            wordCount++;
                    }
                } finally {
                    s.close();
                }

                // Term frequency: occurrences of the word / tokens in the file.
                double termFrequency = (double) wordCount / totalCount;

                System.out.print(array2[i] + " --> Word count =  "
                        + "\t " + "|" + wordCount + "|");
                System.out.print("  Total count = " + "\t " + "|"
                        + totalCount + "|");
                System.out.printf("  Term Frequency =  | %8.4f |",
                        termFrequency);

                System.out.println("\t ");

                // Inverse document frequency: log10(numDoc / numofDoc[i]).
                double inverseTF = Math.log10((float) numDoc
                        / (numofDoc[i]));
                System.out.println("    --> IDF = " + inverseTF);

                double TFIDF = termFrequency * inverseTF;
                System.out.println("    --> TF/IDF = " + TFIDF + "\n");
            }
        } catch (FileNotFoundException e) {
            // new Scanner(File) throws this when the file cannot be opened.
            System.out.println("File is not found");
        }
    }
}

}

这是示例输出:

The word inputted : how are you


| File = abc0.txt |

how --> Word count = | 4 | Total count = | 957 | Term Frequency = | 0.0042 |

--> IDF = 0.5642714398516419

--> TF/IDF = 0.0023585013159943234

are --> Word count = | 7 | Total count = | 957 | Term Frequency = | 0.0073 |

--> IDF = 0.1962946357308887

--> TF/IDF = 0.00143580193324579

you --> Word count = | 10 | Total count = | 957 | Term Frequency = | 0.0104 |

--> IDF = 0.1962946357308887

--> TF/IDF = 0.002051145618922557

如何对每个文本文件的这 3 个 TF/IDF 值求和?

1 个答案:

答案 0 :(得分:1)

假设您只想显示一个总计值,那么请在 for 循环之前添加以下内容:

// Running total of the TF/IDF values; starts at zero before the loop begins.
double runningTfIDF = 0;

然后在计算出当前的 TF/IDF 之后,紧接着添加下面这一行:

// Add the TF/IDF just computed to the running total.
runningTfIDF += TFIDF;

然后,在for loop之后,您可以添加一行来打印runningTfIDF。

已修改为包含更完整的答案

// Per-word results keyed by the word. Everything is collected first and
// printed only after all words have been processed.
HashMap<String, BigDecimal> runningTfIDF = new HashMap<String, BigDecimal>();
HashMap<String, BigDecimal> wordCount = new HashMap<String, BigDecimal>();
HashMap<String, BigDecimal> frequency = new HashMap<String, BigDecimal>();
HashMap<String, BigDecimal> inverseTF = new HashMap<String, BigDecimal>();
for (int i = 0; i < array2.length; i++) {

    totalCount = 0;
    wordCountVal = 0;

    // One pass over the file per word; always release the file handle.
    Scanner s = new Scanner(file);
    try {
        while (s.hasNext()) {
            totalCount++;
            if (s.next().equals(array2[i]))
                wordCountVal++;
        }
    } finally {
        s.close();
    }

    wordCount.put(array2[i], new BigDecimal(wordCountVal));

    // Term frequency. An empty file would give 0/0 = NaN, which BigDecimal
    // cannot represent, so report a frequency of zero in that case.
    BigDecimal frequencyVal = (totalCount == 0)
            ? BigDecimal.ZERO
            : BigDecimal.valueOf((double) wordCountVal / totalCount);
    frequency.put(array2[i], frequencyVal);

    // Inverse document frequency: log10(numDoc / numofDoc[i]).
    // NOTE(review): numofDoc[i] must be non-zero here; an infinite or NaN
    // result makes BigDecimal.valueOf throw NumberFormatException.
    BigDecimal inverseTFVal = BigDecimal.valueOf(Math.log10((float) numDoc
            / (numofDoc[i])));
    inverseTF.put(array2[i], inverseTFVal);

    BigDecimal TFIDF = frequencyVal.multiply(inverseTFVal);
    runningTfIDF.put(array2[i], TFIDF);
}

// Display pass, run once after every word has been stored.
// HashMap does not guarantee iteration order, so words may not appear in
// the order they were entered.
for (String word : wordCount.keySet()) {
    System.out.print(word + " --> word count "
            + "\t |" + wordCount.get(word) + "|");
    System.out.print("  Total count = " + "\t " + "|"
            + totalCount + "|");
    System.out.printf("  Term Frequency =  | %8.4f |",
            frequency.get(word));

    System.out.println("\t ");

    System.out.println("    --> IDF = " + inverseTF.get(word));

    System.out.println("    --> TF/IDF = " + runningTfIDF.get(word) + "\n");
}

这远不是最简洁的实现方式,但简而言之:如果想从第一个单词开始显示汇总后的结果,就需要先保存每个单词的信息,等总计计算完成后再循环显示各个单词。这样说清楚了吗?