了解mahout分类输出

时间:2013-01-15 13:15:08

标签: mahout

我使用20newsGroupExample训练了一个包含三个类别(Category_A、Category_B、Category_C)的mahout模型,现在我想用这个模型对我的文档进行分类。有人可以帮助我理解我从这个模型得到的输出吗?

这是我的输出

{0:-2813549.8786637094,1:-2651723.736745838,2:-2710651.7525975127}

根据输出,文档被归为类别1,但预期的类别是2。我的代码是否正确,还是缺少了什么?

/**
 * Scores a single text document against a previously trained Mahout naive Bayes model.
 *
 * <p>The document is tokenized with Lucene's {@code StandardAnalyzer}, every token is hashed
 * into a sparse feature vector with a {@code StaticWordValueEncoder}, and the vector is passed
 * to a {@code StandardNaiveBayesClassifier}. The per-category scores and the index of the
 * highest score are printed to standard output.
 *
 * <p>NOTE(review): the features here are produced by hashing. If the model was trained on
 * dictionary-based TF-IDF vectors (as the 20newsgroups example presumably does), the feature
 * indices used here may not line up with the ones the model was trained on, which would make
 * the scores unreliable - confirm against the training pipeline.
 */
public class NaiveBayesClassifierExample {

    /** Document that is classified when no input path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "c:\\inputFile.txt";

    /** Model location that is used when no model path is given on the command line. */
    private static final String DEFAULT_MODEL_PATH = "E:\\Final_Model\\model";

    /** Cardinality of the sparse feature vector the tokens are hashed into. */
    private static final int FEATURE_VECTOR_SIZE = 100000;

    /**
     * Loads the naive Bayes model stored at {@code strModelPath}, scores {@code v} against it
     * and prints the score vector followed by the index of its largest entry.
     *
     * <p>The printed index is a position in the score vector. NOTE(review): how that position
     * maps to a category name is not established in this class - presumably through the label
     * index written at training time; verify before interpreting the result.
     *
     * @param strModelPath path of the trained model
     * @param v feature vector of the document to classify
     * @throws IOException if the model cannot be read
     */
    public static void loadClassifier(String strModelPath, Vector v) throws IOException {
        Configuration conf = new Configuration();

        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(strModelPath), conf);
        AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

        Vector scores = classifier.classifyFull(v);
        System.out.println(scores.asFormatString());
        System.out.println(scores.maxValueIndex());
    }

    /**
     * Builds the feature vector for the default input document.
     *
     * @return the normalized feature vector
     * @throws IOException if the document cannot be read or tokenized
     */
    public static Vector createVect() throws IOException {
        return createVect(DEFAULT_INPUT_PATH);
    }

    /**
     * Builds a normalized feature vector from the text file at {@code inputPath}.
     *
     * @param inputPath path of the text file to encode
     * @return the normalized feature vector
     * @throws IOException if the document cannot be read or tokenized
     */
    public static Vector createVect(String inputPath) throws IOException {
        FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
        Vector features = new RandomAccessSparseVector(FEATURE_VECTOR_SIZE);

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        try {
            TokenStream ts = analyzer.tokenStream("body", new StringReader(readData(inputPath)));
            try {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                // Follow the documented consumer workflow: reset, consume, end, close.
                ts.reset();
                while (ts.incrementToken()) {
                    encoder.addToVector(termAtt.toString(), 1.0, features);
                }
                ts.end();
            } finally {
                ts.close();
            }
        } finally {
            analyzer.close();
        }

        // normalize() returns a new vector instead of modifying the receiver, so the result
        // has to be returned; calling it as a bare statement has no effect.
        return features.normalize();
    }

    /**
     * Reads the whole text file at {@code path} into one string.
     *
     * <p>Lines are joined with a newline so that the last word of one line and the first word
     * of the next stay separate tokens.
     *
     * @param path path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readData(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
            return text.toString();
        } finally {
            // Close the reader even when reading fails part-way through.
            reader.close();
        }
    }

    /**
     * Classifies one document and prints the result.
     *
     * @param args optional: {@code args[0]} is the model path and {@code args[1]} the input
     *     file; the built-in default paths are used for whichever is omitted
     * @throws IOException if the document or the model cannot be read
     */
    public static void main(String[] args) throws IOException {
        String modelPath = args.length > 0 ? args[0] : DEFAULT_MODEL_PATH;
        String inputPath = args.length > 1 ? args[1] : DEFAULT_INPUT_PATH;

        Vector v = createVect(inputPath);
        loadClassifier(modelPath, v);
    }

}

0 个答案:

没有答案