使用朴素贝叶斯创建模型

时间:2014-02-13 05:26:10

标签: java

我正在使用 LingPipe 工具运行朴素贝叶斯算法。我用训练数据对分类器进行训练,并成功地在测试数据上进行了测试。但是每次运行程序时,算法都会重新训练一遍。我不想每次都重新训练,而是想构建一个可以直接应用于测试数据的模型。

/**
 * Trains a LingPipe character n-gram language-model classifier on the documents
 * found under {@code train/<category>/}, compiles it, and then classifies every
 * document found under {@code test/<category>/}, printing the best category and
 * the full joint classification for each test document.
 *
 * <p>NOTE(review): the classifier is retrained on every run. If retraining is to
 * be avoided, the compiled model could presumably be written to disk once and
 * read back later (LingPipe's {@code AbstractExternalizable} appears to offer
 * {@code compileTo}/{@code readObject} for this) -- confirm against the library
 * documentation before relying on it.
 */
public class ClassifyNews {

    /** Root directory holding one sub-directory of training documents per category. */
    private static final File TRAINING_DIR = new File("train");

    /** Root directory holding one sub-directory of test documents per category. */
    private static final File TESTING_DIR = new File("test");

    /** Category names; each one is also the name of a sub-directory of the roots above. */
    private static final String[] CATEGORIES = {"c1", "c2", "c3"};

    /** N-gram length passed to the language-model classifier factory. */
    private static final int NGRAM_SIZE = 6;

    /** Character encoding used to read both training and test documents. */
    private static final String TEXT_ENCODING = "ISO-8859-1";

    /**
     * Entry point: trains, compiles, then runs the classifier over the test set.
     *
     * @param args ignored
     * @throws ClassNotFoundException declared by the original signature; presumably
     *     propagated from compiling the classifier
     * @throws IOException if a document cannot be read or a directory cannot be listed
     * @throws IllegalArgumentException if a training or testing category directory is missing
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        DynamicLMClassifier<NGramProcessLM> classifier =
                DynamicLMClassifier.createNGramProcess(CATEGORIES, NGRAM_SIZE);
        train(classifier);

        System.out.println("Compiling");
        // compile() returns an untyped object; the cast mirrors the original code.
        @SuppressWarnings("unchecked")
        JointClassifier<CharSequence> compiledClassifier =
                (JointClassifier<CharSequence>) AbstractExternalizable.compile(classifier);

        evaluate(compiledClassifier);
    }

    /** Feeds every file under each training category directory to the classifier. */
    private static void train(DynamicLMClassifier<NGramProcessLM> classifier)
            throws IOException {
        for (String category : CATEGORIES) {
            File classDir = new File(TRAINING_DIR, category);
            if (!classDir.isDirectory()) {
                String msg = "Could not find training directory=" + classDir
                        + "\nTraining directory not found";
                System.out.println(msg);
                throw new IllegalArgumentException(msg);
            }
            Classification classification = new Classification(category);
            for (String fileName : listFileNames(classDir)) {
                String text = Files.readFromFile(new File(classDir, fileName), TEXT_ENCODING);
                System.out.println("Training on " + category + "/" + fileName);
                classifier.handle(new Classified<CharSequence>(text, classification));
            }
        }
    }

    /** Classifies every file under each testing category directory and prints the result. */
    private static void evaluate(JointClassifier<CharSequence> compiledClassifier)
            throws IOException {
        boolean storeCategories = true;
        // The evaluator is fed every reference-labelled test case, as in the original;
        // its accumulated results are not printed by this program.
        JointClassifierEvaluator<CharSequence> evaluator =
                new JointClassifierEvaluator<CharSequence>(
                        compiledClassifier, CATEGORIES, storeCategories);

        for (String category : CATEGORIES) {
            File classDir = new File(TESTING_DIR, category);
            // Mirror the training-side check: without it a missing directory makes
            // list() return null and the loop below fail with a NullPointerException.
            if (!classDir.isDirectory()) {
                throw new IllegalArgumentException(
                        "Could not find testing directory=" + classDir);
            }
            Classification classification = new Classification(category);
            for (String fileName : listFileNames(classDir)) {
                String text = Files.readFromFile(new File(classDir, fileName), TEXT_ENCODING);
                System.out.print("\nTesting on " + category + "/" + fileName + " ");
                evaluator.handle(new Classified<CharSequence>(text, classification));

                JointClassification jc = compiledClassifier.classify(text);
                System.out.println("\tGot best category of: " + jc.bestCategory());
                System.out.println(jc.toString());
            }
        }
    }

    /** Lists the entries of {@code dir}, failing loudly instead of returning null. */
    private static String[] listFileNames(File dir) throws IOException {
        String[] names = dir.list();
        if (names == null) {
            // File.list() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Could not list directory=" + dir);
        }
        return names;
    }
}

0 个答案:

没有答案
相关问题