我正在使用 LingPipe 工具运行朴素贝叶斯算法。我用训练数据对分类器进行了训练,并成功地在测试数据上完成了测试。但是每次运行程序时,算法都会重新训练一遍。我不想每次都重新训练,而是想先构建一个模型,之后可以直接将其应用于测试数据。
/**
 * Trains a LingPipe n-gram process language-model classifier on the files found under
 * {@code train/<category>/}, compiles it, and then classifies every file found under
 * {@code test/<category>/}, printing the best category and the full joint classification
 * for each test file.
 *
 * <p>The classifier is retrained from scratch on every run; nothing is persisted to disk.
 */
public class ClassifyNews {

    /** Root directory holding one sub-directory of training files per category. */
    private static final File TRAINING_DIR = new File("train");

    /** Root directory holding one sub-directory of testing files per category. */
    private static final File TESTING_DIR = new File("test");

    /** Category names; each one is also the name of a sub-directory of the roots above. */
    private static final String[] CATEGORIES = { "c1", "c2", "c3" };

    /** N-gram length handed to {@code DynamicLMClassifier.createNGramProcess}. */
    private static final int NGRAM_SIZE = 6;

    /** Character encoding used to read both training and testing files. */
    private static final String CHARSET = "ISO-8859-1";

    /**
     * Runs the full train / compile / test cycle.
     *
     * @param args ignored
     * @throws ClassNotFoundException if compiling the classifier fails to resolve a class
     * @throws IOException if a file cannot be read or a category directory cannot be listed
     * @throws IllegalArgumentException if a training or testing category directory is missing
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        DynamicLMClassifier<NGramProcessLM> classifier
            = DynamicLMClassifier.createNGramProcess(CATEGORIES, NGRAM_SIZE);
        train(classifier);

        System.out.println("Compiling");
        // compile() returns Object; the cast mirrors how the result is used below.
        @SuppressWarnings("unchecked")
        JointClassifier<CharSequence> compiledClassifier
            = (JointClassifier<CharSequence>) AbstractExternalizable.compile(classifier);

        evaluate(compiledClassifier);
    }

    /** Feeds every file under each training category directory to the classifier. */
    private static void train(DynamicLMClassifier<NGramProcessLM> classifier)
            throws IOException {
        for (String category : CATEGORIES) {
            File classDir = new File(TRAINING_DIR, category);
            Classification classification = new Classification(category);
            for (String fileName : listCategoryFiles(classDir, "training")) {
                String text = Files.readFromFile(new File(classDir, fileName), CHARSET);
                System.out.println("Training on " + category + "/" + fileName);
                classifier.handle(new Classified<CharSequence>(text, classification));
            }
        }
    }

    /** Classifies every file under each testing category directory and prints the result. */
    private static void evaluate(JointClassifier<CharSequence> compiledClassifier)
            throws IOException {
        boolean storeCategories = true;
        // NOTE(review): the evaluator accumulates the reference classifications, but nothing
        // in this class reads its results back out.
        JointClassifierEvaluator<CharSequence> evaluator
            = new JointClassifierEvaluator<CharSequence>(
                compiledClassifier, CATEGORIES, storeCategories);

        for (String category : CATEGORIES) {
            File classDir = new File(TESTING_DIR, category);
            Classification classification = new Classification(category);
            for (String fileName : listCategoryFiles(classDir, "testing")) {
                String text = Files.readFromFile(new File(classDir, fileName), CHARSET);
                System.out.print("\nTesting on " + category + "/" + fileName + " ");
                evaluator.handle(new Classified<CharSequence>(text, classification));

                JointClassification jc = compiledClassifier.classify(text);
                System.out.println("\tGot best category of: " + jc.bestCategory());
                System.out.println(jc.toString());
            }
        }
    }

    /**
     * Lists the entries of a category directory, failing with a clear message instead of a
     * {@code NullPointerException} when the directory is missing or cannot be read.
     *
     * @param classDir the category directory to list
     * @param label lower-case phase name used in the error message ("training" or "testing")
     * @return the names of the entries in {@code classDir}; never {@code null}
     * @throws IllegalArgumentException if {@code classDir} is not a directory
     * @throws IOException if the directory exists but its contents cannot be listed
     */
    private static String[] listCategoryFiles(File classDir, String label) throws IOException {
        if (!classDir.isDirectory()) {
            String capitalized = Character.toUpperCase(label.charAt(0)) + label.substring(1);
            String msg = "Could not find " + label + " directory=" + classDir
                + "\n" + capitalized + " directory not found";
            System.out.println(msg);
            throw new IllegalArgumentException(msg);
        }
        // File.list() returns null on an I/O error even when the path is a directory.
        String[] names = classDir.list();
        if (names == null) {
            throw new IOException("Could not list files in directory=" + classDir);
        }
        return names;
    }
}