Weka RandomTree模型预测

时间:2018-07-25 13:38:16

标签: java weka

我尝试使用WEKA进行文档预测 我无法将其与从StringToWordVector生成的Arff一起使用 我了解到我需要对希望预测的数据进行相同的过滤 但我只是做错了。

原始.arff

@relation foodsource

@attribute characteristic string
@attribute foodtype {fruit,vegetable,meat}

@data
"apple orange pineapple grape banana",fruit
"spinach brocoli tomato cucumber",vegetable
"ham chicken cow chop fish",meat

我执行StringToWordVector之后的新.arff

@relation 'foodsource-weka.filters.unsupervised.attribute.StringToWordVector-R1-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.Reorder-R2,3,4,5,6,7,8,9,10,11,12,13,14,15,1-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"-weka.filters.unsupervised.attribute.StringToWordVector-Rfirst-last-W1000-prune-rate-1.0-N0-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?!\"'

@attribute apple numeric
@attribute banana numeric
@attribute grape numeric
@attribute orange numeric
@attribute pineapple numeric
@attribute brocoli numeric
@attribute cucumber numeric
@attribute spinach numeric
@attribute tomato numeric
@attribute chicken numeric
@attribute chop numeric
@attribute cow numeric
@attribute fish numeric
@attribute ham numeric
@attribute foodtype {fruit,vegetable,meat}

@data
{0 1,1 1,2 1,3 1,4 1}
{5 1,6 1,7 1,8 1,14 vegetable}
{9 1,10 1,11 1,12 1,13 1,14 meat}

然后我建立RandomTree模型 我试图用模型预测输入字符串,但无法正常工作。

为此尝试浪费了2天。 真的很感谢有人能指点我。

import weka.classifiers.trees.RandomTree;
import weka.core.*;
import weka.core.stemmers.NullStemmer;
import weka.core.tokenizers.WordTokenizer;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

import java.io.IOException;
import java.util.ArrayList;

public class WekaClassifier {
    public static void main(String args[])
    {
        try {
            ArrayList<Attribute> attributes = new ArrayList<Attribute>();
            attributes.add(0,new Attribute("characteristic" ,true));
            attributes.add(1,new Attribute("foodtype",true));
            Instances traindata = new Instances("foodsource",attributes,0);
            traindata.setClassIndex(1);
            Instance i = new DenseInstance(2);
            i.setDataset(traindata);
            i.setValue(0,"apple orange pineapple grape banana");
            //i.setValue(0,"fish chicken");
            i.setClassMissing();
            traindata.add(i);


            RandomTree rt = (RandomTree) weka.core.SerializationHelper.read("src/main/resources/foodsource.model");

            StringToWordVector stwv = new StringToWordVector();
            stwv.setIDFTransform(false);
            stwv.setTFTransform(false);
            stwv.setAttributeIndices("first-last");
            stwv.setDebug(false);
            stwv.setDoNotCheckCapabilities(false);
            stwv.setDoNotOperateOnPerClassBasis(false);
            stwv.setInvertSelection(false);
            stwv.setLowerCaseTokens(false);
            stwv.setMinTermFreq(1);
            stwv.setInputFormat(traindata);
            stwv.setOutputWordCounts(false);
            stwv.setPeriodicPruning(-1.0);
            stwv.setStemmer(new NullStemmer());
            stwv.setStopwordsHandler(null);
            WordTokenizer wt=new WordTokenizer();
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!");
            stwv.setTokenizer(wt);

            Instances output = Filter.useFilter(traindata, stwv);
            output.setClassIndex(output.numAttributes() - 1);

            double label = rt.classifyInstance(output.instance(0));

            System.out.println(label);
            output.instance(0).setClassValue(label);

//          System.out.println(output.instance(0).stringValue((int)label));

        } catch (IOException ex)
        {
            ex.printStackTrace();
        } catch (ClassNotFoundException ex)
        {
            ex.printStackTrace();
        } catch (Exception ex)
        {
            ex.printStackTrace();
        }
    }
}

谢谢。

0 个答案:

没有答案