使用Weka对多个属性进行分类和预测

时间:2013-12-07 23:20:39

标签: java machine-learning artificial-intelligence weka

我需要输入6个属性,并使用Java / Weka以编程方式对该输入中的3个属性进行分类/预测。我已经想出了如何预测1(最后一个)属性,但是如何更改它以同时训练和预测最后3个?

.arff文件中的数字对应于数据库中的电影对象。

这是我的Java代码:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.DecisionStump;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.classifiers.trees.RandomTree;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.Remove;

public class WekaTrial {

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        // Create training data instance
        Instances training_data = new Instances(
                new BufferedReader(
                        new FileReader(
                                "C:/Users/Me/Desktop/File_Project/src/movie_training.arff")));
        training_data.setClassIndex(training_data.numAttributes() - 1);

        // Create testing data instance
        Instances testing_data = new Instances(
                new BufferedReader(
                        new FileReader(
                                "C:/Users/Me/Desktop/FileProject/src/movie_testing.arff")));
        testing_data.setClassIndex(training_data.numAttributes() - 1);

        // Print initial data summary
        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = "
                + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        // a classifier for decision trees:
        J48 j48 = new J48();

        // filter for removing samples:
        Remove rm = new Remove();
        rm.setAttributeIndices("1"); // remove 1st attribute

        // filtered classifier
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);
        fc.setClassifier(j48);

                    // Create counters and print values
        float correct = 0;
        float incorrect = 0;

                    // train using stock_training_data.arff:
        fc.buildClassifier(training_data);

                    // test using stock_testing_data.arff:
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            System.out.print("Expected values: "
                    + testing_data.classAttribute().value(
                            (int) testing_data.instance(i).classValue()));
            System.out.println(", Predicted values: "
                    + testing_data.classAttribute().value((int) pred));
            // Increment correct/incorrect values
            if (testing_data.classAttribute().value(
                    (int) testing_data.instance(i).classValue()) == testing_data.classAttribute().value((int) pred)) {
                        correct += 1;
                    } else {
                        incorrect += 1;
                    }
        }

                    // Print correct/incorrect
        float percent_correct = correct/(correct+incorrect)*100;
        System.out.println("Number correct: " + correct + "\nNumber incorrect: " + incorrect + "\nPercent correct: " +
                percent_correct + "%");

    }

}

这是我的.arff培训文件(删除了多余的行):

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,18,18,18,18,18,18,18,18
28,18,36,18,53,10769,18,53,10769
37,37,37,28,12,14,28,12,14
27,53,27,18,10749,10769,27,53,27
12,12,12,35,10751,35,12,12,12
35,18,10749,18,18,18,35,18,10749
28,12,878,53,53,53,53,53,53
18,18,18,28,37,10769,18,18,18
18,53,18,28,12,35,18,53,18
28,80,53,80,18,10749,28,80,53
18,10749,18,18,10756,18,18,10756,18
18,10749,10769,28,12,878,18,10749,10769
18,10756,18,16,35,10751,16,35,10751
35,18,10751,35,18,10752,35,18,10751

.arff测试文件:

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,27,53,18,53,10756,18,27,53
35,18,10749,18,10769,18,18,10769,18
16,878,53,16,18,16,16,18,16
35,10749,10757,18,18,18,18,18,18
80,18,10748,18,10749,18,18,10749,18
28,18,36,35,18,10751,28,18,36
18,10749,10769,35,18,10402,35,18,10402
28,12,878,18,10749,10769,18,10749,10769
35,10749,35,14,10402,10751,14,10402,10751

1 个答案:

答案 0 :(得分:0)

如果我理解正确,您会遇到“多级”或“多目标”问题。 您有几个简单的选项来解决问题:

  1. 创建一个包含所有3个目标类的新目标类(decision_one,decision_two和decision_three的串联)

  2. 分别训练每个目标。