我正在尝试编写一个继承 SimpleBatchFilter 的自定义批处理过滤器。但是,当我第二次运行它以获得反选(invert)的输出时遇到了问题。以下是两次运行之后我得到的错误:
Exception in thread "main" java.lang.IndexOutOfBoundsException: Index: 79, Size: 79
at java.util.ArrayList.rangeCheck(ArrayList.java:653)
at java.util.ArrayList.get(ArrayList.java:429)
at weka.core.Attribute.addStringValue(Attribute.java:994)
at weka.core.StringLocator.copyStringValues(StringLocator.java:155)
at weka.core.StringLocator.copyStringValues(StringLocator.java:91)
at weka.filters.Filter.copyValues(Filter.java:373)
at weka.filters.Filter.push(Filter.java:290)
at weka.filters.SimpleBatchFilter.batchFinished(SimpleBatchFilter.java:266)
at weka.filters.Filter.useFilter(Filter.java:667)
at likeability.Main.main(Main.java:30)
以下是相关代码:
/**
 * Batch filter that partitions the data by the values of two attributes,
 * samples every partition with a {@code Resample} filter and merges the
 * samples into a single data set.
 */
public class TestFilter extends SimpleBatchFilter {
  /** First attribute to partition on. */
  private Attribute a;
  /** Second attribute; every partition of {@code a} is split again on it. */
  private Attribute b;
  /** Value handed to {@code Resample.setSampleSizePercent}. */
  private int sampleSizePercent = 15;
  /** Value handed to {@code Resample.setInvertSelection}. */
  private boolean invert = false;
  /** Value handed to {@code Resample.setRandomSeed}. */
  private int seed = 1;

  /**
   * Partitions the batch on {@code a}, then each partition on {@code b}, and
   * returns the merged per-partition samples.
   *
   * @param inst the batch to process
   * @return the merged samples
   * @throws IllegalStateException if {@code a} or {@code b} has not been set
   * @throws Exception if one of the nested Weka filters fails
   */
  @Override
  protected Instances process(Instances inst) throws Exception {
    // Fail early with a readable message instead of a bare NullPointerException
    // from inside partition().
    if (a == null || b == null) {
      throw new IllegalStateException(
          "Both partition attributes (a and b) must be set before filtering");
    }
    List<Instances> partitions = new ArrayList<>();
    for (Instances data : partition(inst, a)) {
      partitions.addAll(partition(data, b));
    }
    return getTestSet(partitions);
  }

  /**
   * Partitions the data so that each partition contains only one nominal
   * value of the given attribute. One partition is produced per value of
   * {@code att}, in value order.
   *
   * @param data the instances to split
   * @param att the attribute whose values define the partitions
   * @return one data set per value of {@code att}
   * @throws Exception if {@code RemoveWithValues} fails
   */
  private List<Instances> partition(Instances data, Attribute att) throws Exception {
    List<Instances> instances = new ArrayList<>();
    for (int i = 0; i < att.numValues(); i++) {
      RemoveWithValues rm = new RemoveWithValues();
      // The index strings are built as zero-based index + 1.
      rm.setAttributeIndex(Integer.toString(att.index() + 1));
      // Selection is inverted so that, per the intent stated above, the
      // instances carrying value i are the ones that remain.
      rm.setInvertSelection(true);
      rm.setNominalIndices(Integer.toString(i + 1));
      rm.setInputFormat(data);
      instances.add(Filter.useFilter(data, rm));
    }
    return instances;
  }

  /**
   * Runs a {@code Resample} filter (no replacement, configured with
   * {@code seed}, {@code invert} and {@code sampleSizePercent}) over every
   * partition and collects the results in one data set that shares the
   * header of the first partition.
   *
   * @param insts the partitions to sample from; must not be empty
   * @return the merged samples
   * @throws IllegalArgumentException if {@code insts} is empty
   * @throws Exception if {@code Resample} fails
   */
  private Instances getTestSet(List<Instances> insts) throws Exception {
    // insts.get(0) below needs at least one partition; an empty list means
    // the partition attributes reported no values.
    if (insts.isEmpty()) {
      throw new IllegalArgumentException(
          "No partitions to sample from: attributes a and b must each have at least one value");
    }
    Instances output = new Instances(insts.get(0), 0);
    for (Instances inst : insts) {
      Resample filter = new Resample();
      filter.setRandomSeed(seed);
      filter.setNoReplacement(true);
      filter.setInvertSelection(invert);
      filter.setSampleSizePercent(sampleSizePercent);
      filter.setInputFormat(inst);
      Instances curr = Filter.useFilter(inst, filter);
      // Prints partition size and sample size for every partition.
      System.out.println(inst.size() + " " + curr.size());
      output.addAll(curr);
    }
    return output;
  }

  /**
   * Returns an empty data set built from the input header, i.e. the filter
   * does not change the attribute structure.
   */
  @Override
  protected Instances determineOutputFormat(Instances arg) throws Exception {
    return new Instances(arg, 0);
  }

  /** Returns a short description of the filter. */
  @Override
  public String globalInfo() {
    return "A filter which partitions the data so that each partition contains"
        + " only instances with one value of attribute a and b, then takes "
        + "a random subset of values from each partition and merges them to"
        + " produce the final set.";
  }

  /** Enables all attribute and class types; a class attribute is optional. */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.enableAllAttributes();
    result.enableAllClasses();
    result.enable(Capability.NO_CLASS); // filter doesn't need class to be set
    return result;
  }
  //Main and getters and setters
}
我是这样调用它的:
// First run: configure the two partition attributes (looked up by name in
// data), register the input format, then filter.
TestFilter filter = new TestFilter();
filter.setA(data.attribute("gender"));
filter.setB(data.attribute("age"));
filter.setInputFormat(data);
Instances test = Filter.useFilter(data, filter);
// Second run on the same filter object with the invert flag switched on.
// NOTE(review): setInputFormat is called again before the second useFilter;
// presumably this is what resets the filter's batch state - confirm against
// the Weka Filter documentation.
filter.setInvert(true);
filter.setInputFormat(data);
Instances train = Filter.useFilter(data, filter);
在我看来,必须在两次调用之间加上这两行(setInvert 和 setInputFormat)显得很笨拙。我怀疑我应该使用 isBatchFinished(),这是否意味着我必须改为继承 BatchFilter 而不是 SimpleBatchFilter?如果能看到一些成功的实现示例也会很有帮助,因为我能找到的唯一示例就是 WEKA 手册中的那个。
答案 0 :(得分:0)
我通过改为继承 Filter(而不是 SimpleBatchFilter),并把 process 函数改写为 batchFinished() 解决了这个问题。我发布这个答案,是因为我在其他地方还没有找到自定义过滤器的示例。
@Override
public boolean batchFinished() throws Exception {
if(isFirstBatchDone()) {
invert = true;
}
if (getInputFormat() == null)
throw new NullPointerException("No input instance format defined");
Instances inst = getInputFormat();
ArrayList<Instances> partitionsA = partition(inst, a);
ArrayList<Instances> partitions = new ArrayList<Instances>();
for(Instances data: partitionsA) {
partitions.addAll(partition(data, b));
}
/**
 * Samples every partition with a {@code Resample} filter (no replacement,
 * configured with {@code seed}, {@code invert} and
 * {@code sampleSizePercent}) and pushes each sampled instance via
 * {@code push}.
 *
 * @param insts the partitions to sample from
 * @throws Exception if {@code Resample} fails
 */
private void getTestSet(List<Instances> insts) throws Exception {
  for (Instances partition : insts) {
    Resample sampler = new Resample();
    sampler.setRandomSeed(seed);
    sampler.setNoReplacement(true);
    sampler.setInvertSelection(invert);
    sampler.setSampleSizePercent(sampleSizePercent);
    sampler.setInputFormat(partition);

    Instances sampled = Filter.useFilter(partition, sampler);
    // Prints partition size and sample size for every partition.
    System.out.println(partition.size() + " " + sampled.size());
    sampled.forEach(this::push);
  }
}
/**
 * Registers the input format with the superclass and sets the output
 * format to a new {@code Instances} built from {@code arg} with capacity 0.
 *
 * @param arg the input format
 * @return always {@code true}
 * @throws Exception if the superclass rejects the input format
 */
@Override
public boolean setInputFormat(Instances arg) throws Exception {
  super.setInputFormat(arg);
  setOutputFormat(new Instances(arg, 0));
  return true;
}