使用java的weka runclassifier中的属性值是什么?

时间:2014-04-09 10:12:35

标签: java weka

在这个程序中,main 函数的参数“argv”应该取什么值?它是一个字符串数组,但我不知道数组里应该放哪些值。另外,我还想知道如何用 Java 调用 Weka 运行分类器,并得到某个 arff 文件的混淆矩阵。

...提前谢谢......

package Codings;
import java.io.PrintStream;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.functions.LinearRegression;
import weka.classifiers.functions.Logistic;
import weka.classifiers.rules.ZeroR;
import weka.clusterers.MakeDensityBasedClusterer;
import weka.clusterers.SimpleKMeans;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ClusterMembership;
import weka.filters.unsupervised.attribute.Standardize;

/**
 * Radial basis function network classifier.
 *
 * <p>Training standardizes the data, clusters it with k-means wrapped in a
 * density-based clusterer, converts every instance into cluster-membership
 * values, and then fits either a logistic regression (nominal class) or a
 * linear regression (any other class type) on the transformed data.
 *
 * <p>If the training data contains nothing but the class attribute, a
 * {@link ZeroR} model is built and used instead.
 *
 * <p>Command-line options understood by {@link #setOptions(String[])}:
 * <ul>
 *   <li>{@code -B <number>} number of clusters (basis functions)</li>
 *   <li>{@code -S <seed>} random seed handed to k-means</li>
 *   <li>{@code -R <ridge>} ridge value for the regression</li>
 *   <li>{@code -M <number>} maximum iterations for the logistic regression</li>
 *   <li>{@code -W <number>} minimum standard deviation for the clusters</li>
 *   <li>{@code -D} debug flag</li>
 * </ul>
 */
public class RBFNetwork extends Classifier
    implements OptionHandler
{
  static final long serialVersionUID = -3669814959712675720L;

  /** Ridge value used when no {@code -R} option is supplied. */
  private static final double DEFAULT_RIDGE = 1.0E-008D;

  /** Iteration limit used when no {@code -M} option is supplied. */
  private static final int DEFAULT_MAX_ITS = -1;

  /** Logistic model; non-null only after training on a nominal class. */
  private Logistic m_logistic;

  /** Linear model; non-null only after training on a non-nominal class. */
  private LinearRegression m_linear;

  /** Filter that turns instances into cluster-membership values. */
  private ClusterMembership m_basisFilter;

  /** Filter that standardizes the attributes before clustering. */
  private Standardize m_standardize;

  /** Number of clusters handed to k-means. */
  private int m_numClusters = 2;

  /** Ridge value handed to the logistic or linear regression. */
  protected double m_ridge = DEFAULT_RIDGE;

  /** Maximum number of iterations handed to the logistic regression. */
  private int m_maxIts = DEFAULT_MAX_ITS;

  /** Seed handed to k-means. */
  private int m_clusteringSeed = 1;

  /** Minimum standard deviation handed to the density-based clusterer. */
  private double m_minStdDev = 0.1D;

  /** Fallback model; non-null only when the data had just a class attribute. */
  private Classifier m_ZeroR;

  /**
   * Returns a textual description of this classifier.
   *
   * @return the description
   */
  public String globalInfo()
  {
    return "Class that implements a normalized Gaussian radial basis function network.\n"
        + "It uses the k-means clustering algorithm to provide the basis functions and "
        + "learns either a logistic regression (discrete class problems) or linear "
        + "regression (numeric class problems) on top of that. Symmetric multivariate "
        + "Gaussians are fit to the data from each cluster. If the class is nominal it "
        + "uses the given number of clusters per class. It standardizes all numeric "
        + "attributes to zero mean and unit variance.";
  }

  /**
   * Combines the capabilities of the underlying learners: the union of what
   * {@link Logistic} and {@link LinearRegression} report, intersected with
   * what {@link SimpleKMeans} reports, with the class capabilities of the
   * two regressions added back afterwards.
   *
   * @return the combined capabilities
   */
  public Capabilities getCapabilities()
  {
    Capabilities result = new Logistic().getCapabilities();
    result.or(new LinearRegression().getCapabilities());
    // Remember the class capabilities before and-ing with the clusterer,
    // then or them back in afterwards.
    Capabilities classes = result.getClassCapabilities();
    result.and(new SimpleKMeans().getCapabilities());
    result.or(classes);
    return result;
  }

  /**
   * Trains the network on the given data.
   *
   * <p>The data is copied, instances with a missing class are removed, and
   * the remaining instances are standardized and transformed into
   * cluster-membership values before the regression model is fitted.
   *
   * @param instances the training data
   * @throws Exception if the capability test or any of the underlying
   *         filters or learners fails
   */
  public void buildClassifier(Instances instances)
      throws Exception
  {
    getCapabilities().testWithFail(instances);

    // Work on a copy so the caller's data set is left untouched.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    if (instances.numAttributes() == 1) {
      System.err.println("Cannot build model (only class attribute present in data!), using ZeroR model instead!");

      this.m_ZeroR = new ZeroR();
      this.m_ZeroR.buildClassifier(instances);
      return;
    }

    this.m_ZeroR = null;

    this.m_standardize = new Standardize();
    this.m_standardize.setInputFormat(instances);
    instances = Filter.useFilter(instances, this.m_standardize);

    SimpleKMeans sk = new SimpleKMeans();
    sk.setNumClusters(this.m_numClusters);
    sk.setSeed(this.m_clusteringSeed);
    MakeDensityBasedClusterer dc = new MakeDensityBasedClusterer();
    dc.setClusterer(sk);
    dc.setMinStdDev(this.m_minStdDev);
    this.m_basisFilter = new ClusterMembership();
    this.m_basisFilter.setDensityBasedClusterer(dc);
    this.m_basisFilter.setInputFormat(instances);
    Instances transformed = Filter.useFilter(instances, this.m_basisFilter);

    if (instances.classAttribute().isNominal()) {
      this.m_linear = null;
      this.m_logistic = new Logistic();
      this.m_logistic.setRidge(this.m_ridge);
      this.m_logistic.setMaxIts(this.m_maxIts);
      this.m_logistic.buildClassifier(transformed);
    } else {
      this.m_logistic = null;
      this.m_linear = new LinearRegression();
      this.m_linear.setAttributeSelectionMethod(new SelectedTag(1, LinearRegression.TAGS_SELECTION));

      this.m_linear.setRidge(this.m_ridge);
      this.m_linear.buildClassifier(transformed);
    }
  }

  /**
   * Computes the class distribution for one instance.
   *
   * <p>When the ZeroR fallback is active its distribution is returned
   * directly. Otherwise the instance is passed through the standardize
   * filter and the basis filter, and the transformed instance is handed to
   * the logistic model (nominal class) or the linear model (otherwise).
   *
   * @param instance the instance to classify
   * @return the distribution produced by the underlying model
   * @throws Exception if filtering or the underlying model fails
   */
  @Override
  public double[] distributionForInstance(Instance instance)
      throws Exception
  {
    if (this.m_ZeroR != null) {
      return this.m_ZeroR.distributionForInstance(instance);
    }

    this.m_standardize.input(instance);
    this.m_basisFilter.input(this.m_standardize.output());
    Instance transformed = this.m_basisFilter.output();

    return instance.classAttribute().isNominal()
        ? this.m_logistic.distributionForInstance(transformed)
        : this.m_linear.distributionForInstance(transformed);
  }

  /**
   * Describes the current model.
   *
   * @return the ZeroR description when the fallback is active, a
   *         "not built" message before training, or the description of the
   *         fitted regression model otherwise
   */
  @Override
  public String toString()
  {
    if (this.m_ZeroR != null) {
      String simpleName = getClass().getName().replaceAll(".*\\.", "");
      StringBuilder buf = new StringBuilder();
      buf.append(simpleName + "\n");
      // The regex "." matches every character, so this yields a line of
      // '=' with the same length as the class name (an underline).
      buf.append(simpleName.replaceAll(".", "=") + "\n\n");
      buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
      buf.append(this.m_ZeroR.toString());
      return buf.toString();
    }

    if (this.m_basisFilter == null) {
      return "No classifier built yet!";
    }

    // Exactly one of m_linear / m_logistic is set by buildClassifier.
    boolean usesLogistic = this.m_linear == null;

    StringBuilder sb = new StringBuilder();
    sb.append("Radial basis function network\n");
    sb.append(usesLogistic ? "(Logistic regression " : "(Linear regression ");
    sb.append("applied to K-means clusters as basis functions):\n\n");
    sb.append(usesLogistic ? this.m_logistic.toString() : this.m_linear.toString());

    return sb.toString();
  }

  /**
   * Returns the tip text for the maxIts property.
   *
   * @return the tip text
   */
  public String maxItsTipText()
  {
    return "Maximum number of iterations for the logistic regression to perform. Only applied to discrete class problems.";
  }

  /**
   * Returns the maximum number of iterations for the logistic regression.
   *
   * @return the iteration limit
   */
  public int getMaxIts()
  {
    return this.m_maxIts;
  }

  /**
   * Sets the maximum number of iterations for the logistic regression.
   *
   * @param newMaxIts the new iteration limit
   */
  public void setMaxIts(int newMaxIts)
  {
    this.m_maxIts = newMaxIts;
  }

  /**
   * Returns the tip text for the ridge property.
   *
   * @return the tip text
   */
  public String ridgeTipText()
  {
    return "Set the Ridge value for the logistic or linear regression.";
  }

  /**
   * Sets the ridge value for the logistic or linear regression.
   *
   * @param ridge the new ridge value
   */
  public void setRidge(double ridge)
  {
    this.m_ridge = ridge;
  }

  /**
   * Returns the ridge value for the logistic or linear regression.
   *
   * @return the ridge value
   */
  public double getRidge()
  {
    return this.m_ridge;
  }

  /**
   * Returns the tip text for the numClusters property.
   *
   * @return the tip text
   */
  public String numClustersTipText()
  {
    return "The number of clusters for K-Means to generate.";
  }

  /**
   * Sets the number of clusters for k-means. Values that are not strictly
   * positive are ignored and the previous setting is kept.
   *
   * @param numClusters the new number of clusters
   */
  public void setNumClusters(int numClusters)
  {
    if (numClusters > 0) {
      this.m_numClusters = numClusters;
    }
  }

  /**
   * Returns the number of clusters for k-means.
   *
   * @return the number of clusters
   */
  public int getNumClusters()
  {
    return this.m_numClusters;
  }

  /**
   * Returns the tip text for the clusteringSeed property.
   *
   * @return the tip text
   */
  public String clusteringSeedTipText()
  {
    return "The random seed to pass on to K-means.";
  }

  /**
   * Sets the seed passed on to k-means.
   *
   * @param seed the new seed
   */
  public void setClusteringSeed(int seed)
  {
    this.m_clusteringSeed = seed;
  }

  /**
   * Returns the seed passed on to k-means.
   *
   * @return the seed
   */
  public int getClusteringSeed()
  {
    return this.m_clusteringSeed;
  }

  /**
   * Returns the tip text for the minStdDev property.
   *
   * @return the tip text
   */
  public String minStdDevTipText()
  {
    return "Sets the minimum standard deviation for the clusters.";
  }

  /**
   * Returns the minimum standard deviation for the clusters.
   *
   * @return the minimum standard deviation
   */
  public double getMinStdDev()
  {
    return this.m_minStdDev;
  }

  /**
   * Sets the minimum standard deviation for the clusters.
   *
   * @param newMinStdDev the new minimum standard deviation
   */
  public void setMinStdDev(double newMinStdDev)
  {
    this.m_minStdDev = newMinStdDev;
  }

  /**
   * Lists the five value-taking options (-B, -S, -R, -M, -W).
   *
   * @return an enumeration of {@link Option} objects
   */
  public Enumeration listOptions()
  {
    // Sized for the five options added below.
    Vector<Option> newVector = new Vector<Option>(5);

    newVector.addElement(new Option("\tSet the number of clusters (basis functions) to generate. (default = 2).", "B", 1, "-B <number>"));

    newVector.addElement(new Option("\tSet the random seed to be used by K-means. (default = 1).", "S", 1, "-S <seed>"));

    newVector.addElement(new Option("\tSet the ridge value for the logistic or linear regression.", "R", 1, "-R <ridge>"));

    newVector.addElement(new Option("\tSet the maximum number of iterations for the logistic regression. (default -1, until convergence).", "M", 1, "-M <number>"));

    newVector.addElement(new Option("\tSet the minimum standard deviation for the clusters. (default 0.1).", "W", 1, "-W <number>"));

    return newVector.elements();
  }

  /**
   * Parses the given option array, e.g.
   * {@code {"-B", "3", "-S", "1", "-R", "1.0E-8", "-M", "-1", "-W", "0.1"}}.
   *
   * <p>An absent {@code -R} or {@code -M} resets that setting to its default.
   * An absent {@code -B}, {@code -S} or {@code -W} leaves the current
   * setting untouched.
   *
   * @param options the options to parse
   * @throws Exception if a value cannot be parsed or the option utilities
   *         reject the array
   */
  public void setOptions(String[] options)
      throws Exception
  {
    setDebug(Utils.getFlag('D', options));

    String ridgeString = Utils.getOption('R', options);
    if (ridgeString.length() != 0) {
      this.m_ridge = Double.parseDouble(ridgeString);
    } else {
      this.m_ridge = DEFAULT_RIDGE;
    }

    String maxItsString = Utils.getOption('M', options);
    if (maxItsString.length() != 0) {
      this.m_maxIts = Integer.parseInt(maxItsString);
    } else {
      this.m_maxIts = DEFAULT_MAX_ITS;
    }

    String numClustersString = Utils.getOption('B', options);
    if (numClustersString.length() != 0) {
      setNumClusters(Integer.parseInt(numClustersString));
    }

    String seedString = Utils.getOption('S', options);
    if (seedString.length() != 0) {
      setClusteringSeed(Integer.parseInt(seedString));
    }

    String stdString = Utils.getOption('W', options);
    if (stdString.length() != 0) {
      setMinStdDev(Double.parseDouble(stdString));
    }

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Returns the current settings as a ten-element option array in the
   * order -B, -S, -R, -M, -W, each flag followed by its value.
   *
   * @return the option array
   */
  public String[] getOptions()
  {
    return new String[] {
      "-B", String.valueOf(this.m_numClusters),
      "-S", String.valueOf(this.m_clusteringSeed),
      "-R", String.valueOf(this.m_ridge),
      "-M", String.valueOf(this.m_maxIts),
      "-W", String.valueOf(this.m_minStdDev)
    };
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision()
  {
    return RevisionUtils.extract("$Revision: 1.10 $");
  }

  /**
   * Command-line entry point. The arguments are forwarded unchanged to
   * {@code runClassifier} together with a fresh {@code RBFNetwork}.
   *
   * @param argv the command-line arguments
   */
  public static void main(String[] argv)
  {
    runClassifier(new RBFNetwork(), argv);
  }
}

1 个答案:

答案 0 :(得分:1)

argv 定义了分类器所需的参数。请参阅: http://weka.sourceforge.net/doc.dev/weka/classifiers/AbstractClassifier.html#runClassifier(weka.classifiers.Classifier,java.lang.String[])

在这种情况下,您正在运行RBFNetwork。 有效选项似乎在这里定义: http://www.dbs.ifi.lmu.de/~zimek/diplomathesis/implementations/EHNDs/doc/weka/classifiers/functions/RBFNetwork.html

有效选项包括:

-B num 设置要使用的簇数(基函数)。

-R ridge 设置逻辑回归或线性回归的脊参数。

-M num 设置逻辑回归的最大迭代次数。 (默认为-1,直到收敛)

-S seed 设置生成集群时K-means使用的随机种子。 (默认为1)