我有这段代码
import weka.core.Instances;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.classifiers.Evaluation;
import weka.filters.Filter;
import weka.filters.supervised.instance.Resample;
import java.util.Random;
import java.io.BufferedReader;
import java.io.FileReader ;
import weka.filters.unsupervised.attribute.NumericToNominal;
public class LoadModelWeka
{
public static void main(String[] args) throws Exception {
// training
BufferedReader reader = null;
reader=new BufferedReader(new FileReader("D:\\aaaaaaaaaaaaaaaaaaaaaa\\Licenta\\partXML\\TestTask1_partXML.arff"));
Instances train =new Instances (reader);
Instances classBalancerInstances=homogenizeTrainingData(train);
// classBalancerInstances.setClassIndex(classBalancerInstances.numAttributes() - 1);
classBalancerInstances.setClassIndex(0);
reader.close();
NaiveBayes nb = new NaiveBayes();
nb.buildClassifier(classBalancerInstances);
Evaluation eval = new Evaluation(classBalancerInstances);
eval.crossValidateModel(nb, classBalancerInstances, 10 , new Random(1));
System.out.println(eval.toSummaryString("\n Results \n=====\n",true));
System.out.println(eval.fMeasure(1)+" "+eval.precision(1)+" "+eval.recall(1)+" ");
}
public static Instances homogenizeTrainingData(Instances data) throws Exception
{
final Resample filter = new Resample();
Instances filteredIns = null;
filter.setBiasToUniformClass(1.0);
Instances newData=convertToNominal(data);
try {
data.setClassIndex(data.numAttributes() - 1);
filter.setInputFormat(newData);
filter.setNoReplacement(false);
filter.setSampleSizePercent(100);
filteredIns = Filter.useFilter(newData, filter);
} catch (Exception e) {
System.out.println("Error when resampling input data!");
e.printStackTrace();
}
return filteredIns;
}
public static Instances convertToNominal(Instances originalTrain) throws Exception{
NumericToNominal convert= new NumericToNominal();
String[] options= new String[2];
options[0]="-R";
options[1]="1-9"; //range of variables to make numeric
convert.setOptions(options);
convert.setInputFormat(originalTrain);
Instances newData=Filter.useFilter(originalTrain, convert);
System.out.println("Before");
for(int i=0; i<2; i=i+1)
{
System.out.println("Nominal? "+originalTrain.attribute(i).isNominal());
}
System.out.println("After");
for(int i=0; i<2; i=i+1)
{
System.out.println("Nominal? "+newData.attribute(i).isNominal());
}
return newData;
}
}
这里我想用类平衡器过滤我的数据集,但是出现了一个错误并且说属性必须是名义上的。我试图转换为名义但是发生了这个错误:
weka.core.UnassignedClassException: weka.filters.supervised.instance.Resample: Class attribute not set!
at weka.core.Capabilities.test(Unknown Source)
at weka.core.Capabilities.test(Unknown Source)
at weka.core.Capabilities.testWithFail(Unknown Source)
at weka.filters.Filter.testInputFormat(Unknown Source)
at weka.filters.Filter.setInputFormat(Unknown Source)
at weka.filters.supervised.instance.Resample.setInputFormat(Unknown Source)
at licenta1.LoadModelWeka.homogenizeTrainingData(LoadModelWeka.java:44)
at licenta1.LoadModelWeka.main(LoadModelWeka.java:22)
Exception in thread "main" java.lang.NullPointerException
at licenta1.LoadModelWeka.main(LoadModelWeka.java:24)
我试图解决它来设置类索引,但我不太了解它是如何工作的所以我不知道如何设置实例运行良好。 我的文件是一个有9个属性的arff,并且使用属性编号9进行分类,所以我想,我必须将所有列转换为1到9,但我不确定我是否理解了过滤器以及从Numeric到Nominal的转换的工作原理。
可以帮助我吗?
我的.arff文件看起来像下面的代码:
@attribute Word {Induction,of,during,monocyte,differentiation,by,HIV,type,1.0,infection,Electrophoretic,mobility,experiments,were,usedare,the,mostmayat,me,to,detect,located,in,long,terminal,repeat,PMA,treatment,acts,probablyin,inducing,expression,nuclei,In,nuclear,extracts,from,monocytes,or,macrophages,induction,occurred,only,if,cells,previously,infected,with,When,U937,no,factor,was,detected,whereas,high,level,progeny,virions,produced,suggesting,that,this,not,required,These,results,indicate,monocytic,cell,lineage,could,mimic,some,stimuli,allowing,Positive,and,negative,regulation,immunoglobulinis,most,suggesthere,gene,a,novel,enhancer,element,A,new,specific,Tandem,copies,fragment,transcription,B,but,Jurkat,T,HeLa,Footprinting,analysis,revealed,Hex,Gel,addition,ubiquitous,band,found,also,agreement,gel,may,Furthermorein,vivo,activity,striking,contrast,mouse,sequence,very,its,own,Interestingly,suppress,promoter,Moreover,simian,virus,40.0,blocked,Thus,identified,study,is,probably,target,site,for,both,positive,factors,The,production,human,immunodeficiency,purified}
@attribute w-2 {'null ','Induction ','of ','during ','monocyte ','differentiation ','by ','HIV ','type ',1.0,'infection ','Electrophoretic ','mobility ','experiments ','were ','usedare ','the ','mostmayat ','me ','to ','detect ','located ','in ','long ','terminal ','repeat ','PMA ','treatment ','acts ','probablyin ','inducing ','expression ','nuclei ','In ','nuclear ','extracts ','from ','monocytes ','or ','macrophages ','induction ','occurred ','only ','if ','cells ','previously ','infected ','with ','When ','U937 ','no ','factor ','was ','detected ','whereas ','high ','level ','progeny ','virions ','was ','produced ','suggesting ','that ','this ','not ','required ','These ','results ','indicate ','monocytic ','cell ','lineage ','could ','mimic ','some ','stimuli ','allowing ','Positive ','and ','negative ','regulation ','immunoglobulinis ','most ','suggesthere ','gene ','a ','novel ','enhancer ','element ','A ','new ','specific ','Tandem ','copies ','fragment ','transcription ','B ','but ','Jurkat ','T ','HeLa ','Footprinting ','analysis ','revealed ','from ','Hex ','Gel ','addition ','ubiquitous ','band ','found ','also ','T ','agreement ','gel ','in ','may ','Furthermorein ','vivo ','activity ','striking ','contrast ','mouse ','sequence ','very ','its ','own ','Interestingly ','the ','suppress ','promoter ','Moreover ','simian ','virus ',40.0,'blocked ','Thus ','identified ','study ','is ','probably ','target ','site ','for ','both ','positive ','factors ','The ','production ','human ','immunodeficiency ','purified ','human ','monocytes '}
@attribute w-1 {' null ',' Induction ',' of ',' during ',' monocyte ',' differentiation ',' by ',' HIV ',' type ',1.0,' infection ',' Electrophoretic ',' mobility ',' experiments ',' were ',' usedare ',' the ',' mostmayat ',' me ',' to ',' detect ',' located ',' in ',' long ',' terminal ',' repeat ',' PMA ',' treatment ',' acts ',' probablyin ',' inducing ',' expression ',' nuclei ',' In ',' nuclear ',' extracts ',' from ',' monocytes ',' or ',' macrophages ',' induction ',' occurred ',' only ',' if ',' cells ',' previously ',' infected ',' with ',' When ',' U937 ',' no ',' factor ',' was ',' detected ',' whereas ',' high ',' level ',' progeny ',' virions ',' produced ',' suggesting ',' that ',' this ',' not ',' required ',' These ',' results ',' indicate ',' monocytic ',' cell ',' lineage ',' could ',' mimic ',' some ',' stimuli ',' allowing ',' Positive ',' and ',' negative ',' regulation ',' immunoglobulinis ',' most ',' suggesthere ',' gene ',' a ',' novel ',' enhancer ',' element ',' A ',' new ',' specific ',' Tandem ',' copies ',' fragment ',' transcription ',' B ',' but ',' Jurkat ',' T ',' cells ',' HeLa ',' Footprinting ',' analysis ',' revealed ',' Hex ',' Gel ',' addition ',' ubiquitous ',' band ',' found ',' also ',' agreement ',' gel ',' Jurkat ',' may ',' Furthermorein ',' vivo ',' activity ',' striking ',' contrast ',' mouse ',' sequence ',' very ',' its ',' own ',' Interestingly ',' fragment ',' suppress ',' promoter ',' Moreover ',' simian ',' virus ',40.0,' blocked ',' Thus ',' identified ',' study ',' is ',' probably ',' target ',' site ',' for ',' both ',' positive ',' factors ',' The ',' production ',' human ',' immunodeficiency ',' purified ',' and '}
@attribute w {' Induction ',' of ',' during ',' monocyte ',' differentiation ',' by ',' HIV ',' type ',1.0,' infection ',' Electrophoretic ',' mobility ',' experiments ',' were ',' usedare ',' the ',' mostmayat ',' me ',' to ',' detect ',' located ',' in ',' long ',' terminal ',' repeat ',' PMA ',' treatment ',' acts ',' probablyin ',' inducing ',' expression ',' nuclei ',' In ',' nuclear ',' extracts ',' from ',' monocytes ',' or ',' macrophages ',' induction ',' occurred ',' only ',' if ',' cells ',' previously ',' infected ',' with ',' When ',' U937 ',' no ',' factor ',' was ',' detected ',' whereas ',' high ',' level ',' progeny ',' virions ',' produced ',' suggesting ',' that ',' this ',' not ',' required ',' These ',' results ',' indicate ',' monocytic ',' cell ',' lineage ',' could ',' mimic ',' some ',' stimuli ',' allowing ',' Positive ',' and ',' negative ',' regulation ',' immunoglobulinis ',' most ',' suggesthere ',' gene ',' a ',' novel ',' enhancer ',' element ',' A ',' new ',' specific ',' Tandem ',' copies ',' fragment ',' transcription ',' B ',' but ',' Jurkat ',' T ',' HeLa ',' Footprinting ',' analysis ',' revealed ',' Hex ',' Gel ',' addition ',' ubiquitous ',' band ',' found ',' also ',' agreement ',' gel ',' may ',' Furthermorein ',' vivo ',' activity ',' striking ',' contrast ',' mouse ',' sequence ',' very ',' its ',' own ',' Interestingly ',' suppress ',' promoter ',' Moreover ',' simian ',' virus ',40.0,' blocked ',' Thus ',' identified ',' study ',' is ',' probably ',' target ',' site ',' for ',' both ',' positive ',' factors ',' The ',' production ',' human ',' immunodeficiency ',' purified ',' macrophages '}
@attribute w+1 {' of ',' during ',' monocyte ',' differentiation ',' by ',' HIV ',' type ',1.0,' infection ',' Electrophoretic ',' mobility ',' experiments ',' were ',' usedare ',' the ',' mostmayat ',' me ',' to ',' detect ',' located ',' in ',' long ',' terminal ',' repeat ',' PMA ',' treatment ',' acts ',' probablyin ',' inducing ',' expression ',' nuclei ',' In ',' nuclear ',' extracts ',' from ',' monocytes ',' or ',' macrophages ',' induction ',' of ',' occurred ',' only ',' if ',' cells ',' previously ',' infected ',' with ',' When ',' U937 ',' no ',' factor ',' was ',' detected ',' whereas ',' high ',' level ',' progeny ',' virions ',' produced ',' suggesting ',' that ',' this ',' not ',' required ',' These ',' results ',' indicate ',' that ',' monocytic ',' cell ',' lineage ',' could ',' mimic ',' some ',' stimuli ',' allowing ',' Positive ',' and ',' negative ',' regulation ',' immunoglobulinis ',' most ',' suggesthere ',' gene ',' a ',' novel ',' enhancer ',' element ',' A ',' new ',' specific ',' Tandem ',' copies ',' fragment ',' transcription ',' B ',' but ',' Jurkat ',' T ',' HeLa ',' Footprinting ',' analysis ',' revealed ',' Hex ',' Gel ',' addition ',' ubiquitous ',' band ',' found ',' also ',' HeLa ',' agreement ',' gel ',' may ',' Furthermorein ',' vivo ',' activity ',' striking ',' contrast ',' mouse ',' sequence ',' very ',' its ',' own ',' Interestingly ',' suppress ',' promoter ',' Moreover ',' simian ',' virus ',40.0,' blocked ',' Thus ',' identified ',' study ',' is ',' probably ',' a ',' target ',' site ',' for ',' both ',' positive ',' factors ',' The ',' production ',' human ',' immunodeficiency ',' purified ',' macrophages ',' null '}
@attribute w+2 {' during ',' monocyte ',' differentiation ',' by ',' HIV ',' type ',1.0,' infection ',' Electrophoretic ',' mobility ',' experiments ',' were ',' usedare ',' the ',' mostmayat ',' me ',' to ',' detect ',' located ',' in ',' long ',' terminal ',' repeat ',' PMA ',' treatment ',' acts ',' probablyin ',' inducing ',' expression ',' nuclei ',' In ',' nuclear ',' extracts ',' from ',' monocytes ',' or ',' macrophages ',' induction ',' of ',' occurred ',' only ',' if ',' cells ',' previously ',' infected ',' with ',' When ',' U937 ',' no ',' induction ',' factor ',' was ',' detected ',' whereas ',' high ',' level ',' progeny ',' virions ',' produced ',' suggesting ',' that ',' this ',' not ',' required ',' These ',' results ',' indicate ',' monocytic ',' cell ',' lineage ',' could ',' mimic ',' some ',' stimuli ',' allowing ',' Positive ',' and ',' negative ',' regulation ',' immunoglobulinis ',' most ',' suggesthere ',' gene ',' a ',' novel ',' enhancer ',' element ',' A ',' new ',' specific ',' Tandem ',' copies ',' this ',' fragment ',' transcription ',' B ',' but ',' Jurkat ',' T ',' HeLa ',' cells ',' Footprinting ',' analysis ',' revealed ',' Hex ',' Gel ',' addition ',' ubiquitous ',' band ',' found ',' also ',' agreement ',' gel ',' may ',' Furthermorein ',' vivo ',' activity ',' striking ',' contrast ',' mouse ',' sequence ',' very ',' its ',' own ',' Interestingly ',' suppress ',' promoter ',' Moreover ',' simian ',' virus ',40.0,' blocked ',' Thus ',' identified ',' study ',' is ',' probably ',' target ',' site ',' for ',' both ',' positive ',' factors ',' The ',' production ',' human ',' immunodeficiency ',' purified ',' null ',' null '}
@attribute Lemma {induction,of,during,monocyte,differentiation,by,hiv,type,1.0,infection,electrophoretic,mobility,experiment,be,usedare,the,mostmayat,I,to,detect,located,in,long,terminal,repeat,pma,treatment,act,probablyin,induce,expression,nucleus,nuclear,extract,from,or,macrophage,occur,only,if,cell,previously,infected,with,when,u937,no,factor,whereas,high,level,progeny,virion,produce,suggest,that,this,not,require,these,result,indicate,monocytic,lineage,could,mimic,some,stimulus,allow,positive,and,negative,regulation,immunoglobulinis,most,suggesthere,gene,a,novel,enhancer,element,new,specific,Tandem,copy,fragment,transcription,b,but,jurkat,t,hela,footprinting,analysis,reveal,hex,gel,addition,ubiquitous,band,find,also,agreement,may,furthermorein,vivo,activity,striking,contrast,mouse,sequence,very,its,own,interestingly,suppress,promoter,moreover,simian,virus,40.0,block,thus,identify,study,probably,target,site,for,both,production,human,immunodeficiency,purify}
@attribute POS {NNP,IN,NN,CD,RB,NNS,VBD,DT,PRP,TO,VB,VBN,UH,WRB,JJ,VBG,MD,JJS,SYM,VBZ}
@attribute IsCue {' F',' T'}
@data
Induction,'null ',' null ',' Induction ',' of ',' during ',induction,NNP,' F'
of,'null ',' Induction ',' of ',' during ',' monocyte ',of,IN,' F'
during,'Induction ',' of ',' during ',' monocyte ',' differentiation ',during,IN,' F'
monocyte,'of ',' during ',' monocyte ',' differentiation ',' by ',monocyte,NN,' F'
differentiation,'during ',' monocyte ',' differentiation ',' by ',' HIV ',differentiation,NN,' F'
by,'monocyte ',' differentiation ',' by ',' HIV ',' type ',by,IN,' F'
HIV,'differentiation ',' by ',' HIV ',' type ',1.0,hiv,NNP,' F'
type,'by ',' HIV ',' type ',1.0,' infection ',type,NN,' F'
1.0,'HIV ',' type ',1.0,' infection ',' Electrophoretic ',1.0,CD,' F'
infection,'type ',1.0,' infection ',' Electrophoretic ',' mobility ',infection,NN,' F'
分类必须是IsCue属性的函数。如果当前单词(我的意思是行上的第一个单词)是肯定的,则isCue为true,否则为false。
答案 0 :(得分:0)
我找到了这个问题的答案,我发布了它。想想可以帮助别人。 ClassIndex是我想在分类进程中使用的类的索引。在我的例子中,我想通过使用属性IsCue进行classfiy,所以我需要在方法numericTonNominal中添加下一行。它必须是我方法中的第一行像这样:
public static Instances convertToNominal(Instances originalTrain) throws Exception{
originalTrain.setClassIndex(8);
//your code here
}
我将索引设置为8,因为我的属性是数字9,但它开始为0,因此它将在分类过程中变为数字8