我有一个用 Java 编写的程序,想把它改写成 MapReduce 形式,以便在 Hadoop 框架上运行。但我是 Hadoop 新手,不知道该如何着手。下面贴出了程序的基本结构(还有其他类和方法,为了保持简洁,我省略了它们以及部分代码)。我应该如何开始编写驱动程序(Driver)、Mapper 类和 Reducer 类?
public class TC{
TC tc = new TC();
long start =System.currentTimeMillis(); // staring the timer to get running time
int i=0;
int a=0, c=0, g=0, i=0, m=0, s=0, t=0, e=0;
String categories[]={"a","c","e","g","i","m","s","t"}; // different categories in the training set
Map<String, Integer> all_wordMap=new HamnMap<String, Integer>(); creating a map
for(i=0;i<categories.length;i++)
{
String class_file[]=no_of_files("path"+categories[i]);
for(int j=0;j<class_file.length;j++){
// code block for counting all distinct words in the files and putting them into the map
}
}
int total_words=all_wordMap.size(); // size of the map
Map<String,String> bvm=new HasnMap<String,String>(); // creating another map
for(i=0;i<categories.length;i++)
{
String class_file[]=tc.no_of_files("path"+categories[i]);
for(int j=0;j<class_file.length;j++){
//code block to create binary vector for each text file
}
}
}
String classs = null;
String s="path"; // path from where the test files will be read
String files[]=tc.no_of_files(s); // no. of test files
for(int y=0;y<files.length;y++){
String file1 =files[y];
classs=tc.classifier(file1,bvm,all_wordMap); // calling a method classifier which classify the test file(which class does the test file belong to)
System.out.println("The category is "+classs); // these are the outputs
if ("a".equals(classs)){
a++;
}
if ("c".equals(classs)){
c++;
}
if ("e".equals(classs)){
e++;
}
if ("g".equals(classs)){
g++;
}
if ("in".equals(classs)){
in++;
}
if ("mn".equals(classs)){
mn++;
}
if ("sh".equals(classs)){
sh++;
}
if ("td".equals(classs)){
td++;
}
}
System.out.println ("a = "+a); //counting the no. of files of this class
System.out.println ("c = "+c);
System.out.println ("e = "+e);
System.out.println ("g = "+g);
System.out.println ("in = "+in);
System.out.println ("mn = "+mn);
System.out.println ("sh = "+sh);
System.out.println ("td = "+td);
System.out.println(System.currentTimeMillis() - start); // stopping the clock
}