I am studying HDFS, so I tested a simple MapReduce job that computes the average of the "overall" values in a JSON file.
A sample of the JSON data is here (click me).
GetAverage.java
package some.package.path.here.bigdata;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.json.*;

public class GetAverage extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        int res = ToolRunner.run(new Configuration(), new GetAverage(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        System.out.println(Arrays.toString(args));
        Job job = Job.getInstance(getConf());
        job.setJarByClass(GetAverage.class);
        job.setOutputKeyClass(Text.class);
        // The overall value type is float.
        job.setOutputValueClass(FloatWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Propagate the job status instead of always returning success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    // Mapper: parses each input line as a JSON object and emits (asin, overall).
    public static class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {
        private Text asin_value = new Text();
        private FloatWritable overall_value = new FloatWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String asin;
            float overall;
            String line = value.toString();
            String[] tuple = line.split("\\n");
            try {
                for (int i = 0; i < tuple.length; i++) {
                    JSONObject obj = new JSONObject(tuple[i]);
                    System.out.println(obj);
                    asin = obj.getString("asin");
                    overall = Float.parseFloat(obj.getString("overall"));
                    asin_value.set(asin);
                    overall_value.set(overall);
                    context.write(asin_value, overall_value);
                }
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
    }

    // Reducer: averages all overall values that share the same asin key.
    public static class Reduce extends Reducer<Text, FloatWritable, Text, FloatWritable> {
        @Override
        public void reduce(Text key, Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {
            float sum = 0;
            int length = 0;
            for (FloatWritable val : values) {
                sum += val.get();
                length++;
            }
            float average = sum / length;
            context.write(key, new FloatWritable(average));
        }
    }
}
When I compile it, javac prints no messages, but no jar file is produced either.
I compile the Java file with the command below.
cat ./compile_command
javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common-2.8.0.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.8.0.jar:../org.json.jar -d getaverage_classes ./GetAverage.java
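After compiling, my plan is to package the class files into a jar and submit it with hadoop jar, roughly as sketched below. The jar name, the -libjars way of shipping org.json.jar, and the input/output HDFS paths are just my assumptions, not something I have working yet.

# Package the compiled classes (jar name is arbitrary).
jar cf GetAverage.jar -C getaverage_classes .

# Submit the job; -libjars ships the JSON parser to the cluster
# (it is honored because the driver goes through ToolRunner).
# The input/output HDFS paths are placeholders.
hadoop jar GetAverage.jar some.package.path.here.bigdata.GetAverage \
    -libjars ../org.json.jar /user/me/input /user/me/output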
My directory structure is as follows:
~/test
|- GetAverage.java
|- compile_command
|- getaverage_classes
   |- some/package/path/here/bigdata
      |- GetAverage$Map.class
      |- GetAverage$Reduce.class
      |- GetAverage.class
If anyone knows what is going wrong here, please answer my question. :)