我正在尝试学习MapReduce并完成这项任务。
我的输入如下(州,体育,金额(美元)):
California Football 69.09
California Swimming 31.5
Illinois Golf 8.31
Illinois Tennis 15.75
Oklahoma Golf 15.44
Oklahoma Tennis 8.33
Texas Golf 16.71
Texas Swimming 71.59
Washington Football 50.32000000000001
我期望的输出应显示每个州最受欢迎的运动项目(即该州销售额最高的运动项目)。例如:
California Football 69.09
Illinois Tennis 15.75
Oklahoma Golf 15.44
等等
以下是我的Mapper,Reducer和驱动程序代码:
映射器代码:
package org.assignment.sports;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that re-keys each space-separated input record by its first field.
 *
 * <p>For an input line {@code "<state> <sport> <amount>"} it writes the pair
 * ({@code "<state>"}, {@code "<sport> <amount>"}).
 */
public class Sports_Mapper2 extends Mapper<LongWritable, Text, Text, Text>{
    /**
     * Splits one input line on single spaces and emits the first field as key
     * and the second and third fields, joined by a space, as value.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context sink for the emitted key/value pair
     */
    public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException{
        final String[] fields = value.toString().split(" ");
        final Text state = new Text(fields[0]);
        final Text sportAndAmount = new Text(fields[1] + " " + fields[2]);
        context.write(state, sportAndAmount);
    }
}
Reducer代码:
package org.assignment.sports;
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that receives, per key, values of the form "<sport> <amount>" and
 * writes "<key> <sport>" together with an amount whenever that amount
 * exceeds the running maximum {@link #MAX}.
 */
public class Sports_Reducer2 extends Reducer<Text, Text, Text, DoubleWritable>{
// Running maximum. It is static and nothing in this class ever resets it, so
// the value carries over from one reduce() call (key) to the next.
// NOTE(review): as a result only amounts larger than every amount seen so far,
// across all keys handled by this class, are ever written.
private static double MAX=0.00;
/**
 * Scans the values of one key and writes a record each time a value's amount
 * is strictly greater than {@link #MAX}.
 *
 * @param key     the grouping key emitted by the mapper
 * @param values  the "<sport> <amount>" strings grouped under {@code key}
 * @param context sink for the output records
 */
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
//String[] k= values.toString().split(" ");
for (Text value:values){
// k[0] is the sport name, k[1] the amount as text.
String[] k= value.toString().split(" ");
DoubleWritable price = new DoubleWritable(Double.parseDouble(k[1]));
if(price.get()>MAX){
// New running maximum found.
MAX = price.get();
}
else{
// Not larger than the running maximum: skip without writing anything.
continue;
}
// Reached only when MAX was just raised. The write happens inside the loop,
// so a single key can produce more than one output record.
String ss = key.toString()+" "+ k[0];
context.write(new Text(ss), new DoubleWritable(MAX));
}
}
}
驱动程序代码:
package org.assignment.sports;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Command-line entry point that configures and runs the sports job.
 *
 * <p>After Hadoop's generic options are stripped, the first remaining argument
 * is used as the input path and the second as the output path.
 */
public class Sports_Driver2 {
    /**
     * Builds the job, wires in the mapper and reducer, and exits with status 0
     * when the job completes successfully and 1 otherwise.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) throws Exception
    {
        final Configuration configuration = new Configuration();
        final Job sportsJob = new Job(configuration, "Sports_Driver2");
        final String[] remaining = new GenericOptionsParser(configuration, args).getRemainingArgs();

        // Classes that make up the job.
        sportsJob.setJarByClass(Sports_Driver2.class);
        sportsJob.setMapperClass(Sports_Mapper2.class);
        sportsJob.setReducerClass(Sports_Reducer2.class);

        // Intermediate (map output) and final (reduce output) types.
        sportsJob.setMapOutputKeyClass(Text.class);
        sportsJob.setMapOutputValueClass(Text.class);
        sportsJob.setOutputKeyClass(Text.class);
        sportsJob.setOutputValueClass(DoubleWritable.class);

        final Path inputPath = new Path(remaining[0]);
        FileInputFormat.addInputPath(sportsJob, inputPath);
        final Path outputPath = new Path(remaining[1]);
        FileOutputFormat.setOutputPath(sportsJob, outputPath);

        final boolean succeeded = sportsJob.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}
我得到如下输出:
California Football 69.09
Texas Swimming 71.59
我哪里错了?任何帮助表示赞赏
答案 0 :(得分:2)
问题在于:在写出每个州的结果之后,Reducer中的MAX值没有被重置。
// Build the output key from the reduce key and the sport name, then write it
// with the current running maximum.
String ss = key.toString()+" "+ k[0];
context.write(new Text(ss), new DoubleWritable(MAX));
// Reset the running maximum so it no longer carries over to later keys.
// NOTE(review): in the question's reducer these statements sit inside the
// for-loop over the values, so a reset here clears MAX after every write and a
// later, smaller amount for the same key would be written as well. Confirm
// whether the reset belongs at the start of reduce(), with a single write
// after the loop.
MAX = 0.00;
答案 1 :(得分:0)
要在Reducer中求出每个键(州)对应的最大值,您需要同时记录该最大值所对应的运动项目名称,否则会得到错误的结果。请尝试以下代码。
驱动程序
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Command-line entry point that configures and runs the sports job.
 *
 * <p>Usage: {@code Sports_Driver2 [generic options] <input path> <output path>}.
 * An existing output path is deleted recursively before the job is submitted.
 */
public class Sports_Driver2 {
    /**
     * Parses the command line, builds the job and exits with status 0 on
     * success, 1 on job failure, or 2 when the paths are missing.
     *
     * @param args Hadoop generic options followed by the input and output paths
     * @throws Exception if job setup, file-system access or job execution fails
     */
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        // Parse generic options (-D, -fs, ...) BEFORE creating the Job: the Job
        // works on its own copy of the configuration, so options applied to
        // conf afterwards would never reach the job.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: Sports_Driver2 <input path> <output path>");
            System.exit(2);
        }
        Path inputPath = new Path(otherArgs[0]);
        Path outputPath = new Path(otherArgs[1]);

        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, "Sports_Driver2");
        job.setJarByClass(Sports_Driver2.class);
        job.setMapperClass(Sports_Mapper2.class);
        job.setReducerClass(Sports_Reducer2.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        // Resolve the file system from the output path itself so that a fully
        // qualified URI (e.g. hdfs://..., file://...) is honoured.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            // The job refuses to start if the output directory already exists.
            fs.delete(outputPath, true);
        }

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Mapper
public class Sports_Mapper2 extends Mapper<LongWritable, Text, Text, Text>{
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException{
String[] s= value.toString().split(" ");
String Sport_State = s[0];
String other = s[1]+" "+s[2];
context.write(new Text(Sport_State), new Text(other));
}
}
Reducer
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Sports_Reducer2 extends Reducer<Text, Text, Text, DoubleWritable>{
Text keyEmit = new Text();
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
Map<String,Double> getMax = new HashMap<>();
String sportName = "";
for (Text value:values){
String[] k= value.toString().split(" ");
sportName = k[0];
//store values
getMax.put(sportName, Double.parseDouble(k[1]));
}
/*
* Get maximum
*/
Map.Entry<String, Double> maxEntry = null;
for (Entry<String, Double> entry : getMax.entrySet())
{
if (maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0)
{
maxEntry = entry;
}
}
keyEmit.set(key.toString()+" "+maxEntry.getKey());
context.write(keyEmit, new DoubleWritable(maxEntry.getValue()));
}
}
输出
California Football 69.09
Illinois Tennis 15.75
Oklahoma Golf 15.44
Texas Swimming 71.59
Washington Football 50.32000000000001
希望这有帮助。