Question

我正在尝试学习MapReduce并完成这项任务。

我的输入如下（州，体育，金额（美元））：

California Football 69.09 California Swimming 31.5 Illinois Golf 8.31 Illinois Tennis 15.75 Oklahoma Golf 15.44 Oklahoma Tennis 8.33 Texas Golf 16.71 Texas Swimming 71.59 Washington Football 50.32000000000001

我期待我的输出，输出应显示哪种运动在特定状态下流行（取决于运动项目的最高销量）。例如：

California Football 69.09 Illinois Tennis 15.75 Oklahoma Golf 15.44 等等

以下是我的Mapper，Reducer和驱动程序代码：

映射器代码：

package org.assignment.sports;

import java.io.IOException;


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Sports_Mapper2 extends Mapper<LongWritable, Text, Text, Text>{
    public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException{
        String[] s= value.toString().split(" ");
        String Sport_State = s[0];
        String other = s[1]+" "+s[2];
        context.write(new Text(Sport_State), new Text(other));
    }
}

减速机代码：

package org.assignment.sports;

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class Sports_Reducer2 extends Reducer<Text, Text, Text, DoubleWritable>{

    private static double MAX=0.00;
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
    {
        //String[] k= values.toString().split(" ");
        for (Text value:values){
            String[] k= value.toString().split(" ");
            DoubleWritable price = new DoubleWritable(Double.parseDouble(k[1]));
        if(price.get()>MAX){
            MAX = price.get();
        }
        else{
            continue;
        }
        String ss = key.toString()+" "+ k[0];
        context.write(new Text(ss), new DoubleWritable(MAX));
        }               
        }

}

驱动程序代码：

package org.assignment.sports;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Sports_Driver2 {
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();

        Job job = new Job(conf, "Sports_Driver2");

        String[] otherArgs =new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(Sports_Driver2.class);
        job.setMapperClass(Sports_Mapper2.class);
        job.setReducerClass(Sports_Reducer2.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true)? 0: 1);
    }

}

我得到如下输出：

California Football 69.09 Texas Swimming 71.59

我哪里错了？任何帮助表示赞赏

Answer 1

问题是在写入每个特定状态后，Reducer中的MAX值没有被重置。

String ss = key.toString()+" "+ k[0];
context.write(new Text(ss), new DoubleWritable(MAX));
MAX = 0.00;

Answer 2

要获取减速器中每个值的最大值，您需要跟踪运动的名称。否则会产生错误的结果。请尝试以下代码。

驱动程序

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Sports_Driver2 {
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();

        FileSystem fs = FileSystem.get(conf);
        Job job = new Job(conf, "Sports_Driver2");

        String[] otherArgs =new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(Sports_Driver2.class);
        job.setMapperClass(Sports_Mapper2.class);
        job.setReducerClass(Sports_Reducer2.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        if(fs.exists(new Path(otherArgs[1]))){
            fs.delete(new Path(otherArgs[1]), true);
        }

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true)? 0: 1);
    }

}

映射

public class Sports_Mapper2 extends Mapper<LongWritable, Text, Text, Text>{
    public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException{
        String[] s= value.toString().split(" ");
        String Sport_State = s[0];
        String other = s[1]+" "+s[2];
        context.write(new Text(Sport_State), new Text(other));
    }
}

减速

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class Sports_Reducer2 extends Reducer<Text, Text, Text, DoubleWritable>{

    Text keyEmit = new Text();
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
    {
        Map<String,Double> getMax = new HashMap<>();
        String sportName = "";
        for (Text value:values){
            String[] k= value.toString().split(" ");
            sportName = k[0];
            //store values
            getMax.put(sportName, Double.parseDouble(k[1]));
        }   
        /*
         * Get maximum
         */
        Map.Entry<String, Double> maxEntry = null;
        for (Entry<String, Double> entry : getMax.entrySet())
        {
            if (maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0)
            {
                maxEntry = entry;
            }
        }
        keyEmit.set(key.toString()+" "+maxEntry.getKey());
        context.write(keyEmit, new DoubleWritable(maxEntry.getValue()));
    }

}

输出

California Football 69.09
Illinois Tennis 15.75
Oklahoma Golf   15.44
Texas Swimming  71.59
Washington Football 50.32000000000001

希望这有帮助。

无法在Hadoop中使用Mapreduce获得预期的减少输出

2 个答案: