MapReduce - Reduce 未被调用

时间:2016-03-20 17:38:46

标签: java hadoop mapreduce

我一直在尝试运行这个项目,我在互联网上找到了这个项目并根据我的意图进行了修改。

Map 函数被调用并且工作正常,我已经从控制台检查了结果。但 Reduce 函数并没有被调用。

前两位是键(key),其余是值(value)。(此处原有一张图片)

我已经检查过 Map 输出与 Reduce 输入的键/值对类型是否匹配,也多次修改它们并尝试了不同的做法,但仍然没有找到解决办法。

由于我是这方面的初学者,可能只是犯了一个小错误。我又写了另一个项目,结果遇到了同样的问题:“Reduce 未被调用”。

我还尝试将reduce的输出值类更改为IntWritable,TextWritable而不是MedianStdDevTuple并配置了作业但没有任何更改。

我不仅需要解决方案,还想知道原因。谢谢。 这是代码

package usercommend;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;

import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.htrace.commons.logging.LogFactory;

import usercommend.starter.map;


/**
 * MapReduce driver that groups comment lengths by the hour of the comment's
 * {@code CreationDate} and emits, per hour, the median and the population
 * standard deviation of those lengths as a {@code MedianStdDevTuple}.
 *
 * <p>Input is expected to be one {@code <row ... />} XML element per line with
 * at least the attributes {@code CreationDate} and {@code Text}.
 */
public class starter extends Configured implements Tool {

    /**
     * Command-line entry point; delegates to {@link ToolRunner} and exits with
     * the status returned by {@link #run(String[])}.
     */
    public static void main (String[] args) throws Exception{
        int res =ToolRunner.run(new starter(), args);
        System.exit(res);
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: starter <input path> <output path>");
            return 2;
        }

        Job job=Job.getInstance(getConf(),"starter");
        job.setJarByClass(this.getClass());

        job.setMapperClass(map.class);
        job.setReducerClass(reduces.class);

        // Must match the mapper's output type parameters (and the reducer's input).
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Must match the reducer's output type parameters.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(MedianStdDevTuple.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;

    }

    /**
     * Emits {@code (hour of CreationDate, length of Text)} for every input line
     * that carries both attributes and a parseable date.
     */
    public static class map extends Mapper<LongWritable, Text,IntWritable, IntWritable> {
        private IntWritable outHour = new IntWritable();
        private IntWritable outCommentLength = new IntWritable();
        // SimpleDateFormat is not thread-safe, so keep one per mapper instance
        // rather than sharing a static one across instances.
        private final SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

        /**
         * Parses one line and writes the hour/length pair.
         *
         * <p>Lines without a {@code CreationDate} or {@code Text} attribute are
         * skipped; previously they caused a {@link NullPointerException} that
         * failed the task. Lines with an unparseable date are reported on
         * stderr and skipped.
         */
        @SuppressWarnings("deprecation")
        @Override
        public void map(LongWritable key , Text value,Context context) throws IOException, InterruptedException
        {
            Map<String, String> parsed = transforXmlToMap1(value.toString());
            String strDate = parsed.get("CreationDate");
            String text = parsed.get("Text");

            // Not a usable comment row (e.g. a header or wrapper line): nothing to emit.
            if (strDate == null || text == null) {
                return;
            }

            Date creationDate;
            try {
                creationDate = frmt.parse(strDate);
            } catch (ParseException e) {
                System.err.println("catch");
                e.printStackTrace();
                return;
            }

            // getHours() is deprecated but interprets the date in the JVM's default
            // time zone, which is also what frmt.parse() used above.
            outHour.set(creationDate.getHours());
            outCommentLength.set(text.length());
            // Debug trace retained from the original code: hour and length, concatenated.
            System.err.println(outHour+""+outCommentLength);
            context.write(outHour, outCommentLength);
        }

        /**
         * Splits a {@code <row name="value" ... />} line into an attribute map.
         *
         * <p>The first 5 and last 3 characters of the trimmed line are dropped
         * (the {@code "<row "} prefix and {@code " />"} suffix in the expected
         * format), the rest is split on double quotes, and tokens are read as
         * alternating {@code name=} / value pairs. This is not a real XML
         * parser: entities such as {@code &quot;} are left as-is.
         *
         * @param xml one input line
         * @return the attributes parsed so far; if the line is too short or a
         *         name token is empty, the line is echoed to stderr and the
         *         partial (possibly empty) map is returned
         */
        public static Map<String,String> transforXmlToMap1(String xml) {

            Map<String, String> map = new HashMap<String, String>();
            try {
                String trimmed = xml.trim();
                String[] tokens = trimmed.substring(5, trimmed.length()-3).split("\"");

                for(int i = 0; i < tokens.length-1 ; i+=2) {
                    String key = tokens[i].trim();
                    String val = tokens[i+1];

                    // Drop the trailing '=' from the attribute name token.
                    map.put(key.substring(0, key.length()-1),val);
                }
            } catch (StringIndexOutOfBoundsException e) {
                System.err.println(xml);
            }
            return map;
        }
    }

    /**
     * For each hour key, computes the median and population standard deviation
     * of the comment lengths received for that hour.
     */
    public static class reduces extends Reducer<IntWritable, IntWritable, IntWritable, MedianStdDevTuple> {

        private MedianStdDevTuple result = new MedianStdDevTuple();
        private ArrayList<Float> commentLengths = new ArrayList<Float>();
        // Fully qualified on purpose: the file imports the htrace-shaded LogFactory,
        // whose getLog() result is not an org.apache.commons.logging.Log. Casting it
        // to Log would fail with ClassCastException while the reducer is being
        // constructed, so reduce() would never run.
        Log log = org.apache.commons.logging.LogFactory.getLog(this.getClass());

        /**
         * Collects all lengths for {@code key}, sorts them, and writes one
         * {@code MedianStdDevTuple} holding their median and standard deviation.
         */
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
            // Debug markers retained from the original code; they show reduce() was invoked.
            System.out.println("1");
            log.info("aa");

            // Accumulate in double: a float sum loses precision on large groups.
            double sum = 0;
            commentLengths.clear();
            result.setStdDev(0);

            for(IntWritable val : values) {
                commentLengths.add((float)val.get());
                sum+=val.get();
            }

            int count = commentLengths.size();
            if (count == 0) {
                // Defensive: no values means there is nothing to summarise.
                return;
            }

            Collections.sort(commentLengths);

            if(count % 2 ==0) {
                // Even number of values: average the two middle ones.
                result.setMedian((commentLengths.get(count / 2 -1)+
                            commentLengths.get(count / 2)) / 2.0f);
            } else {
                result.setMedian(commentLengths.get(count / 2));
            }

            double avg = sum/count;
            double totalSquare = 0;
            for(float length : commentLengths) {
                double diff = length-avg;
                totalSquare += (diff*diff);
            }

            // Population standard deviation (divides by n, not n - 1).
            double stdSapma= Math.sqrt(totalSquare/count);
            result.setStdDev(stdSapma);
            context.write(key, result);
        }
    }
}

示例输入

<row Id="2" PostId="7" Score="0" Text="I see what you mean, but I've had Linux systems set up so that if the mouse stayed on a window for a certain time period (greater than zero), then that window became active.  That would be one solution.  Another would be to simply let clicks pass to whatever control they are over, whether it is in the currently active window or not.  Is that doable?" CreationDate="2010-08-17T19:38:20.410" UserId="115" />
<row Id="3" PostId="13" Score="1" Text="I am using Iwork and OpenOffice right now But I need some features that just MS has it." CreationDate="2010-08-17T19:42:04.487" UserId="135" />
<row Id="4" PostId="17" Score="0" Text="I've been using that on my MacBook Pro since I got it, with no issues.  Last week I got an iMac and immediately installed StartSound.PrefPane but it doesn't work -- any ideas why?  The settings on the two machines are identical (except the iMac has v1.1b3 instead of v1.1b2), but one is silent at startup and the other isn't...." CreationDate="2010-08-17T19:42:15.097" UserId="115" />
<row Id="5" PostId="6" Score="0" Text="+agreed.  I would add that I think you can choose to not clone everything so it takes less time to make a bootable volume" CreationDate="2010-08-17T19:44:00.270" UserId="2" />
<row Id="6" PostId="22" Score="2" Text="Applications are removed from memory by the OS at it's discretion.  Just because they are in the 'task manager' does not imply they are running and in memory.  I have confirmed this with my own apps.&#xA;&#xA;After a reboot, these applications are not reloaded until launched by a user." CreationDate="2010-08-17T19:46:01.950" UserId="589" />
<row Id="7" PostId="7" Score="0" Text="Honestly, I don't know.  It's definitely interesting though.  I'm currently scouring Google, since it would save on input clicks.  I'm just concerned that any solution might get a little &quot;hack-y&quot; and not behave consistently in all UI elements or applications.  The last thing I'd want is to not know if I'm focusing a window or pressing a button :(" CreationDate="2010-08-17T19:50:00.723" UserId="421" />
<row Id="8" PostId="21" Score="3" Text="Could you expand on the features for those not familiar with ShakesPeer?" CreationDate="2010-08-17T19:51:11.953" UserId="581" />
<row Id="9" PostId="23" Score="1" Text="Apple's vernacular is Safe Sleep." CreationDate="2010-08-17T19:51:35.557" UserId="171" />

1 个答案:

答案 0 :(得分:1)

您是不是参考了 this code(原文此处为链接)?我猜问题在于您没有为作业设置正确的输入和输出类型。

以下是您根据类定义尝试执行的操作。

  • Map 输入:(Object, Text)
  • Map 输出:(IntWritable, IntWritable)
  • Reduce 输入:(IntWritable, IntWritable)
  • Reduce 输出:(IntWritable, MedianStdDevTuple)

但是,根据您的工作配置

// Map output types as registered with the job: key IntWritable, value MedianStdDevTuple.
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(MedianStdDevTuple.class);

// Final (reduce) output types as registered with the job: key Text, value IntWritable.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

它认为你想要这样做

  • Map 输入:(Object, Text) - 我认为键实际上是 LongWritable 而不是 Object,它表示记录在文件分片中的位置
  • Map 输出:(IntWritable, MedianStdDevTuple )
  • Reduce 输入:(IntWritable, IntWritable)
  • Reduce 输出:( Text , IntWritable )

注意到它们的不同了吗?您的 reducer 期望读取的值是 IntWritable 而不是 MedianStdDevTuple,而且输出的类也不对,因此它不会运行。

要修复,请更改您的工作配置

// Map output types: key IntWritable, value IntWritable.
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(IntWritable.class);

// Final (reduce) output types: key IntWritable, value MedianStdDevTuple.
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(MedianStdDevTuple.class);

编辑:我已经让它正常运行了;与上面链接中的代码相比,我唯一真正改动的地方是 mapper 类,改用了下面这个方法。

/**
 * Turns a {@code <row name="value" ... />} line into a name-to-value map.
 *
 * <p>The trimmed line loses its first 5 and last 3 characters, the remainder
 * is split on double quotes, and the pieces are consumed pairwise: the first
 * of each pair (with every '=' and space removed) is the attribute name, the
 * second is its value. A line too short to strip is echoed to stderr and
 * yields an empty map.
 *
 * @param xml one input line
 * @return the parsed attributes, possibly empty
 */
public static Map<String, String> transforXmlToMap1(String xml) {
    final Map<String, String> attributes = new HashMap<String, String>();

    final String line = xml.trim();
    final int end = line.length() - 3;
    // Same condition under which substring(5, end) would have thrown.
    if (end < 5) {
        System.err.println(xml);
        return attributes;
    }

    final String[] pieces = line.substring(5, end).split("\"");
    int idx = 0;
    while (idx + 1 < pieces.length) {
        final String name = pieces[idx].replaceAll("[= ]", "");
        attributes.put(name, pieces[idx + 1]);
        idx += 2;
    }
    return attributes;
}