Hadoop MapReduce 二次排序:Reducer 未被调用

时间:2014-08-26 06:26:41

标签: java sorting hadoop mapreduce

我正在尝试对输出中的 4 个值进行二次排序,参考了这篇教程。我有一个运行 Hadoop 2.2.0 的 4 节点集群,并使用 IntelliJ IDEA 进行本地调试。

以下是我正在使用的所有类。

MAPPER:

public class MapredHiveMapper extends Mapper<LongWritable, Text, CompositeKey, Text> {

private Text finalString = new Text();
private CompositeKey internalKey;
boolean flag = false;
String pt_timestamp = null;
private int assetId = 0;


public void map(LongWritable key, Text value,Context context)
        throws IOException, InterruptedException {

    String append = "";
    String row = value.toString();

    String[] line;
    line = row.split(",", -1);



    finalString.set(row);
    internalKey = new CompositeKey(Integer.parseInt(line[0]),line[1],Integer.parseInt(line[2]),Integer.parseInt(line[3]));
    context.write(internalKey, finalString);
}

}

REDUCER:

public class MapredHiveReducer extends Reducer<CompositeKey, Text, Text, Text> {

private Text outputValue = new Text();
private Text customKey = new Text();
boolean flag = false;
String pt_timestamp = null;
private int assetId = 0;

public void reduce(CompositeKey key,Text values,Context context) throws IOException, InterruptedException {

    String append = "";
    String row = values.toString();

    String[] line;
    line = row.split(",", -1);

    if(Integer.parseInt(line[7])==0 || (Integer.parseInt(line[7])>0 && line[4]==line[6])) {

        if (!line[4].equals("\\N")) {
            assetId = Integer.parseInt(line[0]);
            pt_timestamp = line[4];
            append = row;
            flag = true;
        } else {
            if (flag)
                if (Integer.parseInt(line[0])==assetId)
                    append = row + ", new :- " + pt_timestamp;
                else
                    append = row;
        }
    }

    outputValue.set(append);
    customKey.set(key.toString());
    context.write(customKey, outputValue);
  }

}

CompositeKey.java

/**
 * Map output key made of four components: asset id, date string, hour number and
 * quarter number.
 *
 * <p>Natural ordering compares the components in that order. {@code equals} and
 * {@code hashCode} are defined over the same four components so that they agree
 * with {@code compareTo}.
 *
 * <p>An instance created with the no-argument constructor has all components
 * unset until {@link #readFields(DataInput)} or the setters populate them.
 */
public class CompositeKey implements WritableComparable<CompositeKey> {

    private Integer assetId;
    private String date;
    private Integer hourNum;
    private Integer quarterNum;

    /** Creates an empty key; components are filled in later. */
    public CompositeKey() { }

    /**
     * Creates a fully populated key.
     *
     * @param assetId    asset identifier (first sort component)
     * @param date       date string (second sort component, compared lexicographically)
     * @param hourNum    hour number (third sort component)
     * @param quarterNum quarter number (fourth sort component)
     */
    public CompositeKey(int assetId, String date, int hourNum, int quarterNum) {
        this.assetId = assetId;
        this.date = date;
        this.hourNum = hourNum;
        this.quarterNum = quarterNum;
    }

    /** Returns the four components joined by commas, in sort order. */
    @Override
    public String toString() {
        return (new StringBuilder())
                .append(assetId).append(',')
                .append(date).append(',')
                .append(hourNum).append(',')
                .append(quarterNum)
                .toString();
    }

    /**
     * Restores the components in the same order {@link #write(DataOutput)} emits them.
     *
     * @param in source of the serialized key
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        assetId = WritableUtils.readVInt(in);
        date = WritableUtils.readString(in);
        hourNum = WritableUtils.readVInt(in);
        quarterNum = WritableUtils.readVInt(in);
    }

    /**
     * Serializes the components: asset id, date, hour number, quarter number.
     *
     * @param out destination of the serialized key
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        WritableUtils.writeVInt(out, assetId);
        WritableUtils.writeString(out, date);
        WritableUtils.writeVInt(out, hourNum);
        WritableUtils.writeVInt(out, quarterNum);
    }

    /**
     * Orders keys by asset id, then date, then hour number, then quarter number.
     *
     * @param o key to compare against
     * @return negative, zero or positive per the {@code Comparable} contract
     */
    @Override
    public int compareTo(CompositeKey o) {
        int result = assetId.compareTo(o.assetId);

        if (0 == result) {
            result = date.compareTo(o.date);
        }
        if (0 == result) {
            result = hourNum.compareTo(o.hourNum);
        }
        if (0 == result) {
            result = quarterNum.compareTo(o.quarterNum);
        }
        return result;
    }

    /**
     * Two keys are equal when all four components are equal, matching the cases
     * in which {@link #compareTo(CompositeKey)} returns zero.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof CompositeKey)) {
            return false;
        }
        CompositeKey that = (CompositeKey) other;
        return same(assetId, that.assetId)
                && same(date, that.date)
                && same(hourNum, that.hourNum)
                && same(quarterNum, that.quarterNum);
    }

    /** Hash over the same four components used by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int hash = 17;
        hash = 31 * hash + hashOf(assetId);
        hash = 31 * hash + hashOf(date);
        hash = 31 * hash + hashOf(hourNum);
        hash = 31 * hash + hashOf(quarterNum);
        return hash;
    }

    /** Null-tolerant equality, so an unpopulated key can still be compared. */
    private static boolean same(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }

    /** Null-tolerant hash code; an unset component contributes zero. */
    private static int hashOf(Object o) {
        return o == null ? 0 : o.hashCode();
    }

    public Integer getAssetId() {
        return assetId;
    }

    public void setAssetId(Integer assetId) {
        this.assetId = assetId;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public Integer getHourNum() {
        return hourNum;
    }

    public void setHourNum(Integer hourNum) {
        this.hourNum = hourNum;
    }

    public Integer getQuarterNum() {
        return quarterNum;
    }

    public void setQuarterNum(Integer quarterNum) {
        this.quarterNum = quarterNum;
    }

}

ActualKeyPartitioner.java

/**
 * Partitioner that routes records by the asset id alone, ignoring the remaining
 * components of the {@link CompositeKey}, so that all rows of one asset reach the
 * same reduce task.
 */
public class ActualKeyPartitioner extends Partitioner<CompositeKey, Text> {

    HashPartitioner<Text, Text> hashPartitioner = new HashPartitioner<Text, Text>();
    Text newKey = new Text();

    /**
     * Hash-partitions on the textual form of the key's asset id.
     *
     * <p>Failures are deliberately not caught. Falling back to a random partition
     * could send rows of the same asset to different reducers and would hide the
     * underlying error, so the exception is left to fail the task instead.
     *
     * @param key            composite key whose asset id selects the partition
     * @param value          record value, passed through to the hash partitioner
     * @param numReduceTasks number of partitions to choose from
     * @return the partition index chosen by the hash partitioner
     */
    @Override
    public int getPartition(CompositeKey key, Text value, int numReduceTasks) {
        // Run the default hash partitioner over the first part of the key only.
        newKey.set(key.getAssetId().toString());
        return hashPartitioner.getPartition(newKey, value, numReduceTasks);
    }
}

ActualKeyGroupingComparator.java

/**
 * Partitioner that routes records by the asset id alone, ignoring the remaining
 * components of the {@link CompositeKey}.
 *
 * <p>NOTE(review): this listing appears under the "ActualKeyGroupingComparator.java"
 * heading but declares {@code ActualKeyPartitioner} a second time. The driver
 * references {@code ActualKeyGroupingComparator}, which is not defined anywhere in
 * the visible source. Presumably a paste error; confirm and supply the real
 * grouping comparator.
 */
public class ActualKeyPartitioner extends Partitioner<CompositeKey, Text> {

    HashPartitioner<Text, Text> hashPartitioner = new HashPartitioner<Text, Text>();
    Text newKey = new Text();

    /**
     * Hash-partitions on the textual form of the key's asset id.
     *
     * <p>Failures are deliberately not caught. Falling back to a random partition
     * could send rows of the same asset to different reducers and would hide the
     * underlying error, so the exception is left to fail the task instead.
     *
     * @param key            composite key whose asset id selects the partition
     * @param value          record value, passed through to the hash partitioner
     * @param numReduceTasks number of partitions to choose from
     * @return the partition index chosen by the hash partitioner
     */
    @Override
    public int getPartition(CompositeKey key, Text value, int numReduceTasks) {
        // Run the default hash partitioner over the first part of the key only.
        newKey.set(key.getAssetId().toString());
        return hashPartitioner.getPartition(newKey, value, numReduceTasks);
    }
}

Driver 类:

/**
 * Command-line entry point that configures and runs the "MapredHive" job.
 *
 * <p>Expects two arguments: the input path and the output path.
 */
public class MapredHiveDriver {

    /**
     * Configures the job, runs it to completion and exits with 0 on success,
     * 1 on job failure or error, and 2 when the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException if the job instance cannot be created
     */
    public static void main(String[] args) throws IOException {

        if (args.length < 2) {
            System.err.println("Usage: MapredHiveDriver <input path> <output path>");
            System.exit(2);
        }

        Configuration config = new Configuration();

        Job job = Job.getInstance(config);

        boolean succeeded = false;

        try {

            job.setJobName("MapredHive");

            job.setJarByClass(MapredHiveDriver.class);

            job.setMapperClass(MapredHiveMapper.class);
            job.setReducerClass(MapredHiveReducer.class);

            job.setMapOutputKeyClass(CompositeKey.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            job.setPartitionerClass(ActualKeyPartitioner.class);
            job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
            // No sort comparator is configured here (a CompositeKeyComparator was
            // tried and disabled): job.setSortComparatorClass(CompositeKeyComparator.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            // waitForCompletion submits the job itself if it has not been submitted
            // yet, so a separate submit() call is unnecessary.
            succeeded = job.waitForCompletion(true);

        } catch (Exception e) {
            // Report the failure and fall through to a non-zero exit status, so a
            // crashed run is not mistaken for a successful one.
            e.printStackTrace();
        }

        System.exit(succeeded ? 0 : 1);
    }

}

我一直在用一个很小的输入集(500 kB 的 CSV 文件)运行它。在 IntelliJ 中调试发现 reducer 始终没有被调用。

IntelliJ 日志(本地作业):

    2014-08-26 11:46:32,165 WARN  [main] util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2014-08-26 11:46:33,420 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2014-08-26 11:46:33,432 INFO  [main] jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2014-08-26 11:46:34,079 WARN  [main] mapreduce.JobSubmitter (JobSubmitter.java:copyAndConfigureFiles(149)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2014-08-26 11:46:34,138 WARN  [main] mapreduce.JobSubmitter (JobSubmitter.java:copyAndConfigureFiles(258)) - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
2014-08-26 11:46:34,201 INFO  [main] input.FileInputFormat (FileInputFormat.java:listStatus(287)) - Total input paths to process : 1
2014-08-26 11:46:34,253 INFO  [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(394)) - number of splits:1
2014-08-26 11:46:34,266 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - user.name is deprecated. Instead, use mapreduce.job.user.name
2014-08-26 11:46:34,267 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapreduce.partitioner.class is deprecated. Instead, use mapreduce.job.partitioner.class
2014-08-26 11:46:34,267 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.output.value.class is deprecated. Instead, use mapreduce.job.output.value.class
2014-08-26 11:46:34,267 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.mapoutput.value.class is deprecated. Instead, use mapreduce.map.output.value.class
2014-08-26 11:46:34,272 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class
2014-08-26 11:46:34,272 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.job.name is deprecated. Instead, use mapreduce.job.name
2014-08-26 11:46:34,272 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.output.value.groupfn.class is deprecated. Instead, use mapreduce.job.output.group.comparator.class
2014-08-26 11:46:34,272 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class
2014-08-26 11:46:34,272 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class
2014-08-26 11:46:34,278 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir
2014-08-26 11:46:34,278 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.output.dir is deprecated. Instead, use mapreduce.output.fileoutputformat.outputdir
2014-08-26 11:46:34,278 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class
2014-08-26 11:46:34,279 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
2014-08-26 11:46:34,279 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
2014-08-26 11:46:34,280 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.mapoutput.key.class is deprecated. Instead, use mapreduce.map.output.key.class
2014-08-26 11:46:34,280 INFO  [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
2014-08-26 11:46:34,527 INFO  [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(477)) - Submitting tokens for job: job_local1652633966_0001
2014-08-26 11:46:34,833 WARN  [main] conf.Configuration (Configuration.java:loadProperty(2172)) - file:/tmp/hadoop-hdfs/mapred/staging/hdfs1652633966/.staging/job_local1652633966_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
2014-08-26 11:46:34,833 WARN  [main] conf.Configuration (Configuration.java:loadProperty(2172)) - file:/tmp/hadoop-hdfs/mapred/staging/hdfs1652633966/.staging/job_local1652633966_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
2014-08-26 11:46:35,380 WARN  [main] conf.Configuration (Configuration.java:loadProperty(2172)) - file:/tmp/hadoop-hdfs/mapred/local/localRunner/hdfs/job_local1652633966_0001/job_local1652633966_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
2014-08-26 11:46:35,381 WARN  [main] conf.Configuration (Configuration.java:loadProperty(2172)) - file:/tmp/hadoop-hdfs/mapred/local/localRunner/hdfs/job_local1652633966_0001/job_local1652633966_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
2014-08-26 11:46:35,396 INFO  [main] mapreduce.Job (Job.java:submit(1272)) - The url to track the job: http://localhost:8080/
2014-08-26 11:46:35,397 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1317)) - Running job: job_local1652633966_0001
2014-08-26 11:46:35,400 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(323)) - OutputCommitter set in config null
2014-08-26 11:46:35,407 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(341)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2014-08-26 11:46:35,524 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:run(389)) - Waiting for map tasks
2014-08-26 11:46:35,525 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(216)) - Starting task: attempt_local1652633966_0001_m_000000_0
2014-08-26 11:46:35,610 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(581)) -  Using ResourceCalculatorProcessTree : [ ]
2014-08-26 11:46:35,614 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(732)) - Processing split: hdfs://hdp2minion1.domain.com:8020/data/small/wc/tblchillerstatus_base3/000000_0:0+568128
2014-08-26 11:46:35,633 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(387)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2014-08-26 11:46:35,669 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1183)) - (EQUATOR) 0 kvi 26214396(104857584)
2014-08-26 11:46:35,669 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(975)) - mapreduce.task.io.sort.mb: 100
2014-08-26 11:46:35,669 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(976)) - soft limit at 83886080
2014-08-26 11:46:35,669 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(977)) - bufstart = 0; bufvoid = 104857600
2014-08-26 11:46:35,669 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(978)) - kvstart = 26214396; length = 6553600
2014-08-26 11:46:36,399 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1338)) - Job job_local1652633966_0001 running in uber mode : false
2014-08-26 11:46:36,402 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1345)) -  map 0% reduce 0%
2014-08-26 11:46:36,457 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(513)) - 
2014-08-26 11:46:36,470 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1440)) - Starting flush of map output
2014-08-26 11:46:36,471 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1459)) - Spilling map output
2014-08-26 11:46:36,471 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - bufstart = 0; bufend = 906963; bufvoid = 104857600
2014-08-26 11:46:36,471 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1462)) - kvstart = 26214396(104857584); kvend = 26141344(104565376); length = 73053/6553600
2014-08-26 11:46:37,247 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1648)) - Finished spill 0
2014-08-26 11:46:37,250 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(995)) - Task:attempt_local1652633966_0001_m_000000_0 is done. And is in the process of committing
2014-08-26 11:46:37,259 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(513)) - map
2014-08-26 11:46:37,259 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1115)) - Task 'attempt_local1652633966_0001_m_000000_0' done.
2014-08-26 11:46:37,259 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(241)) - Finishing task: attempt_local1652633966_0001_m_000000_0
2014-08-26 11:46:37,260 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:run(397)) - Map task executor complete.
2014-08-26 11:46:37,270 INFO  [Thread-12] mapred.Task (Task.java:initialize(581)) -  Using ResourceCalculatorProcessTree : [ ]
2014-08-26 11:46:37,280 INFO  [Thread-12] mapred.Merger (Merger.java:merge(568)) - Merging 1 sorted segments
2014-08-26 11:46:37,287 INFO  [Thread-12] mapred.Merger (Merger.java:merge(667)) - Down to the last merge-pass, with 1 segments left of total size: 943473 bytes
2014-08-26 11:46:37,287 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(513)) - 
2014-08-26 11:46:37,378 INFO  [Thread-12] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(840)) - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2014-08-26 11:46:37,405 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1345)) -  map 100% reduce 0%
2014-08-26 11:46:37,895 INFO  [Thread-12] mapred.Task (Task.java:done(995)) - Task:attempt_local1652633966_0001_r_000000_0 is done. And is in the process of committing
2014-08-26 11:46:37,898 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(513)) - 
2014-08-26 11:46:37,898 INFO  [Thread-12] mapred.Task (Task.java:commit(1156)) - Task attempt_local1652633966_0001_r_000000_0 is allowed to commit now
2014-08-26 11:46:37,928 INFO  [Thread-12] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local1652633966_0001_r_000000_0' to hdfs://hdp2minion1.domain.com:8020/data/small/wc/tblchillerstatus_base5/_temporary/0/task_local1652633966_0001_r_000000
2014-08-26 11:46:37,929 INFO  [Thread-12] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(513)) - reduce > reduce
2014-08-26 11:46:37,929 INFO  [Thread-12] mapred.Task (Task.java:sendDone(1115)) - Task 'attempt_local1652633966_0001_r_000000_0' done.
2014-08-26 11:46:38,405 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1345)) -  map 100% reduce 100%
2014-08-26 11:46:38,406 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1356)) - Job job_local1652633966_0001 completed successfully
2014-08-26 11:46:38,420 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1363)) - Counters: 32
    File System Counters
        FILE: Number of bytes read=944019
        FILE: Number of bytes written=2263114
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=1136256
        HDFS: Number of bytes written=925797
        HDFS: Number of read operations=15
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=4
    Map-Reduce Framework
        Map input records=18264
        Map output records=18264
        Map output bytes=906963
        Map output materialized bytes=943497
        Input split bytes=145
        Combine input records=0
        Combine output records=0
        Reduce input groups=38
        Reduce shuffle bytes=0
        Reduce input records=18264
        Reduce output records=18264
        Spilled Records=36528
        Shuffled Maps =0
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=44
        CPU time spent (ms)=0
        Physical memory (bytes) snapshot=0
        Virtual memory (bytes) snapshot=0
        Total committed heap usage (bytes)=511967232
    File Input Format Counters 
        Bytes Read=568128
    File Output Format Counters 
        Bytes Written=925797

Process finished with exit code 0

请帮忙。

0 个答案:

没有答案