MapReduce program throws IOException "Type mismatch in key from map"

Date: 2016-05-29 06:25:12

Tags: java hadoop mapreduce

Hi, please find my code below; it is throwing the exception.

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Text, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

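        // Note: the raw Reducer.Context in the signature below means this
        // method does not override Mapper#map, so the default identity
        // mapper runs instead (see the EDIT further down).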
        public void map(Text key, Text value, Reducer.Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterator<Text> values,
                Reducer.Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;
            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital count");
        job.setJarByClass(HospitalCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class,Text.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Here is my error log:

SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/NetBeansProjects/BDGRUSDML/Libs/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/NetBeansProjects/BDGRUSDML/Libs/slf4j-nop-1.7.12.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2016-05-29 11:50:41,302 WARN  util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2016-05-29 11:50:41,965 INFO  Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1173)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2016-05-29 11:50:41,965 INFO  jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2016-05-29 11:50:42,024 WARN  mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(64)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2016-05-29 11:50:42,046 WARN  mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
2016-05-29 11:50:42,093 INFO  input.FileInputFormat (FileInputFormat.java:listStatus(283)) - Total input paths to process : 1
2016-05-29 11:50:42,148 INFO  mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(198)) - number of splits:1
2016-05-29 11:50:42,255 INFO  mapreduce.JobSubmitter (JobSubmitter.java:printTokens(287)) - Submitting tokens for job: job_local527592655_0001
2016-05-29 11:50:42,439 INFO  mapreduce.Job (Job.java:submit(1294)) - The url to track the job: http://localhost:8080/
2016-05-29 11:50:42,440 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1339)) - Running job: job_local527592655_0001
2016-05-29 11:50:42,441 INFO  mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2016-05-29 11:50:42,450 INFO  output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-05-29 11:50:42,455 INFO  mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2016-05-29 11:50:42,537 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2016-05-29 11:50:42,538 INFO  mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local527592655_0001_m_000000_0
2016-05-29 11:50:42,565 INFO  output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-05-29 11:50:42,579 INFO  mapred.Task (Task.java:initialize(612)) -  Using ResourceCalculatorProcessTree : [ ]
2016-05-29 11:50:42,584 INFO  mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: hdfs://127.0.0.1:9000/test/hospital/hospitals.txt:0+624
2016-05-29 11:50:42,671 INFO  mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2016-05-29 11:50:42,672 INFO  mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2016-05-29 11:50:42,672 INFO  mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2016-05-29 11:50:42,672 INFO  mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2016-05-29 11:50:42,672 INFO  mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2016-05-29 11:50:42,675 INFO  mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2016-05-29 11:50:42,733 INFO  mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2016-05-29 11:50:42,747 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2016-05-29 11:50:42,760 WARN  mapred.LocalJobRunner (LocalJobRunner.java:run(560)) - job_local527592655_0001
java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:715)
    at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
    at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
    at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:125)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
    at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
2016-05-29 11:50:43,444 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local527592655_0001 running in uber mode : false
2016-05-29 11:50:43,446 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 0% reduce 0%
2016-05-29 11:50:43,449 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1380)) - Job job_local527592655_0001 failed with state FAILED due to: NA
2016-05-29 11:50:43,465 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 0

EDIT:

After changing the line below from Reducer.Context to Context, I am now able to execute my code:

public void map(Text key, Text value,Context context) throws IOException, InterruptedException
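For anyone hitting the same thing: with the raw Reducer.Context parameter, the method was an overload rather than an override of Mapper#map, so Hadoop ran the default identity mapper and passed the LongWritable file-offset key from TextInputFormat straight through, which is exactly the type mismatch in the log. A minimal sketch of the corrected signature (the @Override annotation is an addition, not part of my original code; it makes the compiler reject a non-matching signature):

@Override
protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    // Context here is Mapper<Object, Text, Text, Text>.Context, so this
    // method genuinely overrides Mapper#map and is invoked per record.
}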

Please find the updated code below:

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Iterator<Text>, Text, Text> {

        private Text result = new Text();

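        // Note: Reducer#reduce takes an Iterable<Text>, not an
        // Iterator<Text>, so this method does not override it and is
        // never called (see Answer 0 below).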
        public void reduce(Text key, Iterator<Text> values,
                Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;

             System.out.println("This is Reducer Jobs");

            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                   System.out.println("This is Value " + value);
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital_count");
        job.setJarByClass(HospitalsMapReduce.class);
        job.setMapperClass(TokenizerMapper.class);
        //job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital/"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

But my problem now is that the reducer is not being executed; my output only shows the output of the map function.

4 Answers:

Answer 0 (score: 1)

Use Hadoop's Iterable instead of Java's Iterator.

Change your reducer definition and code as shown below.

public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

    private Text result = new Text();

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        System.out.println("This is Reducer Jobs");
        int sum = 0;
        int budget = 0;
        float avgBudget = 0;
        for (Text val : values) {
            String value[] = val.toString().split(",");
            System.out.println("This is Reduce Value " + val);
            sum = sum + Integer.parseInt(value[0]);
            budget = budget + Integer.parseInt(value[2]);
        }
        // cast first so the average keeps its fractional part
        avgBudget = (float) budget / sum;
        result.set(sum + " " + avgBudget);
        context.write(key, result);
    }
}

Answer 1 (score: 0)

I would summarize your problem as:

    My keys and values are both strings (Text), but the MapReduce framework thinks I am supplying numbers (LongWritable).

Well, I agree the source code would seem to make that impossible, since every mapper/reducer key and value is Text.

So you may want to look at how your application jar file is packaged, to check that the correct version is being shipped to the Hadoop cluster. Otherwise, your code does not look capable of ending in the given exception.
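On that note, the log above also warns "No job jar file set. User classes may not be found." A minimal sketch of setting the job jar explicitly in the driver, for runs launched from an IDE (the jar path below is hypothetical; point it at whatever your build produces):

Job job = Job.getInstance(conf, "Hospital count");
// Preferred: derive the jar from a class it contains.
job.setJarByClass(HospitalsMapReduce.class);
// Fallback for IDE runs where the classes are not packaged in a jar yet
// (hypothetical path):
job.setJar("/usr/local/NetBeansProjects/BDGRUSDML/dist/BDGRUSDML.jar");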

Answer 2 (score: 0)

It looks like your combiner is causing the problem. You have used your reduce function as the combiner, but the output format of your map function and of your combine function are not the same, and that must never happen. The combiner is invoked on the output of the map function, and its output becomes the input to further combiner passes or to the reduce operation. The reducer expects key-value pairs of one consistent format in the data that reaches it, whether or not that data went through combiner processing.

Also, as the code above shows, computing the average inside the combine function is not the right thing to do; the resulting average will never be correct. A combiner-safe layout is sketched below.

To sum up: drop the combiner for now. It exists purely to improve performance; introduce it once you know your code works correctly.
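For illustration, here is a minimal combiner-safe sketch (hypothetical classes, not the asker's code, reusing the imports already shown). It assumes the map value is trimmed to the shape "count,budgetSum", e.g. val.set(1 + "," + budget), so that map, combine, and reduce all exchange values of the same shape and the average is computed exactly once, in the reducer:

// Combiner: sums partial counts and budgets, emitting the same
// "count,budgetSum" value shape it consumes.
public static class SumCombiner extends Reducer<Text, Text, Text, Text> {
    private final Text out = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        long budgetSum = 0;
        for (Text v : values) {
            String[] parts = v.toString().split(",");
            count += Integer.parseInt(parts[0]);
            budgetSum += Long.parseLong(parts[1]);
        }
        out.set(count + "," + budgetSum); // same shape in as out
        context.write(key, out);
    }
}

// Reducer: consumes the same shape (combined or not) and divides once.
public static class AvgReducer extends Reducer<Text, Text, Text, Text> {
    private final Text out = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        long budgetSum = 0;
        for (Text v : values) {
            String[] parts = v.toString().split(",");
            count += Integer.parseInt(parts[0]);
            budgetSum += Long.parseLong(parts[1]);
        }
        out.set(count + " " + ((float) budgetSum / count));
        context.write(key, out);
    }
}

With this layout, job.setCombinerClass(SumCombiner.class) is safe, because the combining step is associative and shape-preserving.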

Answer 3 (score: 0)

Your reducer definition should be:

public static class IntSumReducer
        extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {

        // your logic
    }
}