遍历 ArrayWritable 时出现 NoSuchMethodException

时间:2014-06-17 23:00:45

标签: hadoop mapreduce iteration nosuchmethoderror

我刚刚开始使用 MapReduce,遇到了一个在 Google 上找不到答案的奇怪错误。我正在用 ArrayWritable 编写一个基本程序,但运行时在 Reduce 阶段收到以下错误:

java.lang.RuntimeException:
java.lang.NoSuchMethodException:org.apache.hadoop.io.ArrayWritable.<init>()
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:62)
at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1276)
at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1214)
at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:250)
at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:246)
at PageRank$Reduce.reduce(Unknown Source)
at PageRank$Reduce.reduce(Unknown Source)
at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:522)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)

我正在使用Hadoop 1.2.1。这是我的代码:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.join.*;
import java.io.IOException;
import java.util.Iterator;

/**
 * Minimal old-API (org.apache.hadoop.mapred) job used to reproduce the
 * NoSuchMethodException reported in the stack trace above.
 *
 * The mapper emits plain ArrayWritable values and the reducer iterates over
 * them. This listing is the failing version and is kept as posted.
 */
public class TempClass {

  /**
   * Mapper that ignores its input record and emits the same
   * ("my_key", ArrayWritable{"a","b","c"}) pair three times per call.
   */
  public static class MapClass extends MapReduceBase
  implements Mapper<LongWritable, Text, Text, ArrayWritable> {
    public void map(LongWritable key, Text value,
        OutputCollector<Text, ArrayWritable> output,
        Reporter reporter) throws IOException {

      String[] arr_str = new String[]{"a","b","c"};
      for(int i=0; i<3; i++)
        output.collect(new Text("my_key"), new ArrayWritable(arr_str));
    }
  }    

  /**
   * Reducer that forwards every incoming value unchanged under its key.
   */
  public static class Reduce extends MapReduceBase
  implements Reducer<Text, ArrayWritable, Text, ArrayWritable> {

    public void reduce(Text key, Iterator<ArrayWritable> values,
        OutputCollector<Text, ArrayWritable> output,
        Reporter reporter) throws IOException {

      ArrayWritable tmp;

      while(values.hasNext()){
          // The reported failure surfaces here: per the stack trace, next()
          // deserializes the value by reflectively calling
          // ArrayWritable.<init>(), and no such no-arg constructor exists.
          tmp = values.next();
          output.collect(key, tmp);
      }
    }
  }

  /**
   * Configures and runs the job.
   *
   * @param args args[0] is the input path, args[1] is the output path
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    JobConf job = new JobConf(conf, TempClass.class);

    // Output types: Text keys and ArrayWritable values.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayWritable.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setInputFormat(TextInputFormat.class);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    FileInputFormat.setInputPaths( job, new Path( args[0] ) );
    FileOutputFormat.setOutputPath( job, new Path( args[1] ) );

    job.setJobName( "TempClass" );

    JobClient.runJob(job);
  }
}

如果我注释掉下面这几行(Reduce 类中):

      //while(values.hasNext()){
      //    tmp = values.next();
          output.collect(key, tmp);
      //}
一切就都正常了。你有什么想法吗?

1 个答案:

答案 0 :(得分:7)

  

一个用于存放某个类的实例数组的 Writable。该 Writable 中的元素必须都是同一个类的实例。如果该 Writable 要作为 Reducer 的输入,你需要创建一个子类,把值类型设置为正确的类型。例如:`public class IntArrayWritable extends ArrayWritable { public IntArrayWritable() { super(IntWritable.class); } }`

以上摘自 ArrayWritable 的文档。通常,Writable 应该有一个无参构造函数,因为框架在反序列化时会通过反射调用它来创建实例。

我刚刚将您的代码修改为:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class TempClass {

    public static class TextArrayWritable extends ArrayWritable {
        public TextArrayWritable() {
            super(Text.class);
        }

        public TextArrayWritable(String[] strings) {
            super(Text.class);
            Text[] texts = new Text[strings.length];
            for (int i = 0; i < strings.length; i++) {
                texts[i] = new Text(strings[i]);
            }
            set(texts);
        }
    }

    public static class MapClass extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, ArrayWritable> {
        public void map(LongWritable key, Text value,
                OutputCollector<Text, ArrayWritable> output, Reporter reporter)
                throws IOException {

            String[] arr_str = new String[] {
                    "a", "b", "c" };
            for (int i = 0; i < 3; i++)
                output.collect(new Text("my_key"), new TextArrayWritable(
                        arr_str));
        }
    }

    public static class Reduce extends MapReduceBase implements
            Reducer<Text, TextArrayWritable, Text, TextArrayWritable> {

        public void reduce(Text key, Iterator<TextArrayWritable> values,
                OutputCollector<Text, TextArrayWritable> output,
                Reporter reporter) throws IOException {

            TextArrayWritable tmp;

            while (values.hasNext()) {
                tmp = values.next();
                output.collect(key, tmp);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        JobConf job = new JobConf(conf, TempClass.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TextArrayWritable.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setJobName("TempClass");

        JobClient.runJob(job);
    }
}