java.lang.ClassCastException: class org.json.JSONObject in a MapReduce program

Date: 2014-10-20 11:30:02

Tags: java json hadoop mapreduce

I have an input text file that looks like this (excerpt):

{"author":"Martti Paturi","book":"Aiotko oppikouluun"}
{"author":"International Meeting of Neurobiologists Amsterdam 1959.","book":"Structure and function of the cerebral cortex"}
{"author":"Paraná (Brazil : State). Comissão de Desenvolvimento Municipal.","book":"Plano diretor de desenvolvimento de Maringá"}

I need to run MapReduce on this file to produce, as output, one JSON object per author that collects all of that author's books in a JSON array, in the following format:

{"author": "Ian Fleming", "books": [{"book": "Goldfinger"},{"book": "Moonraker"}]}

My code is as follows:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.json.*;

public class CombineBooks {

  //TODO define variables and implement necessary components

  /*public static class MyTuple implements Writable{
    private String author;
    private String book;

    public void readFields(DataInput in){

        JSONObject obj = new JSONObject(in.readLine());
        author = obj.getString("author");
        book = obj.getString("book");
    }

    public void write(DataOutput out){

        out.writeBytes(author);
        out.writeBytes(book);
    }

    public static MyTuple read(DataInput in){

        MyTuple tup = new MyTuple();
        tup.readFields(in);
        return tup;
    }

  }*/

  public static class Map extends Mapper<LongWritable, Text, Text, Text>{

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{

        String author;
        String book;
        String line = value.toString();
        String[] tuple = line.split("\\n");
        try{
            for(int i=0;i<tuple.length; i++){
                JSONObject obj = new JSONObject(tuple[i]);
                author = obj.getString("author");
                book = obj.getString("book");
                context.write(new Text(author), new Text(book));
            }
        }catch(JSONException e){
            e.printStackTrace();
        }
    }
  }

  public static class Combine extends Reducer<Text, Text, Text, Text>{

    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException{

        // Concatenate this author's book titles into one
        // comma-separated string.
        String booklist = null;
        for(Text val : values){
            if(booklist == null){
                booklist = val.toString();
            }
            else{
                booklist = booklist + "," + val.toString();
            }
        }
        context.write(key, new Text(booklist));
    }
  }

  public static class Reduce extends Reducer<Text,Text,JSONObject,NullWritable>{

    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException{

        try{
            JSONArray ja = new JSONArray();
            // A value may be a single title or a comma-separated list
            // (the combiner is not guaranteed to run exactly once), so
            // split every value instead of keeping only the last one.
            for(Text val : values){
                for(String book : val.toString().split(",")){
                    ja.put(new JSONObject().put("book", book));
                }
            }
            JSONObject obj = new JSONObject();
            obj.put("author", key.toString());
            obj.put("books", ja);
            context.write(obj, NullWritable.get());
        }catch(JSONException e){
            e.printStackTrace();
        }
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: CombineBooks <in> <out>");
      System.exit(2);
    }


    //TODO implement CombineBooks

    Job job = new Job(conf, "CombineBooks");
    job.setJarByClass(CombineBooks.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(JSONObject.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    //TODO implement CombineBooks

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

When I try to run it, I get the following error:

java.lang.ClassCastException: class org.json.JSONObject
    at java.lang.Class.asSubclass(Class.java:3165)
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:673)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:756)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

I am using java-json.jar as an external dependency. I am not sure what the error is here. Any help is appreciated!

2 Answers:

Answer 0 (score: 1):

The json jar file has to be placed in the Hadoop lib folder; then try running the program again.
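Alternatively, since the code already uses GenericOptionsParser, the jar can be shipped with the job via the -libjars option, for example: hadoop jar CombineBooks.jar CombineBooks -libjars java-json.jar <in> <out> (the jar names here are illustrative). Note that the driver JVM itself also needs the jar on its classpath, e.g. via HADOOP_CLASSPATH, because main() references JSONObject.class directly.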

Answer 1 (score: 0):

Take a look at: Hadoop Writable. You do tell Hadoop the class of your output key, but JSONObject does not implement the Writable interface.

Why not simply output Text instead:

context.write(new Text(jo.toString()), NullWritable.get());
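Along those lines, here is a minimal sketch of what the reducer and driver could look like with Text as the output key. It assumes the Map and Combine classes stay as in the question and reuses the imports already at the top of that file:

public static class Reduce extends Reducer<Text, Text, Text, NullWritable> {

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        try {
            JSONArray ja = new JSONArray();
            for (Text val : values) {
                for (String book : val.toString().split(",")) {
                    ja.put(new JSONObject().put("book", book));
                }
            }
            JSONObject obj = new JSONObject();
            obj.put("author", key.toString());
            obj.put("books", ja);
            // Text implements WritableComparable, so Hadoop can
            // serialize, sort, and write it without a custom type.
            context.write(new Text(obj.toString()), NullWritable.get());
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
}

// In main(), declare the matching output key class:
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);

Incidentally, this also explains why the exception appears in the map phase: since setMapOutputKeyClass() is never called, the map output key class falls back to the job output key class, and the sort buffer fails when it tries to treat JSONObject as a WritableComparable.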