hadoop - 如何访问TwoDArrayWritable

时间:2014-12-22 20:35:44

标签: java hadoop

我在mapper中实现了2d数组。我需要设置2d数组的值。我提到了这个(How to emit 2D double array from mapper using TwoDArrayWritable)帖子。

我将数组初始化为IntWritable并指定其行长度。

TwoDArrayWritable array2d = new TwoDArrayWritable (IntWritable.class);
IntWritable[][] jaccard = new IntWritable[2][];

但是当我执行这段代码时,我在访问数组时遇到了这个错误:

14/12/22 20:14:30 INFO mapreduce.Job: Task Id : attempt_1419259182533_0007_m_000000_1, Status : FAILED
Error: java.lang.NullPointerException
        at JaccardMapper.map(Unknown Source)
        at JaccardMapper.map(Unknown Source)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:167)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1554)

以下是映射器代码:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.Hashtable;
import java.util.Set;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.commons.httpclient.URI;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;



public class JaccardMapper extends Mapper<LongWritable, Text, IntTextPair, TwoDArrayWritable> {

    Hashtable movieInfo = new Hashtable<String, String>();
    String[] genres, actors, entities;
    String[] attributes = new String[] {"genre", "actors", "directors", "country", "year", "ratings"};
    double p,q,r,s;
    double result = 0.0;
    String input[] = new String[]{""};
    Set<String> keys;

    TwoDArrayWritable array2d = new TwoDArrayWritable (IntWritable.class);
    IntWritable[][] jaccard = new IntWritable[2][];

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
    {
        p = 0;
        q = 0;
        r = 0;
        s = 0;

        input = value.toString().toLowerCase().split(",");
        keys = movieInfo.keySet();

        int columnlength = input[1].split("\\|").length + input[2].split("\\|").length + input[3].split("\\|").length + input[4].split("\\|").length + 2;
        jaccard = new IntWritable[2][]; //columnlength

        //int[][] jaccard = new int[2][columnlength];

        if (input.length > 0)
        {
            //iterate through the dataset in cache
            for(String keyy : keys)
            {
                //iterate to user's input attributes
                for (int attribute = 1; attribute < attributes.length; attribute++)
                {
                    if (!input[attribute].equals("-")) 
                    {
                        entities = input[attribute].toLowerCase().split("\\|");
                        int subattributecount = 0;

                        for(String entity : entities)
                        {
                                if (movieInfo.get(keyy).toString().toLowerCase().contains(entity))
                                {
                                    //if user criteria match with the data set, mark 1, 1
                                    jaccard[0][attribute + subattributecount] = new IntWritable(1);
                                    jaccard[1][attribute + subattributecount] = new IntWritable(1);
                                }
                                else
                                {
                                    //if user criteria doesn't match with the data set, mark 1, 0
                                    jaccard[0][attribute + subattributecount] = new IntWritable(1);
                                    jaccard[1][attribute + subattributecount] = new IntWritable(0);
                                }
                                subattributecount += 1;
                        }
                    }
                }

                IntTextPair pair = new IntTextPair(Integer.parseInt(input[0].toString()), movieInfo.get(keyy).toString());

                array2d.set(jaccard);
                context.write(pair, array2d);
            }

}
        }


    @Override
      protected void setup(Context context) throws IOException, InterruptedException {



        // We know there is only one cache file, so we only retrieve that URI
        java.net.URI fileUri = context.getCacheFiles()[0];
        FileSystem fs = FileSystem.get(context.getConfiguration());
        FSDataInputStream in = fs.open( new Path(fileUri) );
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line = null;
        try {
         // we discard the header row
          br.readLine();
          while ((line = br.readLine()) != null) {
            String[] fields = line.split(",");
            //id,whole_line
              movieInfo.put(fields[0], line);
          }
          br.close();
         } catch (IOException e1) {
         }
       super.setup(context);
       }
}

我哪里错了?请帮助我。

0 个答案:

没有答案