我在mapper中实现了2d数组。我需要设置2d数组的值。我提到了这个(How to emit 2D double array from mapper using TwoDArrayWritable)帖子。
我将数组初始化为IntWritable并指定其行长度。
TwoDArrayWritable array2d = new TwoDArrayWritable (IntWritable.class);
IntWritable[][] jaccard = new IntWritable[2][];
但是当我执行这段代码时,我在访问数组时遇到了这个错误:
14/12/22 20:14:30 INFO mapreduce.Job: Task Id : attempt_1419259182533_0007_m_000000_1, Status : FAILED
Error: java.lang.NullPointerException
at JaccardMapper.map(Unknown Source)
at JaccardMapper.map(Unknown Source)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:167)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1554)
以下是映射器代码:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.Hashtable;
import java.util.Set;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.commons.httpclient.URI;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
public class JaccardMapper extends Mapper<LongWritable, Text, IntTextPair, TwoDArrayWritable> {
Hashtable movieInfo = new Hashtable<String, String>();
String[] genres, actors, entities;
String[] attributes = new String[] {"genre", "actors", "directors", "country", "year", "ratings"};
double p,q,r,s;
double result = 0.0;
String input[] = new String[]{""};
Set<String> keys;
TwoDArrayWritable array2d = new TwoDArrayWritable (IntWritable.class);
IntWritable[][] jaccard = new IntWritable[2][];
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
p = 0;
q = 0;
r = 0;
s = 0;
input = value.toString().toLowerCase().split(",");
keys = movieInfo.keySet();
int columnlength = input[1].split("\\|").length + input[2].split("\\|").length + input[3].split("\\|").length + input[4].split("\\|").length + 2;
jaccard = new IntWritable[2][]; //columnlength
//int[][] jaccard = new int[2][columnlength];
if (input.length > 0)
{
//iterate through the dataset in cache
for(String keyy : keys)
{
//iterate to user's input attributes
for (int attribute = 1; attribute < attributes.length; attribute++)
{
if (!input[attribute].equals("-"))
{
entities = input[attribute].toLowerCase().split("\\|");
int subattributecount = 0;
for(String entity : entities)
{
if (movieInfo.get(keyy).toString().toLowerCase().contains(entity))
{
//if user criteria match with the data set, mark 1, 1
jaccard[0][attribute + subattributecount] = new IntWritable(1);
jaccard[1][attribute + subattributecount] = new IntWritable(1);
}
else
{
//if user criteria doesn't match with the data set, mark 1, 0
jaccard[0][attribute + subattributecount] = new IntWritable(1);
jaccard[1][attribute + subattributecount] = new IntWritable(0);
}
subattributecount += 1;
}
}
}
IntTextPair pair = new IntTextPair(Integer.parseInt(input[0].toString()), movieInfo.get(keyy).toString());
array2d.set(jaccard);
context.write(pair, array2d);
}
}
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
// We know there is only one cache file, so we only retrieve that URI
java.net.URI fileUri = context.getCacheFiles()[0];
FileSystem fs = FileSystem.get(context.getConfiguration());
FSDataInputStream in = fs.open( new Path(fileUri) );
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String line = null;
try {
// we discard the header row
br.readLine();
while ((line = br.readLine()) != null) {
String[] fields = line.split(",");
//id,whole_line
movieInfo.put(fields[0], line);
}
br.close();
} catch (IOException e1) {
}
super.setup(context);
}
}
我哪里错了?请帮助我。