Hadoop MapReduce 程序中的空指针异常(NullPointerException)

时间:2014-09-16 06:24:11

标签: java hadoop nullpointerexception mapreduce

我有一个以制表符(tab)分隔的文本文件,内容如下:
20001204X00000  Accident    10  9   6   Hyd 
20001204X00001  Accident        8   7   vzg 2
20001204X00002  Accident    10  7       sec 1
20001204X00003  Accident    23      9   kkd 23

我希望输出每个航班的 ID 和乘客总数,其中乘客总数是该行所有数值列之和,如下所示:

20001204X00000 25
20001204X00001 17
20001204X00002 18
20001204X00003 55

在尝试把四个数值列相加时,我得到了 NullPointerException。请问如何避免该异常,以及如何把 null 或空白值当作零处理?

下面是我的 Hadoop MapReduce Java 代码:

package com.flightsdamage.mr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that reads tab-delimited flight records and emits, for every
 * record, the flight ID (first column) together with the sum of all numeric
 * columns that follow it. Missing, blank or non-numeric columns count as zero.
 */
public class FlightsDamage {

    /** Input path used when none is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/home/node1/Filghtdamage.txt";

    /** Output path used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/home/node1/output";

    /**
     * Mapper that turns one input line into a single (flight ID, total) pair.
     */
    public static class FlightsMaper extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Output holders are reused across map() calls instead of being
        // allocated per record; both are always non-null when written.
        private final Text flightId = new Text();
        private final LongWritable total = new LongWritable();

        /**
         * Splits the line on tabs, sums every numeric column after the ID and
         * writes the result. Lines whose first column is blank are skipped.
         *
         * @param key     position of the line in the input (unused)
         * @param value   one tab-delimited input line
         * @param context sink for the (flight ID, total) pair
         * @throws IOException          if the output cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The data is tab-delimited. The previous "|" pattern is a regex
            // alternation of two empty patterns and split the line into single
            // characters. The -1 limit keeps trailing empty columns.
            String[] columns = value.toString().split("\t", -1);
            if (columns[0].trim().isEmpty()) {
                // No flight ID (e.g. an empty line): nothing meaningful to emit.
                return;
            }

            flightId.set(columns[0]);
            total.set(sumNumericColumns(columns));
            // The previous version wrote a never-assigned (null) value from a
            // finally block after swallowing a NumberFormatException; that null
            // is what surfaced as the reported NullPointerException.
            context.write(flightId, total);
        }

        /**
         * Adds up every column after the first one, treating columns that are
         * not valid numbers as zero.
         *
         * @param columns the fields of one record; index 0 is the flight ID
         * @return the sum of all numeric columns after the ID
         */
        private static long sumNumericColumns(String[] columns) {
            long sum = 0L;
            for (int i = 1; i < columns.length; i++) {
                sum += parseOrZero(columns[i]);
            }
            return sum;
        }

        /**
         * Parses a column as a long, returning zero when it is null, blank or
         * not a valid number.
         *
         * @param field raw column text, possibly null or padded with spaces
         * @return the parsed value, or 0 if the text is not a valid long
         */
        private static long parseOrZero(String field) {
            if (field == null) {
                return 0L;
            }
            String trimmed = field.trim();
            if (trimmed.isEmpty()) {
                return 0L;
            }
            try {
                return Long.parseLong(trimmed);
            } catch (NumberFormatException ignored) {
                // Text columns such as "Accident" or a city code are expected
                // in the input and deliberately contribute nothing to the sum.
                return 0L;
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: args[0] = input path, args[1] = output path; the
     *             built-in default paths are used for any argument not supplied
     * @throws Exception if the job cannot be configured or fails while running
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "Flights MR");
        job.setJarByClass(FlightsDamage.class);
        job.setMapperClass(FlightsMaper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

1 个答案:

答案 0 :(得分:2)

在解析字符串之前,您需要检查字符串是否为数字类型。像:

int value = 0;
if (StringUtils.isNumeric(str)) {
    value = Integer.parseInt(str);
}

如果输入字符串是非数字的(无论是null还是其他非数字值),StringUtils.isNumeric()将返回false,变量将0作为默认值。

下面是一个演示 StringUtils.isNumeric() 用法的简单程序:

测试类:

import org.apache.commons.lang3.StringUtils;

/**
 * Small demonstration of {@code StringUtils.isNumeric()}: for each sample
 * record it prints the first field followed by the sum of the remaining
 * fields that are numeric.
 */
public class LineParse {

    /** Tab-separated sample records; some numeric fields are left empty. */
    private static final String[] SAMPLE_RECORDS = {
            "20001204X00000\tAccident\t10\t9\t6\tHyd",
            "20001204X00001\tAccident\t\t8\t7\tvzg\t2",
            "20001204X00002\tAccident\t10\t7\t\tsec\t1",
            "20001204X00003\tAccident\t23\t\t9\tkkd\t23"
    };

    /**
     * Builds one "id TAB total" line per sample record and prints the report.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        StringBuilder report = new StringBuilder();
        for (String record : SAMPLE_RECORDS) {
            String[] fields = record.split("\t");
            int total = 0;
            if (fields.length > 0) {
                report.append(fields[0]).append("\t");
                total = sumNumericFields(fields);
            }
            report.append(total).append("\n");
        }
        System.out.println(report.toString());
    }

    /**
     * Sums every field after the first one that {@code StringUtils.isNumeric()}
     * accepts; all other fields are ignored.
     *
     * @param fields the fields of one record; index 0 is the record ID
     * @return the sum of the numeric fields after the ID
     */
    private static int sumNumericFields(String[] fields) {
        int total = 0;
        int index = 1;
        while (index < fields.length) {
            String field = fields[index];
            // Skip anything that is not purely numeric (empty, text, ...).
            if (StringUtils.isNumeric(field)) {
                total += Integer.parseInt(field);
            }
            index++;
        }
        return total;
    }
}

输出:

20001204X00000  25
20001204X00001  17
20001204X00002  18
20001204X00003  55

我假设所有数字都是Integer。否则使用Double.parseDouble()

相关问题