Implementing a join in Hadoop, hitting java.lang.NullPointerException

Asked: 2013-12-02 18:07:05

Tags: java hadoop mapreduce

I have three files, say File1, File2 and File3. File1 and File2 sit in the same HDFS directory, and File3 sits in a different one. The file formats are as follows:

File1:

V1 V2 V3 V4 V5 V6 V7 V8 V9 (V1-V9 are attributes)

V2+V3 is the key combination

File2:

V1 V2 V3 V4 V5 V6 V7 V8 V9 (same format as File1)
V2+V3 is the key combination

File3:

T1 T2 T3 T4 (T1-T4 variables)
Here T2+T3 is the common key as compared to V2+V3 in Files 1 and 2.

Required output after the join:

Case 1: matched records (I need to get V9 and T4 based on the common key)

(Is there a way to process (V2+V3) together with V9?)

Case 2: unmatched records

Now, with MapReduce, I want to read the files from the two directories using two separate mappers and produce the joined output through a single reducer.
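
(Concretely, for a hypothetical common key such as "AB": mapper 1 would emit ("AB", "f1~<V9>") and mapper 2 would emit ("AB", "f2~<T4>"), so the reducer receives both tagged values grouped under the same key and can join them. The f1~/f2~ tags are the file markers used in the code below.)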

Please look at the code below (run against small sample test files) and tell me where the mistake might be. The job output comes first, then the code:

13/12/03 08:23:04 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/12/03 08:23:04 WARN snappy.LoadSnappy: Snappy native library not loaded
13/12/03 08:23:04 INFO mapred.FileInputFormat: Total input paths to process : 1
13/12/03 08:23:05 INFO mapred.FileInputFormat: Total input paths to process : 1
13/12/03 08:23:05 INFO mapred.JobClient: Running job: job_201311220353_0068
13/12/03 08:23:06 INFO mapred.JobClient:  map 0% reduce 0%
13/12/03 08:23:27 INFO mapred.JobClient:  map 25% reduce 0%
13/12/03 08:23:28 INFO mapred.JobClient:  map 50% reduce 0%
13/12/03 08:25:58 INFO mapred.JobClient:  map 50% reduce 16%
13/12/03 08:26:00 INFO mapred.JobClient:  map 100% reduce 16%
13/12/03 08:26:16 INFO mapred.JobClient:  map 100% reduce 33%
13/12/03 08:26:23 INFO mapred.JobClient: Task Id : attempt_201311220353_0068_r_000000_0, Status : FAILED
java.lang.NullPointerException
        at org.apache.hadoop.io.Text.encode(Text.java:388)
        at org.apache.hadoop.io.Text.set(Text.java:178)
        at org.apache.hadoop.io.Text.<init>(Text.java:81)
        at StockAnalyzer$StockAnalysisReducer.reduce(StockAnalyzer.java:82)
        at StockAnalyzer$StockAnalysisReducer.reduce(StockAnalyzer.java:1)
        at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:522)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)

13/12/03 08:28:32 INFO mapred.JobClient:  map 100% reduce 33%
13/12/03 08:28:36 INFO mapred.JobClient:  map 100% reduce 0%
13/12/03 08:28:39 INFO mapred.JobClient: Job complete: job_201311220353_0068
13/12/03 08:28:39 INFO mapred.JobClient: Counters: 24
13/12/03 08:28:39 INFO mapred.JobClient:   Job Counters
13/12/03 08:28:39 INFO mapred.JobClient:     Launched reduce tasks=4
13/12/03 08:28:39 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=342406
13/12/03 08:28:39 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
13/12/03 08:28:39 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
13/12/03 08:28:39 INFO mapred.JobClient:     Launched map tasks=4
13/12/03 08:28:39 INFO mapred.JobClient:     Data-local map tasks=4
13/12/03 08:28:39 INFO mapred.JobClient:     Failed reduce tasks=1
13/12/03 08:28:39 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=307424
13/12/03 08:28:39 INFO mapred.JobClient:   File Input Format Counters
13/12/03 08:28:39 INFO mapred.JobClient:     Bytes Read=0
13/12/03 08:28:39 INFO mapred.JobClient:   FileSystemCounters
13/12/03 08:28:39 INFO mapred.JobClient:     HDFS_BYTES_READ=3227
13/12/03 08:28:39 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=228636
13/12/03 08:28:39 INFO mapred.JobClient:   Map-Reduce Framework
13/12/03 08:28:39 INFO mapred.JobClient:     Map output materialized bytes=940
13/12/03 08:28:39 INFO mapred.JobClient:     Map input records=36
13/12/03 08:28:39 INFO mapred.JobClient:     Spilled Records=36
13/12/03 08:28:39 INFO mapred.JobClient:     Map output bytes=844
13/12/03 08:28:39 INFO mapred.JobClient:     Total committed heap usage (bytes)=571555840
13/12/03 08:28:39 INFO mapred.JobClient:     CPU time spent (ms)=10550
13/12/03 08:28:39 INFO mapred.JobClient:     Map input bytes=1471
13/12/03 08:28:39 INFO mapred.JobClient:     SPLIT_RAW_BYTES=1020
13/12/03 08:28:39 INFO mapred.JobClient:     Combine input records=0
13/12/03 08:28:39 INFO mapred.JobClient:     Combine output records=0
13/12/03 08:28:39 INFO mapred.JobClient:     Physical memory (bytes) snapshot=690450432
13/12/03 08:28:39 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=2663706624
13/12/03 08:28:39 INFO mapred.JobClient:     Map output records=36
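
The trace bottoms out in Text.encode, which is where Hadoop's Text rejects a null String: new Text(String) calls set, which calls encode, and encode cannot wrap null. A minimal standalone reproduction (the class name NpeDemo is just an illustrative placeholder; it assumes the Hadoop jars on the classpath):

import org.apache.hadoop.io.Text;

public class NpeDemo {
    public static void main(String[] args) {
        // Text(String) -> Text.set(String) -> Text.encode(String),
        // which throws the same java.lang.NullPointerException as the
        // failed reduce attempt above when the String is null.
        Text broken = new Text((String) null);
    }
}

So the reducer must, at some point, be handing a null String to new Text(...).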



import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.MultipleInputs;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class StockAnalyzer extends Configured implements Tool {

    public static class StockAnalysisMapper1 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {

        private String Commonkey, Stockadj, FileTag = "f1~";

        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {

            String values[] = value.toString().split(",");

            Commonkey = values[1].trim() + values[2].trim();
            Stockadj = values[8].trim();

            output.collect(new Text(Commonkey), new Text(FileTag + Stockadj));
        }
    }

    public static class StockAnalysisMapper2 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {

        private String Commonkey, Dividend, FileTag = "f2~";

        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {

            String values[] = value.toString().split(",");

            Commonkey = values[1].trim() + values[2].trim();
            Dividend = values[3].trim();

            output.collect(new Text(Commonkey), new Text(FileTag + Dividend));
        }
    }

    public static class StockAnalysisReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

        private String Stockadj = null;
        private String Dividend = null;

        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                String currValue = values.next().toString();
                String splitVals[] = currValue.split("~");

                if (splitVals[0].equals("f1")) {
                    Stockadj = splitVals[1] != null ? splitVals[1].trim() : "Stockadj";
                } else if (splitVals[0].equals("f2")) {
                    Dividend = splitVals[2] != null ? splitVals[2].trim() : "Dividend";
                }
                output.collect(new Text(Stockadj), new Text(Dividend));
            }
        }
    }


    public int run(String [] arguments) throws Exception
    {
        JobConf conf = new JobConf(StockAnalyzer.class);
        conf.setJobName("Stock Analysis"); 

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(StockAnalysisMapper1.class);
        conf.setMapperClass(StockAnalysisMapper2.class);
        conf.setReducerClass(StockAnalysisReducer.class);

        Path Mapper1InputPath = new Path(arguments[0]);
        Path Mapper2InputPath = new Path(arguments[1]);
        Path OutputPath = new Path(arguments[2]);

        MultipleInputs.addInputPath(conf,Mapper1InputPath,
         TextInputFormat.class,StockAnalysisMapper1.class);

        MultipleInputs.addInputPath(conf, Mapper2InputPath,
        TextInputFormat.class,StockAnalysisMapper2.class);

        FileOutputFormat.setOutputPath(conf, OutputPath);

        JobClient.runJob(conf);

        return 0;

    }
    public static void main(String [] arguments) throws Exception
    {
         int res = ToolRunner.run(new Configuration(),new StockAnalyzer(), arguments);
         System.exit(res);
    }
}
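
Two spots in the reducer look suspect. First, the f2 branch reads splitVals[2], but "f2~value".split("~") yields only indices 0 and 1, so that access can only throw; the null check is also a no-op, since split never produces null elements. Second, output.collect runs on every loop iteration, so when a key's first value carries the f1 tag, Dividend is still null and new Text(Dividend) fails inside Text.encode, exactly as in the trace above. The instance fields also leak values from one key into the next, because the reducer object is reused across keys. A null-safe rewrite of the reducer, as a sketch rather than a verified fix (same imports as above; the "no-dividend" / "no-stockadj" placeholders for unmatched records are made up here):

    public static class StockAnalysisReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // Locals instead of instance fields, so values from one key
            // cannot leak into the next key's output.
            String stockadj = null;
            String dividend = null;

            while (values.hasNext()) {
                String[] splitVals = values.next().toString().split("~");

                if (splitVals[0].equals("f1")) {
                    stockadj = splitVals.length > 1 ? splitVals[1].trim() : "Stockadj";
                } else if (splitVals[0].equals("f2")) {
                    // index 1, not 2: "f2~value".split("~") has two elements
                    dividend = splitVals.length > 1 ? splitVals[1].trim() : "Dividend";
                }
            }

            // Collect once per key, after all tagged values have been seen,
            // and never pass a null String to Text.
            if (stockadj != null && dividend != null) {
                output.collect(new Text(stockadj), new Text(dividend));       // case 1: matched
            } else if (stockadj != null) {
                output.collect(new Text(stockadj), new Text("no-dividend"));  // case 2: unmatched
            } else if (dividend != null) {
                output.collect(new Text("no-stockadj"), new Text(dividend));  // case 2: unmatched
            }
        }
    }

Separately, the two conf.setMapperClass(...) calls in run() are unnecessary once MultipleInputs.addInputPath binds a mapper class to each input path, and the second call would simply override the first anyway.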

0 Answers

No answers yet.