使用MapReduce实现等值连接(Equijoin)

时间:2016-07-31 06:37:46

标签: java arrays hadoop mapreduce

我最近开始学习hadoop / mapreduce / java。我正在进行以下任务。

我们有2个输入文件。

File1有2个字段:航空公司ID和航线数量。(航空公司ID是唯一的) File2有3个字段:航空公司ID、航空公司名称、国家。(航空公司ID是唯一的)

我需要找到最活跃的航空公司。 即 - 对于每个国家,我需要找到航线数量最多的航空公司。

让我用一个例子解释一下。让我们说

File1具有以下内容。

airlineid1  10

airlineid2  20

airlineid3  30

File2具有以下内容

airlineid1 airlinename1 country1

airlineid2 airlinename2 country1

airlineid5 airlinename5 country1

输出应为

country1 airlineid2 airlinename2 20

我为此写了2个mapreduce代码。

  1. JoinReduce - 在airlineid上对两个文件做reduce端连接(reduce-side join)。
  2. Activeairline - 使用在程序1中创建的输出,按国家/地区计算最活跃的航空公司。
  3. 我在运行程序2时遇到 arrayindexoutofbound:4 错误。我认为这是因为在程序1中创建的输出文件没有做equijoin。所以它有一些记录没有count字段。(airlineid仅出现在文件2中)。当我尝试读取这些记录的计数时,就抛出了arrayindexoutofbound异常。

    正如我之前提到的,我是Java新手,我不知道如何处理这个问题。 有人可以帮忙!

    以下是我的2个程序

    PROGRAM1(Joinreduce)

    public class JoinReduce {
    
      public static class RoutesMapper
       extends Mapper<LongWritable, Text, Text, Text>{
    
       public void map(LongWritable key, Text value, Context context
                    ) throws IOException, InterruptedException {
    
      StringTokenizer itr = new StringTokenizer(value.toString());
      String airlineid = itr.nextToken();
      String count = itr.nextToken();
    
        context.write(new Text(airlineid), new Text("routes" + "\t" + count));
       }
     }
    
     public static class AirlinesMapper
      extends Mapper<LongWritable, Text, Text, Text>{
    
    
     public void map(LongWritable key, Text value, Context context
               ) throws IOException, InterruptedException {
    
      String record = value.toString();
      String[] parts = record.split(",");
      String airlineid =  parts[0];
      String airlinename = parts[1];
      String country = parts[6];
    
      if(airlinename.length()==0){
            airlinename="NA";
       }
      if(country.length()==0){
            country="NA";
      }
       context.write(new Text(airlineid),new Text("airlines"+ "\t" + airlineid +   "\t" + airlinename + "\t" + country));  
      }
    }
    
     public static class JoinReducer
       extends Reducer<Text,Text,Text,Text> {
    
       public void reduce(Text key, Iterable<Text> values,
                       Context context
                       ) throws IOException, InterruptedException {
    
      String airlineid ="";
      String count = "";    
      String name = "";
      String country = "";
      for (Text val : values) {
          String parts[] = val.toString().split("\t");
          if (parts[0].equals("routes")){
             count = parts[1]; 
          }else if (parts[0].equals("airlines")){  
            airlineid = parts[1];  
            name = parts[2] ;
            country = parts[3];
          }
      }
    
      context.write(new Text("Joinedfile"), new Text(airlineid + "\t" + name +  "\t" + country + "\t" + count));
      }
    }
    
    public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Join Reduce");
    job.setJarByClass(JoinReduce.class);
    job.setReducerClass(JoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    MultipleInputs.addInputPath(job, new Path(args[0]),
                  TextInputFormat.class,RoutesMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
                  TextInputFormat.class,AirlinesMapper.class);
       Path outputPath = new Path(args[2]);               
       FileOutputFormat.setOutputPath(job, outputPath);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
    }
    

    PROGRAM2(Activeairline)

    public class Activeairline {
    
      public static class ActiveMapper
           extends Mapper<LongWritable, Text, Text, Text>{
    
    
        public void map(LongWritable key, Text value, Context context
                       ) throws IOException, InterruptedException {
    
            String record = value.toString();
            String parts[] = record.split("\t");
    
            String airlineid = parts[1];
            String name = parts[2];
            String country = parts[3];
            if(!(parts[4].length()==0)){
    
              String count = parts[4];           
              context.write(new Text(country),new Text(airlineid + "\t" + name + "\t" + count));
          }
         }
        } 
    
    
    
    public static class ActiveReducer
       extends Reducer<Text,Text,Text,Text> {
    
     public void reduce(Text key, Iterable<Text> values,
                       Context context
                       ) throws IOException, InterruptedException {
    
      int max = 0;
      String maxairlineid = "";
      String maxname = "";
      for (Text val : values) {
        String parts[] = val.toString().split("\t");
        int count = Integer.parseInt(parts[2]);
        String airlineid = parts[0];
        String name = parts[1];
        if (count > max){
            max = count;
            maxairlineid = airlineid;
            maxname = name;                 
        } 
      }
      context.write(key, new Text("Most Active airline " + "\t" + maxname + "\t" + maxairlineid));
      }
    }
    
    public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Active airline");
    job.setJarByClass(Activeairline.class);
    job.setMapperClass(ActiveMapper.class);
    job.setReducerClass(ActiveReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
    }
    

0 个答案:

没有答案