在 Hadoop Mapper 中使用 String.split 拆分字符串失败

时间:2014-11-24 12:41:48

标签: java hadoop

下面的代码写在 Hadoop 的 Mapper 中:

// Splits the input line on runs of whitespace and reads the second token.
// When the line is empty or contains a single token, the array has fewer than
// two elements, so s[1] throws ArrayIndexOutOfBoundsException.
String[] s = value.toString().split("\\s+");
String date = s[1];

访问 s[1] 时抛出 ArrayIndexOutOfBoundsException 异常。

正则表达式在hadoop中不起作用吗?

1 个答案:

答案 0 :(得分:0)

这是因为输入中存在空行或只包含空白字符的行,拆分后数组长度不足 2,必须先把这些行过滤掉。

// Split first, then check the token count: blank or single-token lines yield
// fewer than two elements, so s[1] must only be read when it exists.
String[] s = value.toString().split("\\s+");
if (s.length > 1) {
    String date = s[1];
}

针对您的问题的解决方案:

// Map 函数:

  public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {
  //  private final static IntWritable one = new IntWritable(1);
    //private Text word = new Text();
      double temp;

    public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
      String line = value.toString();
      line=line.replaceAll("U","");
      int a=line.length();
      if(a>2)
      {
          int spec=line.indexOf(' ');
          String s=line.substring(spec,spec+9);
          String b=line.substring(spec+10,a);

      StringTokenizer tokenizer = new StringTokenizer(b);
      while (tokenizer.hasMoreTokens()) {
        {
            temp=Double.valueOf(tokenizer.nextToken().toString());
        }
        output.collect(new Text(s), new DoubleWritable(temp));
      }
    }
    }
  }

// Reduce 函数:

  public static class Reduce extends MapReduceBase implements Reducer<Text, DoubleWritable, Text, DoubleWritable> {
    public void reduce(Text key, Iterator<DoubleWritable> values, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {

        Double maxValue = Double.MIN_VALUE;
        Double minvalue=Double.MAX_VALUE;
        Double a;
        while (values.hasNext()) 
        {
        a=values.next().get();
        maxValue = Math.max(maxValue,a);
        minvalue=Math.min(minvalue,a);
    if(maxValue>40)
        {
            output.collect(key,new DoubleWritable(maxValue));
        } 
   /*   if(minvalue<10)
        {
        output.collect(key, new DoubleWritable(a));
        } */

        }
    output.collect(new Text(key+"Max"), new DoubleWritable(maxValue));
    output.collect(new Text(key+"Min"),new DoubleWritable(minvalue));
    }
    }