下面的代码是用Hadoop的Mapper编写的:
// Split the incoming record on runs of whitespace.
String[] s = value.toString().split("\\s+");
// s[1] is read without checking s.length first, so this line throws
// ArrayIndexOutOfBoundsException whenever the split yields fewer than two elements.
String date = s[1];
在访问 s[1] 时抛出了 ArrayIndexOutOfBoundsException 错误。
正则表达式在hadoop中不起作用吗?
答案 0 :(得分:0)
这是因为输入中存在空行或只包含空白字符的行,你必须先把这些行过滤掉。
// Split first, then check the length: the array must exist before it can be tested.
String[] s = value.toString().split("\\s+");
// Blank or single-token lines produce fewer than two elements; skip them instead of
// letting s[1] throw ArrayIndexOutOfBoundsException.
if (s.length > 1) {
    String date = s[1];
}
您的问题的解决方案:
//Map 函数:
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {
// private final static IntWritable one = new IntWritable(1);
//private Text word = new Text();
double temp;
public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
line=line.replaceAll("U","");
int a=line.length();
if(a>2)
{
int spec=line.indexOf(' ');
String s=line.substring(spec,spec+9);
String b=line.substring(spec+10,a);
StringTokenizer tokenizer = new StringTokenizer(b);
while (tokenizer.hasMoreTokens()) {
{
temp=Double.valueOf(tokenizer.nextToken().toString());
}
output.collect(new Text(s), new DoubleWritable(temp));
}
}
}
}
//Reduce 函数:
public static class Reduce extends MapReduceBase implements Reducer<Text, DoubleWritable, Text, DoubleWritable> {
public void reduce(Text key, Iterator<DoubleWritable> values, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
Double maxValue = Double.MIN_VALUE;
Double minvalue=Double.MAX_VALUE;
Double a;
while (values.hasNext())
{
a=values.next().get();
maxValue = Math.max(maxValue,a);
minvalue=Math.min(minvalue,a);
if(maxValue>40)
{
output.collect(key,new DoubleWritable(maxValue));
}
/* if(minvalue<10)
{
output.collect(key, new DoubleWritable(a));
} */
}
output.collect(new Text(key+"Max"), new DoubleWritable(maxValue));
output.collect(new Text(key+"Min"),new DoubleWritable(minvalue));
}
}