MapReduce: String index out of range

Date: 2015-04-13 09:25:11

Tags: java eclipse hadoop mapreduce

I'm having a problem with my MapReduce code. It says my String index is out of range, but the String is long enough. Does anyone have a suggestion? Thanks!

Here is my code:

```java

package Test;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TestMapper extends Mapper<Text, Text, IntWritable, IntWritable> {

    private IntWritable date_int = null;
    private IntWritable amount_int = null;

    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        String date_str = value.toString().substring(4, 5);
        String amount_str = value.toString().substring(7, 8);
        date_int = new IntWritable(Integer.parseInt(date_str));
        amount_int = new IntWritable(Integer.parseInt(amount_str));
        // Collect the results
        context.write(date_int, amount_int);
    }
}

package Test;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
//import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.Reducer.Context;

public class TestReducer extends Reducer<IntWritable, IntWritable, IntWritable, FloatWritable> {

    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        float sum = 0;
        int count = 0;
        for (IntWritable val : values) {
            sum += val.get();
            count += 1;
        }
        float result = sum / count;
        context.write(key, new FloatWritable(result));
    }
}



package Test;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class TestDriver extends Configured implements Tool {

    private static final Logger log = Logger.getLogger(TestDriver.class.getName());

    public static void main(String[] args) {
        int res = 1; // If 1 is never changed, the job did not finish correctly
        try {
            res = ToolRunner.run(new Configuration(), new TestDriver(), args);
        } catch (Exception e) {
            log.log(Level.SEVERE, "Error while running the job!");
            e.printStackTrace();
        }
        System.exit(res);
    }

    @Override
    public int run(String[] args) {
        log.log(Level.INFO, "Starting Map-Reduce job 'TestDriver'... ");
        Configuration conf = this.getConf();
        Job job = null;
        try {
            job = Job.getInstance(conf);
        } catch (IOException e1) {
            log.log(Level.SEVERE, "Error while instantiating the job!");
            e1.printStackTrace();
        }

        job.setJarByClass(TestDriver.class);
        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(FloatWritable.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        try {
            FileInputFormat.addInputPath(job, new Path(args[0]));
        } catch (IllegalArgumentException e) {
            log.log(Level.SEVERE, "Error (argument) while setting the input path!");
            e.printStackTrace();
        } catch (IOException e) {
            log.log(Level.SEVERE, "Error (IO) while setting the input path!");
            e.printStackTrace();
        }

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = false;
        try {
            result = job.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            log.log(Level.SEVERE, "Error (ClassNotFound) while running the job!");
            e.printStackTrace();
        } catch (IOException e) {
            log.log(Level.SEVERE, "Error (IOException) while running the job!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            log.log(Level.SEVERE, "Error (Interrupted) while running the job!");
            e.printStackTrace();
        }
        log.log(Level.INFO, "Done!");
        return result ? 0 : 1;
    }
}

```

Here is the error message:

```
java.lang.Exception: java.lang.StringIndexOutOfBoundsException: String index out of range: 5
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
```

My input file is a text file that looks like this:

```
200912024
2009120420
2009120750
200912083
2009120912
2009121066
2009121170
2009121225
2009121430
2009121560
2009121621
2009121722
2009121818
2009122112
2009122213
```

Thanks!

1 Answer:

Answer 0 (score: 0)

This happens because your mapper's input value is empty.

You are using

        job.setInputFormatClass( KeyValueTextInputFormat.class );

In KeyValueTextInputFormat, each line is divided into a key part and a value part by a separator byte (a tab character by default). If no such byte is present, the key is the entire line and the value is empty. See the KeyValueTextInputFormat class documentation.
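As a quick illustration (a minimal plain-Java sketch, not Hadoop code; the class name and the tab-separator assumption are mine), this is roughly what happens to a line of the sample input, which contains no separator byte:

```java
// Sketch of KeyValueTextInputFormat's split rule: the first separator byte
// (tab by default) divides the line into key and value. The sample input has
// no tab, so the key is the whole line and the value is empty -- which is why
// value.toString().substring(4, 5) throws in the mapper.
public class SeparatorDemo {
    public static void main(String[] args) {
        String line = "200912024";      // one line of the sample input
        int pos = line.indexOf('\t');   // default separator: tab
        String key = (pos == -1) ? line : line.substring(0, pos);
        String value = (pos == -1) ? "" : line.substring(pos + 1);
        System.out.println("key='" + key + "', value='" + value + "'");
        // Prints: key='200912024', value=''
    }
}
```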

So if you change the input format to the default:

    job.setInputFormatClass( TextInputFormat.class );
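If you make that change in TestDriver, the matching import is also needed (assuming the same Hadoop 2.x `mapreduce` API already used elsewhere in the driver):

```java
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
```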

and your mapper to:

```java
import java.io.IOException;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;

public class TestMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    private IntWritable date_int = new IntWritable();
    private IntWritable amount_int = new IntWritable();

    /**
     * @param key     - Line offset - ignored.
     * @param value   - Value to process.
     * @param context - Mapper context object for accessing output, configuration information, etc.
     * @throws IOException, InterruptedException
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String date_str = value.toString().substring(4, 5);
        String amount_str = value.toString().substring(7, 8);
        int date = Integer.parseInt(date_str);
        date_int.set(date);
        int amount = Integer.parseInt(amount_str);
        amount_int.set(amount);
        // Collect the results
        context.write(date_int, amount_int);
    }
}
```

it should work. Good luck!