I'm running into a problem with my MapReduce code. It says my string index is out of range, but the string is long enough. Does anyone have a suggestion? Thanks!
Here is my code:
```java
package Test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TestMapper extends Mapper<Text, Text, IntWritable, IntWritable> {

    private IntWritable date_int = null;
    private IntWritable amount_int = null;

    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {

        String date_str = value.toString().substring(4, 5);
        String amount_str = value.toString().substring(7, 8);

        date_int = new IntWritable(Integer.parseInt(date_str));
        amount_int = new IntWritable(Integer.parseInt(amount_str));

        // Collect the results
        context.write(date_int, amount_int);
    }
}
package Test;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class TestReducer extends Reducer<IntWritable, IntWritable, IntWritable, FloatWritable> {

    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {

        float sum = 0;
        int count = 0;
        for (IntWritable val : values) {
            sum += val.get();
            count += 1;
        }

        float result = sum / count;
        context.write(key, new FloatWritable(result));
    }
}
package Test;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class TestDriver extends Configured implements Tool {

    private final static Logger log = Logger.getLogger(TestDriver.class.getName());

    public static void main(String[] args) {
        int res = 1; // If res stays 1, the job did not finish correctly
        try {
            res = ToolRunner.run(new Configuration(), new TestDriver(), args);
        } catch (Exception e) {
            log.log(Level.SEVERE, "Error while running the job!");
            e.printStackTrace();
        }
        System.exit(res);
    }

    @Override
    public int run(String[] args) {
        log.log(Level.INFO, "Starting map-reduce job 'TestDriver'... ");

        Configuration conf = this.getConf();

        Job job = null;
        try {
            job = Job.getInstance(conf);
        } catch (IOException e1) {
            log.log(Level.SEVERE, "Error while instantiating the job!");
            e1.printStackTrace();
        }

        job.setJarByClass(TestDriver.class);
        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(FloatWritable.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        try {
            FileInputFormat.addInputPath(job, new Path(args[0]));
        } catch (IllegalArgumentException e) {
            log.log(Level.SEVERE, "Error (argument) while setting the input path!");
            e.printStackTrace();
        } catch (IOException e) {
            log.log(Level.SEVERE, "Error (IO) while setting the input path!");
            e.printStackTrace();
        }

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = false;
        try {
            result = job.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            log.log(Level.SEVERE, "Error (ClassNotFound) while running the job!");
            e.printStackTrace();
        } catch (IOException e) {
            log.log(Level.SEVERE, "Error (IOException) while running the job!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            log.log(Level.SEVERE, "Error (Interrupted) while running the job!");
            e.printStackTrace();
        }

        log.log(Level.INFO, "Done!");
        return result ? 0 : 1;
    }
}
```
This is the error message:
java.lang.Exception: java.lang.StringIndexOutOfBoundsException: String index out of range: 5
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
My input file is a text file that looks like this:
200912024
2009120420
2009120750
200912083
2009120912
2009121066
2009121170
2009121225
2009121430
2009121560
2009121621
2009121722
2009121818
2009122112
2009122213
Thanks!
Answer 0: (score: 0)
That is because your mapper's input value is empty.
You are using
`job.setInputFormatClass(KeyValueTextInputFormat.class);`
With KeyValueTextInputFormat, each line is split into a key part and a value part by a separator byte. If no such byte is present, the key is the entire line and the value is empty. See the KeyValueTextInputFormat class documentation.
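To illustrate, here is a minimal standalone sketch (not Hadoop's actual implementation; the class name is made up) of that split with the default tab separator. A line like `200912024` contains no tab, so the whole line becomes the key and the value is the empty string, which is exactly what makes the mapper's `substring(4, 5)` call fail:

```java
// Minimal sketch of KeyValueTextInputFormat's split behaviour with the
// default tab separator (illustrative only; class name is hypothetical).
public class KeyValueSplitSketch {
    public static void main(String[] args) {
        String line = "200912024";      // a sample line from the input file, contains no tab
        int sep = line.indexOf('\t');   // the default key/value separator is the tab character

        // No separator found: the whole line becomes the key, the value stays empty.
        String key = (sep == -1) ? line : line.substring(0, sep);
        String value = (sep == -1) ? "" : line.substring(sep + 1);

        System.out.println("key='" + key + "', value='" + value + "'");
        // value is "", so value.substring(4, 5) in the mapper throws
        // java.lang.StringIndexOutOfBoundsException: String index out of range: 5
    }
}
```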
So if you change the input format to the default:
`job.setInputFormatClass(TextInputFormat.class);`
and your mapper to:
```java
import java.io.IOException;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;

public class TestMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    private IntWritable date_int = new IntWritable();
    private IntWritable amount_int = new IntWritable();

    /**
     * @param key - Line offset - ignored.
     * @param value - Value to process.
     * @param context - MapperContext object for accessing output, configuration information, etc.
     * @throws IOException, InterruptedException.
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        String date_str = value.toString().substring(4, 5);
        String amount_str = value.toString().substring(7, 8);

        int date = Integer.parseInt(date_str);
        date_int.set(date);
        int amount = Integer.parseInt(amount_str);
        amount_int.set(amount);

        // Collect the results
        context.write(date_int, amount_int);
    }
}
```
it should work. Good luck!
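With the default TextInputFormat, the whole line arrives as the mapper's value, so the substring calls have characters to index into. As a quick standalone check (outside Hadoop; the class name is made up), this is what those calls extract from one of the sample lines:

```java
// Standalone check of the mapper's substring logic on one sample input line
// (illustrative only; class name is hypothetical).
public class SubstringCheck {
    public static void main(String[] args) {
        String value = "2009120420";                // second line of the sample input

        String date_str = value.substring(4, 5);    // character at index 4 -> "1"
        String amount_str = value.substring(7, 8);  // character at index 7 -> "4"

        System.out.println("date=" + date_str + ", amount=" + amount_str);
    }
}
```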