我正在尝试编写一个地图缩小代码,该代码将表存储在文本文件中。该表有两个属性。一个是id,第二个是name,代码应该采用具有相同id的所有值并连接它们。例:1 xyz 2 xyz 1 abc应该导致1 xyzabc 2 xyz。 以下是我的代码版本。作为初学者,我修改了MaxTemperature代码以学习
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTemperature {
public static class MaxTemperatureMapper
extends Mapper<Text, Text, Text, Text> {
@Override
public void map(Text key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
String lastWord = line.substring(line.lastIndexOf(" ")+1);
Text valq = new Text();
valq.set(line.substring(0,4));
context.write(new Text(lastWord), valq );
}
}
public static class MaxTemperatureReducer
extends Reducer<Text, Text, Text, Text> {
@Override
public void reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
String p="";
for (Text value : values) {
p=p+value.toString();
}
Text aa= new Text();
aa.set(p);
context.write(key, new Text(aa));
}
}
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: MaxTemperature <input path> <output path>");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(MaxTemperature.class);
job.setJobName("Max temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
我的输入文件
123456 name
123456 name
123456 age
123456 age
123456 relation
132323 age
123565 name
258963 test
258963 age
254789 age
254259 age
652145 name
985745 name
523698 name
214569 ame
123546 name
123456 age
321456 age
123456 age
124589 hyderabad
〜
预期产出
123456 name,name,age (all values with index 123456)
124589 hyderabad (al values with index 124589)
我收到以下错误
java.lang.StringIndexOutOfBoundsException: String index out of range: 4
at java.lang.String.substring(String.java:1907)
at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:39)
at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:26)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:672)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.Child.main(Child.java:262)
答案 0 :(得分:0)
3件事: