我想知道Mapper解析文件时会遇到这个问题。我的代码非常简单,我通过“::”分隔文件行获取数据。
例如(输入):
1::Toy Story (1995)::2077
使用我在练习中经常做的mapper的剪辑代码
String tokens[]= value.toString().split("::");
int empId = Integer.parseInt(tokens[0]) ;
int count = Integer.parseInt(tokens[2]) ;
从技术上讲,线路应按如下方式划分。
1 Toy Story (1995) 2077
tokens[0] tokens[1] tokens[2]
所以,如果我正在寻找令牌[0]和令牌[2],那么为什么作业正在挑选令牌[1],这就是抛出NumberFormatException异常,如果我试图将char解析为int,这是预期的异常。能不能帮我解决这个问题。
17/09/05 19:06:49 INFO mapreduce.Job: Task Id : attempt_1500305785265_0095_m_000000_2, Status : FAILED
Error: java.lang.NumberFormatException: For input string: "1::Toy Story (1995)::2077"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:580)
at java.lang.Integer.parseInt(Integer.java:615)
at com.dataflair.comparableTest.ValueSortExp$MapTask.map(ValueSortExp.java:93)
at com.dataflair.comparableTest.ValueSortExp$MapTask.map(ValueSortExp.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
CODE
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.IntWritable.Comparator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class ValueSortExp2 {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration(true);
String arguments[] = new GenericOptionsParser(conf, args).getRemainingArgs();
Job job = new Job(conf, "Test commond");
job.setJarByClass(ValueSortExp.class);
// Setup MapReduce
job.setMapperClass(ValueSortExp.MapTask.class);
job.setReducerClass(ValueSortExp.ReduceTask.class);
job.setNumReduceTasks(1);
// Specify key / value
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
//job.setSortComparatorClass(IntComparator.class);
// Input
FileInputFormat.addInputPath(job, new Path(arguments[0]));
job.setInputFormatClass(TextInputFormat.class);
// Output
FileOutputFormat.setOutputPath(job, new Path(arguments[1]));
job.setOutputFormatClass(TextOutputFormat.class);
/*
* // Delete output if exists FileSystem hdfs = FileSystem.get(conf); if
* (hdfs.exists(outputDir)) hdfs.delete(outputDir, true);
*
* // Execute job int code = job.waitForCompletion(true) ? 0 : 1;
* System.exit(code);
*/
// Execute job
int code = job.waitForCompletion(true) ? 0 : 1;
System.exit(code);
}
/*public static class IntComparator extends WritableComparator {
public IntComparator() {
super(IntWritable.class);
}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
Integer v1 = ByteBuffer.wrap(b1, s1, l1).getInt();
Integer v2 = ByteBuffer.wrap(b2, s2, l2).getInt();
return v1.compareTo(v2) * (-1);
}
}*/
public static class MapTask extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
public void map(LongWritable key,Text value, Context context) throws IOException, InterruptedException {
String tokens[]= value.toString().split("::");
int empId = Integer.parseInt(tokens[0]) ;
int count = Integer.parseInt(tokens[2]) ;
context.write(new IntWritable(count), new IntWritable(empId));
}
}
public static class ReduceTask extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
public void reduce(IntWritable key, Iterable<IntWritable> list, Context context)
throws java.io.IOException, InterruptedException {
for (IntWritable value : list) {
context.write(key, value);
}
}
}
}
1::Toy Story (1995)::2077
10::GoldenEye (1995)::888
100::City Hall (1996)::128
1000::Curdled (1996)::20