输入数据

Question

我想知道Mapper解析文件时会遇到这个问题。我的代码非常简单，我通过“::”分隔文件行获取数据。

例如（输入）：

1::Toy Story (1995)::2077

使用我在练习中经常做的mapper的剪辑代码

String tokens[]= value.toString().split("::");
int empId = Integer.parseInt(tokens[0]) ;   
int count = Integer.parseInt(tokens[2]) ;

从技术上讲，线路应按如下方式划分。

1               Toy Story (1995)         2077   
tokens[0]       tokens[1]                tokens[2]

所以，如果我正在寻找令牌[0]和令牌[2]，那么为什么作业正在挑选令牌[1]，这就是抛出NumberFormatException异常，如果我试图将char解析为int，这是预期的异常。能不能帮我解决这个问题。

17/09/05 19:06:49 INFO mapreduce.Job: Task Id : attempt_1500305785265_0095_m_000000_2, Status : FAILED
Error: java.lang.NumberFormatException: For input string: "1::Toy Story (1995)::2077"
        at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
        at java.lang.Integer.parseInt(Integer.java:580)
        at java.lang.Integer.parseInt(Integer.java:615)
        at com.dataflair.comparableTest.ValueSortExp$MapTask.map(ValueSortExp.java:93)
        at com.dataflair.comparableTest.ValueSortExp$MapTask.map(ValueSortExp.java:1)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)

CODE

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.IntWritable.Comparator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class ValueSortExp2 {
public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration(true);

    String arguments[] = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = new Job(conf, "Test commond");
    job.setJarByClass(ValueSortExp.class);

    // Setup MapReduce
    job.setMapperClass(ValueSortExp.MapTask.class);
    job.setReducerClass(ValueSortExp.ReduceTask.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    //job.setSortComparatorClass(IntComparator.class);
    // Input
    FileInputFormat.addInputPath(job, new Path(arguments[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, new Path(arguments[1]));
    job.setOutputFormatClass(TextOutputFormat.class);

    /*
     * // Delete output if exists FileSystem hdfs = FileSystem.get(conf); if
     * (hdfs.exists(outputDir)) hdfs.delete(outputDir, true);
     * 
     * // Execute job int code = job.waitForCompletion(true) ? 0 : 1;
     * System.exit(code);
     */

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    System.exit(code);

}

/*public static class IntComparator extends WritableComparator {

    public IntComparator() {
        super(IntWritable.class);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

        Integer v1 = ByteBuffer.wrap(b1, s1, l1).getInt();
        Integer v2 = ByteBuffer.wrap(b2, s2, l2).getInt();

        return v1.compareTo(v2) * (-1);
    }
}*/

public static class MapTask extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        public void  map(LongWritable key,Text value, Context context) throws IOException, InterruptedException {

            String tokens[]= value.toString().split("::");

            int empId = Integer.parseInt(tokens[0]) ;   
            int count = Integer.parseInt(tokens[2]) ;

            context.write(new IntWritable(count), new IntWritable(empId));

        }   

    }


public static class ReduceTask extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    public void reduce(IntWritable key, Iterable<IntWritable> list, Context context)
            throws java.io.IOException, InterruptedException {

        for (IntWritable value : list) {

            context.write(key, value);

        }

    }
}
}

输入数据

1::Toy Story (1995)::2077
10::GoldenEye (1995)::888
100::City Hall (1996)::128
1000::Curdled (1996)::20

在所有输入数据都很好之后仍然获得NumberFormatException异常

输入数据

0 个答案: