我使用 Hadoop 2.6、Spring Hadoop 和 Avro 1.7.4 来处理 Avro 输入文件,但是被一个奇怪的错误卡住了。我已将输入格式设置为 AvroKeyInputFormat,但是 Hadoop 仍然无法识别键(key)的类型。这是堆栈跟踪:
04-04 12:15:58 INFO org.apache.hadoop.mapred.LocalJobRunner map task executor complete.
04-04 12:15:59 WARN org.apache.hadoop.mapred.LocalJobRunner job_local1850669149_0002
java.lang.Exception: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at com.truven.bp.data.mapred.Bdual$BdualMapper.map(Bdual.java:32)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
04-04 12:15:59 INFO org.apache.hadoop.mapreduce.Job Job job_local1850669149_0002 running in uber mode : false
import com.truven.bp.data.converter.GenericConverter;
import com.truven.bp.data.utils.Constant;
import com.truven.dataforge.paidclaims.avro.pojo.EnrollmentMonthlyEligibilityPrograms;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * MapReduce driver that reads Avro {@code EnrollmentMonthlyEligibilityPrograms}
 * records and writes them out as text lines, preceded by a header line.
 *
 * <p>The job is wired in {@link #run(String[])}: input is read through
 * {@link AvroKeyInputFormat}, so the mapper receives each record as an
 * {@link AvroKey} with a {@link NullWritable} value.
 *
 * <p>NOTE(review): a {@code ClassCastException} of the form
 * "LongWritable cannot be cast to AvroKey" inside {@code BdualMapper.map} means the
 * job that actually ran was using a text-style input format (LongWritable keys) and
 * not the one set in {@link #run(String[])}. When the job is launched from an external
 * job definition (e.g. a Spring Hadoop job bean) rather than through this {@link Tool},
 * presumably that definition must set the input format and the Avro input key schema
 * itself - confirm against the launcher configuration.
 */
public class Bdual extends Configured implements Tool {
    private static final Log logger = LogFactory.getLog(Bdual.class);

    /**
     * Converts every Avro record into a single text line and emits it as the map
     * output key; the value carries no information.
     */
    private static class BdualMapper
            extends Mapper<AvroKey<EnrollmentMonthlyEligibilityPrograms>, NullWritable, Text, NullWritable> {
        private final GenericConverter genericConverter = new GenericConverter();
        // Reused for every record; context.write() serializes the contents immediately.
        private final Text outputLine = new Text();

        /** Initializes the converter with the Bdual field mapping before any record is mapped. */
        @Override
        public void setup(Context context)
                throws IOException, InterruptedException {
            genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
        }

        /**
         * Converts the datum wrapped by {@code key} to text and writes it out.
         *
         * @param key     Avro wrapper around one input record
         * @param value   unused; the Avro key input format supplies no value
         * @param context task context used to emit the converted line
         */
        @Override
        public void map(AvroKey<EnrollmentMonthlyEligibilityPrograms> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            String fields = genericConverter.convert(key.datum(), null);
            outputLine.set(fields);
            context.write(outputLine, NullWritable.get());
        }
    }

    /**
     * Writes the header line once and then passes each distinct key through.
     */
    private static class BdualReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
        private final GenericConverter genericConverter = new GenericConverter();

        /** Initializes the converter and emits the header before any key is reduced. */
        @Override
        public void setup(Context context) throws IOException, InterruptedException {
            genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
            context.write(new Text(genericConverter.getHeader()), NullWritable.get());
        }

        /**
         * Emits {@code key} exactly once; the values are ignored, so identical lines
         * produced by the mappers collapse into a single output line.
         */
        @Override
        public void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the conversion job.
     *
     * @param args command-line arguments; after generic Hadoop options are removed,
     *             exactly two must remain: the input path and the output path
     * @return 0 when the job succeeds, 1 when it fails, 2 on a usage error
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: bdual <in> <out>");
            // Return instead of System.exit so an embedding JVM is not terminated;
            // main() still turns this into the process exit code.
            return 2;
        }

        Job job = Job.getInstance(conf, "Avro Bdual Conversion");
        job.setJarByClass(Bdual.class);

        // Use the arguments left over after generic-option parsing, i.e. the same
        // array whose length was validated above.
        Path inPath = new Path(otherArgs[0]);
        Path outPath = new Path(otherArgs[1]);
        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        // Map side: Avro records in, text lines out.
        job.setInputFormatClass(AvroKeyInputFormat.class);
        job.setMapperClass(BdualMapper.class);
        AvroJob.setInputKeySchema(job, EnrollmentMonthlyEligibilityPrograms.getClassSchema());
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Reduce side: a single reducer, which writes the header in its setup().
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setReducerClass(BdualReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Command-line entry point; exits with the status returned by {@link #run(String[])}. */
    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new Bdual(), args);
        System.exit(result);
    }
}