我正在尝试通过Hadoop在localhost上执行MapReduce程序。
我写了一个程序来计算文本文件中的单词。 源代码非常简单:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
public class WordCount {
static int printUsage() {
System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
public static void main(String[] args) throws Exception {
List<String> otherArgs = new ArrayList<String>();
Configuration conf = new Configuration();
for(int i=0; i < args.length; ++i) {
try {
if ("-m".equals(args[i])) {
conf.setInt("mapreduce.job.maps", Integer.parseInt(args[++i]));
} else if ("-r".equals(args[i])) {
conf.setInt("mapreduce.job.reduces", Integer.parseInt(args[++i]));
} else {
otherArgs.add(args[i]);
}
} catch (NumberFormatException except) {
System.out.println("ERROR: Integer expected instead of " + args[i]);
System.exit(printUsage());
} catch (ArrayIndexOutOfBoundsException except) {
System.out.println("ERROR: Required parameter missing from " +
args[i-1]);
System.exit(printUsage());
}
}
// Make sure there are exactly 2 parameters left.
if (otherArgs.size() != 2) {
System.out.println("ERROR: Wrong number of parameters: " +
otherArgs.size() + " instead of 2.");
System.exit(printUsage());
}
Path input = new Path(otherArgs.get(0));
Path output =new Path(otherArgs.get(1));
Job job = Job.getInstance(conf);
job.setJarByClass(WordCount.class);
FileInputFormat.setInputPaths(job, input);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(MyMapper.class);
FileOutputFormat.setOutputPath(job, output);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setCombinerClass(MyReducer.class);
job.setReducerClass(MyReducer.class);
job.waitForCompletion(true);
}
public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
@Override
protected void cleanup(Context context) throws IOException,
InterruptedException {
// TODO Auto-generated method stub
super.cleanup(context);
}
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
super.map(key, value, context);
}
@Override
public void run(Context context) throws IOException,
InterruptedException {
// TODO Auto-generated method stub
super.run(context);
}
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
// TODO Auto-generated method stub
super.setup(context);
}
}
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
@Override
protected void cleanup(Context context) throws IOException,
InterruptedException {
// TODO Auto-generated method stub
super.cleanup(context);
}
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
super.reduce(key, values, context);
}
@Override
public void run(Context arg0) throws IOException, InterruptedException {
// TODO Auto-generated method stub
super.run(arg0);
}
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
// TODO Auto-generated method stub
super.setup(context);
}
}
}
我是通过命令执行的:
yarn jar wordcount.jar /input /output
我收到了这个错误:
14/05/10 19:10:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/05/10 19:10:34 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
14/05/10 19:10:34 WARN mapreduce.JobSubmitter: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
14/05/10 19:10:36 INFO input.FileInputFormat: Total input paths to process : 1
14/05/10 19:10:36 INFO mapreduce.JobSubmitter: number of splits:1
14/05/10 19:10:36 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.output.value.class is deprecated. Instead, use mapreduce.job.output.value.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapreduce.combine.class is deprecated. Instead, use mapreduce.job.combine.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
14/05/10 19:10:36 INFO Configuration.deprecation: mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.output.dir is deprecated. Instead, use mapreduce.output.fileoutputformat.outputdir
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
14/05/10 19:10:36 INFO Configuration.deprecation: mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
14/05/10 19:10:36 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1399731445858_0009
14/05/10 19:10:37 INFO impl.YarnClientImpl: Submitted application application_1399731445858_0009 to ResourceManager at /0.0.0.0:8032
14/05/10 19:10:37 INFO mapreduce.Job: The url to track the job: http://dario-R510-P510:8088/proxy/application_1399731445858_0009/
14/05/10 19:10:37 INFO mapreduce.Job: Running job: job_1399731445858_0009
14/05/10 19:10:58 INFO mapreduce.Job: Job job_1399731445858_0009 running in uber mode : false
14/05/10 19:10:58 INFO mapreduce.Job: map 0% reduce 0%
14/05/10 19:11:06 INFO mapreduce.Job: Task Id : attempt_1399731445858_0009_m_000000_0, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1050)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
at WordCount$MyMapper.map(WordCount.java:92)
at WordCount$MyMapper.map(WordCount.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at WordCount$MyMapper.run(WordCount.java:99)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/05/10 19:11:13 INFO mapreduce.Job: Task Id : attempt_1399731445858_0009_m_000000_1, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1050)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
at WordCount$MyMapper.map(WordCount.java:92)
at WordCount$MyMapper.map(WordCount.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at WordCount$MyMapper.run(WordCount.java:99)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/05/10 19:11:19 INFO mapreduce.Job: Task Id : attempt_1399731445858_0009_m_000000_2, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1050)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
at WordCount$MyMapper.map(WordCount.java:92)
at WordCount$MyMapper.map(WordCount.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at WordCount$MyMapper.run(WordCount.java:99)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/05/10 19:11:27 INFO mapreduce.Job: map 100% reduce 100%
14/05/10 19:11:27 INFO mapreduce.Job: Job job_1399731445858_0009 failed with state FAILED due to: Task failed task_1399731445858_0009_m_000000
Job failed as tasks failed. failedMaps:1 failedReduces:0
14/05/10 19:11:27 INFO mapreduce.Job: Counters: 6
Job Counters
Failed map tasks=4
Launched map tasks=4
Other local map tasks=3
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=22040
Total time spent by all reduces in occupied slots (ms)=0
我哪里错了?
答案 0 :(得分:0)
在此行public static class MyMapper extends Mapper LongWritable, Text, Text, IntWritable>
中,您告诉输出键的类型为Text,输出值的类型为IntWritable。
然后,在map map {{1}中在这一行中,您正在写出LongWritable键和Text值。
protected void map(LongWritable key, Text value, Context context)
。在super.map中会有类型转换,如(Text)键和(Intwritable)值,这里会生成错误。
改变一个或另一个。