我正在尝试使用 MapReduce 运行一个基本的单词计数(WordCount)作业。源代码可以在 Hadoop 官方网站上找到。
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
public class TestDriver {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
BasicConfigurator.configure();
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(TestDriver.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
我的控制台输出是:
log4j: Trying to find [log4j.xml] using context classloader sun.misc.Launcher$AppClassLoader@73d16e93.
log4j: Trying to find [log4j.xml] using sun.misc.Launcher$AppClassLoader@73d16e93 class loader.
log4j: Trying to find [log4j.xml] using ClassLoader.getSystemResource().
log4j: Trying to find [log4j.properties] using context classloader sun.misc.Launcher$AppClassLoader@73d16e93.
log4j: Trying to find [log4j.properties] using sun.misc.Launcher$AppClassLoader@73d16e93 class loader.
log4j: Trying to find [log4j.properties] using ClassLoader.getSystemResource().
log4j: Could not find resource: [null].
1 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(always=false, sampleName=Ops, about=, type=DEFAULT, value=[Rate of successful kerberos logins and latency (milliseconds)], valueName=Time)
39 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(always=false, sampleName=Ops, about=, type=DEFAULT, value=[Rate of failed kerberos logins and latency (milliseconds)], valueName=Time)
40 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.getGroups with annotation @org.apache.hadoop.metrics2.annotation.Metric(always=false, sampleName=Ops, about=, type=DEFAULT, value=[GetGroups], valueName=Time)
46 [main] DEBUG org.apache.hadoop.metrics2.impl.MetricsSystemImpl - UgiMetrics, User and group related metrics
416 [main] DEBUG org.apache.hadoop.security.authentication.util.KerberosName - Kerberos krb5 configuration not found, setting default realm to empty
429 [main] DEBUG org.apache.hadoop.security.Groups - Creating new Groups object
439 [main] DEBUG org.apache.hadoop.util.NativeCodeLoader - Trying to load the custom-built native-hadoop library...
449 [main] DEBUG org.apache.hadoop.util.NativeCodeLoader - Loaded the native-hadoop library
451 [main] DEBUG org.apache.hadoop.security.JniBasedUnixGroupsMapping - Using JniBasedUnixGroupsMapping for Group resolution
451 [main] DEBUG org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMapping
545 [main] DEBUG org.apache.hadoop.security.Groups - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000; warningDeltaMs=5000
558 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - hadoop login
560 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - hadoop login commit
577 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - using local user:NTUserPrincipal: Arunaabh
577 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - Using user: "NTUserPrincipal: Arunaabh" with name Arunaabh
578 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - User entry: "Arunaabh"
579 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - UGI loginUser:Arunaabh (auth:SIMPLE)
862 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - PrivilegedAction as:Arunaabh (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.connect(Job.java:1255)
878 [main] DEBUG org.apache.hadoop.mapreduce.Cluster - Trying ClientProtocolProvider : org.apache.hadoop.mapred.YarnClientProtocolProvider
879 [main] DEBUG org.apache.hadoop.mapreduce.Cluster - Cannot pick org.apache.hadoop.mapred.YarnClientProtocolProvider as the ClientProtocolProvider - returned null protocol
884 [main] DEBUG org.apache.hadoop.mapreduce.Cluster - Trying ClientProtocolProvider : org.apache.hadoop.mapred.LocalClientProtocolProvider
913 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - session.id is deprecated. Instead, use dfs.metrics.session-id
916 [main] INFO org.apache.hadoop.metrics.jvm.JvmMetrics - Initializing JVM Metrics with processName=JobTracker, sessionId=
1000 [main] DEBUG org.apache.hadoop.mapreduce.Cluster - Picked org.apache.hadoop.mapred.LocalClientProtocolProvider as the ClientProtocolProvider
1002 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - PrivilegedAction as:Arunaabh (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Cluster.getFileSystem(Cluster.java:162)
1014 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - PrivilegedAction as:Arunaabh (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
1044 [main] DEBUG org.apache.hadoop.mapreduce.JobSubmitter - Configuring job job_local545640952_0001 with file:/tmp/hadoop-Arunaabh/mapred/staging/Arunaabh545640952/.staging/job_local545640952_0001 as the submit dir
1044 [main] DEBUG org.apache.hadoop.mapreduce.JobSubmitter - adding the following namenodes' delegation tokens:[file:///]
2292 [main] WARN org.apache.hadoop.mapreduce.JobResourceUploader - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2293 [main] DEBUG org.apache.hadoop.mapreduce.JobResourceUploader - default FileSystem: file:///
2392 [main] DEBUG org.apache.hadoop.io.nativeio.NativeIO - Initialized cache for IDs to User/Group mapping with a cache timeout of 14400 seconds.
2400 [main] INFO org.apache.hadoop.mapreduce.JobSubmitter - Cleaning up the staging area file:/tmp/hadoop-Arunaabh/mapred/staging/Arunaabh545640952/.staging/job_local545640952_0001
2401 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - PrivilegedActionException as:Arunaabh (auth:SIMPLE) cause:0: No such file or directory
Exception in thread "main" 0: No such file or directory
at org.apache.hadoop.io.nativeio.NativeIO$POSIX.chmod(NativeIO.java:236)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:724)
at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:502)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:600)
at org.apache.hadoop.mapreduce.JobResourceUploader.uploadFiles(JobResourceUploader.java:94)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:95)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:190)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
at TestDriver.main(TestDriver.java:62)
我的单节点集群是按照官方网站的说明配置的: https://wiki.apache.org/hadoop/Hadoop2OnWindows
我已将以下内容作为参数传递:
C:\Users\Arunaabh\workspace\TestHadoop\hadooptest.txt
C:\Users\Arunaabh\workspace\TestHadoop\output
该错误似乎不是 Java 的输入或 IO 异常。此外,我的 YARN、NameNode 和 DataNode 都在正常工作。
我是在 Windows 上运行 Hadoop 的,请不要发布针对 Linux 的答案。
答案 0 :(得分:0)
您将本地文件系统上的输入和输出路径作为参数传递给了 MapReduce 作业:
C:\Users\Arunaabh\workspace\TestHadoop\hadooptest.txt
C:\Users\Arunaabh\workspace\TestHadoop\output
您需要改为提供 HDFS 文件系统上的输入和输出路径作为参数。