我是初学者，正在尝试使用 MapReduce 和 Accumulo 编写一个字数统计（word count）程序。
我按照 Accumulo 在 GitHub 上的 MapReduce 示例编写了代码，并将输入文件保存在 HDFS 中。
但是当我尝试执行它时，它抛出了找不到 Map 类的异常（ClassNotFoundException: …$WordCountMapClass）。
以下是我的 Map 类所在文件的代码片段：

package mapreducepackageforaccumulo;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A simple MapReduce job that inserts word counts into Accumulo.
 *
 * <p>Usage: {@code WordCount <hdfs input path> <accumulo user> <accumulo password>}.
 * Each whitespace-separated token in the input becomes one Mutation with row = word,
 * family "count", qualifier "20080906", value "1" (a map-only job; counting is left
 * to an Accumulo SummingCombiner or a later scan — TODO confirm that is the intent).
 */
public class WordCount extends Configured implements Tool {

  private static final Logger log = LoggerFactory.getLogger(WordCount.class);

  /**
   * Map-only stage: splits each input line on whitespace and writes one
   * {@link Mutation} per word to Accumulo via {@link AccumuloOutputFormat}.
   */
  public static class WordCountMapClass extends Mapper<LongWritable,Text,Text,Mutation> {
    @Override
    public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
      log.info("Inside accumulo word count mapper");
      String[] words = value.toString().split("\\s+");
      for (String word : words) {
        Mutation mutation = new Mutation(new Text(word));
        // Explicit charset: bare getBytes() would depend on the task JVM's platform
        // default encoding, which can differ across cluster nodes.
        mutation.put(new Text("count"), new Text("20080906"),
            new Value("1".getBytes(StandardCharsets.UTF_8)));
        // AccumuloOutputFormat ignores the emitted key; the Mutation's row (the word)
        // determines where the cell lands, so null is fine here.
        output.write(null, mutation);
      }
    }
  }

  /**
   * Configures and submits the job.
   *
   * @param args {@code [0]} HDFS input path, {@code [1]} Accumulo user, {@code [2]} password
   * @return 0 on success, 1 if the job failed
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   */
  @Override
  public int run(String[] args) throws Exception {
    if (args.length < 3) {
      throw new IllegalArgumentException(
          "Usage: WordCount <hdfs input path> <user> <password>");
    }
    String input = args[0];
    CreateConnection createConnection = new CreateConnection();
    createConnection.setUser(args[1]);
    createConnection.setPassword(args[2]);
    String table = "wordcountTable";

    Job job = Job.getInstance(getConf());
    job.setJobName(WordCount.class.getName());
    // setJarByClass only works when this class was loaded from a plain jar on the
    // classpath. The pasted log shows "No job jar file set" / "Job jar is not
    // present", which is exactly why the task JVMs threw ClassNotFoundException for
    // WordCount$WordCountMapClass: launching through the `accumulo jar` wrapper can
    // defeat the jar lookup. If the warning persists, set the jar path explicitly:
    //   job.setJar("/home/supriti/Javajars/wordcount.jar");
    job.setJarByClass(WordCount.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, input);
    // Do NOT log the password — credentials must never appear in stdout/logs.
    log.info("Submitting word count for user {} over input {}", createConnection.getUser(), input);

    job.setMapperClass(WordCountMapClass.class);
    job.setNumReduceTasks(0); // map-only: mutations go straight to Accumulo
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    // AccumuloInputFormat is not used here, but it takes the same configuration calls.
    AccumuloOutputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault()
            .withInstance(createConnection.getInstanceName())
            .withZkHosts(createConnection.getZooServers()));
    AccumuloOutputFormat.setConnectorInfo(job, createConnection.getUser(),
        new PasswordToken(createConnection.getPassword()));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, table);

    // Propagate failure: the original discarded this boolean and always returned 0,
    // so a failed job (as in the pasted log) still exited with success status.
    return job.waitForCompletion(true) ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new WordCount(), args);
    System.exit(res);
  }
}
以前我曾经遇到过 LimitInputStream 的 class not found 异常，所以我把 guava jar 换成了 Hadoop 自带的版本，即 guava-11.0.2.jar。
下面是堆栈跟踪：
[supriti@bhucloud01 ~]$ accumulo jar /home/supriti/Javajars/wordcount.jar /Supr iti/new.txt supriti supriti123
user..... supriti pass.... supriti123 path.... /Supriti/new.txt
2015-08-13 11:29:07,694 [client.RMProxy] INFO : Connecting to ResourceManager at bhucloud01.ad.infosys.com/10.118.192.109:8032
2015-08-13 11:29:08,789 [mapreduce.JobSubmitter] WARN : No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2015-08-13 11:29:08,808 [input.FileInputFormat] INFO : Total input paths to process : 1
2015-08-13 11:29:09,270 [mapreduce.JobSubmitter] INFO : number of splits:1
2015-08-13 11:29:09,487 [mapreduce.JobSubmitter] INFO : Submitting tokens for job: job_1439440228760_0006
2015-08-13 11:29:09,817 [mapred.YARNRunner] INFO : Job jar is not present. Not adding any jar to the list of resources.
2015-08-13 11:29:09,919 [impl.YarnClientImpl] INFO : Submitted application application_1439440228760_0006
2015-08-13 11:29:09,997 [mapreduce.Job] INFO : The url to track the job: http://bhucloud01.ad.infosys.com:8088/proxy/application_1439440228760_0006/
2015-08-13 11:29:09,999 [mapreduce.Job] INFO : Running job: job_1439440228760_0006
2015-08-13 11:29:23,320 [mapreduce.Job] INFO : Job job_1439440228760_0006 running in uber mode : false
2015-08-13 11:29:23,322 [mapreduce.Job] INFO : map 0% reduce 0%
2015-08-13 11:29:27,426 [mapreduce.Job] INFO : Task Id : attempt_1439440228760_0006_m_000000_0, Status : FAILED
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2047)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getMapperClass(JobContextImpl.java:196)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:742)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Caused by: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1953)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2045)
... 8 more
2015-08-13 11:29:35,544 [mapreduce.Job] INFO : Task Id : attempt_1439440228760_0006_m_000000_1, Status : FAILED
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2047)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getMapperClass(JobContextImpl.java:196)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:742)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Caused by: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1953)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2045)
... 8 more
2015-08-13 11:29:41,604 [mapreduce.Job] INFO : Task Id : attempt_1439440228760_0006_m_000000_2, Status : FAILED
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2047)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getMapperClass(JobContextImpl.java:196)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:742)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Caused by: java.lang.ClassNotFoundException: Class mapreducepackageforaccumulo.WordCount$WordCountMapClass not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1953)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2045)
... 8 more
2015-08-13 11:29:47,657 [mapreduce.Job] INFO : map 100% reduce 0%
2015-08-13 11:29:47,672 [mapreduce.Job] INFO : Job job_1439440228760_0006 failed with state FAILED due to: Task failed task_1439440228760_0006_m_000000
Job failed as tasks failed. failedMaps:1 failedReduces:0
2015-08-13 11:29:47,866 [mapreduce.Job] INFO : Counters: 9
Job Counters
Failed map tasks=4
Launched map tasks=4
Other local map tasks=3
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=29516
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=14758
Total vcore-seconds taken by all map tasks=14758
Total megabyte-seconds taken by all map tasks=30224384