Error importing data from MongoDB into Hadoop

Date: 2015-09-13 11:47:17

Tags: hadoop

import java.io.*;
import org.apache.commons.logging.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.*;
import org.bson.*;
import com.mongodb.hadoop.*;
import com.mongodb.hadoop.util.*;

@SuppressWarnings("unused")
public class ImportfromMongoToHdfs{
    private static final Log log =      
    LogFactory.getLog(ImportfromMongoToHdfs.class);
    public static class ReadEmpDataFromMongo extends      
    Mapper<Object,BSONObject, Text, Text>{
        public void map(Object key, BSONObject value, Context context) throws   
        IOException, InterruptedException{
            System.out.println("Key: " + key);
            System.out.println("Value: " + value);
            String md5 = value.get("md5").toString();
            String name = value.get("name").toString();
            String dept = value.get("dept").toString();
            String salary = value.get("salary").toString();
            String location = value.get("location").toString();
            String output = "\t" + name + "\t" + dept + "\t" + salary + "\t" +     
            location;
            context.write( new Text(md5), new Text(output));
        }
    }
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        // Read from the emp collection in the admin database via the mongo-hadoop connector.
        MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/admin.emp");
        MongoConfigUtil.setCreateInputSplits(conf, false);
        System.out.println("Configuration: " + conf);

        final Job job = new Job(conf, "ReadWeblogsFromMongo"); // constructor deprecated in Hadoop 2.x (see the sketch below)
        Path out = new Path("/mongodb3");
        FileOutputFormat.setOutputPath(job, out);
        job.setJarByClass(ImportfromMongoToHdfs.class);
        job.setMapperClass(ReadEmpDataFromMongo.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(MongoInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(0); // map-only: mapper output is written straight to HDFS
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
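
For reference, a minimal sketch of the same driver against the Hadoop 2.x API, where the new Job(conf, name) constructor is deprecated in favor of the Job.getInstance(...) factory. This assumes a Hadoop 2.x cluster; the mapper and all other settings are unchanged from the code above:

    // Sketch only, assuming Hadoop 2.x.
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/admin.emp");
    MongoConfigUtil.setCreateInputSplits(conf, false);

    final Job job = Job.getInstance(conf, "ReadWeblogsFromMongo");
    // setJarByClass only resolves a jar when the class was loaded from one;
    // running from IDE build output still produces the "No job jar file set" warning.
    job.setJarByClass(ImportfromMongoToHdfs.class);
    job.setMapperClass(ReadEmpDataFromMongo.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path("/mongodb3"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);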

Running the job produces the following output, then the submission fails:

Conf: Configuration: core-default.xml, core-site.xml

15/09/13 17:08:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/09/13 17:08:47 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/09/13 17:08:47 INFO mapred.JobClient: Cleaning up the staging area file:/tmp/hadoop-hadoop/mapred/staging/hadoop-391630837/.staging/job_local_0001
Exception in thread "main" java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.JobContext, but interface was expected
    at com.mongodb.hadoop.MongoOutputFormat.checkOutputSpecs(MongoOutputFormat.java:35)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:949)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:912)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:912)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:500)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
    at com.orienit.kalyan.hadoop.training.mongodb.job.MongoDbWordCount.run(MongoDbWordCount.java:86)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
    at com.orienit.kalyan.hadoop.training.mongodb.job.MongoDbWordCount.main(MongoDbWordCount.java:90)
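
For context: org.apache.hadoop.mapreduce.JobContext is a concrete class in Hadoop 1.x but became an interface in Hadoop 2.x, so an IncompatibleClassChangeError of the form "Found class ..., but interface was expected" usually means the mongo-hadoop connector jar on the classpath was built against a different major Hadoop version than the one actually running the job.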

0 Answers:

No answers yet.