import java.io.*;
import org.apache.commons.logging.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.*;
import org.bson.*;
import com.mongodb.hadoop.*;
import com.mongodb.hadoop.util.*;
@SuppressWarnings("unused")
public class ImportfromMongoToHdfs{
private static final Log log =
LogFactory.getLog(ImportfromMongoToHdfs.class);
public static class ReadEmpDataFromMongo extends
Mapper<Object,BSONObject, Text, Text>{
public void map(Object key, BSONObject value, Context context) throws
IOException, InterruptedException{
System.out.println("Key: " + key);
System.out.println("Value: " + value);
String md5 = value.get("md5").toString();
String name = value.get("name").toString();
String dept = value.get("dept").toString();
String salary = value.get("salary").toString();
String location = value.get("location").toString();
String output = "\t" + name + "\t" + dept + "\t" + salary + "\t" +
location;
context.write( new Text(md5), new Text(output));
}
}
public static void main(String[] args)throws Exception {
final Configuration conf = new Configuration();
MongoConfigUtil.setInputURI(conf,"mongodb://localhost:27017/admin.emp");
MongoConfigUtil.setCreateInputSplits(conf, false);
System.out.println("Configuration: " + conf);
final Job job = new Job(conf, "ReadWeblogsFromMongo");
Path out = new Path("/mongodb3");
FileOutputFormat.setOutputPath(job, out);
job.setJarByClass(ImportfromMongoToHdfs.class);
job.setMapperClass(ReadEmpDataFromMongo.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(MongoInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setNumReduceTasks(0);
System.exit(job.waitForCompletion(true) ? 0 : 1 );
}
}
Conf: Configuration: core-default.xml, core-site.xml
15/09/13 17:08:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/09/13 17:08:47 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/09/13 17:08:47 INFO mapred.JobClient: Cleaning up the staging area file:/tmp/hadoop-hadoop/mapred/staging/hadoop-391630837/.staging/job_local_0001
Exception in thread "main" java.lang.IncompatibleClassChangeError: Found class org.apache.hadoop.mapreduce.JobContext, but interface was expected
at com.mongodb.hadoop.MongoOutputFormat.checkOutputSpecs(MongoOutputFormat.java:35)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:949)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:912)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:912)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:500)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
at com.orienit.kalyan.hadoop.training.mongodb.job.MongoDbWordCount.run(MongoDbWordCount.java:86)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at com.orienit.kalyan.hadoop.training.mongodb.job.MongoDbWordCount.main(MongoDbWordCount.java:90)