我尝试在hadoop / mapreduce上执行以下代码(此代码将数据添加到hbase表中):
//package org.apache.hadoop.hbase.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
* Sample Uploader MapReduce
* <p>
* This is EXAMPLE code. You will need to change it to work for your context.
* <p>
* Uses {@link TableReducer} to put the data into HBase. Change the InputFormat
* to suit your data. In this example, we are importing a CSV file.
* <p>
* <pre>row,family,qualifier,value</pre>
* <p>
* The table and columnfamily we're to insert into must preexist.
* <p>
* There is no reducer in this example as it is not necessary and adds
* significant overhead. If you need to do any massaging of data before
* inserting into HBase, you can do this in the map as well.
* <p>Do the following to start the MR job:
* <pre>
* ./bin/hadoop org.apache.hadoop.hbase.mapreduce.SampleUploader /tmp/input.csv TABLE_NAME
* </pre>
* <p>
* This code was written against HBase 0.21 trunk.
*/
public class UploaderHbase {
private static final String NAME = "UploaderHbase";
public static class Uploader
extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
private long checkpoint = 100;
private long count = 0;
@Override
public void map(LongWritable key, Text line, Context context)
throws IOException {
// Input is a CSV file
// Each map() is a single line, where the key is the line number
// Each line is comma-delimited; row,family,qualifier,value
// Split CSV line
String [] values = line.toString().split(",");
if(values.length != 4) {
return;
}
// Extract each value
byte [] row = Bytes.toBytes(values[0]);
byte [] family = Bytes.toBytes(values[1]);
byte [] qualifier = Bytes.toBytes(values[2]);
byte [] value = Bytes.toBytes(values[3]);
// Create Put
Put put = new Put(row);
put.add(family, qualifier, value);
// Uncomment below to disable WAL. This will improve performance but means
// you will experience data loss in the case of a RegionServer crash.
// put.setWriteToWAL(false);
try {
context.write(new ImmutableBytesWritable(row), put);
} catch (InterruptedException e) {
e.printStackTrace();
}
// Set status every checkpoint lines
if(++count % checkpoint == 0) {
context.setStatus("Emitting Put " + count);
}
}
}
/**
* Job configuration.
*/
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
Path inputPath = new Path(args[0]);
String tableName = args[1];
Job job = new Job(conf, NAME + "_" + tableName);
job.setJarByClass(UploaderHbase.class);
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(Uploader.class);
// No reducers. Just write straight to table. Call initTableReducerJob
// because it sets up the TableOutputFormat.
TableMapReduceUtil.initTableReducerJob(tableName, null, job);
job.setNumReduceTasks(0);
return job;
}
/**
* Main entry point.
*
* @param args The command line parameters.
* @throws Exception When running the job fails.
*/
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if(otherArgs.length != 2) {
System.err.println("Wrong number of arguments: " + otherArgs.length);
System.err.println("Usage: " + NAME + " <input> <tablename>");
System.exit(-1);
}
Job job = configureJob(conf, otherArgs);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
要编译此代码,我使用:
javac -cp $(hbase classpath):$(hadoop classpath) UploaderHbase.java
要执行它,我使用:
java -cp $(hbase classpath):$(hadoop classpath) UploaderHbase /tmp/input/input.csv TestTable
当我编译我的代码时,我没有得到任何错误,但是当我尝试用上面的行执行它时,我得到以下错误:
2017-05-03 12:43:45,216 INFO [main] mapreduce.Job: Task Id : attempt_1493803829363_0004_m_000000_2, Status : FAILED
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class UploaderHbase$Uploader not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getMapperClass(JobContextImpl.java:186)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:745)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.ClassNotFoundException: Class UploaderHbase$Uploader not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193)
... 8 more
at java.lang.Class.forName(Class.java:348)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)