我已经将一个表从Hbase导出到一个文件,几乎就像org.apache.hadoop.mapreduce.lib.output.TextOutputFormat一样,要导入导出的Text格式文件,我已经从开源调整了Import的代码来支持导入基于文本的文件而不是SequenceFile。 job.setInputFormatClass(TextInputFormat.class);
在运行Import类时,我收到以下异常。
java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.hbase.io.ImmutableBytesWritable
at Import$Importer.map(Import.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212)
这是我的导出类,它被调整为将内容从ExpoterTable写入文件。
public class Export
{
private static final Log LOG = LogFactory.getLog(Export.class);
final static String NAME = "export";
final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
private static OutputStream out;
private static final String utf8 = "UTF-8";
private static final byte[] newline;
private static final byte[] keyValueSeparator;
static {
try {
newline = "\n".getBytes(utf8);
keyValueSeparator = "\t".getBytes(utf8);
}
catch (UnsupportedEncodingException uee) {
throw new IllegalArgumentException("can't find " + utf8 + " encoding");
}
}
/**
* Mapper.
*/
static class ExporterTable extends TableMapper<ImmutableBytesWritable, Result>
{
/**
* @param row The current table row key.
* @param value The columns.
* @param context The current context.
* @throws IOException When something is broken with the data.
* @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
* org.apache.hadoop.mapreduce.Mapper.Context)
*/
@Override
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException {
try {
context.write(row, value);
write(row, value);
System.out.println(row);
System.out.println(value);
}
catch (InterruptedException e) {
e.printStackTrace();
}
}
}
/**
* Sets up the actual job.
*
* @param conf The current configuration.
* @param args The command line parameters.
* @return The newly created job.
* @throws IOException When setting up the job fails.
*/
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
String tableName = args[0];
// this.out = new DataOutputStream(fos);
Path outputDir = new Path(args[1]);
Job job = new Job(conf, NAME + "_" + tableName);
job.setJobName(NAME + "_" + tableName);
job.setJarByClass(ExporterTable.class);
// Set optional scan parameters
Scan s = getConfiguredScanForJob(conf, args);
TableMapReduceUtil.initTableMapperJob(tableName, s, ExporterTable.class, ImmutableBytesWritable.class, IntWritable.class, job);
// No reducers. Just write straight to output files.
job.setNumReduceTasks(0);
job.setOutputValueClass(Text.class);
// FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputFormatClass(NullOutputFormat.class);
TableMapReduceUtil.addHBaseDependencyJars(conf);
TableMapReduceUtil.addDependencyJars(conf, JsonProcessingException.class);
TableMapReduceUtil.addDependencyJars(job);
return job;
}
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
Scan s = new Scan();
// Optional arguments.
// Set Scan Versions
int versions = args.length > 2 ? Integer.parseInt(args[2]) : 1;
s.setMaxVersions(versions);
// Set Scan Range
long startTime = args.length > 3 ? Long.parseLong(args[3]) : 0L;
long endTime = args.length > 4 ? Long.parseLong(args[4]) : Long.MAX_VALUE;
s.setTimeRange(startTime, endTime);
// Set cache blocks
s.setCacheBlocks(false);
// Set Scan Column Family
boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
if (raw) {
s.setRaw(raw);
}
if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
}
// Set RowFilter or Prefix Filter if applicable.
Filter exportFilter = getExportFilter(args);
if (exportFilter != null) {
LOG.info("Setting Scan Filter for Export.");
s.setFilter(exportFilter);
}
LOG.info("versions=" + versions + ", starttime=" + startTime + ", endtime=" + endTime + ", keepDeletedCells=" + raw);
return s;
}
private static Filter getExportFilter(String[] args) {
Filter exportFilter = null;
String filterCriteria = (args.length > 5) ? args[5] : null;
if (filterCriteria == null)
return null;
if (filterCriteria.startsWith("^")) {
String regexPattern = filterCriteria.substring(1, filterCriteria.length());
exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
}
else {
exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
}
return exportFilter;
}
/*
* @param errorMsg Error message. Can be null.
*/
private static void usage(final String errorMsg) {
if (errorMsg != null && errorMsg.length() > 0) {
System.err.println("ERROR: " + errorMsg);
}
System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " + "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
System.err.println(" Note: -D properties will be applied to the conf used. ");
System.err.println(" For example: ");
System.err.println(" -D mapred.output.compress=true");
System.err.println(" -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
System.err.println(" -D mapred.output.compression.type=BLOCK");
System.err.println(" Additionally, the following SCAN properties can be specified");
System.err.println(" to control/limit what is exported..");
System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
System.err.println(" -D " + RAW_SCAN + "=true");
System.err.println("For performance consider the following properties:\n" + " -Dhbase.client.scanner.caching=100\n" + " -Dmapred.map.tasks.speculative.execution=false\n" + " -Dmapred.reduce.tasks.speculative.execution=false");
}
/**
* Main entry point.
*
* @param args The command line parameters.
* @throws Exception When running the job fails.
*/
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
conf.set("mapreduce.framework.name", "local");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
usage("Wrong number of arguments: " + otherArgs.length);
System.exit(-1);
}
boolean jobStatus = false;
Job job = createSubmittableJob(conf, otherArgs);
try {
File f = new File("Test");
out = new FileOutputStream(f);
jobStatus = job.waitForCompletion(true);
}
catch (Exception e) {
e.printStackTrace();
}
finally {
IOUtils.closeStream(out);
}
// convertTextToSequence(conf);
System.exit(jobStatus ? 0 : 1);
}
public static void write(ImmutableBytesWritable key, Result value) throws IOException {
boolean nullKey = key == null;
boolean nullValue = value == null;
if (nullKey && nullValue) {
return;
}
if (!nullKey) {
writeObject(key);
}
if (!(nullKey || nullValue)) {
out.write(keyValueSeparator);
}
if (!nullValue) {
writeObject(value);
}
out.write(newline);
}
/**
* Write the object to the byte stream, handling Text as a special
* case.
* @param o the object to print
* @throws IOException if the write throws, we pass it on
*/
private static void writeObject(Object o) throws IOException {
if (o instanceof Text) {
Text to = (Text) o;
out.write(to.getBytes(), 0, to.getLength());
}
else {
out.write(o.toString().getBytes(utf8));
}
}
}
任何帮助表示赞赏。
答案 0 :(得分:1)
您已声明了map方法,并将输出键写为var myString = "one,1 | xxx,2 | yyy,3 | zzz,4| ,5 |";
var regEx = new RegExp(/\w*,\d/g);
var myArray;
while ((myArray = regEx.exec(myString)) !== null) {
var cell = myArray[0].split(',');
var msg = 'Found ' + cell.join(',') + '. Value: \'' + cell[0] + '\'. Index: ' + cell[1];
console.log(msg);
}
ImmutableBytesWritable
您必须按如下方式覆盖作业参数才能设置 MapOutputKeyClass 和 MapOutPutvalueClass
public void map(ImmutableBytesWritable row, Result value, Context context)
throws IOException {
try {
context.write(row, value);
查看工作Example: 7。将HBase表导出到文件