我正在尝试从 HBase 表中读取数据,对其进行一些处理,然后使用以下代码将结果存储到另一个表中:
package analysis;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
public class Author_ref {
public static class MyMapper extends TableMapper<Text,Text> {
public void map(ImmutableBytesWritable row, Result value,Context context)throws IOException, InterruptedException
{
String key = new String(row.get());
String values = new String(value.getValue(Bytes.toBytes("authors"), Bytes.toBytes("authors")));
String clean_values = values.replaceAll("[","");
String clean_values2 = clean_values.replaceAll("]","");
String authors[] = clean_values2.trim().split(",");
for (String author : authors)
{
//Put row = new Put();
context.write(new Text(author),new Text( key));
}
}
}
public static class MyReducer extends TableReducer<Text, Text, ImmutableBytesWritable>
{
public void reduce(Text author, Iterable<Text> values,Context context)throws IOException,InterruptedException
{
String papers = "";
for (Text x : values)
{
papers = papers + ","+x.toString();
}
Put p = new Put(author.getBytes());
p.add(Bytes.toBytes("papers_writen"),Bytes.toBytes("papers_writen"),Bytes.toBytes(papers));
context.write(null, p);
}
}
public static void main(String[] args) throws Exception
{
Configuration config = HBaseConfiguration.create();
Job job = new Job(config,"ExampleSummary");
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false);
job.setJarByClass(Author_ref.class); // class that contains mapper and reducer
TableMapReduceUtil.initTableMapperJob(
"Dataset", // input table
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper class
Text.class, // mapper output key
Text.class, // mapper output value
job);
TableMapReduceUtil.initTableReducerJob(
"Author_paper", // output table
MyReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
System.exit(job.waitForCompletion(true)?0:1);
}
}
我遇到了以下错误。
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.yarn.api.records.URL.fromURI(Ljava/net/URI;)Lorg/apache/hadoop/yarn/api/records/URL;
    at org.apache.hadoop.mapreduce.v2.util.LocalResourceBuilder.createLocalResources(LocalResourceBuilder.java:144)
    at org.apache.hadoop.mapreduce.v2.util.MRApps.setupDistributedCache(MRApps.java:531)
    at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:92)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:171)
    at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:760)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:253)
    at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1570)
    at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1567)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1889)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1567)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1588)
    at analysis.Author_ref.main(Author_ref.java:111)
我正在使用hadoop 2.9和hbase 1.2.6.1
答案 0 :(得分:0)
Hadoop 2.9 与 HBase 1.2.x 不兼容,请参阅以下页面中的版本兼容性说明:
http://hbase.apache.org/book.html#basic.prerequisites
您必须改用相互兼容的 Hadoop 和 HBase 版本。