I wrote a job that fetches data from HBase, parses it, and saves it into Hive. However, I am getting this error:
org.apache.hadoop.mapred.YarnChild: Exception running child : java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hive.hcatalog.data.HCatRecord
at org.apache.hive.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:53)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:558)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.write(WrappedReducer.java:105)
at org.apache.hadoop.mapreduce.Reducer.reduce(Reducer.java:150)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
I know the problem is some silly mismatch between the reducer key/value types and job.setOutputKeyClass / job.setOutputValueClass, but I just can't find it :(. Please help me, here is my code:
public class DumpProductViewsAggHive extends Configured implements Tool {
public static enum LOCAL_COUNTER {
IGNORED, VALID, INVALID
}
private static final String NAME = "DumpProductViewsAggHive"; //Change the name of the job here
private static final String SEPARATOR = "/t"; //Change the separator here
private String dateFrom; //Start date - HBase MR applicable
private String dateTo; //Ending date - HBase MR applicable
private String fileOutput; //output file path
private String table = "we_json"; //default HBase table
private int caching = 500; //default HBase caching
/**
* Map phase HBase
*/
public static class MapHBase extends TableMapper<Text, Text> {
private Text key_out = new Text();
private Text value_out = new Text();
private JSONParser parser = new JSONParser();
private DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
private String day;
private Date date = new Date();
private Double ts = new Double(0);
public void map(ImmutableBytesWritable row, Result value,
Context context) throws IOException, InterruptedException {
String b = new String(value.getValue(Bytes.toBytes("d"),
Bytes.toBytes("j")));
JSONObject obj;
try {
obj = (JSONObject) parser.parse(b);
if (obj.get("e").equals("pview_bcn")) {
ts = Double.parseDouble(obj.get("ts").toString());
ts = ts * 1000;
date.setTime(Math.round(ts));
day = formatter.format(date);
key_out.set(obj.get("sid").toString());
value_out.set(obj.get("variant_id") + SEPARATOR + obj.get("shop")
+ SEPARATOR + obj.get("status") + SEPARATOR + day
+ SEPARATOR + "D");
context.getCounter(LOCAL_COUNTER.VALID).increment(1);
context.write(key_out, value_out);
} else {
context.getCounter(LOCAL_COUNTER.IGNORED).increment(1);
}
} catch (Exception pe) {
// ignore value
context.getCounter(LOCAL_COUNTER.INVALID).increment(1);
return;
}
}
}
/**
* Reduce phase
*/
public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord>{
public void reduce (Iterable<Text> key, Text value, Context context)
throws IOException, InterruptedException{
Set<Text> sidSet = new HashSet<Text>();
while (key.iterator().hasNext()) {
sidSet.add(key.iterator().next());
}
String[] tokens = value.toString().split( SEPARATOR );
HCatRecord record = new DefaultHCatRecord(6);
record.set(0, tokens[0].toString());
record.set(1, tokens[1].toString());
record.set(2, tokens[2].toString());
record.set(3, tokens[3].toString());
record.set(4, tokens[4].toString());
record.set(5, sidSet.size());
context.write(NullWritable.get(), record);
}
}
public void getParams(String[] otherArgs) throws ParseException {
DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
Calendar cal = Calendar.getInstance();
int i = 0;
/*
* Loop parameters
*/
while (i<otherArgs.length) {
// get parameter -d query only one day. HBase applicable.
if (otherArgs[i].equals("-d")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateFrom = Long.toHexString(cal.getTimeInMillis()/1000);
cal.add(Calendar.DATE, 1);
dateTo = Long.toHexString(cal.getTimeInMillis()/1000);
System.out.println("Day translated to start: " + dateFrom + "; End: " + dateTo);
}
// get start date -f parameter. HBase applicable.
if (otherArgs[i].equals("-f")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateFrom = Long.toHexString(cal.getTimeInMillis() / 1000);
System.out.println("From: " + dateFrom);
}
// get end date -t parameter. HBase applicable.
if (otherArgs[i].equals("-t")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateTo = Long.toHexString(cal.getTimeInMillis() / 1000);
System.out.println("To: " + dateTo);
}
// get output folder -o parameter.
if (otherArgs[i].equals("-o")) {
fileOutput = otherArgs[++i];
System.out.println("Output: " + fileOutput);
}
// get caching -c parameter. HBase applicable.
if (otherArgs[i].equals("-c")) {
caching = Integer.parseInt(otherArgs[++i]);
System.out.println("Caching: " + caching);
}
// get table name -tab parameter. HBase applicable.
if (otherArgs[i].equals("-tab")) {
table = otherArgs[++i];
System.out.println("Table: " + table);
}
i++;
}
}
/**
*
* @param fileInput
* @param dateFrom
* @param dateTo
* @param job
* @param caching
* @param table
* @throws IOException
*/
public void getInput(String fileInput, String dateFrom, String dateTo, Job job, int caching, String table) throws IOException {
// If the source is from Hbase
if (fileInput == null) {
/**
* HBase source
*/
// If date is not defined
if (dateFrom == null || dateTo == null) {
System.err.println("Start date or End Date is not defined.");
return;
}
System.out.println("HBase table used as a source.");
Scan scan = new Scan(Bytes.toBytes(dateFrom), Bytes.toBytes(dateTo));
scan.setCaching(caching); // set Caching, when the table is small it is better to use bigger number. Default scan is 1
scan.setCacheBlocks(false); // do not set true for MR jobs
scan.addColumn(Bytes.toBytes("d"), Bytes.toBytes("j"));
TableMapReduceUtil.initTableMapperJob(
table, //name of table
scan, //instance of scan
MapHBase.class, //mapper class
Text.class, //mapper output key
Text.class, //mapper output value
job);
}
}
/**
* Tool implementation
*/
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
// Create configuration
Configuration conf = this.getConf();
String databaseName = null;
String tableName = "test";
// Parse arguments
String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();
getParams(otherArgs);
// It is better to specify zookeeper quorum in CLI parameter -D hbase.zookeeper.quorum=zookeeper servers
conf.set( "hbase.zookeeper.quorum",
"cz-dc1-s-132.mall.local,cz-dc1-s-133.mall.local,"
+ "cz-dc1-s-134.mall.local,cz-dc1-s-135.mall.local,"
+ "cz-dc1-s-136.mall.local");
// Create job
Job job = Job.getInstance(conf, NAME);
job.setJarByClass(DumpProductViewsAggHive.class);
// Setup MapReduce job
job.setReducerClass(Reducer.class);
//job.setNumReduceTasks(0); // If reducer is not needed
// Specify key / value
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DefaultHCatRecord.class);
// Input
getInput(null, dateFrom, dateTo, job, caching, table);
// Output
// Ignore the key for the reducer output; emitting an HCatalog record as value
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setOutput(job, OutputJobInfo.create(databaseName, tableName, null));
HCatSchema s = HCatOutputFormat.getTableSchema(job);
System.err.println("INFO: output schema explicitly set for writing:" + s);
HCatOutputFormat.setSchema(job, s);
// Execute job and return status
return job.waitForCompletion(true) ? 0 : 1;
}
/**
* Main
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new DumpProductViewsAggHive(), args);
System.exit(res);
}
}
Answer (score: 0)
Similar to the question I answered a few minutes ago, your reducer is defined incorrectly:
@Override
public void reduce (Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException
Please use the @Override annotation so the compiler can catch this kind of error for you.
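For reference, here is a minimal sketch of what the corrected reducer could look like. It keeps the question's field layout and SEPARATOR constant; the aggregation (here just counting the values per key) is only a placeholder, since only you know what the original distinct-count logic was meant to do:

// Imports used by the reducer (these are already needed by the question's class).
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // key   = sid emitted by the mapper
        // values = the SEPARATOR-delimited detail strings for that sid
        int count = 0;
        String[] tokens = null;
        for (Text value : values) {
            if (tokens == null) {
                tokens = value.toString().split(SEPARATOR);
            }
            count++; // placeholder aggregation: number of values per key
        }
        if (tokens == null || tokens.length < 5) {
            return; // nothing usable for this key
        }
        HCatRecord record = new DefaultHCatRecord(6);
        record.set(0, tokens[0]);
        record.set(1, tokens[1]);
        record.set(2, tokens[2]);
        record.set(3, tokens[3]);
        record.set(4, tokens[4]);
        record.set(5, count);
        context.write(NullWritable.get(), record);
    }
}

Depending on the rest of your setup, run() may also need job.setReducerClass(Reduce.class) instead of the base Reducer class; otherwise the default identity reducer passes the Text values straight through to the HCatalog writer, which would produce the same ClassCastException.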