I wrote a job that fetches data from HBase, parses it, and saves it into Hive. However, I am getting this error:
org.apache.hadoop.mapred.YarnChild: Exception running child : java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hive.hcatalog.data.HCatRecord
at org.apache.hive.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:53)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:558)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.write(WrappedReducer.java:105)
at org.apache.hadoop.mapreduce.Reducer.reduce(Reducer.java:150)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
I know the problem is some silly mismatch between the reducer key/value types and job.setOutputKeyClass / job.setOutputValueClass, but I just can't find it :(. Please help me, here is my code:
public class DumpProductViewsAggHive extends Configured implements Tool {
public static enum LOCAL_COUNTER {
IGNORED, VALID, INVALID
}
private static final String NAME = "DumpProductViewsAggHive"; //Change the name of the job here
private static final String SEPARATOR = "/t"; //Change the separator here
private String dateFrom; //Start date - HBase MR applicable
private String dateTo; //Ending date - HBase MR applicable
private String fileOutput; //output file path
private String table = "we_json"; //default HBase table
private int caching = 500; //default HBase caching
/**
* Map phase HBase
*/
public static class MapHBase extends TableMapper<Text, Text> {
private Text key_out = new Text();
private Text value_out = new Text();
private JSONParser parser = new JSONParser();
private DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
private String day;
private Date date = new Date();
private Double ts = new Double(0);
public void map(ImmutableBytesWritable row, Result value,
Context context) throws IOException, InterruptedException {
String b = new String(value.getValue(Bytes.toBytes("d"),
Bytes.toBytes("j")));
JSONObject obj;
try {
obj = (JSONObject) parser.parse(b);
if (obj.get("e").equals("pview_bcn")) {
ts = Double.parseDouble(obj.get("ts").toString());
ts = ts * 1000;
date.setTime(Math.round(ts));
day = formatter.format(date);
key_out.set(obj.get("sid").toString());
value_out.set(obj.get("variant_id") + SEPARATOR + obj.get("shop")
+ SEPARATOR + obj.get("status") + SEPARATOR + day
+ SEPARATOR + "D");
context.getCounter(LOCAL_COUNTER.VALID).increment(1);
context.write(key_out, value_out);
} else {
context.getCounter(LOCAL_COUNTER.IGNORED).increment(1);
}
} catch (Exception pe) {
// ignore value
context.getCounter(LOCAL_COUNTER.INVALID).increment(1);
return;
}
}
}
/**
* Reduce phase
*/
public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord>{
public void reduce (Iterable<Text> key, Text value, Context context)
throws IOException, InterruptedException{
Set<Text> sidSet = new HashSet<Text>();
while (key.iterator().hasNext()) {
sidSet.add(key.iterator().next());
}
String[] tokens = value.toString().split( SEPARATOR );
HCatRecord record = new DefaultHCatRecord(6);
record.set(0, tokens[0].toString());
record.set(1, tokens[1].toString());
record.set(2, tokens[2].toString());
record.set(3, tokens[3].toString());
record.set(4, tokens[4].toString());
record.set(5, sidSet.size());
context.write(NullWritable.get(), record);
}
}
public void getParams(String[] otherArgs) throws ParseException {
DateFormat formatter = new SimpleDateFormat("yyyyMMdd");
Calendar cal = Calendar.getInstance();
int i = 0;
/*
* Loop parameters
*/
while (i<otherArgs.length) {
// get parameter -d query only one day. HBase applicable.
if (otherArgs[i].equals("-d")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateFrom = Long.toHexString(cal.getTimeInMillis()/1000);
cal.add(Calendar.DATE, 1);
dateTo = Long.toHexString(cal.getTimeInMillis()/1000);
System.out.println("Day translated to start: " + dateFrom + "; End: " + dateTo);
}
// get start date -f parameter. HBase applicable.
if (otherArgs[i].equals("-f")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateFrom = Long.toHexString(cal.getTimeInMillis() / 1000);
System.out.println("From: " + dateFrom);
}
// get end date -t parameter. HBase applicable.
if (otherArgs[i].equals("-t")) {
cal.setTime(formatter.parse(otherArgs[++i]));
dateTo = Long.toHexString(cal.getTimeInMillis() / 1000);
System.out.println("To: " + dateTo);
}
// get output folder -o parameter.
if (otherArgs[i].equals("-o")) {
fileOutput = otherArgs[++i];
System.out.println("Output: " + fileOutput);
}
// get caching -c parameter. HBase applicable.
if (otherArgs[i].equals("-c")) {
caching = Integer.parseInt(otherArgs[++i]);
System.out.println("Caching: " + caching);
}
// get table name -tab parameter. HBase applicable.
if (otherArgs[i].equals("-tab")) {
table = otherArgs[++i];
System.out.println("Table: " + table);
}
i++;
}
}
/**
*
* @param fileInput
* @param dateFrom
* @param dateTo
* @param job
* @param caching
* @param table
* @throws IOException
*/
public void getInput(String fileInput, String dateFrom, String dateTo, Job job, int caching, String table) throws IOException {
// If the source is from Hbase
if (fileInput == null) {
/**
* HBase source
*/
// If date is not defined
if (dateFrom == null || dateTo == null) {
System.err.println("Start date or End Date is not defined.");
return;
}
System.out.println("HBase table used as a source.");
Scan scan = new Scan(Bytes.toBytes(dateFrom), Bytes.toBytes(dateTo));
scan.setCaching(caching); // set Caching, when the table is small it is better to use bigger number. Default scan is 1
scan.setCacheBlocks(false); // do not set true for MR jobs
scan.addColumn(Bytes.toBytes("d"), Bytes.toBytes("j"));
TableMapReduceUtil.initTableMapperJob(
table, //name of table
scan, //instance of scan
MapHBase.class, //mapper class
Text.class, //mapper output key
Text.class, //mapper output value
job);
}
}
/**
* Tool implementation
*/
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
// Create configuration
Configuration conf = this.getConf();
String databaseName = null;
String tableName = "test";
// Parse arguments
String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();
getParams(otherArgs);
// It is better to specify zookeeper quorum in CLI parameter -D hbase.zookeeper.quorum=zookeeper servers
conf.set( "hbase.zookeeper.quorum",
"cz-dc1-s-132.mall.local,cz-dc1-s-133.mall.local,"
+ "cz-dc1-s-134.mall.local,cz-dc1-s-135.mall.local,"
+ "cz-dc1-s-136.mall.local");
// Create job
Job job = Job.getInstance(conf, NAME);
job.setJarByClass(DumpProductViewsAggHive.class);
// Setup MapReduce job
job.setReducerClass(Reducer.class);
//job.setNumReduceTasks(0); // If reducer is not needed
// Specify key / value
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DefaultHCatRecord.class);
// Input
getInput(null, dateFrom, dateTo, job, caching, table);
// Output
// Ignore the key for the reducer output; emitting an HCatalog record as value
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setOutput(job, OutputJobInfo.create(databaseName, tableName, null));
HCatSchema s = HCatOutputFormat.getTableSchema(job);
System.err.println("INFO: output schema explicitly set for writing:" + s);
HCatOutputFormat.setSchema(job, s);
// Execute job and return status
return job.waitForCompletion(true) ? 0 : 1;
}
/**
* Main
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new DumpProductViewsAggHive(), args);
System.exit(res);
}
}
Answer (score: 0)
Similar to the question I answered a few minutes ago, your reducer is defined incorrectly:
@Override
public void reduce (Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException
Please use the @Override annotation so the compiler can catch this kind of error for you.
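For reference, here is a minimal sketch of what the corrected reducer could look like. It keeps the question's field layout and SEPARATOR constant; the aggregation (here just counting the values per key) is only a placeholder, since only you know what the original distinct-count logic was meant to do:

// Imports used by the reducer (these are already needed by the question's class).
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public static class Reduce extends Reducer<Text, Text, NullWritable, HCatRecord> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // key   = sid emitted by the mapper
        // values = the SEPARATOR-delimited detail strings for that sid
        int count = 0;
        String[] tokens = null;
        for (Text value : values) {
            if (tokens == null) {
                tokens = value.toString().split(SEPARATOR);
            }
            count++; // placeholder aggregation: number of values per key
        }
        if (tokens == null || tokens.length < 5) {
            return; // nothing usable for this key
        }
        HCatRecord record = new DefaultHCatRecord(6);
        record.set(0, tokens[0]);
        record.set(1, tokens[1]);
        record.set(2, tokens[2]);
        record.set(3, tokens[3]);
        record.set(4, tokens[4]);
        record.set(5, count);
        context.write(NullWritable.get(), record);
    }
}

Depending on the rest of your setup, run() may also need job.setReducerClass(Reduce.class) instead of the base Reducer class; otherwise the default identity reducer passes the Text values straight through to the HCatalog writer, which would produce the same ClassCastException.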