在 Hive 中对分桶表使用 TABLESAMPLE 抽样时报错

时间:2017-04-30 01:03:34

标签: hadoop hive

-- Recreate the bucketed customer table from scratch.
-- IF EXISTS makes the drop a no-op when the table is absent, independent of
-- the hive.exec.drop.ignorenonexistent session setting.
DROP TABLE IF EXISTS filtered_online_march_customers;

-- Bucketed table: clustered on customer_id into 32 buckets.
CREATE TABLE IF NOT EXISTS filtered_online_march_customers (
    customer_id STRING,
    order_id    STRING
)
CLUSTERED BY (customer_id) INTO 32 BUCKETS;

-- Load the bucketed table, replacing any existing contents, with every row
-- of filtered_march_online_transactions.
-- Bucketing enforcement is switched on for the session before the insert.
SET hive.enforce.bucketing = true;
INSERT OVERWRITE TABLE filtered_online_march_customers
SELECT *
FROM filtered_march_online_transactions

我创建了这张按 customer_id 分桶（CLUSTERED BY）的表。但是，当我实际尝试按桶抽样（TABLESAMPLE）时，查询失败了。

-- Materialize bucket 1 of 32 from the customer table, sampling on customer_id.
CREATE TABLE randomized_filtered_march_customers AS
SELECT *
FROM filtered_online_march_customers
    TABLESAMPLE (BUCKET 1 OUT OF 32 ON customer_id)

我收到了如下错误：

java.io.IOException: cannot find dir = maprfs:///hive/v0k0020.db/filtered_online_march_customers/000000_0 in pathToPartitionInfo: [maprfs:/hive/v0k0020.db/filtered_online_march_customers/000000_0]
    at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively(HiveFileFormatUtils.java:344)
    at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively(HiveFileFormatUtils.java:306)
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat$CombineHiveInputSplit.<init>(CombineHiveInputFormat.java:108)
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:455)
    at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:1098)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1090)
    at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:176)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:931)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:882)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:882)
    at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:856)
    at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:420)
    at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
    at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1503)
    at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1270)
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1088)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:911)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:901)
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:268)
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:220)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:423)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:359)
    at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:456)
    at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:466)
    at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:748)
    at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:686)
    at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:625)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Job Submission failed with exception 'java.io.IOException(cannot find dir = maprfs:///hive/v0k0020.db/filtered_online_march_customers/000000_0 in pathToPartitionInfo: [maprfs:/hive/v0k0020.db/filtered_online_march_customers/000000_0])'

如果我将查询更改为

-- Same sample table as the customer_id variant, but sampling on rand()
-- instead of the customer_id column.
CREATE TABLE randomized_filtered_march_customers AS
SELECT *
FROM filtered_online_march_customers
    TABLESAMPLE (BUCKET 1 OUT OF 32 ON rand())

则可以正常工作。有人知道该如何解决按 customer_id 抽样时的这个错误吗？

0 个答案:

没有答案