-- Staging table for the raw input: plain text, one record per line,
-- fields separated by commas. All eight fields are regular columns here.
CREATE TABLE MY_DATA0 (
    session_id STRING,
    userid     BIGINT,
    date_time  STRING,
    ip         STRING,
    URL        STRING,
    country    STRING,
    state      STRING,
    city       STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Fill the staging table from the files under /inputhive.
-- OVERWRITE replaces whatever the table held before.
LOAD DATA INPATH '/inputhive'
OVERWRITE INTO TABLE MY_DATA0;
-- Target table: the five per-request columns are stored as data, while
-- country/state/city are partition columns. Rows are bucketed by userid
-- into 256 buckets. Storage format is comma-delimited text, one record per line.
CREATE TABLE part0 (
    session_id STRING,
    userid     BIGINT,
    date_time  STRING,
    ip         STRING,
    URL        STRING
)
PARTITIONED BY (country STRING, state STRING, city STRING)
CLUSTERED BY (userid) INTO 256 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Copy every row of my_data0 into part0 using dynamic partitioning.
-- No partition value is given statically, so the session must run with
-- hive.exec.dynamic.partition.mode set to nonstrict.
-- The partition columns (country, state, city) must be the LAST columns of
-- the SELECT list, in the same order as in the PARTITION clause: Hive maps
-- them by position, not by name.
INSERT OVERWRITE TABLE part0 PARTITION (country, state, city)
SELECT
    session_id,
    userid,
    date_time,
    ip,
    url,
    country,
    state,
    city
FROM my_data0;
我的数据集概述:
{60A191CB-B3CA-496E-B33B-0ACA551DD503},1331582487,2012-03-12 13:01:27,66.91.193.75,http://www.acme.com/SH55126545/VD55179433,United States,Hauula,Hawaii
{365CC356-7822-8A42-51D2-B6396F8FC5BF},1331584835,2012-03-12 13:40:35,173.172.214.24,http://www.acme.com/SH55126545/VD55179433,United States,El Paso,Texas
当我运行最后一个插入脚本时,我得到一个错误:
java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveFatalException: [Error 20004]: 节点尝试创建过多动态分区时发生致命错误。动态分区的最大数量由 hive.exec.max.dynamic.partitions 和 hive.exec.max.dynamic.partitions.pernode 控制。当前最大值设置为:100
PS:
我设置了这两个属性:
hive.exec.dynamic.partition.mode = nonstrict
hive.enforce.bucketing = true
答案 0 :(得分:7)
尝试将这些属性设置为更高的值。
-- Raise both dynamic-partition limits named in the error message
-- (which reported the maximum in effect as 100).
SET hive.exec.max.dynamic.partitions=100000;
SET hive.exec.max.dynamic.partitions.pernode=100000;
答案 1 :(得分:0)
在 select 语句中,分区列应该放在最后。例如:如果 state 是分区列,那么 “insert into table t1 partition(state) select Id, name, dept, sal, state from t2;” 可以正常工作。但如果查询写成 “insert into table t1 partition(state) select Id, name, dept, state, sal from t2;”,则会按 salary(sal)列的值来创建分区。