我的hadoop中有以下目录结构,
`/hadoop/maindirec/subdirect1/file1
/hadoop/maindirec/subdirect1/file2
/hadoop/maindirec/subdirect2/file1
/hadoop/maindirec/subdirect2/file2
/hadoop/maindirec/subdirect3/file1
/hadoop/maindirec/subdirect3/file2
/hadoop/maindirec/subdirect4/file1
/hadoop/maindirec/subdirect4/file2
/hadoop/maindirec/subdirect5/file1
/hadoop/maindirec/subdirect5/file2`
现在我想创建一个ORC格式的Hive表,以maindirec作为表的位置,并以subdirect1-5作为分区(partitions)。有人能告诉我该怎么做吗?提前谢谢。
到目前为止,我尝试了以下语句:
-- External Hive table rooted at /hadoop/maindirec, partitioned by a single
-- string column (subd). Declared as ORC with SNAPPY compression; the
-- tab-delimited row format and the skip.header.line.count property are kept
-- exactly as in the original statement.
CREATE EXTERNAL TABLE temp (
    name string,
    id   int
)
PARTITIONED BY (subd string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/hadoop/maindirec'
TBLPROPERTIES (
    "orc.compress" = "SNAPPY",
    "skip.header.line.count" = "4"
);
-- Register the five subdirectories of /hadoop/maindirec as partitions of
-- temp, one subd value per directory, in a single ALTER TABLE statement.
ALTER TABLE temp ADD
    PARTITION (subd = 'subdirect1') LOCATION '/hadoop/maindirec/subdirect1'
    PARTITION (subd = 'subdirect2') LOCATION '/hadoop/maindirec/subdirect2'
    PARTITION (subd = 'subdirect3') LOCATION '/hadoop/maindirec/subdirect3'
    PARTITION (subd = 'subdirect4') LOCATION '/hadoop/maindirec/subdirect4'
    PARTITION (subd = 'subdirect5') LOCATION '/hadoop/maindirec/subdirect5';
执行
-- Read every column of every row from temp.
SELECT *
FROM temp;
得到的输出是:
Failed with exception java.io.IOException:java.lang.RuntimeException: serious problem
答案 0 :(得分:0)
您可以使用以下代码:(根据需要更改和添加列名称)
-- External ORC table rooted at /hadoop/maindirec, partitioned by subd.
-- The column list (col1, col2) is a placeholder; change and add columns as needed.
-- NOTE(review): ORC is a binary columnar format, so the FIELDS/LINES
-- TERMINATED BY clauses presumably have no effect on how ORC files are read;
-- confirm against the Hive ORC documentation.
CREATE EXTERNAL TABLE temp_table (
    col1 int,
    col2 int
)
PARTITIONED BY (subd string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS ORC
LOCATION '/hadoop/maindirec';
-- Attach one partition of temp_table per subdirectory (subdirect1..subdirect5).
-- NOTE(review): 'files1-100' is kept as written in the answer and looks like a
-- placeholder. A partition LOCATION is expected to be a directory, so with the
-- layout shown in the question it would presumably be the subdirectory itself
-- (e.g. '/hadoop/maindirec/subdirect1'); confirm before running.
ALTER TABLE temp_table ADD
    PARTITION (subd = 'subdirect1') LOCATION '/hadoop/maindirec/subdirect1/files1-100'
    PARTITION (subd = 'subdirect2') LOCATION '/hadoop/maindirec/subdirect2/files1-100'
    PARTITION (subd = 'subdirect3') LOCATION '/hadoop/maindirec/subdirect3/files1-100'
    PARTITION (subd = 'subdirect4') LOCATION '/hadoop/maindirec/subdirect4/files1-100'
    PARTITION (subd = 'subdirect5') LOCATION '/hadoop/maindirec/subdirect5/files1-100';