我目前有一个文件,其中包含需要填充9个不同表的数据。每个表都有不同数量的列和数据类型,因此我需要过滤源文件(使用第一列确定行将进入哪个表)。
我当前的方法是创建一个表,该表具有通用列名称col_1,col_2等,直到文件中最后一个填充列,然后创建引用此文件的9个视图。我遇到的问题是,由于表格都是不同的结构,因此在同一列中出现了不同的数据类型。
是否有可能创建一个动态模式来过滤HIVE表指向第一列的.csv?
感谢
答案 0 :(得分:0)
<强> data.csv 强>
1,1,Now,11,22,2016-12-12
1,2,I,33,44,2017-01-01
3,3,heard,55,66,2017-02-02
1,4,you,77,88,2017-03-03
2,5,know,99,1010,2017-04-04
1,6,that,1111,1212,2017-05-05
2,7,secret,1313,1414,2017-06-06
create external table mycsv
(
rec_type int
,id int
,mystring string
,myint1 int
,myint2 int
,mydate date
)
row format delimited
fields terminated by ','
stored as textfile
;
select * from mycsv;
+----------+----+----------+--------+--------+------------+
| rec_type | id | mystring | myint1 | myint2 | mydate |
+----------+----+----------+--------+--------+------------+
| 1 | 1 | Now | 11 | 22 | 2016-12-12 |
| 1 | 2 | I | 33 | 44 | 2017-01-01 |
| 3 | 3 | heard | 55 | 66 | 2017-02-02 |
| 1 | 4 | you | 77 | 88 | 2017-03-03 |
| 2 | 5 | know | 99 | 1010 | 2017-04-04 |
| 1 | 6 | that | 1111 | 1212 | 2017-05-05 |
| 2 | 7 | secret | 1313 | 1414 | 2017-06-06 |
+----------+----+----------+--------+--------+------------+
create table t1(id int,mystring string);
create table t2(id int,mystring string,mydate date);
create table t3(id int,mydate date,myint1 int,myint2 int);
from mycsv
insert into t1 select id,mystring where rec_type = 1
insert into t2 select id,mystring,mydate where rec_type = 2
insert into t3 select id,mydate,myint1,myint2 where rec_type = 3
select * from t1;
+----+----------+
| id | mystring |
+----+----------+
| 1 | Now |
| 2 | I |
| 4 | you |
| 6 | that |
+----+----------+
select * from t2;
+----+----------+------------+
| id | mystring | mydate |
+----+----------+------------+
| 5 | know | 2017-04-04 |
| 7 | secret | 2017-06-06 |
+----+----------+------------+
select * from t3;
+----+------------+--------+--------+
| id | mydate | myint1 | myint2 |
+----+------------+--------+--------+
| 3 | 2017-02-02 | 55 | 66 |
+----+------------+--------+--------+