我有表A,其模式(schema)如下:
name string
address string
timezone string
one_key_value map<string,array<string>>
two_key_value map<string,array<string>>
以及表B,其模式(schema)如下:
name string
address string
timezone string
one_key_value array<struct<key:string,value:array<string>>>
two_key_value array<struct<key:string,value:array<string>>>
我正在尝试对这两张表执行 UNION ALL:
-- Fails with "Schema of both sides of union should match": one_key_value and
-- two_key_value are map<string,array<string>> in A but
-- array<struct<key:string,value:array<string>>> in B.
SELECT * FROM (SELECT * FROM A UNION ALL SELECT * FROM B) tmp;
遇到错误
FAILED: SemanticException Schema of both sides of union should match.
有没有办法解决这个问题?这两张表的结构几乎相同,只是表B中的键值对是以结构体数组(array<struct>)的形式存放的。
答案 0 :(得分:1)
将map列展开(explode),再重新组装成array<struct<key:string,value:array<string>>>,使其类型与table_b中的相同。从Hive 1.3.0版本起,这种写法应该可以正常工作:
-- Rebuild table_a's two map<string,array<string>> columns as
-- array<struct<key:string,value:array<string>>> so that both sides of the
-- UNION ALL expose identical column types.
-- Needs a Hive version whose collect_set() accepts struct arguments
-- (1.3.0 or later according to the surrounding answer text).
select
    name,
    address,
    timezone,
    one_key_value,
    two_key_value
from
(
    select
        s.name,
        s.address,
        s.timezone,
        -- Re-assemble the per-entry structs into one array per group.
        -- The two lateral views below yield every (k1, k2) combination for a
        -- row; collect_set() removes the repeated structs that this causes.
        collect_set(s.mystruct1) as one_key_value,
        collect_set(s.mystruct2) as two_key_value
    from
    (
        select
            a.name,
            a.address,
            a.timezone,
            -- One struct<key:string,value:array<string>> per map entry.
            named_struct('key', k1.key, 'value', k1.value) as mystruct1,
            named_struct('key', k2.key, 'value', k2.value) as mystruct2
        from table_a a
        -- Explode each map into (key, value) rows. AS is required before the
        -- column aliases. OUTER keeps rows whose map is NULL or empty.
        -- NOTE(review): such rows appear to contribute a struct with NULL key
        -- and value to the collected array - confirm this is acceptable.
        lateral view outer explode(a.one_key_value) k1 as key, value
        lateral view outer explode(a.two_key_value) k2 as key, value
    ) s
    -- Rows of table_a sharing name/address/timezone are merged into one row.
    group by s.name, s.address, s.timezone
) table_a
union all
-- Columns are listed explicitly because UNION ALL matches them by position.
select
    name,
    address,
    timezone,
    one_key_value,
    two_key_value
from table_b
;
对于更早的Hive版本,可以改用Brickhouse提供的collect UDAF:
-- Variant that uses the Brickhouse collect() UDAF to build the struct arrays.
add jar /path/to/jar/brickhouse-0.7.1.jar;
create temporary function collect as 'brickhouse.udf.collect.CollectUDAF';

-- Rebuild table_a's two map<string,array<string>> columns as
-- array<struct<key:string,value:array<string>>> so that both sides of the
-- UNION ALL expose identical column types.
--
-- Each map is exploded and aggregated in its own subquery. Exploding both maps
-- in a single subquery yields every (k1, k2) combination per row, and
-- collect() keeps duplicates, so each struct would be repeated in the output.
select
    name,
    address,
    timezone,
    one_key_value,
    two_key_value
from
(
    select
        o.name,
        o.address,
        o.timezone,
        o.one_key_value,
        t.two_key_value
    from
    (
        -- one_key_value: map entries -> array of struct<key,value>.
        select
            a.name,
            a.address,
            a.timezone,
            collect(named_struct('key', k1.key, 'value', k1.value)) as one_key_value
        from table_a a
        -- AS is required before the column aliases. OUTER keeps rows whose
        -- map is NULL or empty.
        -- NOTE(review): such rows appear to contribute a struct with NULL key
        -- and value to the collected array - confirm this is acceptable.
        lateral view outer explode(a.one_key_value) k1 as key, value
        group by a.name, a.address, a.timezone
    ) o
    join
    (
        -- two_key_value: same transformation, aggregated independently.
        select
            a.name,
            a.address,
            a.timezone,
            collect(named_struct('key', k2.key, 'value', k2.value)) as two_key_value
        from table_a a
        lateral view outer explode(a.two_key_value) k2 as key, value
        group by a.name, a.address, a.timezone
    ) t
        -- Null-safe equality so groups with a NULL name/address/timezone
        -- still find their counterpart.
        on  o.name     <=> t.name
        and o.address  <=> t.address
        and o.timezone <=> t.timezone
) table_a
union all
-- Columns are listed explicitly because UNION ALL matches them by position.
select
    name,
    address,
    timezone,
    one_key_value,
    two_key_value
from table_b
;