在Hive中联接具有几乎相同模式的表

时间:2018-10-17 18:43:26

标签: sql hive

我有带有模式的表A

name            string                                      
address         string                                      
timezone        string                                      
one_key_value   map<string,array<string>>                       
two_key_value   map<string,array<string>>

和具有模式的表B

name            string                                      
address         string                                      
timezone        string                                      
one_key_value   array<struct<key:string,value:array<string>>>
two_key_value   array<struct<key:string,value:array<string>>>

我正在尝试对这两张表执行 UNION ALL:

-- Naive attempt: stack every row of A on top of every row of B as-is.
-- A stores one_key_value / two_key_value as maps while B stores them as
-- arrays of structs, so Hive rejects this union with a schema mismatch.
select *
  from (
        select * from A
        union all
        select * from B
       ) tmp;

遇到错误

FAILED: SemanticException Schema of both sides of union should match.

有没有办法解决这个问题?两张表的模式几乎相同,只是表A用map保存键值对,而表B把键值对保存在结构体数组(array<struct>)中。

1 个答案:

答案 0 :(得分:1)

将map列展开(explode),再组装成array<struct<key:string,value:array<string>>>,使其类型与table_b中的一致。此方法应适用于Hive 1.3.0及以上版本:

-- Convert table_a's map columns into array<struct<key:string,value:array<string>>>
-- so that both branches of the UNION ALL expose the same column types.
-- Columns are listed explicitly so the union does not depend on the physical
-- column order of either table.
select a_converted.name,
       a_converted.address,
       a_converted.timezone,
       a_converted.one_key_value,
       a_converted.two_key_value
  from (
        select s.name,
               s.address,
               s.timezone,
               -- Reassemble the per-entry structs into arrays.
               -- collect_set (not collect_list) is required here: the two chained
               -- lateral views pair every entry of one map with every entry of the
               -- other, so each struct reaches the aggregate many times.
               collect_set(s.mystruct1) as one_key_value,
               collect_set(s.mystruct2) as two_key_value
          from (
                select a.name,
                       a.address,
                       a.timezone,
                       -- one struct<key:string,value:array<string>> per map entry
                       named_struct('key', k1.key, 'value', k1.value) as mystruct1,
                       named_struct('key', k2.key, 'value', k2.value) as mystruct2
                  from table_a a
                       -- explode each map into (key, value) rows; OUTER keeps rows
                       -- whose map is empty or NULL. The AS keyword is mandatory
                       -- before the column aliases.
                       lateral view outer explode(a.one_key_value) k1 as key, value
                       lateral view outer explode(a.two_key_value) k2 as key, value
               ) s
         -- NOTE: rows of table_a that share name, address and timezone are merged
         -- into a single output row by this grouping.
         group by s.name, s.address, s.timezone
       ) a_converted

union all

select b.name,
       b.address,
       b.timezone,
       b.one_key_value,
       b.two_key_value
  from table_b b
;

对于更早的Hive版本,可以改用Brickhouse提供的collect函数:

-- Register the Brickhouse aggregate that can collect complex (struct) values.
add jar /path/to/jar/brickhouse-0.7.1.jar;
create temporary function collect as 'brickhouse.udf.collect.CollectUDAF';

-- Convert table_a's map columns into array<struct<key:string,value:array<string>>>
-- and union the result with table_b.
--
-- Each map is exploded and aggregated in its own subquery. collect() keeps
-- duplicates, so exploding both maps in a single pass (which pairs every entry
-- of one map with every entry of the other) would repeat each struct once per
-- entry of the other map.
select a_converted.name,
       a_converted.address,
       a_converted.timezone,
       a_converted.one_key_value,
       a_converted.two_key_value
  from (
        select o.name,
               o.address,
               o.timezone,
               o.one_key_value,
               t.two_key_value
          from (
                select a.name,
                       a.address,
                       a.timezone,
                       -- one struct<key:string,value:array<string>> per map entry
                       collect(named_struct('key', k1.key, 'value', k1.value)) as one_key_value
                  from table_a a
                       -- OUTER keeps rows whose map is empty or NULL; the AS keyword
                       -- is mandatory before the column aliases.
                       lateral view outer explode(a.one_key_value) k1 as key, value
                 group by a.name, a.address, a.timezone
               ) o
          join (
                select a.name,
                       a.address,
                       a.timezone,
                       collect(named_struct('key', k2.key, 'value', k2.value)) as two_key_value
                  from table_a a
                       lateral view outer explode(a.two_key_value) k2 as key, value
                 group by a.name, a.address, a.timezone
               ) t
            -- Both sides hold exactly one row per (name, address, timezone) group;
            -- <=> is the null-safe equality so groups with NULL keys still match.
            on o.name <=> t.name
           and o.address <=> t.address
           and o.timezone <=> t.timezone
       ) a_converted

union all

select b.name,
       b.address,
       b.timezone,
       b.one_key_value,
       b.two_key_value
  from table_b b
;