如何在hive中实现数据范围的左连接

时间:2017-08-22 16:46:33

标签: hadoop hive hiveql

我想将下面的 Oracle 逻辑转换为 Hive 实现。逻辑如下:

-- Range left join: keep every row of a, and attach b.desc from each b row
-- whose [min_num, max_num] interval contains a.num.
select
    a.id,
    a.name,
    b.desc
from table a
left join table b
    on a.num between b.min_num and b.max_num;

有人能帮我在 Hive 中实现上述逻辑吗?

3 个答案:

答案 0 :(得分:1)

使用此解决方案,您可以自行控制性能。b 的每个范围会被拆分为若干子范围,子范围的大小(x)可以根据需要调小。

  • x 太大,实际上会退化为交叉连接(cross join)。
  • x 太小,可能会从 b 生成一个巨大的集合(x = 1 时会生成 b 各范围内的所有值)。
-- Bucket width x. Every b range is cut into buckets of this width so that the
-- range join can be driven by an equality on the bucket id.
--   * very large x: almost everything shares a bucket, close to a cross join
--   * very small x: many bucket rows are generated per b range
set hivevar:x=100;

with b_sub_ranges as (
    -- One row per (b range, bucket id it touches).
    -- space(n) builds n blanks and splitting on ' ' gives n + 1 pieces, so
    -- posexplode yields pos = 0 .. n, i.e. every bucket from min_num div x
    -- up to max_num div x. The bucket count is cast to int for space().
    -- Only the columns needed downstream are carried (no b.*).
    select
        b.min_num div ${hivevar:x} + pe.pos as sub_range_id,
        b.min_num,
        b.max_num,
        b.desc
    from table_b as b
    lateral view posexplode(
        split(
            space(cast(b.max_num div ${hivevar:x} - b.min_num div ${hivevar:x} as int)),
            ' '
        )
    ) pe as pos, val
),
matched as (
    -- Equi-join on the bucket id, then keep only true range hits.
    select
        a.id,
        r.desc
    from table_a as a
    inner join b_sub_ranges as r
        on a.num div ${hivevar:x} = r.sub_range_id
    where a.num between r.min_num and r.max_num
)
-- Re-attach the hits to every row of table_a so that rows without any
-- matching range are kept with a NULL desc.
-- NOTE: this last join is on id only, so it assumes id identifies a single
-- row of table_a. Rows sharing an id would also pick up each other's matches.
select
    a.id,
    a.name,
    m.desc
from table_a as a
left join matched as m
    on m.id = a.id
;

答案 1 :(得分:0)

with in_range as (
    -- Pair every table_a row with every table_b row, then keep the pairs
    -- where num falls inside the [min_num, max_num] interval.
    select
        src.id,
        rng.desc
    from table_a as src
    cross join table_b as rng
    where src.num between rng.min_num and rng.max_num
)
-- Left join back to table_a on id so rows without a matching range survive
-- with a NULL desc.
select
    a.id,
    a.name,
    hit.desc
from table_a as a
left join in_range as hit
    on hit.id = a.id
;

答案 2 :(得分:0)

with b_points as (
    -- Expand each table_b range into one row per value min_num + pos.
    -- space(n) builds n blanks and splitting on ' ' gives n + 1 pieces, so
    -- pos runs from 0 to max_num - min_num.
    -- NOTE(review): presumably min_num, max_num and a.num are integers,
    -- since the join below is an exact equality on num. Confirm.
    select
        rng.min_num + pe.pos as num,
        rng.desc
    from table_b as rng
    lateral view posexplode(split(space(rng.max_num - rng.min_num), ' ')) pe as pos, val
)
-- Plain equi left join on the expanded values.
select
    a.id,
    a.name,
    p.desc
from table_a as a
left join b_points as p
    on p.num = a.num
;