如何在 Hive 中对两个项目列表做减法(求差集)

时间:2017-05-31 23:39:58

标签: sql hive

每个客户都有两个项目列表:ItemListA 和 ItemListB

Customer_id   ItemListA   ItemListB
   24            2,3         3,4,5
   26            6,7         8,9,10
   25            4,5          5,8

我想得到在 ItemListB 中但不在 ItemListA 中的项目,因此输出应为:

Customer_id   ItemListB_A
     24          4,5
     26          8,9,10
     25          8

我不知道如何在 Hive 中对两个字符串列表做减法。我知道 COLLECT_SET,但它只能去除重复项,无法去掉两个列表的交集。

4 个答案:

答案 0 :(得分:0)

这将产生预期的结果。

  -- Per-customer set difference ItemListB - ItemListA, returned as a
  -- comma-separated string (column ItemListB_A).
  --
  -- The previous version kept every item that occurred exactly once in the
  -- concatenation of both lists, i.e. the symmetric difference, so items that
  -- exist only in ItemListA leaked into the result (customer 24 gave 2,4,5).
  -- Here only ItemListB is exploded, and ItemListA is used purely as a filter.
  --
  -- Customers whose ItemListB entries are all present in ItemListA produce no
  -- output row. The order of items inside the result string is whatever order
  -- collect_list receives them in; it is not sorted explicitly.
  select Customer_id,
         concat_ws(',', collect_list(item)) as ItemListB_A
  from (
      -- One row per distinct ItemListB entry that is absent from ItemListA.
      select t.Customer_id,
             b.item
      from table_name t
      lateral view explode(split(t.ItemListB, ',')) b as item
      where not array_contains(split(t.ItemListA, ','), b.item)
      group by t.Customer_id, b.item
  ) kept
  group by Customer_id;

答案 1 :(得分:0)

-- Set difference ItemListB - ItemListA using string functions only:
--   1. Build '<ItemListB>,:,<ItemListA>'; the ':' element is a sentinel that
--      separates the two lists.
--   2. regexp_replace removes every 'item,' whose item occurs again later in
--      the string as a whole comma-delimited element (the later occurrence may
--      be in either list).
--   3. Split on ',?:' and keep element [0], the text before the sentinel.
-- NOTE(review): assumes ':' never appears in an item value - confirm.
select
    customer_id,
    split(
        regexp_replace(
            concat(ItemListB, ',:,', ItemListA),
            '(?<=^|,)(?<item>.*?),(?=.*(?<=,)\\k<item>(?=,|$))',
            ''
        ),
        ',?:'
    )[0] as ItemListB_A
from
    mytable
+-------------+-------------+
| customer_id | itemlistb_a |
+-------------+-------------+
|          24 | 4,5         |
|          26 | 8,9,10      |
|          25 | 8           |
+-------------+-------------+

答案 2 :(得分:0)

-- ItemListB minus ItemListA per customer, as a comma-separated string.
--
-- Inner query: the two lists are joined into one (A first, then B) and
-- exploded together with each element's position. Positions below the number
-- of ItemListA elements belong to A, so a (customer, item) group is kept only
-- when none of its rows comes from the A part.
-- Outer query: the surviving items are glued back together per customer.
select
    customer_id,
    concat_ws(',', collect_list(item)) as ItemListB_A
from (
    select
        src.customer_id,
        ex.item
    from mytable src
    lateral view posexplode(split(concat_ws(',', ItemListA, ItemListB), ',')) ex as pos, item
    group by
        src.customer_id,
        ex.item
    having
        -- number of rows of this item that sit in the ItemListA part must be zero
        sum(case when pos < size(split(ItemListA, ',')) then 1 else 0 end) = 0
) t
group by
    customer_id
+-------------+-------------+
| customer_id | itemlistb_a |
+-------------+-------------+
|          24 | 4,5         |
|          25 | 8           |
|          26 | 10,8,9      |
+-------------+-------------+

答案 3 :(得分:-1)

-- ItemListB minus ItemListA per customer, as a comma-separated string.
--
-- Inner query: the two lateral views pair every ItemListB element with every
-- ItemListA element of the same row. Grouping by (customer, B item) and
-- counting the pairs whose values are equal keeps only the B items that match
-- no A element.
-- Outer query: the surviving items are concatenated per customer.
select
    a.Customer_id,
    concat_ws(',', collect_list(a.item)) as ItemListB_A
from (
    select
        src.Customer_id,
        b_items.item
    from test_hive src
    lateral view posexplode(split(concat_ws(',', ItemListB), ',')) b_items as pos, item
    lateral view posexplode(split(concat_ws(',', ItemListA), ',')) a_items as pos1, item1
    group by
        src.Customer_id,
        b_items.item
    having
        -- zero (B item, A item) pairs with equal values
        count(if(item = item1, 1, null)) = 0
) a
group by
    a.Customer_id