我想使用 Hive 统计两列中值组合出现的次数。
输入表:
+--------------+--------------------+
| Col1 | Col2 |
+--------------+--------------------+
| Sam | Ricky |
| Patel | Trump |
| Suzane | Robert |
| Ricky | Sam |
| Bob | Ricky |
| Robert | Suzane |
+--------------+--------------------+
需要找出的组合(两列的值互换顺序视为同一组合):
.............................
| Sam | Ricky |
| Ricky | Sam |
| Suzane | Robert |
| Robert | Suzane |
.............................
预期输出:
Sam Ricky 2
Robert Suzane 2
答案 0 :(得分:2)
-- Count unordered (col1, col2) pairs.
-- least/greatest put the two values of each row into one canonical order, so
-- (a, b) and (b, a) fall into the same group; only pairs that occur at least
-- twice are returned.
with ordered_pair as (
    select
        least(col1, col2)    as least_col,
        greatest(col1, col2) as greatest_col
    from mytable
)
select
    least_col,
    greatest_col,
    count(*)
from ordered_pair
group by
    least_col,
    greatest_col
having count(*) >= 2
;
-- Demo table: one row per (Col1, Col2) pair of string values.
create table mytable
(
    Col1 string
   ,Col2 string
)
;
-- Load the six sample pairs from the question.
-- The target columns are named explicitly so the values stay bound to the
-- right columns even if the table's column order changes later.
insert into mytable (col1, col2) values
    ('Sam'    ,'Ricky' )
   ,('Patel'  ,'Trump' )
   ,('Suzane' ,'Robert')
   ,('Ricky'  ,'Sam'   )
   ,('Bob'    ,'Ricky' )
   ,('Robert' ,'Suzane')
;
-- Show every row currently stored in mytable.
select *
from mytable
;
+--------------+--------------+
| mytable.col1 | mytable.col2 |
+--------------+--------------+
| Sam | Ricky |
| Patel | Trump |
| Suzane | Robert |
| Ricky | Sam |
| Bob | Ricky |
| Robert | Suzane |
+--------------+--------------+
-- For each row, show the original pair next to its canonical ordering:
-- least_col is the smaller of the two values, greatest_col the larger.
select
    col1,
    col2,
    least(col1, col2)    as least_col,
    greatest(col1, col2) as greatest_col
from mytable
;
+--------+--------+-----------+--------------+
| col1 | col2 | least_col | greatest_col |
+--------+--------+-----------+--------------+
| Sam | Ricky | Ricky | Sam |
| Patel | Trump | Patel | Trump |
| Suzane | Robert | Robert | Suzane |
| Ricky | Sam | Ricky | Sam |
| Bob | Ricky | Bob | Ricky |
| Robert | Suzane | Robert | Suzane |
+--------+--------+-----------+--------------+
-- Group rows by their canonical (least, greatest) ordering so that (a, b) and
-- (b, a) are counted together, then keep the pairs seen at least twice.
with ordered_pair as (
    select
        least(col1, col2)    as least_col,
        greatest(col1, col2) as greatest_col
    from mytable
)
select
    least_col,
    greatest_col,
    count(*)
from ordered_pair
group by
    least_col,
    greatest_col
having count(*) >= 2
;
+-----------+--------------+-----+
| least_col | greatest_col | _c2 |
+-----------+--------------+-----+
| Robert | Suzane | 2 |
| Ricky | Sam | 2 |
+-----------+--------------+-----+