我的 Hive 表有两列,其中 col2 的取值为 'Y' 或 'N'。我想在 Hive 中按顺序遍历这张表,每当状态发生变化(从 N 变为 Y,或从 Y 变为 N)时,就向另一张表中插入一行新记录。如何用 HiveQL 实现?谢谢大家!
答案 0 :(得分:0)
-- Template: copy into my_target every row whose col2 differs from the col2 of
-- the row immediately before it. The ... markers are placeholders to fill in:
-- the columns to insert, and the column(s) that define the row order.
-- The first row has no predecessor, so its prev_col2 is NULL and it is not inserted.
INSERT INTO my_target
SELECT ...
FROM (
    SELECT
        col2,
        lag(col2) OVER (ORDER BY ...) AS prev_col2
    FROM my_source
) t
WHERE t.col2 <> t.prev_col2
;
-- Demo input: break the literal YYNYNNNYYY into one row per character.
-- posexplode yields each character (col2) with its 0-based position (col1).
WITH my_source AS (
    SELECT posexplode(split('YYNYNNNYYY','\\B')) AS (col1,col2)
)
SELECT *
FROM my_source;
+----------------+----------------+
| my_source.col1 | my_source.col2 |
+----------------+----------------+
| 0 | Y |
+----------------+----------------+
| 1 | Y |
+----------------+----------------+
| 2 | N |
+----------------+----------------+
| 3 | Y |
+----------------+----------------+
| 4 | N |
+----------------+----------------+
| 5 | N |
+----------------+----------------+
| 6 | N |
+----------------+----------------+
| 7 | Y |
+----------------+----------------+
| 8 | Y |
+----------------+----------------+
| 9 | Y |
+----------------+----------------+
-- Demo: list the rows where col2 changed relative to the previous row.
-- my_source holds one row per character of the literal, ordered by position col1.
-- t pairs every row with the col2 of the row before it (NULL for the first row).
WITH my_source AS (
    SELECT posexplode(split('YYNYNNNYYY','\\B')) AS (col1,col2)
),
t AS (
    SELECT
        col1,
        col2,
        lag(col2) OVER (ORDER BY col1) AS prev_col2
    FROM my_source
)
SELECT *
FROM t
WHERE t.col2 <> t.prev_col2
;
+--------+--------+-------------+
| t.col1 | t.col2 | t.prev_col2 |
+--------+--------+-------------+
| 2 | N | Y |
+--------+--------+-------------+
| 3 | Y | N |
+--------+--------+-------------+
| 4 | N | Y |
+--------+--------+-------------+
| 7 | Y | N |
+--------+--------+-------------+