Need hive query that calculates the date difference for consecutive records but for the same txn type and generate same number if the difference is less than 10 else generate new number.
+--------+----------+-------------+
| Txn_id | Txn_type | Txn_date |
+--------+----------+-------------+
| 1 | T100 | 26-Aug-2015 |
| 2 | T100 | 03-Nov-2015 |
| 3 | T100 | 05-Dec-2015 |
| 4 | T100 | 08-Dec-2015 |
| 5 | T100 | 25-Jan-2016 |
| 6 | T111 | 26-Jan-2016 |
| 7 | T200 | 02-Feb-2016 |
| 8 | T200 | 07-May-2016 |
| 9 | T200 | 12-May-2016 |
| 10 | T200 | 20-May-2016 |
+--------+----------+-------------+
+--------+----------+-------------+--------+
| Txn_id | Txn_type | Txn_date | Number |
+--------+----------+-------------+--------+
| 1 | T100 | 26-Aug-2015 | 1 |
| 2 | T100 | 03-Nov-2015 | 2 |
| 3 | T100 | 05-Dec-2015 | 3 |
| 4 | T100 | 08-Dec-2015 | 3 |
| 5 | T100 | 25-Jan-2016 | 4 |
| 6 | T111 | 26-Jan-2016 | 1 |
| 7 | T200 | 02-Feb-2016 | 1 |
| 8 | T200 | 07-May-2016 | 2 |
| 9 | T200 | 12-May-2016 | 2 |
| 10 | T200 | 20-May-2016 | 2 |
+--------+----------+-------------+--------+
答案 0 :(得分:2)
不确定是否"少于10天"意味着严格或非严格的不平等,但除此之外:
with
inputs ( txn_id, txn_type, txn_date ) as (
select 1, 'T100', to_date('26-Aug-2015', 'dd-Mon-yy') from dual union all
select 2, 'T100', to_date('03-Nov-2015', 'dd-Mon-yy') from dual union all
select 3, 'T100', to_date('05-Dec-2015', 'dd-Mon-yy') from dual union all
select 4, 'T100', to_date('08-Dec-2015', 'dd-Mon-yy') from dual union all
select 5, 'T100', to_date('25-Jan-2016', 'dd-Mon-yy') from dual union all
select 6, 'T111', to_date('26-Jan-2016', 'dd-Mon-yy') from dual union all
select 7, 'T200', to_date('02-Feb-2016', 'dd-Mon-yy') from dual union all
select 8, 'T200', to_date('07-May-2016', 'dd-Mon-yy') from dual union all
select 9, 'T200', to_date('12-May-2016', 'dd-Mon-yy') from dual union all
select 10, 'T200', to_date('20-May-2016', 'dd-Mon-yy') from dual
),
prep ( txn_id, txn_type, txn_date, ct ) as (
select txn_id, txn_type, txn_date,
case when txn_date < lag(txn_date) over (partition by txn_type
order by txn_date) + 10 then 0 else 1 end
from inputs
)
select txn_id, txn_type, txn_date,
sum(ct) over (partition by txn_type order by txn_date) as number_
from prep;
我使用number_
作为列名;不要将保留的Oracle单词用于表名或列名,除非你的生活依赖于它,甚至不是。
答案 1 :(得分:0)
使用公用表表达式来标记差异超过10天的行,然后计算这些行以获取新数字。
with test_data as (
SELECT 1 txn_id, 'T100' txn_type, to_date('26-AUG-2015','DD-MON-YYYY') txn_date from dual union all
SELECT 2 txn_id, 'T100', to_date('03-NOV-2015','DD-MON-YYYY') from dual union all
SELECT 3 txn_id, 'T100', to_date('05-DEC-2015','DD-MON-YYYY') from dual union all
SELECT 4 txn_id, 'T100', to_date('08-DEC-2015','DD-MON-YYYY') from dual union all
SELECT 5 txn_id, 'T100', to_date('25-JAN-2016','DD-MON-YYYY') from dual union all
SELECT 6 txn_id, 'T111', to_date('26-JAN-2016','DD-MON-YYYY') from dual union all
SELECT 7 txn_id, 'T200', to_date('02-FEB-2016','DD-MON-YYYY') from dual union all
SELECT 8 txn_id, 'T200', to_date('07-MAY-2016','DD-MON-YYYY') from dual union all
SELECT 9 txn_id, 'T200', to_date('12-MAY-2016','DD-MON-YYYY') from dual union all
SELECT 10 txn_id, 'T200', to_date('20-MAY-2016','DD-MON-YYYY') from dual),
markers as (
select td.*,
case when td.txn_date - nvl(lag(td.txn_date)
over ( partition by txn_type order by txn_id ), td.txn_date-9999) > 10
THEN 'Y' ELSE NULL end new_txn_marker from test_data td )
SELECT txn_id, txn_type,txn_date,
count(new_txn_marker) over ( partition by txn_type order by txn_id ) "NUMBER"
FROM markers;