我想给数据中的连续段(streak)编号,目标是找出 np 标记连续出现至少 3 次的位置。
以下是我的数据的一个子集:
-- Sample data for the question: a single id with rows 7 days apart and a 0/1 np flag.
-- The drop, create and insert statements all target bi_adhoc.test, which is the
-- table the queries read from, so the script can be re-run from scratch.
drop table if exists bi_adhoc.test;
create table bi_adhoc.test (id varchar(12),rd date,np decimal);
-- The explicit column list keeps the insert independent of the table's column order.
insert into bi_adhoc.test (id, rd, np)
select 'aaabbbccc', '2016-07-25'::date, 0 union all
select 'aaabbbccc', '2016-08-01'::date, 0 union all
select 'aaabbbccc', '2016-08-08'::date, 0 union all
select 'aaabbbccc', '2016-08-15'::date, 0 union all
select 'aaabbbccc', '2016-08-22'::date, 1 union all
select 'aaabbbccc', '2016-08-29'::date, 0 union all
select 'aaabbbccc', '2016-09-05'::date, 1 union all
select 'aaabbbccc', '2016-09-12'::date, 0 union all
select 'aaabbbccc', '2016-09-19'::date, 1;
我尝试使用 row_number() 和 count(),但似乎得不到我想要的结果。
-- Attempted numbering: a running position and a row count per id, plus the same
-- two figures per (id, np) pair. Because np_counter is partitioned by the np
-- value only, it keeps counting across separate runs of the same np value.
select
    t.*,
    row_number() over (partition by t.id order by t.rd) as all_ctr,
    count(t.id) over (partition by t.id) as all_count,
    row_number() over (partition by t.id, t.np order by t.rd) as np_counter,
    count(t.id) over (partition by t.id, t.np) as np_non_np
from bi_adhoc.test as t
order by t.rd;
以下是我的结果,以及所需的结果:
id rd np all_ctr all_count np_counter np_non_np **Desired**
aaabbbccc 7/25/2016 0 1 9 1 6 **1**
aaabbbccc 8/1/2016 0 2 9 2 6 **2**
aaabbbccc 8/8/2016 0 3 9 3 6 **3**
aaabbbccc 8/15/2016 0 4 9 4 6 **4**
aaabbbccc 8/22/2016 1 5 9 1 3 **1**
aaabbbccc 8/29/2016 0 6 9 5 6 **1**
aaabbbccc 9/5/2016 1 7 9 2 3 **1**
aaabbbccc 9/12/2016 0 8 9 6 6 **1**
aaabbbccc 9/19/2016 1 9 9 3 3 **1**
答案 0 :(得分:0)
一种做法是在 CTE 中计算 lag(np) 的值,然后将当前行的 np 与上一行的 np 进行比较,以检测连续段(streak)。这可能不是最佳方式,但似乎工作正常。
-- Numbers the rows inside each streak of identical consecutive np values, per id.
-- Approach: compare np with the previous row's np, turn a running total of the
-- "np changed" flags into a streak identifier, then number the rows within each
-- (id, streak_id). Ranking inside the is_streak flag alone would keep counting
-- across separate streaks, so the numbering is done per streak_id instead.
with source_cte as
(
    select
        t.*,
        row_number() over (partition by t.id order by t.rd) as row_num,
        lag(t.np, 1) over (partition by t.id order by t.rd) as prev_np
    from
        bi_adhoc.test t
)
, streak_cte as
(
    select
        s.*,
        -- kept from the earlier version of this query: 1 when the row repeats the
        -- previous np or is the first row of its id
        case when s.np = s.prev_np or s.prev_np is null then 1 else 0 end as is_streak,
        -- a row opens a new streak unless its np equals the previous row's np
        -- (the first row of an id has prev_np null, so it opens streak 1);
        -- the explicit rows frame makes this a strict row-by-row running total
        sum(case when s.np = s.prev_np then 0 else 1 end) over (
            partition by s.id
            order by s.rd
            rows between unbounded preceding and current row
        ) as streak_id
    from
        source_cte s
)
select
    c.*,
    row_number() over (partition by c.id, c.streak_id order by c.rd) as desired
from
    streak_cte c
order by
    -- id first so the output stays grouped when the table holds several ids
    c.id,
    c.rd;
答案 1 :(得分:0)
首先,我添加了一些额外的数据来帮助充分说明问题...
-- Rebuilds bi_adhoc.test with a longer sample: one id, rows 7 days apart, and a
-- 0/1 hit flag that forms several runs of different lengths.
drop table if exists bi_adhoc.test;

create table bi_adhoc.test
(
    id     varchar(12),
    period date,
    hit    decimal
);

insert into bi_adhoc.test (id, period, hit)
values
    ('aaabbbccc', cast('2016-07-25' as date), 0),
    ('aaabbbccc', cast('2016-08-01' as date), 0),
    ('aaabbbccc', cast('2016-08-08' as date), 0),
    ('aaabbbccc', cast('2016-08-15' as date), 1),
    ('aaabbbccc', cast('2016-08-22' as date), 1),
    ('aaabbbccc', cast('2016-08-29' as date), 0),
    ('aaabbbccc', cast('2016-09-05' as date), 0),
    ('aaabbbccc', cast('2016-09-12' as date), 1),
    ('aaabbbccc', cast('2016-09-19' as date), 0),
    ('aaabbbccc', cast('2016-09-26' as date), 1),
    ('aaabbbccc', cast('2016-10-03' as date), 1),
    ('aaabbbccc', cast('2016-10-10' as date), 1),
    ('aaabbbccc', cast('2016-10-17' as date), 1),
    ('aaabbbccc', cast('2016-10-24' as date), 1),
    ('aaabbbccc', cast('2016-10-31' as date), 0),
    ('aaabbbccc', cast('2016-11-07' as date), 0),
    ('aaabbbccc', cast('2016-11-14' as date), 0),
    ('aaabbbccc', cast('2016-11-21' as date), 0),
    ('aaabbbccc', cast('2016-11-28' as date), 0),
    ('aaabbbccc', cast('2016-12-05' as date), 1),
    ('aaabbbccc', cast('2016-12-12' as date), 1);
然后关键是弄清楚什么是连续段(streak),以及如何标识每一个连续段,以便据此对数据进行分区。
-- Numbers the rows inside every run of consecutive equal hit values, per id.
-- Within one id, (position among all rows) minus (position among the rows with
-- the same hit value) stays constant along a consecutive run, so that
-- difference is used as the partition key of the run.
with partitioned as
(
    select
        t.*,
        -- position of the row within its id; the alias spells out the column
        -- name shown in the result header
        row_number() over (partition by t.id order by t.period) as "row_number",
        -- running count of hit rows, null on miss rows
        case t.hit
            when 1 then row_number() over (partition by t.id, t.hit order by t.period)
        end as hit_counter,
        -- run key for hit rows
        case t.hit
            when 1 then row_number() over (partition by t.id order by t.period)
                      - row_number() over (partition by t.id, t.hit order by t.period)
        end as hit_partition,
        -- running count of miss rows, null on hit rows
        case t.hit
            when 0 then row_number() over (partition by t.id, t.hit order by t.period)
        end as miss_counter,
        -- run key for miss rows
        case t.hit
            when 0 then row_number() over (partition by t.id order by t.period)
                      - row_number() over (partition by t.id, t.hit order by t.period)
        end as miss_partition
    from bi_adhoc.test as t
)
select
    p.*,
    -- restart the numbering at 1 for each run, using the run key that matches the row's hit value
    case p.hit
        when 1 then row_number() over (partition by p.id, p.hit_partition order by p.period)
        when 0 then row_number() over (partition by p.id, p.miss_partition order by p.period)
    end as desired
from partitioned as p
order by
    p.id,
    p.period;
结果:
id period hit row_number hit_counter hit_partition miss_counter miss_partition desired
aaabbbccc 2016-07-25 0 1 NULL NULL 1 0 1
aaabbbccc 2016-08-01 0 2 NULL NULL 2 0 2
aaabbbccc 2016-08-08 0 3 NULL NULL 3 0 3
aaabbbccc 2016-08-15 1 4 1 3 NULL NULL 1
aaabbbccc 2016-08-22 1 5 2 3 NULL NULL 2
aaabbbccc 2016-08-29 0 6 NULL NULL 4 2 1
aaabbbccc 2016-09-05 0 7 NULL NULL 5 2 2
aaabbbccc 2016-09-12 1 8 3 5 NULL NULL 1
aaabbbccc 2016-09-19 0 9 NULL NULL 6 3 1
aaabbbccc 2016-09-26 1 10 4 6 NULL NULL 1
aaabbbccc 2016-10-03 1 11 5 6 NULL NULL 2
aaabbbccc 2016-10-10 1 12 6 6 NULL NULL 3
aaabbbccc 2016-10-17 1 13 7 6 NULL NULL 4
aaabbbccc 2016-10-24 1 14 8 6 NULL NULL 5
aaabbbccc 2016-10-31 0 15 NULL NULL 7 8 1
aaabbbccc 2016-11-07 0 16 NULL NULL 8 8 2
aaabbbccc 2016-11-14 0 17 NULL NULL 9 8 3
aaabbbccc 2016-11-21 0 18 NULL NULL 10 8 4
aaabbbccc 2016-11-28 0 19 NULL NULL 11 8 5
aaabbbccc 2016-12-05 1 20 9 11 NULL NULL 1
aaabbbccc 2016-12-12 1 21 10 11 NULL NULL 2