我有按时间排序的数据(例如帐户交易),每个帐户的每笔交易都有一个日期和一个指标,用于表明该笔交易的金额是否大于某个阈值(例如 $100)。我想为每个帐户计算金额超过 $100 的交易的最大连续次数。
我有一个解决方案,但是它需要三个步骤来完成数据转换和最终聚合。有没有更聪明的方法来解决这个问题?对于下面的样本数据,我期望得到的结果是:
-- part 1 --> result 3
-- part 2 --> result 1
/*
-- Fixture for the queries in this file. It is kept inside a block comment so
-- that running the file does not rebuild the table by accident; copy the
-- statements out to execute them.
-- NOTE(review): the `number` type suggests Oracle -- confirm the target engine.
--
-- Columns:
--   seq  - order of the transaction (1 = most recent)
--   part - account id
--   expr - 1 = transaction > $100, 0 = transaction <= $100

-- Remove a previous copy of the table. On a fresh schema the table does not
-- exist yet and this statement is expected to raise an error that can be ignored.
drop table tmp_is_something;

create table tmp_is_something (seq number, part number, expr number);

-- Account 1: runs of 2, 3, 2, 1, 3, 1 rows (expr = 1,0,1,0,1,0);
-- the longest run with expr = 1 has 3 rows.
insert into tmp_is_something (seq, part, expr) values (1, 1, 1);
insert into tmp_is_something (seq, part, expr) values (2, 1, 1);
insert into tmp_is_something (seq, part, expr) values (3, 1, 0);
insert into tmp_is_something (seq, part, expr) values (4, 1, 0);
insert into tmp_is_something (seq, part, expr) values (5, 1, 0);
insert into tmp_is_something (seq, part, expr) values (6, 1, 1);
insert into tmp_is_something (seq, part, expr) values (7, 1, 1);
insert into tmp_is_something (seq, part, expr) values (8, 1, 0);
insert into tmp_is_something (seq, part, expr) values (9, 1, 1);
insert into tmp_is_something (seq, part, expr) values (10, 1, 1);
insert into tmp_is_something (seq, part, expr) values (11, 1, 1);
insert into tmp_is_something (seq, part, expr) values (12, 1, 0);

-- Account 2: expr = 1,0,1; the longest run with expr = 1 has 1 row.
insert into tmp_is_something (seq, part, expr) values (1, 2, 1);
insert into tmp_is_something (seq, part, expr) values (2, 2, 0);
insert into tmp_is_something (seq, part, expr) values (3, 2, 1);

commit;

-- Inspect the loaded rows in a stable order.
select seq, part, expr
from tmp_is_something
order by part, seq;
*/
查询为:
-- Longest run of consecutive rows with expr = 1, per account (part).
--
-- Output columns:
--   part       - account id
--   seq_str    - every run of the account in seq order, rendered as
--                '<expr>-<run length>; ' and concatenated, e.g. for account 1
--                of the sample data: '1-2; 0-3; 1-2; 0-1; 1-3; 0-1; '
--   max_con_is - length of the longest run with expr = 1 (0 when there is none)
with flagged as (
    -- Flag every row whose expr differs from the previous row of the same
    -- account. The first row has no predecessor; coalesce() compares it with
    -- itself so it is never flagged.
    select
        seq,
        part,
        expr,
        case
            when expr <> coalesce(lag(expr, 1) over (partition by part order by seq), expr) then 1
            else 0
        end as change_to_prev
    from tmp_is_something
),
numbered as (
    -- The running total of flags numbers the runs 1, 2, 3, ... within each
    -- account: every row of the same run receives the same flip_sequence.
    select
        part,
        expr,
        sum(change_to_prev) over (
            partition by part
            order by seq
            rows between unbounded preceding and current row
        ) + 1 as flip_sequence
    from flagged
),
runs as (
    -- One row per run: its length and the expr value shared by its rows.
    select
        part,
        flip_sequence,
        count(*) as cnt,
        max(expr) as expr
    from numbered
    group by
        part,
        flip_sequence
)
select
    part,
    listagg(expr || '-' || cnt || '; ') within group (order by flip_sequence) as seq_str,
    -- expr is 0 or 1, so runs with expr = 0 contribute 0 to the maximum.
    max(expr * cnt) as max_con_is
from runs
group by part
order by part;
答案 0 :(得分:1)
使用分析函数 sum() 两次,基于连续的数据创建一个分组列,然后用该列做进一步的聚合:
-- Longest run of consecutive rows with expr = 1, per account (part).
-- Returns one row per account: part, cnt (0 when the account has no row with
-- expr = 1). Assumes expr only holds 0 or 1, as described for the sample table.
with grouped as (
    -- grp = rows seen so far minus rows with expr = 1 seen so far, i.e. the
    -- number of expr = 0 rows up to and including the current row. It stays
    -- constant across a run of expr = 1 rows and grows by one at each
    -- expr = 0 row, so every run of 1s shares a grp value with at most the
    -- single expr = 0 row that precedes it.
    select
        part,
        expr,
        count(*) over (
            partition by part
            order by seq
            rows between unbounded preceding and current row
        )
        - sum(expr) over (
            partition by part
            order by seq
            rows between unbounded preceding and current row
        ) as grp
    from tmp_is_something
),
runs as (
    -- Summing expr counts only the expr = 1 rows of each group, which is the
    -- length of the run; groups without such rows yield 0 instead of vanishing.
    select
        part,
        grp,
        sum(expr) as cnt
    from grouped
    group by
        part,
        grp
)
select
    part,
    max(cnt) as cnt
from runs
group by part