Oracle row_number / rank具有特定逻辑

时间:2019-04-03 07:49:02

标签: sql oracle oracle11g ranking row-number

我需要某种机制,用 row_number 或 rank 对行进行排名。我尝试了下面 RNK1、RNK2 两列所示的两种写法,但不确定这是否可行。请查看实际结果和预期结果。

-- Question's first attempt: build ten sample rows inline, then try to number
-- the flag = 1 rows with ROW_NUMBER() (rnk1) and RANK() (rnk2).
with tmp as (
    select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
    select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual
) 
select 
    tmp.*,
    -- The window is partitioned AND ordered by flag, so every row in a partition
    -- has the same sort key: the order in which ROW_NUMBER() hands out 1..n is arbitrary.
    case when flag = 1 then row_number() over(partition by flag order by flag) else null end as rnk1,
    -- Same window: all rows in the partition tie, so RANK() is 1 for every flag = 1 row.
    case when flag = 1 then rank() over(partition by flag order by flag) else null end as rnk2
from tmp
-- Only the final presentation is ordered by time; startdate plays no part in the ranking windows.
order by startdate, username

实际:

+-------------+--------------------+--------+--------+--------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1      | 6      | 1      |
| "username1" | 01-APR-19 01:00:00 | 0      |        |        |
| "username1" | 01-APR-19 02:00:00 | 1      | 4      | 1      |
| "username1" | 01-APR-19 03:00:00 | 1      | 3      | 1      |
| "username1" | 01-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 01:00:00 | 1      | 5      | 1      |
| "username1" | 02-APR-19 02:00:00 | 1      | 1      | 1      |
| "username1" | 02-APR-19 03:00:00 | 1      | 2      | 1      |
| "username1" | 02-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 05:00:00 | 0      |        |        |
+-------------+--------------------+--------+--------+--------+

预期:

+-------------+--------------------+--------+--------+--------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1      | 1      |
| "username1" | 01-APR-19 01:00:00 | 0      |        |        |
| "username1" | 01-APR-19 02:00:00 | 1      | 2      | 2      |
| "username1" | 01-APR-19 03:00:00 | 1      | 2      | 2      |
| "username1" | 01-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 01:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 02:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 03:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 05:00:00 | 0      |        |        |
+-------------+--------------------+--------+--------+--------+

感谢大家的快速回复。我按照你们的建议继续尝试,但又遇到了问题:

-- Question's second attempt: sample rows now carry a threshold column, and the
-- group number is derived from DENSE_RANK() minus a per-flag ROW_NUMBER().
WITH tmp AS (
    SELECT 'username1' AS username, TO_DATE('2019-04-01 00:00','YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 01:00','YYYY-MM-DD HH24:MI'), 0, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 02:00','YYYY-MM-DD HH24:MI'), 1, 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 03:00','YYYY-MM-DD HH24:MI'), 1, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 04:00','YYYY-MM-DD HH24:MI'), 0, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 05:00','YYYY-MM-DD HH24:MI'), 0, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 01:00','YYYY-MM-DD HH24:MI'), 1, 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 02:00','YYYY-MM-DD HH24:MI'), 1, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 03:00','YYYY-MM-DD HH24:MI'), 1, 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 04:00','YYYY-MM-DD HH24:MI'), 1, NULL FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 05:00','YYYY-MM-DD HH24:MI'), 0, NULL FROM dual
)
SELECT
    username,
    startdate,
    flag,
    threshold,
    -- grp stays NULL unless flag = 1. For flag = 1 rows it is the overall
    -- position of the row minus its zero-based position among the flag = 1 rows,
    -- so a run of adjacent flag = 1 rows shares one value. threshold is only a
    -- trailing sort key here, so it never splits a run.
    CASE
        WHEN flag = 1 THEN
            DENSE_RANK() OVER (ORDER BY startdate, username, threshold)
            - (ROW_NUMBER() OVER (PARTITION BY flag, username ORDER BY startdate, username) - flag)
    END AS grp
FROM tmp
ORDER BY
    startdate,
    username

实际:

+-------------+--------------------+--------+-------------+-------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1           | 1     |
| "username1" | 01-APR-19 01:00:00 | 0      |             |       |
| "username1" | 01-APR-19 02:00:00 | 1      | 1           | 2     |
| "username1" | 01-APR-19 03:00:00 | 1      |             | 2     |
| "username1" | 01-APR-19 04:00:00 | 0      |             |       |
| "username1" | 01-APR-19 05:00:00 | 0      |             |       |
| "username1" | 02-APR-19 01:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 02:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 03:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 04:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 05:00:00 | 0      |             |       |
+-------------+--------------------+--------+-------------+-------+

预期:

+-------------+--------------------+--------+-------------+-------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1           | 1     |
| "username1" | 01-APR-19 01:00:00 | 0      |             |       |
| "username1" | 01-APR-19 02:00:00 | 1      | 1           | 2     |
| "username1" | 01-APR-19 03:00:00 | 1      |             | 2     |
| "username1" | 01-APR-19 04:00:00 | 0      |             |       |
| "username1" | 01-APR-19 05:00:00 | 0      |             |       |
| "username1" | 02-APR-19 01:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 02:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 03:00:00 | 1      | 1           | 5     |
| "username1" | 02-APR-19 04:00:00 | 1      |             | 5     |
| "username1" | 02-APR-19 05:00:00 | 0      |             |       |
+-------------+--------------------+--------+-------------+-------+

4 个答案:

答案 0 :(得分:2)

假设您要对连续的 flag = 1 行进行分组,则可以结合使用 Tabibitosan 技术和 dense_rank 来做到这一点,就像这样:

-- Rank each run of consecutive flag = 1 rows (1, 2, 3, ...) per user, using the
-- Tabibitosan (gaps-and-islands) technique followed by DENSE_RANK().
WITH      tmp AS (select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
                  select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
                  select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual),
  -- grp: position of the row among all of the user's rows minus its position
  -- among the user's flag = 1 rows. The difference is constant inside a run of
  -- adjacent flag = 1 rows and grows after every interruption by flag = 0 rows.
  -- Both windows are partitioned by username so that rows of different users
  -- can never end up in the same island.
  tabibitosan AS (SELECT tmp.*,
                         CASE
                           WHEN flag = 1 THEN
                            row_number() over(PARTITION BY username ORDER BY startdate)
                            - row_number() over(PARTITION BY username, flag ORDER BY startdate)
                         END grp
                  FROM   tmp)
SELECT username,
       startdate,
       flag,
       -- Turn the (possibly gappy) island identifiers into consecutive ranks,
       -- restarting at 1 for every user; flag = 0 rows stay NULL.
       CASE
         WHEN flag = 1 THEN
          dense_rank() over(PARTITION BY username, flag ORDER BY grp)
       END rnk
FROM   tabibitosan
ORDER  BY startdate,
          username;

USERNAME  STARTDATE                 FLAG        RNK
--------- ------------------- ---------- ----------
username1 01/04/2019 00:00:00          1          1
username1 01/04/2019 01:00:00          0 
username1 01/04/2019 02:00:00          1          2
username1 01/04/2019 03:00:00          1          2
username1 01/04/2019 04:00:00          0 
username1 02/04/2019 01:00:00          1          3
username1 02/04/2019 02:00:00          1          3
username1 02/04/2019 03:00:00          1          3
username1 02/04/2019 04:00:00          0 
username1 02/04/2019 05:00:00          0 

我已更新查询以考虑额外的阈值列:

-- Rank runs of consecutive flag = 1 rows per user, additionally starting a new
-- rank whenever a flag = 1 row carries a threshold.
WITH      tmp AS (select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual),
  tabibitosan AS (SELECT tmp.*,
                         -- Island identifier: constant within a run of adjacent
                         -- flag = 1 rows of one user (Tabibitosan technique).
                         CASE
                           WHEN flag = 1 THEN
                            row_number() over(PARTITION BY username ORDER BY startdate) - row_number() over(PARTITION BY username, flag ORDER BY startdate)
                         END grp,
                         -- Running total of thresholds over the user's flag = 1 rows: it
                         -- steps up on each row that has a threshold and is carried forward
                         -- by rows that have none. Assumes threshold is 1 or null; change
                         -- the CASE expression inside the SUM if this isn't the case.
                         SUM(CASE WHEN flag = 1 THEN threshold END) OVER (PARTITION BY username, flag ORDER BY startdate) threshold_sum
                  FROM   tmp)
SELECT username,
       startdate,
       flag,
       threshold,
       -- A new rank starts when either the island or the running threshold
       -- total changes. The window is partitioned by username as well as flag,
       -- matching the windows in the CTE, so each user is ranked independently.
       CASE
         WHEN flag = 1 THEN
          dense_rank() over(PARTITION BY username, flag ORDER BY grp, threshold_sum)
       END rnk
FROM   tabibitosan
ORDER  BY startdate,
          username;

USERNAME  STARTDATE         FLAG  THRESHOLD        RNK
--------- ----------- ---------- ---------- ----------
username1 01/04/2019           1          1          1
username1 01/04/2019           0            
username1 01/04/2019           1          1          2
username1 01/04/2019           1                     2
username1 01/04/2019           0            
username1 01/04/2019           0            
username1 02/04/2019           1                     3
username1 02/04/2019           1          1          4
username1 02/04/2019           1          1          5
username1 02/04/2019           1                     5
username1 02/04/2019           0            

我假设阈值列只能为 1 或为 null;如果不是这种情况,则必须根据您的数据调整条件求和(即 SUM 中的 CASE 表达式)。

我还更新了分析函数的 PARTITION BY 子句,使其包含 username 列,因为我假设它是这份数据的键。

最后,请注意,我对示例数据做了一些修改,以说明:如果 flag = 1 的一组行中第一行的阈值为空,而下一行带有阈值,那么下一行将单独开始一个新的组。如果这不是您想要的行为,则需要在问题中补充您期望的逻辑。

答案 1 :(得分:1)

尝试如下

-- Answer 1: label each run of adjacent flag = 1 rows with a shared group number.
WITH tmp AS (
    SELECT 'username1' AS username, TO_DATE('2019-04-01 00:00','YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 01:00','YYYY-MM-DD HH24:MI'), 0 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 02:00','YYYY-MM-DD HH24:MI'), 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 03:00','YYYY-MM-DD HH24:MI'), 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-01 04:00','YYYY-MM-DD HH24:MI'), 0 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 05:00','YYYY-MM-DD HH24:MI'), 0 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 01:00','YYYY-MM-DD HH24:MI'), 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 02:00','YYYY-MM-DD HH24:MI'), 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 03:00','YYYY-MM-DD HH24:MI'), 1 FROM dual
    UNION ALL
    SELECT 'username1', TO_DATE('2019-04-02 04:00','YYYY-MM-DD HH24:MI'), 0 FROM dual
)
SELECT
    username,
    startdate,
    flag,
    -- For flag = 1 rows: overall position minus zero-based position among the
    -- flag = 1 rows; NULL otherwise. The value is shared by adjacent flag = 1
    -- rows. It is a group identifier rather than a true rank: it comes out as
    -- 1, 2, 3 for this sample because each gap between runs is exactly one
    -- flag = 0 row; a longer gap makes the numbering skip values.
    CASE
        WHEN flag = 1 THEN
            DENSE_RANK() OVER (ORDER BY startdate, username)
            - (ROW_NUMBER() OVER (PARTITION BY flag, username ORDER BY startdate, username) - flag)
    END AS grp
FROM tmp
ORDER BY
    startdate,
    username


USERNAME    STARTDATE   FLAG    GRP
username1   01-APR-19   1       1
username1   01-APR-19   0   
username1   01-APR-19   1       2
username1   01-APR-19   1       2
username1   01-APR-19   0   
username1   02-APR-19   1       3
username1   02-APR-19   1       3
username1   02-APR-19   1       3
username1   02-APR-19   0   
username1   02-APR-19   0   

Online demo

答案 2 :(得分:1)

-- Answer 2: number the flag = 1 rows with a running count of thresholds per user.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual
)
select tmp.*,
       -- COUNT(threshold) skips NULLs, so the running count only advances on
       -- rows that carry a threshold; rows without one repeat the previous
       -- value. The number is shown for flag = 1 rows only.
       case
         when flag = 1 then count(threshold) over (partition by username order by startdate)
       end as rn
from tmp
-- Explicit ordering: without it the row order of the result is not guaranteed.
order by username, startdate;

USERNAME  STARTDATE                 FLAG  THRESHOLD         RN
--------- ------------------- ---------- ---------- ----------
username1 2019-04-01 00:00:00          1          1          1
username1 2019-04-01 01:00:00          0                      
username1 2019-04-01 02:00:00          1          1          2
username1 2019-04-01 03:00:00          1                     2
username1 2019-04-01 04:00:00          0                      
username1 2019-04-01 05:00:00          0                      
username1 2019-04-02 01:00:00          1          1          3
username1 2019-04-02 02:00:00          1                     3
username1 2019-04-02 03:00:00          1          1          4
username1 2019-04-02 04:00:00          1                     4
username1 2019-04-02 05:00:00          0                      

11 rows selected.

答案 3 :(得分:0)

-- Answer 3: number runs of consecutive flag = 1 rows with row pattern matching.
-- NOTE: MATCH_RECOGNIZE is available from Oracle Database 12c onwards; it
-- cannot be used on 11g.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual
)
select username,
       startdate,
       flag,
       -- The match number is only meaningful for the flag = 1 part of a match.
       case when flag = 1 then match_num end as rn
from tmp
match_recognize(
partition by username
order by startdate
measures match_number() AS match_num
all rows per match
-- One match = an optional run of flag = 0 rows followed by an optional run of
-- flag = 1 rows, so every run of flag = 1 rows lands in its own match.
pattern (s* f*)
define f as f.flag = 1, s as s.flag = 0
)
-- Explicit ordering: without it the row order of the result is not guaranteed.
order by username, startdate;

USERNAME  STARTDATE                 FLAG         RN
--------- ------------------- ---------- ----------
username1 2019-04-01 00:00:00          1          1
username1 2019-04-01 01:00:00          0           
username1 2019-04-01 02:00:00          1          2
username1 2019-04-01 03:00:00          1          2
username1 2019-04-01 04:00:00          0           
username1 2019-04-02 01:00:00          1          3
username1 2019-04-02 02:00:00          1          3
username1 2019-04-02 03:00:00          1          3
username1 2019-04-02 04:00:00          0           
username1 2019-04-02 05:00:00          0           

10 rows selected.