我有一位同事不希望在百分位排名(percent rank)中包含 NULL 行。Teradata 的默认函数似乎只是把 NULL 当作集合中的最小值,因此我决定手动计算。我先用下面的查询来测试我的公式:
-- Rebuild the scratch table used by the test queries.
-- NOTE(review): the drop assumes tmp already exists in this session; confirm
-- how a missing table should be handled on the very first run.
drop table tmp;
-- Session-local (volatile) table holding a single nullable numeric column.
-- "on commit preserve rows" keeps the inserted rows after each commit.
create multiset volatile table tmp (
num byteint
) primary index (num)
on commit preserve rows
;
-- Sample data: 13 rows = 10 non-null values (with ties) + 3 NULLs.
-- Each insert names its target column explicitly so the statements keep
-- working if the table ever gains another column.
-- The leading ';' layout is kept from the original script (presumably so the
-- inserts are submitted as one multi-statement request -- TODO confirm).
insert into tmp (num)
values (1)
;insert into tmp (num)
values (2)
;insert into tmp (num)
values (1)
;insert into tmp (num)
values (4)
;insert into tmp (num)
values (null)
;insert into tmp (num)
values (4)
;insert into tmp (num)
values (null)
;insert into tmp (num)
values (2)
;insert into tmp (num)
values (9)
;insert into tmp (num)
values (null)
;insert into tmp (num)
values (10)
;insert into tmp (num)
values (10)
;insert into tmp (num)
values (11)
;
-- Manual percent rank that leaves NULLs out: the dense rank of each non-null
-- num divided by the number of non-null rows.
-- NOTE: in the output posted below, str_rnk shows dense values while pct_rnk
-- looks as if it had been computed from gapped (rank-style) values.
select
num,
case
-- NULL rows are pinned to rank 0
when num is null then 0
-- non-null rows are ranked within their own partition (1 = non-null, 2 = NULL)
else cast(dense_rank() over (partition by case when num is not null then 1 else 2 end order by num) as number)
end as str_rnk,
q.nn,
-- reuses the str_rnk alias defined earlier in this same select list
str_rnk/q.nn as pct_rnk
from tmp
cross join (
-- count(num) ignores NULLs, so nn is the number of non-null rows
select cast(count(num) as number) as nn from tmp
) q
order by num
;
所以我期望在结果集中看到的是:
num str_rnk nn pct_rnk
null 0 10 0
null 0 10 0
null 0 10 0
1 1 10 0.1
1 1 10 0.1
2 2 10 0.2
2 2 10 0.2
4 3 10 0.3
4 3 10 0.3
9 4 10 0.4
10 5 10 0.5
10 5 10 0.5
但是我得到的 pct_rnk 看起来是按普通的 rank 而不是 dense_rank 计算的,如下所示:
num str_rnk nn pct_rnk
null 0 10 0
null 0 10 0
null 0 10 0
1 1 10 0.1
1 1 10 0.1
2 2 10 0.3
2 2 10 0.3
4 3 10 0.5
4 3 10 0.5
9 4 10 0.7
10 5 10 0.8
10 5 10 0.8
我知道可以把排名放到子查询中计算,那样就能得到我期望的结果,但是为什么我现在的写法得不到呢?
答案 0 :(得分:2)
虽然这并不能真正回答你的问题,但问题并不出在除法上:在同一个 SELECT 中两次使用 CAST 和 Dense_Rank 似乎会触发某种奇怪的问题。
考虑:
-- Reproduction: the identical CAST(dense_rank() ... AS NUMBER) expression is
-- selected twice under two aliases. In the output shown below the two columns
-- disagree on non-null rows (str_rnk2 has gaps), which takes the division out
-- of the picture.
select
num,
case
when num is null then 0
else cast(dense_rank() over (partition by case when num is not null then 1 else 2 end order by num) as number)
end as str_rnk,
-- same expression as str_rnk, character for character
case
when num is null then 0
else cast(dense_rank() over (partition by case when num is not null then 1 else 2 end order by num) as number)
end as str_rnk2
from tmp
-- q.nn is not referenced in this select list; the join is carried over from
-- the question's query
cross join (
select cast(count(num) as number) as nn from tmp
) q;
+--------+---------+----------+
| num | str_rnk | str_rnk2 |
+--------+---------+----------+
| 1 | 1 | 1 |
| 1 | 1 | 1 |
| 2 | 2 | 3 |
| 2 | 2 | 3 |
| 4 | 3 | 5 |
| 4 | 3 | 5 |
| 9 | 4 | 7 |
| 10 | 5 | 8 |
| 10 | 5 | 8 |
| 11 | 6 | 10 |
| <null> | 0 | 0 |
| <null> | 0 | 0 |
| <null> | 0 | 0 |
+--------+---------+----------+
由于此处其实不需要 CAST,去掉之后两列的结果就一致了:
-- Same two-column comparison with the CAST removed: both columns now carry
-- the plain DENSE_RANK result, and the output shown below has them equal.
SELECT
    num,
    CASE
        WHEN num IS NULL THEN 0
        ELSE DENSE_RANK() OVER (
            PARTITION BY CASE WHEN num IS NOT NULL THEN 1 ELSE 2 END
            ORDER BY num
        )
    END AS str_rnk,
    CASE
        WHEN num IS NULL THEN 0
        ELSE DENSE_RANK() OVER (
            PARTITION BY CASE WHEN num IS NOT NULL THEN 1 ELSE 2 END
            ORDER BY num
        )
    END AS str_rnk2
FROM tmp
-- Single-row derived table carried over from the question; nn is not selected.
CROSS JOIN (
    SELECT CAST(COUNT(num) AS NUMBER) AS nn
    FROM tmp
) q;
+--------+---------+----------+
| num | str_rnk | str_rnk2 |
+--------+---------+----------+
| 1 | 1 | 1 |
| 1 | 1 | 1 |
| 2 | 2 | 2 |
| 2 | 2 | 2 |
| 4 | 3 | 3 |
| 4 | 3 | 3 |
| 9 | 4 | 4 |
| 10 | 5 | 5 |
| 10 | 5 | 5 |
| 11 | 6 | 6 |
| <null> | 0 | 0 |
| <null> | 0 | 0 |
| <null> | 0 | 0 |
+--------+---------+----------+
对您的查询做一个快速重写:
-- NULL-aware percent rank without the derived table.
-- num * 0 evaluates to 0 for every non-null num and to NULL for NULL, so it
-- splits the rows into a non-null partition and a NULL partition.
SELECT
    num,
    CASE
        WHEN num IS NOT NULL
            THEN DENSE_RANK() OVER (PARTITION BY num * 0 ORDER BY num)
        ELSE 0
    END AS str_rnk,
    -- * 1.0 moves the division out of integer arithmetic; the output shown
    -- below carries one decimal place, so widen the literal if more precision
    -- is needed (assumption based on that output -- confirm on your system).
    str_rnk * 1.0 / COUNT(*) OVER (PARTITION BY num * 0) AS pct_rnk
FROM tmp
ORDER BY num
;
+--------+---------+---------+
| num | str_rnk | pct_rnk |
+--------+---------+---------+
| <null> | 0 | 0.0 |
| <null> | 0 | 0.0 |
| <null> | 0 | 0.0 |
| 1 | 1 | 0.1 |
| 1 | 1 | 0.1 |
| 2 | 2 | 0.2 |
| 2 | 2 | 0.2 |
| 4 | 3 | 0.3 |
| 4 | 3 | 0.3 |
| 9 | 4 | 0.4 |
| 10 | 5 | 0.5 |
| 10 | 5 | 0.5 |
| 11 | 6 | 0.6 |
+--------+---------+---------+
或者,如果你想完全去掉 CASE 语句:
-- CASE-free variant of the NULL-aware percent rank.
-- (num * 0 + 1.0) is 1.0 for non-null num and NULL for NULL, so multiplying
-- by it both switches to decimal arithmetic and blanks out the NULL rows.
-- COALESCE then turns that NULL into 0 so NULL rows report str_rnk = 0 and
-- pct_rnk = 0, matching the CASE-based version instead of returning NULL.
select
num,
coalesce(
    dense_rank() over (partition by num * 0 order by num) * (num * 0 + 1.0),
    0
) as str_rnk,
-- COUNT(*) per partition: non-null rows are divided by the non-null row count
str_rnk/COUNT(*) OVER (PARTITION BY num * 0) as pct_rnk
from tmp
order by num;
答案 1 :(得分:1)
正如 JNevill 所说,这是一个 bug,你应该向 Teradata 支持部门提交一个事件(incident):
-- Smallest reproduction: b is nothing more than a reference to the alias a,
-- yet in the result shown below b differs from a on the non-null rows.
SELECT
num,
-- cast to FLOAT or DECIMAL works as expected
Cast(Dense_Rank() Over (ORDER BY num) AS NUMBER) AS a,
a AS b
FROM tmp
num a b
---- --- ---
? 1 1
? 1 1
? 1 1
1 2 4
1 2 4
2 3 6
2 3 6
4 4 8
4 4 8
9 5 10
10 6 11
10 6 11
11 7 13
但添加 QUALIFY a<>b 却会返回一个空结果 :-)
PERCENT_RANK 的原始计算基于:
Cast(Rank() Over (ORDER BY num) -1 AS DEC(18,6)) / Count(*) Over ()
如果要排除 NULL,可以改用 Count(num) 和 NULLS LAST:
-- Percent rank over the non-null values only.
-- NULLS LAST sorts the NULL rows after every real value so they do not take
-- up the first dense-rank slot; the CASE then reports 0 for them.
SELECT
   num,
   CASE
     WHEN num IS NOT NULL
     -- DECIMAL (not NUMBER) keeps the later division out of integer arithmetic
     THEN Cast(Dense_Rank() Over (ORDER BY num NULLS LAST) AS DECIMAL(18,6))
     ELSE 0
   END AS str_rnk,
   -- Count(num) counts non-null rows only. If every row is NULL it is 0, so
   -- NULLIF turns the divisor into NULL and the ratio becomes NULL instead of
   -- raising a division-by-zero error.
   str_rnk / NullIf(Count(num) Over (), 0) AS pct_rnk
FROM tmp
或者使用那个巧妙的 num * 0 技巧:
-- CASE-free percent rank over the non-null values only.
-- (num * 0 + 1.000000) is 1.000000 for non-null num and NULL for NULL, so the
-- product is a six-decimal rank for real values and NULL for NULL rows;
-- COALESCE maps those NULL rows to 0.
SELECT
   num,
   Coalesce(Dense_Rank()
            Over (ORDER BY num NULLS LAST)
            * (num * 0 + 1.000000), 0) AS str_rnk,
   -- Count(num) counts non-null rows only. If every row is NULL it is 0, so
   -- NULLIF turns the divisor into NULL and the ratio becomes NULL instead of
   -- raising a division-by-zero error.
   str_rnk / NullIf(Count(num) Over (), 0) AS pct_rnk
FROM tmp