GBQ 参考页面上的所有排名函数都假设数据是多行、单列的。而对于下面的表,我想在每一行内对 algo_1 到 algo_5 这五列的值进行排名。
theTable:
cid algo_1 algo_2 algo_3 algo_4 algo_5
1 4.31 4.15 4.33 4.35 4.35
2 1.31 4.15 4.33 4.34 3.35
所以,结果是
cid algo_1 algo_2 algo_3 algo_4 algo_5 algo_1_rank algo_2_rank algo_3_rank algo_4_rank algo_5_rank
1 4.31 4.15 4.33 4.35 4.35 4 5 3 1 1
2 1.31 4.15 4.33 4.34 3.35 5 3 2 1 4
P.S. 实际上,这个问题是别人在 SO 之外问我的,所以我决定在这里分享。
答案 0 :(得分:1)
选项1 - 标准SQL
-- Option 1 (BigQuery standard SQL): rank the five algo_* columns within each row.
-- Approach: unpivot every row into (algo, value) pairs, rank the pairs per cid
-- with a window function, then pivot values and ranks back into columns.
WITH theTable AS (
  -- Inline sample data mirroring the table shown in the question.
  SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5 UNION ALL
  SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.34 AS algo_4, 3.35 AS algo_5
),
tempTable AS (
  SELECT
    cid,
    alg.k AS algo,
    alg.v AS value,
    -- RANK() gives tied values the same rank and leaves a gap after them.
    RANK() OVER(PARTITION BY cid ORDER BY alg.v DESC) AS rnk
  FROM theTable
  -- One (k, v) struct per algo column; UNNEST turns them into five rows per cid.
  CROSS JOIN UNNEST(
    ARRAY[STRUCT<k STRING, v FLOAT64>("algo_1", algo_1), STRUCT("algo_2", algo_2),
      STRUCT("algo_3", algo_3), STRUCT("algo_4", algo_4), STRUCT("algo_5", algo_5)]
  ) AS alg
)
-- Pivot back: exactly one row per (cid, algo), so MAX just picks that row's value.
SELECT
  cid,
  MAX(IF(algo = "algo_1", value, NULL)) AS algo_1,
  MAX(IF(algo = "algo_2", value, NULL)) AS algo_2,
  MAX(IF(algo = "algo_3", value, NULL)) AS algo_3,
  MAX(IF(algo = "algo_4", value, NULL)) AS algo_4,
  MAX(IF(algo = "algo_5", value, NULL)) AS algo_5,
  MAX(IF(algo = "algo_1", rnk, NULL)) AS algo_1_rank,
  MAX(IF(algo = "algo_2", rnk, NULL)) AS algo_2_rank,
  MAX(IF(algo = "algo_3", rnk, NULL)) AS algo_3_rank,
  MAX(IF(algo = "algo_4", rnk, NULL)) AS algo_4_rank,
  MAX(IF(algo = "algo_5", rnk, NULL)) AS algo_5_rank
FROM tempTable
GROUP BY cid
选项2 - 旧版SQL
#legacySQL
-- Option 2 (BigQuery legacy SQL): rank the five algo_* columns within each row.
-- Each row is cross-joined with the numbers 1..5 so that every algo_N column
-- becomes its own (cid, num, value) row; values are ranked per cid and then
-- pivoted back into columns with MAX(IF(...)).
SELECT
  cid,
  MAX(IF(num = 1, value, NULL)) AS algo_1,
  MAX(IF(num = 2, value, NULL)) AS algo_2,
  MAX(IF(num = 3, value, NULL)) AS algo_3,
  MAX(IF(num = 4, value, NULL)) AS algo_4,
  MAX(IF(num = 5, value, NULL)) AS algo_5,
  MAX(IF(num = 1, rnk, NULL)) AS algo_1_rank,
  MAX(IF(num = 2, rnk, NULL)) AS algo_2_rank,
  MAX(IF(num = 3, rnk, NULL)) AS algo_3_rank,
  MAX(IF(num = 4, rnk, NULL)) AS algo_4_rank,
  MAX(IF(num = 5, rnk, NULL)) AS algo_5_rank
FROM (
  SELECT
    cid, num,
    -- Pick the algo_N column that corresponds to the generated number.
    CASE
      WHEN num = 1 THEN algo_1
      WHEN num = 2 THEN algo_2
      WHEN num = 3 THEN algo_3
      WHEN num = 4 THEN algo_4
      WHEN num = 5 THEN algo_5
    END AS value,
    -- RANK() gives tied values the same rank and leaves a gap after them.
    RANK() OVER(PARTITION BY cid ORDER BY value DESC) AS rnk
  FROM (
    -- Inline sample data mirroring the table shown in the question.
    -- In legacy SQL a comma between subqueries means UNION ALL, not a join.
    SELECT * FROM
      (SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5),
      (SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.34 AS algo_4, 3.35 AS algo_5)
  ) AS theTable
  CROSS JOIN (
    -- SPLIT returns a repeated field, giving one num per element (1..5).
    SELECT INTEGER(SPLIT("1,2,3,4,5")) AS num FROM (SELECT 1)
  ) AS nums
)
GROUP BY cid
选项3 - 带标量UDF的标准SQL
-- myRank: returns the descending rank of each of the five inputs, in argument
-- order (element 1 is the rank of a, element 5 the rank of e).
-- Semantics follow RANK() OVER (ORDER BY value DESC): tied values share a rank
-- and the following rank is skipped; NULL sorts after every number and NaN
-- sorts just before NULL, as in a descending ORDER BY.
CREATE TEMPORARY FUNCTION myRank(a float64, b float64, c float64, d float64, e float64)
RETURNS ARRAY<int64>
LANGUAGE js AS """
  var values = [a, b, c, d, e];

  // True when x sorts strictly ahead of v in descending order.
  // SQL NULL arrives as JS null; it must not be coerced to 0 by arithmetic.
  function outranks(x, v) {
    if (x === null) { return false; }  // NULL is never ahead of anything
    if (v === null) { return true; }   // any non-NULL value is ahead of NULL
    if (isNaN(x)) { return false; }    // NaN is only ahead of NULL
    if (isNaN(v)) { return true; }     // any real number is ahead of NaN
    return x > v;
  }

  // Rank = 1 + number of inputs that sort strictly ahead of this one.
  return values.map(function (v) {
    var ahead = 0;
    for (var i = 0; i < values.length; i++) {
      if (outranks(values[i], v)) { ahead++; }
    }
    return ahead + 1;
  });
""";
-- Option 3 driver query: calls the myRank UDF once per row and spreads the
-- returned array into one *_rank column per algo column.
WITH theTable AS (
  -- Inline sample data mirroring the table shown in the question.
  SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5 UNION ALL
  SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.34 AS algo_4, 3.35 AS algo_5
),
tempTable AS (
  SELECT
    cid, algo_1, algo_2, algo_3, algo_4, algo_5,
    -- ranks holds one entry per argument, in the same order as the arguments.
    myRank(algo_1, algo_2, algo_3, algo_4, algo_5) AS ranks
  FROM theTable
)
SELECT
  cid, algo_1, algo_2, algo_3, algo_4, algo_5,
  -- ORDINAL is 1-based array access.
  ranks[ORDINAL(1)] AS algo_1_rank,
  ranks[ORDINAL(2)] AS algo_2_rank,
  ranks[ORDINAL(3)] AS algo_3_rank,
  ranks[ORDINAL(4)] AS algo_4_rank,
  ranks[ORDINAL(5)] AS algo_5_rank
FROM tempTable
选项4 - 提前处理排名
通常,像 theTable 这种结构的表是由多行原始数据生成的,其中每行只包含一个测试条目(cid、algo、value,如下面的 theOriginalData 所示)。
在这个更早的阶段直接进行排名是最合理的。
#legacySQL
-- Option 4 (BigQuery legacy SQL): rank while the data is still in long form
-- (one row per cid/algo/value), then pivot values and ranks into columns.
SELECT
  cid,
  MAX(IF(algo = "algo_1", value, NULL)) AS algo_1,
  MAX(IF(algo = "algo_2", value, NULL)) AS algo_2,
  MAX(IF(algo = "algo_3", value, NULL)) AS algo_3,
  MAX(IF(algo = "algo_4", value, NULL)) AS algo_4,
  MAX(IF(algo = "algo_5", value, NULL)) AS algo_5,
  MAX(IF(algo = "algo_1", rnk, NULL)) AS algo_1_rank,
  MAX(IF(algo = "algo_2", rnk, NULL)) AS algo_2_rank,
  MAX(IF(algo = "algo_3", rnk, NULL)) AS algo_3_rank,
  MAX(IF(algo = "algo_4", rnk, NULL)) AS algo_4_rank,
  MAX(IF(algo = "algo_5", rnk, NULL)) AS algo_5_rank
FROM (
  SELECT
    cid, algo, value,
    -- RANK() gives tied values the same rank and leaves a gap after them.
    RANK() OVER(PARTITION BY cid ORDER BY value DESC) AS rnk
  FROM (
    -- Inline sample data in long form, matching the table in the question.
    -- In legacy SQL a comma between subqueries means UNION ALL, not a join.
    SELECT * FROM
      (SELECT 1 AS cid, "algo_1" AS algo, 4.31 AS value),
      (SELECT 1 AS cid, "algo_2" AS algo, 4.15 AS value),
      (SELECT 1 AS cid, "algo_3" AS algo, 4.33 AS value),
      (SELECT 1 AS cid, "algo_4" AS algo, 4.35 AS value),
      (SELECT 1 AS cid, "algo_5" AS algo, 4.35 AS value),
      (SELECT 2 AS cid, "algo_1" AS algo, 1.31 AS value),
      (SELECT 2 AS cid, "algo_2" AS algo, 4.15 AS value),
      (SELECT 2 AS cid, "algo_3" AS algo, 4.33 AS value),
      (SELECT 2 AS cid, "algo_4" AS algo, 4.34 AS value),
      (SELECT 2 AS cid, "algo_5" AS algo, 3.35 AS value)
  ) AS theOriginalData
)
GROUP BY cid
注意:如果由我决定,我会选择选项 #4。如果由于某种原因(例如表已经存在)无法这样做,我会选择选项 #3,因为它看起来最优雅。
答案 1 :(得分:1)
Mikhail 为你的问题提供了几个很好的解决方案——如果其中某个方案对你有用,请将他的回答标记为已采纳。如果你最终使用的是选项 #3,我想给出它的一种替代写法:可以用 SQL UDF 来表达这一转换:
batches