我经常使用相同的聚合函数组合进行查询。例如:
SELECT
    my_id,
    -- Weighted mean per column: rows where the value is NULL add nothing to
    -- either the numerator or the denominator; an empty or zero total weight
    -- yields NULL instead of a division-by-zero error.
    sum(a * weight) / nullif(sum(weight) FILTER (WHERE a IS NOT NULL), 0) AS a,
    sum(b * weight) / nullif(sum(weight) FILTER (WHERE b IS NOT NULL), 0) AS b
FROM my_table
GROUP BY my_id
我想避免一遍又一遍地重复相同的表达式。如果能用一个新函数 weighted_avg 得到相同的结果就太好了:
-- Desired call site: the repeated expression is hidden behind one aggregate.
SELECT
    mt.my_id,
    weighted_avg(mt.a, mt.weight) AS a,
    weighted_avg(mt.b, mt.weight) AS b
FROM my_table AS mt
GROUP BY mt.my_id
我知道的唯一方法是使用 CREATE AGGREGATE,配合一个中间状态和一个对每一行调用的 SFUNC。不幸的是,这比原始查询慢得多,因此在我的场景下无法使用。
我想,我理想中的解决方案大概是这样的:
-- Hypothetical syntax sketching the wished-for feature: an aggregate defined by
-- a single SQL expression over built-in aggregates, which could be inlined
-- into the calling query.
-- NOTE(review): this is not PostgreSQL's CREATE AGGREGATE syntax; the
-- surrounding text states that no supported equivalent was found.
CREATE AGGREGATE FUNCTION weighted_avg(x float, weight float)
RETURNS float AS $$
SELECT sum(x * weight) / nullif(sum(CASE WHEN x IS NOT NULL THEN weight END), 0)
$$ language SQL IMMUTABLE;
并且能在执行查询时被内联。但我没有找到 Postgres 支持的类似功能。
答案 0 :(得分:0)
您没有给出您测试所用的聚合函数。下面是我创建它的方式:
-- State transition function for the weighted_avg aggregate.
--   fa     : running state, fa[1] = sum of x * weight, fa[2] = sum of weight
--   x      : value of the current row
--   weight : weight of the current row
-- Returns the updated two-element state array.
-- STRICT: the body is not evaluated when any argument is NULL (the call
-- returns NULL); when used as an aggregate transition function, PostgreSQL
-- skips rows with NULL inputs and keeps the previous state.
CREATE FUNCTION weighted_avg_acumm (fa float[], x float, weight float)
RETURNS float[]
LANGUAGE sql
IMMUTABLE
STRICT
AS $$
    SELECT CAST(ARRAY[fa[1] + x * weight, fa[2] + weight] AS float[])
$$;
-- Final function for the weighted_avg aggregate.
--   fa : accumulated state, fa[1] = sum of x * weight, fa[2] = sum of weight
-- Returns fa[1] / fa[2], or NULL when the total weight is zero (for example
-- when no row with non-NULL inputs was accumulated and the state is still
-- {0,0}). nullif turns what would be a division-by-zero error into NULL,
-- matching the nullif(..., 0) guard of the equivalent inline expression.
create function weighted_avg_acumm_final (fa float[])
returns float as $$
select fa[1] / nullif(fa[2], 0)
$$ language sql immutable strict;
-- weighted_avg(x, weight): weighted mean of x.
-- The state is a two-element float array seeded with {0,0};
-- weighted_avg_acumm folds each row into it and weighted_avg_acumm_final
-- turns the final state into the result.
CREATE AGGREGATE weighted_avg (x float, weight float) (
    STYPE     = float[],
    INITCOND  = '{0,0}',
    SFUNC     = weighted_avg_acumm,
    FINALFUNC = weighted_avg_acumm_final
);
我测试了一下,在我这里它也慢得多:
-- Benchmark fixture: one million rows of random values and weights.
CREATE TABLE t (a int, weight int);

INSERT INTO t (a, weight)
SELECT
    nullif(round(random() * 10), 0),  -- 1..10, or NULL when the rounded value is 0
    trunc(random() * 10) + 1          -- 1..10, never NULL or zero
FROM generate_series(1, 1000000);
-- Benchmark 1: run the custom weighted_avg aggregate over every row of t and
-- report the actual execution time.
explain analyze
select weighted_avg(a, weight)
from t;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Aggregate (cost=269425.25..269425.26 rows=1 width=8) (actual time=7933.440..7933.440 rows=1 loops=1)
-> Seq Scan on t (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.018..241.571 rows=1000000 loops=1)
Planning time: 0.189 ms
Execution time: 7933.508 ms
-- Benchmark 2 (baseline): the same weighted average written inline with
-- built-in aggregates.
-- a is cast to numeric so the final division is numeric rather than integer
-- division (a and weight are int columns of t).
explain analyze
select
sum(a::numeric * weight) /
nullif(sum(case when a is not null then weight end), 0)
from t;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Aggregate (cost=26925.00..26925.02 rows=1 width=8) (actual time=904.852..904.852 rows=1 loops=1)
-> Seq Scan on t (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.010..127.264 rows=1000000 loops=1)
Planning time: 0.048 ms
Execution time: 904.891 ms