计算列中不同值的出现次数

时间:2017-08-31 07:26:14

标签: sql postgresql jsonb

我有以下查询:

-- Count how often each label ('High', 'Medium', 'Low') occurs under the
-- 'total' and 'social' keys of score_labels, returning one jsonb object
-- per key.
select
    jsonb_build_object('high', count(*) filter (where total = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where total = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where total = 'Low')) as total,
    jsonb_build_object('high', count(*) filter (where social = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where social = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where social = 'Low')) as social
from (
    -- ->> extracts each label as text so it can be compared with the literals.
    -- (No comma after the last select-list item: a trailing comma before
    -- FROM is a syntax error.)
    select
        score_labels->>'total' as total,
        score_labels->>'social' as social
    from survey_results
    ) s;

我想知道是否有办法简化它?比如使用迭代,而不是重复书写 jsonb_build_object 语句?

此查询返回以下结果:

total                                  social
-------------------------------------  -------------------------------
{"low": 80, "high": 282, "medium": 0}  {"low": 103, "high": 115, "medium": 0} 

1 个答案:

答案 0 :(得分:1)

针对这个特定情况

你需要一个plpgsql函数:

-- Counts the occurrences of the labels 'high', 'medium' and 'low'
-- (compared case-insensitively) in a text array.
--
-- Parameter: $1 - array of labels; may be NULL or empty.
-- Returns:   jsonb object {"high": n, "medium": n, "low": n}.
--
-- NULL elements and labels other than the three known ones are ignored,
-- which matches what count(*) filter (where col = '...') would count.
create or replace function my_arr_to_jsonb(text[])
returns jsonb language plpgsql as $$
declare
    -- agg[1] = high, agg[2] = medium, agg[3] = low
    agg int[] = array[0, 0, 0];
    s text;
begin
    -- array_agg() over zero rows yields NULL and FOREACH raises an error on
    -- a NULL array, so iterate over an empty array in that case.
    foreach s in array coalesce($1, array[]::text[]) loop
        if lower(s) = 'high' then
            agg[1] := agg[1] + 1;
        elsif lower(s) = 'medium' then
            agg[2] := agg[2] + 1;
        elsif lower(s) = 'low' then
            -- explicit test instead of a catch-all ELSE, so that NULLs and
            -- unknown labels are not reported as 'low'
            agg[3] := agg[3] + 1;
        end if;
    end loop;
    return jsonb_build_object(
        'high', agg[1],
        'medium', agg[2],
        'low', agg[3]);
end $$;

该函数的实际使用:

-- Demo: three sample rows whose score_labels hold a 'total' and a 'risk' label.
with my_table (id, score_labels) as (
values
(1, '{"total": "High", "risk": "High"}'::jsonb),
(2, '{"total": "High", "risk": "Low"}'::jsonb),
(3, '{"total": "Low", "risk": "Medium"}'::jsonb)
)

-- array_agg() collects the labels of one key into a text[];
-- my_arr_to_jsonb() turns that array into per-label counts.
select
    my_arr_to_jsonb(array_agg(score_labels->>'total')) as total,
    my_arr_to_jsonb(array_agg(score_labels->>'risk')) as risk
from my_table;

               total                |                risk                
------------------------------------+------------------------------------
 {"low": 1, "high": 2, "medium": 0} | {"low": 1, "high": 1, "medium": 1}
(1 row)

该函数中的算法也可以用来创建自定义聚合函数(见下文)。

广义解决方案

这个问题引出了一个有趣的话题:用单个聚合函数统计表中某一列各个不同值的出现次数。

-- State transition function of the count_labels aggregate.
--
-- The state is a flat text array of alternating entries:
--   {label_1, count_1, label_2, count_2, ...}
-- Labels are stored as quote_ident(label); counts are stored as text.
--
-- Parameters: $1 - current state (NULL before the first label is added)
--             $2 - next label; NULL labels leave the state untouched
-- Returns:    the updated state
create or replace function count_labels_state(text[], text)
returns text[] language plpgsql as $$
declare
    state text[] := $1;
    key text;
    pos int;
begin
    if $2 is null then
        return state;
    end if;

    key := quote_ident($2);
    pos := array_position(state, key);
    if pos is null then
        -- first occurrence: append the label with a zero counter
        state := state || array[key, '0'];
        pos := cardinality(state) - 1;
    end if;

    -- the counter sits directly after its label
    state[pos + 1] := state[pos + 1]::int + 1;
    return state;
end $$;

-- Final function of the count_labels aggregate: converts the flat state
-- array {label_1, count_1, label_2, count_2, ...} into a jsonb object
-- {"label_1": count_1, ...}.
--
-- Parameter: $1 - aggregate state; labels are stored as produced by
--                 quote_ident() in count_labels_state. May be NULL.
-- Returns:   jsonb object of label => count; '{}' for a NULL or empty state.
create or replace function count_labels_final(text[])
returns jsonb language plpgsql as $$
declare
    j jsonb = '{}';
    i int = 1;
    label text;
begin
    -- cardinality(NULL) is NULL, which would never satisfy the exit test and
    -- would let jsonb_build_object() fail on a NULL key; treat a NULL state
    -- (no non-null input at all) as empty.
    loop exit when i > coalesce(cardinality($1), 0);
        label := $1[i];
        -- Undo quote_ident(): a quoted label is wrapped in exactly one pair
        -- of double quotes and has its embedded quotes doubled. trim() would
        -- strip every leading/trailing quote and leave the doubling in place.
        if left(label, 1) = '"' then
            label := replace(substr(label, 2, length(label) - 2), '""', '"');
        end if;
        j := j || jsonb_build_object(label, $1[i+1]::int);
        i := i + 2;
    end loop;
    return j;
end $$;

-- Aggregate that counts the occurrences of every distinct non-null text
-- value and returns them as one jsonb object {"value": count, ...}.
create aggregate count_labels(text) (
    sfunc = count_labels_state,
    stype = text[],
    finalfunc = count_labels_final,
    -- Start from an empty array rather than NULL, so that aggregating zero
    -- rows (or only NULLs) yields '{}' instead of handing a NULL state to
    -- the final function.
    initcond = '{}'
);

用法。不必再这样写:

-- Baseline for comparison: one count(*) filter (...) expression per label,
-- which requires knowing every label in advance.
with my_table (label) as (
    values
        ('low'), ('medium'), ('high'), ('low'), ('low'),
        ('medium'), ('high'), ('low'), ('low'), ('unknown')
)
select
    count(*) filter (where label = 'low')     as low,
    count(*) filter (where label = 'medium')  as medium,
    count(*) filter (where label = 'high')    as high,
    count(*) filter (where label = 'unknown') as unknown
from my_table;

 low | medium | high | unknown 
-----+--------+------+---------
   5 |      2 |    2 |       1
(1 row)

而可以这样写(无需事先知道有哪些标签):

-- Same sample data as the count(*) filter (...) example; the CTE is repeated
-- because a WITH clause is only visible to the statement it belongs to.
with my_table (label) as (
values
    ('low'), ('medium'), ('high'), ('low'),
    ('low'), ('medium'), ('high'), ('low'),
    ('low'), ('unknown')
)

-- One aggregate call counts every distinct label.
select count_labels(label) as labels
from my_table;

                      labels                      
--------------------------------------------------
 {"low": 5, "high": 2, "medium": 2, "unknown": 1}
(1 row)

聚合在整数列上运行良好:

-- Integer values are counted by passing their text representation
-- to the aggregate.
with my_table (n) as (
    values (1), (2), (3), (4), (1), (2), (1), (2)
)
select
    count_labels(cast(n as text)) as integers
from my_table;

              integers              
----------------------------------
 {"1": 3, "2": 3, "3": 1, "4": 1}
(1 row) 

对于其他类型,需要记住该聚合是基于值的文本表示进行统计的(例如,作为数值 1.10 = 1.1,但作为文本 '1.10' <> '1.1')。