Fastest way to get the row count of a table in PostgreSQL

Time: 2014-04-11 09:03:10

Tags: postgresql

To get the exact row count of a table t, I currently run the following query

SELECT COUNT(*) AS n FROM t;

Is there a faster way?

1 Answer:

Answer 0 (score: 0)

You can maintain the count incrementally in a separate table, either asynchronously with a background routine, or, if the table's DML TPS is low, synchronously with triggers. Note: with the asynchronous approach you need to take care of concurrency issues when fetching the incremental data from table t.

Example: a synchronous, trigger-based counter:

postgres=# create table a(id serial4 primary key, info text, crt_time timestamp(0) default now());
NOTICE:  CREATE TABLE will create implicit sequence "a_id_seq" for serial column "a.id"
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a"
CREATE TABLE
postgres=# create table cnt_a(id int primary key, cnt int);
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "cnt_a_pkey" for table "cnt_a"
CREATE TABLE
postgres=# insert into cnt_a select generate_series(0,1000),0;
INSERT 0 1001
-- The trigger functions below maintain the row count of a in cnt_a. The count
-- is spread over the 1001 rows of cnt_a (one picked at random per change) to
-- reduce row-level update contention; sum(cnt) over cnt_a is the current count.
CREATE OR REPLACE FUNCTION public.tg_insert_a()
 RETURNS trigger
 LANGUAGE plpgsql
AS $function$
declare
  m_id int;
  rm numeric;
begin
  select max(id),random() into m_id,rm from cnt_a;
  update cnt_a set cnt=cnt+1 where id=(rm*m_id)::int;
  return null;
end;
$function$;
CREATE OR REPLACE FUNCTION public.tg_delete_a()
 RETURNS trigger
 LANGUAGE plpgsql
AS $function$
declare
  m_id int;
  rm numeric;
begin
  select max(id),random() into m_id,rm from cnt_a;
  update cnt_a set cnt=cnt-1 where id=(rm*m_id)::int;
  return null;
end;
$function$;
CREATE OR REPLACE FUNCTION public.tg_truncate_a()
 RETURNS trigger
 LANGUAGE plpgsql
AS $function$
declare
begin
  update cnt_a set cnt=0 where not cnt=0;
  return null;
end;
$function$;
create trigger tg1 after insert on a for each row execute procedure tg_insert_a();
create trigger tg2 after delete on a for each row execute procedure tg_delete_a();
create trigger tg3 after truncate on a for each statement execute procedure tg_truncate_a();
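
Note that table a is empty when the triggers are installed, so the counters start out correct. For a table that already contains rows, the counter would have to be seeded in the same transaction that creates the triggers, so that no concurrent write is missed. A minimal sketch, not part of the original answer:

begin;
-- CREATE TRIGGER blocks concurrent writes to a until commit, so counting
-- inside the same transaction yields a consistent starting value
create trigger tg1 after insert on a for each row execute procedure tg_insert_a();
create trigger tg2 after delete on a for each row execute procedure tg_delete_a();
create trigger tg3 after truncate on a for each statement execute procedure tg_truncate_a();
update cnt_a set cnt = (select count(*) from a) where id = 0;
commit;
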
pg92@digoal-PowerEdge-R610-> cat insert.sql 
insert into a (info) values ('test');
pg92@digoal-PowerEdge-R610-> pgbench -M prepared -r -n -f ./insert.sql -h $PGDATA -p 1919 -U postgres -T 60 -c 16 -j 4 postgres
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 16
number of threads: 4
duration: 60 s
number of transactions actually processed: 1831418
tps = 30514.831839 (including connections establishing)
tps = 30522.057886 (excluding connections establishing)
statement latencies in milliseconds:
        0.522411        insert into a (info) values ('test');
postgres=# select count(*) from a;
  count  
---------
 1755964
(1 row)
Time: 285.491 ms
postgres=# select sum(cnt) from cnt_a ;
   sum   
---------
 1755964
(1 row)
Time: 0.689 ms
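
With the triggers in place, the total comes from the small cnt_a table instead of a full scan of a. For convenience this can be hidden behind a view; a minimal sketch (the view name a_rowcount is mine, not part of the original answer):

create view a_rowcount as
  select sum(cnt) as n from cnt_a;
-- usage: select n from a_rowcount;
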
  1. Asynchronous count example: each row of the test table records the transaction id that wrote it (xid default txid_current()). A background function (analyze_log below) periodically picks up the newly committed rows, using a txid snapshot to skip transactions that are still in flight, and folds their counts into small per-day aggregate tables via stat_log_c1.

    -- # TEST TABLE
    create table log (
      id serial primary key,
      xid int8 default txid_current() not null,
      c1 int not null,
      c2 int not null,
      c3 int not null,
      c4 text not null,
      crt_time timestamp default now()
    );
    create index idx_log_1 on log(xid);

    -- # COUNT TABLE
    create table log_c1_cnt_day (c1 int, cnt int8, stat_time text, primary key(c1,stat_time));
    create table log_c1_cnt_week (c1 int, cnt int8, stat_time text, primary key(c1,stat_time));
    create table log_c1_cnt_month (c1 int, cnt int8, stat_time text, primary key(c1,stat_time));
    create table log_c1_cnt_year (c1 int, cnt int8, stat_time text, primary key(c1,stat_time));
    
    -- # COUNT TABLE 2
    create table log_c2_c3_cnt_day (c2 int, c3 int, cnt int8, stat_time text, primary key(c2,c3,stat_time));
    create table log_c2_c3_cnt_week (c2 int, c3 int, cnt int8, stat_time text, primary key(c2,c3,stat_time));
    create table log_c2_c3_cnt_month (c2 int, c3 int, cnt int8, stat_time text, primary key(c2,c3,stat_time));
    create table log_c2_c3_cnt_year (c2 int, c3 int, cnt int8, stat_time text, primary key(c2,c3,stat_time));
    
    -- TEST DATA
    insert into log (c1,c2,c3,c4) values (1,1,1,1);
    insert into log (c1,c2,c3,c4) values (2,2,2,2);
    
    -- 
    digoal=# select * from log;
     id |    xid    | c1 | c2 | c3 | c4 |          crt_time          
    ----+-----------+----+----+----+----+----------------------------
      1 | 480125659 |  1 |  1 |  1 | 1  | 2013-04-21 20:55:45.907713
      2 | 480125660 |  2 |  2 |  2 | 2  | 2013-04-21 20:55:46.286933
    (2 rows)
    

    -- # async count function

    -- bookkeeping for incremental reads, one row per tracked table: xid is the
    -- high-water mark of counted transactions, xip holds the transactions that
    -- were still in progress at the last run, and xip_res holds xids above the
    -- watermark that the next range scan must skip
    create table log_read 
    (
    tablename name not null, 
    xid int8 not null, 
    xip int8[], 
    xip_res int8[],  
    mod_time timestamp, 
    primary key (tablename)
    );
    insert into log_read values ('log', 0, null, null, now());
    
    
    -- incrementally fold newly committed rows of log into the count tables;
    -- v_limit caps how many log rows are scanned per call
    create or replace function analyze_log(v_limit int) returns void as $$
    declare
      v_advisory_xact_lock int8 := null;  
    
      v_xid_snap txid_snapshot := null;  
      v_xmin int8 := null;  
      v_xmax int8 := null;  
      v_xip int8[] := null;  
    
      v_log_read_log_xid int8 := null;  
      v_log_read_log_xid_update int8 := null;  
    
      v_log_read_log_xip int8[] := null;  
      v_log_read_log_xip_do int8[] := null;  
      v_log_read_log_xip_update int8[] := null;  
      v_log_read_log_xip_res int8[] := null;  
      v_log_read_log_xip_res_update int8[] := null;  
    
      v_log log[] := null;  
      v_log_doxip log[] := null;  
    
    begin
      if v_limit <=0 then
        raise notice 'please ensure v_limit > 0 .';
        return;
      end if;
    
      v_advisory_xact_lock := 1;
      if not pg_try_advisory_xact_lock(v_advisory_xact_lock) then
        raise notice 'Another function is calling, this call will exit.';
        return;
      end if;
    
      v_xid_snap := txid_current_snapshot();
      v_xmin := txid_snapshot_xmin(v_xid_snap);
      v_xmax := txid_snapshot_xmax(v_xid_snap);
      select array_agg(t) into v_xip from txid_snapshot_xip(v_xid_snap) g(t);
    
      select xid,xip,xip_res into v_log_read_log_xid,v_log_read_log_xip,v_log_read_log_xip_res from log_read where tablename='log';
      if not found then
        raise notice 'log_read no log entry. please add it in log_read table first.';
        return;
      end if;
    
      select max(xid) into v_log_read_log_xid_update from (select xid from log where xid > v_log_read_log_xid and xid < v_xmax and xid not in (select * from unnest(v_xip) union all select * from unnest(v_log_read_log_xip_res)) order by xid limit v_limit) t;
      if v_log_read_log_xid_update is not null then
        raise notice 'fetching new committed rows from log';
        select array_agg(log) into v_log from (select log from log where xid > v_log_read_log_xid and xid<=v_log_read_log_xid_update and xid not in (select * from unnest(v_xip) union all select * from unnest(v_log_read_log_xip_res)) order by xid) t;
      else 
        v_log_read_log_xid_update := v_log_read_log_xid;
      end if;
    
      select array_agg(i) into v_log_read_log_xip_do from (select * from unnest(v_log_read_log_xip) i except select * from unnest(v_xip))t where i is not null;
      select array_agg(i) into v_log_read_log_xip_update from 
      (  select i from (select * from unnest(v_log_read_log_xip) i union all select * from unnest(v_xip)
         except select * from unnest(v_log_read_log_xip_do)) t where i is not null group by i ) t;
      select array_agg(i) into v_log_read_log_xip_res_update from (select * from unnest(v_log_read_log_xip_res) i union select * from unnest(v_log_read_log_xip) union select * from unnest(v_xip))t where i>v_log_read_log_xid_update;
      select array_agg(log) into v_log_doxip from log where xid in (select * from unnest(v_log_read_log_xip_do));
    
      update log_read set 
        xip=v_log_read_log_xip_update, 
        xid=v_log_read_log_xid_update, 
        xip_res=v_log_read_log_xip_res_update,
        mod_time=now() 
      where tablename='log';
      -- raise notice 'log_read.oldxip(log): %.', v_log_read_log_xip;
      -- raise notice 'log_read.newxip(log): %.', v_log_read_log_xip_update;
      -- raise notice 'log_read.newxipres(log): %.', v_log_read_log_xip_res_update;
    
      perform stat_log_c1(v_log);
      perform stat_log_c1(v_log_doxip);
    
    return;
    end;
    $$ language plpgsql;
    
    -- fold an array of log rows into the per-day, per-c1 counter table
    -- log_c1_cnt_day (manual insert-or-update)
    CREATE OR REPLACE FUNCTION public.stat_log_c1(v_log log[])
     RETURNS void
     LANGUAGE plpgsql
    AS $function$
    declare
      v_stat_time text;
      v_c1 int;
      v_cnt int8;
    begin
      for v_stat_time, v_c1, v_cnt in select to_char(crt_time, 'yyyymmdd'), c1 , count(*) from (select ((unnest(v_log)::log)).*) t group by to_char(crt_time, 'yyyymmdd'), c1 loop
        perform 1 from log_c1_cnt_day where c1=v_c1 and stat_time=v_stat_time;
        if not found then
          insert into log_c1_cnt_day(c1, cnt, stat_time) values (v_c1, v_cnt, v_stat_time);
        else
          update log_c1_cnt_day set cnt=cnt+v_cnt where c1=v_c1 and stat_time=v_stat_time;
        end if;
      end loop;
    end;
    $function$;
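
    The log_c2_c3_cnt_* tables created above are not populated by this example; they would be maintained by an analogous function grouped on (c2, c3) and called from analyze_log next to stat_log_c1. A sketch (stat_log_c2_c3 is my own name, not part of the original):

    CREATE OR REPLACE FUNCTION public.stat_log_c2_c3(v_log log[])
     RETURNS void
     LANGUAGE plpgsql
    AS $function$
    declare
      v_stat_time text;
      v_c2 int;
      v_c3 int;
      v_cnt int8;
    begin
      for v_stat_time, v_c2, v_c3, v_cnt in select to_char(crt_time, 'yyyymmdd'), c2, c3, count(*) from (select ((unnest(v_log)::log)).*) t group by to_char(crt_time, 'yyyymmdd'), c2, c3 loop
        perform 1 from log_c2_c3_cnt_day where c2=v_c2 and c3=v_c3 and stat_time=v_stat_time;
        if not found then
          insert into log_c2_c3_cnt_day(c2, c3, cnt, stat_time) values (v_c2, v_c3, v_cnt, v_stat_time);
        else
          update log_c2_c3_cnt_day set cnt=cnt+v_cnt where c2=v_c2 and c3=v_c3 and stat_time=v_stat_time;
        end if;
      end loop;
    end;
    $function$;
    -- and, inside analyze_log, next to the existing calls:
    --   perform stat_log_c2_c3(v_log);
    --   perform stat_log_c2_c3(v_log_doxip);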
    
    truncate log;
    truncate log_c1_cnt_day;
    update log_read set xid=0, xip=null, xip_res=null;
    
    -- pgbench script
    cat ins.sql 
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    
    -- # pgbench
    pg92@digoal-PowerEdge-R610-> pgbench -M prepared -f ./ins.sql -r -n -h $PGDATA -U postgres -T 60 -c 8 -j 2
    transaction type: Custom query
    scaling factor: 1
    query mode: prepared
    number of clients: 8
    number of threads: 2
    duration: 60 s
    number of transactions actually processed: 2887271
    tps = 48121.007692 (including connections establishing)
    tps = 48131.903512 (excluding connections establishing)
    statement latencies in milliseconds:
            0.164881        insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    
    pg92@digoal-PowerEdge-R610-> cat analyze.sh 
    #!/bin/bash
    for ((i=0;i<100;i++))
    do
    psql -c "select * from analyze_log(1);"
    psql -c "select * from analyze_log(1000000);"
    done
    # call analyze.sh
    
    digoal=# select c1,count(*),to_char(crt_time,'yyyymmdd') from log group by c1,to_char(crt_time,'yyyymmdd') order by c1;
     c1 | count  | to_char  
    ----+--------+----------
      0 | 140071 | 20130421
      1 | 282303 | 20130421
      2 | 281499 | 20130421
      3 | 281339 | 20130421
      4 | 282008 | 20130421
      5 | 281871 | 20130421
      6 | 282954 | 20130421
      7 | 281855 | 20130421
      8 | 281560 | 20130421
      9 | 281516 | 20130421
     10 | 140456 | 20130421
    (11 rows)
    digoal=# select * from log_c1_cnt_day where cnt<>0 order by c1;
     c1 |  cnt   | stat_time 
    ----+--------+-----------
      0 | 140071 | 20130421
      1 | 282303 | 20130421
      2 | 281499 | 20130421
      3 | 281339 | 20130421
      4 | 282008 | 20130421
      5 | 281871 | 20130421
      6 | 282954 | 20130421
      7 | 281855 | 20130421
      8 | 281560 | 20130421
      9 | 281516 | 20130421
     10 | 140456 | 20130421
    (11 rows)
    
    
    cat ins.sql
    begin;
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    end;
    begin;
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    rollback;
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    insert into log (c1,c2,c3,c4) values(round(random()*10),1,2,3);
    -- # pgbench
    pg92@digoal-PowerEdge-R610-> pgbench -M prepared -f ./ins.sql -r -n -h $PGDATA -U postgres -T 60 -c 8 -j 2
    transaction type: Custom query
    scaling factor: 1
    query mode: prepared
    number of clients: 8
    number of threads: 2
    duration: 60 s
    number of transactions actually processed: 100862
    tps = 1680.570420 (including connections establishing)
    tps = 1680.946330 (excluding connections establishing)
    statement latencies in milliseconds:
    
    
    pg92@digoal-PowerEdge-R610-> cat analyze.sh 
    #!/bin/bash
    for ((i=0;i<100;i++))
    do
    psql -c "select * from analyze_log(1);"
    psql -c "select * from analyze_log(1000000);"
    done
    
    
    
    digoal=# select c1,count(*),to_char(crt_time,'yyyymmdd') from log group by c1,to_char(crt_time,'yyyymmdd') order by c1;
     c1 | count  | to_char  
    ----+--------+----------
      0 | 245002 | 20130426
      1 | 491034 | 20130426
      2 | 489717 | 20130426
      3 | 490628 | 20130426
      4 | 490064 | 20130426
      5 | 490393 | 20130426
      6 | 490893 | 20130426
      7 | 490081 | 20130426
      8 | 490180 | 20130426
      9 | 490659 | 20130426
     10 | 245860 | 20130426
    (11 rows)
    digoal=# select * from log_c1_cnt_day where cnt<>0 order by c1;
     c1 |  cnt   | stat_time 
    ----+--------+-----------
      0 | 245002 | 20130426
      1 | 491034 | 20130426
      2 | 489717 | 20130426
      3 | 490628 | 20130426
      4 | 490064 | 20130426
      5 | 490393 | 20130426
      6 | 490893 | 20130426
      7 | 490081 | 20130426
      8 | 490180 | 20130426
      9 | 490659 | 20130426
     10 | 245860 | 20130426
    (11 rows)
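
    Coming back to the original question: with these counters in place, the row count of log is read from the small per-day table instead of scanning log. A usage sketch (log is insert-only in this demo, so once analyze_log has caught up with all committed transactions the sum equals the row count):

    -- fast total, instead of select count(*) from log
    select sum(cnt) from log_c1_cnt_day;
    -- or broken down by day
    select stat_time, sum(cnt) from log_c1_cnt_day group by stat_time order by stat_time;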