PostgreSQL数据库中具有大小(相对和绝对)的模式列表

时间:2010-12-11 19:03:48

标签: postgresql

我正在寻找一个返回任何数据库表单结果的查询 (请参阅下面的示例,假设数据库使用的总空间为40GB)

schema | size | relative size
----------+-------------------
foo    | 15GB |   37.5%      
bar    | 20GB |     50%
baz    |  5GB |   12.5%

我设法使用按架构排序的数据库中的实体来编制空间列表, 这很有用,但是从这个方面获取每个模式的摘要看起来并不那么容易。 见下文。

SELECT relkind,
       relname,
       pg_catalog.pg_namespace.nspname,
       pg_size_pretty(pg_relation_size(pg_catalog.pg_class.oid))
FROM   pg_catalog.pg_class
       INNER JOIN pg_catalog.pg_namespace
         ON relnamespace = pg_catalog.pg_namespace.oid
ORDER  BY pg_catalog.pg_namespace.nspname,
          pg_relation_size(pg_catalog.pg_class.oid) DESC;

这会产生类似

的结果
  relkind |                relname                |      nspname       | pg_size_pretty 
---------+---------------------------------------+--------------------+----------------
  r       | geno                                  | btsnp              | 11 GB
  i       | geno_pkey                             | btsnp              | 5838 MB
  r       | anno                                  | btsnp              | 63 MB
  i       | anno_fid_key                          | btsnp              | 28 MB
  i       | ix_btsnp_anno_rsid                    | btsnp              | 28 MB
  [...]
  r       | anno                                  | btsnp_shard        | 63 MB
  r       | geno4681                              | btsnp_shard        | 38 MB
  r       | geno4595                              | btsnp_shard        | 38 MB
  r       | geno4771                              | btsnp_shard        | 38 MB
  r       | geno4775                              | btsnp_shard        | 38 MB

看起来像使用SUM之类的聚合运算符似乎是必要的,到目前为止还没有成功。

                                                                                 Regards, Faheem

3 个答案:

答案 0 :(得分:48)

试试这个:

SELECT schema_name, 
       sum(table_size),
       (sum(table_size) / database_size) * 100
FROM (
  SELECT pg_catalog.pg_namespace.nspname as schema_name,
         pg_relation_size(pg_catalog.pg_class.oid) as table_size,
         sum(pg_relation_size(pg_catalog.pg_class.oid)) over () as database_size
  FROM   pg_catalog.pg_class
     JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name, database_size


编辑:刚刚注意到解决方法,总结所有表以获取数据库大小是不必要的:

SELECT schema_name, 
       pg_size_pretty(sum(table_size)::bigint),
       (sum(table_size) / pg_database_size(current_database())) * 100
FROM (
  SELECT pg_catalog.pg_namespace.nspname as schema_name,
         pg_relation_size(pg_catalog.pg_class.oid) as table_size
  FROM   pg_catalog.pg_class
     JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name
ORDER BY schema_name

答案 1 :(得分:2)

更好的解决方案:

WITH 

schemas AS (
SELECT schemaname as name, sum(pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint as size FROM pg_tables
GROUP BY schemaname
),

db AS (
SELECT pg_database_size(current_database()) AS size
)

SELECT schemas.name, 
       pg_size_pretty(schemas.size) as absolute_size,
       schemas.size::float / (SELECT size FROM db)  * 100 as relative_size
FROM schemas;

接受的答案解决了上述问题,但是建议的查询效率不高。您可以执行EXPLAIN来查看区别:

EXPLAIN WITH 

schemas AS (
SELECT schemaname as name, sum(pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint as size FROM pg_tables
GROUP BY schemaname
),

db AS (SELECT pg_database_size(current_database()) AS size)

SELECT schemas.name, 
       pg_size_pretty(schemas.size) as absolute_size,
       schemas.size::float / (SELECT size FROM db)  * 100 as relative_size
FROM schemas;

                                                 QUERY PLAN
------------------------------------------------------------------------------------------------------------
 CTE Scan on schemas  (cost=47100.79..47634.34 rows=16417 width=104)
   CTE schemas
     ->  Finalize HashAggregate  (cost=46854.50..47100.76 rows=16417 width=72)
           Group Key: n.nspname
           ->  Gather  (cost=43119.63..46608.25 rows=32834 width=96)
                 Workers Planned: 2
                 ->  Partial HashAggregate  (cost=42119.63..42324.85 rows=16417 width=96)
                       Group Key: n.nspname
                       ->  Hash Left Join  (cost=744.38..39763.93 rows=94228 width=128)
                             Hash Cond: (c.relnamespace = n.oid)
                             ->  Parallel Seq Scan on pg_class c  (cost=0.00..38772.14 rows=94228 width=72)
                                   Filter: (relkind = ANY ('{r,p}'::"char"[]))
                             ->  Hash  (cost=539.17..539.17 rows=16417 width=68)
                                   ->  Seq Scan on pg_namespace n  (cost=0.00..539.17 rows=16417 width=68)
   CTE db
     ->  Result  (cost=0.00..0.01 rows=1 width=8)
   InitPlan 3 (returns $3)
     ->  CTE Scan on db  (cost=0.00..0.02 rows=1 width=8)

vs

EXPLAIN SELECT schema_name, 
       pg_size_pretty(sum(table_size)::bigint),
       (sum(table_size) / pg_database_size(current_database())) * 100
FROM (
  SELECT pg_catalog.pg_namespace.nspname as schema_name,
         pg_relation_size(pg_catalog.pg_class.oid) as table_size
  FROM   pg_catalog.pg_class
     JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name
ORDER BY schema_name; 

                                       QUERY PLAN
-------------------------------------------------------------------------------------------
 GroupAggregate  (cost=283636.24..334759.75 rows=1202906 width=128)
   Group Key: pg_namespace.nspname
   ->  Sort  (cost=283636.24..286643.51 rows=1202906 width=72)
         Sort Key: pg_namespace.nspname
         ->  Hash Join  (cost=744.38..51446.15 rows=1202906 width=72)
               Hash Cond: (pg_class.relnamespace = pg_namespace.oid)
               ->  Seq Scan on pg_class  (cost=0.00..44536.06 rows=1202906 width=8)
               ->  Hash  (cost=539.17..539.17 rows=16417 width=68)
                     ->  Seq Scan on pg_namespace  (cost=0.00..539.17 rows=16417 width=68)

答案 2 :(得分:0)

https://www.depesz.com/2018/02/17/which-schema-is-using-the-most-disk-space/

显示了也计算TOAST表格的解决方案。在PG12上测试:

 WITH recursive all_elements AS (
    SELECT 'base/' || l.filename AS path, x.*
    FROM
        pg_ls_dir('base/') AS l (filename),
        LATERAL pg_stat_file( 'base/' || l.filename) AS x
    UNION ALL
    SELECT 'pg_tblspc/' || l.filename AS path, x.*
    FROM
        pg_ls_dir('pg_tblspc/') AS l (filename),
        LATERAL pg_stat_file( 'pg_tblspc/' || l.filename) AS x
    UNION ALL
    SELECT
        u.path || '/' || l.filename, x.*
    FROM
        all_elements u,
        lateral pg_ls_dir(u.path) AS l(filename),
        lateral pg_stat_file( u.path || '/' || l.filename ) AS x
    WHERE
        u.isdir
), all_files AS (
    SELECT path, SIZE FROM all_elements WHERE NOT isdir
), interesting_files AS (
    SELECT
        regexp_replace(
            regexp_replace(f.path, '.*/', ''),
            '\.[0-9]*$',
            ''
        ) AS filename,
        SUM( f.size )
    FROM
        pg_database d,
        all_files f
    WHERE
        d.datname = current_database() AND
        f.path ~ ( '/' || d.oid || E'/[0-9]+(\\.[0-9]+)?$' )
    GROUP BY filename
)
SELECT
    n.nspname AS schema_name,
    SUM( f.sum ) AS total_schema_size
FROM
    interesting_files f
    JOIN pg_class c ON f.filename::oid = c.relfilenode
    LEFT OUTER JOIN pg_class dtc ON dtc.reltoastrelid = c.oid AND c.relkind = 't'
    JOIN pg_namespace n ON COALESCE( dtc.relnamespace, c.relnamespace ) = n.oid
GROUP BY
    n.nspname
ORDER BY
    total_schema_size DESC