我有大约 200 条 SQL 语句,需要分析这些语句中使用了哪些列和表。我发现 PostgreSQL 9.0+ 提供 XML 格式的解释计划(EXPLAIN)。
你知道如何从该计划中获取所使用的列和表的列表吗?
更新版本:
测试数据
-- Five identically shaped test tables in schema tmp: an id column plus eight
-- integer columns named b..i. tmp.a spells out the layout once; the other
-- four tables clone it (plain LIKE copies column names and types only).
CREATE TABLE tmp.a (
    id integer,
    b  integer,
    c  integer,
    d  integer,
    e  integer,
    f  integer,
    g  integer,
    h  integer,
    i  integer
);
CREATE TABLE tmp.b (LIKE tmp.a);
CREATE TABLE tmp.c (LIKE tmp.a);
CREATE TABLE tmp.d (LIKE tmp.a);
CREATE TABLE tmp.e (LIKE tmp.a);
-- Seed rows for the join example. Column lists are explicit so the statements
-- do not depend on the physical column order of the tables, and each table is
-- loaded with a single multi-row INSERT.

-- tmp.a, tmp.b and tmp.c hold ids 1-4.
insert into tmp.a (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

insert into tmp.b (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

insert into tmp.c (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);

-- tmp.d holds ids 1-3 only, so id 4 has no match in a left join on d.id.
insert into tmp.d (id, b, c, d, e, f, g, h, i) values
    (1, 1, 1, 1, 1, 1, 1, 1, 1),
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1);

-- tmp.e holds ids 2-4 only, so id 1 has no match in a left join on e.id.
insert into tmp.e (id, b, c, d, e, f, g, h, i) values
    (2, 1, 1, 1, 1, 1, 1, 1, 1),
    (3, 1, 1, 1, 1, 1, 1, 1, 1),
    (4, 1, 1, 1, 1, 1, 1, 1, 1);
示例SQL和解释计划
-- Request the plan of the sample five-table join as XML, with verbose output
-- enabled and cost figures suppressed. A boolean option given without a value
-- means TRUE, and OFF is a synonym for FALSE.
explain (verbose, format xml, costs off)
select
    a.b,
    a.c,
    b.c,
    d.b,
    e.f
from tmp.a as a
inner join tmp.b as b using (id)
inner join tmp.c as c using (id)
left join tmp.d as d on a.id = d.id
left join tmp.e as e on b.id = e.id
where c.d = 1
  and (d.f > 0 or e.g is null);
存储在表格中的XML结果
-- Holds raw EXPLAIN (FORMAT XML) output as plain text in a single column.
create table tmp.file (
    fcontent text
);
-- Stores the XML plan of the sample query as one text value in tmp.file.
-- The literal starts with a line feed because the opening quote is directly
-- followed by a newline; the plan text itself is kept verbatim.
insert into tmp.file values ('
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Merge Join</Node-Type>
<Join-Type>Left</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>b.c</Item>
<Item>d.b</Item>
<Item>e.f</Item>
</Output>
<Merge-Cond>(b.id = e.id)</Merge-Cond>
<Filter>((d.f > 0) OR (e.g IS NULL))</Filter>
<Plans>
<Plan>
<Node-Type>Merge Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Left</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>b.c</Item>
<Item>b.id</Item>
<Item>d.b</Item>
<Item>d.f</Item>
</Output>
<Merge-Cond>(a.id = d.id)</Merge-Cond>
<Plans>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>b.c</Item>
<Item>b.id</Item>
</Output>
<Sort-Key>
<Item>a.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Hash Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>b.c</Item>
<Item>b.id</Item>
</Output>
<Hash-Cond>(b.id = a.id)</Hash-Cond>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>b</Relation-Name>
<Schema>tmp</Schema>
<Alias>b</Alias>
<Output>
<Item>b.id</Item>
<Item>b.b</Item>
<Item>b.c</Item>
<Item>b.d</Item>
<Item>b.e</Item>
<Item>b.f</Item>
<Item>b.g</Item>
<Item>b.h</Item>
<Item>b.i</Item>
</Output>
</Plan>
<Plan>
<Node-Type>Hash</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>c.id</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Hash Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>c.id</Item>
</Output>
<Hash-Cond>(a.id = c.id)</Hash-Cond>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>a</Relation-Name>
<Schema>tmp</Schema>
<Alias>a</Alias>
<Output>
<Item>a.id</Item>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.d</Item>
<Item>a.e</Item>
<Item>a.f</Item>
<Item>a.g</Item>
<Item>a.h</Item>
<Item>a.i</Item>
</Output>
</Plan>
<Plan>
<Node-Type>Hash</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>c.id</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>c</Relation-Name>
<Schema>tmp</Schema>
<Alias>c</Alias>
<Output>
<Item>c.id</Item>
</Output>
<Filter>(c.d = 1)</Filter>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>d.b</Item>
<Item>d.id</Item>
<Item>d.f</Item>
</Output>
<Sort-Key>
<Item>d.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>d</Relation-Name>
<Schema>tmp</Schema>
<Alias>d</Alias>
<Output>
<Item>d.b</Item>
<Item>d.id</Item>
<Item>d.f</Item>
</Output>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>e.f</Item>
<Item>e.id</Item>
<Item>e.g</Item>
</Output>
<Sort-Key>
<Item>e.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>e</Relation-Name>
<Schema>tmp</Schema>
<Alias>e</Alias>
<Output>
<Item>e.f</Item>
<Item>e.id</Item>
<Item>e.g</Item>
</Output>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Query>
</explain>
');
解释计划中的项目
-- List every distinct <Item> value found in the plans stored in tmp.file,
-- sorted alphabetically, as a single column named sql_line.
--
-- The plan is parsed as XML rather than split on line feeds and stripped with
-- a regex, so the result no longer depends on each tag sitting on its own
-- line or on indentation consisting only of spaces.
with items as (
    select
        -- xpath() returns xml[]; unnest it and cast to text because the xml
        -- type has no equality operator for grouping or sorting.
        unnest(
            xpath(
                '//e:Item/text()',
                fcontent::xml,
                -- The plan uses a default namespace, which XPath can only
                -- address through an explicitly bound prefix.
                array[array['e', 'http://www.postgresql.org/2009/explain']]
            )
        )::text as sql_line
    from tmp.file
)
select
    sql_line
from items
group by
    sql_line
order by
    sql_line;
<Item> 标记中一共列出了 25 列。但是,执行此查询实际只需要 13 列:a.b, a.c, b.c, d.b, e.f, a.id, b.id, c.id, d.id, e.id, c.d, d.f, e.g。
我该如何只从解释计划中得到这些列?
原始版本
例如,我有以下查询(仅作说明之用,无需理解其含义):
-- Illustrative query from the original question: databox messages joined to
-- their documents, letters, communication acts and related parties, limited
-- to solver 41 and acts performed after yesterday's date.
select
    dd.id_databox_data,
    dd.recipient_id,
    dd.sender_id,
    lp.business_name,
    fa.repayment_identification,
    -- NOTE(review): unqualified; appears to be the same column as
    -- ca.perform_time selected again further down - confirm and drop one.
    perform_time,
    dd.subject as dd_subject,
    ca.subject as ca_subject,
    d.unique_name,
    s.name,
    s.long_name,
    lld.legal_template,
    lld.issue_date,
    ca.perform_time,
    dd.id_recipient_document_ident,
    dd.id_sender_document_ident,
    dci1.ref_number,
    dci2.ref_number
from databox_data as dd
inner join databox_data_attachments as dda
    on dd.id_databox_data = dda.id_databox_data
inner join databox_attachment as da
    on da.id_databox_attachment = dda.id_databox_attachment
inner join document as d
    on d.id_document = da.id_document
inner join external_file_letter_data as fld
    on fld.id_document = d.id_document
inner join letter_data as ld
    on ld.id_letter_data = fld.id_letter_data
inner join legal_letter_data as lld
    on ld.id_letter_data = lld.id_letter_data
inner join legal_instrument as li using (id_legal_instrument)
left join execution as e using (id_legal_instrument)
inner join v_communication_act as ca
    on ca.id_letter_data = ld.id_letter_data
inner join solver as s using (id_solver)
inner join responsibility as r
    on r.id_responsibility = ca.id_related_responsibility
inner join party as pr
    on r.id_responsible = pr.id_party
inner join financial_accountability as fa using (id_accountability)
inner join flight as f using (id_flight)
inner join portfolio as p using (id_portfolio)
inner join legal_person as lp
    on pr.id_source = lp.id_party
left join v_authority as va
    on dd.recipient_id = va.data_box_id
left join databox_document_ident as dci1
    on dd.id_recipient_document_ident = dci1.id_databox_document_ident
-- NOTE(review): dci2 uses the same join key as dci1; presumably it was meant
-- to join on dd.id_sender_document_ident - confirm before changing.
left join databox_document_ident as dci2
    on dd.id_recipient_document_ident = dci2.id_databox_document_ident
where ca.perform_time > (now()::date - 1)
  and s.id_solver = 41
我使用的是 explain (verbose true, format xml, costs false)。
得到的解释计划(XML 格式)如下。遗憾的是,由于帖子长度的限制,我无法贴出整个解释计划;如果您需要完整的解释计划,请参阅 pastebin 上的版本:
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Join-Type>Left</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>lp.business_name</Item>
<Item>fa.repayment_identification</Item>
<Item>act.perform_time</Item>
<Item>dd.subject</Item>
<Item>communication_act.subject</Item>
<Item>d.unique_name</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>act.perform_time</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>dci1.ref_number</Item>
<Item>dci2.ref_number</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Left</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>dd.subject</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>d.unique_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>communication_act.subject</Item>
<Item>act.perform_time</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>fa.repayment_identification</Item>
<Item>lp.business_name</Item>
<Item>dci1.ref_number</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>dd.subject</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>d.unique_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>communication_act.subject</Item>
<Item>act.perform_time</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>fa.repayment_identification</Item>
有没有办法(最好是用 SQL)从这样的计划中获取所使用的列和表的列表?仅查询 <Item> 行是不够的,因为当表首次出现在解释计划中时(在最底层的节点),它的所有列都会列在 <Item> 标记中,尽管其中许多列对完成查询并无必要。
我使用以下 SQL 列出了不重复的 <Item> 标记:
-- List every distinct <Item> value found in the plans stored in tmp.file,
-- sorted alphabetically, as a single column named sql_line.
--
-- The plan is parsed as XML rather than split on line feeds and stripped with
-- a regex, so the result no longer depends on each tag sitting on its own
-- line or on indentation consisting only of spaces.
with items as (
    select
        -- xpath() returns xml[]; unnest it and cast to text because the xml
        -- type has no equality operator for grouping or sorting.
        unnest(
            xpath(
                '//e:Item/text()',
                fcontent::xml,
                -- The plan uses a default namespace, which XPath can only
                -- address through an explicitly bound prefix.
                array[array['e', 'http://www.postgresql.org/2009/explain']]
            )
        )::text as sql_line
    from tmp.file
)
select
    sql_line
from items
group by
    sql_line
order by
    sql_line;
答案 0 :(得分:1)
在我看来,您需要解析 XML 中的三个部分。
查询返回的列位于第一个 output 元素下(在 item 标记中)。
联接的每个子节点都有一个 output 元素,其中包含规划器(planner)认为必须用到的列,联接列也在这里。如果这给出的是一个超集,您可能需要解析 join-filter 的内容来获取准确的信息。
仅靠以上两部分还无法得到您需要的全部内容。您还必须解析 filter 元素,从中提取出列。
如果要在 SQL 中完成这件事,可以看一下 PostgreSQL 的 XML 函数(xml functions)。您可以在 PostgreSQL 中对 XML 执行 xpath 查询,或者通过 XSLT 进行处理。这比用正则表达式解析要好得多。
遗憾的是,这个问题的复杂度超出了给出一个完整可运行示例的范围,但我希望这些足以帮助您入手。