从XML / JSON解释计划中获取查询中的列(和表)列表

时间:2013-02-06 15:20:06

标签: xml json postgresql sql-execution-plan postgresql-9.0

我有约。 200个sql语句,我需要分析这些列中使用的列和表。我发现PostgreSQL 9.0 +中有XML解释计划。

我是否知道如何从该计划中获取使用过的列和表的列表?

更新版本:

测试数据

CREATE TABLE tmp.a (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.b (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.c (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.d (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.e (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);

insert into tmp.a values (1,1,1,1,1,1,1,1,1);
insert into tmp.a values (2,1,1,1,1,1,1,1,1);
insert into tmp.a values (3,1,1,1,1,1,1,1,1);
insert into tmp.a values (4,1,1,1,1,1,1,1,1);
insert into tmp.b values (1,1,1,1,1,1,1,1,1);
insert into tmp.b values (2,1,1,1,1,1,1,1,1);
insert into tmp.b values (3,1,1,1,1,1,1,1,1);
insert into tmp.b values (4,1,1,1,1,1,1,1,1);
insert into tmp.c values (1,1,1,1,1,1,1,1,1);
insert into tmp.c values (2,1,1,1,1,1,1,1,1);
insert into tmp.c values (3,1,1,1,1,1,1,1,1);
insert into tmp.c values (4,1,1,1,1,1,1,1,1);
insert into tmp.d values (1,1,1,1,1,1,1,1,1);
insert into tmp.d values (2,1,1,1,1,1,1,1,1);
insert into tmp.d values (3,1,1,1,1,1,1,1,1);
insert into tmp.e values (2,1,1,1,1,1,1,1,1);
insert into tmp.e values (3,1,1,1,1,1,1,1,1);
insert into tmp.e values (4,1,1,1,1,1,1,1,1);

示例SQL和解释计划

explain (verbose true, format xml, costs false)
select 
    a.b,
    a.c,
    b.c,
    d.b,
    e.f
from
    tmp.a a
    join tmp.b b using (id)
    join tmp.c c using (id)
    left join tmp.d d on (a.id = d.id)
    left join tmp.e e on (b.id = e.id)
where
    c.d = 1 and (d.f > 0 or e.g is null)

存储在表格中的XML结果

create table tmp.file (fcontent text);

insert into tmp.file values ('
    <explain xmlns="http://www.postgresql.org/2009/explain">
      <Query>
        <Plan>
          <Node-Type>Merge Join</Node-Type>
          <Join-Type>Left</Join-Type>
          <Output>
            <Item>a.b</Item>
            <Item>a.c</Item>
            <Item>b.c</Item>
            <Item>d.b</Item>
            <Item>e.f</Item>
          </Output>
          <Merge-Cond>(b.id = e.id)</Merge-Cond>
          <Filter>((d.f &gt; 0) OR (e.g IS NULL))</Filter>
          <Plans>
            <Plan>
              <Node-Type>Merge Join</Node-Type>
              <Parent-Relationship>Outer</Parent-Relationship>
              <Join-Type>Left</Join-Type>
              <Output>
                <Item>a.b</Item>
                <Item>a.c</Item>
                <Item>b.c</Item>
                <Item>b.id</Item>
                <Item>d.b</Item>
                <Item>d.f</Item>
              </Output>
              <Merge-Cond>(a.id = d.id)</Merge-Cond>
              <Plans>
                <Plan>
                  <Node-Type>Sort</Node-Type>
                  <Parent-Relationship>Outer</Parent-Relationship>
                  <Output>
                    <Item>a.b</Item>
                    <Item>a.c</Item>
                    <Item>a.id</Item>
                    <Item>b.c</Item>
                    <Item>b.id</Item>
                  </Output>
                  <Sort-Key>
                    <Item>a.id</Item>
                  </Sort-Key>
                  <Plans>
                    <Plan>
                      <Node-Type>Hash Join</Node-Type>
                      <Parent-Relationship>Outer</Parent-Relationship>
                      <Join-Type>Inner</Join-Type>
                      <Output>
                        <Item>a.b</Item>
                        <Item>a.c</Item>
                        <Item>a.id</Item>
                        <Item>b.c</Item>
                        <Item>b.id</Item>
                      </Output>
                      <Hash-Cond>(b.id = a.id)</Hash-Cond>
                      <Plans>
                        <Plan>
                          <Node-Type>Seq Scan</Node-Type>
                          <Parent-Relationship>Outer</Parent-Relationship>
                          <Relation-Name>b</Relation-Name>
                          <Schema>tmp</Schema>
                          <Alias>b</Alias>
                          <Output>
                            <Item>b.id</Item>
                            <Item>b.b</Item>
                            <Item>b.c</Item>
                            <Item>b.d</Item>
                            <Item>b.e</Item>
                            <Item>b.f</Item>
                            <Item>b.g</Item>
                            <Item>b.h</Item>
                            <Item>b.i</Item>
                          </Output>
                        </Plan>
                        <Plan>
                          <Node-Type>Hash</Node-Type>
                          <Parent-Relationship>Inner</Parent-Relationship>
                          <Output>
                            <Item>a.b</Item>
                            <Item>a.c</Item>
                            <Item>a.id</Item>
                            <Item>c.id</Item>
                          </Output>
                          <Plans>
                            <Plan>
                              <Node-Type>Hash Join</Node-Type>
                              <Parent-Relationship>Outer</Parent-Relationship>
                              <Join-Type>Inner</Join-Type>
                              <Output>
                                <Item>a.b</Item>
                                <Item>a.c</Item>
                                <Item>a.id</Item>
                                <Item>c.id</Item>
                              </Output>
                              <Hash-Cond>(a.id = c.id)</Hash-Cond>
                              <Plans>
                                <Plan>
                                  <Node-Type>Seq Scan</Node-Type>
                                  <Parent-Relationship>Outer</Parent-Relationship>
                                  <Relation-Name>a</Relation-Name>
                                  <Schema>tmp</Schema>
                                  <Alias>a</Alias>
                                  <Output>
                                    <Item>a.id</Item>
                                    <Item>a.b</Item>
                                    <Item>a.c</Item>
                                    <Item>a.d</Item>
                                    <Item>a.e</Item>
                                    <Item>a.f</Item>
                                    <Item>a.g</Item>
                                    <Item>a.h</Item>
                                    <Item>a.i</Item>
                                  </Output>
                                </Plan>
                                <Plan>
                                  <Node-Type>Hash</Node-Type>
                                  <Parent-Relationship>Inner</Parent-Relationship>
                                  <Output>
                                    <Item>c.id</Item>
                                  </Output>
                                  <Plans>
                                    <Plan>
                                      <Node-Type>Seq Scan</Node-Type>
                                      <Parent-Relationship>Outer</Parent-Relationship>
                                      <Relation-Name>c</Relation-Name>
                                      <Schema>tmp</Schema>
                                      <Alias>c</Alias>
                                      <Output>
                                        <Item>c.id</Item>
                                      </Output>
                                      <Filter>(c.d = 1)</Filter>
                                    </Plan>
                                  </Plans>
                                </Plan>
                              </Plans>
                            </Plan>
                          </Plans>
                        </Plan>
                      </Plans>
                    </Plan>
                  </Plans>
                </Plan>
                <Plan>
                  <Node-Type>Sort</Node-Type>
                  <Parent-Relationship>Inner</Parent-Relationship>
                  <Output>
                    <Item>d.b</Item>
                    <Item>d.id</Item>
                    <Item>d.f</Item>
                  </Output>
                  <Sort-Key>
                    <Item>d.id</Item>
                  </Sort-Key>
                  <Plans>
                    <Plan>
                      <Node-Type>Seq Scan</Node-Type>
                      <Parent-Relationship>Outer</Parent-Relationship>
                      <Relation-Name>d</Relation-Name>
                      <Schema>tmp</Schema>
                      <Alias>d</Alias>
                      <Output>
                        <Item>d.b</Item>
                        <Item>d.id</Item>
                        <Item>d.f</Item>
                      </Output>
                    </Plan>
                  </Plans>
                </Plan>
              </Plans>
            </Plan>
            <Plan>
              <Node-Type>Sort</Node-Type>
              <Parent-Relationship>Inner</Parent-Relationship>
              <Output>
                <Item>e.f</Item>
                <Item>e.id</Item>
                <Item>e.g</Item>
              </Output>
              <Sort-Key>
                <Item>e.id</Item>
              </Sort-Key>
              <Plans>
                <Plan>
                  <Node-Type>Seq Scan</Node-Type>
                  <Parent-Relationship>Outer</Parent-Relationship>
                  <Relation-Name>e</Relation-Name>
                  <Schema>tmp</Schema>
                  <Alias>e</Alias>
                  <Output>
                    <Item>e.f</Item>
                    <Item>e.id</Item>
                    <Item>e.g</Item>
                  </Output>
                </Plan>
              </Plans>
            </Plan>
          </Plans>
        </Plan>
      </Query>
    </explain>
    ');

解释计划中的项目

with elements as (
    SELECT trim(a[rn]) AS elem, rn
    FROM   (
        SELECT *, generate_series(1, array_upper(a, 1)) AS rn
        FROM  (
        SELECT string_to_array(fcontent, chr(10)) AS a
        FROM   tmp.file
        ) x
        ) y
    )
select 
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' ) as sql_line
from 
    elements where elem like '<Item>%' 
group by 
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' )
order by
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' )

Item标记中有25列。但是,要执行此查询,您只需要13:a.b, a.c, b.c, d.b, e.f, a.id, b.id, c.id, d.id, e.id, c.d, d.f, e.g。我是如何只从解释计划中获得这些专栏的?


原始版本

例如,我确实有以下查询(更多用于说明,无需理解):

select
    dd.id_databox_data,
    dd.recipient_id,
    dd.sender_id,
    lp.business_name,
    fa.repayment_identification,
    perform_time,
    dd.subject as dd_subject,
    ca.subject as ca_subject,
    d.unique_name,
    s.name,
    s.long_name,
    lld.legal_template,
    lld.issue_date,
    ca.perform_time,
    dd.id_recipient_document_ident,
    dd.id_sender_document_ident,
    dci1.ref_number,    
    dci2.ref_number     
from 
    databox_data as dd 
    join databox_data_attachments as dda on (dd.id_databox_data = dda.id_databox_data)
    join databox_attachment as da on (da.id_databox_attachment = dda.id_databox_attachment)
    join document as d on (d.id_document = da.id_document)
    join external_file_letter_data as fld on (fld.id_document = d.id_document)
    join letter_data as ld on (ld.id_letter_data = fld.id_letter_data)
    join legal_letter_data lld on (ld.id_letter_data = lld.id_letter_data)
    join legal_instrument li using (id_legal_instrument)
    left join execution e using (id_legal_instrument)
    join v_communication_act as ca on (ca.id_letter_data = ld.id_letter_data)
    join solver s using (id_solver)
    join responsibility as r on (r.id_responsibility = ca.id_related_responsibility)
    join party pr on (r.id_responsible = pr.id_party)
    join financial_accountability fa using (id_accountability)
    join flight f using (id_flight)
    join portfolio p using (id_portfolio)
    join legal_person lp on (pr.id_source = lp.id_party)
    left join v_authority va on (dd.recipient_id = va.data_box_id) 
    left join databox_document_ident dci1 on (dd.id_recipient_document_ident = dci1.id_databox_document_ident)
    left join databox_document_ident dci2 on (dd.id_recipient_document_ident = dci2.id_databox_document_ident)      
where
    ca.perform_time > (Now()::date - 1)
    and s.id_solver = 41

我正在使用explain (verbose true, format xml, costs false)

这导致以下解释计划(XML版本),遗憾的是我无法插入整个解释计划(对帖子长度的限制),如果您需要完整的解释计划,请使用此pastebin版本:

<explain xmlns="http://www.postgresql.org/2009/explain">
  <Query>
    <Plan>
      <Node-Type>Nested Loop</Node-Type>
      <Join-Type>Left</Join-Type>
      <Output>
        <Item>dd.id_databox_data</Item>
        <Item>dd.recipient_id</Item>
        <Item>dd.sender_id</Item>
        <Item>lp.business_name</Item>
        <Item>fa.repayment_identification</Item>
        <Item>act.perform_time</Item>
        <Item>dd.subject</Item>
        <Item>communication_act.subject</Item>
        <Item>d.unique_name</Item>
        <Item>s.name</Item>
        <Item>s.long_name</Item>
        <Item>lld.legal_template</Item>
        <Item>lld.issue_date</Item>
        <Item>act.perform_time</Item>
        <Item>dd.id_recipient_document_ident</Item>
        <Item>dd.id_sender_document_ident</Item>
        <Item>dci1.ref_number</Item>
        <Item>dci2.ref_number</Item>
      </Output>
      <Plans>
        <Plan>
          <Node-Type>Nested Loop</Node-Type>
          <Parent-Relationship>Outer</Parent-Relationship>
          <Join-Type>Left</Join-Type>
          <Output>
            <Item>dd.id_databox_data</Item>
            <Item>dd.recipient_id</Item>
            <Item>dd.sender_id</Item>
            <Item>dd.subject</Item>
            <Item>dd.id_recipient_document_ident</Item>
            <Item>dd.id_sender_document_ident</Item>
            <Item>d.unique_name</Item>
            <Item>lld.legal_template</Item>
            <Item>lld.issue_date</Item>
            <Item>communication_act.subject</Item>
            <Item>act.perform_time</Item>
            <Item>s.name</Item>
            <Item>s.long_name</Item>
            <Item>fa.repayment_identification</Item>
            <Item>lp.business_name</Item>
            <Item>dci1.ref_number</Item>
          </Output>
          <Plans>
            <Plan>
              <Node-Type>Nested Loop</Node-Type>
              <Parent-Relationship>Outer</Parent-Relationship>
              <Join-Type>Inner</Join-Type>
              <Output>
                <Item>dd.id_databox_data</Item>
                <Item>dd.recipient_id</Item>
                <Item>dd.sender_id</Item>
                <Item>dd.subject</Item>
                <Item>dd.id_recipient_document_ident</Item>
                <Item>dd.id_sender_document_ident</Item>
                <Item>d.unique_name</Item>
                <Item>lld.legal_template</Item>
                <Item>lld.issue_date</Item>
                <Item>communication_act.subject</Item>
                <Item>act.perform_time</Item>
                <Item>s.name</Item>
                <Item>s.long_name</Item>
                <Item>fa.repayment_identification</Item>

有没有办法(最好是SQL方式)如何从这样的计划中获取使用过的列和表的列表?仅查询<Item>行是不够的,因为当表首次出现在解释计划中时(在最低级别),所有列都列在<Item>标记中,尽管其中许多列都不需要完成查询。

我使用以下SQL列出了唯一的<Item>标记:

with elements as (
    SELECT trim(a[rn]) AS elem, rn
    FROM   (
        SELECT *, generate_series(1, array_upper(a, 1)) AS rn
        FROM  (
        SELECT string_to_array(fcontent, chr(10)) AS a
        FROM   tmp.file
        ) x
        ) y
    )
select 
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' ) as sql_line
from 
    elements where elem like '<Item>%' 
group by 
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' )
order by
    regexp_replace(elem, E'<Item>|</Item>', '', 'g' )

1 个答案:

答案 0 :(得分:1)

在我看来,您需要解析XML的三个方面。

查询返回的列将位于第一个output元素下(item标记中)。

联接的每个子节点都有一个output,由计划员认为必须使用的列组成。加入列将在这里。如果这是为您提供超集,您可能需要解析join-filter的内容以获取此处的信息。

这两个不会给你所需要的一切。您当然需要解析filter元素以从中拉出列。

如果要在SQL中执行此操作,则应查看PostgreSQL中的xml functions。您可以在PostgreSQL中的xml上运行xpath查询,或者通过xslt运行。这比尝试用正则表达式解析它要好得多。

遗憾的是,这种情况的复杂性有点超出了工作范例,但我希望这足以让你开始。