Oracle 11g:使用xmltable,xmlagg和pivot

时间:2015-11-11 16:27:01

标签: oracle11g pivot xquery clob xmltable

我有以下XML:

<record>
    <leader>02220cim a2200325 a 4500</leader>    
    <datafield tag="035" ind1=" " ind2=" ">
      <subfield code="a">P286</subfield>
    </datafield>
    <datafield tag="100" ind1="1" ind2=" ">
      <subfield code="a">Gold, Claudia M.</subfield>
    </datafield>
    <datafield tag="300" ind1=" " ind2=" ">
      <subfield code="a">1 disque son. (6h47) :</subfield>
      <subfield code="b">numérique ;</subfield>
      <subfield code="c">12 cm.</subfield>
    </datafield>  
    <datafield tag="650" ind1=" " ind2="6">
      <subfield code="a">Émotions chez l&apos;enfant</subfield>
    </datafield>
    <datafield tag="650" ind1=" " ind2="6">
      <subfield code="a">Parents et enfants</subfield>
    </datafield>
    <datafield tag="655" ind1=" " ind2="4">
      <subfield code="a">Enregistrement numérique sur CD.</subfield>
    </datafield>
    <datafield tag="700" ind1="1" ind2=" ">
      <subfield code="a">Taussig, Sylvie</subfield>
    </datafield>
    <datafield tag="700" ind1="1" ind2=" ">
      <subfield code="a">Marcelli, Daniel</subfield>
    </datafield>
    <datafield tag="852" ind1=" " ind2=" ">
      <subfield code="a">GBQ</subfield>
      <subfield code="b">QO</subfield>
      <subfield code="c">DC13378</subfield>
      <subfield code="d">32002518936826</subfield>
    </datafield>
    <datafield tag="852" ind1=" " ind2=" ">
      <subfield code="a">GBQ</subfield>
      <subfield code="b">QO</subfield>
      <subfield code="c">DC13378</subfield>
      <subfield code="d">32002518936859</subfield>
    </datafield>    
  </record>

我需要将行中的值转换为列。例如:

leader    datafield_035   datafield_100   datafield_300  datafield_650 ...
-----------------------------------------------------------------------
02220...

这是我做的:

1 / Step1:

-- Step 1: shred the MARC XML stored in table_xml (document id 3) into one row
-- per (record leader, datafield tag, subfield text) and load it into table_data.
insert into table_data (leader, tag, subfield)
select
    rec.leader,
    fld.tag,
    sub.subfield
from table_xml t
-- One row per <record>; its <datafield> children are handed on as an XML fragment.
cross join xmltable(
    xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
    '/collection/record'
    passing t.xml_document
    columns
        leader    varchar2(100) path 'leader',
        datafield xmltype       path './datafield'
) rec
-- Keep only the datafields of interest. The tags are compared as strings:
-- a numeric literal such as 091 makes XQuery cast @tag to a number, which
-- would also match "91" and cannot handle a non-numeric tag value.
-- The duplicated 245 entry of the earlier predicate is listed once here.
cross join xmltable(
    xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
    '/datafield[@tag = ("091", "100", "245", "260", "300", "500",
                        "505", "520", "534", "650", "655")]'
    passing rec.datafield
    columns
        tag      varchar2(20) path '@tag',
        subfield xmltype      path './subfield'
) fld
-- One row per <subfield>, exposing its text content.
cross join xmltable(
    xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
    '/subfield'
    passing fld.subfield
    columns
        subfield varchar2(2000) path '.'
) sub
where t.id = 3
;

2 / Step2:

-- Step 2: collapse the subfields of every (leader, tag) pair into a single
-- ' ; '-separated CLOB, ordered by subfield text, and load it into table_clob.
--
-- The separator is emitted in front of each value and the first three
-- characters are then cut off with substr. rtrim(value, ' ; ') is not used
-- because rtrim treats its second argument as a set of characters: it would
-- also strip a trailing ';' or blank that belongs to the last subfield
-- (for example 'numérique ;').
-- NOTE(review): text produced via extract('//text()') stays XML-entity-encoded
-- (an apostrophe is stored as &apos;) - confirm whether that is wanted.
insert into table_clob (leader, tag, subfield)
select
    leader,
    tag,
    substr(
        xmlagg(
            xmlelement(e, ' ; ', subfield).extract('//text()')
            order by subfield
        ).getclobval(),
        4
    ) as subfields
from table_data
group by leader, tag
;

3 / Step3:

-- Step 3: pivot the per-tag rows of table_clob into one row per leader with
-- one column per MARC tag, and load the result into table_result.
--
-- The target column list must have as many columns as the query returns (ten);
-- a three-column list (leader, tag, subfield) fails with ORA-00913.
-- NOTE(review): the target column names below assume table_result is declared
-- with columns named after the select aliases - confirm against its DDL.
insert into table_result
    (leader, cote, auteur, titre, desc_mater, edition, resume, version, notes, sujet)
select
    leader,
    cote,
    auteur,
    titre,
    desc_mater,
    edition,
    resume,
    version,
    notes,
    sujet
from (
    -- pivot needs a non-LOB value to aggregate, so only the first
    -- 2000 characters of each CLOB are kept
    select
        leader,
        tag,
        dbms_lob.substr(subfield, 2000, 1) as col
    from table_clob
)
-- Aliasing each value in the IN list names the pivoted column directly,
-- instead of relying on generated identifiers such as "'091'".
pivot (
    max(col)
    for tag in (
        '091' as cote,
        '100' as auteur,
        '245' as titre,
        '260' as edition,
        '650' as sujet,
        '520' as resume,
        '300' as desc_mater,
        '534' as version,
        '500' as notes
    )
)
;

完成这些步骤后,我得到了我期望的结果,但性能并不是很好。

有没有办法一步到位,用一条语句直接得到最终结果?

1 个答案:

答案 0 :(得分:1)

您可以使用subquery factoring(也称为公用表表达式或CTE)而不是永久(甚至是临时)表。

但是你真的不需要做所有工作,你可以使用listagg()和你的第一个查询:

-- Single-statement alternative: conditional aggregation with listagg builds
-- one ' ; '-separated column per MARC tag directly from the shredded XML,
-- without intermediate tables. Each case expression yields null for rows of
-- other tags, and listagg skips nulls.
-- The three XMLTABLE calls are abbreviated here ("...").
select x.leader
  , listagg(case when x1.tag = '091' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as cote
  , listagg(case when x1.tag = '100' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as auteur
  , listagg(case when x1.tag = '245' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as titre
  , listagg(case when x1.tag = '300' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as desc_mater
  , listagg(case when x1.tag = '260' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as edition
  , listagg(case when x1.tag = '520' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as resume
  , listagg(case when x1.tag = '534' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as version
  , listagg(case when x1.tag = '500' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as notes
  -- alias spelled "sujet" to match the column name used by the pivot query
  , listagg(case when x1.tag = '650' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as sujet
from table_xml t
  cross join XMLTABLE ( ... ) x
    cross join XMLTABLE ( ... )x1
      cross join XMLTABLE ( ... )x2      
where t.id=3
group by x.leader;
这与您的示例得到相同的结果,只是撇号不再保留实体编码(&apos;);这多半是件好事。

SQL Fiddle 演示;我本打算把 CTE 版本也放进去作为参考,但 SQL Fiddle 跑不动它——它经常处理不好 XML 或 CTE,而且运行 listagg 版本时也时好时坏;所以下面给出两个版本的完整代码:

-- CTE version of the three-step process: each former staging table becomes a
-- subquery-factoring clause, so the result is produced by one statement.
with
-- One row per (record leader, datafield tag, subfield text) for document id 3.
table_data as (
    select
        rec.leader,
        fld.tag,
        sub.subfield
    from table_xml t
    cross join xmltable(
        xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
        '/collection/record'
        passing t.xml_document
        columns
            leader    varchar2(100) path 'leader',
            datafield xmltype       path './datafield'
    ) rec
    -- Tags are compared as strings: a numeric literal such as 091 makes XQuery
    -- cast @tag to a number, which would also match "91" and cannot handle a
    -- non-numeric tag value. The duplicated 245 entry is listed once.
    cross join xmltable(
        xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
        '/datafield[@tag = ("091", "100", "245", "260", "300", "500",
                            "505", "520", "534", "650", "655")]'
        passing rec.datafield
        columns
            tag      varchar2(20) path '@tag',
            subfield xmltype      path './subfield'
    ) fld
    cross join xmltable(
        xmlnamespaces(default 'http://www.loc.gov/MARC21/slim'),
        '/subfield'
        passing fld.subfield
        columns
            subfield varchar2(2000) path '.'
    ) sub
    where t.id = 3
),
-- Subfields of each (leader, tag) joined into one ' ; '-separated CLOB.
-- The separator is put in front of every value and the first three characters
-- are cut off with substr; rtrim(value, ' ; ') would treat ' ; ' as a character
-- set and also strip a trailing ';' or blank belonging to the last subfield.
table_clob as (
    select
        leader,
        tag,
        substr(
            xmlagg(
                xmlelement(e, ' ; ', subfield).extract('//text()')
                order by subfield
            ).getclobval(),
            4
        ) as subfield
    from table_data
    group by leader, tag
),
-- One row per leader with one column per MARC tag. Each pivot value is
-- aliased in the IN list, which names the output column directly instead of
-- relying on generated identifiers such as "'091'".
table_result as (
    select
        leader,
        cote,
        auteur,
        titre,
        desc_mater,
        edition,
        resume,
        version,
        notes,
        sujet
    from (
        -- pivot needs a non-LOB value to aggregate, so only the first
        -- 2000 characters of each CLOB are kept
        select
            leader,
            tag,
            dbms_lob.substr(subfield, 2000, 1) as col
        from table_clob
    )
    pivot (
        max(col)
        for tag in (
            '091' as cote,
            '100' as auteur,
            '245' as titre,
            '260' as edition,
            '650' as sujet,
            '520' as resume,
            '300' as desc_mater,
            '534' as version,
            '500' as notes
        )
    )
)
select
    leader,
    cote,
    auteur,
    titre,
    desc_mater,
    edition,
    resume,
    version,
    notes,
    sujet
from table_result;

-- listagg version: conditional aggregation builds one ' ; '-separated column
-- per MARC tag directly from the shredded XML of document id 3. Each case
-- expression yields null for rows of other tags, and listagg skips nulls.
-- NOTE(review): rows are grouped by leader text only, so two records with an
-- identical leader would be merged into one row - confirm leaders are unique.
select x.leader
  , listagg(case when x1.tag = '091' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as cote
  , listagg(case when x1.tag = '100' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as auteur
  , listagg(case when x1.tag = '245' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as titre
  , listagg(case when x1.tag = '300' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as desc_mater
  , listagg(case when x1.tag = '260' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as edition
  , listagg(case when x1.tag = '520' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as resume
  , listagg(case when x1.tag = '534' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as version
  , listagg(case when x1.tag = '500' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as notes
  -- alias spelled "sujet" to match the column name used by the pivot query
  , listagg(case when x1.tag = '650' then x2.subfield end, ' ; ')
      within group (order by x2.subfield) as sujet
from table_xml t
  -- one row per <record>; its <datafield> children are passed on as a fragment
  cross join xmltable (xmlnamespaces (default 'http://www.loc.gov/MARC21/slim')
        , '/collection/record'
          passing t.xml_document
          columns leader    varchar2(100) path 'leader',
                  datafield xmltype       path './datafield'
        ) x
    -- Tags are compared as strings: a numeric literal such as 091 makes XQuery
    -- cast @tag to a number, which would also match "91" and cannot handle a
    -- non-numeric tag value. The duplicated 245 entry is listed once.
    cross join xmltable (xmlnamespaces (default 'http://www.loc.gov/MARC21/slim')
        , '/datafield[@tag = ("091", "100", "245", "260", "300", "500",
                              "505", "520", "534", "650", "655")]'
          passing x.datafield
          columns tag      varchar2(20) path '@tag',
                  subfield xmltype      path './subfield'
        ) x1
      -- one row per <subfield>, exposing its text content
      cross join xmltable (xmlnamespaces (default 'http://www.loc.gov/MARC21/slim')
        , '/subfield'
          passing x1.subfield
          columns subfield varchar2(2000) path '.'
        ) x2
where t.id = 3
group by x.leader;