SQLSERVER - 导入和解析来自wordpress的特殊XML

时间:2017-06-01 08:52:19

标签: sql-server xml tsql xml-parsing

我编写了一个加载xml文件并解析它的脚本。

这是脚本和部分xml文件,例如:

SQL SCRIPT

-- Loads a WordPress export file into a temp table, then shreds it with OPENXML
-- to list the title and category of every rss/channel/item element.
DECLARE @XML AS XML
-- Document handle: filled by sp_xml_preparedocument, consumed by OPENXML.
DECLARE @hDoc AS INT

-- Staging table holding the raw XML document and the time it was loaded.
CREATE TABLE #XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)

-- Read the whole file as a single value (SINGLE_BLOB) and convert it to XML.
INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE() 
FROM OPENROWSET(BULK 'C:\temp\wordpress.2017-05-22.xml', SINGLE_BLOB) AS x;

-- NOTE(review): assumes the staging table holds exactly one row; there is no
-- WHERE/ORDER BY to choose a document if more than one has been loaded.
SELECT @XML = XMLData FROM #XMLwithOpenXML

-- Parse the document and obtain the handle that OPENXML needs.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML

-- One row per rss/channel/item. Each WITH column maps to a single value, so
-- (as reported for this script) an item with several <category> children
-- only shows its first category.
SELECT *
FROM OPENXML(@hDoc, 'rss/channel/item')
WITH 
    (       
        Product [VARCHAR](50) 'title',
        Category [VARCHAR](50) 'category'
    )

-- Release the parsed document associated with @hDoc.
EXEC sp_xml_removedocument @hDoc

DROP TABLE #XMLwithOpenXML

XML文件(部分)

<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0"
    xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
    xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:wfw="http://wellformedweb.org/CommentAPI/"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
    <item>
        <title>Bancone Tetris</title>
        <category domain="categorie-prodotti" nicename="arredi-light"><![CDATA[Arredi light]]></category>
        <category domain="categorie-outlet" nicename="arredi-light"><![CDATA[arredi light]]></category>
        <category domain="categorie-prodotti" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
            <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
        </wp:postmeta>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
            <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
        </wp:postmeta>
</item>
<item>
... SAME STRUCTURE AS ABOVE
</item>
</channel>
</rss>

我遇到的问题

  1. 我需要列出所有产品及其全部类别,但目前脚本对每个产品只返回第一个类别
  2. 我不知道如何返回每个产品的所有postmeta(键,值)列表......
  3. 希望我已经表达清楚,感谢支持!

1 个答案:

答案 0 :(得分:2)

我对OPENXML很生疏,但你不需要它。让我们从一些更新的样本数据开始:

-- Sample data: one RSS document with two <item> elements, each carrying
-- several <category> children and two <wp:postmeta> key/value pairs.
-- Requires the temp table #XMLwithOpenXML to exist already.
INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT 
'<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0"
    xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
    xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:wfw="http://wellformedweb.org/CommentAPI/"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
    <item>
        <title>Bancone Tetris</title>
        <category domain="categorie-prodotti" nicename="arredi-light"><![CDATA[Arredi light]]></category>
        <category domain="categorie-outlet" nicename="arredi-light"><![CDATA[arredi light]]></category>
        <category domain="categorie-prodotti" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
            <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
        </wp:postmeta>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
            <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
        </wp:postmeta>
    </item>
    <item>
        <title>Bancone Tetris Part2</title>
        <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
            <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
        </wp:postmeta>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
            <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
        </wp:postmeta>
    </item>
</channel>
</rss>' AS BulkColumn, GETDATE();

此查询将为您提供所有产品和相关类别:

-- One row per (item, category) pair: each <item> under rss/channel is
-- expanded into its <category> children.
SELECT
    item_rows.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
    category_rows.category_node.value('(text())[1]', 'varchar(1000)') AS category
FROM #XMLwithOpenXML AS src
CROSS APPLY src.XMLData.nodes('rss/channel/item') AS item_rows(item_node)
CROSS APPLY item_rows.item_node.nodes('category') AS category_rows(category_node);

结果:

product                    category
-------------------------- --------------
Bancone Tetris             Arredi light
Bancone Tetris             arredi light
Bancone Tetris             Banconi
Bancone Tetris             Banconi
Bancone Tetris             luci
Bancone Tetris Part2       Banconi
Bancone Tetris Part2       luci

对于你的wp:postmeta,你需要引用wp命名空间。这里有三种方法(请留意代码中的注释):

-- Option #1: match elements in any namespace with the "*:" wildcard
-- (the answer author's preferred variant).
-- Every category of an item is paired with every postmeta of that item.
SELECT
    item_rows.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
    category_rows.category_node.value('(text())[1]', 'varchar(1000)') AS category,
    meta_rows.meta_node.value('(*:meta_key/text())[1]', 'varchar(1000)') AS meta_key,
    meta_rows.meta_node.value('(*:meta_value/text())[1]', 'varchar(1000)') AS meta_val
FROM #XMLwithOpenXML AS src
CROSS APPLY src.XMLData.nodes('rss/channel/item') AS item_rows(item_node)
CROSS APPLY item_rows.item_node.nodes('category') AS category_rows(category_node)
CROSS APPLY item_rows.item_node.nodes('*:postmeta') AS meta_rows(meta_node);

-- Option #2: bind the "wp" prefix once for the whole statement via
-- WITH XMLNAMESPACES, then use it in every XQuery path.
WITH XMLNAMESPACES ('http://wordpress.org/export/1.2/' AS wp)
SELECT
    item_rows.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
    category_rows.category_node.value('(text())[1]', 'varchar(1000)') AS category,
    meta_rows.meta_node.value('(wp:meta_key/text())[1]', 'varchar(1000)') AS meta_key,
    meta_rows.meta_node.value('(wp:meta_value/text())[1]', 'varchar(1000)') AS meta_val
FROM #XMLwithOpenXML AS src
CROSS APPLY src.XMLData.nodes('rss/channel/item') AS item_rows(item_node)
CROSS APPLY item_rows.item_node.nodes('category') AS category_rows(category_node)
CROSS APPLY item_rows.item_node.nodes('wp:postmeta') AS meta_rows(meta_node);

-- Option #3: declare the "wp" namespace inline, inside each XQuery
-- expression that refers to a wp: element.
SELECT
    item_rows.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
    category_rows.category_node.value('(text())[1]', 'varchar(1000)') AS category,
    meta_rows.meta_node.value('declare namespace wp="http://wordpress.org/export/1.2/"; (wp:meta_key/text())[1]', 'varchar(1000)') AS meta_key,
    meta_rows.meta_node.value('declare namespace wp="http://wordpress.org/export/1.2/"; (wp:meta_value/text())[1]', 'varchar(1000)') AS meta_val
FROM #XMLwithOpenXML AS src
CROSS APPLY src.XMLData.nodes('rss/channel/item') AS item_rows(item_node)
CROSS APPLY item_rows.item_node.nodes('category') AS category_rows(category_node)
CROSS APPLY item_rows.item_node.nodes('declare namespace wp="http://wordpress.org/export/1.2/"; wp:postmeta') AS meta_rows(meta_node);

这三种解决方案都返回:

product              category             meta_key             meta_val
-------------------- -------------------- -------------------- --------------------
Bancone Tetris       Arredi light         wpcf-codice          cod.05-008
Bancone Tetris       Arredi light         wpcf-size            145x80xh110 cm
Bancone Tetris       arredi light         wpcf-codice          cod.05-008
Bancone Tetris       arredi light         wpcf-size            145x80xh110 cm
Bancone Tetris       Banconi              wpcf-codice          cod.05-008
Bancone Tetris       Banconi              wpcf-size            145x80xh110 cm
Bancone Tetris       Banconi              wpcf-codice          cod.05-008
Bancone Tetris       Banconi              wpcf-size            145x80xh110 cm
Bancone Tetris       luci                 wpcf-codice          cod.05-008
Bancone Tetris       luci                 wpcf-size            145x80xh110 cm
Bancone Tetris Part2 Banconi              wpcf-codice          cod.05-008
Bancone Tetris Part2 Banconi              wpcf-size            145x80xh110 cm
Bancone Tetris Part2 luci                 wpcf-codice          cod.05-008
Bancone Tetris Part2 luci                 wpcf-size            145x80xh110 cm