OPENXML不返回预期结果

时间:2016-05-27 11:24:58

标签: sql-server xml tsql xquery openxml

我有一个XML

<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <status>12</status>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
    </message_info>
  </message_infos>
</response>

我需要将其另存为

397300589   673399673   12
397300589   673399675   12
397300591   673399669   12
397300591   673399671   12

但是

-- Shred the XML behind @ixml into rows: the row pattern selects every <message_info>,
-- and each WITH column reads the child element named by its column pattern.
-- NOTE(review): @ixml is presumably a handle from sp_xml_preparedocument; it is not declared in this snippet.
SELECT
    id,
    pdu_id,
    status
FROM OPENXML(@ixml, '/response/message_infos/message_info')
     WITH (
         id     VARCHAR(50) 'id',
         pdu_id VARCHAR(50) 'pdu_id',
         status INT         'status'
     )

给我错误的结果:

397300589   673399673   12
397300591   673399669   12

我做错了什么?我没有找到相同的例子,有人可以帮我吗?

5 个答案:

答案 0 :(得分:4)

-- Sample document: each <message_info> carries repeating id / pdu_id / status elements.
DECLARE @xml XML = N'
<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <status>12</status>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
    </message_info>
  </message_infos>
</response>';

-- Pivot the flat list of child elements back into (id, pdu_id, status) rows.
-- Every child of every <message_info> gets a running number; three consecutive numbers
-- form one output row and the number modulo 3 picks the column (1 = id, 2 = pdu_id, 0 = status).
-- NOTE(review): ORDER BY 1/0 supplies no real sort key, so the numbering presumably follows
-- the order in which nodes() emits rows — confirm that this is acceptable.
SELECT
    MIN(CASE numbered.RowNumGroup WHEN 1 THEN numbered.val END) AS id,
    MIN(CASE numbered.RowNumGroup WHEN 2 THEN numbered.val END) AS pdu_id,
    MIN(CASE numbered.RowNumGroup WHEN 0 THEN numbered.val END) AS [status]
FROM (
    SELECT
        child.el.value('(./text())[1]', 'BIGINT') AS val,
        ROW_NUMBER() OVER (ORDER BY 1/0) AS RowNum,
        ROW_NUMBER() OVER (ORDER BY 1/0) % 3 AS RowNumGroup
    FROM @xml.nodes('/response/message_infos/message_info/*') AS child(el)
) AS numbered
GROUP BY numbered.RowNum - CASE numbered.RowNumGroup WHEN 0 THEN 3 ELSE numbered.RowNumGroup END

更新日期30/05/2016

准备数据:

-- Benchmark fixture: recreate temp table #xml and load it with copies of the sample document.
SET NOCOUNT ON;

-- Drop a leftover #xml from an earlier run in this session.
IF OBJECT_ID('tempdb.dbo.#xml') IS NOT NULL
BEGIN
    DROP TABLE #xml;
END
GO

CREATE TABLE #xml (
    data XML
);
GO

-- One copy of the document per row read from spt_values, capped at 10000 rows.
-- NOTE(review): if spt_values holds fewer than 10000 rows, fewer copies are inserted —
-- confirm the intended volume.
INSERT INTO #xml (data)
SELECT TOP (10000)
    N'
<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <status>12</status>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
    </message_info>
  </message_infos>
</response>'
FROM [master].dbo.spt_values;

Mikael Eriksson

-- Benchmark: numbers-table approach (reported under the label 'Mikael Eriksson').
DECLARE @d DATETIME = GETDATE();

-- For each <message_info>, emit one row per n in 1..count(id) and read the n-th
-- id / pdu_id / status text node through sql:column("N.N").
-- The inline VALUES list caps n at 10 repetitions per <message_info>.
SELECT
      id       = T.X.value('(id/text())[sql:column("N.N")][1]', 'INT')
    , pdu_id   = T.X.value('(pdu_id/text())[sql:column("N.N")][1]', 'INT')
    , [status] = T.X.value('(status/text())[sql:column("N.N")][1]', 'INT')
FROM #xml AS x
CROSS APPLY x.data.nodes('/response/message_infos/message_info') AS T(X)
CROSS APPLY (
    VALUES
        (1), (2), (3), (4), (5),
        (6), (7), (8), (9), (10)
) AS N(N)
WHERE T.X.value('count(id)', 'INT') >= N.N;

-- Report the elapsed time of this variant.
SELECT 'Mikael Eriksson: ' +  CAST(CAST(GETDATE() - @d AS TIME) AS VARCHAR(100));
GO

Shnugo

-- Benchmark: CTE approach (reported under the label 'Shnugo').
-- Each element kind (id, pdu_id, status) is numbered within its <message_info>,
-- then the three lists are joined on (message_info number, position number).
--
-- The DECLARE is terminated with a semicolon on purpose: a WITH that introduces a
-- common table expression is only accepted when the preceding statement in the batch
-- ends with ';'. Without it the batch fails with a syntax error near 'WITH'.
DECLARE @d DATETIME = GETDATE();

-- NOTE(review): all ROW_NUMBER() calls below use ORDER BY (SELECT NULL), i.e. no explicit
-- sort key — confirm that the resulting numbering is stable enough for the joins.
WITH AllMessageInfos AS (
    -- Number every <message_info> and keep it as a stand-alone XML fragment.
    SELECT ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) AS Mi_Index
         , mi.query('.') AS TheMI
    FROM #xml x
    CROSS APPLY data.nodes('/response/message_infos/message_info') AS A(mi)
)
, AllIDs AS (
    -- All <id> values, numbered per <message_info>.
    SELECT Mi_Index
         , ROW_NUMBER() OVER(PARTITION BY Mi_Index ORDER BY (SELECT NULL)) AS id_Index
         , id.value('.','NVARCHAR(MAX)') AS id
    FROM AllMessageInfos
    CROSS APPLY TheMI.nodes('message_info/id') AS a(id)
)
, AllPdu_IDs AS (
    -- All <pdu_id> values, numbered per <message_info>.
    SELECT Mi_Index
         , ROW_NUMBER() OVER(PARTITION BY Mi_Index ORDER BY (SELECT NULL)) AS pdu_Index
         , id.value('.','NVARCHAR(MAX)') AS pdu_id
    FROM AllMessageInfos
    CROSS APPLY TheMI.nodes('message_info/pdu_id') AS a(id)
)
, AllStatus AS (
    -- All <status> values, numbered per <message_info>.
    SELECT Mi_Index
         , ROW_NUMBER() OVER(PARTITION BY Mi_Index ORDER BY (SELECT NULL)) AS status_Index
         , id.value('.', 'INT') AS status
    FROM AllMessageInfos
    CROSS APPLY TheMI.nodes('message_info/status') AS a(id)
)
-- Stitch the n-th id, pdu_id and status of the same <message_info> into one row.
SELECT i.Mi_Index AS MessageInfoIndex
     , i.id_Index AS SubSetIndex
     , i.id
     , p.pdu_id
     , s.[status]
FROM AllIDs AS i
JOIN AllPdu_IDs AS p ON i.Mi_Index = p.Mi_Index AND i.id_Index = p.pdu_Index
JOIN AllStatus AS s ON i.Mi_Index = s.Mi_Index AND i.id_Index = s.status_Index

-- Report the elapsed time of this variant.
SELECT 'Shnugo: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100))
GO

Montewhizdoh

-- Benchmark: position-by-XQuery approach (reported under the label 'Montewhizdoh').
DECLARE @d DATETIME = GETDATE();

-- One output row per child element of every <message_info>:
--   message_info_position : number of <message_info> elements that precede this node in document order
--   internal_position     : number of sibling elements before this node, plus one
--   element_name / value  : local name and text of the element itself
SELECT
      message_info_position = child.el.value('for $i in . return count(../../*[. << $i])', 'INT')
    , internal_position     = child.el.value('for $i in . return count(../*[. << $i]) + 1', 'INT')
    , element_name          = child.el.value('fn:local-name(.)', 'SYSNAME')
    , element_value         = child.el.value('.', 'NVARCHAR(4000)')
FROM #xml AS x
CROSS APPLY x.data.nodes('/response/message_infos/message_info/*') AS child(el);

-- Report the elapsed time of this variant.
SELECT 'Montewhizdoh: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));
GO

Devart(OLD)

-- Benchmark: name-based pivot (reported under the label 'Devart (OLD)').
DECLARE @d DATETIME = GETDATE();

-- Number all child elements, bucket them in runs of three, and pick each output
-- column by the element's local name.
-- NOTE(review): ORDER BY 1/0 supplies no real sort key; the numbering presumably follows
-- the order in which nodes() emits rows — confirm.
SELECT
    MAX(CASE numbered.name WHEN 'id' THEN numbered.val END) AS id,
    MAX(CASE numbered.name WHEN 'pdu_id' THEN numbered.val END) AS pdu_id,
    MAX(CASE numbered.name WHEN 'status' THEN numbered.val END) AS [status]
FROM (
    SELECT
        child.el.value('local-name(.)', 'SYSNAME') AS name,
        child.el.value('.', 'INT') AS val,
        ROW_NUMBER() OVER (ORDER BY 1/0) AS RowNum,
        ROW_NUMBER() OVER (ORDER BY 1/0) % 3 AS RowNumGroup
    FROM #xml AS x
    CROSS APPLY x.data.nodes('/response/message_infos/message_info/*') AS child(el)
) AS numbered
GROUP BY numbered.RowNum - CASE numbered.RowNumGroup WHEN 0 THEN 3 ELSE numbered.RowNumGroup END;

-- Report the elapsed time of this variant.
SELECT 'Devart (OLD): ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));
GO

Devart(新)

-- Benchmark: position-based pivot (reported under the label 'Devart (NEW)').
DECLARE @d DATETIME = GETDATE();

-- Number all child elements; three consecutive numbers form one output row and the
-- number modulo 3 picks the column (1 = id, 2 = pdu_id, 0 = status).
-- NOTE(review): ORDER BY 1/0 supplies no real sort key; the numbering presumably follows
-- the order in which nodes() emits rows — confirm.
SELECT
    MIN(CASE numbered.RowNumGroup WHEN 1 THEN numbered.val END) AS id,
    MIN(CASE numbered.RowNumGroup WHEN 2 THEN numbered.val END) AS pdu_id,
    MIN(CASE numbered.RowNumGroup WHEN 0 THEN numbered.val END) AS [status]
FROM (
    SELECT
        child.el.value('(./text())[1]', 'BIGINT') AS val,
        ROW_NUMBER() OVER (ORDER BY 1/0) AS RowNum,
        ROW_NUMBER() OVER (ORDER BY 1/0) % 3 AS RowNumGroup
    FROM #xml AS x
    CROSS APPLY x.data.nodes('/response/message_infos/message_info/*') AS child(el)
) AS numbered
GROUP BY numbered.RowNum - CASE numbered.RowNumGroup WHEN 0 THEN 3 ELSE numbered.RowNumGroup END;

-- Report the elapsed time of this variant.
SELECT 'Devart (NEW): ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));

性能比较(Core i5 4460 3.2GHz,DDR3 8Gb,SQL Server 2014 SP1 Express):

Mikael Eriksson: 00:00:00.327
Shnugo:          00:00:00.913
Montewhizdoh:    00:00:01.680
Devart (OLD):    00:00:00.363
Devart (NEW):    00:00:00.200

答案 1 :(得分:3)

我提供以下方案供您参考。您可能需要对结果做透视(pivot),但结果本身已经足够有意义,可以直接按原样用于编程。

message_info_position 告诉您该值来自哪个 message_info;local_position 告诉您它在该 message_info 中的位置。

我认为您会发现此解决方案非常强大且能够处理奇怪的数据形状。

希望它有所帮助!

-- Test document with deliberately irregular shapes: a missing <status> in the second
-- <message_info> and extra <pdu_id> elements in the third.
DECLARE @ixml XML=
'<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>3973005891</id>
      <pdu_id>6733996732</pdu_id>
      <status>123</status>
      <id>3973005894</id>
      <pdu_id>6733996755</pdu_id>
      <status>126</status>
      <id>3973005897</id>
      <pdu_id>6733996738</pdu_id>
      <status>129</status>
      <id>39730058912</id>
      <pdu_id>67339967513</pdu_id>
    <pdu_id>67339967513x</pdu_id>
    <pdu_id>67339967513y</pdu_id>
      <status>12</status>
    </message_info>
  </message_infos>
</response>';

-- One row per child element of every <message_info>.
--   internal_position     : 1-based position of the element among its siblings
--                           (count of sibling elements before it in document order, plus one).
--   message_info_position : 1-based ordinal of the enclosing <message_info>
--                           (count of <message_info> elements that precede the node in document
--                           order; the node's own parent precedes it and is therefore included).
-- Both positions come from the XQuery node-order operator "<<" rather than from
-- CHECKSUM() of the parent's text (a hash that collides for <message_info> elements with
-- identical content) or from ROW_NUMBER() ordered by the nodes() column (which numbers
-- across the whole document and relies on undocumented ordering).
SELECT
    m.value('for $i in . return count(../*[. << $i]) + 1', 'int') AS internal_position,
    m.value('for $i in . return count(../../*[. << $i])', 'int') AS message_info_position,
    m.value('fn:local-name(.)', 'SYSNAME') AS element_name,
    m.value('.', 'nvarchar(4000)') AS element_value
FROM @ixml.nodes('/response/message_infos/message_info/*') AS A(m)

答案 2 :(得分:3)

这不是答案,只是性能比较!

我真正喜欢的是不同人提供的各种解决方案。这里有4个答案,每个答案都遵循完全不同的概念。

我很好奇它们的性能如何。这是我的结果:

  1. Mikael Eriksson:约 1.4 秒
  2. Shnugo:约 4.8 秒
  3. montewhizdoh:8.8 秒,但结果尚未透视(pivot)!!
  4. Devart:11.3 秒

    由于所有节点都存在,自上而下读取 XML 的方法显然比需要大量来回导航的方法更快。可能还有一些解决方案可以改进……

    令我惊讶的是,使用 MAX 和 CASE 的 GROUP BY 方案反而很慢……

    这是代码:

    -- Benchmark fixture: temp table #tmp with an identity key and one XML document per row.
    CREATE TABLE #tmp (
        ID INT IDENTITY,
        XmlContent XML
    );
    GO
    -- This batch inserts one copy of the sample document; "GO 1000" runs the batch 1000 times.
    INSERT INTO #tmp (XmlContent)
    VALUES ('<response>
      <message_infos>
        <message_info>
          <id>397300589</id>
          <pdu_id>673399673</pdu_id>
          <status>12</status>
          <id>397300589</id>
          <pdu_id>673399675</pdu_id>
          <status>12</status>
        </message_info>
        <message_info>
          <id>397300591</id>
          <pdu_id>673399669</pdu_id>
          <status>12</status>
          <id>397300591</id>
          <pdu_id>673399671</pdu_id>
          <status>12</status>
        </message_info>
        <message_info>
          <id>3973005891</id>
          <pdu_id>6733996732</pdu_id>
          <status>123</status>
          <id>3973005894</id>
          <pdu_id>6733996755</pdu_id>
          <status>126</status>
          <id>3973005897</id>
          <pdu_id>6733996738</pdu_id>
          <status>129</status>
          <id>39730058912</id>
          <pdu_id>67339967513</pdu_id>
          <status>12</status>
        </message_info>
      </message_infos>
    </response>');
    GO 1000
    
    
    -- Mikael Eriksson: about 1.4 seconds in the author's run
    -- Numbers-table approach: for each <message_info>, emit one row per n in 1..count(id)
    -- and read the n-th id / pdu_id / status text node through sql:column("N.N").
    DECLARE @d DATETIME = GETDATE();
    -- Static numbers table 1..10; this caps the supported repetitions per <message_info> at 10.
    WITH Numbers AS (SELECT N FROM (VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) as tbl(N))
    select T.X.value('(id/text())[sql:column("N.N")][1]', 'nvarchar(max)') as id,
           T.X.value('(pdu_id/text())[sql:column("N.N")][1]', 'nvarchar(max)') as pdu_id,
           T.X.value('(status/text())[sql:column("N.N")][1]', 'int') as status
    from #tmp
    CROSS APPLY XmlContent.nodes('/response/message_infos/message_info') as T(X)
      -- ORDER BY N makes TOP(n) return exactly the numbers 1..n. Without an ORDER BY,
      -- TOP may return any n rows of Numbers, which would address the wrong repetitions.
      cross apply (SELECT TOP(T.X.value('count(id)', 'int')) N FROM Numbers ORDER BY N) AS N(N)

    -- Report the elapsed time of this variant.
    SELECT 'Mikael Eriksson: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));
    GO
    
    -- Shnugo: 4.8 seconds in the author's run
    DECLARE @d DATETIME = GETDATE();

    -- NOTE(review): every ROW_NUMBER() below uses ORDER BY (SELECT NULL), i.e. no explicit
    -- sort key — confirm that the numbering is stable enough for the joins.
    WITH MessageInfos AS (
        -- Number every <message_info> and keep it as a stand-alone XML fragment.
        SELECT
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS MiNo,
            src.mi.query('.') AS MiXml
        FROM #tmp
        CROSS APPLY XmlContent.nodes('/response/message_infos/message_info') AS src(mi)
    ),
    IdList AS (
        -- All <id> values, numbered per <message_info>.
        SELECT
            mis.MiNo,
            ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
            n.el.value('.','nvarchar(max)') AS id
        FROM MessageInfos AS mis
        CROSS APPLY mis.MiXml.nodes('message_info/id') AS n(el)
    ),
    PduIdList AS (
        -- All <pdu_id> values, numbered per <message_info>.
        SELECT
            mis.MiNo,
            ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
            n.el.value('.','nvarchar(max)') AS pdu_id
        FROM MessageInfos AS mis
        CROSS APPLY mis.MiXml.nodes('message_info/pdu_id') AS n(el)
    ),
    StatusList AS (
        -- All <status> values, numbered per <message_info>.
        SELECT
            mis.MiNo,
            ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
            n.el.value('.','int') AS status
        FROM MessageInfos AS mis
        CROSS APPLY mis.MiXml.nodes('message_info/status') AS n(el)
    )
    -- Stitch the n-th id, pdu_id and status of the same <message_info> into one row.
    SELECT
        ids.MiNo AS MessageInfoIndex,
        ids.SeqNo AS SubSetIndex,
        ids.id,
        pdus.pdu_id,
        sts.status
    FROM IdList AS ids
    INNER JOIN PduIdList AS pdus
        ON ids.MiNo = pdus.MiNo AND ids.SeqNo = pdus.SeqNo
    INNER JOIN StatusList AS sts
        ON ids.MiNo = sts.MiNo AND ids.SeqNo = sts.SeqNo

    -- Report the elapsed time of this variant.
    SELECT 'Shnugo: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));
    GO
    
    -- Montewhizdoh: 8.8 seconds in the author's run (result is not pivoted)
    DECLARE @d DATETIME = GETDATE();

    -- One output row per child element of every <message_info>:
    --   message_info_position : number of <message_info> elements that precede this node in document order
    --   internal_position     : number of sibling elements before this node, plus one
    SELECT
          message_info_position = child.el.value('for $i in . return count(../../*[. << $i])', 'int')
        , internal_position     = child.el.value('for $i in . return count(../*[. << $i]) + 1', 'int')
        , element_name          = child.el.value('fn:local-name(.)', 'SYSNAME')
        , element_value         = child.el.value('.', 'nvarchar(4000)')
    FROM #tmp
    CROSS APPLY XmlContent.nodes('/response/message_infos/message_info/*') AS child(el)

    -- Report the elapsed time of this variant.
    SELECT 'Montewhizdoh: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));
    GO
    
    -- Devart: 11.3 seconds in the author's run
    DECLARE @d DATETIME = GETDATE();

    -- Number all child elements, bucket them in runs of three, and pick each output
    -- column by the element's local name.
    -- NOTE(review): ORDER BY 1/0 supplies no real sort key; the numbering presumably follows
    -- the order in which nodes() emits rows — confirm.
    SELECT
        MAX(CASE numbered.name WHEN 'id' THEN numbered.val END) AS id,
        MAX(CASE numbered.name WHEN 'pdu_id' THEN numbered.val END) AS pdu_id,
        MAX(CASE numbered.name WHEN 'status' THEN numbered.val END) AS [status]
    FROM (
        SELECT
            child.el.value('local-name(.)', 'SYSNAME') AS name,
            child.el.value('.', 'nvarchar(max)') AS val,
            ROW_NUMBER() OVER (ORDER BY 1/0) AS RowNum,
            ROW_NUMBER() OVER (ORDER BY 1/0) % 3 AS RowNumGroup
        FROM #tmp
        CROSS APPLY XmlContent.nodes('/response/message_infos/message_info/*') AS child(el)
    ) AS numbered
    GROUP BY numbered.RowNum - CASE numbered.RowNumGroup WHEN 0 THEN 3 ELSE numbered.RowNumGroup END

    -- Report the elapsed time of this variant.
    SELECT 'Devart: ' +  CAST(CAST(GETDATE()-@d AS TIME) AS VARCHAR(100));

    GO

    -- Remove the benchmark fixture.
    DROP TABLE #tmp;
    

答案 3 :(得分:2)

首先:FROM OPENXML 已经过时了!您最好改用真正的 XML 方法……

更新

根据您的评论,我建议:把所有标签读入带编号的集合,然后按它们在各自子集中的索引把这些集合联接起来。

我在第二个message_info中添加了第三个组来测试这个...

-- Sample document; the second <message_info> carries a third id / pdu_id / status group.
DECLARE @ixml XML=
'<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <status>12</status>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
      <id>1111</id>
      <pdu_id>2222</pdu_id>
      <status>33</status>
    </message_info>
  </message_infos>
</response>';

-- NOTE(review): every ROW_NUMBER() below uses ORDER BY (SELECT NULL), i.e. no explicit
-- sort key — confirm that the numbering is stable enough for the joins.
WITH MessageInfos AS (
    -- Number every <message_info> and keep it as a stand-alone XML fragment.
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS MiNo,
        src.mi.query('.') AS MiXml
    FROM @ixml.nodes('/response/message_infos/message_info') AS src(mi)
),
IdList AS (
    -- All <id> values, numbered per <message_info>.
    SELECT
        mis.MiNo,
        ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
        n.el.value('.','bigint') AS id
    FROM MessageInfos AS mis
    CROSS APPLY mis.MiXml.nodes('message_info/id') AS n(el)
),
PduIdList AS (
    -- All <pdu_id> values, numbered per <message_info>.
    SELECT
        mis.MiNo,
        ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
        n.el.value('.','bigint') AS pdu_id
    FROM MessageInfos AS mis
    CROSS APPLY mis.MiXml.nodes('message_info/pdu_id') AS n(el)
),
StatusList AS (
    -- All <status> values, numbered per <message_info>.
    SELECT
        mis.MiNo,
        ROW_NUMBER() OVER (PARTITION BY mis.MiNo ORDER BY (SELECT NULL)) AS SeqNo,
        n.el.value('.','int') AS status
    FROM MessageInfos AS mis
    CROSS APPLY mis.MiXml.nodes('message_info/status') AS n(el)
)
-- Stitch the n-th id, pdu_id and status of the same <message_info> into one row.
SELECT
    ids.MiNo AS MessageInfoIndex,
    ids.SeqNo AS SubSetIndex,
    ids.id,
    pdus.pdu_id,
    sts.status
FROM IdList AS ids
INNER JOIN PduIdList AS pdus
    ON ids.MiNo = pdus.MiNo AND ids.SeqNo = pdus.SeqNo
INNER JOIN StatusList AS sts
    ON ids.MiNo = sts.MiNo AND ids.SeqNo = sts.SeqNo

看起来很奇怪,每个message_info下面都有两组相同的标签。你可以这样做:

-- Sample document: every <message_info> holds exactly two id / pdu_id / status groups.
DECLARE @ixml XML=
'<response>
  <message_infos>
    <message_info>
      <id>397300589</id>
      <pdu_id>673399673</pdu_id>
      <status>12</status>
      <id>397300589</id>
      <pdu_id>673399675</pdu_id>
      <status>12</status>
    </message_info>
    <message_info>
      <id>397300591</id>
      <pdu_id>673399669</pdu_id>
      <status>12</status>
      <id>397300591</id>
      <pdu_id>673399671</pdu_id>
      <status>12</status>
    </message_info>
  </message_infos>
</response>';

-- First id / pdu_id / status of every <message_info> ...
SELECT
      id     = mi.x.value('id[1]','bigint')
    , pdu_id = mi.x.value('pdu_id[1]','bigint')
    , status = mi.x.value('status[1]','int')
FROM @ixml.nodes('/response/message_infos/message_info') AS mi(x)

UNION ALL

-- ... followed by the second id / pdu_id / status of every <message_info>.
SELECT
      id     = mi.x.value('id[2]','bigint')
    , pdu_id = mi.x.value('pdu_id[2]','bigint')
    , status = mi.x.value('status[2]','int')
FROM @ixml.nodes('/response/message_infos/message_info') AS mi(x)

答案 4 :(得分:2)

您可以使用数字表来获取第 n 个值。这种做法不依赖于未定义的行顺序,但它确实要求所有元素在每个 message_info 元素中出现的次数相同。

这里我使用10行的静态数字表。

-- Numbers-table approach: for each <message_info>, emit one row per n in 1..count(id)
-- and read the n-th id / pdu_id / status text node through sql:column("N.N").
-- The inline VALUES list caps n at 10 repetitions per <message_info>.
-- NOTE(review): @x is not declared in this snippet; presumably an XML variable holding the document.
SELECT
      id       = T.X.value('(id/text())[sql:column("N.N")][1]', 'int')
    , pdu_id   = T.X.value('(pdu_id/text())[sql:column("N.N")][1]', 'int')
    , [status] = T.X.value('(status/text())[sql:column("N.N")][1]', 'int')
FROM @x.nodes('/response/message_infos/message_info') AS T(X)
CROSS APPLY (
    VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)
) AS N(N)
WHERE T.X.value('count(id)', 'int') >= N.N