我在SQL Server 2012 / SQL Server 2016上。
XQuery 的 `modify()` 方法一次只能更改一个值,所以我只能逐个元素地遍历我的 XML 数据,这非常慢。当一行中只有一个不同的值需要更改时,我会把 XML 转换为字符串并直接替换该值;但在大多数情况(行)下,存在多个不同的值。
希望下面这份(经过大幅简化的)数据能把 my_xml 的结构说明清楚:在一组元素(a 和 b)上存在多个 label 属性。
-- Sample data: rebuild #t1 from scratch and load four XML documents.
IF OBJECT_ID('tempdb..#t1') IS NOT NULL
    DROP TABLE #t1;

CREATE TABLE #t1
(
    id     INT NOT NULL IDENTITY(1, 1) UNIQUE CLUSTERED,
    my_xml XML
);

-- The trailing comment on each row names the sequence of labels it contains.
INSERT INTO #t1 (my_xml)
VALUES
    ('<root><a label=''name A'' /> <b /> </root>'), -- A
    ('<root><a label=''name A'' /> <b label=''name B'' /> </root>'), -- AB
    ('<root><a label=''name A'' /> <b label=''name B'' /> <a label=''name A'' /> <b label=''name B'' /><a label=''name A'' /> <b label=''name B'' /></root>'), -- ABABAB
    ('<root><a label=''name B'' /> <b label=''name A'' /> <a label=''name B'' /> <a label=''name B'' /> <a label=''name B'' /> </root>'); -- BABBB

SELECT
    id,
    my_xml
FROM #t1;
我想把每个 label 属性的值替换为其原始值的 SHA 哈希。
我查询了一些关于my_xml结构的信息,以最小化要修改的行:
-- Helper table: for every row of #t1, count the label attributes found on
-- <a>/<b> children of <root> and count how many distinct label values occur.
IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2;

SELECT
    src.id,
    -- Total number of label attributes in this document.
    MAX(src.my_xml.value('count(/root/*[self::a or self::b]/@label)', 'int')) AS max_label,
    -- Number of different label values in this document.
    MAX(src.my_xml.value('count(distinct-values(/root/*[self::a or self::b]/@label))', 'int')) AS max_label_distinct
INTO #t2
FROM #t1 AS src
GROUP BY src.id;

SELECT
    id,
    max_label,
    max_label_distinct
FROM #t2;
然后我修改 XML(只处理含有多个不同 label 值的行,因为只有这些行才需要逐个遍历 XML):
-- Rewrite every label attribute as the SHA2-256 hex digest of its value.
-- Only rows of #t1 that #t2 reports as holding more than one distinct label
-- are visited; within a row the labels are replaced one position at a time,
-- from the last position down to the first.
SET NOCOUNT ON;

DECLARE
    @label       CHAR(64),  -- 32-byte digest rendered as exactly 64 hex characters
    @id          INT,       -- #t1.id of the row currently being rewritten
    @n_max_label SMALLINT;  -- 1-based position of the label being replaced

-- LOCAL scopes the cursor name to this batch; FAST_FORWARD requests a
-- forward-only, read-only cursor, which is all this loop needs.
DECLARE test CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        id,
        max_label
    FROM #t2
    WHERE max_label_distinct > 1;

OPEN test;

FETCH NEXT FROM test INTO @id, @n_max_label;

-- 0 is the only status that means "a row was fetched".
WHILE @@FETCH_STATUS = 0
BEGIN
    WHILE @n_max_label > 0
    BEGIN
        -- The label is read as nvarchar(4000): no blank padding, no cut at 64
        -- characters, and at most 8000 bytes of input for HASHBYTES.
        -- CONVERT style 2 emits the digest without the '0x' prefix, so all
        -- 64 hex characters fit into CHAR(64); style 1 would need 66 and the
        -- last byte of the hash would be cut off.
        SELECT @label = ISNULL(CONVERT(CHAR(64), HASHBYTES('SHA2_256', my_xml.value('((/root/*[self::a or self::b]/@label)[sql:variable("@n_max_label")])[1]', 'nvarchar(4000)')), 2), 'x')
        FROM #t1
        WHERE id = @id;

        UPDATE #t1
        SET my_xml.modify('replace value of ((/root/*[self::a or self::b]/@label)[sql:variable("@n_max_label")])[1] with sql:variable("@label")')
        WHERE id = @id;

        SET @n_max_label = @n_max_label - 1;
    END;

    FETCH NEXT FROM test INTO @id, @n_max_label;
END;

CLOSE test;
DEALLOCATE test;
最终结果:
-- Final result: current contents of #t1.
SELECT
    id,
    my_xml
FROM #t1;
我希望把这个方案的速度提高 10 到 100 倍,因为目前计算出结果大约需要 2 个月——这是一个针对约 600 GB 数据的长时间操作。(把数据导出后用其他工具处理不在可选范围之内。)
答案 0 :(得分:1)
您已经发现,`.modify()` 一次只能更改一个值。这很慢,并且需要过程式(`CURSOR`)逻辑。
我没有看到您的原始 XML,因此这个思路可能方向不对,但您可以尝试这样做:
您的新帮助程序表
-- New helper table: one row per distinct (id, element, attribute, value)
-- combination found in #t1, together with the value's SHA2-256 digest, the
-- digest rendered as Base64, and ready-made search/replace strings.
-- An existing #t2 is dropped first so that SELECT ... INTO can be re-run in
-- the same session without an "object already exists" error.
IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2;

WITH cte AS
(
    SELECT
         id
        ,A.AllLabels.value('local-name(..)','nvarchar(max)') AS ElementName  -- name of the element that owns the attribute
        ,A.AllLabels.value('local-name(.)','nvarchar(max)')  AS LabelName    -- name of the attribute itself
        ,A.AllLabels.value('.','nvarchar(max)')              AS LabelValue   -- attribute value
        ,HASHBYTES('SHA2_256', A.AllLabels.value('.','nvarchar(max)')) AS LabelHash
    FROM #t1
    --use "/root/" instead of "//" if always below <root>
    CROSS APPLY my_xml.nodes('//*[local-name()="a" or local-name()="b"]/@label') AS A(AllLabels)
)
SELECT
     id
    ,ElementName
    ,LabelName
    ,LabelValue
    ,LabelHash
    -- Passing the binary digest through FOR XML yields its Base64 text.
    ,(SELECT LabelHash FOR XML PATH(''),TYPE).value('.','nvarchar(max)') AS HashBase64
    -- NOTE(review): LabelValue is the decoded attribute text; a label that
    -- contains characters XML serializes as entities (e.g. &) would not match
    -- the serialized document. Verify before using these with REPLACE.
    ,CONCAT(' ',LabelName,'="',LabelValue,'"' ) AS ReplaceThis
    ,CONCAT(' ',LabelName,'="',(SELECT LabelHash FOR XML PATH(''),TYPE).value('.','nvarchar(max)'),'"' ) AS WithThat
INTO #t2
FROM cte
GROUP BY id,ElementName,LabelName,LabelValue,LabelHash;  -- collapses repeated labels within a row

SELECT
     id
    ,ElementName
    ,LabelName
    ,LabelValue
    ,LabelHash
    ,HashBase64
    ,ReplaceThis
    ,WithThat
FROM #t2;
结果
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| id | ElementName | LabelName | LabelValue | LabelHash | HashBase64 | ReplaceThis | WithThat |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 1 | a | label | name A | 0x6F5A56BD0B5E367787D4D3F798F5467C17FB798C0849ECF88373A7346EB405CB | b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs= | label="name A" | label="b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 2 | a | label | name A | 0x6F5A56BD0B5E367787D4D3F798F5467C17FB798C0849ECF88373A7346EB405CB | b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs= | label="name A" | label="b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 3 | a | label | name A | 0x6F5A56BD0B5E367787D4D3F798F5467C17FB798C0849ECF88373A7346EB405CB | b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs= | label="name A" | label="b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 4 | b | label | name A | 0x6F5A56BD0B5E367787D4D3F798F5467C17FB798C0849ECF88373A7346EB405CB | b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs= | label="name A" | label="b1pWvQteNneH1NP3mPVGfBf7eYwISez4g3OnNG60Bcs=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 2 | b | label | name B | 0xC8144356A508FA516CC263E0839B7D22811A6F8333851F5319E778D94FE5B0C3 | yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM= | label="name B" | label="yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 3 | b | label | name B | 0xC8144356A508FA516CC263E0839B7D22811A6F8333851F5319E778D94FE5B0C3 | yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM= | label="name B" | label="yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
| 4 | a | label | name B | 0xC8144356A508FA516CC263E0839B7D22811A6F8333851F5319E778D94FE5B0C3 | yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM= | label="name B" | label="yBRDVqUI+lFswmPgg5t9IoEab4MzhR9TGed42U/lsMM=" |
+----+-------------+-----------+------------+--------------------------------------------------------------------+----------------------------------------------+----------------+------------------------------------------------------+
您可以利用这张表,基于 `ReplaceThis` 和 `WithThat` 两列在字符串层面上尝试 `REPLACE`,也可以在 `CURSOR` 中配合 `.modify()` 使用它。
要记住的一些事情:
- 通过 `FOR XML` 输出时,二进制值(默认)会被编码为 Base64,而不是十六进制字符串。
- 如果不需要区分带有 `@label` 的元素,则可以在辅助表中省略此列(ElementName)以减少行数。
- 在字符串层面上,`label = "blah"` 与 `label="blah"` 并不相同。
- 如果必须使用 `.modify()`,您可以在 `CURSOR` 内动态拼出一条语句并用 `EXEC(@YourStatement)` 执行,但我怀疑这样并不会快……