接着上一个问题:我正在尝试清理一些数据,其中 ID 是以逗号分隔的值列表形式存储的,我需要把它们拆分成多行。我有一个可用的查询,但速度很慢。有没有比我目前的做法更快的办法?
-- Expands comma-separated SearchValue entries (LogSearchDimension_ID = 5) of
-- LogSearchesDimensions into one row per value, then removes the original
-- comma-separated rows. Handles at most @MaxRows source rows per run; re-run
-- the script until it reports zero rows to work through the whole table.
--
-- The work is done as three set-based statements instead of a row-by-row
-- loop: one statement picks the batch, one inserts every split piece, one
-- deletes the originals.
SET NOCOUNT OFF;
SET XACT_ABORT ON;  -- any run-time error rolls the whole batch back

DECLARE @Conversion TABLE
(
    ID bigint
  , LogSearch_ID int
  , LogSearchDimension_ID int
  , SearchValue varchar(MAX)
);
DECLARE @MaxRows int;
SET @MaxRows = 500;

BEGIN TRANSACTION;

-- Pick the batch. No NOLOCK hint here: these rows are about to be deleted,
-- so acting on uncommitted (dirty) data could insert pieces for a row that
-- never really existed.
INSERT INTO @Conversion (ID, LogSearch_ID, LogSearchDimension_ID, SearchValue)
SELECT TOP (@MaxRows)
    ID
  , LogSearch_ID
  , LogSearchDimension_ID
  , SearchValue
FROM LogSearchesDimensions
WHERE LogSearchDimension_ID = 5
  AND SearchValue LIKE '%,%';

-- One output row per piece. CROSS APPLY invokes the split function once for
-- each captured row. The SPLIT function defined in this file exposes the
-- pieces in column "val"; arguments 0, 0 mean "do not trim" and "drop empty
-- pieces".
INSERT INTO LogSearchesDimensions (LogSearch_ID, LogSearchDimension_ID, SearchValue)
SELECT
    c.LogSearch_ID
  , c.LogSearchDimension_ID
  , pieces.val
FROM @Conversion AS c
CROSS APPLY dbo.Split(c.SearchValue, 0, 0) AS pieces;

-- Remove exactly the rows captured above, matched by ID, so the freshly
-- inserted single-value rows are never touched.
DELETE lsd
FROM LogSearchesDimensions AS lsd
INNER JOIN @Conversion AS c
    ON c.ID = lsd.ID;

COMMIT TRANSACTION;
Split 函数如下(不是我自己写的):
-- Splits a comma-separated string into one row per piece.
--   @s                  : the comma-separated list to split
--   @trimPieces         : 1 = LTRIM/RTRIM each piece before returning it
--   @returnEmptyStrings : 1 = return every piece;
--                         0 = return only pieces whose LEN is > 0
--                             (LEN ignores trailing spaces, so pieces made
--                             only of spaces are dropped as well)
-- Returns a table with a single column, val, holding the pieces.
CREATE FUNCTION SPLIT
(
    @s nvarchar(max),
    @trimPieces bit,
    @returnEmptyStrings bit
)
returns @t table (val nvarchar(max))
as
begin
    declare @i int, @j int
    -- @j = number of commas in @s.
    -- DATALENGTH (2 bytes per nvarchar character) is used instead of LEN
    -- because LEN ignores trailing spaces: for input such as 'a ,' removing
    -- the comma leaves 'a ', whose LEN is 1, so a LEN-based difference
    -- reports 2 commas instead of 1 and an extra empty piece is produced.
    select @i = 0, @j = (datalength(@s) - datalength(replace(@s, N',', N''))) / 2
    -- Recursive CTE: each level peels one piece off the front.
    --   i = 1-based level number
    --   s = the text this level starts from
    --   n = text before the first comma of s (the piece for this level)
    --   m = text after the first comma of s (input to the next level)
    ;with cte
    as
    (
        select
            i = @i + 1,
            s = @s,
            n = substring(@s, 0, charindex(',', @s)),
            m = substring(@s, charindex(',', @s)+1, len(@s) - charindex(',', @s))
        union all
        select
            i = cte.i + 1,
            s = cte.m,
            n = substring(cte.m, 0, charindex(',', cte.m)),
            m = substring(
                cte.m,
                charindex(',', cte.m) + 1,
                len(cte.m)-charindex(',', cte.m)
            )
        from cte
        where i <= @j
    )
    insert into @t (val)
    select pieces
    from
    (
        select
            -- Levels 1..@j contribute their leading piece n; the final level
            -- (i = @j + 1) has no comma left, so its piece is the remainder m.
            case
                when @trimPieces = 1
                    then ltrim(rtrim(case when i <= @j then n else m end))
                else case when i <= @j then n else m end
            end as pieces
        from cte
    ) t
    where
        (@returnEmptyStrings = 0 and len(pieces) > 0)
        or (@returnEmptyStrings = 1)
    -- Lift the default recursion limit of 100 so long lists can be split.
    option (maxrecursion 0)
    return
end
GO
也就是说,这个查询每次取出一行包含逗号分隔值的记录,把它拆分成多行,插回维度表,然后删除原始行。整个更新过程运行起来极其缓慢。你有什么改进建议吗?
这是我最终采用的解决方案。它不算非常快,但比在循环中逐个拆分字符串的做法更稳定,也更快。
-- Expands comma-separated SearchValue entries (LogSearchDimension_ID = 5) of
-- LogSearchesDimensions into one row per value by generating, for one source
-- row at a time, an "INSERT ... SELECT ... UNION ALL SELECT ..." statement
-- followed by a DELETE of the source row, and executing it.
-- Processes at most @MaxRows source rows per run.
SET NOCOUNT ON;
SET XACT_ABORT ON;  -- any run-time error rolls the current row's work back

DECLARE @RowsUpdated int, @MaxRows int, @NumUpdates int, @SQL varchar(max);
SET @MaxRows = 100;
SET @NumUpdates = 0;
SET @RowsUpdated = 1;

WHILE @RowsUpdated > 0 AND @NumUpdates < @MaxRows
BEGIN
    SET @SQL = NULL;

    -- Build the statement for one row that still holds a comma-separated list.
    -- SELECT-assignment is used (not SET @SQL = (subquery)) so that @@ROWCOUNT
    -- reflects whether a row was actually found; after a SET assignment it is
    -- always 1 and the loop could never detect "nothing left to do".
    --
    -- Every piece is emitted as a quoted string literal with embedded single
    -- quotes doubled, so non-numeric values work and stored text cannot alter
    -- the generated statement. Pieces are inserted as-is (not trimmed, and
    -- empty pieces are kept).
    --
    -- No NOLOCK hint: the row is about to be deleted, so it must not be picked
    -- from uncommitted data.
    SELECT TOP 1
        @SQL =
            'INSERT INTO LogSearchesDimensions (SearchValue, LogSearch_ID, LogSearchDimension_ID) SELECT '''
            + REPLACE(
                  REPLACE(CAST(SearchValue AS varchar(max)), '''', ''''''),
                  ',',
                  ''', ' + CAST(LogSearch_ID AS varchar(30)) + ', ' + CAST(LogSearchDimension_ID AS varchar(30)) + ' UNION ALL SELECT '''
              )
            + ''', ' + CAST(LogSearch_ID AS varchar(30)) + ', ' + CAST(LogSearchDimension_ID AS varchar(30)) + ';'
            + 'DELETE FROM LogSearchesDimensions WHERE ID = ' + CAST(ID AS varchar(30)) + ';'
    FROM LogSearchesDimensions
    WHERE LogSearchDimension_ID = 5
      AND SearchValue LIKE '%,%';

    SET @RowsUpdated = @@ROWCOUNT;

    -- Stop when nothing is left. Also stop if the statement came out NULL
    -- (a NULL LogSearch_ID would null the whole concatenation): executing it
    -- would do nothing and the same row would be picked again every iteration.
    IF @RowsUpdated = 0 OR @SQL IS NULL
        BREAK;

    -- The transaction is opened only once there is work to do, so BREAK above
    -- can never leave a transaction open.
    BEGIN TRANSACTION;
    EXEC (@SQL);
    COMMIT TRANSACTION;

    SET @NumUpdates = @NumUpdates + 1;
END
答案 0 :(得分:0)
在 SQL 中进行字符串拆分会很慢。您是否考虑过先将数据导出到平面文件,再用 SSIS 包重新导入?
答案 1 :(得分:0)
不要在游标中借助表来进行拆分,而是尝试以下方法:
-- Builds and runs one INSERT that turns a comma-separated id list into one
-- row per id, each carrying the same two extra column values.
-- @idfield, @otherfield1 and @otherfield2 are not declared in this snippet;
-- presumably they are string variables filled by the surrounding cursor
-- fetch -- confirm against the caller.
DECLARE @sql varchar(MAX);
SELECT @sql = 'insert into mytable(id, otherfield1, otherfield2) select '
    -- each comma closes the current row and opens the next SELECT
    + REPLACE(@idfield, ',', ', ' + @otherfield1 + ', ' + @otherfield2 + ' union all select ')
    -- the last id has no comma after it, so its extra columns are appended here
    + ', ' + @otherfield1 + ', ' + @otherfield2;
EXEC(@SQL);
然后,在游标处理完所有包含逗号分隔值的行之后,再执行一条简单的删除语句即可。
这里假设 otherfield1 和 otherfield2 是数字;否则,您需要在该动态 SQL 中对它们进行转义。