有没有办法在更短的时间内完成这项工作?我从案例表(cases)中读取摘要(Summary)列,并使用以下循环将其逐词拆分后写入单词表(words):
Example case table
CaseID | CaseNumber | Summary
1 111111 This is a summary
2 111112 This is Summary 2
-- Splits case summaries into individual words, one case per iteration,
-- and stores every piece in the words table.
DECLARE @currentCaseId int = 1;

-- Only caseID values 1 through 1000 are visited.
WHILE @currentCaseId <= 1000
BEGIN
    INSERT INTO words (caseID, caseNumber, pn, word)
    SELECT DISTINCT
        pieces.caseID,
        pieces.caseNumber,
        pieces.pn,
        pieces.word
    FROM dbo.Split6(
             ' ',
             (SELECT summary FROM cases WHERE caseID = @currentCaseId)
         ) AS pieces
    -- Split6 returns rows for every row of cases, so keep only the current case.
    WHERE pieces.caseID = @currentCaseId
    -- Split6 is built on a recursive CTE; 0 removes the recursion limit.
    OPTION (MAXRECURSION 0);

    SET @currentCaseId += 1;
END
GO
它可以工作,但速度很慢:拆分1000个案例花了3个小时,而我有10万个案例。有没有办法更高效地完成这项工作?这是我正在使用的拆分函数:
Split6 function:
-- Splits @s on the single-character separator @sep using a recursive CTE.
--
-- Parameters:
--   @sep  one-character delimiter
--   @s    text to split (at most 4000 characters)
--
-- Returns, for every row of the cases table, one row per piece of @s:
--   caseID, caseNumber  copied from cases
--   pn                  1-based position of the piece within @s
--   word                the piece itself
--
-- NOTE: the anchor reads cases without any filter, so the same pieces are
-- repeated for each case row; callers are expected to filter on caseID.
-- Long inputs can exceed the default recursion limit of 100, in which case
-- the calling statement needs OPTION (MAXRECURSION ...).
CREATE FUNCTION [dbo].[Split6] (
    @sep CHAR(1),
    @s   NVARCHAR(4000)
)
RETURNS TABLE
AS
RETURN (
    WITH Pieces (caseID, caseNumber, pn, start, stop) AS (
        -- Anchor: the first piece starts at character 1 and ends at the
        -- first separator (stop = 0 when @s contains no separator).
        SELECT
            cs.caseID,
            cs.caseNumber,
            1,
            1,
            CHARINDEX(@sep, @s)
        FROM cases AS cs
        UNION ALL
        -- Recursive step: the next piece starts right after the previous
        -- separator and ends at the following one.
        SELECT
            caseID,
            caseNumber,
            pn + 1,
            stop + 1,
            CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0  -- stop = 0 means the previous piece was the last one
    )
    SELECT
        caseID,
        caseNumber,
        pn,
        SUBSTRING(@s, start, CASE
                                 WHEN stop > 0 THEN stop - start
                                 -- Last piece: take the rest of the string.
                                 -- 4000 is the declared maximum length of @s,
                                 -- so the final word is never cut short.
                                 ELSE 4000
                             END) AS word
    FROM Pieces
);
GO
答案 0 :(得分:0)
你应该尽可能避免循环。
以下示例将解析/拆分(Parse/Split)函数与 Cross Apply 配合使用(如果还想显示结果为空的行,请改用 Outer Apply)。
就性能而言......使用100,000个记录的测试样本,平均每个5个字,执行时间为2.2秒。
**示例**
-- Demo data: two sample cases held in a table variable.
DECLARE @YourTable TABLE (
    [CaseID]     varchar(50),
    [CaseNumber] varchar(50),
    [Summary]    varchar(50)
);

INSERT INTO @YourTable ([CaseID], [CaseNumber], [Summary])
VALUES
    (1, 111111, 'This is a summary'),
    (2, 111112, 'This is Summary 2');

-- One output row per space-separated word of each summary.
-- CROSS APPLY invokes the parser once per source row.
SELECT
    src.CaseID,
    src.CaseNumber,
    parsed.RetSeq,
    parsed.RetVal
FROM @YourTable AS src
CROSS APPLY [dbo].[udf-Str-Parse](src.Summary, ' ') AS parsed;
**返回**
CaseID CaseNumber RetSeq RetVal
1 111111 1 This
1 111111 2 is
1 111111 3 a
1 111111 4 summary
2 111112 1 This
2 111112 2 is
2 111112 3 Summary
2 111112 4 2
如果感兴趣,以下是所用的UDF:
-- Splits @String on @Delimiter by turning it into an XML fragment and
-- shredding that fragment into rows.
--
-- Parameters:
--   @String     text to split
--   @Delimiter  separator, up to 10 characters
--
-- Returns one row per piece:
--   RetSeq  running number produced by ROW_NUMBER() with no explicit ordering
--   RetVal  the piece with leading and trailing spaces trimmed
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max), @Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RetSeq,
        LTRIM(RTRIM(parts.item.value('(./text())[1]', 'varchar(max)'))) AS RetVal
    FROM (
        SELECT
            CAST(
                '<x>'
                + REPLACE(
                      -- Swap the delimiter for a placeholder token first and run
                      -- the text through FOR XML PATH('') so that characters such
                      -- as < and & are escaped before the cast to xml.
                      (SELECT REPLACE(@String, @Delimiter, '§§Split§§') AS [*] FOR XML PATH('')),
                      -- Then turn each placeholder into an element boundary.
                      '§§Split§§',
                      '</x><x>'
                  )
                + '</x>'
                AS xml
            ).query('.') AS x
    ) AS doc
    -- One row per <x> element of the generated fragment.
    CROSS APPLY doc.x.nodes('x') AS parts(item)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')
编辑 - 另一个解析/拆分功能
以下TVF比XML版本略快,但输入长度限制为8K。例如,对5,000条样本记录(平均每条36个“单词”)进行测试时,它比XML版本快20ms。
-- Splits @String on @Delimiter using an in-line numbers (tally) table
-- instead of XML.
--
-- Parameters:
--   @String     text to split
--   @Delimiter  separator, up to 25 characters
--
-- Returns one row per piece:
--   RetSeq  1-based position of the piece, ordered by its start offset
--   RetVal  the piece with leading and trailing spaces trimmed
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (@String varchar(max), @Delimiter varchar(25))
RETURNS TABLE
AS
RETURN (
    WITH
    -- Ten constant rows used as the building block for the number generator.
    ten_rows (N) AS (
        SELECT 1
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v (N)
    ),
    -- Sequential numbers 1..DATALENGTH(@String); the four-way cross join
    -- can supply at most 10 * 10 * 10 * 10 = 10,000 of them.
    tally (N) AS (
        SELECT TOP (ISNULL(DATALENGTH(@String), 0))
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM (
            SELECT 1 AS N
            FROM ten_rows AS a
            CROSS JOIN ten_rows AS b
            CROSS JOIN ten_rows AS c
            CROSS JOIN ten_rows AS d
        ) AS grid
    ),
    -- Start offset of every piece: position 1 plus the position right after
    -- each occurrence of the delimiter.
    piece_starts (N) AS (
        SELECT 1
        UNION ALL
        SELECT t.N + DATALENGTH(@Delimiter)
        FROM tally AS t
        WHERE SUBSTRING(@String, t.N, DATALENGTH(@Delimiter)) = @Delimiter
    ),
    -- Length of every piece: distance to the next delimiter, or 8000 when
    -- no further delimiter is found.
    piece_spans (N, L) AS (
        SELECT
            s.N,
            ISNULL(NULLIF(CHARINDEX(@Delimiter, @String, s.N), 0) - s.N, 8000)
        FROM piece_starts AS s
    )
    SELECT
        ROW_NUMBER() OVER (ORDER BY p.N) AS RetSeq,
        LTRIM(RTRIM(SUBSTRING(@String, p.N, p.L))) AS RetVal
    FROM piece_spans AS p
);
--Original Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-8K]('John||Cappelletti||was||here','||')