我写的SQL匹配函数花了太多时间。需要优化

时间:2012-09-11 11:40:43

标签: sql sql-server sql-server-2008 tsql

这是示例测试数据,对于其应返回的任何其他内容,函数应返回1:

Inventor: Raj Patel
Attorney: Raj Patel

Inventor: Patel; Raj
Attorney: Patel

Inventor: Patel; R
Attorney: Patel; Raj

Inventor: Patel; Raj, Madnani; Raj
Attorney: Patel; Raj

Inventor: Patel; Raj
Attorney: Patel; R


**Eg.** Select dbo.Match('Patel; R','Patel; Raj')

所有这些执行都应返回1:

Select dbo.Match('Raj Patel','Raj Patel')
Select dbo.Match('Patel; Raj','Patel')
Select dbo.Match('Patel; R',' Patel; Raj')
Select dbo.Match('Patel; Raj, Madnani; Raj','Patel; Raj')
Select dbo.Match('Patel; Raj','Patel; R')

应该返回1

这是我使用太多游标的匹配函数:

ALTER FUNCTION [dbo].[Match]
(
    @Subj1      varchar(8000),
    @Subj2      varchar(8000)
)
RETURNS bit
AS
BEGIN
    Set @Subj1 = IsNull(@Subj1,'')
    Set @Subj2 = IsNull(@Subj2,'')

    If @Subj1 = '' Or @Subj2 = ''
    Begin
        Return 0
    End

    If Lower(@Subj1) = Lower(@Subj2)
    Begin
        Return 1
    End

    Declare Subj1NamesCurr Cursor For --all separate names
        Select * From dbo.Split(@Subj1,',')

    Declare Subj2NamesCurr Cursor SCROLL For --all separate names
        Select * From dbo.Split(@Subj2,',')

    Open Subj1NamesCurr
    Open Subj2NamesCurr

    Declare @Sub1Names  varchar(8000)
    Declare @Sub2Names  varchar(8000)
    Declare @Sub1NamePart   varchar(8000)
    Declare @Sub2NamePart   varchar(8000)
    Declare @Sub1PartCount  tinyint = 0
    Declare @Sub2PartCount  tinyint = 0
    Declare @Sub1NamesPart  TABLE(Data varchar(8000))
    Declare @Sub2NamesPart  TABLE(Data varchar(8000))
    Declare @MatchCount int = 0
    Declare @TempCount int = 0

    Fetch From Subj1NamesCurr INTO @Sub1Names --fetch 1st name from 1st subject

    Insert into @Sub1NamesPart
            Select * From dbo.Split(@Sub1Names,';') --get names part from 1st subject's row

    Select @Sub1PartCount = Count(*) From @Sub1NamesPart


    While @@Fetch_Status = 0 --each names of 1st subject
    Begin

        Fetch First From Subj2NamesCurr into @Sub2Names

        While @@Fetch_Status = 0 --each names of 1st subject
        Begin
            Declare Sub1NameCurr Cursor For
                Select * From @Sub1NamesPart --name parts of 1st subject

            OPEN Sub1NameCurr

            Fetch From Sub1NameCurr into @Sub1NamePart

            Insert into @Sub2NamesPart
                Select * From dbo.Split(@Sub2Names,';') 

            Select @Sub2PartCount = Count(*) From @Sub2NamesPart
            Set @MatchCount = 0

            While @@Fetch_Status = 0 --splitted name of 1st subject
            Begin

                Declare Sub2NameCurr Cursor For
                    Select * From @Sub2NamesPart  --name parts of 2nd subject

                OPEN Sub2NameCurr

                Fetch From Sub2NameCurr into @Sub2NamePart

                Set @TempCount = 0

                While @@Fetch_Status = 0 --splitted name of 2nd subject
                Begin
                    Set @TempCount = @TempCount + 1
                    If dbo.Trim(Lower(@Sub1NamePart)) = dbo.Trim(Lower(@Sub2NamePart))
                    Begin

                        Set @MatchCount = @MatchCount + 1

                        If @Sub2PartCount = 1
                        Begin
                            Return 1
                        End
                    End
                    Else If Lower(Left(dbo.Trim(@Sub1NamePart),1)) = Lower(dbo.Trim(@Sub2NamePart)) Or 
                        Lower(Left(dbo.Trim(@Sub2NamePart),1)) = Lower(dbo.Trim(@Sub1NamePart))
                    Begin

                        Set @MatchCount = @MatchCount + 1
                    End

                    Fetch Next From Sub2NameCurr into @Sub2NamePart

                    Delete from @Sub2NamesPart
                    Insert into @Sub2NamesPart
                        Select * From dbo.Split(@Sub2Names,';') 
                End

                If @MatchCount = @Sub2PartCount
                Begin
                    Return 1
                End

                CLOSE Sub2NameCurr
                DEALLOCATE Sub2NameCurr

                Fetch Next From Sub1NameCurr into @Sub1NamePart

                Delete from @Sub1NamesPart
                Insert into @Sub1NamesPart
                    Select * From dbo.Split(@Sub1Names,';') --get names part from 1st subject's row

                Select @Sub1PartCount = Count(*) From @Sub1NamesPart                
            End

            CLOSE Sub1NameCurr
            DEALLOCATE Sub1NameCurr

        End
    End

    Close Subj1NamesCurr
    Deallocate Subj1NamesCurr

    Close Subj2NamesCurr
    Deallocate Subj2NamesCurr   
    Return 0

END

编辑:为了不产生混淆,Trim只是一个对字符串执行LTrim和RTrim的函数。就是这样。

2 个答案:

答案 0 :(得分:1)

这适用于您的示例,只是不知道它是否比您的解决方案更快。

代码应该非常自我解释并且易于更改

DECLARE @str1 VARCHAR(max)
DECLARE @str2 VARCHAR(max)
SELECT @str1 = 'Patel; Raj'
SELECT @str2 = 'Patel'

DECLARE @x1 XML
DECLARE @x2 XML
SELECT @x1 = CAST('<i>' + REPLACE(@str1, ',', '</i><i>') + '</i>' AS XML)
SELECT @x2 = CAST('<i>' + REPLACE(@str2, ',', '</i><i>') + '</i>' AS XML)


SELECT 
COUNT(*)
FROM
(
SELECT
p.number as Position,
LTRIM(RTRIM(x.value('.','VARCHAR(50)'))) AS Name
FROM
master..spt_values p
cross APPLY @x1.nodes('/i[position()=sql:column("number")]') n(x) 
where p.type = 'p'
) Names1
INNER JOIN
(
SELECT
p.number as Position,
LTRIM(RTRIM(x.value('.','VARCHAR(50)'))) AS Name
FROM
master..spt_values p
cross APPLY @x2.nodes('/i[position()=sql:column("number")]') n(x) 
where p.type = 'p'
) Names2
ON
Names1.Name = Names2.Name OR
(LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),1))) = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),1))) 
AND
  LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),2))) = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),2))) 
) OR
(LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),2))) = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),2))) 
AND
  LEFT(LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),1))),1) = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),1))) 
) OR
(LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),2))) = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),2))) 
AND
  LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),1))) = LEFT(LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),1))),1) 
) OR
Names1.Name = LTRIM(RTRIM(PARSENAME(Replace(Names2.Name,';','.'),2)))
OR
Names2.Name = LTRIM(RTRIM(PARSENAME(Replace(Names1.Name,';','.'),2)))

答案 1 :(得分:1)

这是一个避免游标的答案。我认为这可能最好分解为几个功能,但请告诉我你的想法以及它对你来说是否更快。

对我来说,这是性能比较,以原始比例(FWIW)表示:
CPU:47% 阅读:28%

所以它确实看起来更快,读取次数更少,而且它适用于您提供的所有测试用例。

ALTER FUNCTION [dbo].[Match] 
(
@Subj1      varchar(8000), 
@Subj2      varchar(8000) 
) 
RETURNS bit AS 
BEGIN    

Set @Subj1 = IsNull(@Subj1,'')  
Set @Subj2 = IsNull(@Subj2,'')    

If @Subj1 = '' Or @Subj2 = ''    
Begin       
    Return 0  
End     

If Lower(@Subj1) = Lower(@Subj2) 
Begin    
    Return 1  
End   

 DECLARE @FullNames1 TABLE(Name varchar(200), SemiColon int, [Space] int)     
 DECLARE @FullNames2 TABLE(Name varchar(200), SemiColon int, [Space] int) 

 INSERT INTO @FullNames1
 SELECT 
    item, 
    CHARINDEX(';', item,0) as SemiColon,
    CHARINDEX(' ', item,0) as [Space]
 FROM
    dbo.Split(@Subj1, ',')


 INSERT INTO @FullNames2
 SELECT 
    item, 
    CHARINDEX(';', item,0) as SemiColon,
    CHARINDEX(' ', item,0) as [Space]
 FROM
    dbo.Split(@Subj2, ',')

 DECLARE @Names1 TABLE(FirstName varchar(100), LastName varchar(100))
 DECLARE @Names2 TABLE(FirstName varchar(100), LastName varchar(100))

 INSERT INTO @Names1
 SELECT 
    CASE WHEN SemiColon <> 0 THEN
        SUBSTRING(FullNames.Name,SemiColon+1, LEN(FullNames.Name))
    ELSE
        CASE WHEN FullNames.Space <> 0 THEN
            SUBSTRING(FullNames.Name, 1, FullNames.Space-1)     
        ELSE
            ''      
        END
    END AS FirstName,
    CASE WHEN SemiColon <> 0 THEN
        SUBSTRING(FullNames.Name, 1, SemiColon-1)       
    ELSE
        CASE WHEN FullNames.Space <> 0 THEN
            SUBSTRING(FullNames.Name,FullNames.Space+1, LEN(FullNames.Name))
        ELSE
            FullNames.Name  
        END
    END AS LastName
 FROM @FullNames1 FullNames

 INSERT INTO @Names2
 SELECT 
    CASE WHEN SemiColon <> 0 THEN
        SUBSTRING(FullNames.Name, SemiColon+1, LEN(FullNames.Name))
    ELSE
        CASE WHEN FullNames.Space <> 0 THEN
            SUBSTRING(FullNames.Name, 1, FullNames.Space-1)     
        ELSE
            ''      
        END
    END AS FirstName,
    CASE WHEN SemiColon <> 0 THEN
        SUBSTRING(FullNames.Name, 1, SemiColon-1)       
    ELSE
        CASE WHEN FullNames.Space <> 0 THEN
            SUBSTRING(FullNames.Name,FullNames.Space + 1, LEN(FullNames.Name))
        ELSE
            FullNames.Name  
        END
    END AS LastName
 FROM @FullNames2 FullNames


 UPDATE @Names1
 SET FirstName = Lower(LTrim(RTrim(FirstName))),
     LastName = Lower(LTrim(RTrim(LastName)))

 UPDATE @Names2
 SET FirstName = Lower(LTrim(RTrim(FirstName))),
     LastName = Lower(LTrim(RTrim(LastName)))


 IF EXISTS(
     SELECT *
     FROM @Names1 A
     INNER JOIN @Names2 B ON
     A.LastName = B.LastName AND
     (
        (A.FirstName = B.FirstName) OR
        (LEN(A.FirstName) = 1 AND A.FirstName = LEFT(B.FirstName, 1)) OR
        (LEN(B.FirstName) = 1 AND B.FirstName = LEFT(A.FirstName, 1)) OR
        (LEN(A.FirstName) = 0) OR
        (LEN(B.FirstName) = 0)
     ))
BEGIN
    RETURN 1
END

RETURN 0   
END