这是示例测试数据:对于下面这些数据,函数应返回1;对于其他任何内容,函数应返回0:
Inventor: Raj Patel
Attorney: Raj Patel
Inventor: Patel; Raj
Attorney: Patel
Inventor: Patel; R
Attorney: Patel; Raj
Inventor: Patel; Raj, Madnani; Raj
Attorney: Patel; Raj
Inventor: Patel; Raj
Attorney: Patel; R
**Eg.** Select dbo.Match('Patel; R','Patel; Raj')
所有这些执行都应返回1:
Select dbo.Match('Raj Patel','Raj Patel')
Select dbo.Match('Patel; Raj','Patel')
Select dbo.Match('Patel; R',' Patel; Raj')
Select dbo.Match('Patel; Raj, Madnani; Raj','Patel; Raj')
Select dbo.Match('Patel; Raj','Patel; R')
应该返回1
这是我使用太多游标的匹配函数:
-- dbo.Match: decides whether two name strings match.
-- Parameters:
--   @Subj1, @Subj2 - name strings; NULL is treated as ''. Each string is split
--                    on ',' into names and each name on ';' into parts, using
--                    dbo.Split.
-- Returns (bit):
--   0 when either input is empty, 1 when the inputs are equal after Lower(),
--   otherwise the result of the part-by-part comparison in the loops below
--   (0 when no comparison succeeds).
-- NOTE(review): the Return 1 statements inside the loops exit without reaching
-- the CLOSE/DEALLOCATE statements for the cursors open at that point.
-- NOTE(review): no Fetch Next is ever issued against Subj1NamesCurr or
-- Subj2NamesCurr, so it looks like only the first name of each input is
-- compared - confirm whether that is intended.
ALTER FUNCTION [dbo].[Match]
(
@Subj1 varchar(8000),
@Subj2 varchar(8000)
)
RETURNS bit
AS
BEGIN
-- Normalise NULL inputs to empty strings.
Set @Subj1 = IsNull(@Subj1,'')
Set @Subj2 = IsNull(@Subj2,'')
-- An empty input never matches.
If @Subj1 = '' Or @Subj2 = ''
Begin
Return 0
End
-- Shortcut: the two strings are identical after lower-casing.
If Lower(@Subj1) = Lower(@Subj2)
Begin
Return 1
End
Declare Subj1NamesCurr Cursor For -- rows produced by splitting @Subj1 on ','
Select * From dbo.Split(@Subj1,',')
Declare Subj2NamesCurr Cursor SCROLL For -- rows produced by splitting @Subj2 on ','; SCROLL because Fetch First is used on it below
Select * From dbo.Split(@Subj2,',')
Open Subj1NamesCurr
Open Subj2NamesCurr
-- Current name (one ','-separated item) from each input.
Declare @Sub1Names varchar(8000)
Declare @Sub2Names varchar(8000)
-- Current ';'-separated part of each name.
Declare @Sub1NamePart varchar(8000)
Declare @Sub2NamePart varchar(8000)
-- NOTE(review): @Sub1PartCount is assigned below but never read.
Declare @Sub1PartCount tinyint = 0
Declare @Sub2PartCount tinyint = 0
-- Parts of the current name from each input.
Declare @Sub1NamesPart TABLE(Data varchar(8000))
Declare @Sub2NamesPart TABLE(Data varchar(8000))
Declare @MatchCount int = 0
-- NOTE(review): @TempCount is incremented below but never read.
Declare @TempCount int = 0
Fetch From Subj1NamesCurr INTO @Sub1Names -- fetch the first name of the 1st subject
Insert into @Sub1NamesPart
Select * From dbo.Split(@Sub1Names,';') -- parts of that name
Select @Sub1PartCount = Count(*) From @Sub1NamesPart
While @@Fetch_Status = 0 -- loop over names of the 1st subject
Begin
Fetch First From Subj2NamesCurr into @Sub2Names
While @@Fetch_Status = 0 -- loop over names of the 2nd subject
Begin
Declare Sub1NameCurr Cursor For
Select * From @Sub1NamesPart -- parts of the current 1st-subject name
OPEN Sub1NameCurr
Fetch From Sub1NameCurr into @Sub1NamePart
-- NOTE(review): @Sub2NamesPart is not cleared before this insert.
Insert into @Sub2NamesPart
Select * From dbo.Split(@Sub2Names,';')
Select @Sub2PartCount = Count(*) From @Sub2NamesPart
-- The counter is reset once per name pair, so it accumulates across all
-- parts of the 1st-subject name in the loop below.
Set @MatchCount = 0
While @@Fetch_Status = 0 -- loop over parts of the 1st-subject name
Begin
Declare Sub2NameCurr Cursor For
Select * From @Sub2NamesPart -- parts of the current 2nd-subject name
OPEN Sub2NameCurr
Fetch From Sub2NameCurr into @Sub2NamePart
Set @TempCount = 0
While @@Fetch_Status = 0 -- loop over parts of the 2nd-subject name
Begin
Set @TempCount = @TempCount + 1
-- Exact part match, ignoring case and surrounding whitespace.
If dbo.Trim(Lower(@Sub1NamePart)) = dbo.Trim(Lower(@Sub2NamePart))
Begin
Set @MatchCount = @MatchCount + 1
-- When the 2nd-subject name has a single part, one exact match is enough.
If @Sub2PartCount = 1
Begin
Return 1
End
End
-- Initial match: the first character of one part equals the whole other part.
Else If Lower(Left(dbo.Trim(@Sub1NamePart),1)) = Lower(dbo.Trim(@Sub2NamePart)) Or
Lower(Left(dbo.Trim(@Sub2NamePart),1)) = Lower(dbo.Trim(@Sub1NamePart))
Begin
Set @MatchCount = @MatchCount + 1
End
Fetch Next From Sub2NameCurr into @Sub2NamePart
-- NOTE(review): the table Sub2NameCurr reads from is emptied and refilled
-- while that cursor is still open; the effect presumably depends on the
-- cursor type - confirm.
Delete from @Sub2NamesPart
Insert into @Sub2NamesPart
Select * From dbo.Split(@Sub2Names,';')
End
-- Success once the accumulated matches equal the 2nd-subject part count.
If @MatchCount = @Sub2PartCount
Begin
Return 1
End
CLOSE Sub2NameCurr
DEALLOCATE Sub2NameCurr
Fetch Next From Sub1NameCurr into @Sub1NamePart
-- NOTE(review): same pattern as above - the table behind the open
-- Sub1NameCurr cursor is emptied and refilled here.
Delete from @Sub1NamesPart
Insert into @Sub1NamesPart
Select * From dbo.Split(@Sub1Names,';') -- parts of the current 1st-subject name
Select @Sub1PartCount = Count(*) From @Sub1NamesPart
End
CLOSE Sub1NameCurr
DEALLOCATE Sub1NameCurr
End
End
Close Subj1NamesCurr
Deallocate Subj1NamesCurr
Close Subj2NamesCurr
Deallocate Subj2NamesCurr
-- No comparison succeeded.
Return 0
END
编辑:为了不产生混淆,Trim只是一个对字符串执行LTrim和RTrim的函数。就是这样。
答案 0 :(得分:1)
这适用于您的示例,只是不知道它是否比您的解决方案更快。
代码应该相当一目了然,并且易于修改。
-- Counts the pairs of names (one from @str1, one from @str2) that match.
-- A result greater than 0 means the two comma-separated name lists match.
--
-- Each list is split on ',' by turning it into <i>...</i> XML elements.
-- Each name is then split on ';' by replacing ';' with '.' and reading the
-- pieces with PARSENAME: piece 1 is the text after the last separator (the
-- first name for 'Last; First'), piece 2 is the text before it (the last name).
-- For a name without ';', piece 2 is NULL and piece 1 is the whole name.
-- NOTE(review): because PARSENAME is used, names that themselves contain '.'
-- or more than four pieces will not parse as intended.
DECLARE @str1 VARCHAR(max)
DECLARE @str2 VARCHAR(max)
SELECT @str1 = 'Patel; Raj'
SELECT @str2 = 'Patel'

DECLARE @x1 XML
DECLARE @x2 XML

-- Escape XML special characters before wrapping the names in elements;
-- otherwise a name containing '&' or '<' makes the CAST to XML fail.
-- '&' must be escaped first so the entities added for '<' and '>' survive.
SELECT @x1 = CAST(
        '<i>'
        + REPLACE(REPLACE(REPLACE(REPLACE(@str1, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'), ',', '</i><i>')
        + '</i>' AS XML)
SELECT @x2 = CAST(
        '<i>'
        + REPLACE(REPLACE(REPLACE(REPLACE(@str2, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'), ',', '</i><i>')
        + '</i>' AS XML)

-- The element position is not needed for matching, so the <i> nodes are read
-- directly instead of being joined to a numbers table.
;WITH Names1 AS
(
    -- One row per name in @str1, with its parsed last/first pieces.
    SELECT
        src.Name,
        LTRIM(RTRIM(PARSENAME(REPLACE(src.Name, ';', '.'), 2))) AS LastName,
        LTRIM(RTRIM(PARSENAME(REPLACE(src.Name, ';', '.'), 1))) AS FirstName
    FROM
    (
        SELECT LTRIM(RTRIM(x.value('.', 'VARCHAR(50)'))) AS Name
        FROM @x1.nodes('/i') AS n(x)
    ) AS src
),
Names2 AS
(
    -- One row per name in @str2, with its parsed last/first pieces.
    SELECT
        src.Name,
        LTRIM(RTRIM(PARSENAME(REPLACE(src.Name, ';', '.'), 2))) AS LastName,
        LTRIM(RTRIM(PARSENAME(REPLACE(src.Name, ';', '.'), 1))) AS FirstName
    FROM
    (
        SELECT LTRIM(RTRIM(x.value('.', 'VARCHAR(50)'))) AS Name
        FROM @x2.nodes('/i') AS n(x)
    ) AS src
)
SELECT
    COUNT(*)
FROM Names1
INNER JOIN Names2
    ON
    -- identical full names
    Names1.Name = Names2.Name
    OR
    -- same last name and a compatible first name: equal, or one side is
    -- just the initial of the other
    (
        Names1.LastName = Names2.LastName
        AND
        (
            Names1.FirstName = Names2.FirstName
            OR LEFT(Names1.FirstName, 1) = Names2.FirstName
            OR Names1.FirstName = LEFT(Names2.FirstName, 1)
        )
    )
    -- one side is only a last name that equals the other side's last name
    OR Names1.Name = Names2.LastName
    OR Names2.Name = Names1.LastName
答案 1 :(得分:1)
这是一个避免使用游标的答案。我认为这段代码最好拆分成几个函数,但请告诉我你的想法,以及它对你来说是否更快。
以下是我这边的性能对比,以相对于原始函数的百分比表示(仅供参考):
CPU:47%
读取次数:28%
所以它确实看起来更快,读取次数更少,而且它适用于您提供的所有测试用例。
-- dbo.Match: returns 1 when any name in @Subj1 matches any name in @Subj2.
--
-- Parameters:
--   @Subj1, @Subj2 - comma-separated lists of names. A name containing ';' is
--                    read as 'Last; First'; otherwise a name containing a
--                    space is read as 'First Last' (split at the first space);
--                    otherwise the whole name is the last name and the first
--                    name is empty. NULL is treated as ''.
-- Returns (bit):
--   0 when either input is empty; 1 when the inputs are equal after LOWER();
--   otherwise 1 when some pair of names (one from each input) has the same
--   last name and compatible first names (equal, one is the one-letter
--   initial of the other, or one is empty); 0 in every other case.
-- Depends on dbo.Split(string, delimiter), read through its [item] column.
ALTER FUNCTION [dbo].[Match]
(
    @Subj1 varchar(8000),
    @Subj2 varchar(8000)
)
RETURNS bit AS
BEGIN
    SET @Subj1 = ISNULL(@Subj1, '')
    SET @Subj2 = ISNULL(@Subj2, '')

    IF @Subj1 = '' OR @Subj2 = ''
    BEGIN
        RETURN 0
    END

    IF LOWER(@Subj1) = LOWER(@Subj2)
    BEGIN
        RETURN 1
    END

    -- Parsed names of both inputs: Side = 1 for @Subj1, Side = 2 for @Subj2.
    DECLARE @Names TABLE
    (
        Side tinyint NOT NULL,
        FirstName varchar(100) NOT NULL,
        LastName varchar(100) NOT NULL
    )

    INSERT INTO @Names (Side, FirstName, LastName)
    SELECT
        parsed.Side,
        LOWER(LTRIM(RTRIM(parsed.FirstName))),
        LOWER(LTRIM(RTRIM(parsed.LastName)))
    FROM
    (
        SELECT
            located.Side,
            CASE
                WHEN located.SemiColon <> 0
                    THEN SUBSTRING(located.Name, located.SemiColon + 1, LEN(located.Name))
                WHEN located.[Space] <> 0
                    THEN SUBSTRING(located.Name, 1, located.[Space] - 1)
                ELSE ''
            END AS FirstName,
            CASE
                WHEN located.SemiColon <> 0
                    THEN SUBSTRING(located.Name, 1, located.SemiColon - 1)
                WHEN located.[Space] <> 0
                    THEN SUBSTRING(located.Name, located.[Space] + 1, LEN(located.Name))
                ELSE located.Name
            END AS LastName
        FROM
        (
            SELECT
                items.Side,
                items.Name,
                CHARINDEX(';', items.Name) AS SemiColon,
                CHARINDEX(' ', items.Name) AS [Space]
            FROM
            (
                -- Trim before locating separators: an item that follows a
                -- comma starts with a space, which would otherwise be taken
                -- as the First/Last separator of a 'First Last' name.
                SELECT 1 AS Side, LTRIM(RTRIM(item)) AS Name
                FROM dbo.Split(@Subj1, ',')
                UNION ALL
                SELECT 2 AS Side, LTRIM(RTRIM(item)) AS Name
                FROM dbo.Split(@Subj2, ',')
            ) AS items
            -- Skip blank items (e.g. from a trailing comma); two blank names
            -- would otherwise match each other.
            WHERE items.Name <> ''
        ) AS located
    ) AS parsed

    IF EXISTS
    (
        SELECT *
        FROM @Names AS A
        INNER JOIN @Names AS B
            ON B.Side = 2
            AND A.LastName = B.LastName
            AND
            (
                A.FirstName = B.FirstName
                -- one side holds only an initial of the other's first name
                OR (LEN(A.FirstName) = 1 AND A.FirstName = LEFT(B.FirstName, 1))
                OR (LEN(B.FirstName) = 1 AND B.FirstName = LEFT(A.FirstName, 1))
                -- one side holds only a last name
                OR LEN(A.FirstName) = 0
                OR LEN(B.FirstName) = 0
            )
        WHERE A.Side = 1
    )
    BEGIN
        RETURN 1
    END

    RETURN 0
END