我创建了一个 UDF(用户自定义函数)。当我在同一条 SELECT 语句中像下面这样多次调用该函数时:
-- Scores four hotel URLs against the same hotel name in one statement.
-- The result is a single row with four unnamed columns, one per call.
SELECT
    dbo.ufngetpercentagematch(
        'http://www.booking.com/hotel/au/crowne-plaza-melbourne.en-gb.html',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.hotelclub.com/hotels/Australia--VI/Melbourne/Crowne_Plaza_MELBOURNE.h175114/',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.orbitz.com/hotel/Australia--VI/Melbourne/Crowne_Plaza_MELBOURNE.h175114/',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.tripadvisor.com/Hotel_Review-g255100-d255387-Reviews-Crowne_Plaza_Melbourne-Melbourne_Victoria.html',
        'CROWNE PLAZA MELBOURNE'
    )
整个查询需要 7 秒;而把每个函数调用放在单独的 SELECT 语句中运行时,每条几乎只需要 10-15 微秒。有人能解释为什么会这样吗?
我是不是遗漏了函数的某些设置或属性?
表
-- Words that dbo.ufnGetPercentageMatch blanks out of both of its arguments
-- before comparing them. Rows are soft-deleted through the Deleted flag;
-- the function only reads rows where Deleted = 0.
CREATE TABLE [dbo].[NoiseWords]
(
    [Id]      [int] IDENTITY(1, 1) NOT NULL,  -- identity key: seed 1, increment 1
    [Word]    [nvarchar](500)      NOT NULL,
    [Deleted] [bit]                NULL,      -- nullable; the default of 0 is attached below
    CONSTRAINT [PK_NoiseWords] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
GO

-- New rows count as "not deleted" unless the insert says otherwise.
ALTER TABLE [dbo].[NoiseWords]
    ADD DEFAULT ((0)) FOR [Deleted]
GO
-- Seed data for the noise-word list. The rows are inserted one statement at a
-- time, in this order, so the identity column hands out consecutive Ids in
-- the same sequence; Deleted is left to its default.
INSERT INTO noisewords (word) VALUES ('The');
INSERT INTO noisewords (word) VALUES ('A');
INSERT INTO noisewords (word) VALUES ('Hotel');
INSERT INTO noisewords (word) VALUES ('villa');
INSERT INTO noisewords (word) VALUES ('villas');
INSERT INTO noisewords (word) VALUES ('resort');
INSERT INTO noisewords (word) VALUES ('$');
INSERT INTO noisewords (word) VALUES ('an');
INSERT INTO noisewords (word) VALUES ('and');
INSERT INTO noisewords (word) VALUES ('resorts');
INSERT INTO noisewords (word) VALUES ('home');
INSERT INTO noisewords (word) VALUES ('house');
INSERT INTO noisewords (word) VALUES ('homes');
INSERT INTO noisewords (word) VALUES ('houses');
INSERT INTO noisewords (word) VALUES ('cottage');
INSERT INTO noisewords (word) VALUES ('cottages');
INSERT INTO noisewords (word) VALUES ('hotels');
INSERT INTO noisewords (word) VALUES ('inn');
INSERT INTO noisewords (word) VALUES ('inns');
INSERT INTO noisewords (word) VALUES ('hoteles');
INSERT INTO noisewords (word) VALUES ('guest');
INSERT INTO noisewords (word) VALUES ('hostel');
INSERT INTO noisewords (word) VALUES ('hostels');
INSERT INTO noisewords (word) VALUES ('room');
INSERT INTO noisewords (word) VALUES ('rooms');
INSERT INTO noisewords (word) VALUES ('apartment');
INSERT INTO noisewords (word) VALUES ('apartments');
INSERT INTO noisewords (word) VALUES ('housing');
INSERT INTO noisewords (word) VALUES ('lodging');
INSERT INTO noisewords (word) VALUES ('motel');
INSERT INTO noisewords (word) VALUES ('motels');
INSERT INTO noisewords (word) VALUES ('roof');
INSERT INTO noisewords (word) VALUES ('shelter');
INSERT INTO noisewords (word) VALUES ('spa');
INSERT INTO noisewords (word) VALUES ('spas');
INSERT INTO noisewords (word) VALUES ('tavern');
INSERT INTO noisewords (word) VALUES ('taverns');
INSERT INTO noisewords (word) VALUES ('saloon');
INSERT INTO noisewords (word) VALUES ('dormitory');
INSERT INTO noisewords (word) VALUES ('camp');
INSERT INTO noisewords (word) VALUES ('camps');
INSERT INTO noisewords (word) VALUES ('cabin');
INSERT INTO noisewords (word) VALUES ('cabins');
INSERT INTO noisewords (word) VALUES ('suites');
INSERT INTO noisewords (word) VALUES ('suite');
ufnGetPercentageMatch
-- Returns the fraction (0.00 to 1.00) of the words in @input that occur as a
-- substring of @reference, after both strings have been normalised.
--
-- Normalisation, applied identically to both arguments:
--   1. the tokens ' BW ' and ' Best Western ' are replaced by a space;
--   2. dbo.ufnReplaceAccentChars, then dbo.ufnRemoveNonAlphaNumericCharacters,
--      are applied;
--   3. every word in dbo.NoiseWords with Deleted = 0 is replaced by a space
--      wherever it appears surrounded by spaces, in ascending Id order.
--
-- Parameters:
--   @reference  text that is searched (the sample call passes a URL).
--   @input      text whose space-separated words are looked up in @reference.
-- Returns:
--   decimal(6,2) = matched words / non-empty words of the normalised @input;
--   0 when no word is left after normalisation (or the inputs are NULL).
CREATE FUNCTION dbo.ufnGetPercentageMatch
(@reference nvarchar(1000),
@input nvarchar(1000))
RETURNS decimal(6,2)
AS
BEGIN
    -- Word is nvarchar(500); the padded form adds one space on each side.
    DECLARE @noise_word nvarchar(502);
    -- Ids come from IDENTITY(1,1), so 0 sorts before every row.
    DECLARE @last_id int = 0;
    DECLARE @next_id int;
    DECLARE @percmatch decimal(6,2) = 0;

    -- Pad with spaces so that words at either end are space-delimited too.
    SET @input = ' ' + LTRIM(RTRIM(dbo.ufnRemoveNonAlphaNumericCharacters(dbo.ufnReplaceAccentChars(REPLACE(REPLACE(' ' + @input + ' ', ' BW ', ' '), ' Best Western ', ' '))))) + ' ';
    SET @reference = ' ' + LTRIM(RTRIM(dbo.ufnRemoveNonAlphaNumericCharacters(dbo.ufnReplaceAccentChars(REPLACE(REPLACE(' ' + @reference + ' ', ' BW ', ' '), ' Best Western ', ' '))))) + ' ';

    -- Walk the active noise words by "next Id after the last one seen".
    -- Unlike counting from 1 to COUNT(*), this visits every active row even
    -- when the Id sequence has gaps, and never re-applies a stale word when
    -- an Id is missing or soft-deleted.
    WHILE 1 = 1
    BEGIN
        SET @next_id = NULL;

        SELECT TOP (1)
            @next_id = Id,
            @noise_word = ' ' + LTRIM(RTRIM(Word)) + ' '
        FROM dbo.NoiseWords
        WHERE Id > @last_id
          AND Deleted = 0
        ORDER BY Id;

        -- No assignment happened: there is no further active row.
        IF @next_id IS NULL
            BREAK;

        SET @input = REPLACE(@input, @noise_word, ' ');
        SET @reference = REPLACE(@reference, @noise_word, ' ');
        SET @last_id = @next_id;
    END;

    ;WITH input_words AS
    (
        -- Consecutive spaces produce empty items; they are not words.
        SELECT Item
        FROM dbo.SplitString(@input, ' ')
        WHERE Item <> ''
    ),
    word_hits AS
    (
        -- CHARINDEX matches the word literally, so a word can never be
        -- misread as a LIKE pattern.
        SELECT CASE WHEN CHARINDEX(Item, @reference) > 0 THEN 1 ELSE 0 END AS is_hit
        FROM input_words
    )
    SELECT @percmatch = ISNULL(
               CAST(SUM(is_hit) AS decimal(6,2))
                   / NULLIF(CAST(COUNT(*) AS decimal(6,2)), 0),  -- no words => NULL => 0
               0)
    FROM word_hits;

    RETURN (@percmatch);
END
ufnRemoveNonAlphaNumericCharacters
-- Deletes from @Temp, one character at a time, every character that matches
-- the pattern '%[^a-z0-9 ]%' (i.e. is outside the a-z, 0-9 and space class)
-- and returns what is left. A NULL argument comes back as NULL.
-- NOTE(review): which characters the a-z / 0-9 ranges admit is decided by the
-- collation in effect (e.g. whether upper-case letters are kept) -- confirm
-- against the target database.
CREATE FUNCTION [dbo].[ufnRemoveNonAlphaNumericCharacters] (@Temp VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @UnwantedCharPattern VARCHAR(50) = '%[^a-z0-9 ]%';
    -- Position of the first unwanted character; 0 once none is left.
    DECLARE @HitPosition INT = PATINDEX(@UnwantedCharPattern, @Temp);

    WHILE @HitPosition > 0
    BEGIN
        SET @Temp = STUFF(@Temp, @HitPosition, 1, '');
        SET @HitPosition = PATINDEX(@UnwantedCharPattern, @Temp);
    END;

    RETURN @Temp;
END
ufnReplaceAccentChars
-- Returns @p_OriginalString after casting it to the
-- SQL_Latin1_General_CP1253_CI_AI collation.
-- NOTE(review): presumably this relies on code page 1253 lacking accented
-- Latin letters, so the conversion maps them to their base letters -- confirm.
--
-- Parameters:
--   @p_OriginalString  text to convert; may be of any length.
-- Returns:
--   the converted text at its full length (NULL for NULL input).
CREATE FUNCTION [dbo].[ufnReplaceAccentChars] (@p_OriginalString VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    -- The local must be as wide as the parameter and the return type: a
    -- narrower declaration silently cuts longer input short.
    DECLARE @ModifiedString VARCHAR(MAX) = @p_OriginalString COLLATE SQL_Latin1_General_CP1253_CI_AI;

    RETURN @ModifiedString;
END
SplitString
-- Splits @Input on the single-character delimiter @Character and returns one
-- row per item, in no guaranteed order.
--
-- Parameters:
--   @Input      text to split.
--   @Character  delimiter.
-- Returns (column Item):
--   * the text before each delimiter, including empty items produced by a
--     leading delimiter or by two delimiters in a row;
--   * the text after the last delimiter, when there is any. A delimiter at
--     the very end of @Input therefore terminates the last item and does not
--     add an empty one.
--   NULL or empty @Input, or a NULL @Character, yields no rows.
CREATE FUNCTION [dbo].[SplitString]
(
    @Input VARCHAR(8000),
    @Character CHAR(1)
)
RETURNS @Output TABLE (
    Item VARCHAR(1000)
)
AS
BEGIN
    DECLARE @ItemStart INT = 1;     -- first character of the item being read
    DECLARE @DelimiterPos INT;      -- next delimiter at or after @ItemStart; 0 if none
    DECLARE @Tail VARCHAR(8000);

    IF @Input IS NULL OR @Character IS NULL
        RETURN;

    -- Scan forward with a start offset instead of re-slicing @Input, so the
    -- string is never copied, never needs a delimiter appended (which could
    -- overflow 8000 characters) and no length calculation is involved.
    SET @DelimiterPos = CHARINDEX(@Character, @Input, @ItemStart);

    WHILE @DelimiterPos > 0
    BEGIN
        INSERT INTO @Output (Item)
        VALUES (SUBSTRING(@Input, @ItemStart, @DelimiterPos - @ItemStart));

        SET @ItemStart = @DelimiterPos + 1;
        SET @DelimiterPos = CHARINDEX(@Character, @Input, @ItemStart);
    END;

    -- Whatever follows the last delimiter is the final item. DATALENGTH is
    -- used because LEN ignores trailing spaces and would hide a tail that
    -- consists only of spaces.
    SET @Tail = SUBSTRING(@Input, @ItemStart, 8000);

    IF DATALENGTH(@Tail) > 0
        INSERT INTO @Output (Item)
        VALUES (@Tail);

    RETURN;
END
答案 0 :(得分:0)
这不是一个完整的答案,因为完整的解答会超出论坛回答所能容纳的篇幅;不过下面给出一个示例,演示如何将标量函数 ufnReplaceAccentChars 转换为内联表值函数。我在旧函数中发现的一个逻辑问题是:它接收 varchar(max),却把结果截断为 100 个字符。
-- Inline table-valued form of ufnReplaceAccentChars.
-- Returns exactly one row with one column, ReplacedValue: the first 100
-- characters of @p_OriginalString, carrying the
-- SQL_Latin1_General_CP1253_CI_AI collation.
-- The 100-character cut-off matches the varchar(100) local used by the scalar
-- version of this function.
CREATE FUNCTION [dbo].[ufnReplaceAccentChars]
(
    @p_OriginalString VARCHAR(MAX)
)
RETURNS TABLE
AS
RETURN
(
    SELECT
        LEFT(@p_OriginalString, 100) COLLATE SQL_Latin1_General_CP1253_CI_AI AS ReplacedValue
);
如果您查看我之前提供的链接,可以在其中找到一些优秀的字符串拆分函数。删除特殊字符会稍微棘手一些。此外,您仍然需要处理主查询以及去除噪音词的逻辑。希望这些能帮助您入手。