Question

我创建了一个udf，当像这样多次运行这个函数时：

-- Score four hotel-page URLs against the same hotel name in a single statement.
-- First argument is the reference text (the URL), second is the input (the name).
SELECT
    dbo.ufngetpercentagematch(
        'http://www.booking.com/hotel/au/crowne-plaza-melbourne.en-gb.html',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.hotelclub.com/hotels/Australia--VI/Melbourne/Crowne_Plaza_MELBOURNE.h175114/',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.orbitz.com/hotel/Australia--VI/Melbourne/Crowne_Plaza_MELBOURNE.h175114/',
        'CROWNE PLAZA MELBOURNE'
    ),
    dbo.ufngetpercentagematch(
        'http://www.tripadvisor.com/Hotel_Review-g255100-d255387-Reviews-Crowne_Plaza_Melbourne-Melbourne_Victoria.html',
        'CROWNE PLAZA MELBOURNE'
    )

需要7秒。而每个select语句单独运行只需要大约10-15微秒。有人能说明为什么会这样吗？

我是不是遗漏了某些函数或函数属性？

表

-- Lookup table of noise words; Id is an identity surrogate key.
CREATE TABLE [dbo].[NoiseWords]
(
    [Id]      [int] IDENTITY(1,1) NOT NULL,
    [Word]    [nvarchar](500)     NOT NULL,
    [Deleted] [bit]               NULL,   -- nullable soft-delete flag
    CONSTRAINT [PK_NoiseWords] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
GO

-- Rows inserted without an explicit Deleted value are stored as 0 (not deleted).
ALTER TABLE [dbo].[NoiseWords]
    ADD DEFAULT ((0)) FOR [Deleted]
GO


-- Seed data for noisewords: one word per statement.
-- Only the word column is supplied; id comes from the IDENTITY column and
-- deleted is left to its column default.
insert into noisewords(word) values('The')
insert into noisewords(word) values('A')
insert into noisewords(word) values('Hotel')
insert into noisewords(word) values('villa')
insert into noisewords(word) values('villas')
insert into noisewords(word) values('resort')
insert into noisewords(word) values('$')
insert into noisewords(word) values('an')
insert into noisewords(word) values('and')
insert into noisewords(word) values('resorts')
insert into noisewords(word) values('home')
insert into noisewords(word) values('house')
insert into noisewords(word) values('homes')
insert into noisewords(word) values('houses')
insert into noisewords(word) values('cottage')
insert into noisewords(word) values('cottages')
insert into noisewords(word) values('hotels')
insert into noisewords(word) values('inn')
insert into noisewords(word) values('inns')
insert into noisewords(word) values('hoteles')
insert into noisewords(word) values('guest')
insert into noisewords(word) values('hostel')
insert into noisewords(word) values('hostels')
insert into noisewords(word) values('room')
insert into noisewords(word) values('rooms')
insert into noisewords(word) values('apartment')
insert into noisewords(word) values('apartments')
insert into noisewords(word) values('housing')
insert into noisewords(word) values('lodging')
insert into noisewords(word) values('motel')
insert into noisewords(word) values('motels')
insert into noisewords(word) values('roof')
insert into noisewords(word) values('shelter')
insert into noisewords(word) values('spa')
insert into noisewords(word) values('spas')
insert into noisewords(word) values('tavern')
insert into noisewords(word) values('taverns')
insert into noisewords(word) values('saloon')
insert into noisewords(word) values('dormitory')
insert into noisewords(word) values('camp')
insert into noisewords(word) values('camps')
insert into noisewords(word) values('cabin')
insert into noisewords(word) values('cabins')
insert into noisewords(word) values('suites')
insert into noisewords(word) values('suite')

ufnGetPercentageMatch

CREATE FUNCTION dbo.ufnGetPercentageMatch
(@reference nvarchar(1000),
@input nvarchar(1000))
RETURNS  decimal(6,2)
AS
begin
 -- Returns the fraction (0.00 - 1.00) of the words in @input that occur
 -- anywhere inside @reference.
 --
 -- Both strings are first normalised the same way:
 --   1. the tokens ' BW ' and ' Best Western ' are blanked out,
 --   2. dbo.ufnReplaceAccentChars and dbo.ufnRemoveNonAlphaNumericCharacters
 --      are applied,
 --   3. every active row of dbo.noisewords (deleted = 0) is removed when it
 --      appears as a space-delimited word.
 -- @input is then split on spaces; a word counts as matched when @reference
 -- contains it as a substring (not necessarily as a whole word).
 -- Returns 0 when @input has no words left after normalisation.

 set @input = ' '+ ltrim(rtrim(dbo.ufnRemoveNonAlphaNumericCharacters(dbo.ufnReplaceAccentChars(replace(replace(' ' + @input + ' ',' BW ',' '),' Best Western ',' ')))))+' '
 set @reference = ' '+ ltrim(rtrim(dbo.ufnRemoveNonAlphaNumericCharacters(dbo.ufnReplaceAccentChars(replace(replace(' ' + @reference + ' ' ,' BW ',' '),' Best Western ',' ')))))+' '

 declare @noiseid int
 -- word is nvarchar(500); two extra characters hold the surrounding spaces.
 declare @noiseword nvarchar(502)
 declare @percmatch decimal(6,2) = 0

 -- Walk the active noise words in id order using the ids that actually
 -- exist, so gaps in the identity sequence cannot cause words to be skipped.
 select @noiseid = min(id) from dbo.noisewords where deleted = 0
 while @noiseid is not null
 begin
  select @noiseword = ' ' + ltrim(rtrim(word)) + ' ' from dbo.noisewords where id = @noiseid
  set @input = replace(@input, @noiseword, ' ')
  set @reference = replace(@reference, @noiseword, ' ')
  select @noiseid = min(id) from dbo.noisewords where deleted = 0 and id > @noiseid
 end

 -- matched words / total words; nullif guards the division when the split
 -- yields no non-empty words, and isnull turns that case into 0.
 select @percmatch = isnull(
          cast(sum(case when @reference like '%' + item + '%' then 1 else 0 end) as decimal(6,2))
          / nullif(cast(count(1) as decimal(6,2)), 0)
        , 0)
 from dbo.splitstring(@input, ' ')
 where isnull(item, '') <> ''

 RETURN(@percmatch)
END

ufnRemoveNonAlphaNumericCharacters

Create Function [dbo].[ufnRemoveNonAlphaNumericCharacters](@Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
    -- Deletes, one at a time, every character of @Temp that falls outside
    -- the set a-z, 0-9 and space, then returns what is left.
    -- NOTE(review): which letters the a-z range covers (case, accents)
    -- depends on the collation in effect -- confirm for the target database.
    Declare @DisallowedPattern as varchar(50)
    Declare @HitPosition as int

    Set @DisallowedPattern = '%[^a-z0-9 ]%'
    Set @HitPosition = PatIndex(@DisallowedPattern, @Temp)

    While @HitPosition > 0
    Begin
        -- Drop the offending character, then look for the next one.
        Set @Temp = Stuff(@Temp, @HitPosition, 1, '')
        Set @HitPosition = PatIndex(@DisallowedPattern, @Temp)
    End

    Return @Temp
End

ufnReplaceAccentChars

create function [dbo].[ufnReplaceAccentChars] ( @p_OriginalString varchar(max) )
returns varchar(max) as
begin
  -- Returns @p_OriginalString after re-collating it to
  -- SQL_Latin1_General_CP1253_CI_AI.
  -- NOTE(review): presumably the conversion to that collation's code page is
  -- what folds accented Latin letters to their base letters -- confirm on the
  -- target server.
  -- The local is varchar(max) so the full input is preserved; it was
  -- previously varchar(100), which cut every result off at 100 characters.
  declare @ModifiedString varchar(max) = @p_OriginalString Collate SQL_Latin1_General_CP1253_CI_AI;
  return @ModifiedString
end

SplitString

create FUNCTION [dbo].[SplitString]
(
      @Input VARCHAR(8000),
      @Character CHAR(1)
)
RETURNS @Output TABLE (
      Item VARCHAR(1000)
)
AS
BEGIN
      -- Splits @Input on the single-character delimiter @Character and returns
      -- one row per piece, in no guaranteed order (the table has no ordering
      -- column). Consecutive delimiters produce empty-string items.
      -- A NULL @Input returns no rows.
      DECLARE @EndIndex INT

      -- Make sure the string ends with exactly one terminating delimiter so
      -- the loop below also emits the last piece. Only the final character is
      -- inspected: the earlier SUBSTRING(..., LEN - 1, LEN) test looked at the
      -- last two characters, so it appended a delimiter even when one was
      -- already present and produced a spurious trailing empty item.
      IF RIGHT(@Input, 1) <> @Character
      BEGIN
            SET @Input = @Input + @Character
      END

      SET @EndIndex = CHARINDEX(@Character, @Input)
      WHILE @EndIndex > 0
      BEGIN
            -- Everything before the delimiter is the next item.
            INSERT INTO @Output(Item)
            SELECT SUBSTRING(@Input, 1, @EndIndex - 1)

            -- Continue with the text after the delimiter.
            SET @Input = SUBSTRING(@Input, @EndIndex + 1, LEN(@Input))
            SET @EndIndex = CHARINDEX(@Character, @Input)
      END

      RETURN
END

Answer 1

这不是一个完整的答案，因为完整的答案超出了论坛回帖所能容纳的篇幅，但这里有一个将标量函数ufnReplaceAccentChars转换为内联表值函数的示例。我在旧函数中看到的一个逻辑问题是：它接收varchar(max)，却将其截断为100个字符。

-- Inline table-valued form of ufnReplaceAccentChars: returns a single row
-- whose ReplacedValue column holds the first 100 characters of the input,
-- collated as SQL_Latin1_General_CP1253_CI_AI.
CREATE FUNCTION [dbo].[ufnReplaceAccentChars] (@p_OriginalString varchar(max))
RETURNS TABLE
AS
RETURN
(
    SELECT LEFT(@p_OriginalString, 100) COLLATE SQL_Latin1_General_CP1253_CI_AI AS ReplacedValue
);

如果您按照我之前提供的链接进行操作，您可以找到一些优秀的拆分函数。删除特殊字符有点棘手。然后您仍然需要处理主查询和噪音词的删除。希望这能帮助您起步。

多次执行sql函数需要花费太多时间

1 个答案: