表格包含 Title 字段和 Tags 字段。标签是通过潜在狄利克雷分配(LDA)从文档中生成的,例如 'fish, oven, time'、'BBQ, beer' 或 'meat, BBQ'。每条记录的标签数量不固定。
给定一组标签,如何找到匹配最大标签数量的记录,无论标签的顺序如何?
例如,给定 'BBQ, meat' 时,最佳结果应该是 'meat, BBQ'。如果给定 'BBQ, fish, cream',则三条记录都可以返回(它们各有一个匹配的标签)。
答案 0 :(得分:1)
使用这个字符串拆分函数(下文中的 splitstring),并创建以下函数:
-- Counts how many distinct tags of @mainString also appear in @searchString.
--   @mainString   : delimited tag list stored on the record.
--   @searchString : delimited tag list being searched for.
-- Both lists are split with splitstring (defined elsewhere; its result column is Name).
-- Tokens are trimmed before comparison so that ' meat' matches 'meat', and the
-- count is DISTINCT so a tag repeated in either list is counted only once.
-- Returns 0 when no tag matches.
CREATE FUNCTION dbo.getCountOfMatch ( @mainString VARCHAR(MAX), @searchString nvarchar(max))
RETURNS
INT
AS
BEGIN
    DECLARE @returnCount INT
    SELECT
        @returnCount = COUNT(DISTINCT LTRIM(RTRIM(A.Name)))
    FROM
        splitstring(@mainString) A INNER JOIN
        splitstring(@searchString) B ON LTRIM(RTRIM(A.Name)) = LTRIM(RTRIM(B.Name))
    WHERE
        -- Empty tokens (e.g. from a trailing delimiter) must not count as a match.
        LTRIM(RTRIM(A.Name)) <> ''
    RETURN @returnCount
END
和
-- Returns the record whose Tags share the most entries with @search.
-- TABLE is a placeholder for the real table name; @search must be declared
-- by the caller and hold the delimited list of tags to look for.
-- Use TOP 1 WITH TIES instead to return every record sharing the highest count.
SELECT TOP 1 -- What you want
    Title,
    Tags
FROM
(
    SELECT
        A.Title,
        A.Tags,
        dbo.getCountOfMatch(A.Tags, @search) CountTags -- The number of matches.
    FROM
        TABLE A
) B
ORDER BY B.CountTags DESC
**已更新**
-- Builds and runs a query that returns the rows whose Tags contain every
-- term of @searchText, shortest Tags value first.
-- "Table" in the query text is a placeholder for the real table name.
DECLARE @searchText NVARCHAR(MAX) = 'BBQ, meat'
DECLARE @query NVARCHAR(MAX) = '
SELECT
*
FROM
Table
WHERE '
-- One "Tags like '%term%' AND " fragment per non-empty search term.
-- Each term is trimmed, and embedded single quotes are doubled so a term can
-- never terminate the string literal and inject SQL.
-- TYPE + .value() returns the text unescaped; plain FOR XML PATH('') would
-- turn <, > and & inside a term into XML entities.
DECLARE @filters NVARCHAR(MAX)
SELECT
    @filters =
    (
        SELECT
            'Tags like ''%' + REPLACE(LTRIM(RTRIM(A.Name)), '''', '''''') + '%'' AND '
        FROM
            splitstring(@searchText) A
        WHERE
            LTRIM(RTRIM(A.Name)) <> ''
        FOR XML PATH (''), TYPE
    ).value('.', 'NVARCHAR(MAX)')
-- With no usable search term there is no predicate to build, so nothing is run.
IF @filters IS NOT NULL
BEGIN
    -- LEN ignores the trailing blank, so dropping 3 characters removes the final
    -- "AND" and keeps the blank that separates the last predicate from ORDER BY.
    SELECT @query = @query + LEFT(@filters, LEN(@filters) - 3) + 'ORDER BY LEN(Tags)' -- For exactly matching: LEN(Tags) = LEN(@searchText)
    EXEC sp_executesql @query
END
生成的查询如下:
-- Example of the generated statement for the search terms BBQ and meat:
-- every term must occur somewhere in Tags; shortest Tags value comes first.
SELECT *
FROM Table
WHERE Tags LIKE '%BBQ%'
  AND Tags LIKE '%meat%'
ORDER BY LEN(Tags)
答案 1 :(得分:0)
结合两个UDF,您可以返回搜索的命中率(百分比)。
例如
Select [dbo].[udf-Str-Match-Rate]('Dog,House,Custom',',','The dog house is red',' ')
返回0.6666 - 找到3个单词/短语中的2个。
两个字符串可以各自使用不同的分隔符
只比较去重后的单词,以避免结果虚高
我还包括一个soundex(这是可选的)
第一个UDF是独立的,可以单独使用。
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@delimeter varchar(10))
--Usage: Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
-- Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
-- Select * from [dbo].[udf-Str-Parse]('id26,id46|id658,id967','|')
--
-- Splits @String on @delimeter and returns one row per token.
--   Key_PS    : 1-based position of the token in the input.
--   Key_Value : the token text (not trimmed).
-- A doubled delimiter is collapsed to a single one before splitting.
-- When the delimiter does not occur, the whole string is returned as one row.
Returns @ReturnTable Table (Key_PS int IDENTITY(1,1) NOT NULL , Key_Value varchar(max))
As
Begin
    Declare @intPos int,@SubStr varchar(max)
    -- Len() ignores trailing blanks (Len(' ') = 0), so measure the delimiter
    -- with a sentinel character appended.
    Declare @DelimLen int = Len(@delimeter + 'x') - 1
    Set @String = Replace(@String,@delimeter+@delimeter,@delimeter)
    Set @IntPos = CharIndex(@delimeter, @String)
    While @IntPos > 0
    Begin
        Set @SubStr = Left(@String, @IntPos - 1)
        Insert into @ReturnTable (Key_Value) values (@SubStr)
        -- Drop only the leading token and its delimiter. Replacing the text
        -- "token + delimiter" everywhere would also eat later occurrences
        -- ('a,ba,c' -> 'bc') and repeated tokens.
        Set @String = Substring(@String, @IntPos + @DelimLen, DataLength(@String))
        Set @IntPos = CharIndex(@delimeter, @String)
    End
    Insert into @ReturnTable (Key_Value) values (@String)
    Return
End
第二个UDF需要第一个
CREATE FUNCTION [dbo].[udf-Str-Match-Rate] (@SearchFor varchar(max),@SearchForDelim varchar(5),@SearchIn varchar(max),@SearchInDelim varchar(5))
-- Syntax : Select [dbo].[udf-Str-Match-Rate]('Dog,House,Custom',',','The dog house is red',' ')
--
-- Returns the share of distinct @SearchFor terms that are found in @SearchIn
-- (matched terms / total distinct terms), or 0 when nothing matches.
--   @SearchFor / @SearchForDelim : terms to look for and their delimiter.
--   @SearchIn  / @SearchInDelim  : text to search and its delimiter.
-- A term counts as found when it equals a word of @SearchIn or shares its
-- Soundex code. Requires [dbo].[udf-Str-Parse].
Returns money
AS
BEGIN
    Declare @RetVal money
    ;with cteSearchFor as (Select Distinct Key_Value from [dbo].[udf-Str-Parse](@SearchFor ,@SearchForDelim))
         ,cteSearchIn as (Select Distinct Key_Value from [dbo].[udf-Str-Parse](@SearchIn,@SearchInDelim))
         ,cteWordCnt as (Select Words=cast(count(*) as money) From cteSearchFor)
    -- Count each search term once: a term that matches several words
    -- (exactly and/or by Soundex) would otherwise push the rate above 1.
    Select @RetVal = isnull(Count(Distinct S.Key_Value)/max(Words),0)
    From cteSearchFor S
    Join cteWordCnt W on 1=1
    Join cteSearchIn C
      on S.Key_Value = C.Key_Value
      or Soundex(S.Key_Value) = Soundex(C.Key_Value)
    Return @RetVal
END
答案 2 :(得分:0)
创建一个 tags 表,并使用字符串拆分函数(例如上文的 splitstring)来填充它。tags 表的结构:
title PK
tag PK
-- Ranks titles by how many of the searched tags they carry, best match first.
-- Titles with a single matching tag are kept, because the requirement is to
-- return every record that matches at least one tag; add
-- "having count(*) > 1" after the group by to demand two or more matches.
select title, count(*) as matched_tags
from tags
where tag in ('BBQ', 'fish', 'cream')
group by title
order by count(*) desc
-- Sketch: expand each row's delimited tags column into one row per tag
-- (presumably the source for filling the tags table; confirm).
-- NOTE(review): not runnable as written. The JOIN has no ON clause, and T-SQL
-- needs CROSS APPLY rather than JOIN to pass a column of the outer table to a
-- table-valued function; "table" is also a reserved word used as a placeholder.
SELECT *
from table
join dbo.splitstring(table.tags)