假设我有一个由某些类别代码组织的短语表:
category phrase
0 "hello bye"
0 "hello no"
1 "hello bye"
1 "yes no"
2 "hello bye"
然后我想得到一张表,统计每个类别中每个关键字出现的次数:
category keyword frequency
0 "hello" 2
0 "no" 1
0 "bye" 1
1 "hello" 1
1 "bye" 1
1 "yes" 1
1 "no" 1
2 "hello" 1
2 "bye" 1
之前我是用下面这段代码来找出关键字的:
-- Build an XML value holding one <c> element per space-separated word.
-- The subquery concatenates the DISTINCT phrases of myTable, each prefixed
-- with a space, into a single string via FOR XML PATH(''); every space is
-- then replaced by '</c><c>' and the result is wrapped in <c> ... </c>.
-- Only the phrase column is selected, so the category is not carried along.
DECLARE @xml XML
SET @xml =
    '<c>'
    + REPLACE(
          (
              SELECT DISTINCT ' ' + phrase
              FROM myTable
              FOR XML PATH('')
          ),
          ' ',
          '</c><c>'
      )
    + '</c>'
但我想不出用这种方法跟踪类别的方法......
答案 0 :(得分:1)
拆分函数
-- Splits @RowData on every occurrence of @SplitOn and returns the pieces,
-- in order, one per row.
--
-- Parameters:
--   @RowData  the string to split (up to 2000 characters).
--   @SplitOn  the delimiter (up to 5 characters).
-- Returns:
--   @RtnValue (Id, Data): Id is an identity column that numbers the pieces
--   from 1 in insertion order; Data is the piece with leading and trailing
--   spaces removed by LTRIM/RTRIM.
-- Notes:
--   * Adjacent delimiters, or a delimiter at either end of the input, yield
--     empty-string rows; they are not filtered out.
--   * When the delimiter is not found (CHARINDEX returns 0) the loop is
--     skipped and a single row holding the trimmed input is returned.
CREATE FUNCTION [dbo].[udf_Split]
(
    @RowData nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    Data nvarchar(100)
)
AS
BEGIN
    DECLARE @DelimLen int
    DECLARE @Pos int

    -- Length of the delimiter in characters. LEN() cannot be used here
    -- because it ignores trailing spaces (LEN(N' ') = 0); DATALENGTH()
    -- returns bytes, and nvarchar stores 2 bytes per character position.
    SET @DelimLen = DATALENGTH(@SplitOn) / 2

    SET @Pos = CHARINDEX(@SplitOn, @RowData)

    WHILE (@Pos > 0)
    BEGIN
        -- Everything before the delimiter is the next piece.
        INSERT INTO @RtnValue (Data)
        SELECT LTRIM(RTRIM(SUBSTRING(@RowData, 1, @Pos - 1)))

        -- Skip the WHOLE delimiter, not just its first character, so that
        -- multi-character delimiters do not leak into the next piece.
        -- LEN(@RowData) is only an upper bound for the length argument;
        -- SUBSTRING stops at the end of the string.
        SET @RowData = SUBSTRING(@RowData, @Pos + @DelimLen, LEN(@RowData))

        SET @Pos = CHARINDEX(@SplitOn, @RowData)
    END

    -- Whatever is left after the last delimiter is the final piece.
    INSERT INTO @RtnValue (Data)
    SELECT LTRIM(RTRIM(@RowData))

    RETURN
END
您的数据
-- Sample data: five (category, phrase) rows mirroring the table in the question.
DECLARE @TABLE TABLE (category INT,phrase VARCHAR(50))
-- Explicit column list so the INSERT does not depend on column order.
INSERT INTO @TABLE (category, phrase) VALUES
(0,'hello bye'),(0,'hello no'),(1,'hello bye'),
(1,'yes no'),(2,'hello bye')
查询
-- Count how many times each keyword occurs within each category.
-- Every phrase is split on a single space by dbo.udf_Split; CROSS APPLY
-- pairs each resulting piece (Data) with the category of its source row.
SELECT t.category
, c.Data AS keyword
, COUNT(c.Data) AS frequency
FROM @TABLE t
CROSS APPLY [dbo].[udf_Split](t.phrase, ' ') c
GROUP BY t.category, c.Data
-- Order by keyword as well, so rows within a category come back in a
-- defined order instead of whatever order the plan happens to produce.
ORDER BY t.category, c.Data
结果集
╔══════════╦═════════╦═══════════╗
║ category ║ keyword ║ frequency ║
╠══════════╬═════════╬═══════════╣
║ 0 ║ bye ║ 1 ║
║ 0 ║ hello ║ 2 ║
║ 0 ║ no ║ 1 ║
║ 1 ║ bye ║ 1 ║
║ 1 ║ hello ║ 1 ║
║ 1 ║ no ║ 1 ║
║ 1 ║ yes ║ 1 ║
║ 2 ║ bye ║ 1 ║
║ 2 ║ hello ║ 1 ║
╚══════════╩═════════╩═══════════╝