我有一张表格,其中包含如下条目:
id keywords
1 cat, dog, man, mouse
2 man, pen, pencil, eraser
3 dog, man, friends
4 dog, leash,......
我想制作类似下面这样的表格:
id cat dog man mouse pen pencil eraser friends leash ......
1 1 1 1 1 0 0 0 0 0
2 0 0 1 0 1 1 1 0 0
3 0 1 1 0 0 0 0 1 0
等等。
答案 0 :(得分:5)
-- Pivot a comma-separated keywords column into one count column per distinct keyword
-- (SQL Server). Steps:
--   1. split every keywords string into (id, keyword) rows stored in #Split,
--   2. build a bracket-quoted, comma-separated column list from the distinct keywords,
--   3. run a dynamic PIVOT over #Split using that column list.

-- SELECT ... INTO fails if #Split is left over from an earlier run in this session.
IF OBJECT_ID('tempdb..#Split') IS NOT NULL
    DROP TABLE #Split;

-- Inline sample data; point the cte at the real table instead when using this for real.
WITH basedata(id, keywords) AS
(
    SELECT 1, 'cat, dog, man, mouse' UNION ALL
    SELECT 2, 'man, pen, pencil, eraser' UNION ALL
    SELECT 3, 'dog, man, friends' UNION ALL
    SELECT 4, 'dog, leash'
),
-- x is the keyword list converted to XML: one <foo> element per comma-separated item.
cte(id, t, x) AS
(
    SELECT id,
           keywords,
           -- Entity-escape XML special characters before inserting the <foo> tags
           -- ('&' first, so the entities produced for '<' and '>' are not escaped again).
           -- Without this, a keyword such as 'R&D' or 'a<b' makes the CAST to XML fail.
           CAST('<foo>'
                + REPLACE(
                      REPLACE(REPLACE(REPLACE(keywords, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                      ',', '</foo><foo>')
                + '</foo>' AS XML)
    FROM basedata
)
SELECT id,
       LTRIM(RTRIM(w.value('.', 'nvarchar(max)'))) AS keyword
INTO #Split
FROM cte
CROSS APPLY x.nodes('//foo') AS word(w)
-- Skip empty items (from 'a,,b' or a trailing comma): an empty string cannot be used
-- as a pivot column name.
WHERE LTRIM(RTRIM(w.value('.', 'nvarchar(max)'))) <> '';

DECLARE @ColList nvarchar(max);

-- Build '[cat],[dog],...' in alphabetical order. FOR XML PATH('') concatenates the rows in
-- ORDER BY order; TYPE + .value() turns any XML entities back into plain characters.
-- QUOTENAME brackets each keyword (and doubles any ']'), so keyword text cannot break out
-- of the identifier in the dynamic SQL below.
SELECT @ColList = STUFF(
           (SELECT ',' + QUOTENAME(d.keyword)
            FROM (SELECT DISTINCT keyword FROM #Split) AS d
            ORDER BY d.keyword
            FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'),
           1, 1, '');

-- With no keywords at all there is nothing to pivot (and the IN () list would be invalid).
IF @ColList IS NOT NULL
    EXEC(N'
SELECT id, ' + @ColList + N'
FROM #Split
PIVOT (COUNT(keyword) FOR keyword IN (' + @ColList + N')) P
ORDER BY id');

DROP TABLE #Split;
给出
id cat dog eraser friends leash man mouse pen pencil
----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- -----------
1 1 1 0 0 0 1 1 0 0
2 0 0 1 0 0 1 0 1 1
3 0 1 0 1 0 1 0 0 0
4 0 1 0 0 1 0 0 0 0
答案 1 :(得分:2)
您一定要使用数据透视表吗?您期望的最终结果是按每个 id 统计频率,这看起来有些奇怪:那样的话,非零单元格里的频率永远只会是 1。
看看这是否适合你。
示例数据
-- Sample data: one row per id; keywords holds a comma-separated keyword list.
create table basedata (id int, keywords varchar(max));

-- Name the target columns explicitly so the insert does not depend on the table's column order.
insert into basedata (id, keywords)
SELECT 1, 'cat, dog, man, mouse' union all
SELECT 2, 'man, pen, pencil, eraser' union all
SELECT 3, 'dog, man, friends' union all
SELECT 4, 'dog, leash';
查询
-- Keyword frequency across all rows of basedata, using a recursive CTE as the splitter.
-- Each cte row carries: id, the not-yet-split remainder of the list, and the word just split off.
;with cte(id, list, word) as (
    -- Anchor: peel the first item off every non-empty keywords string.
    -- Appending ',' to the searched string guarantees CHARINDEX finds a delimiter
    -- even when the string holds a single item.
    select id,
           cast(STUFF(keywords,1,CHARINDEX(',',keywords+','),'') as varchar(max)),
           cast(ltrim(rtrim(LEFT(keywords,CHARINDEX(',',keywords+',')-1))) as varchar(max))
    from basedata
    where keywords > ''
    union all
    -- Recursive step: peel the next item off the remainder until nothing is left.
    select id,
           STUFF(list,1,CHARINDEX(',',list+','),''),
           ltrim(rtrim(LEFT(list,CHARINDEX(',',list+',')-1)))
    from cte
    where list > ''
)
select word, COUNT(*) as frequency
from cte
-- Empty items (e.g. from 'a,,b') are not keywords; leave them out of the counts.
where word > ''
group by word
order by word
-- One recursion level is used per item in a list; the default limit of 100 levels would
-- abort the query for a row with more items than that, so lift the limit.
option (maxrecursion 0);
输出
word frequency
---------- -----------
cat 1
dog 3
eraser 1
friends 1
leash 1
man 3
mouse 1
pen 1
pencil 1
答案 2 :(得分:1)
-- Static alternative: one bit column per keyword that is known in advance.
-- The list is normalised to ',item,item,' form (the ', ' separators collapsed to ',' and a
-- delimiter added at both ends) so that only whole items match; a bare
-- CHARINDEX('cat', keywords) would also flag 'category' or 'bobcat'.
-- NOTE: assumes items are separated by ',' or ', ' exactly; other spacing needs extra trimming.
SELECT id,
       CAST(CASE WHEN CHARINDEX(',dog,', ',' + REPLACE(keywords, ', ', ',') + ',') > 0 THEN 1 ELSE 0 END AS bit) AS DOG,
       CAST(CASE WHEN CHARINDEX(',cat,', ',' + REPLACE(keywords, ', ', ',') + ',') > 0 THEN 1 ELSE 0 END AS bit) AS CAT
       -- ... add one expression per remaining keyword, following the same pattern
FROM yourtable
答案 3 :(得分:0)
如果您使用的是SQL Server 2008,则可以使用全文解析器来拆分字符串:
-- Keyword frequency using the full-text parser (sys.dm_fts_parser) as the string splitter.
-- Sample input: one row per Id, Keywords holds a comma-separated list.
Declare @Inputs Table ( Id int not null Primary Key, Keywords nvarchar(max ) )
Insert @Inputs( Id, Keywords ) Values( 1, 'cat, dog, man, mouse' )
Insert @Inputs( Id, Keywords ) Values( 2, 'man, pen, pencil, eraser' )
Insert @Inputs( Id, Keywords ) Values( 3, 'dog, man, friends' )
Insert @Inputs( Id, Keywords ) Values( 4, 'dog, leash' )

-- Arguments passed to sys.dm_fts_parser after the query string.
Declare @LCID int
Declare @StopListId int
Declare @AccentSensitive int
-- Use the locale id of the master database for word breaking.
Set @LCID = Cast( DatabasePropertyEx('master','LCID') As int )
Set @StopListId = 0
Set @AccentSensitive = 1

Select S.display_term, Count(*) As Frequency
From @Inputs As I
    Cross Apply (
                Select display_term
                -- Wrap the list in double quotes, doubling any embedded quote.
                -- This is what QUOTENAME(I.Keywords, '"') produces, but QUOTENAME returns NULL
                -- for input longer than 128 characters, which would silently drop long lists.
                From sys.dm_fts_parser( '"' + Replace( I.Keywords, '"', '""' ) + '"'
                    , @LCID, @StopListId, @AccentSensitive)
                ) As S
Group By S.display_term
但是,如果您使用的不是 SQL Server 2008,则需要一个拆分函数(我在本回答的末尾给出了一个)。这样您的查询就是:
-- Keyword frequency using the dbo.Split table-valued function as the splitter.
-- Relies on the @Inputs table variable (Id, Keywords) being declared earlier in the same batch.
-- Items are trimmed because the lists use ', ' between keywords while the delimiter is ','.
Select LTrim(RTrim(S.Value)) As Keyword, Count(*) As Frequency
From @Inputs As I
    Cross Apply dbo.Split( I.Keywords, ',' ) As S
-- Empty items (e.g. from 'a,,b') are not keywords; leave them out of the counts.
Where LTrim(RTrim(S.Value)) <> ''
Group By LTrim(RTrim(S.Value))
Order By Keyword
拆分函数:
-- dbo.Split: inline table-valued function that splits a delimited string into rows.
--
-- Parameters:
--   @DelimitedList  the string to split.
--   @Delimiter      the separator, one or two characters (default ',').
-- Returns one row per item:
--   Position  1-based start of the item within the corrected list built below
--             (i.e. shifted by the delimiter length when a leading delimiter was added).
--   Value     the item text, not trimmed.
-- An empty or NULL @DelimitedList yields no rows.
Create Function [dbo].[Split]
(
    @DelimitedList nvarchar(max)
    , @Delimiter varchar(2) = ','
)
RETURNS TABLE
AS
RETURN
    (
    -- Make sure the list both starts and ends with the delimiter, so every item is
    -- enclosed by a delimiter on each side.
    With CorrectedList As
        (
        Select Case When Left(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End
            + @DelimitedList
            + Case When Right(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End
            As List
            , DataLength(@Delimiter) As DelimiterLen
        )
        -- Candidate character positions 1..Len(@DelimitedList), generated by numbering rows
        -- of a cross join of two catalog views.
        , Numbers As
        (
        Select TOP (Coalesce(Len(@DelimitedList),1)) Row_Number() Over ( Order By c1.object_id ) As Value
        From sys.objects As c1
            Cross Join sys.columns As c2
        )
    -- Every position where a delimiter starts opens an item that runs up to the next delimiter.
    Select CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen As Position
        , Substring (
                    CL.List
                    , CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen
                    -- Look for the closing delimiter only after the opening one has ended.
                    -- Starting at N.Value + 1 could match a delimiter overlapping the opening
                    -- one (two-character delimiters) and produce a negative length.
                    , CharIndex(@Delimiter, CL.list, N.Value + CL.DelimiterLen)
                        - ( CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen )
                    ) As Value
    From CorrectedList As CL
        Cross Join Numbers As N
    -- Exclude the trailing delimiter: nothing follows it, so it opens no item. Comparing the
    -- delimiter's end position (rather than N.Value < Len) also excludes a two-character
    -- trailing delimiter, which starts at Len - 1 and would otherwise raise an
    -- invalid-length error in Substring.
    Where N.Value + CL.DelimiterLen <= Len(CL.List)
        And Substring(CL.List, N.Value, CL.DelimiterLen) = @Delimiter
    )