我有一些文档,每个文档可以属于多个类,并且可以包含多个标记(单词):
-- Token dictionary: Id is the primary key, Text holds the token string (nullable).
CREATE TABLE Tokens (
    Id   INT           NOT NULL PRIMARY KEY,
    Text NVARCHAR(255) NULL
);

-- One row per (document, class, token) combination.
-- The three *Fk columns are plain nullable INTs; no foreign-key constraints are declared.
CREATE TABLE DocumentClassTokens (
    Id         INT NOT NULL PRIMARY KEY,
    DocumentFk INT NULL,
    ClassFk    INT NULL,
    TokenFk    INT NULL
);
我想针对给定的类,为所有标记计算以下统计数据(根据下文的查询和回答推断:A = 属于该类且包含该标记的文档数,B = 不属于该类但包含该标记的文档数,C = 属于该类但不包含该标记的文档数,D = 不属于该类且不包含该标记的文档数):
我目前使用的是下面的查询,但结果看起来不正确(我很确定 A 和 B 的计算是正确的):
-- Per-token document counts relative to one class.
-- Each of A, B, C and D is computed in its own CTE (grouped by token text) and then
-- left-joined back onto dbo.Tokens so that every token appears in the output.
declare @class int;
-- Resolve the class id from its text. dbo.Classes is not part of the DDL shown with this query.
-- If no class matches, @class stays NULL and every "= @class" / "!= @class" predicate below is
-- UNKNOWN, so all four counts come out as 0.
select @class = id from dbo.Classes where text = 'bla'
-- A: distinct documents that have a row for this token with ClassFk = @class.
;with A as
(
select
a.text as token,
count(distinct DocumentFk) as A
from dbo.Tokens as a
inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk = @class
group by a.text
)
-- B: distinct documents that have a row for this token with a ClassFk other than @class.
-- Rows whose ClassFk is NULL match neither A nor B. A document that has this token under
-- @class and also under another class is counted in both A and B.
,B as
(
select
a.text as token,
count(distinct DocumentFk) as B
from dbo.Tokens as a
inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk != @class
group by a.text
)
-- C: NOTE(review): the join uses a.id != b.TokenFk, so each token is paired with every
-- @class row that belongs to a *different* token. The result is "documents in the class that
-- contain some other token"; a document that also contains this token is still counted.
-- If C is meant to be "documents in the class that do NOT contain this token", this join is
-- the likely cause of the wrong numbers -- TODO confirm the intended definition.
,C as
(
select
a.text as token,
count(distinct DocumentFk) as C
from dbo.Tokens as a
inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk = @class
group by a.text
)
-- D: NOTE(review): same join shape as C, applied to rows outside @class. It counts documents
-- with a non-@class row for some other token, not documents that lack this token.
,D as
(
select
a.text as token,
count(distinct DocumentFk) as D
from dbo.Tokens as a
inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk != @class
group by a.text
)
-- Final projection: one row per dbo.Tokens row. The CASE expressions turn the NULL produced
-- by a left join without a match into 0. The joins are on token text, not id, so tokens that
-- share the same text receive the same counts.
select
case when A is null then 0 else A end as A,
case when B is null then 0 else B end as B,
case when C is null then 0 else C end as C,
case when D is null then 0 else D end as D,
t.Text,
t.id
from dbo.Tokens as t
left outer join A as a on t.text = a.token
left outer join B as b on t.text = b.token
left outer join C as c on t.text = c.token
left outer join D as d on t.text = d.token
order by t.text
非常感谢任何反馈。谢谢!
祝好,
Christian
PS:
一些测试数据:
-- Test fixture: rebuilds Tokens and DocumentClassTokens in the "play" database and loads
-- a small sample data set. Safe to re-run: each table is dropped only if it already exists,
-- so the script no longer fails on a first run against an empty database.
USE play;

IF OBJECT_ID(N'Tokens', N'U') IS NOT NULL
    DROP TABLE Tokens;

CREATE TABLE Tokens
(
    Id   INT           NOT NULL,
    Text NVARCHAR(255) NULL,
    PRIMARY KEY (Id)
);

INSERT INTO Tokens (Id, Text) VALUES (1, '1');
INSERT INTO Tokens (Id, Text) VALUES (2, '2');

IF OBJECT_ID(N'DocumentClassTokens', N'U') IS NOT NULL
    DROP TABLE DocumentClassTokens;

CREATE TABLE DocumentClassTokens (
    Id         INT NOT NULL,
    DocumentFk INT NULL,
    ClassFk    INT NULL,
    TokenFk    INT NULL,
    PRIMARY KEY (Id)
);

-- Document 1: class 1 only, tokens 1 and 2.
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (1, 1, 1, 1);
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (2, 1, 1, 2);
-- Document 2: token 1 under both class 1 and class 2.
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (3, 2, 1, 1);
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (4, 2, 2, 1);
-- Document 3: class 2 only, tokens 1 and 3.
-- TokenFk 3 has no matching row in Tokens (only ids 1 and 2 are inserted above).
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (5, 3, 2, 1);
INSERT INTO DocumentClassTokens (Id, DocumentFk, ClassFk, TokenFk) VALUES (6, 3, 2, 3);
答案 0 :(得分:1)
根据您的描述,这个查询似乎可以满足您的需求。但对照您的代码来看,我不太确定。
编辑 1:改为用列而不是行来输出结果,并以 @ClassID 作为过滤条件。
-- Buckets the documents of one class by whether their aggregated token / class values
-- are NULL, and returns the four bucket sizes as a single row (A, B, C, D).
DECLARE @ClassID INT;
SET @ClassID = 1;

-- per_document: one row per DocumentFk that has at least one row with ClassFk = @ClassID,
-- carrying the largest TokenFk and ClassFk found among those rows.
WITH per_document AS
(
    SELECT
        DocumentFk,
        MAX(TokenFk) AS TokenFk,
        MAX(ClassFk) AS ClassFk
    FROM DocumentClassTokens
    WHERE ClassFk = @ClassID
    GROUP BY DocumentFk
)
-- Single pass over per_document; COALESCE keeps each count at 0 when the CTE is empty.
-- NOTE(review): because the CTE filters on ClassFk = @ClassID, MAX(ClassFk) is never NULL
-- in its output, so B and D always evaluate to 0.
SELECT
    COALESCE(SUM(CASE WHEN TokenFk IS NOT NULL AND ClassFk IS NOT NULL THEN 1 ELSE 0 END), 0) AS A,
    COALESCE(SUM(CASE WHEN TokenFk IS NOT NULL AND ClassFk IS NULL     THEN 1 ELSE 0 END), 0) AS B,
    COALESCE(SUM(CASE WHEN TokenFk IS NULL     AND ClassFk IS NOT NULL THEN 1 ELSE 0 END), 0) AS C,
    COALESCE(SUM(CASE WHEN TokenFk IS NULL     AND ClassFk IS NULL     THEN 1 ELSE 0 END), 0) AS D
FROM per_document;
答案 1 :(得分:1)
您的问题现在清楚多了。如果我没有遗漏什么,您可以尝试对您的数据运行下面这个查询。
-- Per-token counts for class @class: A/B come from a per-token aggregate, C/D are derived by
-- subtracting them from overall totals computed once and attached to every row via CROSS JOIN.
-- NOTE(review): classification here is per ROW, not per document. A document that has rows
-- under @class and also rows under another class (or with a NULL ClassFk) is counted on both
-- the "class" and the "non-class" side, in the per-token counts as well as in the totals.
DECLARE @class int;
SET @class = 1;
SELECT
TokenFk,
TokenClassDocs AS A,
TokenNonClassDocs AS B,
TotalClassDocs - TokenClassDocs AS C,
TotalNonClassDocs - TokenNonClassDocs AS D
FROM (
-- bytoken: for each TokenFk, distinct documents seen on a @class row and distinct documents
-- seen on any other row. COUNT(DISTINCT ...) ignores the NULLs produced by the CASE.
SELECT
TokenFk,
COUNT(DISTINCT CASE ClassFk WHEN @class THEN DocumentFk ELSE NULL END) AS TokenClassDocs,
COUNT(DISTINCT CASE ClassFk WHEN @class THEN NULL ELSE DocumentFk END) AS TokenNonClassDocs
FROM DocumentClassTokens dct
GROUP BY dct.TokenFk
) AS bytoken
CROSS JOIN (
-- totals: the same two counts over the whole table (a single row).
SELECT
COUNT(DISTINCT CASE ClassFk WHEN @class THEN DocumentFk ELSE NULL END) AS TotalClassDocs,
COUNT(DISTINCT CASE ClassFk WHEN @class THEN NULL ELSE DocumentFk END) AS TotalNonClassDocs
FROM DocumentClassTokens
) AS totals
请告诉我们是否可以。
编辑
上述解决方案是错误的。下面是修正后的版本,它肯定是正确的,只是我不像喜欢那个错误版本那样喜欢它(颇具讽刺意味……)。
-- Per-token document counts for class @class, with class membership decided per DOCUMENT:
-- a document belongs to the class if any of its rows has ClassFk = @class.
--   A = documents in the class that have the token
--   B = documents outside the class that have the token
--   C = documents in the class minus A
--   D = documents outside the class minus B
DECLARE @class int;
SET @class = 1;

WITH class_docs AS (
    -- Documents that have at least one row in the requested class.
    SELECT DISTINCT DocumentFk
    FROM DocumentClassTokens
    WHERE ClassFk = @class
),
tagged AS (
    -- Every source row, with its document id placed in exactly one of two columns
    -- depending on whether the document matched class_docs.
    SELECT
        dct.TokenFk,
        cls.DocumentFk AS ClassDocFk,
        CASE WHEN cls.DocumentFk IS NULL THEN dct.DocumentFk END AS NonClassDocFk
    FROM DocumentClassTokens AS dct
    LEFT JOIN class_docs AS cls
        ON dct.DocumentFk = cls.DocumentFk
),
by_token AS (
    -- Distinct class / non-class documents per token.
    SELECT
        TokenFk,
        COUNT(DISTINCT ClassDocFk)    AS TokenClassDocs,
        COUNT(DISTINCT NonClassDocFk) AS TokenNonClassDocs
    FROM tagged
    GROUP BY TokenFk
),
totals AS (
    -- Distinct class / non-class documents overall (always a single row).
    SELECT
        COUNT(DISTINCT ClassDocFk)    AS TotalClassDocs,
        COUNT(DISTINCT NonClassDocFk) AS TotalNonClassDocs
    FROM tagged
)
SELECT
    bt.TokenFk,
    bt.TokenClassDocs                            AS A,
    bt.TokenNonClassDocs                         AS B,
    tot.TotalClassDocs - bt.TokenClassDocs       AS C,
    tot.TotalNonClassDocs - bt.TokenNonClassDocs AS D
FROM by_token AS bt
CROSS JOIN totals AS tot;
注意:我想我现在知道如何检查这些数字是否有误了:每一行(即每个标记)中 A、B、C、D 之和必须等于文档总数。这并不奇怪,因为每个文档恰好满足所考察的 4 种情况中的 1 种,且仅满足 1 种。如果某一行的总和与文档总数不同,则该行中一定有数字是错误的。