用tsql确定一些统计信息

时间:2011-02-03 17:34:02

标签: tsql

我有一些文档,每个文档可以属于多个类,并且可以包含多个标记(单词):

-- Tokens: one row per token (word). Id is the primary key; Text may be NULL.
CREATE TABLE Tokens (
    Id   INT           NOT NULL,
    Text NVARCHAR(255) NULL,
    PRIMARY KEY (Id)
)

-- DocumentClassTokens: each row ties together a document id, a class id and a
-- token id. Id is the primary key; the three *Fk columns are nullable.
-- NOTE(review): no FOREIGN KEY constraints are declared here; the *Fk columns
-- presumably reference the document, class and Tokens tables -- confirm.
CREATE TABLE DocumentClassTokens (
    Id         INT NOT NULL,
    DocumentFk INT NULL,
    ClassFk    INT NULL,
    TokenFk    INT NULL,
    PRIMARY KEY (Id)
)

我想针对给定的类,为所有标记(token)确定以下统计数据:

  • A = 包含该标记且属于该类的不同文档的数量
  • B = 包含该标记但不属于该类的不同文档的数量
  • C = 不包含该标记且属于该类的不同文档的数量
  • D = 不包含该标记且不属于该类的不同文档的数量

我目前使用的是下面这个查询,但结果看起来不正确(我比较确定 A 和 B 的计算是正确的):

-- Per-token document counts (A, B, C, D) relative to one class.
-- The class id is looked up by its text in dbo.Classes.
declare @class int;

-- If no row in dbo.Classes matches, @class keeps its initial NULL value; every
-- comparison against it below is then never true, so all four counts come out 0.
select @class = id from dbo.Classes where text = 'bla'

-- A: per token text, distinct documents having a row with this token and
-- ClassFk = @class.
;with A as
(
    select
        a.text as token,
        count(distinct DocumentFk) as A
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk = @class
    group by a.text
)
-- B: per token text, distinct documents having a row with this token and a
-- ClassFk different from @class (rows with a NULL ClassFk are not matched).
-- The test is per row: a document with rows in both @class and another class
-- is counted in A and in B.
,B as
(
    select
        a.text as token,
        count(distinct DocumentFk) as B
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk != @class
    group by a.text
)
-- C: the join on a.id != b.TokenFk pairs each token with every row of a
-- *different* token in @class, so this counts documents of the class that
-- contain at least one other token. A document that also contains this token
-- is still counted.
-- NOTE(review): this is not the same as "documents of the class that do NOT
-- contain the token"; that would need an anti-join (NOT EXISTS) or
-- "class total minus A".
,C as
(
    select
        a.text as token,
        count(distinct DocumentFk) as C
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk = @class
    group by a.text
)
-- D: same join shape as C, but for rows whose ClassFk differs from @class;
-- the same caveat about a.id != b.TokenFk applies.
,D as
(
    select
        a.text as token,
        count(distinct DocumentFk) as D
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk != @class
    group by a.text
)
-- Final result: one row per dbo.Tokens row; a missing count is reported as 0.
-- The CTEs are keyed by token text, so tokens sharing the same text receive
-- the same counts.
select 
    case when A is null then 0 else A end as A,
    case when B is null then 0 else B end as B,
    case when C is null then 0 else C end as C,
    case when D is null then 0 else D end as D,
    t.Text,
    t.id
from dbo.Tokens as t
left outer join A as a on t.text = a.token
left outer join B as b on t.text = b.token
left outer join C as c on t.text = c.token
left outer join D as d on t.text = d.token
order by t.text

非常感谢任何反馈。非常感谢!

祝好,

Christian

PS:

一些测试数据:

-- Test fixture for the token/class statistics queries.
-- Re-runnable: each table is dropped only when it already exists, so the
-- script no longer raises an error on a database where the tables are missing.
use play;

-- Tokens: the known tokens (ids 1 and 2).
if object_id(N'Tokens', N'U') is not null
    drop table Tokens;

create table Tokens
(
   Id INT not null,
   Text NVARCHAR(255) null,
   primary key (Id)
);

insert into Tokens (id, text) values (1,'1');
insert into Tokens (id, text) values (2,'2');

-- DocumentClassTokens: (document, class, token) rows.
if object_id(N'DocumentClassTokens', N'U') is not null
    drop table DocumentClassTokens;

create table DocumentClassTokens (
        Id INT not null,
       DocumentFk INT null,
       ClassFk INT null,
       TokenFk INT null,
       primary key (Id)
    );

-- Document 1: class 1, tokens 1 and 2.
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (1,1,1,1);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (2,1,1,2);
-- Document 2: classes 1 and 2, token 1.
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (3,2,1,1);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (4,2,2,1);
-- Document 3: class 2, tokens 1 and 3.
-- NOTE(review): token 3 has no row in Tokens; kept as in the original data --
-- confirm whether that is intentional.
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (5,3,2,1);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (6,3,2,3);

2 个答案:

答案 0 :(得分:1)

根据您的描述,下面的查询似乎可以满足要求。不过看了您的代码之后,我又不太确定了。

编辑 1:改为用列(而不是行)输出结果,并使用 @ClassID 作为过滤条件。

-- Totals A/B/C/D over the documents of one class, returned as a single row.
DECLARE @ClassID INT;
SET @ClassID = 1;

-- One row per document that has at least one row in class @ClassID, carrying
-- the largest TokenFk and ClassFk found among those rows.
WITH class_documents (DocumentFk, MaxTokenFk, MaxClassFk) AS
(
    SELECT
        DocumentFk,
        MAX(TokenFk),
        MAX(ClassFk)
    FROM DocumentClassTokens
    WHERE ClassFk = @ClassID
    GROUP BY DocumentFk
)
-- Conditional sums over the per-document rows; COALESCE keeps the counts at 0
-- when no document matches the class.
-- Because of the WHERE ClassFk = @ClassID filter, MaxClassFk is never NULL
-- here, so B and D always evaluate to 0.
SELECT
    COALESCE(SUM(CASE WHEN MaxTokenFk IS NOT NULL AND MaxClassFk IS NOT NULL THEN 1 ELSE 0 END), 0) AS A,
    COALESCE(SUM(CASE WHEN MaxTokenFk IS NOT NULL AND MaxClassFk IS NULL     THEN 1 ELSE 0 END), 0) AS B,
    COALESCE(SUM(CASE WHEN MaxTokenFk IS NULL     AND MaxClassFk IS NOT NULL THEN 1 ELSE 0 END), 0) AS C,
    COALESCE(SUM(CASE WHEN MaxTokenFk IS NULL     AND MaxClassFk IS NULL     THEN 1 ELSE 0 END), 0) AS D
FROM class_documents;

答案 1 :(得分:1)

您的问题现在看起来清楚多了。如果我没有遗漏什么,您可以用下面这个查询在您的数据上试一试。

-- Per-token counts A/B/C/D for class @class, classifying each
-- DocumentClassTokens row by its own ClassFk.
DECLARE @class INT;
SET @class = 1;

-- by_token: per token, distinct documents seen on rows inside / outside @class.
-- The class test is per row: a document with rows in both @class and another
-- class counts toward both figures.
-- NOTE(review): as a consequence A + B + C + D for a token can exceed the
-- number of distinct documents.
WITH by_token AS (
    SELECT
        TokenFk,
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN DocumentFk END)                AS TokenClassDocs,
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN NULL ELSE DocumentFk END)      AS TokenNonClassDocs
    FROM DocumentClassTokens
    GROUP BY TokenFk
),
-- totals: the same two figures over all rows, regardless of token.
totals AS (
    SELECT
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN DocumentFk END)                AS TotalClassDocs,
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN NULL ELSE DocumentFk END)      AS TotalNonClassDocs
    FROM DocumentClassTokens
)
-- C and D are derived as "total minus documents containing the token".
SELECT
    bt.TokenFk,
    bt.TokenClassDocs                          AS A,
    bt.TokenNonClassDocs                       AS B,
    tt.TotalClassDocs    - bt.TokenClassDocs    AS C,
    tt.TotalNonClassDocs - bt.TokenNonClassDocs AS D
FROM by_token AS bt
CROSS JOIN totals AS tt

请告诉我们是否可以。


修改

上述解决方案是错误的。下面是修正后的版本,它肯定是正确的,只是我不像喜欢那个错误版本那样喜欢它(真是讽刺……)。

-- Per-token counts A/B/C/D for class @class, deciding class membership per
-- document rather than per row.
DECLARE @class INT;
SET @class = 1;

-- class_docs: the distinct documents that have at least one row in @class.
WITH class_docs AS (
    SELECT DISTINCT DocumentFk
    FROM DocumentClassTokens
    WHERE ClassFk = @class
),
-- tagged: every DocumentClassTokens row, with ClassDocumentFk set to the
-- document id when the document is in class_docs and NULL otherwise.
tagged AS (
    SELECT
        dct.TokenFk,
        dct.DocumentFk,
        cd.DocumentFk AS ClassDocumentFk
    FROM DocumentClassTokens AS dct
    LEFT JOIN class_docs AS cd
        ON dct.DocumentFk = cd.DocumentFk
),
-- by_token: per token, distinct documents inside / outside the class.
by_token AS (
    SELECT
        TokenFk,
        COUNT(DISTINCT ClassDocumentFk)                                        AS TokenClassDocs,
        COUNT(DISTINCT CASE WHEN ClassDocumentFk IS NULL THEN DocumentFk END)  AS TokenNonClassDocs
    FROM tagged
    GROUP BY TokenFk
),
-- totals: the same two figures over all rows, regardless of token.
totals AS (
    SELECT
        COUNT(DISTINCT ClassDocumentFk)                                        AS TotalClassDocs,
        COUNT(DISTINCT CASE WHEN ClassDocumentFk IS NULL THEN DocumentFk END)  AS TotalNonClassDocs
    FROM tagged
)
-- C and D are derived as "total minus documents containing the token".
SELECT
    bt.TokenFk,
    bt.TokenClassDocs                          AS A,
    bt.TokenNonClassDocs                       AS B,
    tt.TotalClassDocs    - bt.TokenClassDocs    AS C,
    tt.TotalNonClassDocs - bt.TokenNonClassDocs AS D
FROM by_token AS bt
CROSS JOIN totals AS tt

注意:我想我现在知道如何检查这些数字是否有误了:每一行(即每个标记)中 A、B、C、D 之和必须等于文档总数。这并不奇怪,因为每个文档恰好满足所考察的 4 种情况中的一种,且仅满足一种。如果某一行的总和与文档总数不同,那么该行中一定有数字是错的。