按类别查找缺失的序列

时间:2014-11-27 06:23:15

标签: sql sql-server tsql

我必须从以下示例中识别丢失的记录。

Category    BatchNo TransactionNo
+++++++++++++++++++++++++++++++++
CAT1           1    1
CAT1           1    2
CAT1           2    3
CAT1           2    4
CAT1           2    5
CAT1           3    6
CAT1           3    7
CAT1           3    8
CAT1           5    12
CAT1           5    13
CAT1           5    14
CAT1           5    15
CAT1           7    18
CAT2           1    1
CAT2           1    2
CAT2           3    6
CAT2           3    7
CAT2           3    8
CAT2           3    9
CAT2           4    10
CAT2           4    11
CAT2           4    12
CAT2           6    14

我需要一个脚本来识别丢失的记录,如下所示

Category    BatchNo
+++++++++++++++++++
CAT1         4
CAT1         6
CAT2         2
CAT2         5

我不需要知道 CAT1 的批次 8 和 CAT2 的批次 7 不存在,因为它们可能尚未插入。

6 个答案:

答案 0 :(得分:1)

您可以为每个类别创建一个临时结果集,其中包含从 1 到该类别最大批次编号的所有可能批次,然后从中选出表里不存在的批次编号。

CREATE TABLE TEMP (  -- demo fixture: one row per transaction, tagged with category and batch
        Category      varchar(10),
        BatchNo       int,
        TransactionNo int
    );

    -- Explicit column list so the load keeps working if TEMP's column order changes.
    -- CAT1 has batches 1,2,3,5,7 and CAT2 has batches 1,3,4,6.
    INSERT INTO TEMP (Category, BatchNo, TransactionNo)
    VALUES
        ('CAT1', 1, 1),
        ('CAT1', 1, 2),
        ('CAT1', 2, 3),
        ('CAT1', 2, 4),
        ('CAT1', 2, 5),
        ('CAT1', 3, 6),
        ('CAT1', 3, 7),
        ('CAT1', 3, 8),
        ('CAT1', 5, 9),
        ('CAT1', 7, 10),
        ('CAT2', 1, 1),
        ('CAT2', 1, 2),
        ('CAT2', 3, 3),
        ('CAT2', 4, 4),
        ('CAT2', 4, 5),
        ('CAT2', 4, 6),
        ('CAT2', 6, 7);


    -- BatchRange enumerates every batch number from 1 up to each category's
    -- highest existing batch; the final SELECT keeps the numbers that have
    -- no row in TEMP for that category.
    WITH BatchRange (BatchID, Category, MaxBatch) AS (
        -- Anchor: one row per category, starting the count at 1.
        SELECT 1, Category, MAX(BatchNo)
        FROM TEMP
        GROUP BY Category
        UNION ALL
        -- Recursive step: count upwards until the category's maximum is reached.
        SELECT BatchID + 1, Category, MaxBatch
        FROM BatchRange
        WHERE BatchID < MaxBatch
    )
    SELECT
        r.Category,
        r.BatchID
    FROM
        BatchRange AS r
    WHERE
        -- NOT EXISTS instead of NOT IN: a single NULL BatchNo in TEMP would
        -- make NOT IN evaluate to UNKNOWN for every row and return nothing.
        NOT EXISTS (
            SELECT 1
            FROM TEMP AS t
            WHERE t.Category = r.Category
              AND t.BatchNo = r.BatchID
        )
    ORDER BY
        r.Category,
        r.BatchID
    -- The default recursion limit is 100 levels; lift it so categories with
    -- more than 100 batches do not abort the query.
    OPTION (MAXRECURSION 0);


    -- Remove the TEMP demo table once the example has been run.
    DROP TABLE TEMP

答案 1 :(得分:0)

这个使用Tally Table。供参考:http://www.sqlservercentral.com/articles/T-SQL/62867/

示例数据

-- Demo fixture mirroring the question's data: one row per transaction.
create table MyTable (
    Category      varchar(10),
    BatchNo       int,
    TransactionNo int
);

-- Explicit column list so the load does not depend on MyTable's column order.
-- CAT1 has batches 1,2,3,5,7 and CAT2 has batches 1,3,4,6.
insert into MyTable (Category, BatchNo, TransactionNo)
values
    ('CAT1', 1, 1),
    ('CAT1', 1, 2),
    ('CAT1', 2, 3),
    ('CAT1', 2, 4),
    ('CAT1', 2, 5),
    ('CAT1', 3, 6),
    ('CAT1', 3, 7),
    ('CAT1', 3, 8),
    ('CAT1', 5, 12),
    ('CAT1', 5, 13),
    ('CAT1', 5, 14),
    ('CAT1', 5, 15),
    ('CAT1', 7, 18),
    ('CAT2', 1, 1),
    ('CAT2', 1, 2),
    ('CAT2', 3, 6),
    ('CAT2', 3, 7),
    ('CAT2', 3, 8),
    ('CAT2', 3, 9),
    ('CAT2', 4, 10),
    ('CAT2', 4, 11),
    ('CAT2', 4, 12),
    ('CAT2', 6, 14);

-- Builds an in-query tally (numbers) list 1..max(BatchNo), pairs it with every
-- category, and keeps the numbers below the category's highest batch that
-- have no row in MyTable.
-- All CTE and column names use one casing so the query also runs on
-- case-sensitive collations.
with e1(n) as (
    select v.n
    from (values (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) as v(n)
),  -- 10 rows
e2(n) as (select 1 from e1 as a cross join e1 as b), -- 10 x 10 = 100 rows
e4(n) as (select 1 from e2 as a cross join e2 as b), -- 100 x 100 = 10,000 rows
tally(n) as (
    -- Only as many numbers as the largest batch in the table (capped by the
    -- 10,000 rows e4 can supply). COALESCE keeps the TOP argument non-NULL
    -- when MyTable is empty.
    select
        top (select coalesce(max(BatchNo), 0) from MyTable)
        row_number() over(order by (select null))
    from e4
)
select
    c.Category,
    t.n
from tally as t
cross join (
    -- Highest batch per category: the upper bound for the gap search.
    select
        Category,
        max(BatchNo) as MaxBatchNo
    from MyTable
    group by Category
) as c
where
    t.n < c.MaxBatchNo
    and not exists (
        select 1
        from MyTable as m
        where m.BatchNo = t.n
          and m.Category = c.Category
    )
order by
    c.Category,
    t.n;

答案 2 :(得分:0)

最好创建投影表并使用标准left join来查找差距:

-- Number source 1..1000 held in a table variable; raise the TOP value if
-- batch numbers can exceed 1000.
declare @Sequencer table (
    Id int primary key
);

-- NOTE(review): master.dbo.spt_values is used purely as a row source here;
-- confirm it has at least as many rows as the TOP value on the target server.
insert into @Sequencer (Id)
select top (1000) row_number() over(order by (select null))
from master.dbo.spt_values;

-- [Table] is bracketed because TABLE is a reserved word; replace it with the
-- real table name.
select
    cat.Category,
    s.Id as BatchNo          -- a batch number with no rows for this category
from @Sequencer as s
    inner join (
        -- Highest batch per category bounds the numbers that are checked.
        select Category, max(BatchNo) as [Size]
        from dbo.[Table]
        group by Category
    ) as cat on cat.[Size] > s.Id
    left join (
        select distinct Category, BatchNo
        from dbo.[Table]
    ) as t on t.Category = cat.Category and t.BatchNo = s.Id
where t.BatchNo is null      -- anti-join: keep numbers without a matching batch
order by
    cat.Category,
    s.Id;

当然,在现实生活中,您可能需要超过1000行,因此请相应调整。

答案 3 :(得分:0)

-- Numbers: counts down from the overall highest BatchNo to 1.
WITH Numbers AS (
    SELECT MAX(BatchNo) AS Number
    FROM #MyTable
  UNION ALL
    SELECT Number - 1
    FROM Numbers
    WHERE Number > 1
)
-- CategorySizes: lowest and highest existing batch for each category.
,CategorySizes AS (
    SELECT Category
          ,MIN(BatchNo) AS StartBatch
          ,MAX(BatchNo) AS EndBatch
    FROM #MyTable
    GROUP BY Category
)
-- PossibleBatches: every number between a category's first and last batch.
,PossibleBatches AS (
    SELECT CategorySizes.Category
          ,Numbers.Number AS BatchNo
    FROM CategorySizes
         CROSS JOIN Numbers
    WHERE Numbers.Number BETWEEN CategorySizes.StartBatch AND CategorySizes.EndBatch
)
-- MissingBatches: possible batches that have no row in #MyTable.
,MissingBatches AS (
    SELECT PossibleBatches.Category
          ,PossibleBatches.BatchNo
    FROM PossibleBatches
    WHERE NOT EXISTS (
              SELECT 1
              FROM #MyTable
              WHERE #MyTable.Category = PossibleBatches.Category
                AND #MyTable.BatchNo = PossibleBatches.BatchNo
          )
)
SELECT Category
      ,BatchNo
FROM MissingBatches
ORDER BY Category
        ,BatchNo
-- The Numbers CTE recurses once per batch number; the default limit of 100
-- levels would abort the query for larger batch numbers, so lift it.
OPTION (MAXRECURSION 0);

答案 4 :(得分:0)

不使用循环或游标(FETCH),你可以使用下面的查询(#Category 对应你的表名),性能很好:

-- Distinct (Category, BatchNo) pairs numbered consecutively by RN, so that
-- rows RN and RN + 1 of one category are neighbouring batches.
DECLARE @t TABLE (RN INT IDENTITY, Category VARCHAR(255), BatchNo INT);

-- ORDER BY is required: the neighbour comparison below relies on RN being
-- assigned in (Category, BatchNo) order, which is not guaranteed without it.
INSERT INTO @t (Category, BatchNo)
SELECT DISTINCT Category, BatchNo
FROM #Category
ORDER BY Category, BatchNo;

-- Gaps: the anchor finds each pair of neighbouring batches that are not
-- consecutive and emits the first missing number plus the last missing
-- number of that gap; the recursive step fills in the rest, so a gap wider
-- than one batch is reported in full.
WITH Gaps AS (
    SELECT a.Category,
           a.BatchNo + 1 AS BatchNo,
           b.BatchNo - 1 AS LastMissing
    FROM @t AS a
    INNER JOIN @t AS b
        ON b.RN = a.RN + 1
       AND b.Category = a.Category
       AND b.BatchNo <> a.BatchNo + 1
    UNION ALL
    SELECT g.Category,
           g.BatchNo + 1,
           g.LastMissing
    FROM Gaps AS g
    WHERE g.BatchNo < g.LastMissing
)
SELECT Category,
       BatchNo
FROM Gaps
ORDER BY Category,
         BatchNo
-- A single gap may span more than the default 100 recursion levels.
OPTION (MAXRECURSION 0);

答案 5 :(得分:0)

-- Demo fixture in a session-scoped temp table: one row per transaction.
CREATE TABLE #cat (
        Category      varchar(10),
        BatchNo       int,
        TransactionNo int
    );

-- Explicit column list so the load does not depend on #cat's column order.
-- CAT1 has batches 1,2,3,5,7 and CAT2 has batches 1,3,4,6.
INSERT INTO #cat (Category, BatchNo, TransactionNo)
VALUES
    ('CAT1', 1, 1),
    ('CAT1', 1, 2),
    ('CAT1', 2, 3),
    ('CAT1', 2, 4),
    ('CAT1', 2, 5),
    ('CAT1', 3, 6),
    ('CAT1', 3, 7),
    ('CAT1', 3, 8),
    ('CAT1', 5, 9),
    ('CAT1', 7, 10),
    ('CAT2', 1, 1),
    ('CAT2', 1, 2),
    ('CAT2', 3, 3),
    ('CAT2', 4, 4),
    ('CAT2', 4, 5),
    ('CAT2', 4, 6),
    ('CAT2', 6, 7);


-- For every existing batch that is not the last one of its category, report
-- the following batch number when no row exists for it.
-- Only BatchNo + 1 is ever emitted, so a gap wider than one batch yields just
-- its first missing number.
-- Aliases use one casing throughout so the query also runs on case-sensitive
-- collations.
SELECT DISTINCT
    c.Category,
    c.BatchNo + 1 AS BatchNo       -- name the computed column for consumers
FROM #cat AS c
WHERE
    -- Anti-join: no row holds the next batch number in the same category.
    NOT EXISTS (
        SELECT 1
        FROM #cat AS nxt
        WHERE nxt.BatchNo = c.BatchNo + 1
          AND nxt.Category = c.Category
    )
    -- Skip the category's highest batch: numbers beyond it may simply not
    -- have been inserted yet.
    AND c.BatchNo <> (
        SELECT MAX(mx.BatchNo)
        FROM #cat AS mx
        WHERE mx.Category = c.Category
    )
ORDER BY
    Category,
    BatchNo;