需要基于集合的解决方案来对行进行分组

时间:2010-06-15 18:04:57

标签: sql-server sql-server-2005 tsql

我需要根据Category列对一组行进行分组,并将基于SUM(Number)列的组合行限制为小于或等于@Limit值。

对于每个不同的Category值,我需要确定若干SUM(Number) <= @Limit的“桶”。如果某个Category值的所有行的SUM(Number) <= @Limit,那么该Category值只会有1个桶(如示例数据中的'CCCC')。但是,如果SUM(Number) > @Limit,则该Category值会有多个桶(如示例数据中的'AAAA'),并且每个桶都必须 <= @Limit。桶的数量可以按需增加。另外,请看Category值'DDDD':它只有一行,而这一行本身就大于@Limit,因此在结果集中被拆分成两行。

鉴于此简化数据:

-- Sample data: one row per detail, keyed by DetailID, carrying its Category and Number.
DECLARE @Detail TABLE (DetailID int PRIMARY KEY, Category char(4), Number int)

SET NOCOUNT ON
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 1, 'AAAA', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 2, 'AAAA',  50)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 3, 'AAAA', 300)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 4, 'AAAA', 200)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 5, 'BBBB', 500)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 6, 'CCCC', 200)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 7, 'CCCC', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 8, 'CCCC',  50)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 9, 'DDDD', 800)
INSERT INTO @Detail (DetailID, Category, Number) VALUES (10, 'EEEE', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES (11, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (12, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (13, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (14, 'AAAA', 200) --EDIT added
SET NOCOUNT OFF

-- Upper bound that the queries below compare SUM(Number) against.
DECLARE @Limit int
SET @Limit = 500

我需要以下结果集之一:

DetailID  Bucket  |    DetailID  Category Bucket
--------  ------  |    --------  -------- ------
 1        1       |     1        'AAAA'   1     
 2        1       |     2        'AAAA'   1     
 3        1       |     3        'AAAA'   1     
 4        2       |     4        'AAAA'   2     
11        2       |    11        'AAAA'   2      --EDIT added
12        3       |    12        'AAAA'   3      --EDIT added
13        3       |    13        'AAAA'   3      --EDIT added
14        4       |    14        'AAAA'   4      --EDIT added
 5        5       OR    5        'BBBB'   1     
 6        6       |     6        'CCCC'   1     
 7        6       |     7        'CCCC'   1     
 8        6       |     8        'CCCC'   1     
 9        7       |     9        'DDDD'   1     
 9        8       |     9        'DDDD'   2     
10        9       |    10        'EEEE'   1   
编辑(在尝试所有答案之后):

由于所有基于集合的解决方案的尝试都不能按需工作,我决定采用对@GalacticJello答案的修改版本,修改之处已在下面的代码中注明。基本思路是:先找出整个类别都能放进单个存储桶的所有行,用一条INSERT-SELECT语句将它们插入,然后用@GalacticJello的WHILE循环遍历其余数据。这在我的情况下可以正常工作,因为几乎不会有行需要由循环来处理。

-- Assigns every @Detail row to a per-Category bucket so that the Number total
-- accumulated in a bucket stays <= @Limit.
-- Expects @Detail (DetailID, Category, Number) and @Limit to be declared
-- earlier in the same batch.
--   Step 1 (set-based): categories whose whole total fits in one bucket are
--                       inserted directly as bucket 1.
--   Step 2 (loop):      the remaining categories are walked row by row in
--                       (Category, DetailID) order.
DECLARE @DetailTemp table (PID INT IDENTITY(1,1), DetailID int primary key, Category char(4), Number int)
DECLARE @DetailFinal table (DetailID int, Category char(4), Bucket int) ---<<<renamed column to Bucket

DECLARE @DetailCount int
SET @DetailCount = 0

-- The split loop further down subtracts @Limit until the remainder fits, so a
-- non-positive @Limit would never terminate; reject it before doing any work.
IF @Limit <= 0
BEGIN
    RAISERROR('@Limit must be greater than zero.', 16, 1)
    RETURN
END

--------<<<optimization added starts here
;WITH AllSingleBuckets AS (
    -- Categories whose total fits into a single bucket
    SELECT
        Category
    FROM @Detail
    GROUP BY Category
    HAVING SUM(Number) <= @Limit
)
INSERT INTO @DetailFinal
        (DetailID, Category, Bucket)
    SELECT
        d.DetailID, d.Category, 1
    FROM @Detail d
        INNER JOIN AllSingleBuckets s ON d.Category = s.Category
--------<<<optimization added ends here

--------<<<changed for optimization, added WHERE clause
-- Queue only the categories not already handled above. The ORDER BY drives the
-- IDENTITY values, so walking PID 1..@DetailCount visits the rows grouped by
-- Category and, within a category, in DetailID order.
INSERT INTO @DetailTemp
        (DetailID, Category, Number)
    SELECT
        d.DetailID, d.Category, d.Number
    FROM @Detail d
    WHERE NOT EXISTS (SELECT 1 FROM @DetailFinal f WHERE d.Category = f.Category)
    ORDER BY d.Category, d.DetailID
-- Must directly follow the INSERT: @@ROWCOUNT reflects the last statement only
SELECT @DetailCount = @@ROWCOUNT

DECLARE @CurrentPid int
SET @CurrentPid = 1

DECLARE @ThisId int
DECLARE @ThisCategory char(4)
DECLARE @ThisNumber int

DECLARE @CurrentCategory char(4)
DECLARE @CurrentSum INT
DECLARE @CurrentBucket INT


WHILE @CurrentPid <= @DetailCount
BEGIN
    SELECT @ThisId = DetailID, @ThisCategory = Category, @ThisNumber = Number
    FROM @DetailTemp
    WHERE PID = @CurrentPid

    -- @CurrentCategory is NULL on the first pass, so the comparison is not
    -- true and the ELSE branch initialises the state.
    IF @ThisCategory = @CurrentCategory
    BEGIN
        IF @CurrentSum + @ThisNumber > @Limit
        BEGIN
            -- Row does not fit in the current bucket: open the next one
            SET @CurrentBucket = @CurrentBucket + 1
            SET @CurrentSum = @ThisNumber
        END
        ELSE
        BEGIN
            SET @CurrentSum = @CurrentSum + @ThisNumber
        END
    END
    ELSE
    BEGIN
        -- New category: restart bucket numbering
        SET @CurrentBucket = 1
        SET @CurrentCategory = @ThisCategory
        SET @CurrentSum = @ThisNumber
    END

    -- A single row larger than @Limit is emitted once per bucket it spans
    WHILE @CurrentSum > @Limit
    BEGIN
        INSERT INTO @DetailFinal (DetailID, Category, Bucket)
            SELECT @ThisId, @CurrentCategory, @CurrentBucket
        SET @CurrentBucket = @CurrentBucket + 1
        SET @CurrentSum = @CurrentSum - @Limit
    END

    INSERT INTO @DetailFinal (DetailID, Category, Bucket)
        SELECT @ThisId, @CurrentCategory, @CurrentBucket

    SET @CurrentPid = @CurrentPid + 1
END


-- Full sort key so the output order is deterministic within each category
SELECT DetailID, Category, Bucket
FROM @DetailFinal
ORDER BY Category, DetailID, Bucket --------<<<added order by

输出:

DetailID    Category Bucket
----------- -------- -----------
1           AAAA     1
2           AAAA     1
3           AAAA     1
4           AAAA     2
11          AAAA     2
12          AAAA     3
13          AAAA     3
14          AAAA     4
5           BBBB     1
6           CCCC     1
7           CCCC     1
8           CCCC     1
9           DDDD     1
9           DDDD     2
10          EEEE     1

(15 row(s) affected)

4 个答案:

答案 0 :(得分:1)

可能以下内容对您有用(但它不会为'DDDD'生成2行;我不确定您是否可以在不插入2个不同行的情况下执行此操作。)

-- Derives a bucket number for each row from the running total of Number within
-- its Category (rows accumulated in DetailID order).
-- Expects @Detail and @Limit to be declared earlier in the same batch.
-- A row whose running total crosses a bucket boundary is placed in the bucket
-- where the running total ends; it is not split into two rows.
SELECT
    t2.DetailID,
    t2.Category,
    -- Integer division: (total - 1) / @Limit + 1 keeps a running total that is
    -- exactly @Limit (or any exact multiple) in the lower bucket.
    ((SELECT SUM(t1.Number)
      FROM @Detail t1
      WHERE t1.Category = t2.Category
        AND t1.DetailID <= t2.DetailID) - 1) / @Limit + 1 AS bucket
FROM @Detail t2
ORDER BY t2.DetailID;

答案 1 :(得分:1)

您需要维护一个累计总和(running total)才能知道何时达到@Limit。当然,CROSS APPLY在数据量大时可能扩展性不佳(这也取决于索引)。

编辑:已修复DDDD的存储桶1

-- Buckets each row by the running total of Number within its Category
-- (accumulated in DetailID order), then adds an extra bucket-1 row for a
-- category's first row when that row alone exceeds @Limit.
-- Expects @Detail and @Limit to be declared earlier in the same batch.
;WITH cRunning AS
(
    SELECT
        D1.DetailID, D1.Category, D3.RunningTotal, D3.GroupCount
    FROM
        @Detail D1
        CROSS APPLY
        -- GroupCount = number of rows of this category up to and including D1
        (SELECT
             COUNT(*) AS GroupCount,
             SUM(D2.Number) AS RunningTotal
        FROM @Detail D2
        WHERE D1.Category = D2.Category AND D1.DetailID >= D2.DetailID
        GROUP BY D2.Category) D3
)
SELECT
    DetailID, Category,
    -- Integer math: subtracting 1 first keeps a running total that is an exact
    -- multiple of @Limit (e.g. 500 with @Limit = 500) in the lower bucket.
    (RunningTotal - 1) / @Limit + 1 AS Bucket
FROM
    cRunning
UNION ALL
SELECT --singletons > @Limit
    DetailID, Category, 1
FROM
    cRunning
WHERE
    GroupCount = 1 AND RunningTotal > @Limit
ORDER BY
    Category, DetailID, Bucket

当然,如果您为DDDD插入一个值为零的虚拟行,我的第一个答案就有效:
...
-- xxx is a placeholder, not valid SQL: substitute a DetailID that is not already in @Detail
INSERT @Detail VALUES ( xxx, 'DDDD',0)
...
-- Buckets each row by the running total of Number within its Category
-- (accumulated in DetailID order).
-- Expects @Detail and @Limit to be declared earlier in the same batch.
SELECT
    D1.DetailID, D1.Category,
    -- Integer math: subtracting 1 first keeps a running total that is an exact
    -- multiple of @Limit in the lower bucket.
    (D3.RunningTotal - 1) / @Limit + 1 AS Bucket
FROM
    @Detail D1
    CROSS APPLY
    (SELECT SUM(D2.Number) AS RunningTotal
    FROM @Detail D2
    WHERE D1.Category = D2.Category AND D1.DetailID >= D2.DetailID
    GROUP BY D2.Category) D3

答案 2 :(得分:1)

最后!

我发现了代码中的一些错误并加以纠正,现在已经可以通过CTE实现。我原先假设:如果一个明细跨越多个桶,那么它总是会被拆分到这些桶中。而现在看来,您希望只有大于桶容量的明细才跨越多个桶,其他明细则整体推到下一个桶。您意识到在这种情况下,最终可能出现只装了50的桶,对吧?如果下一个明细是500,它会被推到下一个桶,而那个50将独占一个桶——真宽敞!

无论如何,我把代码放在这里,作为一个完全基于集合的解决方案,以防有人感兴趣:

-- Recursive CTE that fills buckets of capacity @Limit per Category, walking the
-- details in DetailID order. When a detail does not fit in what is left of the
-- current bucket, the overflow is carried over and the same DetailID is emitted
-- again in the next bucket.
-- Expects @Detail and @Limit to be declared earlier in the same batch.
-- sequence_ids: numbers the rows of each Category 1..n by DetailID.
;WITH sequence_ids AS (SELECT DetailID, Category, Number, ROW_NUMBER() OVER (PARTITION BY Category ORDER BY DetailID) AS sequence_id FROM @Detail),
main_cte AS (
    -- Anchor member: the first detail of every Category goes into bucket 1
    SELECT
        D1.DetailID,
        D1.Category,
        D1.Number,
        -- Capacity still free in the bucket after this detail (0 when full)
        CASE WHEN @Limit > D1.Number THEN @Limit - D1.Number ELSE 0 END AS RemainingBucket,
        -- Part of this detail that did not fit and must be carried forward
        CASE WHEN D1.Number > @Limit THEN D1.Number - @Limit ELSE 0 END AS RemainingDetail,
        D1.sequence_id,
        1 AS bucket
    FROM
        sequence_ids D1
    WHERE
        sequence_id = 1
    UNION ALL
    -- Recursive member. In both CASE expressions:
    --   COALESCE(NULLIF(RemainingBucket, 0), @Limit)            = capacity available
    --     (a previous bucket with nothing left means a fresh bucket of @Limit)
    --   COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) = amount to place
    --     (the carried-over remainder if there is one, otherwise the row's Number)
    SELECT
        D2.DetailID,
        D2.Category,
        D2.Number,
        CASE WHEN COALESCE(NULLIF(RemainingBucket, 0), @Limit) > COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) THEN COALESCE(NULLIF(RemainingBucket, 0), @Limit) - COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) ELSE 0 END AS RemainingBucket,
        CASE WHEN COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) > COALESCE(NULLIF(RemainingBucket, 0), @Limit) THEN COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) - COALESCE(NULLIF(RemainingBucket, 0), @Limit) ELSE 0 END AS RemainingDetail,
        D2.sequence_id,
        -- Advance to the next bucket only when the previous step left no room
        CASE WHEN RemainingBucket = 0 THEN bucket + 1 ELSE bucket END
    FROM
        main_cte
    INNER JOIN sequence_ids D2 ON
        D2.Category = main_cte.Category AND
        -- Revisit the same detail while part of it is still unplaced;
        -- otherwise move on to the next detail of the category
        ((main_cte.RemainingDetail > 0 AND D2.DetailID = main_cte.DetailID) OR
         (main_cte.RemainingDetail <= 0 AND D2.sequence_id = main_cte.sequence_id + 1))
)
SELECT
    *
FROM
    main_cte
ORDER BY
    Category,
    bucket,
    sequence_id

答案 3 :(得分:1)

-- Sample data: one row per detail, keyed by DetailID, carrying its Category and Number.
DECLARE @Detail TABLE (DetailID int PRIMARY KEY, Category char(4), Number int)

SET NOCOUNT ON
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 1, 'AAAA', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 2, 'AAAA',  50)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 3, 'AAAA', 300)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 4, 'AAAA', 200)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 5, 'BBBB', 500)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 6, 'CCCC', 200)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 7, 'CCCC', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 8, 'CCCC',  50)
INSERT INTO @Detail (DetailID, Category, Number) VALUES ( 9, 'DDDD', 800)
INSERT INTO @Detail (DetailID, Category, Number) VALUES (10, 'EEEE', 100)
INSERT INTO @Detail (DetailID, Category, Number) VALUES (11, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (12, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (13, 'AAAA', 200) --EDIT added
INSERT INTO @Detail (DetailID, Category, Number) VALUES (14, 'AAAA', 200) --EDIT added
SET NOCOUNT OFF

-- Upper bound that the loop below compares the accumulated Number against.
DECLARE @Limit int
SET @Limit = 500

-- Assigns every @Detail row to a per-Category bucket so that the Number total
-- accumulated in a bucket stays <= @Limit, walking the rows one at a time in
-- (Category, DetailID) order.
-- Expects @Detail (DetailID, Category, Number) and @Limit to be declared
-- earlier in the same batch.
DECLARE @DetailTemp table (PID INT IDENTITY(1,1), DetailID int primary key, Category char(4), Number int)
-- The third column holds the bucket number, so it is named Bucket rather than Number
DECLARE @DetailFinal table (DetailID int, Category char(4), Bucket int)

DECLARE @DetailCount int
SET @DetailCount = 0

-- The split loop further down subtracts @Limit until the remainder fits, so a
-- non-positive @Limit would never terminate; reject it before doing any work.
IF @Limit <= 0
BEGIN
    RAISERROR('@Limit must be greater than zero.', 16, 1)
    RETURN
END

-- The ORDER BY drives the IDENTITY values, so walking PID 1..@DetailCount
-- visits the rows grouped by Category and, within a category, in DetailID order.
INSERT INTO @DetailTemp
        (DetailID, Category, Number)
    SELECT DetailID, Category, Number
    FROM @Detail
    ORDER BY Category, DetailID
-- Must directly follow the INSERT: @@ROWCOUNT reflects the last statement only
SELECT @DetailCount = @@ROWCOUNT

DECLARE @CurrentPid int
SET @CurrentPid = 1

DECLARE @ThisId int
DECLARE @ThisCategory char(4)
DECLARE @ThisNumber int

DECLARE @CurrentCategory char(4)
DECLARE @CurrentSum INT
DECLARE @CurrentBucket INT


WHILE @CurrentPid <= @DetailCount
BEGIN
    SELECT @ThisId = DetailID, @ThisCategory = Category, @ThisNumber = Number
    FROM @DetailTemp
    WHERE PID = @CurrentPid

    -- @CurrentCategory is NULL on the first pass, so the comparison is not
    -- true and the ELSE branch initialises the state.
    IF @ThisCategory = @CurrentCategory
    BEGIN
        IF @CurrentSum + @ThisNumber > @Limit
        BEGIN
            -- Row does not fit in the current bucket: open the next one
            SET @CurrentBucket = @CurrentBucket + 1
            SET @CurrentSum = @ThisNumber
        END
        ELSE
        BEGIN
            SET @CurrentSum = @CurrentSum + @ThisNumber
        END
    END
    ELSE
    BEGIN
        -- New category: restart bucket numbering
        SET @CurrentBucket = 1
        SET @CurrentCategory = @ThisCategory
        SET @CurrentSum = @ThisNumber
    END

    -- A single row larger than @Limit is emitted once per bucket it spans
    WHILE @CurrentSum > @Limit
    BEGIN
        INSERT INTO @DetailFinal (DetailID, Category, Bucket)
            SELECT @ThisId, @CurrentCategory, @CurrentBucket
        SET @CurrentBucket = @CurrentBucket + 1
        SET @CurrentSum = @CurrentSum - @Limit
    END

    INSERT INTO @DetailFinal (DetailID, Category, Bucket)
        SELECT @ThisId, @CurrentCategory, @CurrentBucket

    SET @CurrentPid = @CurrentPid + 1
END


-- Full sort key so the output order is deterministic
SELECT DetailID, Category, Bucket
FROM @DetailFinal
ORDER BY Category, DetailID, Bucket