在SQL Server中不使用循环将内容分配到固定大小的存储桶

时间:2011-09-15 00:13:11

标签: sql-server tsql database-design sql-server-2008-r2 hierarchical-data

我在SQL Server 2008 R2中处理一组按优先级排序的内容,必须将这些内容分配给一组存储桶,以达到每条内容所指定的值。内容列表中的每个项目都与一个不规则(ragged)树层次结构(即存储桶)中的节点相关联。每个存储桶都有一个分配给它的值,并且只能容纳固定数量的内容。

我正在尝试按优先级顺序,将内容分配给与其关联的存储桶(或者树中这些存储桶的任意父级/祖父级存储桶)。我必须从值最高(且仍有空位)的存储桶开始,并且仅在已分配存储桶的值之和等于或超过内容值时才停止。

希望我粗略的例子会有所帮助。假设B是桶,每个桶可以容纳2个内容而C是内容。括号中的数字是桶值和所需的内容值。

Bucket to content tree

C1将被分配给B1(B1所在树中的最高值)和B4,使其总值达到7。B1和B4现在都只剩下一个插槽。

C2将被分配给B1和B5,此后B1中没有剩余插槽,B2中还剩1个插槽。

C3无法使用B1,因为没有可用的插槽,因此会导致B2,B5和B9在B5中没有插槽而B2 / B5中没有插槽。

等等......

我能想到如何以迭代方式实现这一点:先创建所有存储桶的列表,以及它们与所有子级/孙级存储桶的关系;然后循环,每次处理一条内容,为其分配存储桶并减少这些存储桶的剩余空间。我认为必须使用循环的原因是:每个存储桶中剩余的空位数量是未知的,它取决于所有更高优先级内容的处理结果。

但是,每次循环只处理一条内容,本质上让人觉得不对,应该有一种更有效的方法来解决这个分配问题——理想情况下只需一次遍历……

示例SQL Server代码(与上图匹配)

-- Core schema: the bucket hierarchy, the content items, and the links between them.

-- One row per bucket (a node of the hierarchy).
CREATE TABLE Bucket
(
     Id             INT
    ,Name           VARCHAR(3)
    ,BucketValue    INT
    ,SlotRemaining  INT   -- working column for the iterative solution: free slots left in this bucket
);

-- Child -> parent edges of the bucket hierarchy.
CREATE TABLE BucketParent
(
     ChildBucketId   INT
    ,ParentBucketId  INT
);

-- One row per content item to allocate.
CREATE TABLE Content
(
     Id               INT
    ,Name             VARCHAR(3)
    ,ContentValue     INT
    ,AllocationState  INT   -- working column for the iterative solution: 1 = unprocessed, 2 = complete
    ,Priority         INT   -- processing order; 1 = most important
);

-- Buckets each content item is directly related to.
CREATE TABLE ContentBucket
(
     ContentId  INT
    ,BucketId   INT
);
GO

-- Result table: one row per (content, bucket) pair chosen by the allocation,
-- i.e. the most valuable buckets each content item was placed in.
CREATE TABLE ContentPriorityBucket
(
     ContentId  INT
    ,BucketId   INT
);
GO

-- Sample data for the worked example.
-- Column lists are explicit so the inserts do not depend on the tables' column order.

-- Buckets B1..B11. SlotRemaining is left NULL here; it is initialised before each allocation run.
INSERT INTO Bucket (Id, Name, BucketValue, SlotRemaining)
VALUES   (1,  'B1',  4, NULL)
        ,(2,  'B2',  5, NULL)
        ,(3,  'B3',  4, NULL)
        ,(4,  'B4',  3, NULL)
        ,(5,  'B5',  3, NULL)
        ,(6,  'B6',  3, NULL)
        ,(7,  'B7',  4, NULL)
        ,(8,  'B8',  2, NULL)
        ,(9,  'B9',  1, NULL)
        ,(10, 'B10', 2, NULL)
        ,(11, 'B11', 1, NULL);

-- Hierarchy edges (child, parent). Some buckets have two parents (e.g. 5 -> 1 and 5 -> 2).
INSERT INTO BucketParent (ChildBucketId, ParentBucketId)
VALUES   (8,  4)
        ,(4,  1)
        ,(9,  5)
        ,(5,  1)
        ,(5,  2)
        ,(10, 5)
        ,(10, 6)
        ,(6,  2)
        ,(6,  3)
        ,(11, 6)
        ,(11, 7)
        ,(7,  3);

-- Content C1..C4 with required value and priority. AllocationState is left NULL here;
-- it is initialised before each allocation run.
INSERT INTO Content (Id, Name, ContentValue, AllocationState, Priority)
VALUES   (1, 'C1', 5,  NULL, 1)
        ,(2, 'C2', 8,  NULL, 2)
        ,(3, 'C3', 9,  NULL, 3)
        ,(4, 'C4', 10, NULL, 4);

-- Direct content -> bucket relationships (content id, bucket id).
INSERT INTO ContentBucket (ContentId, BucketId)
VALUES   (1, 8)
        ,(1, 4)
        ,(2, 9)
        ,(3, 9)
        ,(4, 10)
        ,(4, 7);
GO

-- Iterative solution (the one to improve on), step 1: reset all working state
-- so the allocation can be re-run from scratch.

-- Discard the allocations recorded by the previous run.
TRUNCATE TABLE ContentPriorityBucket;

-- Give every bucket its maximum size of 2 free slots.
-- Whole-table update is intentional: every row is reset.
UPDATE  Bucket
SET     SlotRemaining = 2;

-- Mark every content item as unprocessed (state 1).
-- Whole-table update is intentional: every row is reset.
UPDATE  Content
SET     AllocationState = 1;

GO

-- Iterative allocation: process one content item per pass, in Priority order.
-- For each item:
--   1. collect its related buckets plus every ancestor bucket,
--   2. keep those with a free slot, ordered by BucketValue (highest first),
--   3. take buckets from the top until their running total matches or exceeds
--      the item's ContentValue (or the available buckets run out),
--   4. record the chosen buckets and consume one slot in each.
-- A loop is used because the free slots seen by an item depend on the
-- allocations made for every higher-priority item.
DECLARE @CurrentContent      int;
DECLARE @CurrentContentValue int;

WHILE EXISTS (SELECT * FROM Content WHERE AllocationState = 1)
BEGIN
    -- Next unprocessed content item; Id breaks ties between equal priorities.
    SELECT  TOP (1)
            @CurrentContent      = Id
            ,@CurrentContentValue = ContentValue
    FROM    Content
    WHERE   AllocationState = 1
    ORDER BY Priority, Id;

    WITH    BucketList (Id, BucketValue, SlotRemaining)
    AS
    (
        -- Anchor: buckets directly related to the current content item.
        -- Full buckets are kept here because their ancestors may still have free slots.
        SELECT      b.Id
                    ,b.BucketValue
                    ,b.SlotRemaining
        FROM        ContentBucket cb
        INNER JOIN  Bucket b ON cb.BucketId = b.Id
        WHERE       cb.ContentId = @CurrentContent
        UNION ALL
        -- Recursive step: walk up to each bucket's parent(s).
        SELECT      b.Id
                    ,b.BucketValue
                    ,b.SlotRemaining
        FROM        BucketList bl
        INNER JOIN  BucketParent bp ON bl.Id = bp.ChildBucketId
        INNER JOIN  Bucket b ON bp.ParentBucketId = b.Id
    ),
    DistinctBucketList (Id, BucketValue, SlotRemaining)
    AS
    (
        -- A bucket reachable through more than one path appears several times; dedupe.
        SELECT DISTINCT
                    Id
                    ,BucketValue
                    ,SlotRemaining
        FROM        BucketList
    ),
    BucketListOrdered (Id, BucketValue, RowOrder)
    AS
    (
        -- Rank the buckets that still have room, most valuable first.
        -- Id is a tie-break so equal-valued buckets rank consistently.
        SELECT      Id
                    ,BucketValue
                    ,ROW_NUMBER() OVER (ORDER BY BucketValue DESC, Id)
        FROM        DistinctBucketList
        WHERE       SlotRemaining > 0
    ),
    CulmativeBucketListWithinRequiredValue (Id, RowOrder, CulmativeBucketValue, RequiredBucket)
    AS
    (
        -- Running total of BucketValue in rank order (triangular self-join).
        -- RequiredBucket = 1 marks ranks whose running total is still strictly below
        -- the target, i.e. at least one more bucket is needed after them.
        -- Strict "<" matters: with "<=" an exact match would pull in a surplus bucket.
        SELECT      blo.Id
                    ,blo.RowOrder
                    ,SUM(blc.BucketValue) AS CulmativeBucketValue
                    ,CASE
                        WHEN SUM(blc.BucketValue) < @CurrentContentValue THEN 1
                        ELSE 0
                     END AS RequiredBucket
        FROM        BucketListOrdered blo
        INNER JOIN  BucketListOrdered blc ON blc.RowOrder <= blo.RowOrder
        GROUP BY    blo.Id, blo.RowOrder
    )
    -- Keep every "still short" rank plus the next one, which is the bucket that makes
    -- the total match or exceed the target. COALESCE covers the case where the very
    -- first bucket is already enough: MAX over no rows is NULL, which would otherwise
    -- filter out every row and leave the content item with no bucket at all.
    INSERT INTO ContentPriorityBucket (ContentId, BucketId)
    SELECT      @CurrentContent
                ,b.Id
    FROM        CulmativeBucketListWithinRequiredValue b
    WHERE       b.RowOrder <= (SELECT COALESCE(MAX(RowOrder), 0) + 1
                               FROM   CulmativeBucketListWithinRequiredValue
                               WHERE  RequiredBucket = 1);

    -- Consume one slot in every bucket just allocated to this content item.
    UPDATE      Bucket
    SET         SlotRemaining = SlotRemaining - 1
    WHERE       Id IN (SELECT BucketId FROM ContentPriorityBucket WHERE ContentId = @CurrentContent);

    -- Mark the content item as complete so the loop moves on (and terminates).
    UPDATE      Content
    SET         AllocationState = 2
    WHERE       Id = @CurrentContent;
END

-- Final allocation, in a deterministic order.
SELECT      ContentId, BucketId
FROM        ContentPriorityBucket
ORDER BY    ContentId, BucketId;

/*
Optional cleanup (kept commented out): run these manually to drop the example tables.
DROP TABLE Bucket 
DROP TABLE BucketParent
DROP TABLE Content
DROP TABLE ContentBucket
DROP TABLE ContentPriorityBucket 
*/

1 个答案:

答案 0 :(得分:1)

关于这个问题,有几点需要说明。

首先,广义的装箱问题(bin-packing)是NP完全(NP-Complete)问题,因此一般无法通过单次遍历解决。这个特定的装箱问题由于是按顺序装箱,情况可能有所不同,但问题本身的复杂度依然存在;它肯定不是O(1),所以无论如何都可能需要一个循环。

因此,单次遍历的非循环解决方案看起来不太可能实现;这似乎不是一个适合用基于集合的方案来解决的问题。您可以创建一个表值CLR函数,用它找出每个项目适合放入的存储桶。否则,保留循环解决方案也没有问题。(如果您发布代码,可能更容易看出是否还有改进的余地。)