仅输出行的超集

时间:2019-04-17 07:45:42

标签: sql sql-server tsql

给定表 Products(ProductGroupId INT NOT NULL, ProductId INT NOT NULL),我的任务是只输出这样一些产品组的行:该产品组不是任何其他产品组的真子集(也就是说,它要么是其他产品组的超集,要么与其他产品组不存在包含关系)。

例如,对于数据


    ProductGroupId ProductId
        1             101
        1             102
        1             103
        2             101
        3             102
        4             102
        4             103
        5             104

我希望


    ProductGroupId ProductId
        1             101
        1             102
        1             103
        5             104

因为 ProductGroupId = 1 是产品组 2、3、4 的超集,而产品组 5 不被任何其他产品组包含。

到目前为止我的解决方案:

第一:我们生成可能存在包含关系的 ProductGroupId 对,即至少共有一个 ProductId 的两个不同产品组。

第二:我们检查超集ProductGroupId(父级)是否包含另一个ProductGroupId(子级)的所有ProductId,而父级具有比子级更多的ProductId。

到那时我们将拥有:

    ProductGroupId ProductId
        1             101
        1             102
        1             103

因此还需要补上这样一些产品组的行:它们既不是任何产品组的超集,也不是任何产品组的子集(例如产品组 5)。

    ProductGroupId ProductId
        5             104

UNION之后,我们有

    ProductGroupId ProductId
        1             101
        1             102
        1             103
        5             104

代码

-- Sample data: one row per (product group, product) membership.
DECLARE @products TABLE
(
    ProductGroupId INT NOT NULL,
    ProductId INT NOT NULL
);

-- Load the eight distinct sample rows with a single table value constructor.
INSERT INTO @products (ProductGroupId, ProductId)
VALUES
    (1, 101),
    (1, 102),
    (1, 103),
    (2, 101),
    (3, 102),
    (4, 102),
    (4, 103),
    (5, 104);

-- Returns the rows of every product group that is not a proper subset of
-- another product group.
;WITH possible_pairs
AS
(
    -- Ordered pairs of different groups that share at least one ProductId;
    -- only such pairs can be in a containment relationship.
    SELECT DISTINCT
        parent.ProductGroupId AS ParentId,
        child.ProductGroupId AS ChildId
    FROM @products AS parent
        INNER JOIN @products AS child
            ON child.ProductId = parent.ProductId
    WHERE parent.ProductGroupId <> child.ProductGroupId
)
, supersets
AS
(
    -- Pairs in which the parent is a proper superset of the child.
    SELECT pp.ParentId, pp.ChildId
    FROM possible_pairs AS pp
    WHERE NOT EXISTS
    (
        -- All ProductIds in child exist in parent
        SELECT p_child.ProductId
        FROM @products AS p_child
        WHERE p_child.ProductGroupId = pp.ChildId
        EXCEPT
        SELECT p_parent.ProductId
        FROM @products AS p_parent
        WHERE p_parent.ProductGroupId = pp.ParentId
    )
    AND EXISTS
    (
        -- Parent has at least one ProductId that the child lacks
        SELECT p_parent.ProductId
        FROM @products AS p_parent
        WHERE p_parent.ProductGroupId = pp.ParentId
        EXCEPT
        SELECT p_child.ProductId
        FROM @products AS p_child
        WHERE p_child.ProductGroupId = pp.ChildId
    )
)
-- A group is reported exactly when it never appears as the child of a
-- superset pair. That single test covers:
--   * top-level supersets (e.g. group 1),
--   * groups unrelated to every other group (e.g. group 5),
-- and excludes groups that are both superset and subset
-- (e.g. group 4: superset of 3, subset of 1).
-- Filtering with NOT EXISTS instead of joining to supersets avoids repeating
-- each parent row once per child; DISTINCT keeps the output de-duplicated
-- even when @products itself contains repeated rows.
SELECT DISTINCT
    p.ProductGroupId,
    p.ProductId
FROM @products AS p
WHERE NOT EXISTS
(
    SELECT *
    FROM supersets AS s
    WHERE s.ChildId = p.ProductGroupId
)
ORDER BY p.ProductGroupId, p.ProductId;
/*
    ProductGroupId ProductId
        1             101
        1             102
        1             103
        5             104
*/

2 个答案:

答案 0 :(得分:0)

使用row_number()

-- Keeps, for each productid, the row with the smallest ProductGroupId.
-- NOTE(review): this ranks groups per product only; it does not compare the
-- product sets of two groups, so it is not a general superset test.
with ranked as
(
    select
        productid,
        ProductGroupId,
        row_number() over (partition by productid order by ProductGroupId) as rn
    from tablename
)
select
    productid,
    ProductGroupId,
    rn
from ranked
where rn = 1

答案 1 :(得分:0)

我已经在 SQL Server 中测试了下面的代码,运行正常。我另外创建了一个表变量 @productSuper 来保存超集。

    -- Sample data: one row per (product group, product) membership.
DECLARE @products TABLE (ProductGroupId INT NOT NULL, ProductId INT NOT NULL);

INSERT INTO @products (ProductGroupId, ProductId)
VALUES
    (1, 101),
    (1, 102),
    (1, 103),
    (2, 101),
    (3, 102),
    (4, 102),
    (4, 103),
    (5, 104);

-- Show the input rows.
SELECT ProductGroupId, ProductId
FROM @products;

-- Holds the rows of every group that is not a proper subset of another group.
DECLARE @productSuper TABLE (ProductGroupId INT NOT NULL, ProductId INT NOT NULL);

-- Set-based replacement for the former WHILE loop. The loop started from a
-- hard-coded group 1 and kept a row only when its ProductId had not been seen
-- in a lower-numbered group, which depends on group numbering rather than on
-- set containment (a group {101, 105} numbered 2 would have produced the
-- partial row (2, 105)).
WITH group_pairs AS
(
    -- Ordered pairs of different groups sharing at least one ProductId;
    -- only such pairs can be in a containment relationship.
    SELECT DISTINCT
        parent.ProductGroupId AS ParentId,
        child.ProductGroupId AS ChildId
    FROM @products AS parent
        INNER JOIN @products AS child
            ON child.ProductId = parent.ProductId
    WHERE parent.ProductGroupId <> child.ProductGroupId
),
proper_subsets AS
(
    -- Groups whose products are all contained in a strictly larger group.
    SELECT gp.ChildId
    FROM group_pairs AS gp
    WHERE NOT EXISTS
    (
        -- No ProductId of the child is missing from the parent
        SELECT c.ProductId
        FROM @products AS c
        WHERE c.ProductGroupId = gp.ChildId
        EXCEPT
        SELECT p.ProductId
        FROM @products AS p
        WHERE p.ProductGroupId = gp.ParentId
    )
    AND EXISTS
    (
        -- The parent has at least one ProductId the child lacks
        SELECT p.ProductId
        FROM @products AS p
        WHERE p.ProductGroupId = gp.ParentId
        EXCEPT
        SELECT c.ProductId
        FROM @products AS c
        WHERE c.ProductGroupId = gp.ChildId
    )
)
INSERT INTO @productSuper (ProductGroupId, ProductId)
SELECT src.ProductGroupId, src.ProductId
FROM @products AS src
WHERE NOT EXISTS
(
    SELECT *
    FROM proper_subsets AS ps
    WHERE ps.ChildId = src.ProductGroupId
);

-- Expected for the sample data: groups 1 (101, 102, 103) and 5 (104).
SELECT ProductGroupId, ProductId
FROM @productSuper
ORDER BY ProductGroupId, ProductId;