使用sql选择最佳得分结果

时间:2013-09-20 03:16:46

标签: sql-server tsql sql-server-2005

我有一组数据需要选择最佳匹配。每条记录都有一个名称和CNum。具有相同“名称”的每条记录应具有相同的“CNum”;实际上,一些'Name'匹配具有相同的CNum而一些不具有(这是要解决的问题)。我需要确定哪个CNum更好,并使用单个CNum更新所有'Name'匹配。

我使用ParentId列更新了表格以显示匹配的名称,并使用SubParentId标记匹配的“名称”和“CNum”以帮助提取所需的结果(并使其更容易查看匹配)。

为了帮助确定在同一“名称”组中哪个CNum更好,每条记录都有两列评分:'ScoreA'和'ScoreB';得分越低越好。以下是我用来确定哪个CNum最好的规则:

  1. 如果名称组中的所有记录(相同的ParentId)具有相同的CNum(相同的SubParentId),则不执行任何操作
  2. 如果同一名称组中的记录并非都具有相同的CNum,请选择ScoreA最低的记录的ID,并将该组的parentId更新为所选的ID
  3. 如果ScoreA打平(没有唯一的最低ScoreA),则将该组每条记录的parentId更新为ScoreB最低的记录的ID
  4. 如果ScoreB也打平(没有唯一的最低ScoreB),且打平的记录来自不同区域,并且其中只有一条记录的区域为'AB',则将该组每条记录的parentId更新为区域为'AB'的记录的ID
  5. 如果仍然无法决出胜者,或者存在多个'AB'区域的记录(CNum不同且得分打平),请将该“名称”组中每条记录的NoMatch设置为1
  6. 假设:如果他们有相同的姓名和CNum他们的分数将是相同的

    是否有一种很好的方法可以应用上述规则来获得我正在寻找的结果?

    以下是我正在寻找的数据样本和结果,并在插入语句旁边注明了预期的获胜结果:

    -- Results: one row per (Name, CNum) observation together with its two scores.
    CREATE TABLE Results
    (
        -- surrogate key, auto-numbered from 1
        Id INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
        -- rows sharing a Name form one "name group"
        Name VARCHAR(200) NULL,
        -- the value that should end up identical within a name group
        CNum NVARCHAR(100) NULL,
        Region NVARCHAR(3) NULL,
        -- primary score; lower is better
        ScoreA INT NULL,
        -- secondary score; lower is better
        ScoreB INT NULL,
        -- filled in later: Id standing for the rows that share this Name
        ParentId INT NULL,
        -- filled in later: Id standing for the rows that share this Name and CNum
        SubParentId INT NULL,
        -- flag column, off (0) unless set explicitly
        NoMatch BIT NOT NULL DEFAULT (0)
    );
    GO
    
    -- Sample data. One single-row INSERT per record keeps the IDENTITY values
    -- in the order written here. Scores are plain integer literals and the
    -- NVARCHAR columns (CNum, Region) use N'' literals.

    -- Grasslands: leave as is, every row carries the same CNum
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Grasslands', N'91588', N'WY', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Grasslands', N'91588', N'WY', -668, 13);

    -- Acme Co: winner noted below --> best ScoreA
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'269415003', N'AB', -13455, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'269415003', N'AB', -13455, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'5695003', N'AB', -155, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'269415003', N'AB', -13460, -23); -- Expected Winner
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'5695003', N'AB', -155, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'5695003', N'AB', -155, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'856545', N'AB', -22, 16);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Acme Co', N'856545', N'AB', -22, 16);

    -- Zuland Ltd: winner noted below --> best ScoreB
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zuland Ltd', N'654543', N'AB', -13455, -28);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zuland Ltd', N'654543', N'AB', -13455, -28);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zuland Ltd', N'654543', N'AB', -13455, -23);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zuland Ltd', N'5603', N'ON', -13455, -30); -- Expected Winner
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zuland Ltd', N'5603', N'ON', -13455, -23);

    -- Emco Inc: winner noted below --> AB tie breaker
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'5695003', N'ON', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'5695003', N'AB', -668, 13); -- Expected Winner
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'5545', N'CA', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'5545', N'CA', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'995588', N'WY', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Emco Inc', N'995588', N'WY', -668, 13);

    -- Zemco Inc: No Winner --> No AB tie breaker
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zemco Inc', N'5695003', N'TN', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zemco Inc', N'5695003', N'TN', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zemco Inc', N'5545', N'CA', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Zemco Inc', N'995588', N'WY', -668, 13);

    -- Texco Inc: No Winner --> No AB tie breaker
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Texco Inc', N'234JJJ', N'TN', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Texco Inc', N'555552', N'TN', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Texco Inc', N'234JJJ', N'CA', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Texco Inc', N'555552', N'WY', -668, 13);

    -- Grasslands: Leave as is --> they are all the same
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Grasslands', N'91588', N'WY', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Grasslands', N'91588', N'WY', -668, 13);

    -- Mike Inc: No Match --> more than 1 'AB' with tied scores
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Mike Inc', N'234JJJ', N'AB', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Mike Inc', N'555552', N'AB', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Mike Inc', N'234JJJ', N'AB', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Mike Inc', N'555552222', N'WY', -668, 13);
    INSERT INTO Results (Name, CNum, Region, ScoreA, ScoreB)
    VALUES ('Mike Inc', N'90210', N'KT', -668, 13);

    GO
    
    -- set parent id matched on Name
    -- Every row of a Name group receives the same ParentId: the lowest Id in
    -- that group. Joining to one pre-aggregated row per Name keeps the UPDATE
    -- deterministic; a plain self-join on Name matches every row of the group,
    -- and SQL Server then picks an arbitrary one of them for each updated row.
    -- Rows with a NULL Name match nothing and fall back to their own Id.
    UPDATE  r
    SET     r.ParentId = COALESCE( g.FirstId, r.Id )
    FROM    Results r
    LEFT JOIN (
                SELECT  Name,
                        MIN( Id ) AS FirstId
                FROM    Results
                GROUP BY Name
              ) g
      ON    r.Name = g.Name
    GO
    
    -- set sub-parent id matched on Name and CNum
    -- Every row sharing both Name and CNum receives the same SubParentId: the
    -- lowest Id of that (Name, CNum) pair. The pre-aggregated join yields
    -- exactly one match per row, so the result does not depend on which of
    -- several matching rows SQL Server happens to read.
    -- Rows with a NULL Name or NULL CNum match nothing and keep their own Id.
    UPDATE  r
    SET     r.SubParentId = COALESCE( g.FirstId, r.Id )
    FROM    Results r
    LEFT JOIN (
                SELECT  Name,
                        CNum,
                        MIN( Id ) AS FirstId
                FROM    Results
                GROUP BY Name, CNum
              ) g
      ON    r.Name = g.Name AND
            r.CNum = g.CNum
    GO
    

1 个答案:

答案 0 :(得分:1)

所以,针对你给出的规则,这就是我想出的方案。将来唯一可能失效的地方是区域规则(规则4):如果优先的区域不再是“AB”,就需要调整。由于“AB”在这里的区域中按字母顺序排在最前面,我可以使用下面的代码:

-- Resolve every Name group to a single CNum, following the question's rules.
--   * Rows are ranked per Name by lowest ScoreA, then lowest ScoreB, then by
--     whether Region is 'AB' (an 'AB' row ranks ahead of any other region).
--     The region is tested explicitly instead of relying on 'AB' sorting
--     first alphabetically.
--   * RANK() gives tied rows the same rank, so all rows that are still tied
--     for first place are kept as candidates.
--   * If the candidates share one CNum, that CNum is written to every row of
--     the group that does not already carry it.
--   * If the candidates carry different CNums (no 'AB' row in the tie, or
--     several 'AB' rows with different CNums), no winner exists: every row of
--     the group is flagged NoMatch = 1 and its CNum is left unchanged.
-- Groups whose rows already agree on one CNum are not written to.
-- NOTE(review): NULL scores sort first in SQL Server and would therefore rank
-- as the best score -- confirm whether ScoreA/ScoreB can actually be NULL.
WITH Ranked AS
(
    SELECT  [Name],
            [CNum],
            RANK() OVER (PARTITION BY [Name]
                         ORDER BY [ScoreA] ASC,
                                  [ScoreB] ASC,
                                  CASE WHEN [Region] = N'AB' THEN 0 ELSE 1 END ASC) AS ScoreRank
    FROM    [dbo].[Results]
),
Winner AS
(
    -- one row per Name: how many different CNums are tied for first place,
    -- and (when there is only one) which CNum that is
    SELECT  [Name],
            MIN([CNum])            AS BestCNum,
            COUNT(DISTINCT [CNum]) AS CandidateCNums
    FROM    Ranked
    WHERE   ScoreRank = 1
    GROUP BY [Name]
)
UPDATE  r
SET     [CNum]    = CASE WHEN w.CandidateCNums = 1 THEN w.BestCNum ELSE r.[CNum] END,
        [NoMatch] = CASE WHEN w.CandidateCNums > 1 THEN 1 ELSE r.[NoMatch] END
FROM    [dbo].[Results] AS r
INNER JOIN Winner AS w ON w.[Name] = r.[Name]
WHERE   ( w.CandidateCNums = 1 AND ( r.[CNum] IS NULL OR r.[CNum] <> w.BestCNum ) )
   OR   ( w.CandidateCNums > 1 AND r.[NoMatch] = 0 );