T-SQL:重复和查询最新的唯一记录

时间:2018-12-05 03:46:44

标签: sql sql-server tsql sql-server-2012

-- Sample data for the question: one row per (ID, Alias, Key).
-- RN orders a person's entries; RN = 1 is the most recent entry.
DECLARE @Table TABLE (
    [ID]    INT,
    [Alias] NVARCHAR(50),
    [Key]   INT,
    [Val]   NVARCHAR(10),
    [RN]    INT
);

INSERT INTO @Table
    ([ID], [Alias], [Key], [Val], [RN])
VALUES
    -- ID = 1, entries A1 (RN 1) through A4 (RN 4)
    (1, N'A1', 1, N'One',      1),
    (1, N'A1', 2, N'Two',      1),
    (1, N'A1', 3, N'',         1),
    (1, N'A2', 1, N'One',      2),
    (1, N'A2', 2, N'',         2),
    (1, N'A2', 3, N'Three',    2),
    (1, N'A3', 1, N'One',      3),
    (1, N'A3', 2, N'Two',      3),
    (1, N'A3', 4, N'Four_New', 3),
    (1, N'A4', 4, N'Four',     4),
    (1, N'A4', 5, N'Five',     4),

    -- ID = 2, entries B1 (RN 1) through B4 (RN 4)
    (2, N'B1', 1, N'',         1),
    (2, N'B1', 2, N'',         1),
    (2, N'B1', 3, N'',         1),
    (2, N'B2', 1, N'One',      2),
    (2, N'B2', 2, N'',         2),
    (2, N'B2', 3, N'',         2),
    (2, N'B3', 2, N'Two',      3),
    (2, N'B3', 4, N'Four',     3),
    (2, N'B4', 3, N'Three',    4),
    (2, N'B4', 6, N'Six',      4);

/* OUTPUT # 1
    ID Alias Key    Val
    1  A5    1      One         -- Kept the Val from Alias = A1
    1  A5    2      Two         -- Kept the Val from Alias = A1
    1  A5    3                  -- Kept the Val from Alias = A1
    1  A5    4      Four_New    -- Taking the Key/Val from Alias = A3 and assigned it to Alias = A1 for ID = 1
    1  A5    5      Five        -- Taking the Key/Val from Alias = A4 and assigned it to Alias = A1 for ID = 1

    2  B5    1                  -- Kept the Val from Alias = B1
    2  B5    2                  -- Kept the Val from Alias = B1
    2  B5    3                  -- Kept the Val from Alias = B1
    2  B5    4      Four        -- Taking the Key/Val from Alias = B3 and assigned it to Alias = B1 for ID = 2
    2  B5    6      Six         -- Taking the Key/Val from Alias = B4 and assigned it to Alias = B1 for ID = 2
*/


/* OUTPUT #2
ID Alias Key    Val
1  A5    1      One         -- Kept the Val from Alias = A1
1  A5    2      Two         -- Kept the Val from Alias = A1
1  A5    3      Three       -- Taking the Val from Alias = A2 because it's the 1st (based on RN ASC) latest filled value for Key = 3 for ID = 1
1  A5    4      Four_New    -- Got the Key/Val from Alias = A3 and assigned it to Alias = A1
1  A5    5      Five        -- Got the Key/Val from Alias = A4 and assigned it to Alias = A1

2  B5    1      One         -- Taking the Val from Alias = B2 because it's the 1st (based on RN ASC) latest filled value for Key = 1 for ID = 2
2  B5    2      Two         -- Taking the Val from Alias = B3 because it's the 1st (based on RN ASC) latest filled value for Key = 2 for ID = 2
2  B5    3      Three       -- Taking the Val from Alias = B4 because it's the 1st (based on RN ASC) latest filled value for Key = 3 for ID = 2
2  B5    4      Four        -- Got the Key/Val from Alias = B3 and assigned it to Alias = B1
2  B5    6      Six         -- Got the Key/Val from Alias = B4 and assigned it to Alias = B1
*/

描述: 以上是一张真实表的示例数据;真实表的数据量恰好很大。

字段:

  • ID =人员的唯一ID
  • Alias =每次为同一人再次创建条目时,都会为其赋予一个新的别名
  • Key =描述该人的属性
  • Val =该属性的值
  • RN =创建人员条目的顺序:1 为最新,2 为第二新,依此类推。

目标:

两个输出

输出#1 合并所有匹配人员的记录,创建一个全新的人员,该人员将从其最新条目中获取所有键/值数据,并补充先前条目中所有缺失的键/值(始终取最新的键/值)。

输出#2 合并所有匹配人员的记录,并创建一个全新的人员,该人员将从其最新条目中获取所有非空(非 NULL 且非空字符串)的键/值数据,并补充先前条目中所有缺失的键/值(始终取最新的键/值)。

1 个答案:

答案 0 :(得分:2)

我选择对结果进行排名

对于输出#1

-- Output #1: per id, keep for each [key] the row from the most recent entry
-- (lowest rn, empty values included) and label every kept row with one new
-- alias built from the alias letter and the highest existing alias number + 1.
-- NOTE(review): `t` is the table name used in the original answer; presumably
-- it holds the question's sample rows (id, alias, [key], [val], rn) -- confirm.
with base_rows
  as (select id
            ,[key]
            ,[val]
            ,rn
             -- rnk = 1 marks the row with the lowest rn for each (id, key)
            ,rank() over(partition by id,[key] order by rn asc) as rnk
             -- the numeric suffix is cast to int BEFORE max(): a string max would
             -- rank '9' above '10' and regenerate an alias that already exists
            ,cast(max(cast(substring(alias,2,len(alias)) as int)) over(partition by id)+1 as nvarchar(100)) as max_index
             -- first character of the alias; assumes a one-character prefix
            ,max(substring(alias,1,1)) over(partition by id) as alias_char
        from t
      )
 select id
        ,concat(alias_char,max_index)
        ,[key]
        ,[val]
        ,[rn]
   from base_rows
  where rnk=1
  -- explicit ordering: without it the row order of the result is not guaranteed
  order by id
          ,[key]


+----+------------------+-----+----------+----+
| id | (No column name) | key |   val    | rn |
+----+------------------+-----+----------+----+
|  1 | A5               |   1 | One      |  1 |
|  1 | A5               |   2 | Two      |  1 |
|  1 | A5               |   3 |          |  1 |
|  1 | A5               |   4 | Four_New |  3 |
|  1 | A5               |   5 | Five     |  4 |
|  2 | B5               |   1 |          |  1 |
|  2 | B5               |   2 |          |  1 |
|  2 | B5               |   3 |          |  1 |
|  2 | B5               |   4 | Four     |  3 |
|  2 | B5               |   6 | Six      |  4 |
+----+------------------+-----+----------+----+

对于输出#2

它与输出#1所用的查询相同,只是我们过滤掉了base_rows块中所有为空的记录。

-- Output #2: per id, keep for each [key] the most recent (lowest rn) row whose
-- [val] is filled, and label every kept row with one new alias built from the
-- alias letter and the highest existing alias number + 1.
-- NOTE(review): `t` is the table name used in the original answer; presumably
-- it holds the question's sample rows (id, alias, [key], [val], rn) -- confirm.
with alias_parts
  as (-- Computed over ALL rows of an id, before any filtering, so that an entry
      -- whose values are all blank still counts towards the highest alias number.
      select id
            ,[key]
            ,[val]
            ,rn
             -- the numeric suffix is cast to int BEFORE max(): a string max would
             -- rank '9' above '10' and regenerate an alias that already exists
            ,cast(max(cast(substring(alias,2,len(alias)) as int)) over(partition by id)+1 as nvarchar(100)) as max_index
             -- first character of the alias; assumes a one-character prefix
            ,max(substring(alias,1,1)) over(partition by id) as alias_char
        from t
      )
    ,base_rows
  as (select id
            ,[key]
            ,[val]
            ,rn
            ,max_index
            ,alias_char
             -- rnk = 1 marks the lowest-rn row among the filled rows of each (id, key)
            ,rank() over(partition by id,[key] order by rn asc) as rnk
        from alias_parts
       -- drops empty strings; NULLs are dropped too because NULL <> '' is not true
       where [val] <> ''
      )
 select id
        ,concat(alias_char,max_index)
        ,[key]
        ,[val]
        ,[rn]
   from base_rows
  where rnk=1
  -- explicit ordering: without it the row order of the result is not guaranteed
  order by id
          ,[key]



+----+------------------+-----+----------+----+
| id | (No column name) | key |   val    | rn |
+----+------------------+-----+----------+----+
|  1 | A5               |   1 | One      |  1 |
|  1 | A5               |   2 | Two      |  1 |
|  1 | A5               |   3 | Three    |  2 |
|  1 | A5               |   4 | Four_New |  3 |
|  1 | A5               |   5 | Five     |  4 |
|  2 | B5               |   1 | One      |  2 |
|  2 | B5               |   2 | Two      |  3 |
|  2 | B5               |   3 | Three    |  4 |
|  2 | B5               |   4 | Four     |  3 |
|  2 | B5               |   6 | Six      |  4 |
+----+------------------+-----+----------+----+