SQL Server:如何为过去的交易分配正确的帐户?

时间:2017-09-26 17:05:36

标签: sql sql-server type-2-dimension

各位 SQL 高手:我需要把交易分配给正确的帐户。2017 年 3 月 1 日之前的生效开始日期和生效结束日期保存在旧表(#legacyt2)中,该日期之后的所有记录保存在当前维度表(#type2)中。我需要根据 #fact 表中的交易日期,找到它所落入的开始/结束生效日期区间,并分配正确的 skey(skey 存在于 #type2 和 #legacyt2 中)。

我已经很接近了,但还是没能把帐户正确地分配出去。下面附上建表脚本和目前可以运行的查询。

基于所提供的脚本,我期望得到的最终结果是:

enter image description here

    -- #type2: the current dimension table described in the question.
    -- Columns: ckey/skey identify the row, begdate/enddate bound its validity.
    -- A NULL enddate is used below for the version that has not been closed yet.

    -- Guarded drop so the script also runs cleanly in a fresh session.
    if object_id('tempdb..#type2') is not null
        drop table #type2;

    create table #type2
    (
        ckey        int,
        skey        int,
        lastUpdated date,
        begdate     date,
        enddate     date
    );

    -- ISO 8601 (yyyy-mm-dd) literals are used because m/d/yyyy strings are
    -- interpreted according to the session language / DATEFORMAT setting.
    insert into #type2
        (ckey, skey, lastUpdated, begdate, enddate)
    values
        (1, 100, '2017-09-26', '2017-05-09', '2017-05-11'),
        (1, 125, '2017-09-26', '2017-05-11', null);

    -- #legacyt2: the legacy dimension table described in the question; it has
    -- the same column layout as #type2. A NULL enddate is used below for the
    -- version that has not been closed yet.

    -- Guarded drop so the script also runs cleanly in a fresh session.
    if object_id('tempdb..#legacyt2') is not null
        drop table #legacyt2;

    create table #legacyt2
    (
        ckey        int,
        skey        int,
        lastUpdated date,
        begdate     date,
        enddate     date
    );

    -- ISO 8601 (yyyy-mm-dd) literals are used because m/d/yyyy strings are
    -- interpreted according to the session language / DATEFORMAT setting.
    insert into #legacyt2
        (ckey, skey, lastUpdated, begdate, enddate)
    values
        (1, 100, '2017-03-01', '2016-01-22', '2017-02-05'),
        (1, 125, '2017-03-01', '2017-02-05', null);


    -- #fact: the transactions that need an skey assigned.
    -- Each row carries the ckey, the transaction date, an account and an amount.

    -- Guarded drop so the script also runs cleanly in a fresh session.
    if object_id('tempdb..#fact') is not null
        drop table #fact;

    create table #fact
    (
        ckey     int,
        xactDate date,
        acct     int,
        amt      decimal(10,2)
    );

    -- One multi-row insert instead of six single-row statements.
    -- ISO 8601 (yyyy-mm-dd) literals are used because m/d/yyyy strings are
    -- interpreted according to the session language / DATEFORMAT setting.
    insert into #fact
        (ckey, xactDate, acct, amt)
    values
        (1, '2017-01-24',  1, 10.00),
        (1, '2017-01-24', 10, 10.00),
        (1, '2017-01-30',  1, 20.00),
        (1, '2017-02-02',  1, 80.00),
        (1, '2017-02-03', 10, 20.00),
        (1, '2017-02-06',  1, 50.00);


     -- Preview the current and the legacy dimension rows before combining them.
     select t2.ckey,
            t2.skey,
            t2.lastUpdated,
            t2.begdate,
            t2.enddate
       from #type2 as t2;

     select l2.ckey,
            l2.skey,
            l2.lastUpdated,
            l2.begdate,
            l2.enddate
       from #legacyt2 as l2;

     -- Guarded drop so the script also runs in a session where #dim does not exist yet.
     if object_id('tempdb..#dim') is not null
         drop table #dim;
     -- GO (SSMS / sqlcmd batch separator) ends the batch here so the statements
     -- below are compiled only after any leftover #dim, possibly created with
     -- different columns, has been removed.
     go

     -- Step #1: collect legacy and current dimension type 2 info and rank to identify current and previous versions.
     -- Each #type2 row is paired with the #legacyt2 row sharing its ckey and skey
     -- (NULL legacy dates when there is none). GROUP BY over every selected column
     -- collapses exact duplicates; rnk numbers the rows per ckey by begdate.
     with Dimension as
     (
         select t2.ckey,
                t2.skey,
                t2.begdate,
                l2.begdate as LegacyBegDate,
                t2.enddate,
                l2.enddate as LegacyEndDate,
                -- begdate is a grouping column, so no aggregate is needed here.
                row_number() over (partition by t2.ckey order by t2.begdate) as rnk
           from #type2 as t2
           left join #legacyt2 as l2
             on l2.ckey = t2.ckey
            and l2.skey = t2.skey
          group by t2.ckey,
                   t2.skey,
                   t2.begdate,
                   t2.enddate,
                   l2.begdate,
                   l2.enddate
     )
     select ckey,
            skey,
            begdate,
            LegacyBegDate,
            enddate,
            LegacyEndDate,
            rnk
       into #dim
       from Dimension;

     -- ORDER BY belongs on the query that returns rows: ordering a SELECT ... INTO
     -- does not determine the order in which the table is later read.
     select ckey,
            skey,
            begdate,
            LegacyBegDate,
            enddate,
            LegacyEndDate,
            rnk
       from #dim
      order by ckey,
               begdate;

2 个答案:

答案 0 :(得分:1)

我想我理解你了。试试吧:

    -- Assigns an skey to every #fact row and stores the result in #dim.
    --
    -- NOTE(review): facts and dimension rows are paired purely by position
    -- (a.rn = b.rn within each ckey). No comparison is made between the
    -- transaction date and the dimension's date range, so the pairing depends
    -- on how many rows each side has and on their sort order. Verify against
    -- real data before relying on it.

    -- The question's script already creates a #dim with a different layout;
    -- drop it so SELECT ... INTO below does not fail.
    IF OBJECT_ID('tempdb..#dim') IS NOT NULL
        DROP TABLE #dim;
    -- GO (SSMS / sqlcmd batch separator) ends the batch here so the statements
    -- below are compiled only after the old #dim has been removed.
    GO

    WITH dimension AS (
        -- Every begin date and every non-NULL end date from both dimension
        -- tables, as (ckey, skey, date). UNION removes exact duplicates.
        SELECT ckey,
               skey,
               begdate AS xactDate
        FROM   #type2
        UNION
        SELECT ckey,
               skey,
               enddate AS xactDate
        FROM   #type2
        WHERE  enddate IS NOT NULL
        UNION
        SELECT ckey,
               skey,
               begdate AS xactDate
        FROM   #legacyt2
        UNION
        SELECT ckey,
               skey,
               enddate AS xactDate
        FROM   #legacyt2
        WHERE  enddate IS NOT NULL
    ),
    dimension2 AS (
        -- Number the dimension dates per ckey, ordered by skey and then date.
        SELECT ROW_NUMBER() OVER (PARTITION BY ckey
                                  ORDER BY skey, xactDate) AS rn,
               ckey,
               skey,
               xactDate
        FROM   dimension
    ),
    fact1 AS (
        -- Number the transactions per ckey by date. acct and amt are added as
        -- tie-breakers so rows sharing a date always get the same numbers.
        SELECT ROW_NUMBER() OVER (PARTITION BY ckey
                                  ORDER BY xactDate, acct, amt) AS rn,
               ckey,
               xactDate,
               acct,
               amt
        FROM   #fact
    )
    SELECT a.ckey,
           b.skey,
           a.xactDate,
           a.acct,
           a.amt
    INTO   #dim
    FROM   fact1 AS a
           INNER JOIN dimension2 AS b
                   ON a.ckey = b.ckey
                  AND a.rn = b.rn;

    -- ORDER BY belongs on the query that returns rows; these keys reproduce
    -- the numbering order used in fact1.
    SELECT ckey,
           skey,
           xactDate,
           acct,
           amt
    FROM   #dim
    ORDER  BY ckey,
              xactDate,
              acct,
              amt;

结果

ckey        skey        xactDate   acct        amt
----------- ----------- ---------- ----------- ---------------------------------------
1           100         2017-01-24 1           10.00
1           100         2017-01-24 10          10.00
1           100         2017-01-30 1           20.00
1           100         2017-02-02 1           80.00
1           125         2017-02-03 10          20.00
1           125         2017-02-06 1           50.00

答案 1 :(得分:0)

尽管 @Mauricio Pontalti Neri 的回答非常出色,但最终解决这个问题最简单、最清晰的方法是:在 #fact 暂存表上为 skey 新增一列,然后利用 #dim 对这一列进行更新。这个方案谈不上优雅,但确实能解决问题。

这是解决方案的样子:

     -- step #0 prerequisites, run in their own batch.
     -- #dim is rebuilt below, so drop any copy left over from an earlier run.
     if object_id('tempdb..#dim') is not null
         drop table #dim;

     -- The updates below write to #fact.skey, which the original #fact
     -- definition does not have; add it once if it is missing.
     if not exists (select 1
                      from tempdb.sys.columns
                     where object_id = object_id('tempdb..#fact')
                       and name = 'skey')
         alter table #fact add skey int null;
     -- GO (SSMS / sqlcmd batch separator) ends the batch here so the statements
     -- below are compiled after the new column exists and the old #dim is gone.
     go

     -- step #1 dump the legacy/current dimension information into a combined #temp table.
     -- Each #type2 row is paired with the #legacyt2 row sharing its ckey and skey
     -- (NULL legacy dates when there is none); rnk numbers the rows per ckey by begdate.
     with Dimension as
     (
         select t2.ckey,
                t2.skey,
                t2.begdate,
                l2.begdate as LegacyBegDate,
                t2.enddate,
                l2.enddate as LegacyEndDate,
                -- begdate is a grouping column, so no aggregate is needed here.
                row_number() over (partition by t2.ckey order by t2.begdate) as rnk
           from #type2 as t2
           left join #legacyt2 as l2
             on l2.ckey = t2.ckey
            and l2.skey = t2.skey
          group by t2.ckey,
                   t2.skey,
                   t2.begdate,
                   t2.enddate,
                   l2.begdate,
                   l2.enddate
     )
     select ckey,
            skey,
            begdate,
            LegacyBegDate,
            enddate,
            LegacyEndDate,
            rnk
       into #dim
       from Dimension;

     -- step #2 update the skey for current ckey/skey pairings.
     -- "Current" means both end dates are NULL; such a row covers transactions
     -- from its legacy begin date through today (inclusive).
     update f
        set skey = d.skey
       from #fact as f
      inner join #dim as d
         on d.ckey = f.ckey
      where d.enddate is null
        and d.LegacyEndDate is null
        and f.xactDate between d.LegacyBegDate and cast(getdate() as date);

     -- step #3 update the skey for closed ckey/skey pairings.
     -- "Closed" means both end dates are set; such a row covers transactions
     -- from its legacy begin date through its legacy end date (inclusive).
     -- NOTE(review): BETWEEN is inclusive on both ends, so a transaction dated
     -- exactly on a legacy end date that is also the next version's begin date
     -- matches both steps and keeps the value written here - confirm intended.
     update f
        set skey = d.skey
       from #fact as f
      inner join #dim as d
         on d.ckey = f.ckey
      where d.enddate is not null
        and d.LegacyEndDate is not null
        and f.xactDate between d.LegacyBegDate and d.LegacyEndDate;