按顺序排序行并填充缺失行的间隙

时间:2017-07-27 06:27:25

标签: tsql sequence gaps-in-data

我遇到了一个关于表格中缺少行的问题,这让我很头疼。

作为基础数据,我有下表:

-- Sample data for the question: each row carries an id1, the id2 group it
-- belongs to, a sequence letter (ch) and a value (val).
declare @table table
(
    id1 int,
    id2 int,
    ch  char(1) not null,
    val int
);

-- All eleven sample rows are loaded with a single multi-row insert.
insert into @table (id1, id2, ch, val)
values
    (1112, 121, 'A', 12),
    (1351, 121, 'A', 13),
    (1411, 121, 'B', 81),
    (1312,   7, 'C', 107),
    (1401,   2, 'A', 107),
    (1454,   2, 'D', 107),
    (1257,   6, 'A', 1),
    (1269,   6, 'B', 12),
    (1335,   6, 'C', 12),
    (1341,   6, 'D', 5),
    (1380,   6, 'A', 3);

输出应按id2排序,并遵循固定的ch序列,该序列应重复,直到下一个id2开始。

序列:

'A'
'B'
'C'
'D'

如果序列或模式被中断,它应该用null填充缺失的行,以便我得到这个结果表:

id1     id2     ch     val
----------------------------    
1112    121     'A'    12
NULL    121     'B'    NULL
NULL    121     'C'    NULL
NULL    121     'D'    NULL
1351    121     'A'    13
1411    121     'B'    81
NULL    121     'C'    NULL
NULL    121     'D'    NULL
NULL    7       'A'    NULL
NULL    7       'B'    NULL
1312    7       'C'    107
NULL    7       'D'    NULL
1401    2       'A'    107
NULL    2       'B'    NULL
NULL    2       'C'    NULL
1454    2       'D'    107

依此类推……

我正在寻找的是一种没有迭代的方法。

我希望有人可以提供帮助!

提前致谢!

3 个答案:

答案 0 :(得分:1)

解决方案可能就是这样:

-- Same sample data as in the question, loaded into a table variable.
declare @table table
(
    id1 int,
    id2 int,
    ch  char(1) not null,
    val int
);

-- One row per line; the column list makes the value order explicit.
insert into @table (id1, id2, ch, val)
values
    (1112, 121, 'A', 12),
    (1351, 121, 'A', 13),
    (1411, 121, 'B', 81),
    (1312,   7, 'C', 107),
    (1401,   2, 'A', 107),
    (1454,   2, 'D', 107),
    (1257,   6, 'A', 1),
    (1269,   6, 'B', 12),
    (1335,   6, 'C', 12),
    (1341,   6, 'D', 5),
    (1380,   6, 'A', 3)
-- Fills the gaps in each id2's repeating A-B-C-D sequence in one set-based
-- statement. Reads @table (id1, id2, ch, val) and returns one row per sequence
-- slot: real rows keep their id1/val, filler slots have NULL id1/val.
;with foo as
(
    -- Per id2, in id1 order: the row number, the previous row's letter as an
    -- offset from 'A' (0 when there is no previous row) and this row's offset.
    select
        id1
        ,id2
        ,val
        ,row_number() over (partition by id2 order by id1) rwn
        ,ascii(isnull(lag(ch,1) over (partition by id2 order by id1),'A'))-ascii('A') prev
        ,ascii(ch)-ascii('A') cur
    from @table
)
,bar as
(
    -- step = how many slots this row advances relative to the previous row.
    -- A letter that does not move forward (cur <= prev) belongs to the next
    -- A-D cycle, so 4 is added; the first row of an id2 never wraps.
    select
        id1
        ,id2
        ,val
        ,(case when cur<=prev and rwn>1 then 4 else 0 end) + cur - prev step
    from foo
)
,foobar as
(
    -- Running total of the steps = zero-based slot of each real row in the
    -- final output of its id2.
    select
        id1
        ,id2
        ,val
        ,sum(step) over (partition by id2 order by id1 rows unbounded preceding) rownum
    from bar
)
,iterations as
(
    -- Number of A-D cycles each id2 needs, taken from the last occupied slot.
    -- Counting the most frequent letter instead would undercount a sequence
    -- such as A, C, B (two cycles, no repeated letter) and drop the row B.
    select
        id2
        ,max(rownum)/4 + 1 nr
    from foobar
    group by id2
)
,tally as
(
    -- 1..N with N = number of rows in @table. Every row advances by at most
    -- 4 slots, so an id2 never needs more cycles than it has rows.
    select row_number() over (order by id1) nr
    from @table
)
,blanks as
(
    -- Every slot that must appear in the output: 4 letters per cycle,
    -- numbered from 0 within each id2.
    select
        a.id2
        ,char(b.ch + ascii('A')) ch
        ,(c.nr - 1)*4 + b.ch rownum
    from iterations a
    inner join tally c
        on c.nr <= a.nr
    cross join (values (0),(1),(2),(3)) b(ch)
)
select
    b.id1
    ,a.id2
    ,a.ch
    ,b.val
from blanks a
left join foobar b
    on a.id2 = b.id2
    and a.rownum = b.rownum
order by a.id2, a.rownum

我首先用查询 "foo" 计算行号,并获取每个 id2 内 ch 的前一个值。

"bar" 然后计算每一行相对于前一行在序列中前进了多少个位置(step)。例如,如果前一个是 A 而当前是 C,则为 2;如果前一个是 A 而当前也是 A,则为 4!

"foobar" 然后对这些步长求累计和,从而为原始行编号,即它们在最终输出中应处的位置。

"iterations" 计算每个 id2 中 "ABCD" 这组行应该出现的次数。

"blanks" 则生成所有最终行,即对每个 id2 输出最终结果中应有的全部 "ABCD" 行,并用 rownum 为它们编号。

最后,我在 id2 和 rownum 上把 "blanks" 与 "foobar" 做左连接(left join)。这样我们就得到了正确的行数,并在存在原始行的位置输出原始值。

答案 1 :(得分:0)

我对你的输出感到困惑,试试这个:

更新

   -- Working copy of the question's data. The extra IDENTITY column "row"
   -- numbers the rows in the order in which they are inserted.
   DECLARE @table TABLE (
       row INT IDENTITY(1, 1),
       id1 INT,
       id2 INT,
       ch  CHAR(1) NOT NULL,
       val INT
   );

   -- The fixed letter sequence that each group is expected to follow.
   DECLARE @Sequence TABLE (ch3 CHAR(1) NOT NULL);

   INSERT INTO @Sequence (ch3)
   VALUES ('A'), ('B'), ('C'), ('D');

   -- One statement per row, in this exact order, so that the identity values
   -- in "row" follow the listing order.
   INSERT INTO @table (id1, id2, ch, val) VALUES (1112, 121, 'A', 12);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1351, 121, 'A', 13);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1411, 121, 'B', 81);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1312, 7, 'C', 107);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1401, 2, 'A', 107);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1454, 2, 'D', 107);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1257, 6, 'A', 1);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1269, 6, 'B', 12);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1335, 6, 'C', 12);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1341, 6, 'D', 5);
   INSERT INTO @table (id1, id2, ch, val) VALUES (1380, 6, 'A', 3);




 -- Expands rows of @table against @Sequence: a row whose successor (by "row")
 -- is not exactly the next letter is expanded to all letters of @Sequence,
 -- any other row is emitted only for its own letter. id1/val are filled in
 -- only where the emitted letter is the row's own letter.
 -- NOTE(review): the successor is looked up by "row" alone, so the last row
 -- of one id2 is compared with the first row of the following id2.
 -- NOTE(review): there is no ORDER BY, so the order of the result rows is not
 -- guaranteed.
 WITH source_rows AS (
     SELECT  id2 ,
             ASCII(ch) AS chd ,
             ch ,
             id1 ,
             row
     FROM    @table
 ),
 flagged AS (
     -- [gap in sq] = 0 only when the next row's letter code is exactly one
     -- higher; a missing next row also counts as a gap (NULL falls to ELSE).
     SELECT  CASE WHEN nxt.chd - cur.chd = 1 THEN 0
                  ELSE 1
             END AS [gap in sq] ,
             cur.id2 ,
             cur.ch ,
             cur.id1
     FROM    source_rows AS cur
             LEFT JOIN source_rows AS nxt ON nxt.row = cur.row + 1
 ),
 fin AS (
     SELECT  f.id2 ,
             f.id1 ,
             s.ch3
     FROM    flagged AS f
             CROSS JOIN @Sequence AS s
     WHERE   f.[gap in sq] = 1
             OR s.ch3 = f.ch
 )
 SELECT  src.id1 ,
         fin.id2 ,
         fin.ch3 ,
         src.val
 FROM    fin
         LEFT JOIN @table AS src ON src.id2 = fin.id2
                                    AND src.id1 = fin.id1
                                    AND src.ch = fin.ch3

答案 2 :(得分:0)

如果你可以设法在表中添加一个额外的列,用来标明哪些 [id2] 行属于同一轮序列,你可以试试这个:

-- Sample data extended with a "category" column that marks which rows of an
-- id2 belong to the same pass through the A-B-C-D sequence.
declare @table table
(
    id1      int,
    id2      int,
    ch       char(1) not null,
    val      int,
    category int -- extra column: one value per sequence pass
);

insert into @table (id1, id2, ch, val, category)
values
    (1112, 121, 'A', 12, 1),
    (1351, 121, 'A', 13, 2),
    (1411, 121, 'B', 81, 2),
    (1312,   7, 'C', 107, 3),
    (1401,   2, 'A', 107, 4),
    (1454,   2, 'D', 107, 4),
    (1257,   6, 'A', 1, 5),
    (1269,   6, 'B', 12, 5),
    (1335,   6, 'C', 12, 5),
    (1341,   6, 'D', 5, 5),
    -- Second 'A' of id2 = 6: it starts a new pass, so it needs its own
    -- category. Sharing category 5 with row 1257 would make the two rows
    -- compete for the same (id2, letter, category) slot.
    (1380,   6, 'A', 3, 6);

-- The fixed letter sequence that every pass is expanded to.
DECLARE @sequence table (seq varchar(1));

INSERT INTO @sequence (seq)
VALUES ('A'), ('B'), ('C'), ('D')


-- Emits, for every (id2, category) pass found in @table, one row per letter of
-- @sequence. A letter that has a real row in that pass shows the row's id1 and
-- val; a letter without one shows NULL for both.
-- Columns returned: id1, id2, seq, val, category.
-- Done in a single statement: no temp table is created, and real rows are
-- matched on (id2, category, letter) only, so a NULL val cannot prevent a row
-- from matching itself and no real row is deleted by a de-duplication step.
;WITH passes AS (
    -- The distinct sequence passes present in the data.
    SELECT DISTINCT
        id2,
        category
    FROM @table
)
SELECT
    t.id1,
    p.id2,
    s.seq,
    t.val,
    p.category
FROM passes AS p
CROSS JOIN @sequence AS s
LEFT JOIN @table AS t
    ON  t.id2 = p.id2
    -- category is nullable: treat two NULL categories as the same pass.
    AND (t.category = p.category OR (t.category IS NULL AND p.category IS NULL))
    AND t.ch = s.seq
-- id1 is only a tie-breaker for the case where a letter occurs more than once
-- in the same pass.
ORDER BY p.id2 DESC, p.category ASC, s.seq ASC, t.id1 ASC;