从表中选择连续范围

时间:2012-02-09 15:13:08

标签: sql sql-server sql-server-2005 gaps-and-islands

我需要从基于连续数字(列N)的表格中提取连续范围,并且这些数字涉及相同的“类别”(下面的列C)。从图形上看,它看起来像这样:

 N  C  D
--------
 1  x  a           C  N1  N2  D1  D2
 2  x  b          ------------------
 3  x  c           x   1   4   a   d     (continuous range with same N)
 4  x  d    ==>    x   6   7   e   f     (new range because "5" is missing)
 6  x  e           y   8  10   g   h     (new range because C changed to "y")
 7  x  f
 8  y  g
 9  y  h
10  y  i

SQL Server是2005.谢谢。

4 个答案:

答案 0 :(得分:4)

-- Sample data: N is the sequence number, C the category, D a payload value.
-- Single-row INSERT ... VALUES statements are kept because SQL Server 2005
-- does not accept multi-row VALUES lists.
DECLARE @myTable TABLE
(
    N INT,
    C CHAR(1),
    D CHAR(1)
);

INSERT INTO @myTable (N, C, D) VALUES (1,  'x', 'a');
INSERT INTO @myTable (N, C, D) VALUES (2,  'x', 'b');
INSERT INTO @myTable (N, C, D) VALUES (3,  'x', 'c');
INSERT INTO @myTable (N, C, D) VALUES (4,  'x', 'd');
INSERT INTO @myTable (N, C, D) VALUES (6,  'x', 'e');
INSERT INTO @myTable (N, C, D) VALUES (7,  'x', 'f');
INSERT INTO @myTable (N, C, D) VALUES (8,  'y', 'g');
INSERT INTO @myTable (N, C, D) VALUES (9,  'y', 'h');
INSERT INTO @myTable (N, C, D) VALUES (10, 'y', 'i');

-- Each range is rebuilt by pairing the k-th range start of a category with
-- the k-th range end of the SAME category.  The row numbers are therefore
-- partitioned by C and the join also matches on C: a single global numbering
-- ordered by N alone pairs starts and ends of different categories as soon as
-- the N values of two categories overlap (e.g. x: 1,2,3 and y: 2).
-- Assumes (C, N) is unique; duplicate rows would yield several start/end
-- candidates for one range.
WITH StartingPoints AS (
    -- A row opens a range when its category has no row at N - 1.
    SELECT A.N,
           A.C,
           A.D,
           ROW_NUMBER() OVER (PARTITION BY A.C ORDER BY A.N) AS rownum
    FROM @myTable AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM @myTable AS B
        WHERE B.C = A.C
          AND B.N = A.N - 1
    )
),
EndingPoints AS (
    -- A row closes a range when its category has no row at N + 1.
    SELECT A.N,
           A.C,
           A.D,
           ROW_NUMBER() OVER (PARTITION BY A.C ORDER BY A.N) AS rownum
    FROM @myTable AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM @myTable AS B
        WHERE B.C = A.C
          AND B.N = A.N + 1
    )
)
SELECT StartingPoints.C,
       StartingPoints.N AS [N1],
       EndingPoints.N   AS [N2],
       StartingPoints.D AS [D1],
       EndingPoints.D   AS [D2]
FROM StartingPoints
INNER JOIN EndingPoints
    ON  EndingPoints.C      = StartingPoints.C
    AND EndingPoints.rownum = StartingPoints.rownum
-- Explicit ordering makes the output deterministic.
ORDER BY StartingPoints.N, StartingPoints.C;

<强>结果:

C    N1          N2          D1   D2
---- ----------- ----------- ---- ----
x    1           4           a    d
x    6           7           e    f
y    8           10          g    i

答案 1 :(得分:1)

RANK函数比ROW_NUMBER更安全,以防任何N值重复,如下例所示:

-- Sample data with duplicate N values (4 and 10) inside a category.
declare @ncd table(N int, C char, D char);

insert into @ncd (N, C, D)
select 1,'x','a' union all
select 2,'x','b' union all
select 3,'x','c' union all
select 4,'x','d' union all
select 4,'x','e' union all
select 7,'x','f' union all
select 8,'y','g' union all
select 9,'y','h' union all
select 10,'y','i' union all
select 10,'y','j';

-- Within one category, N minus its dense rank is constant along a run of
-- consecutive N values, so (C, r) identifies one island.
-- dense_rank() is required rather than rank(): rank() skips values after a
-- tie, so for N = 4,4,5 the row N = 5 would get a different r and be split
-- off from the island it belongs to.
with a as (
    select N
    , C
    , D
    , r = N - dense_rank() over (partition by C order by N)
    from @ncd
)
select C
, N1 = MIN(N)
, N2 = MAX(N)
-- MIN/MAX of D match the first/last D of the island only because D grows
-- together with N in the sample data.
, D1 = MIN(D)
, D2 = MAX(D)
from a
-- r alone is not unique across categories (two categories can produce the
-- same offset), so the category has to be part of the grouping key.
group by C, r
order by N1, C;

结果,正确承受重复的4和10:

C    N1          N2          D1   D2
---- ----------- ----------- ---- ----
x    1           4           a    e
x    7           7           f    f
y    8           10          g    j

答案 2 :(得分:1)

使用this answer作为起点,我最终得到以下内容:

;
-- Inline sample rows: N = sequence number, C = category, D = payload.
WITH src (N, C, D) AS (
  SELECT 1,  'x', 'a' UNION ALL
  SELECT 2,  'x', 'b' UNION ALL
  SELECT 3,  'x', 'c' UNION ALL
  SELECT 4,  'x', 'd' UNION ALL
  SELECT 6,  'x', 'e' UNION ALL
  SELECT 7,  'x', 'f' UNION ALL
  SELECT 8,  'y', 'g' UNION ALL
  SELECT 9,  'y', 'h' UNION ALL
  SELECT 10, 'y', 'i'
),
-- Tags every row with its island key (Grp) and with flags telling whether the
-- row has no predecessor (IsStart) or no successor (IsEnd) in its category.
flagged AS (
  SELECT
    cur.N,
    cur.C,
    cur.D,
    cur.N - ROW_NUMBER() OVER (PARTITION BY cur.C ORDER BY cur.N) AS Grp,
    CASE WHEN prv.C IS NULL THEN 1 ELSE 0 END AS IsStart,
    CASE WHEN nxt.C IS NULL THEN 1 ELSE 0 END AS IsEnd
  FROM src AS cur
    LEFT JOIN src AS prv
      ON  prv.C = cur.C
      AND cur.N = prv.N + 1
    LEFT JOIN src AS nxt
      ON  nxt.C = cur.C
      AND cur.N = nxt.N - 1
)
-- Only the boundary rows of each island are kept; the aggregates then pick
-- the island's first/last N and the D values found on those boundary rows.
SELECT
  C,
  MIN(N)                                         AS N1,
  MAX(N)                                         AS N2,
  MAX(CASE WHEN IsStart = 1 THEN D ELSE NULL END) AS D1,
  MAX(CASE WHEN IsEnd   = 1 THEN D ELSE NULL END) AS D2
FROM flagged
WHERE IsStart = 1
   OR IsEnd = 1
GROUP BY C, Grp

答案 3 :(得分:0)

编写存储过程。它将创建并填充一个包含C,N1,N2,D1和D2列的临时表。

  • 创建临时表
  • 使用游标循环包含由N
  • 排序的N,C,D的表中的条目
  • 使用变量来检测新范围(Ni
  • 针对检测到的每个范围(检测到新范围或光标)插入临时表

告诉我你是否需要一个代码示例。