我有一个包含以下数据的表:
-- Sample data: a table variable of (IDX, VAL) pairs used by all queries below.
DECLARE @tbl TABLE
(
    IDX INTEGER,
    VAL VARCHAR(50)
);

-- Test rows. The same VAL appears in several separate runs (e.g. 'A' at 1-3, 7-9, 12-14).
-- Row 14 was retyped by hand: the line as originally pasted contained invalid
-- (non-printable) characters and would not run.
INSERT INTO @tbl (IDX, VAL)
VALUES
    (1,  'A'),
    (2,  'A'),
    (3,  'A'),
    (4,  'B'),
    (5,  'B'),
    (6,  'B'),
    (7,  'A'),
    (8,  'A'),
    (9,  'A'),
    (10, 'C'),
    (11, 'C'),
    (12, 'A'),
    (13, 'A'),
    (14, 'A'),
    (15, 'D'),
    (16, 'D');

-- Show what was inserted.
SELECT
    IDX,
    VAL
FROM @tbl;
我想要的输出是:先按 Idx 排序,然后对每一组连续相同的 Val,给出该组的 Val 以及第一个和最后一个 Idx。注意 Val 可能会重复出现!!!另外,表中的 Idx 不一定像上面的 INSERT 语句那样按升序存放。请不要使用游标!即:
Val First Last
=================
A 1 3
B 4 6
A 7 9
C 10 11
A 12 14
D 15 16
答案 0 :(得分:1)
如果 idx 值保证是连续的,那么试试这个:
-- Returns one row per run of consecutive rows that share the same val,
-- with the first and last idx of that run.
-- Assumes idx values are unique and contiguous (no gaps).
SELECT
    f.val,
    f.idx AS [first],
    l.idx AS [last]
FROM @tbl AS f
INNER JOIN @tbl AS l
    ON l.val = f.val
    -- >= rather than >, so that a run consisting of a single row is reported too
    AND l.idx >= f.idx
    -- l ends a run: the row right after it does not carry the same val
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS nxt
        WHERE nxt.val = f.val
          AND nxt.idx = l.idx + 1
    )
    -- f starts a run: the row right before it does not carry the same val
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS prv
        WHERE prv.val = f.val
          AND prv.idx = f.idx - 1
    )
    -- f and l belong to the same run: no row with a different val lies between them
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS mid
        WHERE mid.val <> f.val
          AND mid.idx BETWEEN f.idx AND l.idx
    )
ORDER BY f.idx
如果 idx 值不是连续的,那么查询需要更复杂一些:
-- Returns one row per run of consecutive rows that share the same val,
-- with the first and last idx of that run.
-- Works when idx has gaps: "neighbour" means the closest existing idx,
-- found with MIN/MAX instead of idx +/- 1. Assumes idx values are unique.
SELECT
    f.val,
    f.idx AS [first],
    l.idx AS [last]
FROM @tbl AS f
INNER JOIN @tbl AS l
    ON l.val = f.val
    -- >= rather than >, so that a run consisting of a single row is reported too
    AND l.idx >= f.idx
    -- l ends a run: the next existing row does not carry the same val
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS nxt
        WHERE nxt.val = f.val
          AND nxt.idx = (
                SELECT MIN(a.idx)
                FROM @tbl AS a
                WHERE a.idx > l.idx
              )
    )
    -- f starts a run: the previous existing row does not carry the same val
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS prv
        WHERE prv.val = f.val
          AND prv.idx = (
                SELECT MAX(b.idx)
                FROM @tbl AS b
                WHERE b.idx < f.idx
              )
    )
    -- f and l belong to the same run: no row with a different val lies between them
    AND NOT EXISTS (
        SELECT *
        FROM @tbl AS mid
        WHERE mid.val <> f.val
          AND mid.idx BETWEEN f.idx AND l.idx
    )
ORDER BY f.idx
答案 1 :(得分:0)
SQL Server 2012
在 SQL Server 2012 中,您可以使用一系列 CTE 并配合 LAG/LEAD 分析函数,如下所示(fiddle 见此处)。代码不要求 idx 为特定类型,也不要求其取值连续,并查询每个窗口(即每段连续相同的 val)中 val 的第一次和最后一次出现。
-- SQL Server 2012+: one row per run of consecutive equal val (ordered by idx),
-- with the first and last idx of the run.
-- Rows are ordered explicitly by idx; ROW_NUMBER() OVER (ORDER BY (SELECT 0))
-- gives no guaranteed order, so insertion order must not be relied on.
-- Runs of a single row are handled. Assumes idx is unique; a NULL val never
-- compares equal to its neighbour, so each NULL-val row forms its own run.
;WITH flagged AS
(
    -- is_start = 1 when the row opens a new run, i.e. there is no previous
    -- row or the previous row (by idx) has a different val.
    SELECT
        idx,
        val,
        CASE
            WHEN LAG(val, 1) OVER (ORDER BY idx) = val THEN 0
            ELSE 1
        END AS is_start
    FROM @tbl
),
grouped AS
(
    -- Running count of run starts = run number of the current row.
    -- ROWS framing is spelled out so the default RANGE frame is not used.
    SELECT
        idx,
        val,
        SUM(is_start) OVER (ORDER BY idx ROWS UNBOUNDED PRECEDING) AS grp
    FROM flagged
)
SELECT
    val,
    MIN(idx) AS firstidx,
    MAX(idx) AS lastidx
FROM grouped
GROUP BY grp, val
ORDER BY firstidx;
SQL Server 2008
在 SQL Server 2008 中,由于缺少 LAG/LEAD 分析函数,代码要更加繁琐(fiddle 见此处)。同样,代码不要求 idx 为特定类型,也不要求其取值连续,并查询每个窗口(即每段连续相同的 val)中 val 的第一次和最后一次出现。
-- SQL Server 2008 (no LAG/LEAD): one row per run of consecutive equal val
-- (ordered by idx), with the first and last idx of the run.
-- Rows are numbered explicitly by idx; ROW_NUMBER() OVER (ORDER BY (SELECT 0))
-- gives no guaranteed order, and this CTE is referenced several times, so the
-- numbering has to be deterministic.
-- Runs of a single row are handled. Assumes idx is unique and val is never NULL
-- (the <> comparisons below evaluate to UNKNOWN for NULL).
;WITH numbered AS
(
    SELECT
        idx,
        val,
        ROW_NUMBER() OVER (ORDER BY idx) AS rn
    FROM @tbl
),
run_starts AS
(
    -- A row starts a run when it has no predecessor or the predecessor's val differs.
    SELECT
        cur.idx,
        cur.val,
        cur.rn
    FROM numbered AS cur
    LEFT JOIN numbered AS prv
        ON prv.rn = cur.rn - 1
    WHERE prv.rn IS NULL
       OR prv.val <> cur.val
),
run_ends AS
(
    -- A row ends a run when it has no successor or the successor's val differs.
    SELECT
        cur.idx,
        cur.rn
    FROM numbered AS cur
    LEFT JOIN numbered AS nxt
        ON nxt.rn = cur.rn + 1
    WHERE nxt.rn IS NULL
       OR nxt.val <> cur.val
)
SELECT
    s.val,
    s.idx AS firstidx,
    -- The first run end at or after the start is the end of the same run;
    -- idx grows with rn, so MIN(idx) picks exactly that row.
    (
        SELECT MIN(e.idx)
        FROM run_ends AS e
        WHERE e.rn >= s.rn
    ) AS lastidx
FROM run_starts AS s
ORDER BY s.rn;
注意:这两个解决方案都基于数据库引擎保留插入顺序的假设,尽管关系数据库在理论上并不保证顺序。
答案 2 :(得分:0)
一种方法(至少适用于 SQL Server 2008,且不使用特殊函数)是引入一个辅助表和一个辅助变量。
这在您的实际环境中是否可行(考虑到可能还有许多其他要求)我不确定,但它也许能为您指出一条解决思路;并且它确实满足您目前的要求:不使用游标,也不使用 LEAD/LAG:
基本上,我所做的是创建一个辅助表和一个用于分组的辅助变量(命名比较随意,见谅):
-- Working copy of @tbl with an extra gidx column that later receives the
-- group number of each row.
DECLARE @grp TABLE
(
    idx  INTEGER,
    val  VARCHAR(50),
    gidx INT
);

-- Group counter used by the following UPDATE; starts at 1.
DECLARE @gidx INT;
SET @gidx = 1;

-- Copy every source row; gidx is 0 until the groups are assigned.
INSERT INTO @grp (idx, val, gidx)
SELECT
    src.idx,
    src.val,
    0
FROM @tbl AS src;
我使用源表 @tbl 中的值填充它。
然后我使用一个更新技巧,根据 VAL 发生变化的位置为 gidx 赋值:
-- Assigns every row its group number: gidx = number of group starts at or
-- before the row (by idx). A row starts a group when there is no previous row
-- or the previous existing row has a different val.
-- This replaces the "SET @gidx = gidx = ..." variable-assignment update, whose
-- result depends on the order in which rows are processed (not guaranteed)
-- and on idx - 1 existing for every row (contiguous idx).
-- Assumes idx is unique. NOTE(review): a NULL val never compares equal, so each
-- NULL-val row opens a new group.
UPDATE g
SET gidx = (
        SELECT COUNT(*)
        FROM @grp AS s
        WHERE s.idx <= g.idx
          AND NOT EXISTS (
                SELECT 1
                FROM @grp AS p
                WHERE p.val = s.val
                  AND p.idx = (
                        SELECT MAX(q.idx)
                        FROM @grp AS q
                        WHERE q.idx < s.idx
                      )
              )
    )
FROM @grp AS g

-- Leave @gidx at the highest group number, as the variable-assignment update
-- did; it keeps its current value when @grp is empty.
SELECT @gidx = ISNULL(MAX(gidx), @gidx)
FROM @grp
这样做的效果是:在 VAL 发生变化之前,gidx 一直被赋值为 1;VAL 变化后则赋值为 gidx + 1,该值同时也会赋给 @gidx 变量,依此类推。这样就得到了如下可用的结果:
idx val gidx
1 A 1
2 A 1
3 A 1
4 B 2
5 B 2
6 B 2
7 A 3
8 A 3
9 A 3
10 C 4
11 C 4
12 A 5
13 A 5
14 A 5
15 D 6
16 D 6
请注意,gidx现在是一个分组因素。
然后用子选择提取数据是一件简单的事情:
-- One row per group: its val and its first and last idx.
-- val is constant within a gidx group, so it is grouped on directly instead of
-- being fetched with a correlated TOP 1 subquery that has no ORDER BY.
-- ORDER BY gidx makes the row order deterministic (groups in idx order).
SELECT
    g.val      AS Val,
    MIN(g.idx) AS [First],
    MAX(g.idx) AS [Last]
FROM @grp AS g
GROUP BY
    g.gidx,
    g.val
ORDER BY
    g.gidx
这产生了结果:
A 1 3
B 4 6
A 7 9
C 10 11
A 12 14
D 15 16
答案 3 :(得分:0)
我假设 IDX 值是唯一的。如果还可以假设它们从 1 开始且没有间隙,那么您可以尝试以下适用于 SQL Server 2005 及更高版本的解决方案:
-- Groups consecutive rows with the same Val and returns each group's first and
-- last IDX. Within one Val, IDX minus the per-Val row number stays the same as
-- long as the IDX values are consecutive, so (Val, grp) identifies a group.
WITH islands AS (
    SELECT
        IDX,
        Val,
        IDX - ROW_NUMBER() OVER (PARTITION BY Val ORDER BY IDX) AS grp
    FROM @tbl
)
SELECT
    Val,
    MIN(IDX) AS FirstIDX,
    MAX(IDX) AS LastIDX
FROM islands
GROUP BY
    grp,
    Val
ORDER BY
    MIN(IDX);
如果 IDX 值可能有间隙和/或可能从 1 以外的值开始,则可以使用上述查询的如下修改版本:
-- Groups consecutive rows (in IDX order) with the same Val and returns each
-- group's first and last IDX. The overall row number minus the per-Val row
-- number stays the same while Val does not change between adjacent rows, so
-- (Val, grp) identifies a group.
WITH islands AS (
    SELECT
        IDX,
        Val,
        ROW_NUMBER() OVER (ORDER BY IDX)
            - ROW_NUMBER() OVER (PARTITION BY Val ORDER BY IDX) AS grp
    FROM @tbl
)
SELECT
    Val,
    MIN(IDX) AS FirstIDX,
    MAX(IDX) AS LastIDX
FROM islands
GROUP BY
    grp,
    Val
ORDER BY
    MIN(IDX);
注意:如果您最终使用了这些查询之一,请确保该查询之前的语句以分号结尾,特别是在使用 SQL Server 2008 或更高版本时。