我有一个类似于以下测试数据的数据集:
-- Test fixture: one row per (month, group) giving the group's color in that month.
-- mon is an integer month key (values such as 201501); grp is a one-letter group code.
create table #colors (mon int, grp varchar(1), color varchar(5));

-- The explicit column list keeps the rows correct even if the table's column
-- order changes; the terminating semicolons let a following WITH statement parse.
insert into #colors (mon, grp, color) values
-- group A: Red in every month
(201501,'A','Red'),
(201502,'A','Red'),
(201503,'A','Red'),
(201504,'A','Red'),
(201505,'A','Red'),
(201506,'A','Red'),
-- group B: Red, then Blue
(201501,'B','Red'),
(201502,'B','Red'),
(201503,'B','Blue'),
(201504,'B','Blue'),
(201505,'B','Blue'),
(201506,'B','Blue'),
-- group C: Red, then Blue, then Green
(201501,'C','Red'),
(201502,'C','Red'),
(201503,'C','Blue'),
(201504,'C','Green'),
(201505,'C','Green'),
(201506,'C','Green'),
-- group D: Red, then Blue, then back to Red (a color that repeats)
(201501,'D','Red'),
(201502,'D','Red'),
(201503,'D','Blue'),
(201504,'D','Blue'),
(201505,'D','Red'),
(201506,'D','Red');
我想知道每个组(grp)的颜色变化路径,以及在每次颜色变化之前,该组处于某一颜色的最后一个月份。这样,与颜色关联的月份就是该"组 - 颜色"组合的时间上界。
我尝试使用CTE和row_number()函数来实现,如下面的代码所示,但它不能正常工作。
以下是示例代码:
-- Attempt to list, per group, up to three colors together with the month attached
-- to each one.
-- NOTE: this gives a wrong path when a group returns to a color it had before
-- (group D in the sample data). The inner GROUP BY grp, color keeps a single row
-- per color, at that color's latest month, so an earlier run of the same color
-- is lost.
; with colors (grp, color, mon, rn) as (
select grp
, color
, mon
-- rn numbers the per-color rows of a group by month, earliest first
, row_number() over (partition by grp order by mon asc) rn
from (
-- z: one row per (grp, color) carrying the latest month that color appears
select grp
, color
, max(mon) mon
from #colors
group by grp, color
) as z
)
select grp
, firstColor
, firstMonth
, secondColor
, secondMonth
, thirdColor
, thirdMonth
from (
select c1.grp
, c1.color firstColor
, c1.mon firstMonth
, c2.color secondColor
, c2.mon secondMonth
, c3.color thirdColor
, c3.mon thirdMonth
-- rn here ranks the joined rows of a group by c1's month, earliest first
, row_number() over (partition by c1.grp order by c1.mon asc) rn
-- c2 is the entry directly after c1 (rn + 1) in the same group
from colors c1 left outer join colors c2 on (
c1.grp = c2.grp
and c1.color <> c2.color
and c1.rn = c2.rn - 1
-- c3 is the entry directly after c2 in the same group
) left outer join colors c3 on (
c1.grp = c3.grp
and c2.color <> c3.color
and c2.rn = c3.rn - 1
)
) as d
-- keep only the row whose c1 is the group's earliest entry
where rn = 1
order by grp
导致以下(不正确)结果集:
在结果中,D组只显示 蓝色(201504) -> 红色(201506),看不出D组最初的颜色是红色 - 正确的结果应该是 红色(201502) -> 蓝色(201504) -> 红色(201506)。这是因为使用了max()函数,但去掉它就需要修改连接逻辑,而我想不出该怎么改。
我尝试过去掉max()函数,并修改row_number()的分区使其包含颜色,但我认为这在逻辑上会退化成同样的结果。
当颜色的种类数少于颜色变化的次数时(例如D组:红 -> 蓝 -> 红),我该如何处理这种情况?
答案 0 :(得分:1)
我采取了不同的方法,通常我会尽量避免在列中"预定义"月份的个数。下面这个解决方案本来可以把各个月份按行返回,不过这里仍然把结果组合成了预期的输出格式:
-- Per group, list up to three consecutive color runs, each with the last month
-- of that run, pivoted into one row per group.
-- The leading semicolon terminates any preceding statement, which T-SQL requires
-- before a WITH that starts a common table expression.
;WITH nCTE (mon, grp, color, n) AS (
    -- Number every row of a group in month order. Columns are named explicitly
    -- so the CTE column list does not depend on the table's column order.
    SELECT mon, grp, color, ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon) AS n
    FROM #colors
), monthsCTE (mon, grp, color, n) AS (
    -- l is a row and r is the next row of the same group. Keep l only when the
    -- color differs on the next row or there is no next row, i.e. l closes a run.
    -- n then numbers the kept rows per group in month order.
    SELECT l.mon, l.grp, l.color, ROW_NUMBER() OVER (PARTITION BY l.grp ORDER BY l.mon) AS n
    FROM nCTE AS l
    LEFT JOIN nCTE AS r
        ON l.grp = r.grp AND l.n = r.n - 1
    WHERE l.color <> r.color OR r.color IS NULL
)
-- Pivot the first three runs of each group into columns; column positions are
-- the same as before, the aliases only make the names unique.
SELECT m1.grp,
       m1.color AS firstColor,
       m1.mon   AS firstMonth,
       m2.color AS secondColor,
       m2.mon   AS secondMonth,
       m3.color AS thirdColor,
       m3.mon   AS thirdMonth
FROM monthsCTE AS m1
LEFT JOIN monthsCTE AS m2
    ON m1.grp = m2.grp AND m2.n = 2
LEFT JOIN monthsCTE AS m3
    ON m1.grp = m3.grp AND m3.n = 3
WHERE m1.n = 1
ORDER BY m1.grp;
您也可以直接查询"内部"的monthsCTE(而不使用外层SELECT),把结果按行返回(这样就不需要ROW_NUMBER...部分),或者保持现在这样......
编辑:其实你真正想做的事情更简单:只需去掉GROUP BY子句(同时去掉MAX()函数)。
EDIT2:正如Me.Name指出的,旧的解决方案在数据跨年份时会失败。已更正代码片段和fiddle。
答案 1 :(得分:1)
使用略有不同的方法:首先使用LEAD窗口函数确定颜色是否发生变化,然后根据颜色变化的位置对行进行排名:
-- Per group, list up to three consecutive color runs, each with the last month
-- of that run.
;with nextcols as
(
    -- Pair every row with the color of the group's next month. With no default
    -- argument, lead() yields NULL on the group's last row, so no in-domain
    -- sentinel string can collide with a real color value.
    select grp, color, mon,
           lead(color) over (partition by grp order by mon) as nextcol
    from #colors
)
, ranked as
(
    -- Keep only rows that close a run: the next color differs, or nextcol is NULL
    -- (last row of the group, or a next row whose color is NULL).
    -- MonthIndex numbers those rows per group in month order.
    select grp, color, mon,
           row_number() over (partition by grp order by mon) as MonthIndex
    from nextcols
    where color <> nextcol or nextcol is null
)
--perhaps you could go pivoting here, but joining on the monthindex works
select r1.grp,
       r1.color as firstCol,
       r1.mon   as firstMon,
       r2.color as secondCol,
       r2.mon   as secondMon,
       r3.color as thirdCol,
       r3.mon   as thirdMon
from ranked as r1
left join ranked as r2 on r2.grp = r1.grp and r2.MonthIndex = 2
left join ranked as r3 on r3.grp = r1.grp and r3.MonthIndex = 3
where r1.MonthIndex = 1
-- explicit ordering so the result is returned in a stable group order
order by r1.grp;
答案 2 :(得分:0)
编辑 - 现在可以使用了!
-- Per group, walk the color history backwards from the latest month.
-- firstColor/firstMonth is the group's most recent row, secondColor/secondMonth
-- is the nearest earlier row with a different color, and thirdColor/thirdMonth
-- is the nearest row before that one whose color differs from secondColor.
-- The path is therefore listed newest-first.
;WITH colors AS (
    -- RowNumberOrder = 1 is the latest month of a group; larger values are older.
    SELECT mon, grp, color
         , ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon DESC) AS RowNumberOrder
    FROM #colors
)
SELECT rn
     , grp
     , firstColor
     , firstMonth
     , secondColor
     , secondMonth
     , thirdColor
     , thirdMonth
FROM (
    -- The joins produce one row per (c1, c2, c3) combination. Ordering by all
    -- three row numbers makes rn = 1 deterministic: the latest c1, then the
    -- nearest earlier c2, then the nearest earlier c3. Ordering by c1 alone
    -- leaves ties, so an arbitrary combination could be returned.
    SELECT ROW_NUMBER() OVER (
               PARTITION BY c1.grp
               ORDER BY c1.RowNumberOrder ASC, c2.RowNumberOrder ASC, c3.RowNumberOrder ASC
           ) AS rn
         , c1.grp
         , c1.color AS firstColor
         , c1.mon   AS firstMonth
         , c2.color AS secondColor
         , c2.mon   AS secondMonth
         , c3.color AS thirdColor
         , c3.mon   AS thirdMonth
    FROM colors AS c1
    -- c2: any older row of the same group with a different color than c1
    LEFT JOIN colors AS c2
        ON c1.grp = c2.grp
       AND c1.RowNumberOrder < c2.RowNumberOrder
       AND c1.color <> c2.color
    -- c3: any row older than c2 in the same group with a different color than c2
    LEFT JOIN colors AS c3
        ON c3.grp = c2.grp
       AND c2.RowNumberOrder < c3.RowNumberOrder
       AND c2.color <> c3.color
) AS a
WHERE rn = 1;