我有以下数据集:
DATE CODE RANK PARTITION
? ABS 0 1
12/04/2014 RET 1 1
20/04/2014 RET 2 1
01/05/2014 ABS 2 1
13/05/2014 RET 2 1
01/06/2015 ABS 2 1
09/10/2015 RETk 2 1
? ABS 0 2
02/04/2015 RET 1 2
03/04/2015 RET 2 2
04/04/2015 ABS 2 2
05/04/2015 STT 3 2
06/04/2015 RETk 4 2
07/04/2015 RETk 4 2
RANK是我想在我的SQL中计算的列,给定DATE,CODE列和同一列的先前值。它在这里被初始化为0。 我想实现的逻辑如下:
If RANK-1 (previous row) IS NULL AND CODE = ABS THEN RANK = 0
If RANK-1 (previous row) IS NULL AND CODE <> ABS THEN RANK <- (RANK-1) + 1
If RANK-1 = 0 or 1 AND CODE = RET THEN RANK <- (RANK-1) + 1
If RANK-1 = 2 AND CODE = STT THEN RANK <- (RANK-1) + 1
If RANK-1 = 3 AND CODE = RETk THEN RANK <- (RANK-1) + 1
If CODE = ABS THEN RANK <- (RANK-1) (previous row)
Else 0
我使用的Teradata版本是R14。计算是按分区进行的,如上例所示。我在模型中添加了一些约束以使其更清晰。在这个例子中,如果当前代码是RET,只有在上一行的等级为0或1时我才会增加等级。同样,如果当前代码是RETk,只有在上一行的等级等于3时我才会增加等级;否则,等级保持不变。在后续的每个分区中,我重复相同的过程,依此类推。
我无法弄清楚如何根据上一行的值来更新当前行的列值……我尝试了许多基于OLAP函数的实现,但都没有成功。有人能给我一点提示吗?
非常感谢您的帮助
答案 0 :(得分:1)
您始终可以对此类任务使用递归查询。但是,除非每组的行数很少,否则性能会很差。
首先,您需要一种前进到下一行的方法,因为无法根据当前行的日期计算下一行的日期,您必须具体化数据并添加ROW_NUMBER :
-- Sample fixture reproducing the question's data set: two partitions, each
-- starting with an undated ABS row. rnk stores the rank shown in the
-- question's table (the expected result).
CREATE TABLE tab (dt DATE, code VARCHAR(10), rnk INT, part INT);

-- Partition 1
INSERT INTO tab (dt, code, rnk, part) VALUES (NULL,              'ABS',  0, 1);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2014-04-12', 'RET',  1, 1);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2014-04-20', 'RET',  2, 1);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2014-05-01', 'ABS',  2, 1);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2014-05-13', 'RET',  2, 1);
-- The last two dates of partition 1 are in 2015, matching the question's data.
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-06-01', 'ABS',  2, 1);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-10-09', 'RETk', 2, 1);

-- Partition 2
INSERT INTO tab (dt, code, rnk, part) VALUES (NULL,              'ABS',  0, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-02', 'RET',  1, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-03', 'RET',  2, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-04', 'ABS',  2, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-05', 'STT',  3, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-06', 'RETk', 4, 2);
INSERT INTO tab (dt, code, rnk, part) VALUES (DATE '2015-04-07', 'RETk', 4, 2);
-- Materialize tab with a per-partition sequence number so that the row
-- following any given row can be addressed as rn + 1.
CREATE VOLATILE TABLE vt AS
(
    SELECT
        src.dt,
        src.code,
        src.part,
        -- position of the row inside its partition, ordered by date
        ROW_NUMBER() OVER (PARTITION BY src.part ORDER BY src.dt) AS rn
    FROM tab AS src
)
WITH DATA
PRIMARY INDEX (part, rn)
ON COMMIT PRESERVE ROWS;
现在它只是使用CASE一行一行地应用你的逻辑:
-- Walk every partition of vt one row at a time (rn = 1, 2, 3, ...), deriving
-- each row's rank from its own code and the rank computed for the row before.
WITH RECURSIVE cte (dt, code, rnk, part, rn) AS
(
    -- Seed: the first row of each partition starts at 0 for ABS, 1 otherwise.
    SELECT
        first_row.dt,
        first_row.code,
        CASE WHEN first_row.code = 'ABS' THEN 0 ELSE 1 END,
        first_row.part,
        first_row.rn
    FROM vt AS first_row
    WHERE first_row.rn = 1

    UNION ALL

    -- Step: attach the next row (rn + 1) of the same partition.
    SELECT
        cur.dt,
        cur.code,
        CASE
            -- The rank advances only on the expected code for the current level:
            -- RET at level 0/1, STT at level 2, RETk at level 3.
            WHEN (prev.rnk IN (0, 1) AND cur.code = 'RET')
              OR (prev.rnk = 2 AND cur.code = 'STT')
              OR (prev.rnk = 3 AND cur.code = 'RETk')
            THEN prev.rnk + 1
            -- Every other combination, ABS included, carries the previous rank forward.
            ELSE prev.rnk
        END,
        cur.part,
        cur.rn
    FROM vt AS cur
    INNER JOIN cte AS prev
        ON  cur.part = prev.part
        AND cur.rn = prev.rn + 1
)
SELECT
    dt,
    code,
    rnk,
    part,
    rn
FROM cte
ORDER BY part, dt;
但我认为你的逻辑实际上不是这样的(基于前面的行精确RANK值),你只是陷入程序性思维中: - )
您可能只能使用OLAP函数执行所需操作...
答案 1 :(得分:0)
类似这样:
-- Minimal table for the self-join example: one row per date, carrying a code
-- and a rank value.
CREATE TABLE table1
(
    datecol DATE
  , code    VARCHAR(10)
  , rankcol INTEGER
);
--insert into table1 select '2014/05/13', 'RETj', 0;
-- Pair every row of table1 with the row immediately before it (ordered by
-- datecol) and derive RET_res from the current row's code and the PREVIOUS
-- row's rankcol. The first row has no predecessor and is not returned.
SELECT
    CASE
        WHEN cur.code = 'ABS' AND prev.rankcol = 1 THEN 1
        WHEN cur.code = 'RET' AND prev.rankcol = 0 THEN 1
        WHEN cur.code = 'RET' AND prev.rankcol = 1 THEN 2
        ELSE 0
    END AS RET_res,
    -- current row
    cur.rankcol,
    cur.code,
    cur.var1,
    -- previous row
    prev.rankcol,
    prev.code,
    prev.var1
FROM
    (
        SELECT rankcol, code, ROW_NUMBER() OVER (ORDER BY datecol) AS var1
        FROM table1
    ) AS cur
INNER JOIN
    (
        SELECT rankcol, code, ROW_NUMBER() OVER (ORDER BY datecol) AS var1
        FROM table1
    ) AS prev
    -- prev is the row one position earlier; joining on cur.var1 = prev.var1 - 1
    -- would pick up the following row instead.
    ON prev.var1 = cur.var1 - 1
ORDER BY cur.var1
;