我有下表。对于每个“Seq”列,我需要找出其值为 1 的每一段(单独一行或连续多行)的最小/最大日期。每行只能有一个“1”。
Animal Calendar_Date SeqA SeqB SeqC SeqD SeqE
Cat 2/5/2017 0 0 0 1 0
Cat 2/6/2017 1 0 0 0 0
Cat 2/7/2017 1 0 0 0 0
Cat 2/8/2017 1 0 0 0 0
Cat 2/9/2017 1 0 0 0 0
Cat 2/10/2017 0 0 0 0 1
Cat 2/11/2017 0 0 0 0 1
Cat 2/12/2017 0 0 0 0 1
Cat 2/13/2017 0 0 0 0 1
Dog 2/5/2017 1 0 0 0 0
Dog 2/6/2017 1 0 0 0 0
Dog 2/7/2017 0 1 0 0 0
Dog 2/8/2017 0 1 0 0 0
Dog 2/9/2017 1 0 0 0 0
Dog 2/10/2017 1 0 0 0 0
Dog 2/11/2017 0 0 0 1 0
Dog 2/12/2017 0 0 0 1 0
Dog 2/13/2017 0 0 0 1 0
期望的结果是这样的。我尝试过使用row_number,rank和dense_rank但是还没能解决这个问题。
Animal Sequence min Max
Cat D 2/5/2017 2/5/2017
Cat A 2/6/2017 2/9/2017
Cat E 2/10/2017 2/13/2017
Dog A 2/5/2017 2/6/2017
Dog B 2/7/2017 2/8/2017
Dog A 2/9/2017 2/10/2017
Dog D 2/11/2017 2/13/2017
以下解决方案基于 dnoeth 的答案。但是,在我的数据集中,存在同一个 Animal 和 Calendar_Date 有多个 Seq 列为 1 的情况,也就是说,对于每个 Animal 和 Calendar_Date,值为 1 的 Seq 列并不唯一。因此我不得不修改代码,对每个 Seq 列各运行一次(共五次),再用 UNION 将结果合并,如下所示:
SELECT Animal,
-- SeqA branch of the five-way UNION: one output row per run of adjacent Table_A rows
-- (per Animal, ordered by Calendar_Date) whose SeqA = 1.
-- The label is cast to varchar(8) in this first branch only. Trim(Both ',') strips
-- leading/trailing commas, but the label built below ('SeqA') contains none.
cast(Min(Trim(Both ',' FROM Seq)) as varchar(8)) as POS_Type,
Min(Calendar_Date) as Min_Date,
Max(Calendar_Date) as Max_Date
FROM
(
SELECT Animal, Calendar_Date, Seq,
-- calculate groups of consecutive values: running total of the start-of-run flags,
-- taken over the rows that survive the "seq is not null" filter below
Sum(flag)
Over (PARTITION BY Animal
ORDER BY Calendar_Date
ROWS Unbounded Preceding) AS grp
FROM
(
SELECT Animal, Calendar_Date,
-- label the row 'SeqA' when SeqA = 1, otherwise NULL
-- (the other four columns are disabled here; each has its own UNION branch)
CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END
-- CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END ||
-- CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END ||
-- CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END ||
-- CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END
AS Seq,
-- check if current and previous row are different:
-- flag = 0 only when the previous row (same Animal, by Calendar_Date) has the same label;
-- a NULL on either side makes the comparison unknown, so the ELSE branch yields 1.
-- This is evaluated over every Table_A row, before the NULL labels are filtered out.
CASE WHEN Min(Seq) -- or LAG in TD16.10
Over (PARTITION BY Animal
ORDER BY Calendar_Date
ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
THEN 0
ELSE 1
END AS flag
FROM Table_A
--and SeqA = 1
) AS dt
-- keep only the rows labelled 'SeqA'
where seq is not null
) AS dt
-- one result row per Animal and run number
GROUP BY Animal, grp
Union
-- SeqB branch: one output row per run of adjacent Table_A rows
-- (per Animal, ordered by Calendar_Date) whose SeqB = 1.
SELECT
    Animal,
    Min(Trim(Both ',' FROM Seq)) AS POS_Type,
    Min(Calendar_Date) AS Min_Date,
    Max(Calendar_Date) AS Max_Date
FROM (
    -- Step 3: drop the unlabelled rows, then number the runs with a
    -- running total of the start-of-run flags.
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        Sum(flag) Over (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding
        ) AS grp
    FROM (
        -- Step 2: flag = 0 only when the previous row of the same Animal carries
        -- the same label; a NULL on either side falls through to 1.
        -- Evaluated over every Table_A row, before the filter in step 3.
        SELECT
            Animal,
            Calendar_Date,
            Seq,
            CASE
                WHEN Min(Seq) Over (  -- previous row's label (LAG in TD16.10)
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 Preceding AND 1 Preceding
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM (
            -- Step 1: label the row 'SeqB' when SeqB = 1, otherwise NULL.
            SELECT
                Animal,
                Calendar_Date,
                CASE WHEN SeqB = 1 THEN 'SeqB' END AS Seq
            FROM Table_A
        ) AS labelled
    ) AS flagged
    WHERE Seq IS NOT NULL
) AS runs
GROUP BY Animal, grp
Union
-- SeqC branch: one output row per run of adjacent Table_A rows
-- (per Animal, ordered by Calendar_Date) whose SeqC = 1.
SELECT
    Animal,
    Min(Trim(Both ',' FROM Seq)) AS POS_Type,
    Min(Calendar_Date) AS Min_Date,
    Max(Calendar_Date) AS Max_Date
FROM (
    -- Step 3: drop the unlabelled rows, then number the runs with a
    -- running total of the start-of-run flags.
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        Sum(flag) Over (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding
        ) AS grp
    FROM (
        -- Step 2: flag = 0 only when the previous row of the same Animal carries
        -- the same label; a NULL on either side falls through to 1.
        -- Evaluated over every Table_A row, before the filter in step 3.
        SELECT
            Animal,
            Calendar_Date,
            Seq,
            CASE
                WHEN Min(Seq) Over (  -- previous row's label (LAG in TD16.10)
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 Preceding AND 1 Preceding
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM (
            -- Step 1: label the row 'SeqC' when SeqC = 1, otherwise NULL.
            SELECT
                Animal,
                Calendar_Date,
                CASE WHEN SeqC = 1 THEN 'SeqC' END AS Seq
            FROM Table_A
        ) AS labelled
    ) AS flagged
    WHERE Seq IS NOT NULL
) AS runs
GROUP BY Animal, grp
Union
-- SeqD branch: one output row per run of adjacent Table_A rows
-- (per Animal, ordered by Calendar_Date) whose SeqD = 1.
SELECT
    Animal,
    Min(Trim(Both ',' FROM Seq)) AS POS_Type,
    Min(Calendar_Date) AS Min_Date,
    Max(Calendar_Date) AS Max_Date
FROM (
    -- Step 3: drop the unlabelled rows, then number the runs with a
    -- running total of the start-of-run flags.
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        Sum(flag) Over (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding
        ) AS grp
    FROM (
        -- Step 2: flag = 0 only when the previous row of the same Animal carries
        -- the same label; a NULL on either side falls through to 1.
        -- Evaluated over every Table_A row, before the filter in step 3.
        SELECT
            Animal,
            Calendar_Date,
            Seq,
            CASE
                WHEN Min(Seq) Over (  -- previous row's label (LAG in TD16.10)
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 Preceding AND 1 Preceding
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM (
            -- Step 1: label the row 'SeqD' when SeqD = 1, otherwise NULL.
            SELECT
                Animal,
                Calendar_Date,
                CASE WHEN SeqD = 1 THEN 'SeqD' END AS Seq
            FROM Table_A
        ) AS labelled
    ) AS flagged
    WHERE Seq IS NOT NULL
) AS runs
GROUP BY Animal, grp
Union
-- SeqE branch: one output row per run of adjacent Table_A rows
-- (per Animal, ordered by Calendar_Date) whose SeqE = 1.
SELECT
    Animal,
    Min(Trim(Both ',' FROM Seq)) AS POS_Type,
    Min(Calendar_Date) AS Min_Date,
    Max(Calendar_Date) AS Max_Date
FROM (
    -- Step 3: drop the unlabelled rows, then number the runs with a
    -- running total of the start-of-run flags.
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        Sum(flag) Over (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding
        ) AS grp
    FROM (
        -- Step 2: flag = 0 only when the previous row of the same Animal carries
        -- the same label; a NULL on either side falls through to 1.
        -- Evaluated over every Table_A row, before the filter in step 3.
        SELECT
            Animal,
            Calendar_Date,
            Seq,
            CASE
                WHEN Min(Seq) Over (  -- previous row's label (LAG in TD16.10)
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 Preceding AND 1 Preceding
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM (
            -- Step 1: label the row 'SeqE' when SeqE = 1, otherwise NULL.
            SELECT
                Animal,
                Calendar_Date,
                CASE WHEN SeqE = 1 THEN 'SeqE' END AS Seq
            FROM Table_A
        ) AS labelled
    ) AS flagged
    WHERE Seq IS NOT NULL
) AS runs
GROUP BY Animal, grp
以上五个查询通过 UNION 连接,作为一条语句一起运行。
# Accumulate the union of several sets into s.
s = set()
for x in [ {1,2}, {3,4}, {5,1} ]:
    # In-place union; the loop body must be indented to be valid Python.
    s |= x
答案 0 :(得分:1)
由于您只有五列,一种方法是手动对它们进行逆透视(unpivot),然后做一个简单的 GROUP BY 来获得结果:
-- Unpivot the five Seq flag columns into (Animal, Sequence, Calendar_Date) rows and
-- return MIN/MAX Calendar_Date for every run of adjacent dates per Animal and Sequence.
--
-- Grouping by Animal and Sequence alone merges separate runs of the same sequence
-- (e.g. Dog/A on 2/5-2/6 and on 2/9-2/10) into one row. The grp column tells them apart:
-- DENSE_RANK numbers the distinct dates of an Animal, ROW_NUMBER numbers the dates of one
-- Animal/Sequence, and their difference stays constant only while the sequence occupies
-- adjacent dates.
-- Assumes one MyTable row per Animal and Calendar_Date, and that every row has at least
-- one Seq column set to 1 (an all-zero row would not break a run) - TODO confirm.
SELECT Animal, Sequence, MIN(Calendar_Date), MAX(Calendar_Date)
FROM (
    SELECT Animal, Sequence, Calendar_Date,
           DENSE_RANK() OVER (PARTITION BY Animal
                              ORDER BY Calendar_Date)
         - ROW_NUMBER() OVER (PARTITION BY Animal, Sequence
                              ORDER BY Calendar_Date) AS grp
    FROM (
        SELECT Animal, 'A' AS Sequence, Calendar_Date FROM MyTable WHERE SeqA=1
        UNION ALL
        SELECT Animal, 'B' AS Sequence, Calendar_Date FROM MyTable WHERE SeqB=1
        UNION ALL
        SELECT Animal, 'C' AS Sequence, Calendar_Date FROM MyTable WHERE SeqC=1
        UNION ALL
        SELECT Animal, 'D' AS Sequence, Calendar_Date FROM MyTable WHERE SeqD=1
        UNION ALL
        SELECT Animal, 'E' AS Sequence, Calendar_Date FROM MyTable WHERE SeqE=1
    ) unpivoted
) dt
GROUP BY Animal, Sequence, grp
答案 1 :(得分:1)
您需要嵌套的OLAP函数:
-- For each Animal, return the label and the first/last Calendar_Date of every run of
-- adjacent rows (ordered by Calendar_Date) that share the same combination of Seq flags.
SELECT
    Animal,
    Min(Trim(Both ',' FROM Seq)),   -- strip the ',' placeholders at both ends of the label
    Min(Calendar_Date),
    Max(Calendar_Date)
FROM (
    -- Step 3: a running total of the start-of-run flags gives every run its own number.
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        Sum(flag) Over (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding
        ) AS grp
    FROM (
        -- Step 2: flag = 0 when the previous row of the same Animal has the same label,
        -- otherwise 1 (including the first row, where the previous label is NULL).
        SELECT
            Animal,
            Calendar_Date,
            Seq,
            CASE
                WHEN Min(Seq) Over (  -- previous row's label (LAG in TD16.10)
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 Preceding AND 1 Preceding
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM (
            -- Step 1: fold the five flag columns into one five-character label,
            -- one position per column, with ',' where the column is not 1.
            SELECT
                Animal,
                Calendar_Date,
                CASE WHEN SeqA = 1 THEN 'A' ELSE ',' END ||
                CASE WHEN SeqB = 1 THEN 'B' ELSE ',' END ||
                CASE WHEN SeqC = 1 THEN 'C' ELSE ',' END ||
                CASE WHEN SeqD = 1 THEN 'D' ELSE ',' END ||
                CASE WHEN SeqE = 1 THEN 'E' ELSE ',' END AS Seq
            FROM tab
        ) AS labelled
    ) AS flagged
) AS runs
GROUP BY Animal, grp
答案 2 :(得分:0)
根据您的数据,这可能是最有效的:
-- Label each row of t with the letter of the first Seq column equal to 1, then return the
-- min/max calendar_date for every run of adjacent rows per animal and label.
--
-- Two defects of the plain "group by seqA, ..., seqE" form are fixed here:
--   * animal was selected but missing from GROUP BY;
--   * separate runs of the same label (e.g. Dog/A twice) were merged into one row.
-- grp is the difference between the row position within the animal and the row position
-- within the animal/label; it stays constant only while the label is unchanged on
-- adjacent rows.
-- Assumes one row per animal and calendar_date - TODO confirm.
select animal,
       sequence,
       min(calendar_date), max(calendar_date)
from (
    select animal, calendar_date, sequence,
           row_number() over (partition by animal
                              order by calendar_date)
         - row_number() over (partition by animal, sequence
                              order by calendar_date) as grp
    from (
        select animal, calendar_date,
               (case when seqA = 1 then 'A'
                     when seqB = 1 then 'B'
                     when seqC = 1 then 'C'
                     when seqD = 1 then 'D'
                     when seqE = 1 then 'E'
                end) as sequence
        from t
    ) labelled
) runs
group by animal, sequence, grp;
这之所以可行,是因为每一行中只有一个“seq”列的值为 1。
老实说,我建议在表中添加一个计算列并使用它:
-- Adds a computed column `sequence` holding the letter of the first Seq column equal to 1
-- (NULL when none is).
-- The table is t, the table the surrounding queries select from; the statement
-- previously named a table called animal.
-- NOTE(review): "add <column> as (<expression>)" is computed-column syntax that not every
-- engine supports - confirm against the target database.
alter table t
add sequence as (case when seqA = 1 then 'A'
                      when seqB = 1 then 'B'
                      when seqC = 1 then 'C'
                      when seqD = 1 then 'D'
                      when seqE = 1 then 'E'
                 end);
然后你可以这样做:
-- One output row per (animal, sequence): earliest and latest calendar_date.
-- Presumably `sequence` is the derived label column on t - verify it exists.
SELECT
    animal,
    sequence,
    MIN(calendar_date),
    MAX(calendar_date)
FROM t
GROUP BY
    animal,
    sequence;
不幸的是,Teradata 不支持计算列(Teradata 标签是在我最初回答之后才添加的)。不过,您可以使用视图获得相同的效果。