我有以下格式的数据。
match_id team_id won_ind
----------------------------
37 Team1 N
67 Team1 Y
98 Team1 N
109 Team1 N
158 Team1 Y
162 Team1 Y
177 Team1 Y
188 Team1 Y
198 Team1 N
207 Team1 Y
217 Team1 Y
10 Team2 N
13 Team2 N
24 Team2 N
39 Team2 Y
40 Team2 Y
51 Team2 Y
64 Team2 N
79 Team2 N
86 Team2 N
91 Team2 Y
101 Team2 N
这里 match_id 按时间顺序排列:37 是 Team1 的第一场比赛,217 是 Team1 的最后一场比赛。won_ind 表示球队是否赢得了该场比赛。
所以,从上面的数据来看,Team1 输掉了第一场比赛,然后赢了 1 场,接着输了 2 场,然后连赢 4 场,依此类推。现在我想为每支球队找出最长的连胜纪录。
Team_id longest_streak
------------------------
Team1 4
Team2 3
我知道如何用 PL/SQL 实现,但我想知道能否用纯 SQL 计算出来。我尝试过 LEAD、LAG 和其他几个函数,但没有取得任何进展。
我创建了一个 SQL Fiddle 示例,见 here。
答案 0 :(得分:8)
这应该可行,SQL Fiddle 示例在这里:http://sqlfiddle.com/#!4/31f95/27
-- Longest winning streak per team (Oracle syntax).
--
-- Steps, innermost query first:
--   1. new_group is 1 on every row whose won_ind differs from the same team's
--      previous row (by match_id), and 0 otherwise. The first row of a team has
--      no predecessor, so LAG returns NULL and DECODE yields 1.
--   2. group_no is the running total of new_group within the team, so each run
--      of consecutive equal won_ind values gets its own number.
--   3. Only the 'Y' rows are kept and counted per (team_id, group_no).
--   4. The largest count per team is the longest streak.
--
-- Both analytic functions are partitioned by team_id. Without the partition the
-- rows of all teams are walked in one match_id sequence, and a row of another
-- team that sorts between two wins of the same team splits that streak in two.
-- Teams with no 'Y' row at all do not appear in the result.
SELECT team_id, MAX(seq_length) AS longest_sequence
FROM (SELECT team_id, COUNT(*) AS seq_length
      FROM (SELECT team_id, won_ind, match_id,
                   SUM(new_group) OVER (PARTITION BY team_id ORDER BY match_id) AS group_no
            FROM (SELECT team_id, won_ind, match_id,
                         DECODE(LAG(won_ind) OVER (PARTITION BY team_id ORDER BY match_id),
                                won_ind, 0,
                                1) AS new_group
                  FROM matches))
      WHERE won_ind = 'Y'
      GROUP BY team_id, group_no)
GROUP BY team_id
ORDER BY longest_sequence DESC, team_id;
答案 1 :(得分:6)
-- Longest winning streak per team, computed over an inline copy of the sample data.
WITH original_data AS (
    -- Sample rows; the column names are taken from the first branch of the UNION ALL.
    SELECT  37 AS match_id, 'Team1' AS team_id, 'N' AS won_id FROM dual UNION ALL
    SELECT  67, 'Team1', 'Y' FROM dual UNION ALL
    SELECT  98, 'Team1', 'N' FROM dual UNION ALL
    SELECT 109, 'Team1', 'N' FROM dual UNION ALL
    SELECT 158, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 162, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 177, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 188, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 198, 'Team1', 'N' FROM dual UNION ALL
    SELECT 207, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 217, 'Team1', 'Y' FROM dual UNION ALL
    SELECT  10, 'Team2', 'N' FROM dual UNION ALL
    SELECT  13, 'Team2', 'N' FROM dual UNION ALL
    SELECT  24, 'Team2', 'N' FROM dual UNION ALL
    SELECT  39, 'Team2', 'Y' FROM dual UNION ALL
    SELECT  40, 'Team2', 'Y' FROM dual UNION ALL
    SELECT  51, 'Team2', 'Y' FROM dual UNION ALL
    SELECT  64, 'Team2', 'N' FROM dual UNION ALL
    SELECT  79, 'Team2', 'N' FROM dual UNION ALL
    SELECT  86, 'Team2', 'N' FROM dual UNION ALL
    SELECT  91, 'Team2', 'Y' FROM dual UNION ALL
    SELECT 101, 'Team2', 'N' FROM dual
),
flagged_matches AS (
    -- A new streak is identified on a win whose previous match (same team,
    -- by match_id) was a loss: that row gets 1, every other row gets 0.
    -- A win with no previous match also gets 0, because LAG returns NULL there.
    SELECT
        match_id,
        team_id,
        won_id,
        CASE
            WHEN won_id = 'Y'
             AND LAG(won_id) OVER (PARTITION BY team_id ORDER BY match_id) = 'N'
            THEN 1
            ELSE 0
        END AS new_streak
    FROM original_data
),
numbered_wins AS (
    -- Keep the wins only and give every streak its own number within the team:
    -- the running total of the flags steps up by one at each new streak.
    SELECT
        match_id,
        team_id,
        SUM(new_streak) OVER (PARTITION BY team_id ORDER BY match_id) AS streak_no
    FROM flagged_matches
    WHERE won_id = 'Y'
),
streak_lengths AS (
    -- Number of matches in each streak.
    SELECT
        COUNT(*) AS counter,
        team_id,
        streak_no
    FROM numbered_wins
    GROUP BY team_id, streak_no
)
-- The longest streak of each team.
SELECT
    MAX(counter) AS longest_streak,
    team_id
FROM streak_lengths
GROUP BY team_id
;
答案 2 :(得分:3)
下面使用的是我在 here 发布的答案的一个变体:
-- Longest winning streak per team using a self-join instead of analytic functions.
--
-- Every pair (a, b) of matches of the same team with a.match_id <= b.match_id
-- is a candidate window. The NOT EXISTS keeps only windows that contain no
-- loss ('N'), and the scalar subquery counts the matches inside the window.
-- The largest surviving window per team is the longest streak.
--
-- The join uses >= rather than > so that a window of a single match (a = b) is
-- also considered; otherwise a team whose best streak is one win would be
-- missing from the result. Teams with no 'Y' row do not appear at all.
select
    team_id,
    max(wins) as longest_streak
from
(
    select
        a.team_id,
        a.match_id amatch,
        b.match_id bmatch,
        (select count(distinct matches_inner.match_id)
           from matches matches_inner
          where a.team_id = matches_inner.team_id
            and matches_inner.match_id between a.match_id and b.match_id) wins
    from
        matches a
        join matches b on a.team_id = b.team_id
                      and b.match_id >= a.match_id
    where
        not exists
            (select 'x'
               from matches matches_inner
              where a.team_id = matches_inner.team_id
                and matches_inner.match_id between a.match_id and b.match_id
                and matches_inner.won_ind = 'N')
)
group by team_id;
答案 3 :(得分:3)
我在Teradata上有类似的任务,修改它以在Oracle上运行:
-- Longest winning streak per team.
--
-- tagged_matches: for every match, the running count of the team's non-'Y'
--   rows up to and including that match (NULL until the first such row,
--   because the CASE has no ELSE). All wins of one unbroken streak share the
--   same value.
-- win_runs: the number of 'Y' rows per (team, running count), i.e. the length
--   of each streak.
WITH tagged_matches AS (
    SELECT
        team_id,
        match_id,
        won_ind,
        SUM(CASE WHEN won_ind <> 'Y' THEN 1 END)
            OVER (
                PARTITION BY team_id
                ORDER BY match_id
                ROWS UNBOUNDED PRECEDING
            ) AS non_wins_so_far
    FROM matches
),
win_runs AS (
    SELECT
        team_id,
        COUNT(*) AS cnt
    FROM tagged_matches
    WHERE won_ind = 'Y'
    GROUP BY team_id, non_wins_so_far
)
SELECT
    team_id,
    MAX(cnt)
FROM win_runs
GROUP BY team_id;