我知道 SQL(Google BigQuery)中有 SUM 函数,但如果我需要按时间间隔来统计,该怎么做?例如:
-- Sample data for the question: one row per battle with the columns
-- battleID, countryID and yearBATTLE.
create table BATTLE (battleID VARCHAR(256), countryID VARCHAR(256), yearBATTLE INT);
-- Explicit column lists keep the inserts correct even if the table's column
-- order changes or columns are added later.
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle1', 'Country1', 1567);
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle2', 'Country2', 1568);
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle3', 'Country2', 1570);
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle4', 'Country3', 1599);
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle5', 'Country2', 1575);
insert into BATTLE (battleID, countryID, yearBATTLE) VALUES ('Battle6', 'Country2', 1620);
......
我需要统计战争的数量,其中一场战争是指同一个国家(countryID,原文写作“客户”)的一系列战斗,且相邻两场战斗的间隔小于10年。
答案 0 :(得分:1)
以下是适用于 BigQuery 标准 SQL(Standard SQL)的写法:
#standardSQL
-- Groups each country's battles into "wars": battles are walked in year order
-- and a battle stays in the current war while its gap to the previous battle
-- of the same country is at most 10 years. Returns one row per
-- (countryID, war) with the first year, the last year and the member battles.
SELECT
  countryID,
  MIN(yearBATTLE) AS start,
  MAX(yearBATTLE) AS finish,
  -- ORDER BY inside ARRAY_AGG makes the element order deterministic;
  -- without it the order of the battles in the array is unspecified.
  ARRAY_AGG(STRUCT(battleID, yearBATTLE) ORDER BY yearBATTLE, battleID) AS battels
FROM (
  SELECT
    battleID, countryID, yearBATTLE,
    -- Running total of the "new war" flags: every battle of the same war
    -- ends up with the same grp value within its country.
    SUM(delta) OVER(PARTITION BY countryID ORDER BY yearBATTLE) AS grp
  FROM (
    SELECT
      battleID, countryID, yearBATTLE,
      -- delta = 1 marks the first battle of a new war (gap to the previous
      -- battle is more than 10 years), otherwise 0. For a country's first
      -- battle LAG is NULL, so IFNULL substitutes the battle's own year,
      -- giving a gap of 0 and delta = 0.
      -- The threshold is inclusive (<= 10); use < 10 if a gap of exactly
      -- 10 years should start a new war.
      IF(yearBATTLE - IFNULL(LAG(yearBATTLE) OVER(PARTITION BY countryID ORDER BY yearBATTLE), yearBATTLE) <= 10, 0, 1) AS delta
    FROM `yourproject.yourdataset.battle`
  )
)
GROUP BY countryID, grp
ORDER BY countryID, grp
您可以使用问题中的示例数据来测试/试用上述查询(为便于演示,我把10年的间隔改成了5年):
#standardSQL
-- Self-contained demo: the CTE below stands in for the real table so the
-- query can be run as-is. The gap threshold here is 5 years.
WITH `yourproject.yourdataset.battle` AS (
  SELECT 'Battle1' battleID, 'Country1' countryID, 1567 yearBATTLE UNION ALL
  SELECT 'Battle2', 'Country2', 1568 UNION ALL
  SELECT 'Battle3', 'Country2', 1570 UNION ALL
  SELECT 'Battle4', 'Country3', 1599 UNION ALL
  SELECT 'Battle5', 'Country2', 1575 UNION ALL
  SELECT 'Battle6', 'Country2', 1620
)
SELECT
  countryID,
  MIN(yearBATTLE) AS start,
  MAX(yearBATTLE) AS finish,
  -- ORDER BY inside ARRAY_AGG makes the element order deterministic;
  -- without it the order of the battles in the array is unspecified.
  ARRAY_AGG(STRUCT(battleID, yearBATTLE) ORDER BY yearBATTLE, battleID) AS battels
FROM (
  SELECT
    battleID, countryID, yearBATTLE,
    -- Running total of the "new war" flags: every battle of the same war
    -- ends up with the same grp value within its country.
    SUM(delta) OVER(PARTITION BY countryID ORDER BY yearBATTLE) AS grp
  FROM (
    SELECT
      battleID, countryID, yearBATTLE,
      -- delta = 1 marks the first battle of a new war (gap to the previous
      -- battle is more than 5 years), otherwise 0. For a country's first
      -- battle LAG is NULL, so IFNULL substitutes the battle's own year,
      -- giving a gap of 0 and delta = 0.
      -- The threshold is inclusive (<= 5): 1570 -> 1575 stays in one war.
      IF(yearBATTLE - IFNULL(LAG(yearBATTLE) OVER(PARTITION BY countryID ORDER BY yearBATTLE), yearBATTLE) <= 5, 0, 1) AS delta
    FROM `yourproject.yourdataset.battle`
  )
)
GROUP BY countryID, grp
ORDER BY countryID, grp
结果如下
countryID start finish battels.battleID battels.yearBATTLE
Country1 1567 1567 Battle1 1567
Country2 1568 1575 Battle2 1568
Battle3 1570
Battle5 1575
Country2 1620 1620 Battle6 1620
Country3 1599 1599 Battle4 1599
答案 1 :(得分:0)
你的问题不太清楚,但如果你的意思是一场战争由一系列战斗组成,那么可以通过让表与自身连接(自连接)来得到这些序列:
-- Pairs every battle with each other battle of the same country that took
-- place fewer than 10 years apart. Each pair is returned twice (once in each
-- direction) because both filter conditions are symmetric in a and b.
-- Columns are listed and aliased explicitly so the two sides of the self-join
-- do not produce identically named result columns; countryID is the same on
-- both sides by the join condition, so it is returned once.
select
    a.battleID as battleID,
    a.countryID as countryID,
    a.yearBATTLE as yearBATTLE,
    b.battleID as otherBattleID,
    b.yearBATTLE as otherYearBATTLE
from BATTLE a
join BATTLE b on a.countryID = b.countryID
where abs(a.yearBATTLE - b.yearBATTLE) < 10
and a.battleID <> b.battleID