我在Bigquery中有以下几列:Value1,Value2,开始日期和结束日期。参见下图。
我尝试过 last_value 和 rank(),但始终无法得到正确的输出。主要原因是 last_value 取的是同一个 Value2 所有行中的最后一个日期,而不是 Value2 发生变化之前的最后一个日期。
答案 0 :(得分:1)
据我了解,您想要的是 Value2 每次更改后的“第一个”结束日期。
我不得不用相似的列重新创建您的表,以便您可以使用真实数据进行测试。 所以我想这个查询可以解决您的问题:
-- Collapses consecutive rows that carry the same Value2 (within one Value1,
-- walked in start_time order) into a single row per run, reporting the run's
-- earliest start_time and latest end_time. change_index numbers the runs of
-- each Value1 starting at 0.
WITH
-- Inline sample rows standing in for the real table; replace this CTE with the
-- real source to run the query against actual data.
data AS (
  SELECT 'abc' AS Value1, 123 AS Value2, 1 AS start_time, 2 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 2 AS start_time, 3 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 1234 AS Value2, 3 AS start_time, 4 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 4 AS start_time, 5 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 1234 AS Value2, 5 AS start_time, 6 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 1234 AS Value2, 6 AS start_time, 7 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 7 AS start_time, 8 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 8 AS start_time, 9 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 9 AS start_time, 10 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 10 AS start_time, 11 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 1234 AS Value2, 11 AS start_time, 12 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 1234 AS Value2, 12 AS start_time, 13 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 13 AS start_time, 14 AS end_time UNION ALL
  SELECT 'abc' AS Value1, 123 AS Value2, 14 AS start_time, 15 AS end_time
),
-- Marks with 1 every row whose Value2 differs from the previous row of the
-- same Value1; all other rows get 0.
delta AS (
  SELECT
    Value1,
    Value2,
    start_time,
    end_time,
    CASE
      -- IS DISTINCT FROM treats NULL as a comparable value, so a switch to or
      -- from NULL counts as a change (a plain != would yield NULL and be
      -- silently treated as "no change"). The ROW_NUMBER guard keeps the first
      -- row of each partition at 0, because its LAG is NULL only for lack of a
      -- predecessor, not because the value changed.
      WHEN ROW_NUMBER() OVER ordered_rows > 1
        AND Value2 IS DISTINCT FROM LAG(Value2) OVER ordered_rows THEN 1
      ELSE 0
    END AS value_changed
  FROM data
  -- end_time is a tie-breaker so this window and the running sum below walk
  -- the rows in the same order when start_time repeats.
  WINDOW ordered_rows AS (PARTITION BY Value1 ORDER BY start_time, end_time)
),
-- Turns the change flags into a run number via a running total.
changes AS (
  SELECT
    Value1,
    Value2,
    start_time,
    end_time,
    -- Explicit ROWS frame: the default RANGE frame would give rows that tie on
    -- the ordering key the same cumulative sum.
    SUM(value_changed) OVER (
      PARTITION BY Value1
      ORDER BY start_time, end_time
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS change_index
  FROM delta
)
-- One output row per run of identical Value2.
SELECT
  Value1,
  Value2,
  change_index,
  MIN(start_time) AS start_time,
  MAX(end_time) AS end_time
FROM changes
GROUP BY Value1, Value2, change_index
-- Without an ORDER BY the engine may return the runs in any order.
ORDER BY Value1, change_index