在 BigQuery 标准 SQL 中使用 LAG() 时,如何跳过 NULL 值,使其返回前一个非 NULL 的值?
我按照源表的格式准备了一些示例行,并做了脱敏处理。在该示例中,下面的查询只对前一行不是 NULL 的行有效。具体而言,结果中第3行和第4行的 val_2_transposed 应该被赋值为 '2017-01-25 04:02:36'(与第5行的情况一样),但它们却是 NULL。
这个结果本身可以理解。但是,应该有一种简单的方法可以指定类似 IGNORE NULLS 的选项吧?
--TEMP
-- Sample fixture: one row per (col_c, col_d) pair. Some timestamps have no
-- 'val_1' or 'val_2' row, which is what yields NULLs once the data is pivoted.
WITH example AS (
  SELECT
    'some_id' AS col_a,
    'foo' AS col_b,
    TIMESTAMP(sample.ts) AS col_c,
    sample.col_d
  FROM UNNEST(ARRAY<STRUCT<ts STRING, col_d STRING>>[
    ('2017-01-25 03:19:50', 'val_1'),
    ('2017-01-25 03:19:50', 'val_2'),
    ('2017-01-25 03:19:50', 'val_3'),
    ('2017-01-25 04:01:23', 'val_1'),
    ('2017-01-25 04:01:23', 'val_2'),
    ('2017-01-25 04:01:23', 'val_3'),
    ('2017-01-25 04:01:59', 'val_1'),
    ('2017-01-25 04:01:59', 'val_2'),
    ('2017-01-25 04:01:59', 'val_3'),
    ('2017-01-25 04:02:36', 'val_1'),
    ('2017-01-25 04:02:36', 'val_2'),
    ('2017-01-25 04:02:36', 'val_3'),
    ('2017-01-25 04:02:55', 'val_1'),
    ('2017-01-25 04:02:55', 'val_3'),
    ('2017-01-25 07:16:58', 'val_1'),
    ('2017-01-25 07:16:58', 'val_3'),
    ('2017-01-25 09:35:39', 'val_1'),
    ('2017-01-25 09:35:39', 'val_3'),
    ('2017-01-25 09:46:48', 'val_1'),
    ('2017-01-25 09:46:48', 'val_2'),
    ('2017-01-25 09:46:48', 'val_3'),
    ('2017-01-25 10:47:48', 'val_2'),
    ('2017-01-25 10:47:48', 'val_3')
  ]) AS sample
),
--TEMP
-- One row per (col_a, col_b, col_c); each val_N column holds col_c when a
-- matching 'val_N' row exists for that timestamp and NULL otherwise.
pivoted AS (
  SELECT
    col_a,
    col_b,
    col_c,
    MAX(CASE WHEN col_d = 'val_1' THEN col_c END) AS val_1_transposed,
    MAX(CASE WHEN col_d = 'val_2' THEN col_c END) AS val_2_transposed,
    MAX(CASE WHEN col_d = 'val_3' THEN col_c END) AS val_3_transposed
  FROM example
  GROUP BY col_a, col_b, col_c
)
-- Fill a NULL with the value of the immediately preceding row (by col_c).
-- LAG() looks back exactly one row, so a NULL whose predecessor is also NULL
-- stays NULL; this is the limitation the question is about.
SELECT
  col_a,
  col_b,
  col_c,
  COALESCE(val_1_transposed, LAG(val_1_transposed) OVER by_time) AS val_1_transposed,
  COALESCE(val_2_transposed, LAG(val_2_transposed) OVER by_time) AS val_2_transposed,
  COALESCE(val_3_transposed, LAG(val_3_transposed) OVER by_time) AS val_3_transposed
FROM pivoted
WINDOW by_time AS (ORDER BY col_c)
ORDER BY col_c DESC
答案 0 :(得分:2)
下面这篇文章详细介绍了两种解决方案:http://sqlmag.com/t-sql/last-non-null-puzzle
我改编了其中一种方案:它先用 MAX 窗口聚合函数求出截至当前行、值非 NULL 的最大相关 id,并以此作为分组标识;
再借助 ROWS UNBOUNDED PRECEDING 在每个分组内取累计最大值,从而把最近一个非 NULL 值延续到其后的 NULL 行上,而不是只用 LAG 回看一行。
--TEMP
-- Sample fixture: one row per (col_c, col_d) pair. Some timestamps have no
-- 'val_1' or 'val_2' row, which is what yields NULLs once the data is pivoted.
WITH example AS (
  SELECT
    'some_id' AS col_a,
    'foo' AS col_b,
    TIMESTAMP(sample.ts) AS col_c,
    sample.col_d
  FROM UNNEST(ARRAY<STRUCT<ts STRING, col_d STRING>>[
    ('2017-01-25 03:19:50', 'val_1'),
    ('2017-01-25 03:19:50', 'val_2'),
    ('2017-01-25 03:19:50', 'val_3'),
    ('2017-01-25 04:01:23', 'val_1'),
    ('2017-01-25 04:01:23', 'val_2'),
    ('2017-01-25 04:01:23', 'val_3'),
    ('2017-01-25 04:01:59', 'val_1'),
    ('2017-01-25 04:01:59', 'val_2'),
    ('2017-01-25 04:01:59', 'val_3'),
    ('2017-01-25 04:02:36', 'val_1'),
    ('2017-01-25 04:02:36', 'val_2'),
    ('2017-01-25 04:02:36', 'val_3'),
    ('2017-01-25 04:02:55', 'val_1'),
    ('2017-01-25 04:02:55', 'val_3'),
    ('2017-01-25 07:16:58', 'val_1'),
    ('2017-01-25 07:16:58', 'val_3'),
    ('2017-01-25 09:35:39', 'val_1'),
    ('2017-01-25 09:35:39', 'val_3'),
    ('2017-01-25 09:46:48', 'val_1'),
    ('2017-01-25 09:46:48', 'val_2'),
    ('2017-01-25 09:46:48', 'val_3'),
    ('2017-01-25 10:47:48', 'val_2'),
    ('2017-01-25 10:47:48', 'val_3')
  ]) AS sample
),
--TEMP
-- One row per (col_a, col_b, col_c); each val_N column holds col_c when a
-- matching 'val_N' row exists for that timestamp and NULL otherwise.
pivoted AS (
  SELECT
    col_a,
    col_b,
    col_c,
    MAX(CASE WHEN col_d = 'val_1' THEN col_c END) AS val_1_transposed,
    MAX(CASE WHEN col_d = 'val_2' THEN col_c END) AS val_2_transposed,
    MAX(CASE WHEN col_d = 'val_3' THEN col_c END) AS val_3_transposed
  FROM example
  GROUP BY col_a, col_b, col_c
),
-- grp tags every row with the col_c of the latest row so far (in col_c order)
-- whose val_2_transposed is not NULL. A non-NULL row and the NULL rows that
-- follow it therefore share one grp value. Rows before the first non-NULL
-- value get grp = NULL.
-- The window must be ordered by col_c: col_a is the same literal on every
-- fixture row, so ordering by it leaves the row order undefined.
grouped AS (
  SELECT
    col_a,
    col_b,
    col_c,
    val_1_transposed,
    val_2_transposed,
    val_3_transposed,
    MAX(CASE WHEN val_2_transposed IS NOT NULL THEN col_c END)
      OVER (ORDER BY col_c ROWS UNBOUNDED PRECEDING) AS grp
  FROM pivoted
)
SELECT
  col_a,
  col_b,
  col_c,
  -- Single-row LAG fill carried over from the question, kept for comparison:
  -- it still leaves NULLs when the preceding row is NULL as well.
  CASE
    WHEN val_1_transposed IS NULL THEN LAG(val_1_transposed) OVER (ORDER BY col_c)
    ELSE val_1_transposed
  END AS val_1_transposed,
  CASE
    WHEN val_2_transposed IS NULL THEN LAG(val_2_transposed) OVER (ORDER BY col_c)
    ELSE val_2_transposed
  END AS val_2_transposed,
  CASE
    WHEN val_3_transposed IS NULL THEN LAG(val_3_transposed) OVER (ORDER BY col_c)
    ELSE val_3_transposed
  END AS val_3_transposed,
  -- Within a grp only the first row (by col_c) has a non-NULL value, so the
  -- running MAX carries that value forward over the following NULL rows.
  MAX(val_2_transposed)
    OVER (PARTITION BY grp ORDER BY col_c ROWS UNBOUNDED PRECEDING) AS lag_ignored_nulls
FROM grouped
ORDER BY col_c DESC
答案 1 :(得分:1)
请尝试以下查询:
#standardSQL
--TEMP
-- Sample fixture: one row per (col_c, col_d) pair. Some timestamps have no
-- 'val_1' or 'val_2' row, which is what yields NULLs once the data is pivoted.
WITH example AS (
  SELECT
    'some_id' AS col_a,
    'foo' AS col_b,
    TIMESTAMP(sample.ts) AS col_c,
    sample.col_d
  FROM UNNEST(ARRAY<STRUCT<ts STRING, col_d STRING>>[
    ('2017-01-25 03:19:50', 'val_1'),
    ('2017-01-25 03:19:50', 'val_2'),
    ('2017-01-25 03:19:50', 'val_3'),
    ('2017-01-25 04:01:23', 'val_1'),
    ('2017-01-25 04:01:23', 'val_2'),
    ('2017-01-25 04:01:23', 'val_3'),
    ('2017-01-25 04:01:59', 'val_1'),
    ('2017-01-25 04:01:59', 'val_2'),
    ('2017-01-25 04:01:59', 'val_3'),
    ('2017-01-25 04:02:36', 'val_1'),
    ('2017-01-25 04:02:36', 'val_2'),
    ('2017-01-25 04:02:36', 'val_3'),
    ('2017-01-25 04:02:55', 'val_1'),
    ('2017-01-25 04:02:55', 'val_3'),
    ('2017-01-25 07:16:58', 'val_1'),
    ('2017-01-25 07:16:58', 'val_3'),
    ('2017-01-25 09:35:39', 'val_1'),
    ('2017-01-25 09:35:39', 'val_3'),
    ('2017-01-25 09:46:48', 'val_1'),
    ('2017-01-25 09:46:48', 'val_2'),
    ('2017-01-25 09:46:48', 'val_3'),
    ('2017-01-25 10:47:48', 'val_2'),
    ('2017-01-25 10:47:48', 'val_3')
  ]) AS sample
),
--TEMP
-- One row per (col_a, col_b, col_c); each val_N column holds col_c when a
-- matching 'val_N' row exists for that timestamp and NULL otherwise.
pivoted AS (
  SELECT
    col_a,
    col_b,
    col_c,
    MAX(CASE WHEN col_d = 'val_1' THEN col_c END) AS val_1_transposed,
    MAX(CASE WHEN col_d = 'val_2' THEN col_c END) AS val_2_transposed,
    MAX(CASE WHEN col_d = 'val_3' THEN col_c END) AS val_3_transposed
  FROM example
  GROUP BY col_a, col_b, col_c
),
-- Keep a row's own value (as STRING) when it has one. Otherwise collect the
-- values of all rows with an earlier col_c into one comma-separated string;
-- STRING_AGG skips NULL inputs.
-- NOTE(review): the next step takes the first list element, so this relies on
-- the values being concatenated in window order (nearest earlier row first)
-- - confirm that ordering is guaranteed for analytic STRING_AGG.
filled AS (
  SELECT
    col_a,
    col_b,
    col_c,
    COALESCE(
      CAST(val_1_transposed AS STRING),
      STRING_AGG(CAST(val_1_transposed AS STRING)) OVER earlier_rows
    ) AS val_1_transposed,
    COALESCE(
      CAST(val_2_transposed AS STRING),
      STRING_AGG(CAST(val_2_transposed AS STRING)) OVER earlier_rows
    ) AS val_2_transposed,
    COALESCE(
      CAST(val_3_transposed AS STRING),
      STRING_AGG(CAST(val_3_transposed AS STRING)) OVER earlier_rows
    ) AS val_3_transposed
  FROM pivoted
  WINDOW earlier_rows AS (
    ORDER BY col_c DESC ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
  )
)
-- Take the first element of each comma-separated list and turn it back into
-- a TIMESTAMP; a NULL list yields NULL.
SELECT
  col_a,
  col_b,
  col_c,
  (
    SELECT TIMESTAMP(item)
    FROM UNNEST(SPLIT(val_1_transposed)) AS item WITH OFFSET AS idx
    WHERE idx = 0
  ) AS val_1_transposed,
  (
    SELECT TIMESTAMP(item)
    FROM UNNEST(SPLIT(val_2_transposed)) AS item WITH OFFSET AS idx
    WHERE idx = 0
  ) AS val_2_transposed,
  (
    SELECT TIMESTAMP(item)
    FROM UNNEST(SPLIT(val_3_transposed)) AS item WITH OFFSET AS idx
    WHERE idx = 0
  ) AS val_3_transposed
FROM filled
ORDER BY col_c DESC
我意识到上面的查询看起来过于臃肿,下面是稍作重构的版本(现在比原始代码更轻量):
#standardSQL
-- GetFirst: returns the first element of a comma-separated list, converted to
-- a TIMESTAMP. A NULL list yields NULL.
CREATE TEMP FUNCTION GetFirst(list STRING) AS (
  TIMESTAMP(SPLIT(list)[SAFE_OFFSET(0)])
);
--TEMP
-- dummy data here ...
--TEMP
-- Keep a row's own value when it has one; otherwise take the first element of
-- the comma-separated list of values from rows with an earlier col_c.
-- STRING_AGG skips NULL inputs.
-- NOTE(review): relies on the list being built in window order (nearest
-- earlier row first) - confirm for analytic STRING_AGG.
SELECT
  col_a,
  col_b,
  col_c,
  GetFirst(
    COALESCE(val_1_transposed, STRING_AGG(val_1_transposed) OVER (lookback))
  ) AS val_1_transposed,
  GetFirst(
    COALESCE(val_2_transposed, STRING_AGG(val_2_transposed) OVER (lookback))
  ) AS val_2_transposed,
  GetFirst(
    COALESCE(val_3_transposed, STRING_AGG(val_3_transposed) OVER (lookback))
  ) AS val_3_transposed
FROM (
  -- Pivot col_d into one STRING column per value, one row per
  -- (col_a, col_b, col_c).
  SELECT
    col_a,
    col_b,
    col_c,
    STRING(MAX(CASE WHEN col_d = 'val_1' THEN col_c END)) AS val_1_transposed,
    STRING(MAX(CASE WHEN col_d = 'val_2' THEN col_c END)) AS val_2_transposed,
    STRING(MAX(CASE WHEN col_d = 'val_3' THEN col_c END)) AS val_3_transposed
  FROM example
  GROUP BY col_a, col_b, col_c
)
WINDOW lookback AS (
  ORDER BY col_c DESC ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
)
ORDER BY col_c DESC
答案 2 :(得分:1)
请尝试以下版本。
这是把 Pentium10 提供的链接中的第二种解决方案改写为 BigQuery 的版本。
看起来它的性能可以胜过第一种方案,因为它只涉及一个窗口聚合函数。
#standardSQL
--TEMP
-- dummy data here
--TEMP
-- Last-non-NULL fill using a single running MAX per column.
-- Each row builds the string <sort key><payload>:
--   * sort key: col_c formatted to a fixed 26 characters
--     (YYYY-MM-DD HH:MM:SS.ffffff), so string order equals time order and the
--     payload always starts at character 27, with or without fractional
--     seconds in col_c;
--   * payload: the value itself as a STRING.
-- CONCAT returns NULL when the value is NULL, and MAX skips NULLs, so the
-- running MAX is the string of the latest row so far that has a value.
-- SUBSTR(..., 27) strips the key and TIMESTAMP() restores the type.
-- NOTE(review): LAST_VALUE(val_N_transposed IGNORE NULLS) OVER (win) would
-- express the same fill without the string round trip.
SELECT
  col_a,
  col_b,
  col_c,
  TIMESTAMP(SUBSTR(
    MAX(CONCAT(
      FORMAT_TIMESTAMP('%E4Y-%m-%d %H:%M:%E6S', col_c),
      STRING(val_1_transposed)
    )) OVER (win),
    27
  )) AS val_1_transposed,
  TIMESTAMP(SUBSTR(
    MAX(CONCAT(
      FORMAT_TIMESTAMP('%E4Y-%m-%d %H:%M:%E6S', col_c),
      STRING(val_2_transposed)
    )) OVER (win),
    27
  )) AS val_2_transposed,
  TIMESTAMP(SUBSTR(
    MAX(CONCAT(
      FORMAT_TIMESTAMP('%E4Y-%m-%d %H:%M:%E6S', col_c),
      STRING(val_3_transposed)
    )) OVER (win),
    27
  )) AS val_3_transposed
FROM (
  -- Pivot col_d into one TIMESTAMP column per value, one row per
  -- (col_a, col_b, col_c).
  SELECT
    col_a,
    col_b,
    col_c,
    MAX(IF(col_d = 'val_1', col_c, NULL)) AS val_1_transposed,
    MAX(IF(col_d = 'val_2', col_c, NULL)) AS val_2_transposed,
    MAX(IF(col_d = 'val_3', col_c, NULL)) AS val_3_transposed
  FROM example
  GROUP BY col_a, col_b, col_c
)
WINDOW win AS (PARTITION BY col_a, col_b ORDER BY col_c ROWS UNBOUNDED PRECEDING)
ORDER BY col_c DESC