要达到与下面这个查询相同的效果,有什么高效的方法:
-- Returns the most recent row (by `created`) for every id found in either
-- sample source. Output columns: id, value, created.
WITH
data_one AS (
    SELECT "abc" AS id, 100 AS value, TIMESTAMP("2018-11-26T14:39:51") AS created UNION ALL
    SELECT "def" AS id, 111 AS value, TIMESTAMP("2018-11-27T14:39:51") AS created
),
data_two AS (
    SELECT "abc" AS id, 203 AS value, TIMESTAMP("2018-11-28T14:39:51") AS created UNION ALL
    SELECT "ghi" AS id, 418 AS value, TIMESTAMP("2018-11-28T14:39:51") AS created
),
-- Both sources stacked into a single row set. Columns are listed explicitly
-- because UNION ALL matches columns by position, not by name.
data AS (
    SELECT id, value, created FROM data_one
    UNION ALL
    SELECT id, value, created FROM data_two
)
SELECT id, value, created
FROM (
    SELECT
        id,
        value,
        created,
        -- ROW_NUMBER() instead of RANK(): RANK() gives 1 to every row tied on
        -- the newest `created`, which would return several rows for one id.
        -- Ties are broken arbitrarily here; extend the ORDER BY if a
        -- deterministic tie-break is required.
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY created DESC) AS row_num
    FROM data
)
WHERE row_num = 1
其结果将是:
+-----+-------+-------------------------+
| id  | value | created                 |
+-----+-------+-------------------------+
| abc | 203   | 2018-11-28 14:39:51 UTC |
+-----+-------+-------------------------+
| def | 111   | 2018-11-27 14:39:51 UTC |
+-----+-------+-------------------------+
| ghi | 418   | 2018-11-28 14:39:51 UTC |
+-----+-------+-------------------------+
如果数据量真的很大怎么办?这是一种好方法,还是有更好的方法?
答案 0 :(得分:1)
替代选项为
#standardSQL
-- Latest (value, created) pair per id, obtained by aggregating each id's rows
-- into a one-element array ordered newest-first. Output: id, value, created.
WITH data_one AS (
  SELECT "abc" AS id, 100 AS value, TIMESTAMP("2018-11-26T14:39:51") AS created
  UNION ALL
  SELECT "def", 111, TIMESTAMP("2018-11-27T14:39:51")
),
data_two AS (
  SELECT "abc" AS id, 203 AS value, TIMESTAMP("2018-11-28T14:39:51") AS created
  UNION ALL
  SELECT "ghi", 418, TIMESTAMP("2018-11-28T14:39:51")
),
-- Rows of both sources combined.
data AS (
  SELECT * FROM data_one
  UNION ALL
  SELECT * FROM data_two
),
-- One row per id carrying the newest (value, created) pair as a struct.
newest_per_id AS (
  SELECT
    id,
    ARRAY_AGG(
      -- The explicitly typed struct supplies the output column names.
      STRUCT<value INT64, created TIMESTAMP>(value, created)
      ORDER BY created DESC
      LIMIT 1
    )[OFFSET(0)] AS newest_row
  FROM data
  GROUP BY id
)
-- Expand the struct back into top-level columns.
SELECT
  id,
  newest_row.*
FROM newest_per_id
或者,如果您想避免显式声明 STRUCT 的类型(例如列很多,或者希望写法更通用):
#standardSQL
-- Latest (value, created) pair per id without spelling out a STRUCT type.
-- Output: id, value, created.
WITH data_one AS (
  SELECT "abc" AS id, 100 AS value, TIMESTAMP("2018-11-26T14:39:51") AS created
  UNION ALL
  SELECT "def", 111, TIMESTAMP("2018-11-27T14:39:51")
),
data_two AS (
  SELECT "abc" AS id, 203 AS value, TIMESTAMP("2018-11-28T14:39:51") AS created
  UNION ALL
  SELECT "ghi", 418, TIMESTAMP("2018-11-28T14:39:51")
),
-- Rows of both sources combined.
data AS (
  SELECT * FROM data_one
  UNION ALL
  SELECT * FROM data_two
)
-- Zero-row branch: contributes no data. It is placed first so that the
-- UNION ALL result takes its column names from `data`, since the struct
-- fields expanded in the second branch are unnamed.
SELECT *
FROM data
WHERE FALSE
UNION ALL
SELECT
  id,
  -- (value, created) builds a struct with unnamed fields; the newest one per
  -- id is kept and expanded into columns that line up positionally with `data`.
  ARRAY_AGG(
    (value, created)
    ORDER BY created DESC
    LIMIT 1
  )[OFFSET(0)].*
FROM data
GROUP BY id
两种情况下的结果都是
Row id value created
1 abc 203 2018-11-28 14:39:51 UTC
2 ghi 418 2018-11-28 14:39:51 UTC
3 def 111 2018-11-27 14:39:51 UTC