假设我有一个使用以下代码创建的表结构:
WITH YourTable AS (
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u1' AS userid, 4 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 1 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u3' AS userid, 0 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' as class, 'u4' AS userid, 8 as score UNION ALL
SELECT 'a2' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u1' AS userid, 5 as score UNION ALL
SELECT 'a2' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 2 as score
)
SELECT
item,
ARRAY_AGG(STRUCT(click_time, userid, class, score)) as clicks
FROM
YourTable
GROUP BY
item
现在我想选择“m”类点击的第一次出现(在一个项目内)的时间戳。目前,如果没有子查询,我看不到这样做的方法。
有任何想法是否可行?
答案 0 :(得分:1)
BigQuery标准SQL
#standardSQL
WITH YourTable AS (
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 1 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u3' AS userid, 0 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' AS class, 'u4' AS userid, 8 AS score UNION ALL
SELECT 'a2' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u1' AS userid, 5 AS score UNION ALL
SELECT 'a2' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 2 AS score
)
SELECT item,
ARRAY_AGG(STRUCT(click_time, userid, class, score) ORDER BY CASE class WHEN 'm' THEN 0 ELSE 1 END, click_time LIMIT 1) AS clicks
FROM YourTable
GROUP BY item
结果如下
item clicks.click_time clicks.userid clicks.class clicks.score
a1 2016-03-03 19:52:23 UTC u1 m 4
a2 2016-03-03 19:52:23 UTC u2 m 2
或
#standardSQL
WITH YourTable AS (
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 1 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u3' AS userid, 0 AS score UNION ALL
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' AS class, 'u4' AS userid, 8 AS score UNION ALL
SELECT 'a2' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u1' AS userid, 5 AS score UNION ALL
SELECT 'a2' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 2 AS score
),
TransformedTable AS (
SELECT item,
ARRAY_AGG(STRUCT(click_time, userid, class, score)) AS clicks
FROM YourTable
GROUP BY item
)
SELECT item,
( SELECT click
FROM UNNEST(clicks) click
WHERE class = 'm'
ORDER BY click_time
LIMIT 1
) AS clicks
FROM TransformedTable
与第一个查询的输出完全相同
答案 1 :(得分:0)
我不确定你为什么要避免使用子查询。在这种情况下,您需要在数组上使用标量子查询,该子查询将针对每一行进行求值。试试这个,使用您的示例数据和模式作为基础:
WITH YourTable AS (
SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u1' AS userid, 4 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 1 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u3' AS userid, 0 as score UNION ALL
SELECT 'a1' AS item, TIMEsTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' as class, 'u4' AS userid, 8 as score UNION ALL
SELECT 'a2' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u1' AS userid, 5 as score UNION ALL
SELECT 'a2' AS item, TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 2 as score
), TransformedTable AS (
SELECT
item,
ARRAY_AGG(STRUCT(click_time, userid, class, score)) as clicks
FROM
YourTable
GROUP BY
item
)
SELECT
item,
(SELECT click_time FROM UNNEST(clicks)
WHERE class = 'm'
ORDER BY click_time LIMIT 1
) AS first_click_time
FROM TransformedTable;