重复字段内的条件逻辑

时间:2017-11-08 15:25:46

标签: google-bigquery

假设我有一个使用以下代码创建的表结构:

-- Inline sample data: one row per click (item, time, class, user, score).
WITH YourTable AS (
  SELECT item, click_time, class, userid, score
  FROM UNNEST(
    ARRAY<STRUCT<item STRING, click_time TIMESTAMP, class STRING, userid STRING, score INT64>>[
      ('a1', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'm', 'u1', 4),
      ('a1', TIMESTAMP('2016-03-03 19:53:23 UTC'), 'm', 'u2', 1),
      ('a1', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u3', 0),
      ('a1', TIMESTAMP('2016-03-03 19:51:23 UTC'), 'd', 'u4', 8),
      ('a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u1', 5),
      ('a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'm', 'u2', 2)
    ]
  )
)
-- Collapse the clicks of each item into a single repeated field of structs.
SELECT
  item,
  ARRAY_AGG(STRUCT(click_time, userid, class, score)) AS clicks
FROM YourTable
GROUP BY item

现在我想选择“m”类点击的第一次出现(在一个项目内)的时间戳。目前,如果没有子查询,我看不到这样做的方法。

有人知道这是否可行吗?

2 个答案:

答案 0 :(得分:1)

以下适用于 BigQuery 标准 SQL:

  
#standardSQL
-- For each item, return the earliest click whose class is 'm'.
-- Non-'m' clicks are mapped to NULL and dropped by IGNORE NULLS, so an item
-- that has no 'm' click yields no click at all instead of falling back to its
-- earliest click of another class.
WITH YourTable AS (
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 1 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u3' AS userid, 0 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' AS class, 'u4' AS userid, 8 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u1' AS userid, 5 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 2 AS score
)
SELECT
  item,
  ARRAY_AGG(
    -- Only 'm' clicks survive as candidates; everything else becomes NULL.
    IF(class = 'm', STRUCT(click_time, userid, class, score), NULL)
    IGNORE NULLS
    ORDER BY click_time
    LIMIT 1
  ) AS clicks
FROM YourTable
GROUP BY item

结果如下

item    clicks.click_time           clicks.userid   clicks.class    clicks.score  
a1      2016-03-03 19:52:23 UTC     u1              m               4    
a2      2016-03-03 19:52:23 UTC     u2              m               2    

#standardSQL
-- Sample clicks; column names are taken from the first SELECT of the UNION ALL.
WITH YourTable AS (
  SELECT 'a1' AS item, TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score
  UNION ALL SELECT 'a1', TIMESTAMP('2016-03-03 19:53:23 UTC'), 'm', 'u2', 1
  UNION ALL SELECT 'a1', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u3', 0
  UNION ALL SELECT 'a1', TIMESTAMP('2016-03-03 19:51:23 UTC'), 'd', 'u4', 8
  UNION ALL SELECT 'a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u1', 5
  UNION ALL SELECT 'a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'm', 'u2', 2
),
-- One row per item, with all of its clicks gathered into a repeated field.
TransformedTable AS (
  SELECT
    item,
    ARRAY_AGG(STRUCT(click_time, userid, class, score)) AS clicks
  FROM YourTable
  GROUP BY item
)
SELECT
  item,
  (
    -- Scalar subquery over the repeated field: earliest click of class 'm',
    -- returned as a whole struct.
    SELECT click
    FROM UNNEST(clicks) AS click
    WHERE click.class = 'm'
    ORDER BY click.click_time
    LIMIT 1
  ) AS clicks
FROM TransformedTable

对于上述示例数据,其输出与第一个查询完全相同。

答案 1 :(得分:0)

我不确定您为什么要避免使用子查询。在这种情况下,您需要对数组使用标量子查询,该子查询会针对每一行分别求值。请以您的示例数据和表结构为基础,尝试下面的查询:

-- Inline sample data: one row per click.
WITH YourTable AS (
  SELECT item, click_time, class, userid, score
  FROM UNNEST(
    ARRAY<STRUCT<item STRING, click_time TIMESTAMP, class STRING, userid STRING, score INT64>>[
      ('a1', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'm', 'u1', 4),
      ('a1', TIMESTAMP('2016-03-03 19:53:23 UTC'), 'm', 'u2', 1),
      ('a1', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u3', 0),
      ('a1', TIMESTAMP('2016-03-03 19:51:23 UTC'), 'd', 'u4', 8),
      ('a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'd', 'u1', 5),
      ('a2', TIMESTAMP('2016-03-03 19:52:23 UTC'), 'm', 'u2', 2)
    ]
  )
),
-- One row per item, with its clicks aggregated into a repeated field.
TransformedTable AS (
  SELECT
    item,
    ARRAY_AGG(STRUCT(click_time, userid, class, score)) AS clicks
  FROM YourTable
  GROUP BY item
)
SELECT
  item,
  (
    -- Scalar subquery evaluated per row: timestamp of the earliest 'm' click.
    SELECT click.click_time
    FROM UNNEST(clicks) AS click
    WHERE click.class = 'm'
    ORDER BY click.click_time
    LIMIT 1
  ) AS first_click_time
FROM TransformedTable;