在 BigQuery 中合并(JOIN)两个简单的查询

时间:2017-11-21 17:58:43

标签: sql join count google-bigquery

这是第一个查询

-- Counts distinct cons_id values per calendar month (BigQuery legacy SQL).
-- The string column `date` is reassembled as '<4 digits>-<2 digits>-<2 digits>'
-- (trailing four digits, middle two digits, leading two digits) so that
-- TIMESTAMP() can parse it and MONTH() can return the month number.
SELECT
  MONTH(
    TIMESTAMP(
      REGEXP_EXTRACT(date, '.*.([0-9]{4})$')
      + '-'
      + REGEXP_EXTRACT(date, '.([0-9]{2}).')
      + '-'
      + REGEXP_EXTRACT(date, '^([0-9]{2}).*')
    )
  ) AS month,
  COUNT(DISTINCT cons_id)
FROM [table1]
WHERE dbo_type = 'smth'
GROUP BY month

它返回按月分组的不同用户 ID 的数量。这是第二个查询:

-- Counts distinct cons_id values per calendar month, restricted to rows with
-- success_operations > 0 (BigQuery legacy SQL).
-- The string column `date` is reassembled as '<4 digits>-<2 digits>-<2 digits>'
-- (trailing four digits, middle two digits, leading two digits) so that
-- TIMESTAMP() can parse it and MONTH() can return the month number.
SELECT
  MONTH(
    TIMESTAMP(
      REGEXP_EXTRACT(date, '.*.([0-9]{4})$')
      + '-'
      + REGEXP_EXTRACT(date, '.([0-9]{2}).')
      + '-'
      + REGEXP_EXTRACT(date, '^([0-9]{2}).*')
    )
  ) AS month,
  COUNT(DISTINCT cons_id)
FROM [table1]
WHERE dbo_type = 'smth'
  AND success_operations > 0
GROUP BY month

它返回按相同月份分组的活跃用户 ID 的数量。如何将这两个查询合并,以得到如下这样的简单表格:

month | users     | active_users
------| --------- | ---------
9     | 100       | 50
10    | 120       | 60
11    | 140       | 70

---答案----

谢谢,迈克尔!

#legacySQL
-- One pass over the table yields both per-month figures:
--   users        = exact number of distinct cons_id values
--   active_users = exact number of distinct cons_id values on rows with success_operations > 0
SELECT
  -- month number: the first two digits in DATE that have a character on each side
  INTEGER(REGEXP_EXTRACT(DATE, '.([0-9]{2}).')) AS month,
  EXACT_COUNT_DISTINCT(cons_id) AS users,
  -- IF() turns cons_id into NULL on rows without successful operations; NULLs are not counted
  EXACT_COUNT_DISTINCT(IF(success_operations > 0, cons_id, NULL)) AS active_users
FROM [project:dataset.table]
WHERE dbo_type = 'smth'
GROUP BY month
ORDER BY month

1 个答案:

答案 0 :(得分:1)

请尝试以下 BigQuery Legacy SQL 查询(与您在问题中使用的方言相同):

  
#legacySQL
-- Merges the two per-month counts into a single scan of the table:
--   users        = distinct cons_id values
--   active_users = distinct cons_id values on rows with success_operations > 0
-- COUNT(DISTINCT ...) in legacy SQL is a statistical approximation for large
-- cardinalities; switch to EXACT_COUNT_DISTINCT when exact figures are required.
SELECT
  -- rebuild '<4 digits>-<2 digits>-<2 digits>' from the string column so TIMESTAMP() can parse it
  MONTH(TIMESTAMP(REGEXP_EXTRACT(date, '.*.([0-9]{4})$') + '-' +
  REGEXP_EXTRACT(date, '.([0-9]{2}).') + '-' +
  REGEXP_EXTRACT(date, '^([0-9]{2}).*'))) AS month,
  COUNT(DISTINCT cons_id) AS users,
  -- IF() yields NULL on rows without successful operations; NULLs are not counted
  COUNT(DISTINCT IF(success_operations > 0, cons_id, NULL)) AS active_users
FROM
  [project:dataset.table1]
WHERE
  dbo_type = 'smth'
GROUP BY month
-- without ORDER BY the row order is not guaranteed; sort so months come out ascending
ORDER BY month

请注意:旧版SQL中的COUNT(DISTINCT)是近似值 - 请参阅详情 - https://cloud.google.com/bigquery/docs/reference/legacy-sql#countdistinct

如果需要精确的结果,可以改用 EXACT_COUNT_DISTINCT。

此外,在我看来,可以不必使用下面这段冗长的表达式:

  -- Verbose form: rebuilds a '<4 digits>-<2 digits>-<2 digits>' string from `date`,
  -- parses it with TIMESTAMP() and takes the month number with MONTH().
  MONTH(TIMESTAMP(REGEXP_EXTRACT(date, '.*.([0-9]{4})$') + '-' +
  REGEXP_EXTRACT(date, '.([0-9]{2}).') + '-' +
  REGEXP_EXTRACT(date, '^([0-9]{2}).*'))) AS month

您可以只使用下面这个更简短的表达式:

-- Short form: captures the first two digits in DATE that have a character on each
-- side and converts them to an integer (presumably the month of a dd-mm-yyyy value).
INTEGER(REGEXP_EXTRACT(DATE, '.([0-9]{2}).'))   

对于BigQuery Standard SQL,您可以尝试以下方法:

#standardSQL
-- Per-month distinct users and active users (success_operations > 0) in one scan.
-- Assumes DATE is a string shaped like dd-mm-yyyy: the pattern captures the first
-- two digits that have a character on each side, i.e. the mm part.
SELECT
  -- REGEXP_EXTRACT returns STRING ('09'); cast so month is numeric (9) and sorts numerically
  CAST(REGEXP_EXTRACT(DATE, '.([0-9]{2}).') AS INT64) AS month,
  COUNT(DISTINCT cons_id) AS users,
  -- IF() yields NULL on rows without successful operations; COUNT(DISTINCT ...) skips NULLs
  COUNT(DISTINCT IF(success_operations > 0, cons_id, NULL)) AS active_users
FROM `project.dataset.yourTable`
WHERE dbo_type = 'smth'
GROUP BY month
-- row order is undefined without ORDER BY; return months in ascending order
ORDER BY month

您可以使用如下虚拟数据来测试/试验上面的查询:

#standardSQL
-- Self-contained demo: the CTE shadows the table name with three sample rows.
-- Expected result: one row with month = 12, users = 3, active_users = 1.
WITH `project.dataset.yourTable` AS (
  SELECT '31-12-2017' DATE, 1 cons_id, 1 success_operations, 'smth' dbo_type UNION ALL
  SELECT '31-12-2017', 2, 0, 'smth' UNION ALL
  SELECT '31-12-2017', 3, 0, 'smth'
)
SELECT
  -- REGEXP_EXTRACT returns STRING ('12'); cast so month is numeric and sorts numerically
  CAST(REGEXP_EXTRACT(DATE, '.([0-9]{2}).') AS INT64) AS month,
  COUNT(DISTINCT cons_id) AS users,
  -- IF() yields NULL on rows without successful operations; COUNT(DISTINCT ...) skips NULLs
  COUNT(DISTINCT IF(success_operations > 0, cons_id, NULL)) AS active_users
FROM `project.dataset.yourTable`
WHERE dbo_type = 'smth'
GROUP BY month
-- row order is undefined without ORDER BY; return months in ascending order
ORDER BY month