SQL:如何按月创建每周用户计数摘要

时间:2019-02-14 02:39:16

标签: sql google-bigquery

我正在尝试创建一个按月并排对比的每周活跃用户数汇总报告/表。我有一张2017年6月的表和一张2017年5月的表(原文写作“2016年5月”,但示例数据中的时间戳均落在2017年5月),需要把它们联接在一起。时间戳列是created_utc,它是UNIX时间戳;我已经知道如何把它转换成人类可读的格式,并从中提取一年中的周序号(1到52)。我遇到的问题是:

  1. 只用1到4的值来表示当月的周序号。也就是说,6月的第1周对应5月的第1周,6月的第2周对应5月的第2周,依此类推。

  2. 根据第1周到第4周的周序号联接(JOIN)两张表

  3. 对表进行透视(pivot)并添加WOW Change列。我希望最终的表如下所示:

W

| Week       | June_count  | May_count    |WOW_Change |
|:-----------|:-----------:|:------------:|:----------:|
| Week_1     |      5      |     8        |   0.6     |
| Week_2     |      2      |     1        |  -0.5     |
| Week_3     |     10      |     5        |  -0.5     |
| Week_4     |     30      |     6        |     1     |  

下面是一些示例数据以及我已经开始的代码。

-- Sample fixture for June 2017: one row per user activity event.
CREATE TABLE June (
    created_utc int,        -- event time as a UNIX timestamp in seconds
    userid      varchar(6)  -- named userid to match the INSERT column list and the queries' COUNT(DISTINCT userid)
);

INSERT INTO June (created_utc, userid)
VALUES
    (1496354167, '6eq4xf'),
    (1496362973, '6eqzz3'),
    (1496431934, '6ewlm8'),
    (1496870877, '6fwied'),
    (1496778080, '6fo79k'),
    (1496933893, '6g1gcg'),
    (1497154559, '6gjkid'),
    (1497618561, '6hmeud'),
    (1497377349, '6h1osm'),
    (1497221017, '6god73'),
    (1497731470, '6hvmic'),
    (1497273130, '6gs4ay'),
    (1498080798, '6ioz8q'),
    (1497769316, '6hyer4'),
    (1497415729, '6h5cgu'),
    (1497978764, '6iffwq');

-- Sample fixture for May 2017: one row per user activity event.
CREATE TABLE May (
    created_utc int,        -- event time as a UNIX timestamp in seconds
    userid      varchar(6)  -- named userid to match the INSERT column list and the queries' COUNT(DISTINCT userid)
);

INSERT INTO May (created_utc, userid)
VALUES
    (1493729491, '68sx7k'),
    (1493646801, '68m2s2'),
    (1493747285, '68uohf'),
    (1493664087, '68ntss'),
    (1493690759, '68qe5k'),
    (1493829196, '691fy9'),
    (1493646344, '68m1dv'),
    (1494166859, '69rhkl'),
    (1493883023, '6963qb'),
    (1494362328, '6a83wv'),
    (1494525998, '6alv6c'),
    (1493945230, '69bkhb'),
    (1494050355, '69jqtz'),
    (1494418011, '6accd0'),
    (1494425781, '6ad0xm'),
    (1494024697, '69hx2z'),
    (1494586576, '6aql9y');

#standardSQL
-- Asker's work-in-progress draft: for each month table, convert the
-- UNIX-seconds created_utc into a calendar date, extract the week-of-year
-- as a string, and count distinct users.
-- NOTE(review): COUNT(DISTINCT userid) is selected alongside non-aggregated
-- columns and there is no GROUP BY, so this will not run as written; the
-- intended grouping (probably week_number) should be confirmed.
-- NOTE(review): the two SELECT statements below are not separated by a
-- semicolon and must be run one at a time.

-- June draft.
SELECT created_utc,
DATE(TIMESTAMP_SECONDS(created_utc)) as event_date,
CAST(EXTRACT(WEEK FROM TIMESTAMP_SECONDS(created_utc)) AS STRING) AS week_number,
COUNT(distinct userid) as user_count
FROM June

-- May draft (same shape as the June draft).
SELECT created_utc,
DATE(TIMESTAMP_SECONDS(created_utc)) as event_date,
CAST(EXTRACT(WEEK FROM TIMESTAMP_SECONDS(created_utc)) AS STRING) AS week_number,
COUNT(distinct userid) as user_count
FROM May

2 个答案:

答案 0 :(得分:0)

对日期中的“日”(当月的第几天)做算术运算,即可得到当月的周序号:

-- Distinct-user counts per week-of-month for June and May, side by side.
-- week_number is a zero-based bucket of the day of month:
--   days 1-7 -> 0, 8-14 -> 1, 15-21 -> 2, 22-28 -> 3, 29-31 -> 4.
-- DIV is used because the `/` operator returns FLOAT64 in BigQuery, which
-- would produce fractional buckets instead of integer week indexes.
-- The inner join keeps only the weeks that have rows in both months.
SELECT j.week_number,
       j.user_count AS june_user_count,
       m.user_count AS may_user_count
FROM (SELECT DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) AS week_number,
             COUNT(DISTINCT userid) AS user_count
      FROM June
      GROUP BY week_number
     ) j INNER JOIN
     (SELECT DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) AS week_number,
             COUNT(DISTINCT userid) AS user_count
      FROM May
      GROUP BY week_number
     ) m
     ON m.week_number = j.week_number;

请注意,仅根据日期将数据拆分到不同的表中是个坏主意。数据应全部放入一个表中,如果数据量成问题,则可以分区。

答案 1 :(得分:0)

以下查询适用于BigQuery标准SQL

#standardSQL
-- Week-of-month distinct-user counts for June and May, side by side, plus
-- WOW_Change = (May_count - June_count) / June_count rounded to 2 decimals.
-- SAFE_DIVIDE returns NULL instead of raising an error when the June count
-- is 0 (possible when every userid in a bucket is NULL).
-- The inner join keeps only the weeks that have rows in both months.
WITH june_weekly AS (
  -- Bucket by day of month: days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5.
  SELECT
    DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 AS week_of_month,
    COUNT(DISTINCT userid) AS user_count
  FROM `project.dataset.June`
  GROUP BY week_of_month
),
may_weekly AS (
  -- Same bucketing as june_weekly so the two months line up on week_of_month.
  SELECT
    DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 AS week_of_month,
    COUNT(DISTINCT userid) AS user_count
  FROM `project.dataset.May`
  GROUP BY week_of_month
)
SELECT
  CONCAT('Week_', CAST(week_of_month AS STRING)) AS Week,
  June.user_count AS June_count,
  May.user_count AS May_count,
  ROUND(SAFE_DIVIDE(May.user_count - June.user_count, June.user_count), 2) AS WOW_Change
FROM june_weekly AS June
INNER JOIN may_weekly AS May
  USING (week_of_month)
-- Order by the numeric bucket so the row order is deterministic.
ORDER BY week_of_month

您可以使用问题中的示例数据来测试和试验上述查询,如下例所示

#standardSQL
-- Self-contained demo: the two month tables are inlined as CTEs holding the
-- sample rows from the question, then compared week-of-month by week-of-month.
-- WOW_Change = (May_count - June_count) / June_count rounded to 2 decimals;
-- SAFE_DIVIDE returns NULL instead of raising an error when the June count is 0.
-- The inner join keeps only the weeks that have rows in both months.
WITH `project.dataset.June` AS (
  SELECT 1496354167 AS created_utc, '6eq4xf' AS userid UNION ALL
  SELECT 1496362973, '6eqzz3' UNION ALL
  SELECT 1496431934, '6ewlm8' UNION ALL
  SELECT 1496870877, '6fwied' UNION ALL
  SELECT 1496778080, '6fo79k' UNION ALL
  SELECT 1496933893, '6g1gcg' UNION ALL
  SELECT 1497154559, '6gjkid' UNION ALL
  SELECT 1497618561, '6hmeud' UNION ALL
  SELECT 1497377349, '6h1osm' UNION ALL
  SELECT 1497221017, '6god73' UNION ALL
  SELECT 1497731470, '6hvmic' UNION ALL
  SELECT 1497273130, '6gs4ay' UNION ALL
  SELECT 1498080798, '6ioz8q' UNION ALL
  SELECT 1497769316, '6hyer4' UNION ALL
  SELECT 1497415729, '6h5cgu' UNION ALL
  SELECT 1497978764, '6iffwq'
),
`project.dataset.May` AS (
  SELECT 1493729491 AS created_utc, '68sx7k' AS userid UNION ALL
  SELECT 1493646801, '68m2s2' UNION ALL
  SELECT 1493747285, '68uohf' UNION ALL
  SELECT 1493664087, '68ntss' UNION ALL
  SELECT 1493690759, '68qe5k' UNION ALL
  SELECT 1493829196, '691fy9' UNION ALL
  SELECT 1493646344, '68m1dv' UNION ALL
  SELECT 1494166859, '69rhkl' UNION ALL
  SELECT 1493883023, '6963qb' UNION ALL
  SELECT 1494362328, '6a83wv' UNION ALL
  SELECT 1494525998, '6alv6c' UNION ALL
  SELECT 1493945230, '69bkhb' UNION ALL
  SELECT 1494050355, '69jqtz' UNION ALL
  SELECT 1494418011, '6accd0' UNION ALL
  SELECT 1494425781, '6ad0xm' UNION ALL
  SELECT 1494024697, '69hx2z' UNION ALL
  SELECT 1494586576, '6aql9y'
),
june_weekly AS (
  -- Bucket by day of month: days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5.
  SELECT
    DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 AS week_of_month,
    COUNT(DISTINCT userid) AS user_count
  FROM `project.dataset.June`
  GROUP BY week_of_month
),
may_weekly AS (
  -- Same bucketing as june_weekly so the two months line up on week_of_month.
  SELECT
    DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 AS week_of_month,
    COUNT(DISTINCT userid) AS user_count
  FROM `project.dataset.May`
  GROUP BY week_of_month
)
SELECT
  CONCAT('Week_', CAST(week_of_month AS STRING)) AS Week,
  June.user_count AS June_count,
  May.user_count AS May_count,
  ROUND(SAFE_DIVIDE(May.user_count - June.user_count, June.user_count), 2) AS WOW_Change
FROM june_weekly AS June
INNER JOIN may_weekly AS May
  USING (week_of_month)
-- Order by the numeric bucket so the row order is deterministic.
ORDER BY week_of_month

结果如下(示例数据中5月只覆盖前两周,而内联接只保留两个月都有数据的周,因此虽然6月的示例数据包含第3周,结果也只显示两周;应用于覆盖整月的真实数据时应该不会有问题)

Row Week    June_count  May_count   WOW_Change   
1   Week_1  5           12          1.4  
2   Week_2  6           5           -0.17