假设我在bigquery中查看了数据
Person | Amount | yearMonth
---------------------------
AA | 100 | 201701
AA | 200 | 201702
AA | 300 | 201703
AA | 70 | 201704
AB | 10 | 201701
AB | 50 | 201702
AB | 60 | 201703
AB | 70 | 201704
AC | 70 | 201701
AC | 80 | 201702
AC | 30 | 201703
AC | 10 | 201704
现在,我需要每个月为每个人获得过去3个月的平均值
预期结果:
Person | Amount | yearMonth
---------------------------
AA | 200 | 201703(avg of 201701-201703)
AA | 190 | 201704(avg of 201702-201704)
AB | 40 | 201703(avg of 201701-201703)
AB | 60 | 201704(avg of 201702-201704)
AC | 60 | 201703(avg of 201701-201703)
AC | 40 | 201704(avg of 201702-201704)
这是如何计算的?
第一行
我不确定如何分组。如果您的答案与此问题有关联,我不介意。
谢谢堆
在遗留SQL中是否也可以?我还没有迁移到标准SQL。 我的观点是遗留SQL
答案 0 :(得分:2)
下面是BigQuery Standard SQL(至少应该让您了解正确分组的逻辑)
#standardSQL
SELECT
person, yearMonth, CAST(amount AS INT64) amount
FROM (
SELECT
person, yearMonth, dt,
AVG(amount) OVER(PARTITION BY person ORDER BY dt RANGE BETWEEN 63 PRECEDING AND CURRENT row) amount,
COUNT(1) OVER(PARTITION BY person ORDER BY dt RANGE BETWEEN 63 PRECEDING AND CURRENT row) months
FROM (
SELECT
person, amount, yearMonth,
UNIX_DATE(DATE(DIV(yearMonth, 100), MOD(yearMonth, 100), 1)) AS dt
FROM `project.dataset.table`
)
)
WHERE months = 3
-- ORDER BY person, yearMonth
您可以使用虚拟数据进行测试/播放,如下所示
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'AA' person, 100 amount, 201701 yearMonth UNION ALL
SELECT 'AA', 200, 201702 UNION ALL
SELECT 'AA', 300, 201703 UNION ALL
SELECT 'AA', 70, 201704 UNION ALL
SELECT 'AB', 10, 201701 UNION ALL
SELECT 'AB', 50, 201702 UNION ALL
SELECT 'AB', 60, 201703 UNION ALL
SELECT 'AB', 70, 201704 UNION ALL
SELECT 'AC', 70, 201701 UNION ALL
SELECT 'AC', 80, 201702 UNION ALL
SELECT 'AC', 30, 201703 UNION ALL
SELECT 'AC', 10, 201704
)
SELECT
person, yearMonth, CAST(amount AS INT64) amount
FROM (
SELECT
person, yearMonth, dt,
AVG(amount) OVER(PARTITION BY person ORDER BY dt RANGE BETWEEN 63 PRECEDING AND CURRENT row) amount,
COUNT(1) OVER(PARTITION BY person ORDER BY dt RANGE BETWEEN 63 PRECEDING AND CURRENT row) months
FROM (
SELECT
person, amount, yearMonth,
UNIX_DATE(DATE(DIV(yearMonth, 100), MOD(yearMonth, 100), 1)) AS dt
FROM `project.dataset.table`
)
)
WHERE months = 3
ORDER BY person, yearMonth
输出符合预期
person yearMonth amount
AA 201703 200
AA 201704 190
AB 201703 40
AB 201704 60
AC 201703 60
AC 201704 40
为BigQuery Legacy SQL添加了版本
#legacySQL
SELECT
person, yearMonth, INTEGER(amount) amount
FROM (
SELECT
person, yearMonth, dt,
AVG(amount) OVER(PARTITION BY person ORDER BY dt range BETWEEN 63*60*60*24 preceding AND current row) amount,
COUNT(1) OVER(PARTITION BY person ORDER BY dt range BETWEEN 63*60*60*24 preceding AND current row) months
FROM (
SELECT
person, amount, yearMonth,
TIMESTAMP_TO_SEC(TIMESTAMP(CONCAT(STRING(INTEGER(yearMonth/100)), '-', SUBSTR(STRING(100 + yearMonth % 100), 2, 2), '-01'))) AS dt
FROM [project:dataset.table]
)
)
WHERE months = 3
-- ORDER BY person, yearMonth
您可以使用以下示例使用虚拟数据
来测试/播放它#legacySQL
SELECT
person, yearMonth, INTEGER(amount) amount
FROM (
SELECT
person, yearMonth, dt,
AVG(amount) OVER(PARTITION BY person ORDER BY dt range BETWEEN 63*60*60*24 preceding AND current row) amount,
COUNT(1) OVER(PARTITION BY person ORDER BY dt range BETWEEN 63*60*60*24 preceding AND current row) months
FROM (
SELECT
person, amount, yearMonth,
TIMESTAMP_TO_SEC(TIMESTAMP(CONCAT(STRING(INTEGER(yearMonth/100)), '-', SUBSTR(STRING(100 + yearMonth % 100), 2, 2), '-01'))) AS dt
FROM -- [project:dataset.table]
(SELECT 'AA' person, 100 amount, 201701 yearMonth),
(SELECT 'AA' person, 200 amount, 201702 yearMonth),
(SELECT 'AA' person, 300 amount, 201703 yearMonth),
(SELECT 'AA' person, 70 amount, 201704 yearMonth),
(SELECT 'AB' person, 10 amount, 201701 yearMonth),
(SELECT 'AB' person, 50 amount, 201702 yearMonth),
(SELECT 'AB' person, 60 amount, 201703 yearMonth),
(SELECT 'AB' person, 70 amount, 201704 yearMonth),
(SELECT 'AC' person, 70 amount, 201701 yearMonth),
(SELECT 'AC' person, 80 amount, 201702 yearMonth),
(SELECT 'AC' person, 30 amount, 201703 yearMonth),
(SELECT 'AC' person, 10 amount, 201704 yearMonth)
)
)
WHERE months = 3
ORDER BY person, yearMonth