# Sample transaction data: three parallel vectors, one element per transaction.
# Vectors must be built with c(); a bare parenthesised list is a parse error.
client_id <- c(2260419, 2260412, 2260413, 2260415)
# Dates are quoted and parsed explicitly; unquoted, 2016-09-03 would be
# evaluated as the arithmetic expression 2016 - 9 - 3.
transaction_date <- as.Date(
  c("2016-09-03", "2016-09-04", "2016-09-06", "2016-09-07")
)
amount <- c(350.0, 250.0, 431.0, 220.0)
-- month_ARPU: average revenue per user (ARPU) for each month.
-- The inner query totals revenue per Cust_id per month; the outer query
-- averages those per-customer totals within each month.
-- NOTE(review): this is a CTE body; the introducing WITH keyword is presumably
-- in the part of the query not shown here.
month_ARPU AS
(SELECT
    visit_month,
    Avg(revenue) AS ARPU
FROM
    (SELECT
        Cust_id,
        -- Month offset of the transaction relative to 2010-01-01.
        Datediff(MONTH, '2010-01-01', transaction_date) AS visit_month,
        Sum(transaction_size) AS revenue
    FROM transactions
    -- Keep only transactions dated after one year before today.
    WHERE transaction_date > Dateadd('year', -1, CURRENT_DATE)
    -- Positional grouping: 1 = Cust_id, 2 = visit_month.
    GROUP BY
        1,
        2) AS customer_month_revenue  -- many dialects require a derived-table alias
-- Positional grouping: 1 = visit_month.
GROUP BY 1)
我理解 SELECT、GROUP BY、COUNT(1)、SUM() 和 AS 各自是如何运作的,但不明白它们像上面的代码那样组合在一起时是如何运作的,主要是不清楚 COUNT(1) 和 SUM() 的运作方式。
答案 0 :(得分:0)
好的,下面是一种做法,但我做了一些假设。问题在于,有 3 处选择了未包含在 GROUP BY 子句中的值。因此,我假设这些值在 GROUP BY 子句各变量的每一种取值组合内都是唯一的。
library(tidyverse)
# Monthly ARPU (average revenue per user), mirroring the SQL query:
#   1. keep transactions from the last calendar year,
#   2. total revenue per customer per month,
#   3. average those per-customer totals within each month.
# Assumes `transactions` has columns Cust_id, transaction_date and
# transaction_size, as in the SQL version -- TODO confirm.
month_ARPU <- transactions %>%
  mutate(transaction_date = as.Date(transaction_date)) %>%
  # Filter rows *before* aggregating, as the SQL WHERE clause does.
  # seq() with by = "-1 year" steps back one calendar year from today.
  filter(
    transaction_date > seq(Sys.Date(), by = "-1 year", length.out = 2L)[2L]
  ) %>%
  # Number of month boundaries between 2010-01-01 and the transaction date.
  mutate(
    visit_month = (as.integer(format(transaction_date, "%Y")) - 2010L) * 12L +
      as.integer(format(transaction_date, "%m")) - 1L
  ) %>%
  # group_by() takes column names; group_by(1, 2) would group by constants.
  group_by(Cust_id, visit_month) %>%
  summarise(revenue = sum(transaction_size), .groups = "drop") %>%
  group_by(visit_month) %>%
  summarise(ARPU = mean(revenue), .groups = "drop")