他的杰出思想家
我想创建一个CASE条件:对于相同的anonymous_id,如果在60天内还存在更近的uuid_ts,则active_users显示“是”。
-- Draft query from the question: pairs every row of last_user_activity with the
-- matching full_signup_completed row and derives per-row lifetime / new-user /
-- active-user values.
-- NOTE(review): the statement is unfinished. The last CASE is never closed with
-- THEN/END and the final condition stops at "(S)", so it cannot run as written.
SELECT t1.anonymous_id user_id,
t1.uuid_ts activity_date,
t2.uuid_ts signup_date,
-- Activity Lifetime: difference of number of days signed up to last activity
-- NOTE(review): the arguments here are (signup, activity), the reverse of the
-- order used in the two MONTH comparisons below. If this is BigQuery's
-- DATE_DIFF (first argument minus second), the result is negative whenever the
-- activity follows the signup -- confirm the intended order.
DATE_DIFF(CAST(t2.uuid_ts AS DATE), CAST(t1.uuid_ts AS DATE), DAY) AS activity_lifetime,
-- New Users: If month of activity is same as sign_up month
(CASE WHEN DATE_DIFF(CAST(t1.uuid_ts AS DATE), CAST(t2.uuid_ts AS DATE), MONTH)=0 THEN TRUE ELSE FALSE END) AS new_user,
-- Active Users: If month of activity is greater than sign_up month AND activity is found
(CASE WHEN DATE_DIFF(CAST(t1.uuid_ts AS DATE), CAST(t2.uuid_ts AS DATE), MONTH)>0
-- ** ____ NEED HELP HERE ____ **
-- NOTE(review): t1 is datascience.last_user_activity itself, so every joined
-- row's anonymous_id is already present in this subquery and NOT IN can never
-- be true here. anonymous_id is also unqualified although both t1 and t2 carry
-- a column of that name.
AND anonymous_id NOT IN (SELECT anonymous_id FROM datascience.last_user_activity)
-- NOTE(review): activity_date is only a SELECT-list alias of t1.uuid_ts, and
-- the right-hand side "(S)" is a truncated subquery -- this is where the draft
-- breaks off.
AND DATE_ADD(activity_date, INTERVAL 60 DAY) > (S)
FROM datascience.last_user_activity AS t1
INNER JOIN datascience.full_signup_completed AS t2
ON t2.anonymous_id = t1.anonymous_id
-- Keep only pairs where both timestamps are present.
WHERE DATE(t1.uuid_ts) IS NOT NULL AND DATE(t2.uuid_ts) IS NOT NULL
ORDER BY activity_lifetime DESC
样本数据:
anon_id|signup_date|activity_date|
__________________________________
123 |01-01-2019 |02-01-2019 |
123 |01-01-2019 |02-02-2019 |
123 |01-01-2019 |02-03-2019 |
123 |01-01-2019 |02-04-2019 |
想要:
anon_id|signup_date|activity_date| active
__________________________________
123 |01-01-2019 |02-01-2019 | yes
123 |01-01-2019 |02-02-2019 | yes
123 |01-01-2019 |02-03-2019 | no
123 |01-01-2019 |02-04-2019 | no
如果同一行中存在未来日期(在60天范围内),则活动字段显示“是”,否则显示“否”。
答案 0 :(得分:2)
仍然不能100%地确定这是您要寻找的东西,但我希望它能对您有所帮助:
在60天内:
(输出为“是,是,是,否”,因为02-04-2019晚于02-03-2019,且两者相差不超过60天)
-- Demo: mark each activity row 'yes' when the same anon_id has a strictly later
-- activity_date at most 60 days after it, otherwise 'no'.
WITH
sample_data AS (
  -- One user (signed up 2019-01-01) with four activity dates, one per month.
  SELECT
    '123' AS anon_id,
    DATE '2019-01-01' AS signup_date,
    activity_date
  FROM
    UNNEST([
      DATE '2019-01-02',
      DATE '2019-02-02',
      DATE '2019-03-02',
      DATE '2019-04-02'
    ]) AS activity_date
)
SELECT
  cur.anon_id,
  cur.signup_date,
  cur.activity_date,
  CASE
    WHEN EXISTS (
      SELECT 'found'
      FROM sample_data AS nxt
      WHERE nxt.anon_id = cur.anon_id
        -- 1..60 days ahead: strictly in the future, but no further than 60 days.
        AND DATE_DIFF(nxt.activity_date, cur.activity_date, DAY) BETWEEN 1 AND 60
    ) THEN 'yes'
    ELSE 'no'
  END AS active
FROM
  sample_data AS cur
ORDER BY
  anon_id,
  signup_date,
  activity_date
60天或以后:
(输出将是“是,否,否,否”,因为2月有28天、3月有31天,所以02-02-2019与02-04-2019之间只有59天)
-- Demo: mark each activity row 'yes' when the same anon_id has another
-- activity_date that falls 60 or more days after it, otherwise 'no'.
WITH
sample_data AS (
  -- One user (signed up 2019-01-01) with four activity dates, one per month.
  SELECT
    '123' AS anon_id,
    DATE '2019-01-01' AS signup_date,
    activity_date
  FROM
    UNNEST([
      DATE '2019-01-02',
      DATE '2019-02-02',
      DATE '2019-03-02',
      DATE '2019-04-02'
    ]) AS activity_date
)
SELECT
  base.anon_id,
  base.signup_date,
  base.activity_date,
  CASE
    WHEN EXISTS (
      SELECT 'found'
      FROM sample_data AS later
      WHERE later.anon_id = base.anon_id
        -- At least 60 days after the current row's activity date.
        AND DATE_DIFF(later.activity_date, base.activity_date, DAY) >= 60
    ) THEN 'yes'
    ELSE 'no'
  END AS active
FROM
  sample_data AS base
ORDER BY
  anon_id,
  signup_date,
  activity_date
答案 1 :(得分:1)
您的问题/逻辑/日期还不清楚,但我认为以下查询应该为您指明正确的方向。
-- Per-activity report for signed-up users: days since signup, whether the
-- activity falls in the signup month, and whether a later-month activity is
-- followed by another activity within 60 days.
with joined as (
    -- Join the tables and cast timestamps to dates once, so later steps reuse them.
    select
        anonymous_id,
        date(full_signup_completed.uuid_ts) as signup_date,
        -- Month number only (1-12); kept for output compatibility. Not used for
        -- comparisons below because it ignores the year.
        extract(month from full_signup_completed.uuid_ts) as signup_month,
        date(last_user_activity.uuid_ts) as activity_date,
        extract(month from last_user_activity.uuid_ts) as activity_month
    from datascience.full_signup_completed
    -- Inner join: the not-null filter on last_user_activity.uuid_ts below removes
    -- unmatched signups anyway, so an outer join would only hide that fact.
    inner join datascience.last_user_activity using (anonymous_id)
    where full_signup_completed.uuid_ts is not null
        and last_user_activity.uuid_ts is not null
),
activity60 as (
    -- Activity dates that are followed by a strictly later activity of the same
    -- user no more than 60 days afterwards. Requiring j2 to be later than j1
    -- keeps a row from matching itself or any earlier activity.
    select
        j1.anonymous_id,
        j1.activity_date,
        true as has_activity_within_60_days
    from joined as j1
    inner join joined as j2
        on j2.anonymous_id = j1.anonymous_id
        and j2.activity_date > j1.activity_date
        and j2.activity_date <= date_add(j1.activity_date, interval 60 day)
    group by j1.anonymous_id, j1.activity_date
),
final as (
    -- Combine all derived fields.
    select
        joined.*,
        date_diff(activity_date, signup_date, day) as activity_lifetime,
        -- Compare calendar months including the year, so Dec 2018 and Jan 2019
        -- are ordered correctly. Evaluates to T/F.
        date_trunc(activity_date, month) = date_trunc(signup_date, month) as new_user,
        -- Rows without a qualifying follow-up have no activity60 match; treat
        -- the missing flag as false instead of dropping the row. Evaluates to T/F.
        date_trunc(activity_date, month) > date_trunc(signup_date, month)
            and coalesce(activity60.has_activity_within_60_days, false) as your_custom_field
    from joined
    left join activity60 using (anonymous_id, activity_date)
)
select * from final
order by activity_lifetime desc
在您的示例中,日期是否采用DD-MM-YYYY格式?如果不是,我不确定60天的约束条件是否有意义。