用户保留-查询优化

时间:2019-09-17 02:31:55

标签: sql sql-server

问题

我需要显示12周内每周的用户留存率。我已经有一个适用于3周的查询,但代码量似乎很大。我想找一种更优雅的解决方案,不需要显式地把事件表与自身联接12次。

背景

表格布局如下:

  

EventId | EventTime | Count(总是1) | UserId

我到目前为止所拥有的

-- Weekly user-retention report (SQL Server).
--
-- Step 1 builds #CohortTable: one row per week for roughly the last 12 weeks
--        (StartDate, EndDate, Cohort label).
-- Step 2 counts, for every cohort, the distinct users active in the cohort's
--        own week ('<1Week') and how many of those same users were active
--        again 1, 2 and 3 weeks later.

-- Rebuild the cohort calendar from scratch on every run.
IF OBJECT_ID('tempdb..#CohortTable') IS NOT NULL
    DROP TABLE #CohortTable;

WITH Days AS
(
    -- Walk backwards one calendar day at a time, starting today.
    -- The value is truncated to DATE so that week boundaries fall on midnight;
    -- carrying GETDATE()'s time of day into the boundaries would leave a gap
    -- between one cohort's end and the next cohort's start.
    SELECT CAST(GETDATE() AS date) AS dt, 1 AS Id

    UNION ALL

    SELECT DATEADD(DAY, -1, dt), Id - 1
    FROM Days
    WHERE Id >= -84
),
Weeks AS
(
    -- Keep only the days that start a week. DATEPART(dw, ...) = 1 is the first
    -- day of the week according to the session's SET DATEFIRST setting.
    SELECT
        dt AS StartDate,
        -- Inclusive last calendar day of the week, capped at today for the
        -- week that is still in progress.
        CASE
            WHEN DATEADD(DAY, 6, dt) > CAST(GETDATE() AS date) THEN CAST(GETDATE() AS date)
            ELSE DATEADD(DAY, 6, dt)
        END AS EndDate
    FROM Days
    WHERE DATEPART(dw, dt) = 1
)
SELECT
    StartDate,
    EndDate,
    CONCAT(FORMAT(StartDate, 'MMM dd'), ' - ', FORMAT(EndDate, 'MMM dd')) AS Cohort
INTO #CohortTable
FROM Weeks
OPTION (MAXRECURSION 0);

WITH UserWeek AS
(
    -- One row per (cohort, user, week offset) in which the user has at least
    -- one event. WeekNo 0 is the cohort's own week, 1 the week after, and so on.
    -- The range is half-open (>= start, < start + 4 weeks) so every event of
    -- every day is covered exactly once, whatever its time of day.
    -- EndDate is not used here: it only differs from "StartDate + 6 days" for
    -- the week in progress, whose remaining days lie in the future.
    SELECT DISTINCT
        C.StartDate,
        E.UserId,
        DATEDIFF(DAY, C.StartDate, E.EventTime) / 7 AS WeekNo
    FROM #CohortTable AS C
    INNER JOIN Event AS E
        ON  E.EventTime >= C.StartDate
        AND E.EventTime <  DATEADD(WEEK, 4, C.StartDate)   -- widen to 13 for '+12Week'
    WHERE E.UserId IS NOT NULL
)
SELECT
    C.StartDate,
    C.EndDate,
    C.Cohort,
    -- COUNT(DISTINCT ...) counts users, not event rows, so the figures are not
    -- inflated when a user has several events in a week. NULLIF keeps NULL
    -- (rather than 0) for empty cells, as the SUM-based version returned.
    NULLIF(COUNT(DISTINCT Joined.UserId), 0)                                           AS [<1Week],
    NULLIF(COUNT(DISTINCT CASE WHEN Later.WeekNo = 1 THEN Later.UserId END), 0)        AS [+1Week],
    NULLIF(COUNT(DISTINCT CASE WHEN Later.WeekNo = 2 THEN Later.UserId END), 0)        AS [+2Week],
    NULLIF(COUNT(DISTINCT CASE WHEN Later.WeekNo = 3 THEN Later.UserId END), 0)        AS [+3Week]
    -- To report more weeks, widen the range in UserWeek and add one line per week here.
FROM #CohortTable AS C
LEFT JOIN UserWeek AS Joined
    -- Users that belong to the cohort: active during the cohort's own week.
    ON  Joined.StartDate = C.StartDate
    AND Joined.WeekNo    = 0
LEFT JOIN UserWeek AS Later
    -- The same users' activity in the following weeks of the same cohort.
    ON  Later.StartDate = Joined.StartDate
    AND Later.UserId    = Joined.UserId
    AND Later.WeekNo    > 0
GROUP BY C.Cohort, C.StartDate, C.EndDate
ORDER BY C.StartDate ASC;

样本数据

  

1 | 2019-09-01 5:00 AM | 1 | 1234

     

2 | 2019-09-01 6:00 AM | 1 | 2345

     

3 | 2019-09-09 12:00 PM | 1 | 1234

预期结果

(原文此处为一张预期结果的截图,图片未能保留。)

1 个答案:

答案 0 :(得分:1)

我有一个版本,您可能会觉得它更简单一些。它不需要那么多联接,但确实用了不少CTE。

-- Weekly retention report built from chained CTEs and a final PIVOT.
-- For every weekly cohort it reports how many distinct users fall into each
-- "weeks retained" bucket (<1W, +1W ... +12W), where a user's bucket grows by
-- one for every directly preceding cohort week in which they also had an event.
;WITH dates AS
(
    -- Set up the date range: walk back one calendar day at a time from today.
    SELECT CONVERT(date, GETDATE()) AS dt, 1 AS Id
    UNION ALL
    SELECT DATEADD(DAY, -1, dt), dates.Id - 1
    FROM dates
    WHERE Id >= -84
)
, cohort AS (
    -- Create the cohorts: one row per week, starting on the day for which
    -- DATEPART(dw, ...) = 1 (first day of the week per the session's DATEFIRST).
    -- EndDate is the inclusive last calendar day, capped at today.
    SELECT
        dt AS StartDate,
        CASE
            WHEN DATEADD(DAY, 6, dt) > CONVERT(date, GETDATE()) THEN CONVERT(date, GETDATE())
            ELSE DATEADD(DAY, 6, dt)
        END AS EndDate,
        CONCAT(
            FORMAT(dt, 'MMM dd'),
            ' - ',
            FORMAT(CASE
                       WHEN DATEADD(DAY, 6, dt) > CONVERT(date, GETDATE()) THEN CONVERT(date, GETDATE())
                       ELSE DATEADD(DAY, 6, dt)
                   END, 'MMM dd')
        ) AS Cohort,
        ROW_NUMBER() OVER (ORDER BY dt) AS CohortNo
    FROM dates
    WHERE DATEPART(dw, dt) = 1
)
, event AS (
    -- You probably have this as a real event table; it is created here as a
    -- throw-away CTE holding the sample rows.
    SELECT * FROM (VALUES (1,'2019-09-01 5:00 AM',1,1234),(2,'2019-09-01 6:00 AM',1,2345),(3,'2019-09-09 12:00PM',1,1234)) AS t(EventId,EventTime,Counter,UserID)
)
, cohortevent AS (
    -- The complete set of cohorts and their events (cohorts without events are
    -- kept, with NULL event columns).
    -- Half-open range: EndDate is a DATE, so "BETWEEN StartDate AND EndDate"
    -- against a DATETIME column would stop at midnight of the last day and
    -- drop that day's events.
    SELECT
        c.StartDate,
        c.EndDate,
        c.Cohort,
        c.CohortNo,
        e.EventId,
        e.EventTime,
        e.Counter,
        e.UserID
    FROM cohort AS c
    LEFT JOIN event AS e
        ON  e.EventTime >= c.StartDate
        AND e.EventTime <  DATEADD(DAY, 1, c.EndDate)
)
, Retained AS (
    -- Recursive CTE that works out how long each user has been retained.
    -- Anchor: every cohort/event row starts at ret = 1 (0 when the cohort has no event).
    SELECT
        c.StartDate,
        c.EndDate,
        c.Cohort,
        c.CohortNo,
        c.EventId,
        c.EventTime,
        c.Counter,
        c.UserID,
        CASE WHEN c.UserID IS NOT NULL THEN 1 ELSE 0 END AS ret
    FROM cohortevent AS c
    UNION ALL
    -- Step: extend the chain when the same user also has an event with a lower
    -- EventId in the immediately preceding cohort.
    SELECT
        c.StartDate,
        c.EndDate,
        c.Cohort,
        c.CohortNo,
        c.EventId,
        c.EventTime,
        c.Counter,
        c.UserID,
        r.ret + 1
    FROM cohortevent AS c
    INNER JOIN Retained AS r
        ON  r.UserID   = c.UserID
        AND r.CohortNo = c.CohortNo - 1
        AND r.EventId  < c.EventId
)
, WeeksRetained AS (
    -- Keep the longest chain per cohort and user and turn it into the pivot
    -- label: 1 -> '<1W', 2 -> '+1W', 3 -> '+2W', ...
    -- Cohorts without events get a label that matches no pivot column, so they
    -- still appear in the result with every week column NULL.
    SELECT
        StartDate,
        EndDate,
        Cohort,
        UserID,
        CASE
            WHEN MAX(ret) = 1 THEN '<1W'
            ELSE '+' + CONVERT(varchar(10), MAX(ret) - 1) + 'W'
        END AS Weeks
    FROM Retained
    GROUP BY StartDate, EndDate, Cohort, UserID
)
-- Finally pivot this by the number of weeks
SELECT
    piv.StartDate,
    piv.EndDate,
    piv.Cohort,
    piv.[<1W], piv.[+1W], piv.[+2W], piv.[+3W], piv.[+4W], piv.[+5W], piv.[+6W],
    piv.[+7W], piv.[+8W], piv.[+9W], piv.[+10W], piv.[+11W], piv.[+12W]
FROM
(
    SELECT StartDate, EndDate, Cohort, Weeks, COUNT(DISTINCT UserID) AS UserCount
    FROM WeeksRetained
    GROUP BY StartDate, EndDate, Cohort, Weeks
) AS src
PIVOT
(
    SUM(UserCount)
    FOR Weeks IN ([<1W], [+1W], [+2W], [+3W], [+4W], [+5W], [+6W], [+7W], [+8W], [+9W], [+10W], [+11W], [+12W])
) AS piv
-- Without an ORDER BY the row order of the report is not guaranteed.
ORDER BY piv.StartDate
OPTION (MAXRECURSION 0);

结果是:

StartDate   EndDate     Cohort          <1W     +1W     +2W     +3W     +4W     +5W     +6W     +7W     +8W     +9W     +10W    +11W    +12W
2019-06-23  2019-06-29  Jun 23 - Jun 29 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-06-30  2019-07-06  Jun 30 - Jul 06 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-07-07  2019-07-13  Jul 07 - Jul 13 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-07-14  2019-07-20  Jul 14 - Jul 20 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-07-21  2019-07-27  Jul 21 - Jul 27 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-07-28  2019-08-03  Jul 28 - Aug 03 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-08-04  2019-08-10  Aug 04 - Aug 10 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-08-11  2019-08-17  Aug 11 - Aug 17 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-08-18  2019-08-24  Aug 18 - Aug 24 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-08-25  2019-08-31  Aug 25 - Aug 31 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-09-01  2019-09-07  Sep 01 - Sep 07 2       NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-09-08  2019-09-14  Sep 08 - Sep 14 NULL    1       NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
2019-09-15  2019-09-17  Sep 15 - Sep 17 NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL