情况:
我有三个桌子。表1包含ID和订阅日期。表2包含ID,活动状态和活动状态更改的最新日期。 表3具有ID和状态更改的所有日志。 注意: 在订阅日期,所有ID均处于活动状态。如果一天中的状态变化不止一个,那么最近的一个就是要选择的状态。
目标:
我需要计算每天每种状态的ID数。即每天有多少人活跃,不活跃和有风险。 我的问题是确保即使在特定日期没有数据,ID的状态也会每天进行计数。例如: ID 5(请参阅下面的小提琴)自5月2日(加入日期)以来一直处于活动状态,并且没有状态更改,因此,直到现在,他应该每天被视为活动状态。
在其他地方咨询了此问题之后,一些人建议创建函数并交叉应用并将计数存储在表中。我没有这样做的技能,但这是否可以解决这个问题?
所需的输出:
+------------+----------+-------+
| date | status | count |
+------------+----------+-------+
| 1-May-2019 | active | 0 |
| 1-May-2019 | inactive | 0 |
| 1-May-2019 | risky | 1 |
| 2-May-2019 | active | 1 |
| 2-May-2019 | inactive | 0 |
| 2-May-2019 | risky | 1 |
| 3-May-2019 | active | 1 |
| 3-May-2019 | inactive | 0 |
| 3-May-2019 | risky | 1 |
| 4-May-2019 | active | 1 |
| 4-May-2019 | inactive | 0 |
| 4-May-2019 | risky | 1 |
| 5-May-2019 | active | 3 |
| 5-May-2019 | inactive | 0 |
| 5-May-2019 | risky | 1 |
| ... | ... | ... |
+------------+----------+-------+
小提琴:
--create date table (not sure if usable)
CREATE TABLE #dates ([date] date)
DECLARE @dIncr DATE = '2019-05-01'
DECLARE @dEnd DATE = dateadd(day,-1,getdate())
WHILE (@dIncr <= @dEnd)
BEGIN
INSERT INTO #dates ([date]) VALUES (@dIncr)
SELECT @dIncr = DATEADD(day,1,@dIncr)
END
GO
-- ID + Subscribed Date (starts active at joindate)
create table #t1 (id int, [subdate] date)
insert into #t1 values
(9, '2019-01-01'),
(1, '2019-05-02'),
(2, '2019-05-05'),
(3, '2019-05-05'),
(4, '2019-05-10')
GO
-- ID + Latest activity date
create table #t2 (id int, [status] varchar(max), [datestatus] date)
insert into #t2 values
(9,'risky', '2019-03-01'),
(1, 'active', '2019-05-02'),
(2, 'inactive', '2019-05-13'),
(3, 'active', '2019-05-14'),
(4, 'risky', '2019-05-15')
GO
-- ID + Activity Logs Date
create table #t3 (id int, [statuschange] varchar(max), [datechange] date)
insert into #t3 values
(9,'inactive', '2019-01-01'),
(9,'active', '2019-02-01'),
(9,'risky', '2019-03-01'),
(2, 'risky', '2019-05-08'),
(2, 'inactive', '2019-05-13'),
(3, 'inactive', '2019-05-08'),
(3, 'active', '2019-05-14'),
(4, 'inactive', '2019-05-15'),
(4, 'risky', '2019-05-15')
GO
我现在拥有什么:
;with cte as (
select
#t1.id
,COALESCE(LAG(datechange) over(partition by #t1.id order by datechange),subdate) as StartDate
,#t3.datechange
,COALESCE(LAG(statuschange) over(partition by #t1.id order by datechange),'active') as PreviousStatusChange
,#t3.statuschange
from #t1
inner join #t2 on #t1.id=#t2.id
left join #t3 on #t1.id=#t3.id
)
select
cte.id
,cte.StartDate
,coalesce(cte.datechange,'2099-01-01') as EndDate
,PreviousStatusChange
,coalesce(statuschange,previousstatuschange) AS NewStatus
from cte
答案 0 :(得分:2)
日期表是实现此目的的正确方法。您需要种子数据来获得所需的输出。我打开了您的日期表,以便年长的用户填写。
我还添加了一个状态表,因为您的输出要求每个状态的每个日期都需要一行。
DROP TABLE IF EXISTS #dates
CREATE TABLE #dates ([date] date)
DECLARE @dIncr DATE = '01/01/2019'
DECLARE @dEnd DATE = dateadd(day,-1,getdate())
WHILE (@dIncr <= @dEnd)
BEGIN
INSERT INTO #dates ([date]) VALUES (@dIncr)
SELECT @dIncr = DATEADD(day,1,@dIncr)
END
GO
DROP TABLE IF EXISTS #status
CREATE TABLE #status (status varchar(20))
INSERT INTO #status VALUES
('active'),
('inactive'),
('risky')
GO
DROP TABLE IF EXISTS #t1
create table #t1 (id int, [subdate] date)
insert into #t1 values
(9, '2019-01-01'),
(1, '2019-05-02'),
(2, '2019-05-05'),
(3, '2019-05-05'),
(4, '2019-05-10')
GO
DROP TABLE IF EXISTS #t2
create table #t2 (id int, [status] varchar(max), [datestatus] date)
insert into #t2 values
(9,'risky', '2019-03-01'),
(1, 'active', '2019-05-02'),
(2, 'inactive', '2019-05-13'),
(3, 'active', '2019-05-14'),
(4, 'risky', '2019-05-15')
GO
DROP TABLE IF EXISTS #t3
create table #t3 (id int, [statuschange] varchar(max), [datechange] date)
insert into #t3 values
(9,'inactive', '2019-01-01'),
(9,'active', '2019-02-01'),
(9,'risky', '2019-03-01'),
(2, 'risky', '2019-05-08'),
(2, 'inactive', '2019-05-13'),
(3, 'inactive', '2019-05-08'),
(3, 'active', '2019-05-14'),
(4, 'inactive', '2019-05-15'),
(4, 'risky', '2019-05-15')
GO
DECLARE
@From DATE
, @Thru DATE;
SET @From = '05/01/2019';
SET @Thru = '05/19/2019';
WITH
output_foundation AS
(
SELECT date, status
FROM #dates CROSS JOIN #status
)
, id_foundation AS
(
SELECT DISTINCT id, date
FROM #t1 CROSS JOIN #Dates
)
, id_stat AS
(
SELECT id, datechange, statuschange FROM #t3
UNION
SELECT id, subdate, 'active' FROM #t1
UNION
SELECT id, datestatus, status FROM #t2
)
, id_spread AS
(
SELECT
IFDN.id
, IFDN.date
, IDS.statuschange
FROM
id_foundation AS IFDN
LEFT OUTER JOIN id_stat AS IDS
ON IFDN.id = IDS.id
AND IFDN.date = IDS.datechange
), id_fill AS
(
SELECT
IDS.id
, IDS.date
, COALESCE(IDS.statuschange, LS.statuschange) AS statuschange
FROM
id_spread AS IDS
OUTER APPLY
(
SELECT TOP 1 statuschange
FROM id_spread
WHERE id = IDS.id AND date < IDS.date AND statuschange IS NOT NULL
ORDER BY date DESC
) AS LS
WHERE
(IDS.statuschange IS NOT NULL OR LS.statuschange IS NOT NULL)
)
SELECT
OFDN.date
, OFDN.status
, COUNT(statuschange) AS count
FROM
output_foundation AS OFDN
LEFT OUTER JOIN id_fill AS IDF
ON OFDN.date = IDF.date
AND OFDN.status = IDF.statuschange
WHERE
OFDN.date >= @From
AND OFDN.date <= @Thru
GROUP BY
OFDN.date
, OFDN.status
ORDER BY
OFDN.date
, OFDN.status;
答案 1 :(得分:0)
可能会在下面的查询中为您提供帮助。我不确定它是否与您想要的结果相同,因为您提到所需的输出与在临时表查询中提供的示例数据不匹配。 目前,我正在考虑您需要直到现在为止每天每种状态的可交换总和。
SELECT
R.date
,R.status
,SUM (StausValue) OVER (PARTITION BY [status] ORDER BY date) AS Count
FROM
(
SELECT Q.* , CASE WHEN T3.datechange IS NOT NULL THEN 1 ELSE 0 END as StausValue FROM (
select D.Date, t2.[status] from #dates D
CROSS JOIN (SELECT DISTINCT [status] FROM #t2 )t2
)Q
LEFT JOIN #T3 T3 ON T3.[statuschange]=Q.status AND T3.[datechange]=Q.Date
)R order by Date asc, Status ASC
答案 2 :(得分:0)
我认为您还需要在解决方案中添加两点:
下面您可以找到示例解决方案。这不是一个理想的选择,但是我想给您一个大概的认识。例如,我没有解决一个ID在一天中两次更改状态的情况。基本上,我为每个ID DimDate创建了一个SC2表,并将它们连接在一起
--initial insert for new subscribers (they begin as active)
drop table if exists #t4
create table #t4 (id int, [Status] varchar(20), OpenDate date, CloseDate date, IsCurrent int)
insert into #t4(id, [Status], OpenDate, CloseDate, IsCurrent)
select
id
,'active'
,[subdate]
,'9999-12-31' --we don't know CloseDate for this version
,1
from #t1
declare @i date = '2019-01-01'
--filing versions till 2019-05-15
while @i < '2019-05-15'
begin
update t4
set t4.CloseDate = case when t4.OpenDate = @i then @i else dateadd(day,-1, @i) end--avoiding overlapping versions
,t4.IsCurrent = 0 -- there can only one version that is current
from #t4 as t4
join #t3 as t3
on t3.id = t4.id
and t4.IsCurrent = 1
where t3.[datechange] = @i
--inserting a new version
insert into #t4(id, [Status], OpenDate, CloseDate, IsCurrent)
select
t3.id
,t3.statuschange
,t3.datechange
,'9999-12-31'
,1 --the newiest version
from #t3 as t3
where t3.[datechange] = @i
set @i = DATEADD(day, 1, @i)
end
--populating an examplary DimDate
drop table if exists #DimDate
create table #DimDate (
dat date,
dateFormatted as FORMAT(dat, 'dd-MMM-yyyy')
)
set @i = '2019-01-01'
while @i < '2019-06-01'
begin
insert into #DimDate(dat)
select @i
set @i = DATEADD(day, 1, @i)
end
--final result
select
d.dateFormatted
,v.statuses
,count(t4.Status) as [count]
from #DimDate as d
cross join (values ('inactive'), ('active'), ('risky')) as v(statuses)
left join #t4 as t4
on v.statuses = t4.Status
and d.dat between t4.OpenDate and t4.CloseDate
group by
d.dateFormatted
,v.statuses
,d.dat
order by d.dat
答案 3 :(得分:0)
我省略了这一部分:“当一天中有多个状态更改时,最近的状态是可供选择的状态。”您将需要找到一种方法来选择一天的最后状态,对于当前的设计,这是不可能的,也许是在#t3上添加时间列或增量ID的话。... 它对我有用...请复制整个代码,然后重试。
--create date table (not sure if usable)
CREATE TABLE #dates ([date] date)
DECLARE @dIncr DATE = '2019-05-01'
DECLARE @dEnd DATE = dateadd(day,-1,getdate())
WHILE (@dIncr <= @dEnd)
BEGIN
INSERT INTO #dates ([date]) VALUES (@dIncr)
SELECT @dIncr = DATEADD(day,1,@dIncr)
END
GO
-- ID + Subscribed Date (starts active at joindate)
create table #t1 (id int, [subdate] date)
insert into #t1 values
(9, '2019-01-01'),
(1, '2019-05-02'),
(2, '2019-05-05'),
(3, '2019-05-05'),
(4, '2019-05-10')
GO
-- ID + Latest activity date
/*create table #t2 (id int, [status] int, [datestatus] date)
insert into #t2 values
(9,'risky', '2019-03-01'),
(1, 1, '2019-05-02'),
(2, 'inactive', '2019-05-13'),
(3, 'active', '2019-05-14'),
(4, 'risky', '2019-05-15')
GO*/
-- ID + Activity Logs Date
create table #t3 (id int, [statuschange] int, [datechange] date)
insert into #t3 values
(9,2, '2019-01-01'),
(9,1, '2019-02-01'),
(9,3, '2019-03-01'),
(2, 3, '2019-05-08'),
(2, 2, '2019-05-13'),
(3, 2, '2019-05-08'),
(3, 1, '2019-05-14'),
(4, 2, '2019-05-15'),
(4, 3, '2019-05-15')
GO
---Status Table
create table #t4 (id int, [status] varchar(max))
insert into #t4 values
(1, 'active'),
(2,'inactive'),
(3,'risky')
;WITH unionall AS--- join data from t1 and t3
(SELECT id
,1 as [statuschange]--starts active at joindate
, [subdate] as datechange
FROM #t1
union ALL
SELECT id , [statuschange] , [datechange]
FROM #t3
),
userstatuslog as(
SELECT id, [statuschange],datechange as beginingdate
,COALESCE( DATEADD(DAY,-1, lead(datechange) OVER(PARTITION BY id ORDER BY [datechange])), getdate()) as enddate
from unionall
)
,datestatus as(
SELECT
id, statuschange, beginingdate, enddate, [date]
,case WHEN [date]< beginingdate then 0
WHEN [date]>=beginingdate AND [date]<=enddate then statuschange
END as newstatus
FROM userstatuslog CROSS JOIN #dates)
,crossjoin as (
SELECT [date],id
from #dates CROSS join #t4
)
,removenulls as (
SELECT *
FROM datestatus
where newstatus is NOT NULL AND newstatus<>0
)
SELECT
crossjoin.date,crossjoin.id, sum(case when newstatus is null then 0 else 1 end)
FROM crossjoin left join datestatus on crossjoin.date=datestatus.date AND crossjoin.id=newstatus
GROUP BY crossjoin.date,crossjoin.id
ORDER BY crossjoin.date,crossjoin.id