与group by和join的累积总和

时间:2016-12-15 12:03:02

标签: tsql

我有点挣扎着找到一个干净的方法来做到这一点。假设我在名为Records的表中有以下记录:

   |Name|  |InsertDate|  |Size|
    john    30.06.2015     1
    john    10.01.2016     10
    john    12.01.2016     100
    john    05.03.2016     1000
    doe     01.01.2016     1

如何获得2016年和月份的记录等于或小于3按月分组(即使该月份不存在,例如本例中的第2个月),累计总和为Size,包括月?我希望得到如下结果:

   |Name|  |Month|  |Size|
    john      1      111
    john      2      111
    john      3      1111
    doe       1      1

2 个答案:

答案 0 :(得分:1)

正如其他评论者已经说过的那样,您只需要一个包含日期的表格,您可以从 join 为您提供源表没有记录的日期: / p>

-- Build the source data table.
declare @t table(Name nvarchar(10)
                ,InsertDate date
                ,Size int
                );
insert into @t (Name, InsertDate, Size) values
 ('john','20150630',1   )
,('john','20160110',10  )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1   );

-- Specify the year you want to search for by storing the first day here.
declare @year date = '20160101';

-- Number of months to report on, counted from @year (3 = January through March).
-- Expected to be at least 1; the first month is always returned.
declare @months int = 3;

-- This recursive CTE builds one row per reported month (first and last day of the month).
-- LEFT JOINing from here is what gives you rows for months without records in your source data.
-- The recursion stops after @months rows, so no unwanted months are generated and
-- no hard-coded cut-off date is needed in the final query.
with Dates
as
(
    select @year as MonthStart
            ,dateadd(day,-1,dateadd(month,1,@year)) as MonthEnd
    union all
    select dateadd(month,1,MonthStart)
            ,dateadd(day,-1,dateadd(month,2,MonthStart))
    from Dates
    where dateadd(month,1,MonthStart) < dateadd(month,@months,@year)
)
-- Every source row dated on or before a month's last day joins to that month,
-- so sum(Size) per Name and month is the cumulative total up to and including that month
-- (rows from earlier years are included as well).
select t.Name
        ,d.MonthStart
        ,sum(t.Size) as Size
from Dates d
    left join @t t
        on(t.InsertDate <= d.MonthEnd)
group by t.Name
        ,d.MonthStart
order by t.Name
        ,d.MonthStart;

如果数据库中有静态日期引用表,则不需要创建派生表,只需执行以下操作:

-- Template only, not runnable as written: replace the <...> placeholders with real
-- columns and tables and complete the rest of the query.
-- The query is driven from the date reference table and LEFT JOINs the data tables to it,
-- so dates without a matching row in the joined table are still returned.
select d.DateValue
      ,<Other columns>
from DatesReferenceTable d
    left join <Other Tables> o
        on(d.DateValue = o.AnyDateColumn)
etc

答案 1 :(得分:1)

这是另一种利用计数表(又名数字表)来创建日期表的方法。请注意我的评论。

-- Build the source data table.
DECLARE @t TABLE (Name nvarchar(10), InsertDate date, Size int);
INSERT INTO @t (Name, InsertDate, Size) VALUES
 ('john','20150630',1   )
,('john','20160110',10  )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1   );

-- Year to report on. A plain year number is enough, no date data type needed.
DECLARE @year smallint = 2016;

WITH
-- Ten dummy rows; cross joined below to build the tally table.
E AS (SELECT e FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(e)),
-- Inline tally (aka numbers) table holding 0 .. 9999.
-- ROW_NUMBER() returns bigint; the CAST keeps N an int for use as a DATEADD offset.
iTally(N) AS
(
  SELECT CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 1)) - 1 AS int)
  FROM E a CROSS JOIN E b CROSS JOIN E c CROSS JOIN E d
),
-- Per name: first day of the month of the earliest record, and the number of
-- month boundaries between the earliest and the latest record.
nameRange(Name, firstMonth, monthSpan) AS
(
  SELECT
    Name,
    DATEFROMPARTS(YEAR(MIN(InsertDate)), MONTH(MIN(InsertDate)), 1),
    DATEDIFF(MONTH, MIN(InsertDate), MAX(InsertDate))
  FROM @t
  GROUP BY Name
),
-- One row per name and calendar month between that name's first and last record,
-- including months in which the name has no records at all.
monthSpine(Name, monthStart) AS
(
  SELECT r.Name, DATEADD(MONTH, n.N, r.firstMonth)
  FROM nameRange r
  INNER JOIN iTally n
    ON n.N <= r.monthSpan
),
-- Total Size per name and month. The join matches on name and on the full
-- month (year included), so a record never attaches to the same month of another year.
-- Months without records get 0.
monthlyTotal(Name, monthStart, Size) AS
(
  SELECT s.Name, s.monthStart, COALESCE(SUM(d.Size), 0)
  FROM monthSpine s
  LEFT JOIN @t d
    ON  d.Name = s.Name
    AND d.InsertDate >= s.monthStart
    AND d.InsertDate <  DATEADD(MONTH, 1, s.monthStart)
  GROUP BY s.Name, s.monthStart
),
-- Running total per name in month order. This is computed before the year filter
-- so that records from earlier years still count towards the total.
RunningTotal(Name, monthStart, Size) AS
(
  SELECT
    Name,
    monthStart,
    SUM(Size) OVER (PARTITION BY Name ORDER BY monthStart ROWS UNBOUNDED PRECEDING)
  FROM monthlyTotal
) -- Final output only returns the months that fall inside @year:
SELECT
  name = Name,
  yr   = YEAR(monthStart),
  mo   = MONTH(monthStart),
  size = Size
FROM RunningTotal
WHERE monthStart >= DATEFROMPARTS(@year, 1, 1)
  AND monthStart <  DATEFROMPARTS(@year + 1, 1, 1)
ORDER BY Name, monthStart;

结果:

name       yr          mo          size
---------- ----------- ----------- -----------
doe        2016        1           1
john       2016        1           111
john       2016        2           111
john       2016        3           1111