SQL:计算每个日期范围的中位数

时间:2017-03-07 18:24:46

标签: sql sql-server sql-server-2008 recursion median

我正在使用 SQL Server 2008,想要计算每个日期范围的中位数。

示例:共有 4 个日期(1/1/16、3/1/16、7/1/16、10/1/16),每个日期都有多个值。日期 10/1/16 的中位数将根据日期范围 1/1/16 - 7/1/16 中的值计算;日期 7/1/16 的中位数将根据日期范围 1/1/16 - 3/1/16 中的值计算。

如果日期为 10/1/16,则 10/1/16 当天的值不应包含在中位数的计算中(这一规则适用于所有日期)。此外,晚于该日期的值也不应参与计算。

下面的查询只计算了最大(MAX)访问日期对应的中位数。但是,我还需要计算其他 3 个访问日期的中位数。我已经尝试删除 MAX CTE,并添加一个联接来取所有早于当前访问日期的记录(访问日期 < 当前访问日期),但没能让它正常工作。到目前为止,我一直没能写出这个查询,所以任何帮助都将不胜感激。我在下面列出了样本数据和我的预期结果。

编辑:某种递归可能会起作用吗?

-- Sample data for the question: a single QUESTION ('ABC') measured on four
-- visit dates, with several VALUE readings per date.

-- Drop any leftover copy so the script can be re-run in the same session.
;IF OBJECT_ID('tempdb..#TEST') IS NOT NULL
    DROP TABLE #TEST

;CREATE TABLE #TEST(QUESTION VARCHAR(15), VISIT_DATE DATE, VALUE INT)

-- Dates are written in the unseparated 'YYYYMMDD' form, which is parsed the
-- same way regardless of the session's DATEFORMAT / language setting (the
-- month/day/year reading of the original '1/1/2016'-style literals is kept).
-- VALUE literals are plain integers, matching the INT column, instead of
-- quoted strings that rely on implicit conversion.
;INSERT INTO #TEST(QUESTION, VISIT_DATE, VALUE)
VALUES
('ABC', '20160101', 80),
('ABC', '20160101', 90),
('ABC', '20160101', 100),
('ABC', '20160301', 70),
('ABC', '20160301', 80),
('ABC', '20160301', 90),
('ABC', '20160301', 100),
('ABC', '20160701', 50),
('ABC', '20160701', 60),
('ABC', '20160701', 70),
('ABC', '20161001', 10),
('ABC', '20161001', 20),
('ABC', '20161001', 30),
('ABC', '20161001', 40)


-- Attempted query from the question.
-- It produces ONE median per QUESTION, computed over every row that is not on
-- the latest visit date in #TEST, and attaches that same median to all rows
-- of the question. It does not yet compute a separate median per visit date.
;WITH PRIOR_ROWS AS (
    -- Rows outside the latest visit date, numbered by ascending VALUE within
    -- each question, together with the size of that question's group.
    SELECT
        T.QUESTION,
        T.VALUE,
        ROW_NUMBER() OVER (PARTITION BY T.QUESTION ORDER BY T.VALUE) AS ROWNUMBER,
        COUNT(*) OVER (PARTITION BY T.QUESTION) AS QuestionCount
    FROM #TEST AS T
    WHERE T.VISIT_DATE <> (SELECT MAX(VISIT_DATE) FROM #TEST)
), MEDIAN AS (
    -- Keep the middle row (odd count) or the two middle rows (even count)
    -- and average them.
    SELECT
        P.QUESTION AS Question,
        AVG(P.VALUE) AS GroupMedianPastQtrs
    FROM PRIOR_ROWS AS P
    WHERE P.ROWNUMBER = P.QuestionCount / 2 + 1
       OR P.ROWNUMBER = (P.QuestionCount + 1) / 2
    GROUP BY P.QUESTION
)
SELECT
    T.QUESTION,
    T.VISIT_DATE,
    T.VALUE,
    MEDIAN.Question,
    MEDIAN.GroupMedianPastQtrs
FROM #TEST AS T
INNER JOIN MEDIAN ON MEDIAN.Question = T.QUESTION

--Expected Results:

Question|Visit_DAte |Value|GroupMedian  |
--------|-----------|-----|-------------|
'ABC'   |'1/1/2016' |'80' |''           |--No Median, no previous values
'ABC'   |'1/1/2016' |'90' |''           |--No Median, no previous values
'ABC'   |'1/1/2016' |'100'|''           |--No Median, no previous values
'ABC'   |'3/1/2016' |'70' |'90'         |--Median value from date 1/1/16
'ABC'   |'3/1/2016' |'80' |'90'         |--Median value from date 1/1/16
'ABC'   |'3/1/2016' |'90' |'90'         |--Median value from date 1/1/16
'ABC'   |'3/1/2016' |'100'|'90'         |--Median value from date 1/1/16
'ABC'   |'7/1/2016' |'50' |'90'         |--Median value from date range 1/1/16 to 3/1/16
'ABC'   |'7/1/2016' |'60' |'90'         |--Median value from date range 1/1/16 to 3/1/16
'ABC'   |'7/1/2016' |'70' |'90'         |--Median value from date range 1/1/16 to 3/1/16
'ABC'   |'10/1/2016'|'10' |'80'         |--Median value from date range 1/1/16 to 7/1/16
'ABC'   |'10/1/2016'|'20' |'80'         |--Median value from date range 1/1/16 to 7/1/16
'ABC'   |'10/1/2016'|'30' |'80'         |--Median value from date range 1/1/16 to 7/1/16
'ABC'   |'10/1/2016'|'40' |'80'         |--Median value from date range 1/1/16 to 7/1/16

1 个答案:

答案 0 :(得分:1)

我手头没有 SQL Server 2008 的机器来测试。所以我尽力核对了下面用到的每个功能在 2008 版本中是否可用:

-- For every row of #TEST, return the row together with the median of all
-- VALUEs recorded for the same QUESTION on strictly earlier visit dates.
-- Rows on a question's earliest visit date have no earlier values and get a
-- NULL MedianValue (LEFT JOIN below).
;WITH
    visit AS
    (
        -- One row per (QUESTION, VISIT_DATE): the groups that need a median.
        SELECT DISTINCT
                        QUESTION
                    ,   VISIT_DATE
        FROM            #TEST
    ),
    prior_value AS
    (
        -- Pair each visit with every earlier reading of the same question,
        -- numbered by ascending VALUE within the visit.
        SELECT          v.QUESTION
                    ,   v.VISIT_DATE
                    ,   p.VALUE
                    ,   ROW_NUMBER() OVER (PARTITION BY v.QUESTION, v.VISIT_DATE ORDER BY p.VALUE)
                                                                AS RowNumber
                    ,   COUNT(*) OVER (PARTITION BY v.QUESTION, v.VISIT_DATE)
                                                                AS PriorCount
        FROM            visit   v
        INNER JOIN      #TEST   p   ON p.QUESTION   = v.QUESTION
                                   AND p.VISIT_DATE < v.VISIT_DATE
        -- NULL readings are excluded up front so that RowNumber and
        -- PriorCount describe the same set of rows; otherwise NULLs would be
        -- numbered but not counted, shifting the median positions.
        WHERE           p.VALUE IS NOT NULL
    ),
    GroupMedian AS
    (
        -- Middle row for an odd count, the two middle rows for an even count.
        -- Integer division gives both positions: (n + 1) / 2 and (n + 2) / 2.
        -- VALUE is cast to float because AVG over an INT column returns a
        -- truncated INT (e.g. 80 and 85 would average to 82, not 82.5).
        SELECT          QUESTION
                    ,   VISIT_DATE
                    ,   AVG(CAST(VALUE AS float))   AS MedianValue
        FROM            prior_value
        WHERE           RowNumber IN ((PriorCount + 1) / 2, (PriorCount + 2) / 2)
        GROUP BY        QUESTION
                    ,   VISIT_DATE
    )

SELECT          a.QUESTION
        ,       a.VISIT_DATE
        ,       a.VALUE
        ,       m.MedianValue
FROM            #TEST           a
LEFT  JOIN      GroupMedian     m   ON m.QUESTION   = a.QUESTION
                                   AND m.VISIT_DATE = a.VISIT_DATE
ORDER BY        a.QUESTION
        ,       a.VISIT_DATE
        ,       a.VALUE