SQL Server:在线性回归中避免除以零错误

时间:2018-01-21 14:56:21

标签: sql-server

我有一个用于判断正向趋势的线性回归查询。当数据中每个符号(symbol)有 2 个或更多日期时,它可以正常工作;但当某个符号只有 1 个日期时,就会出现除以零错误。如何才能在不报错的情况下返回结果?数据中可能有许多只有单条记录的符号。这个查询是我从另一个 Stack Overflow 回答中借用的,我并不清楚它的工作原理,因此非常感谢任何关于如何避免该错误的帮助。

可以正常运行:

    -- Drop any #temp4 left over from an earlier run so the script can be re-run.
    IF OBJECT_ID('tempdb..#temp4') IS NOT NULL
        DROP TABLE #temp4;
    GO

    -- Sample observations used by the regression query below in this script.
    CREATE TABLE #temp4
    (
        id        INT,             -- key used to partition the averages
        value     DECIMAL(18, 8),  -- observed value (y in the regression)
        symbol    NVARCHAR(50),    -- label grouped together with id
        [created] DATETIME         -- observation time (converted to seconds for x)
    );

    -- Seed data: every id/symbol pair has at least two timestamps in this set.
    insert into #temp4
        (id, value, symbol, [created])
    values
        (1, 0.1, 'abc', '2018-01-19 20:34:24'),
        (1, 0.2, 'abc', '2018-01-19 21:00:45'),
        (1, 0.3, 'abc', '2018-01-19 21:54:08'),
        (2, 50,  '123', '2018-01-19 21:00:45'),
        (2, 60,  '123', '2018-01-19 21:54:08'),
        (3, 40,  'CJ',  '2018-01-19 21:36:20'),
        (3, 40,  'CJ',  '2018-01-19 21:36:26');

    -- Least-squares slope (Beta) of value over time for each (id, symbol) group,
    -- returning only the groups whose slope is positive:
    --     Beta = sum((x - xbar) * (y - ybar)) / sum((x - xbar) * (x - xbar))
    -- where y is value and x is the number of seconds between the fixed reference
    -- instant '2018-01-01 20:34' and [created].
    --
    -- The denominator is 0 whenever every x in a group is identical (a single row,
    -- or several rows sharing one timestamp). NULLIF maps that 0 to NULL, so the
    -- division yields NULL instead of raising a divide-by-zero error; NULL > 0 is
    -- not true, so HAVING drops such groups. The guard is applied in the SELECT
    -- list as well because the same division is evaluated there.
    --
    -- NOTE(review): the means are taken per id while the outer query groups by
    -- (id, symbol); this assumes each id maps to exactly one symbol -- confirm.
    select
        id,
        symbol,
        1.0 * sum((x - xbar) * (y - ybar))
            / nullif(sum((x - xbar) * (x - xbar)), 0) as Beta
    from
    (
        select
            id,
            symbol,
            value as y,
            avg(value) over (partition by id) as ybar,
            datediff(second, '2018-01-01 20:34', [created]) as x,
            -- AVG over an int expression returns a truncated int. Multiplying by 1.0
            -- keeps the fractional part of the mean and makes the (x - xbar) products
            -- decimal, so they do not overflow int on widely spread timestamps.
            avg(1.0 * datediff(second, '2018-01-01 20:34', [created]))
                over (partition by id) as xbar
        from #temp4
    ) as Calcs
    group by id, symbol
    having count(symbol) > 1
       and 1.0 * sum((x - xbar) * (y - ybar))
            / nullif(sum((x - xbar) * (x - xbar)), 0) > 0;

不起作用:

-- Drop any #temp4 left over from an earlier run so the script can be re-run.
IF OBJECT_ID('tempdb..#temp4') IS NOT NULL
    DROP TABLE #temp4;
GO

-- Sample observations used by the regression query below in this script.
CREATE TABLE #temp4
(
    id        INT,             -- key used to partition the averages
    value     DECIMAL(18, 8),  -- observed value (y in the regression)
    symbol    NVARCHAR(50),    -- label grouped together with id
    [created] DATETIME         -- observation time (converted to seconds for x)
);

-- Seed data: id 3 ('CJ') has only a single row in this set.
insert into #temp4
    (id, value, symbol, [created])
values
    (1, 0.1, 'abc', '2018-01-19 20:34:24'),
    (1, 0.2, 'abc', '2018-01-19 21:00:45'),
    (1, 0.3, 'abc', '2018-01-19 21:54:08'),
    (2, 50,  '123', '2018-01-19 21:00:45'),
    (2, 60,  '123', '2018-01-19 21:54:08'),
    (3, 40,  'CJ',  '2018-01-19 21:36:20');

-- removing this record causes a divide-by-zero error
-- (3,40,'CJ','2018-01-19 21:36:26')

-- Reproduction of the divide-by-zero failure reported for this script.
-- Computes the least-squares slope (Beta) of value over time per (id, symbol):
--     Beta = sum((x - xbar) * (y - ybar)) / sum((x - xbar) * (x - xbar))
-- where y is value and x is the number of seconds between '2018-01-01 20:34'
-- and [created].
-- With the data inserted in this script, id 3 has exactly one row, so its x
-- equals xbar and the denominator sum((x - xbar) * (x - xbar)) is 0.
select id, symbol, 1.0*sum((x-xbar)*(y-ybar))/sum((x-xbar)*(x-xbar)) as Beta
from
(
    -- Per-row x and y together with their per-id means (window averages).
    select id, symbol,
        avg(value) over(partition by id) as ybar,
        value as y,
        avg(datediff(second,'2018-01-01 20:34',[created])) over(partition by id) as xbar,
        datediff(second,'2018-01-01 20:34',[created]) as x
    from #temp4
) as Calcs
group by id, symbol
-- The COUNT(symbol) > 1 test does not protect the division next to it: the
-- query is reported to fail with a divide-by-zero error for the single-row group.
having (COUNT(symbol) > 1) AND 1.0*sum((x-xbar)*(y-ybar))/sum((x-xbar)*(x-xbar))>0

1 个答案:

答案 0 :(得分:1)

使用 NULLIF 来处理除以零错误:当分母为 0 时,NULLIF 返回 NULL,除法结果也随之为 NULL,而不会报错。

-- Replacement HAVING clause for the regression query.
-- NULLIF(..., 0) returns NULL when the denominator is 0, so the division yields
-- NULL instead of raising a divide-by-zero error; NULL > 0 is not true, so the
-- group is filtered out.
-- NOTE(review): this guards only the HAVING expression; the identical division
-- in the query's SELECT list is not covered by this clause -- consider wrapping
-- that denominator in NULLIF as well.
HAVING ( Count(symbol) > 1 )
       AND 1.0 * Sum(( x - xbar ) * ( y - ybar )) / NULLIF(Sum(( x - xbar ) * ( x - xbar )), 0) > 0