以下是我的数据示例(表 RR_Linest):
Portfolio(投资组合) | Month_number(月份序号) | Collections(回款金额)
A | 1 | $100
A | 2 | $90
A | 3 | $80
A | 4 | $70
B | 1 | $100
B | 2 | $90
B | 3 | $80
我已经弄清楚如何计算单个投资组合的斜率、截距和 R 平方:去掉 Portfolio 列,只保留某一个选定投资组合的 Month_number(x)和 Collections(y)数据(我删除了投资组合 B 的数据),然后运行下面的代码。
我一直在尝试修改这个函数,使它在运行时能按投资组合分别给出斜率、截距和 R 平方。有人知道该怎么做吗?我尝试了很多方法,但始终没有成功。
首先我创建了这个函数:
declare @RegressionInput_A [dbo].[RegressionInput_A]
insert into @RegressionInput_A (x,y)
select ([model month]),log([collection $]) from [dbo].[RR_Linest]
select * from [dbo].[LinearRegression_A](@RegressionInput_A)
GO
-- Recreate dbo.LinearRegression_A from scratch.
-- Drop only when the function already exists, so the script also runs on a clean database.
IF OBJECT_ID(N'dbo.LinearRegression_A') IS NOT NULL
    DROP FUNCTION dbo.LinearRegression_A;
GO
-- CREATE FUNCTION has to be the first statement of its batch, hence the GO above.
--
-- Purpose : ordinary least-squares fit  y = Intercept + Slope * x  over ALL rows
--           of the input (no grouping).
-- Input   : @RegressionInputs_A - read-only table-valued parameter; its x and y
--           columns are read.
-- Output  : exactly one row (Slope, Intercept, RSquare), each DECIMAL(18, 6).
--           Slope, Intercept and RSquare are NULL when the fit is undefined
--           (empty input, or every x identical); RSquare is also NULL when the
--           total variation of y is zero. NULL is returned instead of raising a
--           divide-by-zero error.
CREATE FUNCTION dbo.LinearRegression_A
(
    @RegressionInputs_A AS dbo.RegressionInput_A READONLY
)
RETURNS @RegressionOutput_A TABLE
(
    Slope       DECIMAL(18, 6),
    Intercept   DECIMAL(18, 6),
    RSquare     DECIMAL(18, 6)
)
AS
BEGIN
    DECLARE @Xaverage  AS DECIMAL(18, 6);
    DECLARE @Yaverage  AS DECIMAL(18, 6);
    DECLARE @slope     AS DECIMAL(18, 6);
    DECLARE @intercept AS DECIMAL(18, 6);
    DECLARE @rSquare   AS DECIMAL(18, 6);

    -- Means of x and y
    SELECT
        @Xaverage = AVG(x),
        @Yaverage = AVG(y)
    FROM
        @RegressionInputs_A;

    -- Slope = Sxy / Sxx. NULLIF turns a zero Sxx (all x equal) into NULL
    -- so the division yields NULL rather than an error.
    SELECT
        @slope = SUM((x - @Xaverage) * (y - @Yaverage))
                 / NULLIF(SUM(POWER(x - @Xaverage, 2)), 0)
    FROM
        @RegressionInputs_A;

    -- The fitted line passes through the point of means
    SELECT
        @intercept = @Yaverage - (@slope * @Xaverage);

    -- R^2 = 1 - SSE / (SSE + SSR); the denominator is guarded the same way
    SELECT
        @rSquare = 1 - (SUM(POWER(y - (@intercept + @slope * x), 2))
                        / NULLIF(SUM(POWER(y - (@intercept + @slope * x), 2))
                                 + SUM(POWER((@intercept + @slope * x) - @Yaverage, 2)), 0))
    FROM
        @RegressionInputs_A;

    INSERT INTO @RegressionOutput_A
    (
        Slope,
        Intercept,
        RSquare
    )
    SELECT
        @slope,
        @intercept,
        @rSquare;

    RETURN;
END
GO
然后我运行该函数:
-- Table variable that carries the (x, y) pairs into the function
DECLARE @RegressionInput_A AS [dbo].[RegressionInput_A];

-- x = model month, y = natural log of the collection amount
INSERT INTO @RegressionInput_A (x, y)
SELECT
    src.[model month],
    LOG(src.[collection $])
FROM [dbo].[RR_Linest] AS src;

-- Fit the line over everything that was loaded
SELECT
    fit.Slope,
    fit.Intercept,
    fit.RSquare
FROM [dbo].[LinearRegression_A](@RegressionInput_A) AS fit;
答案 0 :(得分:4)
如果问题确实比较复杂,我总是先建立一个示例数据库/表,以确保给出的解决方案是正确的。
让我们基于 model 系统数据库创建一个名为 [Test] 的简单数据库。
--
-- Build a small scratch database named Test
--

-- Switch to master before dropping / creating the database
USE [master];
GO

-- Remove a previous copy of the database, if there is one
IF EXISTS (SELECT 1 FROM sys.databases AS d WHERE d.name = N'Test')
BEGIN
    DROP DATABASE [Test];
END
GO

-- Simple database based on model (no options specified)
CREATE DATABASE [Test];
GO

-- Make the new database the current one
USE [Test];
GO
让我们创建一个名为[InputToLinearReg]的表类型。
--
-- Create the table type used to pass regression input rows
--

-- Drop the existing table type, if any.
-- TYPE_ID resolves the schema-qualified name, whereas the previous name-only
-- lookup in the deprecated sys.systypes view could match a type in another schema.
-- NOTE: DROP TYPE fails while another object (for example a function with a
-- parameter of this type) still references the type; drop such objects first.
IF TYPE_ID(N'dbo.InputToLinearReg') IS NOT NULL
    DROP TYPE dbo.InputToLinearReg;
GO

-- Create the type explicitly in dbo so it matches the DROP above and the
-- dbo-qualified references made by code that uses the type.
CREATE TYPE dbo.InputToLinearReg AS TABLE
(
    portfolio_cd    char(1),   -- portfolio identifier, one character
    month_num       int,       -- month sequence number (x before conversion)
    collections_amt money      -- collected amount (y before LOG is applied)
);
GO
好的,下面是一个使用 CTE 的多层 SELECT 语句。查询优化器会把这种内联表值函数当作可以并行执行的普通 SQL 语句来处理,而常规的(多语句)函数则做不到这一点。请参阅 Wayne 文章中关于"黑匣子"的部分。
--
-- Create inline table-valued function (fast)
--

-- Remove the function if it exists.
-- OBJECT_ID returns NULL for a missing object, so test for NULL instead of "> 0".
IF OBJECT_ID(N'CalculateLinearReg') IS NOT NULL
    DROP FUNCTION CalculateLinearReg;
GO

-- Purpose : per-portfolio least-squares fit of LOG(collections_amt) against
--           month_num, i.e.  y = intercept + slope * x  with
--           x = month_num and y = LOG(collections_amt).
-- Input   : @ParmInTable - read-only rows of (portfolio_cd, month_num, collections_amt).
-- Output  : one row per portfolio_cd with
--           xavg, yavg  - means of x and y,
--           slope, intercept - fitted line,
--           rsquared    - 1 - SSE / (SSE + SSR).
--           slope, intercept and rsquared are NULL when a portfolio has no
--           variation in month_num (for example a single row); rsquared is also
--           NULL when y is constant. NULL replaces the divide-by-zero error
--           that would otherwise abort the whole query.
-- NOTE(review): LOG is undefined for amounts <= 0 and such rows are not filtered
--           here; confirm that callers only pass positive amounts.
CREATE FUNCTION CalculateLinearReg
(
    @ParmInTable AS dbo.InputToLinearReg READONLY
)
RETURNS TABLE
AS
RETURN
(
    -- Convert the raw columns to the (x, y) pairs used by the regression
    WITH cteRawData AS
    (
        SELECT
            T.portfolio_cd,
            CAST(T.month_num AS decimal(18, 6)) AS x,
            LOG(CAST(T.collections_amt AS decimal(18, 6))) AS y
        FROM
            @ParmInTable AS T
        WHERE
            -- A row missing either value would count towards one average but
            -- not the other and skew the fit, so only complete pairs are kept.
            T.month_num IS NOT NULL
            AND T.collections_amt IS NOT NULL
    ),
    -- Means of x and y for each portfolio
    cteAvgByPortfolio AS
    (
        SELECT
            portfolio_cd,
            AVG(x) AS xavg,
            AVG(y) AS yavg
        FROM
            cteRawData
        GROUP BY
            portfolio_cd
    ),
    -- slope = Sxy / Sxx; NULLIF makes a zero Sxx produce NULL instead of an error
    cteSlopeByPortfolio AS
    (
        SELECT
            R.portfolio_cd,
            SUM((R.x - A.xavg) * (R.y - A.yavg))
                / NULLIF(SUM(POWER(R.x - A.xavg, 2)), 0) AS slope
        FROM
            cteRawData AS R
        INNER JOIN
            cteAvgByPortfolio AS A
        ON
            R.portfolio_cd = A.portfolio_cd
        GROUP BY
            R.portfolio_cd
    ),
    -- The fitted line passes through the point of means
    cteInterceptByPortfolio AS
    (
        SELECT
            A.portfolio_cd,
            (A.yavg - (S.slope * A.xavg)) AS intercept
        FROM
            cteAvgByPortfolio AS A
        INNER JOIN
            cteSlopeByPortfolio AS S
        ON
            A.portfolio_cd = S.portfolio_cd
    )
    SELECT
        A.portfolio_cd,
        A.xavg,
        A.yavg,
        S.slope,
        I.intercept,
        -- R^2 = 1 - SSE / (SSE + SSR), with the denominator guarded against zero
        1 - (SUM(POWER(R.y - (I.intercept + S.slope * R.x), 2)) /
            NULLIF(SUM(POWER(R.y - (I.intercept + S.slope * R.x), 2)) +
                   SUM(POWER(((I.intercept + S.slope * R.x) - A.yavg), 2)), 0)) AS rsquared
    FROM
        cteRawData AS R
    INNER JOIN
        cteAvgByPortfolio AS A ON R.portfolio_cd = A.portfolio_cd
    INNER JOIN
        cteSlopeByPortfolio AS S ON A.portfolio_cd = S.portfolio_cd
    INNER JOIN
        cteInterceptByPortfolio AS I ON S.portfolio_cd = I.portfolio_cd
    GROUP BY
        A.portfolio_cd,
        A.xavg,
        A.yavg,
        S.slope,
        I.intercept
);
GO
最后但同样重要的是,设置表变量并获取结果。与您上面的解决方案不同,它按投资组合代码(portfolio_cd)进行分组。
-- Table variable that carries the sample rows into the function
DECLARE @InTable AS InputToLinearReg;

-- Sample data: portfolio A has four months, portfolio B has three
INSERT INTO @InTable (portfolio_cd, month_num, collections_amt)
VALUES
    ('A', 1, 100.00),
    ('A', 2, 90.00),
    ('A', 3, 80.00),
    ('A', 4, 70.00),
    ('B', 1, 100.00),
    ('B', 2, 90.00),
    ('B', 3, 80.00);

-- Show the regression result for each portfolio
SELECT
    res.portfolio_cd,
    res.xavg,
    res.yavg,
    res.slope,
    res.intercept,
    res.rsquared
FROM CalculateLinearReg(@InTable) AS res;
GO
以下是使用您的数据的结果图片。
答案 1 :(得分:1)
-- Purpose : per-portfolio least-squares fit  y = intercept + slope * x.
-- Input   : @RegressionInputs - read-only table-valued parameter; its
--           portfolio, x and y columns are read.
-- Output  : one row per portfolio with slope, intercept and rSquare.
--           slope, intercept and rSquare are NULL when x has no variation
--           within a portfolio; rSquare is also NULL when y is constant.
--           NULL is returned instead of raising a divide-by-zero error.
CREATE FUNCTION dbo.LinearRegression
(
    @RegressionInputs AS dbo.RegressionInput READONLY
)
RETURNS TABLE AS
RETURN
(
    WITH
    t1 AS ( -- every input row plus the averages of its portfolio
        SELECT portfolio, x, y,
               AVG(x) OVER (PARTITION BY portfolio) AS Xaverage,
               AVG(y) OVER (PARTITION BY portfolio) AS Yaverage
        FROM @RegressionInputs
    ),
    t2 AS ( -- slope = Sxy / Sxx; NULLIF avoids dividing by a zero Sxx
        SELECT portfolio, Xaverage, Yaverage,
               SUM((x - Xaverage) * (y - Yaverage))
                   / NULLIF(SUM(POWER(x - Xaverage, 2)), 0) AS slope
        FROM t1
        GROUP BY portfolio, Xaverage, Yaverage
    ),
    t3 AS ( -- intercept: the fitted line passes through the point of means
        SELECT portfolio, slope,
               (Yaverage - (slope * Xaverage)) AS intercept
        FROM t2
    ),
    t4 AS ( -- rSquare = 1 - SSE / (SSE + SSR)
        SELECT t1.portfolio, t3.slope, t3.intercept,
               1 - (SUM(POWER(t1.y - (t3.intercept + t3.slope * t1.x), 2))
                    / NULLIF(SUM(POWER(t1.y - (t3.intercept + t3.slope * t1.x), 2))
                             + SUM(POWER((t3.intercept + t3.slope * t1.x) - t1.Yaverage, 2)), 0)) AS rSquare
        FROM t1
        INNER JOIN t3 ON t1.portfolio = t3.portfolio
        -- slope and intercept are selected un-aggregated, so they must be
        -- grouped; they are constant within a portfolio, so the grouping
        -- still yields one row per portfolio.
        GROUP BY t1.portfolio, t3.slope, t3.intercept
    )
    SELECT portfolio, slope, intercept, rSquare FROM t4
);
GO