F#中的样本外预测

时间:2015-04-19 22:26:42

标签: r f#

我是F#的新手,想要做样本外(out-of-sample)的OLS预测。我的数据框(frame)类似于下面这样。我想用前3个数据点估计参数,然后预测最后一个数据点。

// Question's sample data: three series keyed 1..4 ("year", "gold", "silver")
// are combined into a frame, whose column keys are then replaced by
// "one", "two", "three".
// NOTE(review): the indentation of this snippet looks lost in transcription;
// confirm the layout against the original post before running it.
let dfff= 
[ "year" => series [ 1 => 1990.0; 2 => 1991.00; 3 => 1992.0;  4 => 1993.0]
"gold"  => series [ 1 => 10.0; 2 => 10.00; 3 => 15.0; 4 => 20.0] 
"silver"  => series [ 1 => 20.0; 2 => 30.00; 3 => 45.0; 4 => 55.0] ] 
|> frame
|> Frame.indexColsWith ["one"; "two"; "three"]

// Estimation sample: keep the rows whose "one" column is at most 1992.0,
// then copy the "two" and "three" columns of that subset into float arrays.
let dfff2 = dfff |> Frame.filterRows (fun k row -> row?one <= 1992.0 )
let mygold = [| for i in dfff2.GetColumn<float>("two").Values -> i |] 
let mysilver = [| for i in dfff2.GetColumn<float>("three").Values -> i |] 

// Build an R data frame with two named columns: "a" from mygold and "b" from
// mysilver. The arrays are boxed before being handed to namedParams.
let myparam =
namedParams [
    "a", box mygold;
    "b", box mysilver;
] |> R.data_frame

// Fit the R linear model a ~ b on the data frame built above, then read the
// coefficients from the model summary and the residuals from the model object.
let myresult = R.lm(formula = "a~b", data = myparam) 
R.summary(myresult)
let mycoefs = R.coef(R.summary(myresult))
let residuals = myresult.AsList().["residuals"].AsNumeric()
// Hold-out row: the observation whose "one" column is 1993.0.
// NOTE(review): this filter compares with `==`, while the answer's version of
// the same filter uses `=`; confirm which form the compiler accepts.
let dfff3 = dfff |> Frame.filterRows (fun k row -> row?one == 1993.0 )

有人可以帮我把这段代码补充完整吗?我想取出alpha和beta系数,并计算样本外预测值。下面这段代码不起作用:

// The asker reports that this code does not work; it is kept verbatim.
// NOTE(review): `fun float i*beta` has no `->`, so it is not a well-formed
// lambda, and dfff3 is a frame rather than a sequence of floats. Presumably the
// intent was "value * beta" for each hold-out row; confirm with the asker.
let beta=mycoefs.[1,1]
let predict=[| for i in dfff3 -> fun float i*beta |]

1 个答案:

答案 0 :(得分:2)

清理示例代码以使其更具可读性:

// Sample data set: one row per observation (row keys 1..4) with the columns
// "year", "gold" and "silver".
let dfff =
    frame [
        "year"   => series [ 1 => 1990.0; 2 => 1991.0; 3 => 1992.0; 4 => 1993.0 ]
        "gold"   => series [ 1 => 10.0; 2 => 10.0; 3 => 15.0; 4 => 20.0 ]
        "silver" => series [ 1 => 20.0; 2 => 30.0; 3 => 45.0; 4 => 55.0 ]
    ]

// Estimation sample: every row up to and including 1992.
let dfff2 =
    dfff
    |> Frame.filterRows (fun _ row -> row?year <= 1992.0)

// Hold-out sample: the single 1993 row that will be predicted.
let dfff3 =
    dfff
    |> Frame.filterRows (fun _ row -> row?year = 1993.0)

// Regress gold on silver using only the estimation sample (dfff2).
let myresult = R.lm(data = dfff2, formula = "gold~silver")

// Evaluated as a bare expression; presumably so that F# Interactive prints the
// model summary.
R.summary(myresult)

// Coefficients extracted from the model summary.
let mycoefs = R.coef(R.summary(myresult))

您可以通过几种方式预测最终观察结果:

// Option 1: let R predict from the fitted model for the rows of dfff3
// (the 1993 hold-out row).
R.predict_lm(myresult, dfff3)
// Output shown by the answer's author:
/// val it : SymbolicExpression =        4 
/// 16.57895 

或者

// Option 2: compute the prediction by hand.
// Rows 0..1 of column 0 of the coefficient matrix; used below as
// betas.[0] + x * betas.[1], i.e. intercept and slope.
let betas = mycoefs.AsNumericMatrix().[0..1,0]

// Take the first "silver" value of the hold-out frame and apply the fitted line.
dfff3.["silver"].Values
|> Array.ofSeq
|> fun silverValues -> betas.[0] + silverValues.[0] * betas.[1]

// Output shown by the answer's author:
/// val it : float = 16.57894737

您最初的问题提到了移动窗口。更一般地,您可以采用类似下面的做法,用逐步扩展的窗口进行样本外预测。由于这是时间序列,我还改用年份作为索引。

open RProvider
open RDotNet
open RProvider.``base``
open RProvider.stats
open System
open Deedle

// Synthetic yearly data for 1990..2010. "gold" is one rand.NextDouble() draw per
// year; "silver" is 2 * gold plus another rand.NextDouble() draw.
let years = [| 1990 .. 2010 |]
let rand = System.Random()
let gold = List.init years.Length (fun _ -> rand.NextDouble())
let silver = gold |> List.map (fun g -> 2.0 * g + rand.NextDouble())

// Both columns are keyed by year.
let x =
    Frame.ofColumns [
        "gold"   => Series(years, gold)
        "silver" => Series(years, silver)
    ]

// One expanding window per year from the third year onward: each window holds
// every row of x whose key (year) is at most that window's last year.
let listOfWindows =
    years.[2..]
    |> Array.toList
    |> List.map (fun lastYear ->
        x |> Frame.filterRows (fun key _ -> key <= lastYear))

// For every window: hold out its latest year, fit silver ~ gold on the earlier
// years, predict the held-out year, and return
// (year, predicted silver, actual silver).
[ for window in listOfWindows do
    let yearToPredict = Seq.max window.RowKeys
    // Select the rows of this window whose year satisfies `keep`.
    let rowsWhere keep =
        window |> Frame.filterRows (fun year _ -> keep year)
    let train = rowsWhere (fun year -> year < yearToPredict)
    let test = rowsWhere (fun year -> year = yearToPredict)
    let mdl = R.lm("silver~gold", train)
    let predicted = R.predict_lm(mdl, test).AsNumeric().[0]
    let actual = test.["silver"].[yearToPredict]
    yield (yearToPredict, predicted, actual) ]

// Sample output from the answer's author. The numbers differ on every run
// because the data are random.
// NOTE(review): the years listed here start at 1987, whereas with
// `years = 1990..2010` the first predicted year would be 1992; the listing
// presumably comes from a run with a different year range.
/// val it : (int * float * float) list =
///  [(1987, 2.229193258, 1.281468206); (1988, 1.111561003, 1.248677743);
///   (1989, 1.039488768, 0.7363931172); (1990, 1.680597973, 1.808993845);
///   (1991, 1.943243437, 2.092237694); (1992, 2.555973809, 2.00253973);...
///