矢量化"累积"回归

时间:2018-03-28 09:46:24

标签: r data.table regression vectorization

我有数据

/// Row titles for the travellers picker, one inner array per picker component.
/// Component 0 holds "1"..."9" and is read as the adult count; components 1 and 2
/// hold "0"..."6" and are read as the child and infant counts respectively.
var pickOption: [[String]] = [
    (1...9).map { String($0) },
    (0...6).map { String($0) },
    (0...6).map { String($0) },
]
/// Text field that shows the traveller summary. `viewDidLoad` replaces its input view
/// with the picker, and the picker's `didSelectRow` callback writes its text.
@IBOutlet weak var travellersTextField: UITextField!

/// Creates the travellers picker and installs it as the input view of
/// `travellersTextField`, so tapping the field presents the picker instead of a keyboard.
override func viewDidLoad() {
    super.viewDidLoad()

    let pickerView = UIPickerView()
    // The picker asks its data source for the component and row counts. Without this
    // assignment `numberOfComponents(in:)` and `pickerView(_:numberOfRowsInComponent:)`
    // are never consulted and the picker stays empty.
    // NOTE(review): assumes the enclosing class declares `UIPickerViewDataSource`
    // conformance (it already implements both required methods) — confirm.
    pickerView.dataSource = self
    // The delegate supplies row titles and receives selection callbacks.
    pickerView.delegate = self
    travellersTextField.inputView = pickerView
}

/// Reports the number of picker components: one per inner array of `pickOption`.
func numberOfComponents(in pickerView: UIPickerView) -> Int {
    let componentCount = pickOption.count
    return componentCount
}

/// Reports the number of rows in `component`: the length of the matching
/// inner array of `pickOption`.
func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
    let titles = pickOption[component]
    return titles.count
}

/// Returns the title shown at `row` of `component`, looked up in `pickOption`.
func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
    let titles = pickOption[component]
    return titles[row]
}
/// Rebuilds the traveller summary whenever any component's selection changes.
///
/// The `row` and `component` arguments are not used: the current selection of all
/// three components is re-read from the picker so the summary always reflects every
/// component, not only the one that just changed.
func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
    let adultTitle = pickOption[0][pickerView.selectedRow(inComponent: 0)]
    let childTitle = pickOption[1][pickerView.selectedRow(inComponent: 1)]
    let infantTitle = pickOption[2][pickerView.selectedRow(inComponent: 2)]

    // Assemble the three segments separately, then join them into the field text.
    let adultPart = adultTitle + " adults,"
    let childPart = childTitle + " children,"
    let infantPart = infantTitle + " infants"
    travellersTextField.text = adultPart + childPart + infantPart

    travellersTextField.becomeFirstResponder()
}

数据为 dat <- data.frame(t=1:100, y=rnorm(100), x1=rnorm(100), x2=rnorm(100)),其中 t 给出了时间点。我想根据前面的时间点,在每个时间点将 y 对 x1 和 x2 进行回归。

我可以创建一个循环

reg <- matrix(rep(NA, 3*nrow(dat)), ncol=3)
for(i in 11:nrow(dat)){
  reg[i,] <- coefficients(lm(y ~ x1 + x2, data=dat[1:i,]))
}

但我想知道是否有人知道使用 data.table 进行矢量化的方法。

2 个答案:

答案 0 :(得分:3)

我们可以使用非等自连接来获取表格:

library(data.table)
# Convert dat to a data.table; the result is not reassigned, dat itself is used below.
setDT(dat)
# Non-equi self-join of dat with itself on t <= t. In the printed result each time
# point t is paired with every row whose time is at or before it: y/x1/x2 hold the
# earlier rows' values and i.y/i.x1/i.x2 repeat the values of the row at t itself.
# For 100 time points that is 1 + 2 + ... + 100 = 5050 rows, far more than dat has,
# which is why allow.cartesian = TRUE is passed.
# NOTE(review): if only points _strictly_ before the current one are wanted, the
#   condition would presumably become t < t together with nomatch = 0L, so that the
#   first time point (which then has no match) is dropped — confirm.
dat[dat, on = .(t <= t), allow.cartesian = TRUE]
        t           y         x1          x2
   1:   1 -0.51729096  0.1765509  1.06562278
   2:   2 -0.51729096  0.1765509  1.06562278
   3:   2  0.85173679 -0.7801053  0.05249113
   4:   3 -0.51729096  0.1765509  1.06562278
   5:   3  0.85173679 -0.7801053  0.05249113
  ---                                       
5046: 100  1.03802913 -2.7042756  2.05639758
5047: 100 -1.29122593  0.9013410  0.77088748
5048: 100  0.08262791  0.4135725  0.92694074
5049: 100 -0.93397320  0.2719790 -0.26097185
5050: 100 -1.23897617  0.9008160  0.61121185
             i.y       i.x1        i.x2
   1: -0.5172910  0.1765509  1.06562278
   2:  0.8517368 -0.7801053  0.05249113
   3:  0.8517368 -0.7801053  0.05249113
   4: -0.5080630 -2.0701757 -1.01573263
   5: -0.5080630 -2.0701757 -1.01573263
  ---                                  
5046: -1.2389762  0.9008160  0.61121185
5047: -1.2389762  0.9008160  0.61121185
5048: -1.2389762  0.9008160  0.61121185
5049: -1.2389762  0.9008160  0.61121185
5050: -1.2389762  0.9008160  0.61121185

(有点令人困惑,但在 t <= t 中,左侧的 t 指的是左侧的 dat,右侧的 t 指的是右侧的 dat。)

从这里我们只需按t分组并运行回归:

# Cumulative regression in one chain:
#   1. the non-equi self-join collects, for every time point t, all rows at or before t;
#   2. grouped by t, lm(y ~ x1 + x2) is fitted on those rows and its coefficients are
#      returned as a list, giving one output row per t with one column per coefficient;
#   3. head(.SD) keeps only the first rows for display.
# Early groups have fewer rows than coefficients, hence the NA entries in the output.
dat[dat, on = .(t <= t), allow.cartesian = TRUE
    ][ , as.list(coef(lm(y ~ x1 + x2))), keyby = t
       # (only adding head here to limit output)
       ][ , head(.SD)]
#    t (Intercept)          x1          x2
# 1: 1  -0.5172910          NA          NA
# 2: 2  -0.2646369 -1.43105510          NA
# 3: 3   9.1879448  9.96212179 -10.7580819
# 4: 4  -0.3504059 -0.36654096   0.4523271
# 5: 5  -0.1681879 -0.06670494   0.3553107
# 6: 6   1.2108223  1.04082291  -0.6947567

答案 1 :(得分:1)

可以尝试如下方案:定义一个临时的回归函数,再用 lapply 对每个时间点调用它:

# Fit y ~ x1 + x2 on the first i rows of dat and return the fitted coefficients.
#   i   : number of leading rows to include in the fit
#   dat : data with columns y, x1 and x2
# Returns a named numeric vector (Intercept, x1, x2); coefficients that cannot be
# estimated from so few rows come back as NA.
f <- function(i, dat) {
  coefficients(lm(y ~ x1 + x2, data = dat[seq_len(i), ]))
}

# One fit per prefix length 1..nrow(dat). seq_len() gives an empty sequence for a
# zero-row dat, whereas seq(1:nrow(dat)) would yield 1:2 in that case.
lapply(seq_len(nrow(dat)), f, dat = dat)
[[1]]
(Intercept)          x1          x2 
  0.4949079          NA          NA 

[[2]]
(Intercept)          x1          x2 
 -0.4552593   2.4497037          NA 

[[3]]
(Intercept)          x1          x2 
  0.1023961   1.6163017  -0.8490789 

[[4]]
(Intercept)          x1          x2 
 -0.9136870   2.1235787   0.9072042 

...

[[100]]
(Intercept)          x1          x2 
 0.06118874 -0.02917001  0.15879213