我有一个类似于以下的数据框,总共有500列:
Advanced >
我想计算滚动窗口线性回归,其中我的窗口大小为12个数据点,每个连续回归由6个数据点分隔。对于每个回归,“Days”将始终是模型的x分量,y将是其他每个列(B1,后跟B2,B3等)。然后,我想将系数保存为具有现有列标题(B1,B2等)的数据帧。
我认为我的代码很接近,但效果不好。我在动物园图书馆里使用了rollapply。
Probes <- data.frame(Days=seq(0.01, 4.91, 0.01), B1=5:495,B2=-100:390, B3=10:500,B4=-200:290)
如果可能的话,我还想将“xmins”保存到矢量中以添加到数据帧中。这意味着每个回归中使用的最小x值(基本上它是“Days”列中的每6个数字。) 谢谢你的帮助。
答案 0 :(得分:1)
1)定义一个动物园对象z
,其数据包含Probes
,其索引取自第一列探针,即Days
。注意lm
允许y
为矩阵定义coefs
函数,该函数计算回归系数。最后rollapply
超过z
。请注意,返回的对象的索引给出了xmin。
library(zoo)
z <- zoo(Probes, Probes[[1]])
coefs <- function(z) c(unlist(as.data.frame(coef(lm(z[,-1] ~ z[,1])))))
rz <- rollapply(z, 12, by = 6, coefs, by.column = FALSE, align = "left")
,并提供:
> head(rz)
B11 B12 B21 B22 B31 B32 B41 B42
0.01 4 100 -101 100 9 100 -201 100
0.07 4 100 -101 100 9 100 -201 100
0.13 4 100 -101 100 9 100 -201 100
0.19 4 100 -101 100 9 100 -201 100
0.25 4 100 -101 100 9 100 -201 100
0.31 4 100 -101 100 9 100 -201 100
请注意,如果您需要DF <- fortify.zoo(rz)
的数据框表示,则可以使用rz
。
2)对行号进行rollaplly
的替代方法有点类似:
library(zoo)
y <- as.matrix(Probes[-1])
Days <- Probes$Days
n <- nrow(Probes)
coefs <- function(ix) c(unlist(as.data.frame(coef(lm(y ~ Days, subset = ix)))),
xmins = Days[ix][1])
r <- rollapply(1:n, 12, by = 6, coefs)
答案 1 :(得分:0)
试试这个:
# here are the xmin values you wanted
xmins <- Probes$Days[seq(1,nrow(Probes),6)]
# here we build a function that will run regressions across the columns
# y1 vs x, y2 vs x, y3 vs x...
# you enter the window and by (12/6) in order to limit the interval being
# regressed. this is later called in do.call
runreg <- function(Probes,m,window=12,by=6){
# beg,end are used to specify the interval
beg <- seq(1,nrow(Probes),by)[m]
end <- beg+window-1
# this is used to go through all the columns
N <- ncol(Probes)-1
tmp <- numeric(N)
# go through each column and store the coefficients in tmp
for(i in 1:N){
y <- Probes[[i+1]][beg:end]
x <- Probes$Days[beg:end]
tmp[i] <- coef(lm(y~x))[2][[1]]
}
# put all our column regressions into a dataframe
res <- rbind('coeff'=tmp)
colnames(res) <- colnames(Probes)[-1]
return(res)
}
# now that we've built the function to do the column regressions
# we just need to go through all the window-ed regressions (row regressions)
res <- do.call(rbind,lapply(1:length(xmins),function(m) runreg(Probes,m)))
# these rownames are the index of the xmin values
rownames(res) <- seq(1,nrow(Probes),6)
res <- data.frame(res,xmins)
答案 2 :(得分:0)
您还可以如下使用rollRegres
包
# setup data
Probes <- data.frame(
# I changed the days to be intergers
Days=seq(1L, 491L, 1L),
B1=5:495, B2=-100:390, B3=10:500 , B4=-200:290)
# setup grp argument
grp_arg <- as.integer((Probes$Days - 1L) %/% 6)
# estimate coefs. width argument is realtive in grp units
library(rollRegres)
X <- cbind(1, Probes$Days / 100)
Ys <- as.matrix(Probes[, 2:5])
out <- lapply(1:ncol(Ys), function(i)
roll_regres.fit(x = X, y = Ys[, i], width = 2L, grp = grp_arg)$coefs)
out <- do.call(cbind, out)
# only keep the complete.cases and the unique values
colnames(out) <- sapply(1:4, function(i) paste0("B", i, 0:1))
out <- out[c(T, grp_arg[-1] != head(grp_arg, -1)), ]
out <- out[complete.cases(out), ]
head(out)
#R B10 B11 B20 B21 B30 B31 B40 B41
#R [1,] 4 100 -101 100 9 100 -201 100
#R [2,] 4 100 -101 100 9 100 -201 100
#R [3,] 4 100 -101 100 9 100 -201 100
#R [4,] 4 100 -101 100 9 100 -201 100
#R [5,] 4 100 -101 100 9 100 -201 100
#R [6,] 4 100 -101 100 9 100 -201 100
该解决方案比zoo
解决方案
library(zoo) coefs <- function(z) c(unlist(as.data.frame(coef(lm(z[,-1] ~ z[,1]))))) microbenchmark::microbenchmark( rollapply = {
z <- zoo(Probes, Probes[[1]])
rz <- rollapply(z, 12, by = 6, coefs, by.column = FALSE, align = "left") }, roll_regres = {
grp_arg <- as.integer((Probes$Days - 1L) %/% 6)
X <- cbind(1, Probes$Days / 100)
Ys <- as.matrix(Probes[, 2:5])
out <- lapply(1:ncol(Ys), function(i)
roll_regres.fit(x = X, y = Ys[, i], width = 2L, grp = grp_arg)$coefs)
out <- do.call(cbind, out)
colnames(out) <- sapply(1:4, function(i) paste0("B", i, 0:1))
out <- out[c(T, grp_arg[-1] != head(grp_arg, -1)), ]
out <- out[complete.cases(out), ]
head(out) } )
#R Unit: microseconds
#R expr min lq mean median uq max neval
#R rollapply 53392.614 56330.492 59793.106 58363.2825 60902.938 119206.76 100
#R roll_regres 865.186 920.297 1074.161 983.9015 1047.705 5071.41 100
由于版本0.1.0
中的验证错误,目前您需要从Github安装软件包。因此,运行
devtools::install_github("boennecd/rollRegres", upgrade_dependencies = FALSE,
build_vignettes = TRUE)