我正在尝试用 R(统计编程语言)编写一个函数,以便自动计算线性回归(lm)并得到预测结果。
问题:回归是通过 step() 函数(逐步回归)计算的,因此无法预先知道最终被选中的系数。需要解决的问题有两个:
1. 自动识别 step() 函数选出的系数。
2. 按变量名匹配(类似 Vlookup),把结果(例如 View(OpenCoefs))的第二列(Estimate,估计值)与原始数据框 "sp" 中对应各列的最后一行(最后一天)交叉相乘。
理想的解决方案是一个函数:我只需键入 run(),它就会返回每个回归的 y 值,即对下一个交易日标准普尔 500 指数(开盘价、最低价、最高价、收盘价)的预测。
代码直接从雅虎财经网站获取数据,因此无需额外准备即可直接运行。
这是代码。
# Download the daily S&P 500 (^GSPC) price history as CSV from Yahoo Finance.
sp <- read.csv("http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv")
# The adjusted close is not referenced by the rest of the script; drop it.
sp[["Adj.Close"]] <- NULL
# Flip the row order so the last downloaded row comes first.
# NOTE(review): presumably the feed is newest-first and the lag / rolling
# calculations need oldest-first rows -- confirm against the data source.
sp <- as.data.frame(sp[rev(1:nrow(sp)), ])
# Row-over-row growth rate of each price / volume series:
#   Gr_<col>[i] = <col>[i] / <col>[i - 1] - 1, with NA in the first row.
# Computed on whole columns at once. The earlier row-by-row loops rewrote the
# data frame on every iteration, and `2:nrow(sp)` counts backwards (2, 1) when
# fewer than two rows are present.
growth_sources <- c("Open", "High", "Low", "Close", "Volume")
sp[paste0("Gr_", growth_sources)] <- lapply(
  sp[growth_sources],
  function(x) {
    # Shift the series down by one row, padding the first row with NA.
    previous <- c(NA, x)[seq_along(x)]
    x / previous - 1
  }
)
# Row counter column; kept both in `sp` and as a top-level vector, as before.
nRows_in_sp <- seq_len(nrow(sp))
sp <- cbind(sp, nRows_in_sp)

# Trailing means over rows (i - 100):i, i.e. 101 rows including the current
# one. Row 1 stays NA, rows 2..1000 are set to 0 and real means start at
# row 1001.
# NOTE(review): the 1000-row zero-filled warm-up is much longer than the
# 101-row window (the rolling SD code further down uses a 100-row cut-off and
# leaves NA). The behavior is kept as-is here; confirm it is not a typo for 100.
rollin_lookback <- 100L
rollin_warmup <- 1000L

# Trailing mean of `x` following the warm-up rules described above.
#   x        numeric vector, one value per row of `sp`
#   lookback number of earlier rows included alongside the current row
#   warmup   last row index that is filled with 0 instead of a mean
# Returns a numeric vector of the same length as `x`.
rollin_mean <- function(x, lookback, warmup) {
  rows <- seq_along(x)
  out <- rep(NA_real_, length(x))
  out[rows >= 2L & rows <= warmup] <- 0
  ready <- rows[rows > warmup]
  # mean() is applied window by window so the values match the original loops.
  out[ready] <- vapply(
    ready,
    function(i) mean(x[(i - lookback):i]),
    numeric(1)
  )
  out
}

# Target column -> source column, in the order the columns are appended.
# This also fixes the earlier " Close_Rollin" typo (leading space), which
# created a stray column and left the warm-up rows of Close_Rollin as NA.
rollin_sources <- c(
  Open_Rollin = "Open",
  Close_Rollin = "Close",
  Low_Rollin = "Low",
  High_Rollin = "High",
  Open_GR_Rollin = "Gr_Open",
  Close_GR_Rollin = "Gr_Close",
  Low_GR_Rollin = "Gr_Low",
  High_GR_Rollin = "Gr_High"
)
sp[names(rollin_sources)] <- lapply(
  sp[unname(rollin_sources)],
  rollin_mean,
  lookback = rollin_lookback,
  warmup = rollin_warmup
)
# Trailing standard deviation over rows (i - 100):i, i.e. 101 rows including
# the current one. Rows 1..100 have no full window and stay NA.
# Computed per column with vapply() instead of writing into the data frame one
# row at a time; this also avoids `2:nrow(sp)` counting backwards on inputs
# with fewer than two rows.
sd_lookback <- 100L

# Trailing standard deviation of `x`.
#   x        numeric vector, one value per row of `sp`
#   lookback number of earlier rows included alongside the current row
# Returns a numeric vector of the same length as `x` (NA where row <= lookback).
rollin_sd <- function(x, lookback) {
  rows <- seq_along(x)
  out <- rep(NA_real_, length(x))
  ready <- rows[rows > lookback]
  out[ready] <- vapply(
    ready,
    function(i) sd(x[(i - lookback):i]),
    numeric(1)
  )
  out
}

# Target column -> source column, in the order the columns are appended.
sd_sources <- c(
  Open_SD_Rollin = "Open",
  Close_SD_Rollin = "Close",
  Low_SD_Rollin = "Low",
  High_SD_Rollin = "High"
)
sp[names(sd_sources)] <- lapply(
  sp[unname(sd_sources)],
  rollin_sd,
  lookback = sd_lookback
)
# One-row lag of every model input: <col>lag[i] = <col>[i - 1], NA in row 1.
# The earlier `x[1:(N - 1)]` indexing mis-selected rows when N < 2 and the
# same statement was repeated for all 17 columns.
N <- nrow(sp)

# Source columns, in the order their "<col>lag" columns are appended to `sp`.
lag_sources <- c(
  "Open", "High", "Low", "Close",
  "Gr_Open", "Gr_High", "Gr_Low", "Gr_Close", "Gr_Volume",
  "Open_GR_Rollin", "Low_GR_Rollin", "High_GR_Rollin", "Close_GR_Rollin",
  "Open_SD_Rollin", "Low_SD_Rollin", "High_SD_Rollin", "Close_SD_Rollin"
)

for (src in lag_sources) {
  lag_name <- paste0(src, "lag")
  # Prepend NA and truncate to N rows: shifts the series down by one row.
  lagged <- c(NA, sp[[src]])[seq_len(N)]
  sp[[lag_name]] <- lagged
  # Also exposed as a top-level variable, because model formulas fitted
  # without `data =` look the predictors up by bare name.
  assign(lag_name, lagged)
}
# Stepwise (AIC, both directions) linear regressions of each price series on
# the one-day-lagged inputs. Improvements over the previous version:
#   * the predictor list is written once instead of four times;
#   * variables are taken from `sp` via `data =`, not from same-named globals;
#   * the selected models are kept in `step_models`, so fitted() / predict()
#     can be used instead of multiplying coefficients by hand;
#   * rows with NA are removed up front, so step() cannot stop with
#     "number of rows in use has changed" when it drops a term.
price_targets <- c("Open", "Low", "High", "Close")
lag_predictors <- c(
  "Openlag", "Lowlag", "Highlag", "Closelag",
  "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
  "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
  "Close_GR_Rollinlag",
  "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
  "Close_SD_Rollinlag"
)

# Complete cases across all targets and predictors. This matches lm()'s default
# NA handling as long as the four price columns themselves contain no NA.
sp_model_data <- na.omit(sp[, c(price_targets, lag_predictors)])

# One selected model per target, named by the target column.
step_models <- list()
for (target in price_targets) {
  full_model <- lm(
    reformulate(lag_predictors, response = target),
    data = sp_model_data
  )
  step_models[[target]] <- step(full_model, direction = "both", test = "F")
}

# Coefficient tables (Estimate, Std. Error, t value, Pr(>|t|)) per target.
OpenCoefs <- coefficients(summary(step_models[["Open"]]))
LowCoefs <- coefficients(summary(step_models[["Low"]]))
HighCoefs <- coefficients(summary(step_models[["High"]]))
CloseCoefs <- coefficients(summary(step_models[["Close"]]))

# Open the results in the data viewer.
View(OpenCoefs)
View(LowCoefs)
View(HighCoefs)
View(CloseCoefs)
View(sp)
答案 0 :(得分:6)
你的代码太糟糕了,我不得不怜悯你。 :)这是您的代码的重构版本:
library(quantmod)
# Fetch the ^GSPC series and keep the result in `sp` rather than letting
# getSymbols() assign it on its own.
sp <- getSymbols("^GSPC", auto.assign = FALSE)
# The adjusted column is not used below.
sp$GSPC.Adjusted <- NULL
# Strip the "GSPC." prefix so columns are plain Open / High / Low / Close /
# Volume (the pattern is anchored, so at most one match per name).
colnames(sp) <- sub("^GSPC\\.", "", colnames(sp))

# Discrete rate of change of each series.
sp$Gr_Open <- ROC(sp$Open, type = "discrete")
sp$Gr_High <- ROC(sp$High, type = "discrete")
sp$Gr_Low <- ROC(sp$Low, type = "discrete")
sp$Gr_Close <- ROC(sp$Close, type = "discrete")
sp$Gr_Volume <- ROC(sp$Volume, type = "discrete")

# Window length shared by every running statistic below.
N <- 100

# Running means of the price levels.
sp$Open_Rollin <- runMean(sp$Open, n = N)
sp$High_Rollin <- runMean(sp$High, n = N)
sp$Low_Rollin <- runMean(sp$Low, n = N)
sp$Close_Rollin <- runMean(sp$Close, n = N)

# Running means of the growth rates.
sp$Open_GR_Rollin <- runMean(sp$Gr_Open, n = N)
sp$High_GR_Rollin <- runMean(sp$Gr_High, n = N)
sp$Low_GR_Rollin <- runMean(sp$Gr_Low, n = N)
sp$Close_GR_Rollin <- runMean(sp$Gr_Close, n = N)

# Running standard deviations of the price levels.
sp$Open_SD_Rollin <- runSD(sp$Open, n = N)
sp$High_SD_Rollin <- runSD(sp$High, n = N)
sp$Low_SD_Rollin <- runSD(sp$Low, n = N)
sp$Close_SD_Rollin <- runSD(sp$Close, n = N)

# Lag every column at once, tag the copies with a "lag" suffix, join them back
# onto `sp`, and finally drop all rows that contain an NA.
spLag <- lag(sp)
colnames(spLag) <- paste0(colnames(sp), "lag")
sp <- merge(sp, spLag)
sp <- na.omit(sp)
回答第二个问题并不需要先回答第一个问题:您不必手动将系数与数据交叉相乘,只需从模型中取出拟合值即可。这要求您保留模型对象......
# Full model formula: Open regressed on every lagged input column.
f <- reformulate(
  c(
    "Openlag", "Lowlag", "Highlag", "Closelag",
    "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
    "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
    "Close_GR_Rollinlag",
    "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
    "Close_SD_Rollinlag"
  ),
  response = "Open"
)

# Fit the Open model once, then swap only the response for the other series;
# `.` on the right-hand side keeps the same set of predictors.
OpenLM <- lm(f, data = sp)
HighLM <- update(OpenLM, formula. = High ~ .)
LowLM <- update(OpenLM, formula. = Low ~ .)
CloseLM <- update(OpenLM, formula. = Close ~ .)

# Stepwise selection in both directions, starting from each full model.
# The model objects are kept so fitted values can be read from them later.
OpenStep <- step(OpenLM, direction = "both", test = "F")
HighStep <- step(HighLM, direction = "both", test = "F")
LowStep <- step(LowLM, direction = "both", test = "F")
CloseStep <- step(CloseLM, direction = "both", test = "F")
# In-sample fitted values for the most recent row of `sp`. These are the
# model's estimates for the last day already in the data (built from the
# previous day's values), not a forecast for the following day.
# The commented values are the output recorded when this was originally run.
tail(fitted(OpenStep), 1)
# 2013-02-01
# 1497.91
tail(fitted(HighStep), 1)
# 2013-02-01
# 1504.02
tail(fitted(LowStep), 1)
# 2013-02-01
# 1491.934
tail(fitted(CloseStep), 1)
# 2013-02-01
# 1499.851

# Out-of-sample forecast for the next day: the latest *unlagged* values are
# what the "<col>lag" predictors will equal one day ahead, so take the last
# row of the base columns and present it under the lag column names.
lag_cols <- grep("lag$", colnames(sp), value = TRUE)
next_day_inputs <- as.data.frame(tail(sp[, sub("lag$", "", lag_cols)], 1))
colnames(next_day_inputs) <- lag_cols
# The result is labelled with the last observed date (the row the inputs came
# from); the value itself is the prediction for the day after it.
predict(OpenStep, newdata = next_day_inputs)
predict(HighStep, newdata = next_day_inputs)
predict(LowStep, newdata = next_day_inputs)
predict(CloseStep, newdata = next_day_inputs)