在R中自动查找(类似vlookup)回归系数并与数据相乘

时间:2013-02-04 20:00:33

标签: r regression vlookup

我正在尝试编写R(统计编程语言)中的函数,这样我就可以自动计算线性回归(lm)

问题:回归是通过 `step` 函数(逐步回归)计算的,因此无法预先知道最终被选中的系数。需要解决的问题如下:

  1. 自动识别 `step` 函数选中的系数。

  2. 按变量名查找(类似vlookup),把结果矩阵(例如 `View(OpenCoefs)`)的第二列(Estimate,估计值)与原始数据框 `sp` 中对应列的最后一行(最后一天)逐项相乘。

  3. 理想的解决方案是一个函数:我只需键入 `run()`,它就会返回每个回归的“y”,即对第二天 S&P 500 指数(开盘价、最低价、最高价、收盘价)的预测。

    代码直接从雅虎财经网站获取数据,因此可以直接运行。

    这是代码。

    # Download the daily ^GSPC price history as CSV from Yahoo Finance.
    sp <- read.csv(
      file = "http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv"
    )

    # The adjusted close is not used anywhere below, so drop the column.
    sp[["Adj.Close"]] <- NULL

    # Reverse the row order (last row becomes first). Presumably the feed is
    # newest-first and this makes the table chronological -- confirm against
    # the downloaded data.
    sp <- as.data.frame(sp[nrow(sp):1, ])
    
    
    # Row-over-row growth rate, value[t] / value[t - 1] - 1, for every price
    # series and for volume. The result is stored in a new "Gr_<series>" column
    # whose first row is NA because it has no predecessor.
    #
    # Each column is computed in one vectorised step. Assigning into the data
    # frame one row at a time copies the frame on every iteration, and
    # `2:nrow(sp)` counts backwards when there are fewer than two rows.
    for (series in c("Open", "High", "Low", "Close", "Volume")) {
      values <- sp[[series]]
      # Shift the series down by one row so each value lines up with the
      # previous row's value.
      previous <- c(NA, head(values, -1))
      sp[[paste0("Gr_", series)]] <- values / previous - 1
    }
    
    # Row counter column. The rolling-window sections of this script read it
    # to decide which rows have enough history.
    nRows_in_sp <- seq_len(nrow(sp))
    sp <- cbind(sp, nRows_in_sp)

    # Trailing mean of each price level over rows (i - 100):i, i.e. a window
    # of 101 observations, stored in "<price>_Rollin".
    # Row 1 stays NA and rows 2..1000 are filled with 0, as before.
    # NOTE(review): the 0 fill and the 1000-row warm-up look arbitrary next to
    # the 101-row window; NA up to row 100 may be what was intended -- confirm.
    #
    # Fix: the Close series used to write its zeros into a column named
    # " Close_Rollin" (leading space). That created a stray column and left
    # the real Close_Rollin NA for rows <= 1000. All four series now go
    # through the same code path, so the names are built consistently.
    for (price in c("Open", "Close", "Low", "High")) {
      values <- sp[[price]]
      # Work on a plain vector and attach it once; per-row assignment into
      # the data frame copies the frame on every iteration.
      rolled <- rep(NA_real_, nrow(sp))
      for (i in seq_len(nrow(sp))[-1]) {
        rolled[i] <- if (nRows_in_sp[i] <= 1000) {
          0
        } else {
          mean(values[(i - 100):i])
        }
      }
      sp[[paste0(price, "_Rollin")]] <- rolled
    }
    
    
    # Trailing mean of each growth-rate series over rows (i - 100):i, a window
    # of 101 observations. Names are target column = source column, listed in
    # the order the columns are appended to `sp`.
    # Row 1 stays NA and rows 2..1000 are filled with 0, as before.
    # NOTE(review): these zeros feed the regression through the lagged
    # columns; NA may be the better placeholder -- confirm before changing.
    #
    # Each column is built as a plain vector and attached once. Per-row
    # assignment into the data frame copies the frame on every iteration, and
    # `2:nrow(sp)` counts backwards when there are fewer than two rows.
    growth_sources <- c(
      Open_GR_Rollin  = "Gr_Open",
      Close_GR_Rollin = "Gr_Close",
      Low_GR_Rollin   = "Gr_Low",
      High_GR_Rollin  = "Gr_High"
    )
    row_id <- sp[["nRows_in_sp"]]

    for (target in names(growth_sources)) {
      values <- sp[[growth_sources[[target]]]]
      rolled <- rep(NA_real_, nrow(sp))
      for (i in seq_len(nrow(sp))[-1]) {
        rolled[i] <- if (row_id[i] <= 1000) {
          0
        } else {
          mean(values[(i - 100):i])
        }
      }
      sp[[target]] <- rolled
    }
    
    
    # Trailing standard deviation of each price level over rows (i - 100):i,
    # a window of 101 observations, stored in "<price>_SD_Rollin".
    # Rows whose counter is <= 100 (and row 1) stay NA because the window
    # would reach before the first row.
    #
    # Each column is built as a plain vector and attached once. Per-row
    # assignment into the data frame copies the frame on every iteration, and
    # `2:nrow(sp)` counts backwards when there are fewer than two rows.
    row_id <- sp[["nRows_in_sp"]]

    for (price in c("Open", "Close", "Low", "High")) {
      values <- sp[[price]]
      spread <- rep(NA_real_, nrow(sp))
      for (i in seq_len(nrow(sp))[-1]) {
        if (row_id[i] > 100) {
          spread[i] <- sd(values[(i - 100):i])
        }
      }
      sp[[paste0(price, "_SD_Rollin")]] <- spread
    }
    
    
    # Number of observations; used to cut the last element off each series.
    N <- length(sp[, "Open"])

    # Columns to lag by one row, in the order their "<name>lag" copies are
    # appended to `sp`.
    lag_sources <- c(
      "Open", "High", "Low", "Close",
      "Gr_Open", "Gr_High", "Gr_Low", "Gr_Close", "Gr_Volume",
      "Open_GR_Rollin", "Low_GR_Rollin", "High_GR_Rollin", "Close_GR_Rollin",
      "Open_SD_Rollin", "Low_SD_Rollin", "High_SD_Rollin", "Close_SD_Rollin"
    )

    for (src in lag_sources) {
      lag_name <- paste0(src, "lag")
      # Previous row's value: NA for the first row, then elements 1..(N - 1).
      lagged <- c(NA, sp[, src][1:(N - 1)])
      # Also keep a free-standing vector with the same name: the model
      # formulas further down refer to these variables without a `data`
      # argument.
      assign(lag_name, lagged)
      sp <- cbind(sp, setNames(data.frame(lagged), lag_name))
    }
    
    
    
    
    # Lagged predictors offered to every stepwise regression.
    predictors <- c(
      "Openlag", "Lowlag", "Highlag", "Closelag",
      "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
      "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
      "Close_GR_Rollinlag",
      "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
      "Close_SD_Rollinlag"
    )

    # Fit on complete rows only. step() refits the model on every move and
    # stops with "number of rows in use has changed" if dropping a term
    # changes which rows contain NA, so the NAs are removed once, up front.
    model_data <- na.omit(sp[, c("Open", "Low", "High", "Close", predictors)])

    # One stepwise model per target. Variables are taken from `model_data`
    # through `data =` rather than from same-named global vectors. The fitted
    # objects are kept so the selected terms and predictions can be read from
    # the model itself (coef(), fitted(), predict()) instead of multiplying
    # coefficients by hand.
    OpenStep <- step(
      lm(reformulate(predictors, response = "Open"), data = model_data),
      direction = "both", test = "F"
    )
    LowStep <- step(
      lm(reformulate(predictors, response = "Low"), data = model_data),
      direction = "both", test = "F"
    )
    HighStep <- step(
      lm(reformulate(predictors, response = "High"), data = model_data),
      direction = "both", test = "F"
    )
    CloseStep <- step(
      lm(reformulate(predictors, response = "Close"), data = model_data),
      direction = "both", test = "F"
    )

    # Coefficient tables of the selected models (estimate, standard error,
    # t value, p value), under the same names as before.
    OpenCoefs <- coefficients(summary(OpenStep))
    LowCoefs <- coefficients(summary(LowStep))
    HighCoefs <- coefficients(summary(HighStep))
    CloseCoefs <- coefficients(summary(CloseStep))

    # Open each table, and the full data set, in the interactive data viewer.
    View(OpenCoefs)
    View(LowCoefs)
    View(HighCoefs)
    View(CloseCoefs)
    View(sp)
    

1 个答案:

答案 0 :(得分:6)

你的代码太糟糕了,我不得不怜悯你。 :)这是您的代码的重构版本:

# Rebuild the question's feature table with quantmod instead of hand-written
# loops.
library(quantmod)
# auto.assign=FALSE makes getSymbols() return the series so it can be stored
# under a name of our choosing.
sp <- getSymbols("^GSPC", auto.assign=FALSE)
# The adjusted close is not used below.
sp$GSPC.Adjusted <- NULL
# Strip the "GSPC." prefix so the columns are named Open, High, Low, ...
colnames(sp) <- gsub("^GSPC\\.","",colnames(sp))

# Period-over-period growth of each price series and of volume.
# NOTE(review): type="discrete" presumably gives x[t] / x[t-1] - 1, matching
# the question's Gr_* columns -- confirm in the TTR documentation.
# NOTE(review): Op()/Hi()/Lo()/Cl()/Vo() appear to pick columns by name
# pattern, so the order of these statements relative to later column
# additions may matter -- confirm before reordering.
sp$Gr_Open   <- ROC(Op(sp), type="discrete")
sp$Gr_High   <- ROC(Hi(sp), type="discrete")
sp$Gr_Low    <- ROC(Lo(sp), type="discrete")
sp$Gr_Close  <- ROC(Cl(sp), type="discrete")
sp$Gr_Volume <- ROC(Vo(sp), type="discrete")

# Window length handed to every runMean()/runSD() call below.
# NOTE(review): the question's loops average rows (i-100):i, i.e. 101 rows;
# confirm whether 100 or 101 is wanted.
N <- 100
# Running means of the price levels.
sp$Open_Rollin  <- runMean(sp$Open, N)
sp$High_Rollin  <- runMean(sp$High, N)
sp$Low_Rollin   <- runMean(sp$Low, N)
sp$Close_Rollin <- runMean(sp$Close, N)

# Running means of the growth rates.
sp$Open_GR_Rollin  <- runMean(sp$Gr_Open, N)
sp$High_GR_Rollin  <- runMean(sp$Gr_High, N)
sp$Low_GR_Rollin   <- runMean(sp$Gr_Low, N)
sp$Close_GR_Rollin <- runMean(sp$Gr_Close, N)

# Running standard deviations of the price levels.
sp$Open_SD_Rollin  <- runSD(sp$Open, N)
sp$High_SD_Rollin  <- runSD(sp$High, N)
sp$Low_SD_Rollin   <- runSD(sp$Low, N)
sp$Close_SD_Rollin <- runSD(sp$Close, N)

# Lag every column, name the copies "<column>lag", join them to the original
# columns, and drop every row that contains an NA.
spLag <- lag(sp)
colnames(spLag) <- paste(colnames(sp),"lag",sep="")
sp <- na.omit(merge(sp, spLag))

要解决第二个问题,并不需要先解决第一个问题。您不必手动把系数与数据逐项相乘,只需从模型中取出拟合值即可。这要求您把模型对象保留下来……

# Every lagged predictor; the same right-hand side is used for all four models.
predictors <- c(
  "Openlag", "Lowlag", "Highlag", "Closelag",
  "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
  "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
  "Close_GR_Rollinlag",
  "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
  "Close_SD_Rollinlag"
)
f <- reformulate(predictors, response = "Open")

# Full model for Open; the other targets reuse it with the response swapped.
OpenLM <- lm(f, data = sp)
HighLM <- update(OpenLM, formula. = High ~ .)
LowLM <- update(OpenLM, formula. = Low ~ .)
CloseLM <- update(OpenLM, formula. = Close ~ .)

# Stepwise selection in both directions, reporting F tests, for each target.
OpenStep <- step(OpenLM, direction = "both", test = "F")
HighStep <- step(HighLM, direction = "both", test = "F")
LowStep <- step(LowLM, direction = "both", test = "F")
CloseStep <- step(CloseLM, direction = "both", test = "F")

# Fitted value of each selected model for the last row of the data.
# The commented values are the output recorded when the answer was written.
tail(fitted(OpenStep), n = 1)
# 2013-02-01 
#    1497.91 
tail(fitted(HighStep), n = 1)
# 2013-02-01 
#    1504.02 
tail(fitted(LowStep), n = 1)
# 2013-02-01 
#   1491.934 
tail(fitted(CloseStep), n = 1)
# 2013-02-01 
#   1499.851