r:2个子集函数,第2个工作但第1个不工作

时间:2014-02-22 16:30:32

标签: r tweedie

我正在尝试编写自己的函数。有2个子集函数用于删除部分数据。第二个子集函数有效,但第一个失败。我检查了用于数据子集的字段的类和级别,一切似乎都正确。代码如下所示:

mdl.sum<-function(seed=0, times=0, method=c("Boostrap", "cv"), alpha=0.05, data, Prev.vars=NULL, varia, depvar, Expo=NULL, WP=NULL, weight, offset=NULL, p=1.6, outfile){

  mdlout<-data.frame(var.lev=character(0), Estimate = numeric(0),pvalue=numeric(0), AIC=numeric(0), converged=numeric(0), Varorder=numeric(0), CVGroup = numeric(0) )

  set.seed(seed)
  data$y<-data[, depvar]/data[, weight]
  loc_y<-dim(data)[2]

  index<-createResample(data$y, times, list=FALSE)

  foreach(j = 1:times) %dopar% {

    CV.Grp<-j
    train <- data[index[,j],]

    for (i in varia) {
      if (i %in% Prev.vars){next} else{
        if (length(Prev.vars)==0) {
          Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[i]), collapse="+")), collapse="~"))
        } else{
          Mdl.form<-as.formula(paste(c(names(train)[loc_y], paste(c(paste(c("offset(", names(train)[offset], ")"), collapse=""), names(train)[Prev.vars],names(train)[i]), collapse="+")), collapse="~"))
        }

        LR.glm<-glm(Mdl.form, family=tweedie(var.power=p, link.power=0),data=train, weights=train[, weight])
        temp.coef<-data.frame(coef(summary(LR.glm)))
        temp.coef<-cbind(var.lev=row.names(temp.coef), temp.coef)
        mdlout<-rbind(mdlout, cbind(temp.coef[,c(1:2,5)], AIC=AICtweedie(LR.glm), converged=(LR.glm$converged+0), Varorder=i, CVGroup = CV.Grp ))

      }
    }
  }

  mdlout2<-subset(mdlout, mdlout$converged==1)
  mdlout2<-subset(mdlout, mdlout$var.lev!="(Intercept)")

  write.csv(mdlout2, file=outfile, row.names = FALSE)

}

然后,运行以下代码:

library(lattice)
library(Matrix)
library(MASS)
library(RODBC)
library(statmod)
library(tweedie)
library(ggplot2)
library(gtools)
library(colorspace)
library(vcd)
library(cvTools)
library(car)
library(caret)
library(foreach)
library(xtable)

mdl.sum(seed=0, times=5, method="Boostrap", alpha=0.2, data=table.1.2,Prev.vars=2, varia=1:3,depvar=7, Expo=4, WP=8, weight=4, offset=10, p=1.7, outfile='D:/b1.csv')

根据Jdbaba的建议,我输入(table.1.2),如下所示。

> dput(table.1.2)
structure(list(premiekl = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Class", 
"Code: 1=Weight over 60kg and more than 2 gears", "Code: 2=Other"
)), moptva = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("1", "2"), class = "factor", comment = c("Name: Age", 
"Code: 1=At most 1 year", "Code: 2=2 years or more")), zon = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor", comment = c("Name: Zone", 
"Code: 1=Central and semi-central parts of Sweden's three largest cities", 
"Code: 2=suburbs and middle-sized towns", "Code: 3=Lesser towns, except those in 5 or 7", 
"Code: 4=Small towns and countryside, except 5--7", "Code: 5=Northern towns", 
"Code: 6=Northern countryside", "Code: 7=Gotland (Sweden's largest island)"
)), dur = structure(c(62.9, 112.9, 133.1, 376.6, 9.4, 70.8, 4.4, 
352.1, 840.1, 1378.3, 5505.3, 114.1, 810.9, 62.3, 191.6, 237.3, 
162.4, 446.5, 13.2, 82.8, 14.5, 844.8, 1296, 1214.9, 3740.7, 
109.4, 404.7, 66.3), comment = c("Name: Duration", "Unit: year"
)), medskad = structure(c(18256L, 13632L, 20877L, 13045L, 0L, 
15000L, 8018L, 8232L, 7418L, 7318L, 6922L, 11131L, 5970L, 6500L, 
7754L, 6933L, 4402L, 8214L, 0L, 5830L, 0L, 4728L, 4252L, 4212L, 
3846L, 3925L, 5280L, 7795L), comment = c("Name: Claim severity", 
"Unit: SEK")), antskad = structure(c(17L, 7L, 9L, 7L, 0L, 1L, 
1L, 52L, 69L, 75L, 136L, 2L, 14L, 1L, 43L, 34L, 11L, 8L, 0L, 
3L, 0L, 94L, 99L, 37L, 56L, 4L, 5L, 1L), comment = "Name: No. claims"), 
    riskpre = structure(c(4936L, 845L, 1411L, 242L, 0L, 212L, 
    1829L, 1216L, 609L, 398L, 171L, 195L, 103L, 104L, 1740L, 
    993L, 298L, 147L, 0L, 211L, 0L, 526L, 325L, 128L, 58L, 144L, 
    65L, 118L), comment = c("Name: Pure premium", "Unit: SEK"
    )), helpre = structure(c(2049L, 1230L, 762L, 396L, 990L, 
    594L, 396L, 1229L, 738L, 457L, 238L, 594L, 356L, 238L, 1024L, 
    615L, 381L, 198L, 495L, 297L, 198L, 614L, 369L, 229L, 119L, 
    297L, 178L, 119L), comment = c("Name: Actual premium", "Note: The premium for one year according to the tariff in force 1999", 
    "Unit: SEK")), skadfre = structure(c(0.27027027027027, 0.0620017714791851, 
    0.067618332081142, 0.0185873605947955, 0, 0.0141242937853107, 
    0.227272727272727, 0.1476853166714, 0.0821330793953101, 0.0544148588841326, 
    0.0247034675676167, 0.0175284837861525, 0.017264767542237, 
    0.0160513643659711, 0.224425887265136, 0.143278550358196, 
    0.0677339901477833, 0.0179171332586786, 0, 0.036231884057971, 
    0, 0.111268939393939, 0.0763888888888889, 0.0304551814964195, 
    0.0149704600743176, 0.036563071297989, 0.0123548307388189, 
    0.0150829562594268), comment = c("Name: Claim frequency", 
    "Unit: /year")), offset = structure(c(7.6251071482389, 7.11476944836646, 
    6.63594655568665, 5.98141421125448, 6.89770494312864, 6.38687931936265, 
    5.98141421125448, 7.11395610956603, 6.60394382460047, 6.12468339089421, 
    5.47227067367148, 6.38687931936265, 5.87493073085203, 5.47227067367148, 
    6.93147180559945, 6.42162226780652, 5.9427993751267, 5.28826703069454, 
    6.20455776256869, 5.6937321388027, 5.28826703069454, 6.41999492814714, 
    5.91079664404053, 5.43372200355424, 4.77912349311153, 5.6937321388027, 
    5.18178355029209, 4.77912349311153), comment = c("Name: Actual premium", 
    "Note: The premium for one year according to the tariff in force 1999", 
    "Unit: SEK"))), .Names = c("premiekl", "moptva", "zon", "dur", 
"medskad", "antskad", "riskpre", "helpre", "skadfre", "offset"
), row.names = c(NA, -28L), comment = c("Title: Partial casco moped insurance from Wasa insurance, 1994--1999", 
"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas", "Copyright: http://www2.math.su.se/~esbj/GLMbook/"
), class = "data.frame")

1 个答案:

答案 0 :(得分:0)

我认为代码中只有一个小错误:

# mdlout2<-subset(mdlout, mdlout$var.lev!="(Intercept)") # Should be:
mdlout2<-subset(mdlout2, mdlout2$var.lev!="(Intercept)")

但是,正如@IShouldBuyABoat建议的那样,使用[

进行子集会更安全
mdlout2<-mdlout[mdlout$var.lev!="(Intercept)") & mdlout$converged==1,]