构建交叉验证Gamma障碍模型作为函数?

时间:2018-04-23 21:02:17

标签: r dplyr glm

我正在尝试构建一个函数来执行具有二项式/ gamma分布的交叉验证障碍模型。障碍模型通常适用于零膨胀泊松分布,但我想将零膨胀Gamma分布作为分析成本数据的方法。

背景

零膨胀泊松模型将分析分为两部分:分析的第一部分是概率分布测量0或大于零的二项/负二项/几何评估,第二部分测量概率分布非零分布。换句话说,Mullahy 1986和Zeileis et. al中的主要等式建立了一种分析方法:

enter image description here

我想调整他们的方法来处理具有伽玛分布的成本数据。因此,代替而不是

enter image description here

我也在尝试使用交叉验证来优化功能。要将此作为交叉验证,有两个主要步骤。第一种是将其分析为 y = 0 y>之间的二项式/负二项式/几何。 0 的。然后给出 y> 0 ,您使用glmfamily = gamma匹配伽马模型。

以下是我的以下代码:

cv.hurdle.gamma <- function(model=, dat=, k =, reorder = TRUE, 
                            model.1.type = "binomial"){
  require(ISLR)
  require(dplyr)
  require(pscl)
  require(purrr)
  require(modelr)
  require(broom)
  require(tidyr)

  if(2*k <= nrow(dat)) {
    stop("Error: Ratio of folds to observation too low")
  }
  set.seed(seed = seed.num)
  dat$y.binom  <- ifelse(dat[[all.vars(model)[1]]] > 0, 1, 0)
  analysis.glm <- dat %>%
                  select(one_of(c(all.vars(model)[-1], "y.binom"))) %>%
                  crossv_kfold(k = k) %>%
                  mutate(model = map(train, ~ glm(formula = y.binom~., data = ., family = model.1.type)))
predictions.glm <- analysis.glm %>% unnest(map2(model, test, ~augment(.x, newdata = .y)))

#### here is the problem
  analysis.gam <- dat %>%
                  crossv_kfold(k = k) %>%
                  do(UQ(as.name(all.vars(model)[1])) > 0) %>%
                  mutate(model = map(train, ~ glm(formula = model, data = ., family = gamma)))


  predictions.gam <- analysis.gam %>% unnest(map2(model, test, ~augment(.x, newdata = .y)))

}

predictions.glm...以外的所有内容都有效,但在尝试适应gamma模型时,我的代码开始分崩离析。为了排列交叉验证统计信息,我需要使用相同的数据,但要过滤y>0的值;然而,显然这不能在crossv_kfold行之后完成。有没有解决方法或者我必须手动执行此操作(即,没有上面列出的各种软件包的帮助)?

dput(dat)
structure(list(y = c(0.0873588341359355, 0.287729826121801, 0.0573073814623841, 
0, 1.07186262575082, 0, 0, 0.444751108347917, 0.955880506855193, 
0.125869761859781, 1.04246597654281, 0.602367511170065, 0.00455502647867159, 
0.00570279008712059, 0, 0.0521668943455801, 0, 0, 0.0469040199977254, 
0, 0, 0.0474795351641914, 0.120960529037452, 0.13394714054228, 
0.170519704446726, 1.35849570212068, 0.15642171331789, 0.169653755120677, 
0, 0.23087847751788, 0.11965648095792, 0.05741581236216, 0.582921517508486, 
0.00148674505016221, 0, 0.0013607346063604, 0, 0.0472718998735441, 
0, 0.000181409965466182, 0, 0.000709954445533417, 0, 0.0089611179005829, 
2.93984784510064, 0, 0.00436508966978864, 0.525095489397255, 
0, 0.0184075190708377, 0.0017275574306807, 0, 0.0520226458749667, 
0.0769380759915087, 0.762644090784732, 0.00233031977900812, 0.183680160482936, 
0.129044399795561, 0.0530417702466234, 0.631689960608072, 0, 
0.933019961504137, 1.12201619108448, 0.372145228493276, 0.00190349118355715, 
0.575403592374592, 1.58425037410382, 0, 0.889061970325199, 0, 
0.100911877883966, 0, 0.011846061973296, 0.0207678749042814, 
0.627964019952863, 0, 0, 0.567588186306217, 0, 0, 0.00299615902846718, 
0, 0.00504131436609304, 0.663140790310113, 0, 2.27950907180982, 
0, 0.21963189016903, 0.611057282347885, 0.220545583037388, 0.203262667054132, 
0.00670167686952219, 0.246511878756605, 0, 0, 0, 0.141828489450798, 
0.928524697512492, 0, 0.0214352693961126, 0.304795754800635, 
0.458087622778207, 0.596526597046027, 0, 1.57998526858726, 0.000208486015415954, 
0.0572569251888749, 0.0928399345039089, 0, 0.0873365147950202, 
0, 0, 1.31424709069847, 0.127539270107218, 0.000774334672839962, 
0.00906305802497871, 0, 2.10789573993233, 0.15954127861041, 0.00385807130417187, 
0, 0.150121022176224, 0.657916596142017, 0.000329461688517048, 
0, 0.285586397395188, 0.041008510808684, 0.483696390503764, 0.477433017890856, 
0.191979163630133, 1.58836976308611, 1.96136277919986, 0.608679854672383, 
0.0128610469587465, 0, 0.301268972605286, 0.0847832353154572, 
0.0467279608215621, 0, 0.0779962962653117, 0.19342719263354, 
0.0042169079224134, 1.29554472345076, 1.64663868768609, 0, 0.111046843155957, 
0.34223572157047, 0, 0.00115742163347153, 0), x1 = c(-0.626453810742332, 
0.183643324222082, -0.835628612410047, 1.59528080213779, 0.329507771815361, 
-0.820468384118015, 0.487429052428485, 0.738324705129217, 0.575781351653492, 
-0.305388387156356, 1.51178116845085, 0.389843236411431, -0.621240580541804, 
-2.2146998871775, 1.12493091814311, -0.0449336090152309, -0.0161902630989461, 
0.943836210685299, 0.821221195098089, 0.593901321217509, 0.918977371608218, 
0.782136300731067, 0.0745649833651906, -1.98935169586337, 0.61982574789471, 
-0.0561287395290008, -0.155795506705329, -1.47075238389927, -0.47815005510862, 
0.417941560199702, 1.35867955152904, -0.102787727342996, 0.387671611559369, 
-0.0538050405829051, -1.37705955682861, -0.41499456329968, -0.394289953710349, 
-0.0593133967111857, 1.10002537198388, 0.763175748457544, -0.164523596253587, 
-0.253361680136508, 0.696963375404737, 0.556663198673657, -0.68875569454952, 
-0.70749515696212, 0.36458196213683, 0.768532924515416, -0.112346212150228, 
0.881107726454215, 0.398105880367068, -0.612026393250771, 0.341119691424425, 
-1.12936309608079, 1.43302370170104, 1.98039989850586, -0.367221476466509, 
-1.04413462631653, 0.569719627442413, -0.135054603880824, 2.40161776050478, 
-0.0392400027331692, 0.689739362450777, 0.0280021587806661, -0.743273208882405, 
0.188792299514343, -1.80495862889104, 1.46555486156289, 0.153253338211898, 
2.17261167036215, 0.475509528899663, -0.709946430921815, 0.610726353489055, 
-0.934097631644252, -1.2536334002391, 0.291446235517463, -0.443291873218433, 
0.00110535163162413, 0.0743413241516641, -0.589520946188072, 
-0.568668732818502, -0.135178615123832, 1.1780869965732, -1.52356680042976, 
0.593946187628422, 0.332950371213518, 1.06309983727636, -0.304183923634301, 
0.370018809916288, 0.267098790772231, -0.54252003099165, 1.20786780598317, 
1.16040261569495, 0.700213649514998, 1.58683345454085, 0.558486425565304, 
-1.27659220845804, -0.573265414236886, -1.22461261489836, -0.473400636439312, 
-0.620366677224124, 0.0421158731442352, -0.910921648552446, 0.158028772404075, 
-0.654584643918818, 1.76728726937265, 0.716707476017206, 0.910174229495227, 
0.384185357826345, 1.68217608051942, -0.635736453948977, -0.461644730360566, 
1.43228223854166, -0.650696353310367, -0.207380743601965, -0.392807929441984, 
-0.319992868548507, -0.279113302976559, 0.494188331267827, -0.177330482269606, 
-0.505957462114257, 1.34303882517041, -0.214579408546869, -0.179556530043387, 
-0.100190741213562, 0.712666307051405, -0.0735644041263263, -0.0376341714670479, 
-0.681660478755657, -0.324270272246319, 0.0601604404345152, -0.588894486259664, 
0.531496192632572, -1.51839408178679, 0.306557860789766, -1.53644982353759, 
-0.300976126836611, -0.528279904445006, -0.652094780680999, -0.0568967778473925, 
-1.91435942568001, 1.17658331201856, -1.664972436212, -0.463530401472386, 
-1.11592010504285, -0.750819001193448, 2.08716654562835, 0.0173956196932517, 
-1.28630053043433, -1.64060553441858), x2 = c(4L, 1L, 3L, 3L, 
2L, 8L, 1L, 4L, 1L, 2L, 1L, 1L, 5L, 4L, 2L, 3L, 1L, 2L, 7L, 3L, 
4L, 2L, 2L, 8L, 5L, 6L, 4L, 4L, 2L, 4L, 7L, 2L, 2L, 4L, 1L, 2L, 
3L, 1L, 3L, 7L, 8L, 1L, 3L, 2L, 4L, 2L, 3L, 1L, 2L, 4L, 3L, 1L, 
4L, 2L, 7L, 2L, 5L, 1L, 5L, 3L, 1L, 2L, 5L, 2L, 3L, 5L, 1L, 4L, 
4L, 3L, 3L, 3L, 6L, 2L, 2L, 2L, 2L, 5L, 1L, 3L, 3L, 3L, 4L, 7L, 
2L, 2L, 4L, 5L, 1L, 6L, 1L, 1L, 6L, 3L, 1L, 2L, 4L, 2L, 2L, 1L, 
4L, 1L, 6L, 5L, 6L, 4L, 2L, 4L, 0L, 6L, 8L, 2L, 4L, 4L, 4L, 3L, 
3L, 2L, 4L, 5L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 6L, 5L, 5L, 6L, 1L, 
2L, 3L, 1L, 4L, 4L, 1L, 3L, 6L, 0L, 2L, 3L, 3L, 2L, 2L, 2L, 6L, 
1L, 1L), y.binom = c(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 
1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 
1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 
0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 1, 0)), .Names = c("y", "x1", "x2", "y.binom"
), row.names = c(NA, -150L), class = "data.frame")

0 个答案:

没有答案