我正在尝试构建一个函数来执行具有二项式/ gamma分布的交叉验证障碍模型。障碍模型通常适用于零膨胀泊松分布,但我想将零膨胀Gamma分布作为分析成本数据的方法。
背景
零膨胀泊松模型将分析分为两部分:分析的第一部分是概率分布测量0或大于零的二项/负二项/几何评估,第二部分测量概率分布非零分布。换句话说,Mullahy 1986和Zeileis et. al中的主要等式建立了一种分析方法:
我想调整他们的方法来处理具有伽玛分布的成本数据。因此,代替而不是
我也在尝试使用交叉验证来优化功能。要将此作为交叉验证,有两个主要步骤。第一种是将其分析为 y = 0 和 y>之间的二项式/负二项式/几何。 0 的。然后给出 y> 0 ,您使用glm
与family = gamma
匹配伽马模型。
以下是我的以下代码:
cv.hurdle.gamma <- function(model=, dat=, k =, reorder = TRUE,
model.1.type = "binomial"){
require(ISLR)
require(dplyr)
require(pscl)
require(purrr)
require(modelr)
require(broom)
require(tidyr)
if(2*k <= nrow(dat)) {
stop("Error: Ratio of folds to observation too low")
}
set.seed(seed = seed.num)
dat$y.binom <- ifelse(dat[[all.vars(model)[1]]] > 0, 1, 0)
analysis.glm <- dat %>%
select(one_of(c(all.vars(model)[-1], "y.binom"))) %>%
crossv_kfold(k = k) %>%
mutate(model = map(train, ~ glm(formula = y.binom~., data = ., family = model.1.type)))
predictions.glm <- analysis.glm %>% unnest(map2(model, test, ~augment(.x, newdata = .y)))
#### here is the problem
analysis.gam <- dat %>%
crossv_kfold(k = k) %>%
do(UQ(as.name(all.vars(model)[1])) > 0) %>%
mutate(model = map(train, ~ glm(formula = model, data = ., family = gamma)))
predictions.gam <- analysis.gam %>% unnest(map2(model, test, ~augment(.x, newdata = .y)))
}
predictions.glm...
以外的所有内容都有效,但在尝试适应gamma模型时,我的代码开始分崩离析。为了排列交叉验证统计信息,我需要使用相同的数据,但要过滤y>0
的值;然而,显然这不能在crossv_kfold
行之后完成。有没有解决方法或者我必须手动执行此操作(即,没有上面列出的各种软件包的帮助)?
dput(dat)
structure(list(y = c(0.0873588341359355, 0.287729826121801, 0.0573073814623841,
0, 1.07186262575082, 0, 0, 0.444751108347917, 0.955880506855193,
0.125869761859781, 1.04246597654281, 0.602367511170065, 0.00455502647867159,
0.00570279008712059, 0, 0.0521668943455801, 0, 0, 0.0469040199977254,
0, 0, 0.0474795351641914, 0.120960529037452, 0.13394714054228,
0.170519704446726, 1.35849570212068, 0.15642171331789, 0.169653755120677,
0, 0.23087847751788, 0.11965648095792, 0.05741581236216, 0.582921517508486,
0.00148674505016221, 0, 0.0013607346063604, 0, 0.0472718998735441,
0, 0.000181409965466182, 0, 0.000709954445533417, 0, 0.0089611179005829,
2.93984784510064, 0, 0.00436508966978864, 0.525095489397255,
0, 0.0184075190708377, 0.0017275574306807, 0, 0.0520226458749667,
0.0769380759915087, 0.762644090784732, 0.00233031977900812, 0.183680160482936,
0.129044399795561, 0.0530417702466234, 0.631689960608072, 0,
0.933019961504137, 1.12201619108448, 0.372145228493276, 0.00190349118355715,
0.575403592374592, 1.58425037410382, 0, 0.889061970325199, 0,
0.100911877883966, 0, 0.011846061973296, 0.0207678749042814,
0.627964019952863, 0, 0, 0.567588186306217, 0, 0, 0.00299615902846718,
0, 0.00504131436609304, 0.663140790310113, 0, 2.27950907180982,
0, 0.21963189016903, 0.611057282347885, 0.220545583037388, 0.203262667054132,
0.00670167686952219, 0.246511878756605, 0, 0, 0, 0.141828489450798,
0.928524697512492, 0, 0.0214352693961126, 0.304795754800635,
0.458087622778207, 0.596526597046027, 0, 1.57998526858726, 0.000208486015415954,
0.0572569251888749, 0.0928399345039089, 0, 0.0873365147950202,
0, 0, 1.31424709069847, 0.127539270107218, 0.000774334672839962,
0.00906305802497871, 0, 2.10789573993233, 0.15954127861041, 0.00385807130417187,
0, 0.150121022176224, 0.657916596142017, 0.000329461688517048,
0, 0.285586397395188, 0.041008510808684, 0.483696390503764, 0.477433017890856,
0.191979163630133, 1.58836976308611, 1.96136277919986, 0.608679854672383,
0.0128610469587465, 0, 0.301268972605286, 0.0847832353154572,
0.0467279608215621, 0, 0.0779962962653117, 0.19342719263354,
0.0042169079224134, 1.29554472345076, 1.64663868768609, 0, 0.111046843155957,
0.34223572157047, 0, 0.00115742163347153, 0), x1 = c(-0.626453810742332,
0.183643324222082, -0.835628612410047, 1.59528080213779, 0.329507771815361,
-0.820468384118015, 0.487429052428485, 0.738324705129217, 0.575781351653492,
-0.305388387156356, 1.51178116845085, 0.389843236411431, -0.621240580541804,
-2.2146998871775, 1.12493091814311, -0.0449336090152309, -0.0161902630989461,
0.943836210685299, 0.821221195098089, 0.593901321217509, 0.918977371608218,
0.782136300731067, 0.0745649833651906, -1.98935169586337, 0.61982574789471,
-0.0561287395290008, -0.155795506705329, -1.47075238389927, -0.47815005510862,
0.417941560199702, 1.35867955152904, -0.102787727342996, 0.387671611559369,
-0.0538050405829051, -1.37705955682861, -0.41499456329968, -0.394289953710349,
-0.0593133967111857, 1.10002537198388, 0.763175748457544, -0.164523596253587,
-0.253361680136508, 0.696963375404737, 0.556663198673657, -0.68875569454952,
-0.70749515696212, 0.36458196213683, 0.768532924515416, -0.112346212150228,
0.881107726454215, 0.398105880367068, -0.612026393250771, 0.341119691424425,
-1.12936309608079, 1.43302370170104, 1.98039989850586, -0.367221476466509,
-1.04413462631653, 0.569719627442413, -0.135054603880824, 2.40161776050478,
-0.0392400027331692, 0.689739362450777, 0.0280021587806661, -0.743273208882405,
0.188792299514343, -1.80495862889104, 1.46555486156289, 0.153253338211898,
2.17261167036215, 0.475509528899663, -0.709946430921815, 0.610726353489055,
-0.934097631644252, -1.2536334002391, 0.291446235517463, -0.443291873218433,
0.00110535163162413, 0.0743413241516641, -0.589520946188072,
-0.568668732818502, -0.135178615123832, 1.1780869965732, -1.52356680042976,
0.593946187628422, 0.332950371213518, 1.06309983727636, -0.304183923634301,
0.370018809916288, 0.267098790772231, -0.54252003099165, 1.20786780598317,
1.16040261569495, 0.700213649514998, 1.58683345454085, 0.558486425565304,
-1.27659220845804, -0.573265414236886, -1.22461261489836, -0.473400636439312,
-0.620366677224124, 0.0421158731442352, -0.910921648552446, 0.158028772404075,
-0.654584643918818, 1.76728726937265, 0.716707476017206, 0.910174229495227,
0.384185357826345, 1.68217608051942, -0.635736453948977, -0.461644730360566,
1.43228223854166, -0.650696353310367, -0.207380743601965, -0.392807929441984,
-0.319992868548507, -0.279113302976559, 0.494188331267827, -0.177330482269606,
-0.505957462114257, 1.34303882517041, -0.214579408546869, -0.179556530043387,
-0.100190741213562, 0.712666307051405, -0.0735644041263263, -0.0376341714670479,
-0.681660478755657, -0.324270272246319, 0.0601604404345152, -0.588894486259664,
0.531496192632572, -1.51839408178679, 0.306557860789766, -1.53644982353759,
-0.300976126836611, -0.528279904445006, -0.652094780680999, -0.0568967778473925,
-1.91435942568001, 1.17658331201856, -1.664972436212, -0.463530401472386,
-1.11592010504285, -0.750819001193448, 2.08716654562835, 0.0173956196932517,
-1.28630053043433, -1.64060553441858), x2 = c(4L, 1L, 3L, 3L,
2L, 8L, 1L, 4L, 1L, 2L, 1L, 1L, 5L, 4L, 2L, 3L, 1L, 2L, 7L, 3L,
4L, 2L, 2L, 8L, 5L, 6L, 4L, 4L, 2L, 4L, 7L, 2L, 2L, 4L, 1L, 2L,
3L, 1L, 3L, 7L, 8L, 1L, 3L, 2L, 4L, 2L, 3L, 1L, 2L, 4L, 3L, 1L,
4L, 2L, 7L, 2L, 5L, 1L, 5L, 3L, 1L, 2L, 5L, 2L, 3L, 5L, 1L, 4L,
4L, 3L, 3L, 3L, 6L, 2L, 2L, 2L, 2L, 5L, 1L, 3L, 3L, 3L, 4L, 7L,
2L, 2L, 4L, 5L, 1L, 6L, 1L, 1L, 6L, 3L, 1L, 2L, 4L, 2L, 2L, 1L,
4L, 1L, 6L, 5L, 6L, 4L, 2L, 4L, 0L, 6L, 8L, 2L, 4L, 4L, 4L, 3L,
3L, 2L, 4L, 5L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 6L, 5L, 5L, 6L, 1L,
2L, 3L, 1L, 4L, 4L, 1L, 3L, 6L, 0L, 2L, 3L, 3L, 2L, 2L, 2L, 6L,
1L, 1L), y.binom = c(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0,
1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 0)), .Names = c("y", "x1", "x2", "y.binom"
), row.names = c(NA, -150L), class = "data.frame")