我想在我的数据集上绘制交互效果。由于保密原因,我无法共享完整的数据集,但我添加了effect()
函数的输出,可以将其替换为绘图函数以重现我的结果
交互效应介于两个连续变量varP
和varL
之间。该模型是二项式概率glm。我正在使用effects()
包。
调用基数R plot()
会生成以下图表:
plot(effect("varL:varP", hx.x), multiline = TRUE)
调用ggplot
会产生错误,因为ggplot2
无法处理连续变量(Error: A continuous variable can not be mapped to linetype
)。所以我决定使用cut()
将连续变换为分类变量并重做回归。现在调用ggplot
会产生以下图:
ggplot(data.frame(effect("varL:varP_range", hx.x1))) +
geom_line(aes(varL,fit,linetype=varP_range)) +
theme_bw() +
geom_point(aes(varL,fit, shape = varP_range), size = 4)
effect()
调用会产生以下数据框:
structure(list(varL = c(0, 4900000, 9800000, 1.5e+07, 2e+07,
0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07,
2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000,
1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000,
9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07),
varP_range = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L), .Label = c("(0,0.1]", "(0.1,0.2]", "(0.2,0.3]",
"(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]",
NA), class = "factor"), fit = c(0.0493753018091432, 0.0674980435061065,
0.0903776022141578, 0.120475525568566, 0.155489425305936,
0.0137978311572348, 0.0146948327184384, 0.0156415523658014,
0.0167030042368772, 0.0177811545669692, 0.0283241226002015,
0.0320688611660079, 0.0362132772188478, 0.0410808356583781,
0.0462488565292175, 0.0376893470992477, 0.0434689290038736,
0.0499436899670642, 0.0576329205782917, 0.0658761148111993,
0.0461027343009466, 0.0491516310196363, 0.0523594626652533,
0.0559432088894986, 0.0595689358953948, 0.0271243286792884,
0.0288090021916312, 0.0305797680715237, 0.0325565879397662,
0.0345556259933517, 0.0585874566843392, 0.0695486360371181,
0.0820248510196267, 0.0970343941431089, 0.113280038210583,
0.0209075509863267, 0.0315251673839061, 0.046253681947139,
0.0674584535698224, 0.0942769896468577), se = c(0.026293040668674,
0.0305781859107515, 0.0540612604209606, 0.0833019722980397,
0.112487542810938, 0.0338534463222995, 0.0398771851800222,
0.0731653210576088, 0.113913900015802, 0.15437353282528,
0.0409108254231718, 0.0511475671899819, 0.0972355260310069,
0.152187250282481, 0.206403037601197, 0.0130040830190118,
0.0163131582968098, 0.03036212612247, 0.0472104967827754,
0.0638665398985353, 0.00832820196917705, 0.0121268883535081,
0.0232740805148284, 0.0361535949146859, 0.0487814483071773,
0.0113362224665359, 0.0147098330114556, 0.027542020584634,
0.0427882020481342, 0.057830481511906, 0.01601220706873,
0.0215340145342622, 0.040911260287911, 0.0636748713064177,
0.0860732173940343, 0.0419361989774607, 0.0474273262485864,
0.085507397635156, 0.132979171779259, 0.180309829530109),
lower = c(0.0443332267166309, 0.0600183728153565, 0.0743144400165266,
0.0907939516369112, 0.108668209187463, 0.0116238722892657,
0.012023490450893, 0.0108135425488573, 0.00937570172867644,
0.00809811278621186, 0.0235056239743921, 0.025500658500369,
0.0234608868503482, 0.0208470526400479, 0.0184481609874694,
0.0356438216686479, 0.0406013691901053, 0.0441057429680943,
0.0477084891588961, 0.0512971927819841, 0.0445466712620454,
0.0467806376034436, 0.0476567944606461, 0.048393808464277,
0.0490625016612481, 0.0257635103540086, 0.0269634191042765,
0.0270321095726769, 0.0269061995593561, 0.0267392100653416,
0.0550074959164803, 0.0640838876027606, 0.0705422656571929,
0.0773047979652944, 0.0841065921246788, 0.017106637503393,
0.025481438787035, 0.0321633667203674, 0.0395762352261805,
0.0476311065775831), upper = c(0.0548650233126245, 0.0756784493915628,
0.108885802402636, 0.156404820719125, 0.213976690522108,
0.0163138248731072, 0.0178616891841985, 0.0222128708020368,
0.0284614557616305, 0.0359832219741959, 0.0339378335208811,
0.039975201853988, 0.0541535055907615, 0.0749340625856128,
0.100655675600276, 0.0398297048294564, 0.0464977933470156,
0.0563819985882714, 0.0691131684577491, 0.0834778881744663,
0.0477021593328273, 0.0516176358055639, 0.0574233019439704,
0.0643924091986213, 0.0717620710757599, 0.0285446055381808,
0.0307584171780848, 0.0345046440925246, 0.0391516285886827,
0.0441582201173176, 0.062347826961093, 0.0753653653841884,
0.0948621519669373, 0.120230399782042, 0.149038245196167,
0.0254001895885684, 0.0387079476679479, 0.0649209421145592,
0.108535107340218, 0.16815843130232)), .Names = c("varL",
"varP_range", "fit", "se", "lower", "upper"), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40"), class = "data.frame")
我的问题是:
1)effect()
包中的effects
如何将连续变换为分类变量?如何使用ggplot()
复制?
2)基础R图中的线条在同一坐标处相交的原因是什么?
答案 0 :(得分:4)
回答您的第一个问题:要定义因素,error_reporting()
使用
php.ini
其中nice定义如此(from here):
Effects
使用它的一个例子:
nice(seq(min(var), max(var), length.out=5))
这可以直接与ggplot一起使用:
nice <- function (x, direction = c("round", "down", "up"), lead.digits = 1) {
direction <- match.arg(direction)
if (length(x) > 1){
result <- sapply(x, nice, direction = direction, lead.digits = lead.digits)
if (anyDuplicated(result)) result <- nice(x, direction=direction, lead.digits = lead.digits + 1)
return(result)
}
if (x == 0)
return(0)
power.10 <- floor(log(abs(x), 10))
if (lead.digits > 1)
power.10 <- power.10 - lead.digits + 1
lead.digit <- switch(direction, round = round(abs(x)/10^power.10),
down = floor(abs(x)/10^power.10), up = ceiling(abs(x)/10^power.10))
sign(x) * lead.digit * 10^power.10
}
关于问题2,一种直观的思考方式可能是因为Yhat的平均值没有变化(用固定的z预测),所有部分效果的标准正常CDF共享相同的交集。 E.g。
library(effects)
set.seed(123)
x = rnorm(100)
z = rexp(100)
y = factor(sample(1:2, 100, replace=T))
test = glm(y~x+z+x*z, family = binomial(link = "probit"))
preddat <- matrix('', 25, 100)
preddat <- expand.grid(nice(seq(min(x), max(x), length.out=5)), nice(seq(min(z), max(z), length.out=5)))
colnames(preddat) <- c("x", "z")
predicts <- predict(test, preddat, type = "response")
dim(predicts) <- c(5,5)
effectspred <- pnorm(effect("x:z", test)$fit)
dim(effectspred) <- c(5,5)
all.equal(effectspred, predicts)
[1] TRUE