我使用 mgcv 包中的 gam 拟合模型,并将结果存储在 model 中。到目前为止,我一直使用 plot(model) 查看各平滑分量。
我最近开始使用 ggplot2,并且很喜欢它的输出效果。所以我想知道,是否可以用 ggplot2 绘制这些图形?
以下是一个例子:
# Simulate two predictors and a Poisson response whose rate depends on both.
x1 <- rnorm(1000)
x2 <- rnorm(1000)
n <- rpois(1000, lambda = exp(x1) + x2^2)

# Fit a Poisson model with one smooth term per predictor.
model <- gam(
  n ~ s(x1, k = 10) + s(x2, k = 20),
  family = "poisson"
)

# Draw the first and the second smooth term, each without a rug.
plot(model, select = 1, rug = FALSE)
plot(model, select = 2, rug = FALSE)
我主要想绘制的是 s(x1, k=10) 和 s(x2, k=20) 这两个平滑项,而不是模型的拟合值。
部分答案:
我深入研究了 plot.gam 和 mgcv:::plot.mgcv.smooth,并编写了自己的函数,用于从各平滑分量中提取预测效应和标准误差。它并未处理 plot.gam 的所有选项和情形,因此我只把它视为部分解决方案,但它对我来说效果很好。
# Evaluate the smooth terms of a fitted GAM on a grid of covariate values.
#
# Arguments:
#   model   Fitted model object. The function reads its `smooth`, `model`,
#           `coefficients` and `Vp` components.
#   select  Indices into `model$smooth` of the smooths to evaluate;
#           NULL (the default) evaluates all of them.
#   x       Optional vector of covariate values used for every selected
#           smooth. When NULL, each smooth gets `n` equally spaced points
#           spanning the observed range of its covariate in `model$model`.
#   n       Number of grid points generated when `x` is NULL.
#
# Returns:
#   A long ("melted") data frame with one row per smooth and grid point and
#   the columns label, x.var, x.val, by.var, by.val, value and se; NULL when
#   there is no smooth to evaluate.
#
# Only smooths of a single variable are supported.
EvaluateSmooths <- function(model, select = NULL, x = NULL, n = 100) {
  if (is.null(select)) {
    # seq_along() is empty for a model without smooths, whereas
    # 1:length() would produce c(1, 0) and index out of bounds.
    select <- seq_along(model$smooth)
  }
  data <- model$model

  pieces <- lapply(select, function(i) {
    smooth <- model$smooth[[i]]
    if (length(smooth$term) != 1L) {
      stop(
        "EvaluateSmooths() only supports smooths of a single variable: ",
        smooth$label,
        call. = FALSE
      )
    }

    # Use the caller's grid if given, otherwise span the observed range.
    grid <- x
    if (is.null(grid)) {
      observed <- data[[smooth$term]]
      grid <- seq(min(observed), max(observed), length.out = n)
    }

    # A smooth without a `by` variable stores the string "NA" in `by`.
    if (smooth$by == "NA") {
      by.level <- "NA"
    } else {
      by.level <- smooth$by.level
    }

    # PredictMat() looks the covariates up by name, so the columns are
    # renamed to the smooth's own term and `by` names.
    newdata <- data.frame(x = grid, by = by.level)
    names(newdata) <- c(smooth$term, smooth$by)
    mat <- PredictMat(smooth, newdata)

    # Coefficients (and the matching covariance block) of this smooth.
    par <- smooth$first.para:smooth$last.para
    y <- mat %*% model$coefficients[par]
    # Row-wise quadratic form: diag(mat %*% Vp %*% t(mat)) without
    # building the full n x n matrix.
    se <- sqrt(rowSums(
      (mat %*% model$Vp[par, par, drop = FALSE]) * mat
    ))

    data.frame(
      label = smooth$label,
      x.var = smooth$term,
      x.val = grid,
      by.var = smooth$by,
      by.val = by.level,
      value = y,
      se = se
    )
  })

  do.call(rbind, pieces)
}
这会返回一个包含各平滑分量的“长格式”(melted)数据框,因此现在可以对上面的示例使用 ggplot:
# Evaluate every smooth of the model, then plot each estimate together with
# dashed lines at +/- 2 standard errors, one facet per covariate.
smooths <- EvaluateSmooths(model)
ggplot(data = smooths, mapping = aes(x = x.val, y = value)) +
  geom_line() +
  geom_line(mapping = aes(y = value + 2 * se), linetype = "dashed") +
  geom_line(mapping = aes(y = value - 2 * se), linetype = "dashed") +
  facet_grid(. ~ x.var)
如果有人知道能在一般情况下实现这一点的 R 包,我将非常感激。
答案 0 :(得分:20)
您可以将 visreg 包与 plyr 包结合使用。visreg 基本上可以绘制任何支持 predict() 的模型。
library(mgcv)
library(visreg)
library(plyr)
library(ggplot2)
# Simulate data and fit the Poisson GAM:
x1 <- rnorm(1000)
x2 <- rnorm(1000)
n <- rpois(1000, lambda = exp(x1) + x2^2)
model <- gam(
  n ~ s(x1, k = 10) + s(x2, k = 20),
  family = "poisson"
)

# With plot = FALSE, visreg hands back the plot data instead of drawing it.
plotdata <- visreg(model, type = "contrast", plot = FALSE)

# The visreg output is a list with one element per 'x' variable; ldply pulls
# the fitted curve and its lower/upper bounds out of each element and stacks
# them into a single data frame.
smooths <- ldply(plotdata, function(part) {
  x_name <- part$meta$x
  fit <- part$fit
  data.frame(
    Variable = x_name,
    x = fit[[x_name]],
    smooth = fit$visregFit,
    lower = fit$visregLwr,
    upper = fit$visregUpr
  )
})

# The ggplot: solid fitted line, dashed bounds, one facet per variable.
ggplot(data = smooths, mapping = aes(x = x, y = smooth)) +
  geom_line() +
  geom_line(mapping = aes(y = lower), linetype = "dashed") +
  geom_line(mapping = aes(y = upper), linetype = "dashed") +
  facet_grid(. ~ Variable, scales = "free_x")
我们可以把整个东西放到一个函数中,并添加一个选项来显示模型中的残差(res = TRUE):
# Plot the effect of every predictor of a fitted model with ggplot2, using
# the plot data computed by visreg.
#
# Arguments:
#   model       Fitted model object, passed on to visreg().
#   type        Passed to visreg() as its `type` argument.
#   res         If TRUE, overlay the residuals returned by visreg as points.
#   col.line    Colour of the fitted line and of the dashed bound lines.
#   col.point   Colour of the residual points.
#   size.line   Size of the fitted line and of the dashed bound lines.
#   size.point  Size of the residual points.
#
# Returns:
#   A ggplot object with one facet per variable and free x scales.
#
# The ggplot2 functions are called unqualified, so ggplot2 must be attached
# by the caller (e.g. via library(ggplot2)).
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for lines;
# `size` is kept here so the function still works on older versions.
ggplot.model <- function(model, type = "conditional", res = FALSE,
                         col.line = "#7fc97f", col.point = "#beaed4",
                         size.line = 1, size.point = 1) {
  # require() merely returns FALSE when a package is missing and the failure
  # would only surface later; check up front and stop with a clear message.
  for (pkg in c("visreg", "plyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by ggplot.model().", call. = FALSE)
    }
  }

  plotdata <- visreg::visreg(model, type = type, plot = FALSE)
  # For a model with a single predictor visreg returns one 'visreg' object
  # rather than a list of them; wrap it so the code below handles both.
  if (inherits(plotdata, "visreg")) {
    plotdata <- list(plotdata)
  }

  # Fitted curve and bounds of every variable, stacked in one data frame.
  smooths <- plyr::ldply(plotdata, function(part) {
    data.frame(
      Variable = part$meta$x,
      x = part$fit[[part$meta$x]],
      smooth = part$fit$visregFit,
      lower = part$fit$visregLwr,
      upper = part$fit$visregUpr
    )
  })

  p <- ggplot(smooths, aes(x, smooth)) +
    geom_line(col = col.line, size = size.line) +
    geom_line(aes(y = lower), linetype = "dashed",
              col = col.line, size = size.line) +
    geom_line(aes(y = upper), linetype = "dashed",
              col = col.line, size = size.line)

  if (res) {
    # The residual data are only assembled when they are actually drawn.
    res_points <- plyr::ldply(plotdata, function(part) {
      data.frame(
        Variable = part$meta$x,
        x = part$res[[part$meta$x]],
        y = part$res$visregRes
      )
    })
    p <- p +
      geom_point(data = res_points, aes(x, y),
                 col = col.point, size = size.point)
  }

  p + facet_grid(. ~ Variable, scales = "free_x")
}
# Default call: fitted lines and dashed bounds only.
ggplot.model(model = model)
# Same plot with the residual points overlaid.
ggplot.model(model = model, res = TRUE)
颜色是从http://colorbrewer2.org/中挑选出来的。
答案 1 :(得分:4)
答案 2 :(得分:0)
已更新,允许用户选择要绘制的变量。
将变量名 residuals 改为 res_data,以避免与 residuals 函数冲突。
# Plot the effect of selected predictors of a fitted model with ggplot2,
# using the plot data computed by visreg.
#
# Arguments:
#   model       Fitted model object, passed on to visreg().
#   type        Passed to visreg() as its `type` argument.
#   res         If TRUE, overlay the residuals returned by visreg as points.
#   col.line    Colour of the fitted line and of the dashed bound lines.
#   col.point   Colour of the residual points.
#   size.line   Size of the fitted line and of the dashed bound lines.
#   size.point  Size of the residual points.
#   no_col      Passed to facet_wrap() as `ncol`.
#   what        Character vector with the names of the variables to plot,
#               or "all" (the default) to plot every variable.
#
# Returns:
#   A ggplot object with one facet per plotted variable and free x scales.
#
# The ggplot2 functions are called unqualified, so ggplot2 must be attached
# by the caller (e.g. via library(ggplot2)).
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for lines;
# `size` is kept here so the function still works on older versions.
ggplot.model <- function(model, type = "conditional", res = FALSE,
                         col.line = "#7fc97f", col.point = "#beaed4",
                         size.line = 1, size.point = 1, no_col = NULL,
                         what = "all") {
  # require() merely returns FALSE when a package is missing and the failure
  # would only surface later; check up front and stop with a clear message.
  for (pkg in c("visreg", "plyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by ggplot.model().", call. = FALSE)
    }
  }

  plotdata <- visreg::visreg(model, type = type, plot = FALSE)
  # For a model with a single predictor visreg returns one 'visreg' object
  # rather than a list of them; wrap it so the code below handles both.
  if (inherits(plotdata, "visreg")) {
    plotdata <- list(plotdata)
  }

  # identical() stays a single TRUE/FALSE when `what` names several
  # variables, where `what != "all"` would hand if() a vector.
  plot_all <- identical(what, "all")
  # Base subsetting: no pipe or dplyr needed to keep the requested variables.
  keep_selected <- function(df) {
    if (plot_all) {
      return(df)
    }
    df[as.character(df$Variable) %in% what, , drop = FALSE]
  }

  # Fitted curve and bounds of every variable, stacked in one data frame.
  smooths <- keep_selected(plyr::ldply(plotdata, function(part) {
    data.frame(
      Variable = part$meta$x,
      x = part$fit[[part$meta$x]],
      smooth = part$fit$visregFit,
      lower = part$fit$visregLwr,
      upper = part$fit$visregUpr
    )
  }))

  p <- ggplot(smooths, aes(x, smooth)) +
    geom_line(col = col.line, size = size.line) +
    geom_line(aes(y = lower), linetype = "dashed",
              col = col.line, size = size.line) +
    geom_line(aes(y = upper), linetype = "dashed",
              col = col.line, size = size.line)

  if (res) {
    # The residual data are only assembled when they are actually drawn.
    res_data <- keep_selected(plyr::ldply(plotdata, function(part) {
      data.frame(
        Variable = part$meta$x,
        x = part$res[[part$meta$x]],
        y = part$res$visregRes
      )
    }))
    p <- p +
      geom_point(data = res_data, aes(x, y),
                 col = col.point, size = size.point)
  }

  p <- p + facet_wrap(~ Variable, scales = "free_x", ncol = no_col)

  # NOTE(review): theme_bw() is applied only when res = TRUE, which is what
  # this function has always done; confirm whether the res = FALSE plot
  # should use the same theme.
  if (res) {
    p <- p + theme_bw()
  }
  p
}