在R中可视化含多个变量的glm预测结果

时间:2018-11-12 10:49:32

标签: r glm predict

我想使用下面的数据集来拟合glm并可视化predict()。

# Response: 60 numeric observations.
y <- c(
  -18.948, -19.007, -18.899, -19.022, -20.599, -19.778, -17.344, -20.265, -20.258, -19.886,
  -18.05, -19.824, -20.1, -20.508, -20.455, -16.573, -20.249, -20.205, -20.436, -16.358,
  -17.717, -19.794, -20.372, -19.944, -20.072, -19.889, -20.139, -19.132, -20.275, -19.953,
  -19.769, -20.2, -19.638, -17.419, -19.086, -18.347, -18.73, -18.872, -18.956, -19.28,
  -18.176, -19.036, -18.084, -20.11, -19.641, -19.656, -19.25, -18.68, -19.089, -18.969,
  -18.161, -17.603, -20.37, -19.233, -18.961, -19.083, -20.118, -19.795, -17.154, -16.75
)

# Continuous predictor, one value per observation.
x1 <- c(
  9.698, 9.583, 9.356, 9.326, 9.438, 9.733, 8.803, 8.973, 9.141, 9.044,
  8.788, 9.377, 9.26, 10.186, 9.035, 9.569, 9.431, 9.09, 8.776, 9.117,
  9.393, 9.408, 9.307, 8.868, 8.398, 8.407, 9.364, 9.074, 8.444, 9.122,
  10.11, 7.81, 9.777, 6.472, 9.521, 8.92, 9.341, 9.446, 9.08, 8.071,
  8.047, 8.019, 7.419, 9.022, 9.981, 9.337, 9.989, 10.013, 9.31, 10.843,
  8.337, 9.103, 6.438, 9.372, 9.071, 8.749, 9.016, 8.181, 9.284, 8.44
)

# Categorical predictors, written as consecutive runs of identical labels.
x2 <- rep(
  c("S03", "S04", "S06", "S07", "S08", "S09", "S10", "S03", "S04", "S07"),
  times = c(20, 6, 8, 9, 2, 7, 1, 3, 2, 2)
)
x3 <- rep(c("A1", "P1"), times = c(53, 7))

# One row per observation; column names are taken from the variable names.
mydata <- data.frame(y, x1, x2, x3)

拟合glm模型:

# Gaussian GLM of y on x1, x2 and x3, with an x1-by-x2 interaction term.
myglm <- glm(
  formula = y ~ x1 + x2 + x3 + x1:x2,
  family = gaussian(link = "identity"),
  data = mydata
)

预测:

1) 提取x1的范围:

# Smallest and largest observed x1; the grid built for prediction reads these.
# The variables share their names with base::min()/base::max(); calls such as
# min(...) still resolve to the functions.
min <- base::min(mydata[["x1"]])
max <- base::max(mydata[["x1"]])

2) 创建新的数据框new.x:

问题来了:
如何在new.x中包括x2和x3?

# Prediction grid: 60 evenly spaced x1 values between the observed minimum and
# maximum (the `min` and `max` variables computed from mydata$x1), with both
# categorical predictors held at their most frequent observed level so that
# predict() yields one fitted curve. data.frame() recycles the single level
# across all 60 rows.
# To predict for every group instead, build the grid with
# expand.grid(x1 = ..., x2 = unique(mydata$x2), x3 = unique(mydata$x3)).
new.x <- data.frame(
  x1 = seq(min, max, length.out = 60),
  x2 = names(which.max(table(mydata$x2))),
  x3 = names(which.max(table(mydata$x3)))
)

然后用myglm对new.x进行预测,得到new.y:

# Predict for every row of new.x; with se.fit = TRUE, predict() returns a list
# that holds the standard errors alongside the fitted values.
new.y <- predict(
  object = myglm,
  newdata = new.x,
  se.fit = TRUE
)

合并new.x和new.y:

# Put the prediction grid and the components of the predict() result side by
# side in a single data frame, one row per grid point.
addThese <- data.frame(new.x, new.y)

计算置信区间:

# Add the response-scale prediction (d15N) and an approximate 95% Wald interval
# (fit +/- 1.96 * se.fit). The back-transformation uses the model's own inverse
# link rather than a hard-coded exp(): exp() is only right for a log link, and
# myglm is Gaussian with the identity link, where exp() would squash values
# around -19 to almost 0 and detach the band from the fitted line.
# mutate() is namespace-qualified so the step runs without attaching dplyr.
addThese <- dplyr::mutate(
  addThese,
  d15N = family(myglm)$linkinv(fit),
  lwr = family(myglm)$linkinv(fit - 1.96 * se.fit),
  upr = family(myglm)$linkinv(fit + 1.96 * se.fit)
)

3) 可视化原始数据点,并添加glm预测的平滑线:

# Observed data as open circles, with the fitted curve and its interval band on
# top. The point layer reads mydata explicitly; without that it would inherit
# addThese and draw the predictions instead of the original observations.
library(ggplot2)

ggplot(addThese, aes(x = x1, y = fit)) +
  geom_point(data = mydata, mapping = aes(y = y), shape = 21, size = 3) +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity")

1 个答案:

答案 0 :(得分:1)

我不确定这是否是创建new.data的正确方法,不过可以试一试。基于您的数据,对代码稍作修改如下:

# Packages for the pipe, bind_cols()/mutate() and the plotting functions;
# ggthemes is only needed for theme_few() and is called through `::`.
library(dplyr)
library(ggplot2)

# Gaussian GLM (identity link) of y on x1, x2 and x3 with an x1-by-x2 interaction.
myglm <- glm(y ~ x1 + x2 + x3 + x1:x2, family = gaussian, data = mydata)

minx <- min(mydata$x1)
maxx <- max(mydata$x1)

# Prediction grid: 60 evenly spaced x1 values crossed with every level of x2 and
# x3. This includes x2/x3 pairs that never occur together in mydata; predictions
# for those panels are extrapolations from the additive x3 term.
new.data <- expand.grid(
  x1 = seq(minx, maxx, length.out = 60),
  x2 = unique(mydata$x2),
  x3 = unique(mydata$x3)
)

# Visualize: one panel per x2/x3 combination, showing the fitted line with an
# approximate 95% band (fit +/- 1.96 * se.fit) and the observed data as points.
# The model uses the identity link, so fit is already on the response scale and
# needs no exp() back-transformation. The point layer reads mydata, so the
# points are real observations rather than jittered copies of the predictions.
predict(myglm, newdata = new.data, se.fit = TRUE)[c("fit", "se.fit")] %>%
  data.frame() %>%
  bind_cols(new.data) %>%
  mutate(lwr = fit - 1.96 * se.fit, upr = fit + 1.96 * se.fit) %>%
  ggplot(aes(x = x1, y = fit, colour = interaction(x2, x3))) +
  geom_point(data = mydata, mapping = aes(y = y), size = 1, alpha = .75, pch = 19) +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", alpha = .5) +
  facet_wrap(~interaction(x2, x3, sep = " : "), nrow = 5) +
  ggthemes::theme_few() +
  labs(y = "Predicted value", x = bquote(x[1])) +
  theme(legend.position = "none")

glm-viz