泊松回归预测

时间:2016-12-05 23:40:02

标签: r plot regression glm predict

我目前正在使用模型

处理数据集
glm1 <- glm(FALL ~ GRP + AGE + SEX + offset(log(FU)), family=poisson, data=dat)

现在我需要预测一年内对照组女性的跌倒量。

我需要执行predict功能,但我不确定如何。我试着做了几件事,最后尝试了这个:

levels(dat$GRP)
levels(dat$SEX)
SEX="FEMALE"
GRP="CONTROL"
FU="12"
y<- predict(glm1, type = 'response')
plot(x=dat$AGE[order(dat$AGE)],y=y[order(dat$FALL)],type='l')

但这只给我一个奇怪的情节。我需要做什么?

编辑:根据要求添加数据以重现性

dat <- structure(list(FALL = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 
2L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 
3L, 0L, 1L, 1L, 0L, 0L, 2L, 3L, 0L, 0L, 3L, 1L, 0L, 0L, 2L, 1L, 
2L, 2L, 1L, 1L, 0L, 0L, 0L, 4L, 1L, 0L, 0L, 0L, 0L, 2L, 3L, 1L, 
0L, 1L, 2L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
3L, 4L, 0L, 1L, 0L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 
1L, 0L, 1L, 0L, 0L, 3L, 0L, 0L, 2L, 0L, 0L, 2L, 0L, 3L, 1L, 0L, 
0L, 1L, 1L, 2L, 1L, 0L, 0L, 0L, 0L, 1L, 0L), GRP = structure(c(1L, 
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 
2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L), .Label = c("CONTROL", "TAI CHI"), class = "factor"), 
FU = c(18, 12, 17, 4, 23, 16, 22, 24, 23, 11, 22, 9, 23, 
8, 20, 17, 23, 17, 15, 17, 19, 21, 22, 16, 14, 21, 20, 21, 
7, 22, 19, 12, 15, 21, 24, 11, 23, 21, 10, 15, 19, 19, 16, 
24, 17, 23, 16, 17, 18, 18, 20, 8, 21, 16, 15, 19, 23, 14, 
13, 6, 16, 18, 9, 7, 16, 14, 16, 18, 13, 12, 15, 22, 17, 
17, 20, 21, 11, 24, 9, 13, 24, 12, 21, 20, 19, 17, 21, 15, 
17, 11, 24, 10, 18, 9, 16, 19, 6, 13, 22, 18, 10, 15, 14, 
21, 21, 5, 24, 21, 11, 23, 21, 16, 22, 6, 24, 18, 21), AGE = c(71, 
81, 71, 79, 77, 79, 76, 86, 75, 75, 76, 83, 71, 80, 77, 79, 
77, 74, 83, 81, 83, 79, 74, 79, 78, 85, 82, 71, 81, 78, 82, 
74, 73, 75, 83, 78, 83, 83, 65, 75, 75, 75, 75, 78, 80, 69, 
80, 73, 74, 79, 76, 78, 70, 77, 77, 76, 84, 71, 73, 76, 80, 
77, 74, 78, 68, 76, 77, 76, 72, 72, 76, 82, 72, 80, 78, 83, 
80, 73, 79, 75, 79, 75, 80, 77, 81, 78, 74, 79, 78, 74, 79, 
77, 77, 85, 79, 73, 78, 73, 70, 68, 74, 82, 75, 77, 77, 73, 
73, 83, 74, 87, 76, 81, 77, 78, 66, 79, 82), SEX = structure(c(1L, 
1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L), .Label = c("FEMALE", 
"MALE"), class = "factor")), .Names = c("FALL", "GRP", "FU", 
"AGE", "SEX"), class = "data.frame", row.names = c(NA, -117L))

亲切的问候。

编辑:置信区间问题

我还有一个问题。我创建了这样的置信区间:

prs <- predict(glm1, newdata = newdat, type = "response", se.fit=TRUE)
newdat$pred <- prs[[1]]
newdat$se <- prs[[2]]
newdat$lo <- newdat$pred - 1.96 * newdat$se 
newdat$up <- newdat$pred + 1.96 * newdat$se

但是可以在同一图表中绘制这个吗?

1 个答案:

答案 0 :(得分:2)

使用IntPtr hwndMain = OpenPaint(); // Method that opens MSPaint 时,您需要设置predict。只需在没有newdata的情况下调用predict即可返回适合的值。因此,newdata来电主要是为了predict

看,您希望glm1$fitted.values SEX == "FEMALE"GRP == "CONTROL"进行预测。使用

FU == 12

enter image description here

最初我建议:

## I use `AGE = 65:87` because this is what `range(dat$AGE)` gives
## we must provide all covariates used in model formula to make `predict` work
## recycling rule is applied here.
## `GRP`, `SEX` and `FU` are given a single value, while `AGE` has length 23
## they will be recycled 23 times
newdat <- data.frame(AGE = 65:87, GRP = "CONTROL", SEX = "FEMALE", FU = 12)
pred <- predict(glm1, newdata = newdat, type = "response")
plot(newdat$AGE, pred, type = "l")

但这是一个坏主意。它会为您提供一个空数据框,因为newdat <- subset(dat, GRP == "CONTROL" & SEX == "FEMALE" & FU == 12) 中没有符合选择条件的匹配列。

跟进(实际上更值得回答)

  

我还有一个问题。我创建了这样的置信区间:

dat
  

但是可以在同一图表中绘制这个吗?

您的置信区间未正确计算。响应通常不是分布式的,因此您无法使用prs <- predict(glm1, newdata = newdat, type = "response", se.fit=TRUE) newdat$pred <- prs[[1]] newdat$se <- prs[[2]] newdat$lo <- newdat$pred - 1.96 * newdat$se newdat$up <- newdat$pred + 1.96 * newdat$se 。线性预测器渐近正态,因此您需要为线性预测器生成置信带,然后使用反向链接函数将其转换为响应比例。

1.96

要在同一地块上绘制它们,您可以使用ginv <- glm1$family$linkinv ## inverse link function prs <- predict(glm1, newdata = newdat, type = "link", se.fit=TRUE) newdat$pred <- ginv(prs[[1]]) newdat$lo <- ginv(prs[[1]] - 1.96 * prs[[2]]) newdat$up <- ginv(prs[[1]] + 1.96 * prs[[2]]) + plot

lines

enter image description here

或者,您可以使用with(newdat, plot(AGE, pred, type = "l", ylim = c(min(lo), max(up)) )) with(newdat, lines(AGE, lo, lty = 2)) with(newdat, lines(AGE, up, lty = 2))

matplot