这几乎肯定是一个新手问题。
对于下面的数据集,我一直试图在ggplot2中绘制logit和probit曲线,但没有成功。
Ft Temp TD
1 66 0
6 72 0
11 70 1
16 75 0
21 75 1
2 70 1
7 73 0
12 78 0
17 70 0
22 76 0
3 69 0
8 70 0
13 67 0
18 81 0
23 58 1
4 68 0
9 57 1
14 53 1
19 76 0
5 67 0
10 63 1
15 67 0
20 79 0
我天真地使用的代码是
library(ggplot2)
# Question code as posted: scatter-plot TD against Temp and try to overlay a
# logit fit (g / g1) and then a probit fit (g2) with stat_smooth().
# Copy the two columns out of `mydata` into free-standing vectors for qplot().
TD<-mydata$TD
Temp<-mydata$Temp
# NOTE(review): `family` is handed directly to stat_smooth() here; whether it
# is forwarded to glm() depends on the installed ggplot2 version -- confirm.
g<- qplot(Temp,TD)+geom_point()+stat_smooth(method="glm",family="binomial",formula=y~x,col="red")
# Set descriptive axis titles, then display the plot with the first curve.
g1<-g+labs(x="Temperature",y="Thermal Distress")
g1
# NOTE(review): `link` and `add` are also passed directly to stat_smooth();
# presumably meant to select the probit link and to draw on top of g1 --
# verify that the installed ggplot2 version honours either of them.
g2<-g1+stat_smooth(method="glm",family="binomial",link="probit",formula=y~x,add=T)
g2
您能否告诉我如何改进代码以便在同一图表上绘制这两条曲线?
谢谢
答案 0 :(得分:17)
另一种方法是自己生成预测值,再用ggplot绘制——这样您可以更好地控制最终的图形(而不必依赖stat_smooth
进行计算);如果您使用了多个协变量,并且在绘图时需要把其中一些固定在其均值或众数上,这种方法尤其有用。
library(ggplot2)
# Generate data
mydata <- data.frame(
  Ft = c(1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8,
         13, 18, 23, 4, 9, 14, 19, 5, 10, 15, 20),
  Temp = c(66, 72, 70, 75, 75, 70, 73, 78, 70, 76, 69, 70,
           67, 81, 58, 68, 57, 53, 76, 67, 63, 67, 79),
  TD = c(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
         0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0)
)

# Run logistic regression model (TD as a function of Temp)
model <- glm(TD ~ Temp, data = mydata, family = binomial(link = "logit"))

# Create a temporary data frame of hypothetical values: an evenly spaced
# temperature grid spanning the observed range (53 to 81 for this data), so
# the grid follows the data instead of repeating its bounds by hand.
temp.data <- data.frame(
  Temp = seq(min(mydata$Temp), max(mydata$Temp), by = 0.5)
)

# Predict the fitted values given the model and hypothetical data.
# Predictions are requested on the link scale so the interval can be built
# there and only then mapped back to probabilities. `se.fit` is spelled out
# in full rather than relying on partial argument matching.
predicted.data <- as.data.frame(
  predict(model, newdata = temp.data, type = "link", se.fit = TRUE)
)

# Combine the hypothetical data and predicted values
# (columns: Temp, fit, se.fit, residual.scale)
new.data <- cbind(temp.data, predicted.data)

# Calculate confidence intervals: two-sided 95% normal quantile, applied on
# the link scale and back-transformed with the model's inverse link.
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- model$family$linkinv(new.data$fit - std * new.data$se.fit)
new.data$ymax <- model$family$linkinv(new.data$fit + std * new.data$se.fit)
new.data$fit <- model$family$linkinv(new.data$fit)  # Rescale to 0-1
# Plot everything: observed 0/1 outcomes from mydata, with the fitted curve
# and its interval band from new.data drawn on top.
# The overlay layers set y = fit so they do not inherit y = TD, which is not
# a column of new.data.
p <- ggplot(data = mydata, mapping = aes(x = Temp, y = TD))
p +
  geom_point() +
  geom_ribbon(
    mapping = aes(y = fit, ymin = ymin, ymax = ymax),
    data = new.data,
    alpha = 0.5
  ) +
  geom_line(mapping = aes(y = fit), data = new.data) +
  labs(x = "Temperature", y = "Thermal Distress")
附加内容,纯属好玩:如果您自己计算预测值,就可以随意组合协变量,例如展示模型在Ft
取不同水平时的拟合情况:
# Alternative, if you want to go crazy
# Run logistic regression model with two covariates
model <- glm(TD ~ Temp + Ft, data = mydata, family = binomial(link = "logit"))

# Create a temporary data frame of hypothetical values: the full temperature
# grid repeated once for each Ft level of interest. Repeat counts are derived
# from the vectors themselves, so changing the grid or the levels cannot
# leave the two columns with mismatched lengths.
temp.grid <- seq(min(mydata$Temp), max(mydata$Temp), by = 0.5)
ft.levels <- c(3, 18)
temp.data <- data.frame(
  Temp = rep(temp.grid, times = length(ft.levels)),
  Ft = rep(ft.levels, each = length(temp.grid))
)

# Predict the fitted values given the model and hypothetical data.
# Predictions stay on the link scale until the interval has been built;
# `se.fit` is spelled out in full rather than partially matched.
predicted.data <- as.data.frame(
  predict(model, newdata = temp.data, type = "link", se.fit = TRUE)
)

# Combine the hypothetical data and predicted values
# (columns: Temp, Ft, fit, se.fit, residual.scale)
new.data <- cbind(temp.data, predicted.data)

# Calculate confidence intervals: two-sided 95% normal quantile, applied on
# the link scale and back-transformed with the model's inverse link.
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- model$family$linkinv(new.data$fit - std * new.data$se.fit)
new.data$ymax <- model$family$linkinv(new.data$fit + std * new.data$se.fit)
new.data$fit <- model$family$linkinv(new.data$fit)  # Rescale to 0-1
# Plot everything: observed 0/1 outcomes from mydata, plus one fitted curve
# and interval band per Ft level in new.data. Ft is converted to a factor so
# each level gets its own discrete fill/colour.
p <- ggplot(data = mydata, mapping = aes(x = Temp, y = TD))
p +
  geom_point() +
  geom_ribbon(
    mapping = aes(y = fit, ymin = ymin, ymax = ymax, fill = as.factor(Ft)),
    data = new.data,
    alpha = 0.5
  ) +
  geom_line(
    mapping = aes(y = fit, colour = as.factor(Ft)),
    data = new.data
  ) +
  labs(x = "Temperature", y = "Thermal Distress")
答案 1 :(得分:3)
您在stat_smooth中使用的这两个函数绘出的曲线重叠了,这就是您以为无法在同一张图上同时看到两条曲线的原因。运行下面的代码会把第二条曲线画成蓝色。
library(ggplot2)
# Overlay a logit fit (red) and a probit fit (blue) on one scatter plot of
# TD against Temp.
# TD and Temp are still extracted so that later code relying on these
# free-standing vectors keeps working.
TD <- mydata$TD
Temp <- mydata$Temp

# The family and link must reach glm() through `method.args`; passed directly
# to stat_smooth() they are not model arguments, so the requested link would
# never be applied. ggplot() + geom_point() replaces the deprecated qplot(),
# which already drew a point layer of its own (the points were drawn twice).
g <- ggplot(mydata, aes(x = Temp, y = TD)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    formula = y ~ x,
    colour = "red"
  )
g1 <- g + labs(x = "Temperature", y = "Thermal Distress")
g1

# Adding a layer with `+` is all that is needed to draw on the same plot;
# the base-graphics style `add` argument has no meaning here and is dropped.
g2 <- g1 +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "probit")),
    formula = y ~ x,
    colour = "blue"
  )
g2
如果您在第二个stat_smooth中改用另一个分布族(family),例如泊松(poisson)glm:
library(ggplot2)
# Overlay a binomial (logit) fit in red and a Poisson (log link) fit in blue
# on one scatter plot of TD against Temp, to show that two distinct smooths
# really are drawn.
# TD and Temp are still extracted so that later code relying on these
# free-standing vectors keeps working.
TD <- mydata$TD
Temp <- mydata$Temp

# The family and link must reach glm() through `method.args`; passed directly
# to stat_smooth() they are not model arguments. ggplot() + geom_point()
# replaces the deprecated qplot(), which already drew its own point layer.
g <- ggplot(mydata, aes(x = Temp, y = TD)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    formula = y ~ x,
    colour = "red"
  )
g1 <- g + labs(x = "Temperature", y = "Thermal Distress")
g1

# Layers are added with `+`; the base-graphics style `add` argument is
# dropped because it has no meaning in ggplot2.
g2 <- g1 +
  stat_smooth(
    method = "glm",
    method.args = list(family = poisson(link = "log")),
    formula = y ~ x,
    colour = "blue"
  )
g2
那么您就可以看到确实画出了两条曲线: