此图从回归输出中获取残差,并按顺序绘制它们(X轴为残差等级)。
我对此的最佳尝试是:
library(ggplot2)
library(modelr)
d <- d %>% add_residuals(mod1, var = "resid")
d$resid_rank <- rank(d$resid)
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_bar(stat="identity") +
theme_bw()
但是,这将产生一个完全空白的图形。我尝试过这样的事情:
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_segment(yend = 0, aes(xend=resid)) +
theme_bw()
但这会产生错误方向的线段。什么是正确的方法,然后用第三个因素为这些线条着色?
假数据集:
library(estimatr)
library(fabricatr)
#simulation
dat <- fabricate(
N = 10000,
y = runif(N, 0, 10),
x = runif(N, 0, 100)
)
#add an outlier
dat <- rbind(dat, c(300, 5))
dat <- rbind(dat, c(500, 3))
dat$y_log <- log(dat$y)
dat$x_log <- log(dat$x)
dat$y_log_s <- scale(log(dat$y))
dat$x_log_s <- scale(log(dat$x))
mod1 <- lm(y_log ~ x_log, data = dat))
答案 0 :(得分:2)
我使用了lm()
帮助页面上的内置数据集来创建此示例。我也直接使用resid()
来获得残差。目前尚不清楚彩条在何处/为什么会有所不同,但是基本上您需要在data.frame中添加一列,具体说明其为红色还是蓝色,然后将其传递给fill
。
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.4.4
#example from lm
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
lm.D9 <- lm(weight ~ group)
resids <- data.frame(resid = resid(lm.D9))
#why are some bars red and some blue? No clue - so I'll pick randomly
resids$group <- sample(c("group 1", "group 2"), nrow(resids), replace = TRUE)
#rank
resids$rank <- rank(-1 * resids$resid)
ggplot(resids, aes(rank, resid, fill = group)) +
geom_bar(stat = "identity", width = 1) +
geom_hline(yintercept = c(-1,1), colour = "darkgray", linetype = 2) +
geom_hline(yintercept = c(-2,2), colour = "lightgray", linetype = 1) +
theme_bw() +
theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("group 1" = "red", "group 2" = "blue"))
由reprex package(v0.2.1)于2019-01-24创建