我正在做回归分析,并希望突出显示残差极端的点。
我运行回归,并把残差列添加到数据框中。然后,我在 dat$Outlier 列中设置要显示的数据标签,并在 dat$Color 列中设置各点的颜色。
基本上,当您运行此代码时,我想把 dat$Outlier 的文本显示为数据标签,并根据 dat$Color 为各点着色。
time = as.POSIXct(c("2015-05-01 10:00:00","2015-05-01 10:05:00","2015-05-01 10:10:00","2015-05-01 10:15:00","2015-05-01 10:20:00"),"GMT")
# Simulate two series, regress s on m, and flag large positive residuals.
# NOTE: `time` must already hold the five POSIXct timestamps defined above.
s <- rnorm(5)
m <- rnorm(5)
dat <- data.frame(t = time, s = s, m = m)
reg <- lm(s ~ m)
dat$resid <- resid(reg)
# A point is flagged when its residual exceeds one standard deviation of
# the residuals; negative residuals never satisfy this one-sided test.
is_outlier <- dat$resid > sd(dat$resid) * 1
dat$Outlier <- ifelse(is_outlier, as.character(dat$t), "")
dat$Color <- ifelse(is_outlier, "red", "black")
dat
# reg models s as a function of m, so m belongs on the x-axis and s on the
# y-axis; with the axes swapped, abline(reg) would not match the points.
plot(m, s)
abline(reg)
例如,如果我的dat数据框如下所示:
t s m resid Outlier Color
1 2015-05-01 10:00:00 -0.7141181 -0.54383561 -0.3645389 black
2 2015-05-01 10:05:00 -1.7444731 0.09249989 -0.4226707 black
3 2015-05-01 10:10:00 -1.1257465 0.12563139 0.2466758 black
4 2015-05-01 10:15:00 0.6201680 -0.47515076 1.0746872 2015-05-01 10:15:00 red
5 2015-05-01 10:20:00 -0.7979108 -0.60000735 -0.5341534 black
如何把第4行对应的点以 2015-05-01 10:15:00 作为数据标签显示出来,并将该点涂成红色?
答案 0 :(得分:0)
您可以使用 `points` 和 `text`:
# Highlight and label regression outliers on a base-graphics scatter plot.
set.seed(1) # fixed seed so the random example is reproducible

time <- as.POSIXct(
  c(
    "2015-05-01 10:00:00", "2015-05-01 10:05:00", "2015-05-01 10:10:00",
    "2015-05-01 10:15:00", "2015-05-01 10:20:00"
  ),
  tz = "GMT"
)
s <- rnorm(5)
m <- rnorm(5)
dat <- data.frame(t = time, s = s, m = m)

# Regress s on m and keep the residuals alongside the data.
reg <- lm(s ~ m, data = dat)
dat$resid <- resid(reg)

# A point is flagged when its residual exceeds one standard deviation of
# the residuals; negative residuals never satisfy this one-sided test.
is_outlier <- dat$resid > sd(dat$resid)
# An explicit format pattern pins the layout of the label text.
dat$Outlier <- ifelse(is_outlier, format(dat$t, "%Y-%m-%d %H:%M:%S"), "")
dat$Color <- ifelse(is_outlier, "red", "black")
print(dat)

# reg models s as a function of m, so m belongs on the x-axis and s on the
# y-axis; with the axes swapped, abline(reg) would not match the points.
plot(dat$m, dat$s, xlab = "m", ylab = "s", lwd = 3)
abline(reg)

outlier_rows <- which(is_outlier)
# text() rejects zero-length labels, so only annotate when something is flagged.
if (length(outlier_rows) > 0) {
  points(
    dat$m[outlier_rows], dat$s[outlier_rows],
    col = dat$Color[outlier_rows], lwd = 8
  )
  text(
    dat$m[outlier_rows], dat$s[outlier_rows],
    labels = dat$Outlier[outlier_rows],
    pos = 1 # place the label below the point
  )
}
> dat
t s m resid Outlier
1 2015-05-01 10:00:00 -0.6264538 -0.8204684 -0.37276065
2 2015-05-01 10:05:00 0.1836433 0.4874291 -0.08680989
3 2015-05-01 10:10:00 -0.8356286 0.7383247 -1.20662950
4 2015-05-01 10:15:00 1.5952808 0.5757814 1.28941997 2015-05-01 10:15:00
5 2015-05-01 10:20:00 0.3295078 -0.3053884 0.37678008
Color
1 black
2 black
3 black
4 red
5 black
>
答案 1 :(得分:0)
library(ggplot2)

# Intercept and slope of the fitted line, in that order.
line_coefs <- coef(reg)

# x and y are shared by the point and text layers; colour is mapped only on
# the points so the labels keep the default text colour.
ggplot(dat, aes(x = m, y = s)) +
  geom_point(aes(colour = Color)) +
  geom_abline(
    intercept = line_coefs[[1]],
    slope = line_coefs[[2]]
  ) +
  geom_text(aes(label = Outlier)) +
  # Color already holds literal colour names, so use them as-is, no legend.
  scale_color_identity(guide = "none")