ggplot2中按组绘制的回归线颜色与同组数据点的颜色不匹配

时间:2015-10-25 06:50:43

标签: r colors ggplot2 grouping linear-regression

我正在尝试绘制一个图表,既显示整个数据集的回归线(在我的例子中,数据是不同雄性小鼠品系的血液学数据),又显示各个品系各自的回归线。我看到了一个类似的帖子(Regression line for the entire dataset together with regression lines based on groups in R ggplot2 ?),但它没有涉及回归线的颜色问题,所以对我并没有真正的帮助。我已经用rainbow给每个品系分配了一种颜色,以便在图表上区分它们。理想情况下,某个品系的数据点颜色应该与该品系回归线的颜色一致。

编辑:抱歉Jaap,我删除了所有的ML$,结果得到了“找不到变量”的错误。我添加了data = ML,但仍然无效。另外,当我添加group = Color时,颜色仍然是错误的。这是原始数据ML:

# Example data set `ML` (looks like pasted dput() output): a data.frame with
# 179 rows (see row.names at the end) and four columns:
#   Strain - factor with 35 levels (mouse strain names)
#   Color  - factor with 35 levels, each an 8-digit hex colour string
#   y, x   - numeric measurements; both contain NA entries
#            (presumably y is MCV and x is HCT, per NameY / NameX below)
# NOTE: the Color levels are listed in sorted hex order, so the level order of
# Color is not the level order of Strain.
ML <- structure(list(Strain = structure(c(15L, 15L, 15L, 15L, 33L, 
33L, 33L, 33L, 33L, 33L, 33L, 33L, 17L, 17L, 17L, 17L, 18L, 18L, 
18L, 9L, 9L, 9L, 35L, 35L, 35L, 35L, 28L, 28L, 28L, 28L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 12L, 12L, 12L, 12L, 23L, 23L, 23L, 
21L, 21L, 21L, 8L, 8L, 8L, 8L, 22L, 22L, 22L, 11L, 11L, 11L, 
11L, 13L, 13L, 13L, 13L, 3L, 3L, 3L, 3L, 14L, 14L, 14L, 22L, 
22L, 22L, 22L, 29L, 29L, 29L, 29L, 32L, 32L, 32L, 32L, 21L, 21L, 
21L, 21L, 24L, 24L, 24L, 24L, 16L, 16L, 16L, 3L, 3L, 3L, 3L, 
6L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 6L, 6L, 14L, 14L, 14L, 
14L, 31L, 31L, 20L, 20L, 26L, 26L, 26L, 26L, 4L, 4L, 4L, 5L, 
18L, 18L, 18L, 18L, 23L, 23L, 23L, 23L, 27L, 27L, 8L, 8L, 29L, 
29L, 29L, 29L, 17L, 17L, 34L, 27L, 27L, 27L, 27L, 9L, 9L, 9L, 
9L, 10L, 10L, 10L, 10L, 6L, 6L, 6L, 4L, 4L, 7L, 7L, 7L, 7L, 20L, 
20L, 20L, 20L, 25L, 25L, 25L, 25L, 30L, 30L, 30L, 30L, 19L, 19L, 
19L, 19L), .Label = c("129X1/SvJ", "A/J", "AXB10/PgnJ", "AXB13/PgnJ", 
"AXB15/PgnJ", "AXB19a/PgnJ", "AXB4/PgnJ", "AXB8/PgnJ", "BALB/cByJ", 
"BALB/cJ", "BTBRT+tf/J", "BXA1/PgnJ", "BXA12/PgnJ", "BXA13/PgnJ", 
"BXA14/PgnJ", "BXA16/PgnJ", "BXA4/PgnJ", "BXA8/PgnJ", "BXD40/TyJ", 
"BXD5/TyJ", "BXD75/RwwJ", "BXH10/TyJ", "BXH14/TyJ", "BXH8/TyJ", 
"C3H/HeJ", "C57BL/6J", "C58/J", "CXB12/HiAJ", "CXB2/ByJ", "DBA/2J", 
"LP/J", "NOD/ShiL_+J", "NOR/LtJ", "NZB/BINJ", "SJL/J"), class = "factor"), 
    Color = structure(c(9L, 9L, 9L, 9L, 28L, 28L, 28L, 28L, 28L, 
    28L, 28L, 28L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 20L, 20L, 
    20L, 26L, 26L, 26L, 26L, 19L, 19L, 19L, 19L, 31L, 31L, 31L, 
    31L, 25L, 25L, 25L, 25L, 14L, 14L, 14L, 14L, 2L, 2L, 2L, 
    4L, 4L, 4L, 22L, 22L, 22L, 22L, 3L, 3L, 3L, 16L, 16L, 16L, 
    16L, 7L, 7L, 7L, 7L, 32L, 32L, 32L, 32L, 8L, 8L, 8L, 3L, 
    3L, 3L, 3L, 21L, 21L, 21L, 21L, 29L, 29L, 29L, 29L, 4L, 4L, 
    4L, 4L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 32L, 32L, 32L, 32L, 
    35L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 35L, 35L, 8L, 8L, 8L, 
    8L, 30L, 30L, 5L, 5L, 15L, 15L, 15L, 15L, 33L, 33L, 33L, 
    34L, 12L, 12L, 12L, 12L, 2L, 2L, 2L, 2L, 17L, 17L, 22L, 22L, 
    21L, 21L, 21L, 21L, 11L, 11L, 27L, 17L, 17L, 17L, 17L, 20L, 
    20L, 20L, 20L, 18L, 18L, 18L, 18L, 35L, 35L, 35L, 33L, 33L, 
    24L, 24L, 24L, 24L, 5L, 5L, 5L, 5L, 13L, 13L, 13L, 13L, 23L, 
    23L, 23L, 23L, 6L, 6L, 6L, 6L), .Label = c("#000FFFFF", "#003AFFFF", 
    "#0066FFFF", "#0092FFFF", "#00BDFFFF", "#00E9FFFF", "#00FF0FFF", 
    "#00FF3AFF", "#00FF66FF", "#00FF92FF", "#00FFBDFF", "#00FFE9FF", 
    "#1D00FFFF", "#1DFF00FF", "#4900FFFF", "#49FF00FF", "#7500FFFF", 
    "#75FF00FF", "#A000FFFF", "#A0FF00FF", "#CC00FFFF", "#CCFF00FF", 
    "#F800FFFF", "#F8FF00FF", "#FF0000FF", "#FF002CFF", "#FF0057FF", 
    "#FF0083FF", "#FF00AFFF", "#FF00DBFF", "#FF2C00FF", "#FF5700FF", 
    "#FF8300FF", "#FFAF00FF", "#FFDB00FF"), class = "factor"), 
    y = c(48.4, 48.3, 49.9, 47.4, 48, 43, 41.6, 41.5, 43.4, 42.7, 
    42.3, 42.6, 54.2, 58.8, 60.1, 56.8, 41.6, 43.1, 43.1, 51.5, 
    54.2, 50.6, 46.2, 57, 52.1, 51.1, 59.8, 65.9, 58.8, 58.1, 
    54.6, 52.3, 53.9, 54.6, 51.8, 53.8, 60.5, 50.8, 41.7, 40.2, 
    40.6, 38.8, 47.6, 48.8, 46.9, 50.8, 48.5, 49.1, 39.4, 41.4, 
    41.1, 40.2, 43.6, 39.3, 45.2, 47.8, 59, 60.3, 45.8, 38.8, 
    39.1, 39.9, 40, 48.3, 49.5, 46.3, 45.6, 58.8, 57.7, 59.3, 
    56.1, 54.3, 46.6, 42.4, 47.9, 46.2, 43.9, 47.9, 61.3, 50.3, 
    49.1, 58.1, 47, 44.9, 48.1, 51.9, 58.9, 56.3, 61.9, 58.2, 
    50, 52.2, 45.9, 44.2, 47.2, 48.5, 44.8, 41.6, 46.1, 43.9, 
    49.5, 49.4, 50.7, 47.6, 48, 41.5, 39.9, 59.7, 59.1, 58, 57.4, 
    48.7, 55.4, 43.5, 46.4, 41.2, 45.5, 39.3, 47.3, 51.9, 48.8, 
    56.7, 51, 42.5, 41.6, 40.6, 41.5, 48.8, 47.9, 48.9, 47, 40.4, 
    41.7, 39.8, 41.6, 50.9, 46.6, 49.9, 45.3, 52.4, 59.6, 54.7, 
    39.5, 42.8, 39.9, 40.9, 54.3, 53.8, 53.6, 54.8, 53.6, 56.7, 
    56.7, 54.1, NA, NA, NA, NA, NA, 47.8, 51.6, 46.7, 47.5, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), x = c(26.6, 29.4, 26.8, 22.9, 9.5, 7.8, 5.6, 7.8, 9.1, 
    8.3, 8.3, 6.8, 24.9, 29, 30.7, 46.2, 26.7, 29.6, 25.5, 29.5, 
    34, 44.8, 6, 9.1, 7.2, 15.2, 44.1, 27.4, 43.5, 36.6, 37.5, 
    40, 34, 39.5, 25.3, 38, 25.8, 23.8, 22.9, 21.6, 16.4, 16.9, 
    28.1, 27.5, 36.2, 8.5, 17.9, 26.1, 13.9, 14.3, 27.6, 25.2, 
    26.4, 16, 17.5, 5, 6.6, 6.1, 3.9, 16.2, 17.5, 20.1, 13.4, 
    13.9, 18.6, 10.1, 11.7, 32.2, 35.9, 31.7, 19.8, 27.8, 17, 
    18.3, 36.2, 26.5, 20.6, 22.6, 11.3, 12.4, 7.4, 8.5, 16.2, 
    17.3, 19.7, 9.3, 20.3, 17.5, 44.9, 26.4, 21.4, 10.4, 13.6, 
    12.1, 20.6, 34.8, 5.5, 12.8, 18.3, 12.6, 18, 20.3, 19, 21.5, 
    17.1, 11.8, 10.6, 47, 45.5, 50.4, 46.4, 5.6, 6.1, 22.9, 21.7, 
    8.3, 9, 7.2, 11.2, 14.2, 12.8, 41.4, 10.4, 29.9, 17.2, 16.7, 
    20.9, 25.1, 17.1, 18.2, 11.2, 4.8, 7.4, 14.5, 13.5, 23.4, 
    19.1, 20, 18, 33.8, 15.8, 4.1, 7.2, 4.9, 5, 6, 40.3, 39.2, 
    25.8, 31.5, 48.2, 45.8, 43.3, 53.6, NA, NA, NA, NA, NA, 18.9, 
    18.5, 22, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA)), .Names = c("Strain", "Color", "y", 
"x"), row.names = c(NA, -179L), class = "data.frame")

Here are the other variables that will be mentioned in my code.

# Axis labels are kept in variables because the same plot is meant to be
# produced in a loop for many other variables.
NameX <- "HCT(%)"
NameY <- "MCV(fl)"
# One rainbow colour per strain; 35 because there are 35 strains.
MyRainbow <- grDevices::rainbow(n = 35)

点的正确颜色应如下所示:

enter image description here

我正在尝试在ggplot2中为每个品系添加回归线,但结果要么颜色是错的,要么点的颜色是对的、回归线的颜色却与点的颜色不匹配。即使点的颜色与相应回归线的颜色一致,只要它们不是我指定的颜色,我也不满意,因为我希望在整个项目中代表每个品系的颜色保持一致(所以Stack Overflow上其他讨论这个问题的类似帖子并没有解决我的问题)。此外,我重新排列了数据框ML中品系的顺序,但结果没有任何变化。

以下是颜色错误但匹配的情况的代码:

# Attempt 1: points and per-group lines both map colour to ML$Color inside
# aes(), so both layers go through the manual colour scale fed with MyRainbow.
# Reported outcome: point and line colours agree with each other, but they are
# not the colours stored in ML$Color.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(aes(color = ML$Color), na.rm = TRUE) +
  # one linear fit per colour group, without confidence band
  geom_smooth(aes(color = ML$Color), method = "lm", formula = y ~ x,
              se = FALSE, na.rm = TRUE) +
  # single black linear fit over all rows, with confidence band
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
              color = "black", lwd = 0.8) +
  scale_color_manual("Strain", values = MyRainbow, labels = ML$Strain,
                     guide = FALSE) +
  # label produced by lm_eqn() (not defined in this snippet), placed at the
  # horizontal midpoint of x and at the maximum of y
  geom_text(x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
            y = max(ML$y, na.rm = TRUE),
            label = lm_eqn(ML), size = 4, parse = TRUE) +
  labs(x = paste(NameX), y = paste(NameY))

创建的图片:

enter image description here

下面这段代码中点的颜色是正确的,但回归线的颜色与点的颜色不匹配;回归线的颜色与上一种情形中的相同:

# Attempt 2: point colour is set directly from ML$Color (outside aes()), while
# the per-group lines still map colour through the MyRainbow manual scale.
# Reported outcome: points show the intended colours, but the line colours do
# not match them.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(color = ML$Color, na.rm = TRUE) +
  # one linear fit per colour group, without confidence band
  geom_smooth(aes(color = ML$Color), method = "lm", formula = y ~ x,
              se = FALSE, na.rm = TRUE) +
  # single black linear fit over all rows, with confidence band
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
              color = "black", lwd = 0.8) +
  scale_color_manual("Strain", values = MyRainbow, labels = ML$Strain,
                     guide = FALSE) +
  # label produced by lm_eqn() (not defined in this snippet), placed at the
  # horizontal midpoint of x and at the maximum of y
  geom_text(x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
            y = max(ML$y, na.rm = TRUE),
            label = lm_eqn(ML), size = 4, parse = TRUE) +
  labs(x = paste(NameX), y = paste(NameY))

生成的图像可以在这里找到:

然后当我这样做时:

# Attempt 3: colour is set directly from ML$Color (outside aes()) for both the
# points and the first smoother.
# Reported outcome: ggplot2 stops with an "incompatible lengths" error for the
# colour aesthetic.
# NOTE(review): with no colour/group mapping inside aes(), the first smoother
# presumably has no per-strain grouping here - confirm.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(color = ML$Color, na.rm = TRUE) +
  geom_smooth(color = ML$Color, method = "lm", formula = y ~ x,
              se = FALSE, na.rm = TRUE) +
  # single black linear fit over all rows, with confidence band
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
              color = "black", lwd = 0.8) +
  scale_color_manual("Strain", values = MyRainbow, labels = ML$Strain,
                     guide = FALSE) +
  # label produced by lm_eqn() (not defined in this snippet), placed at the
  # horizontal midpoint of x and at the maximum of y
  geom_text(x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
            y = max(ML$y, na.rm = TRUE),
            label = lm_eqn(ML), size = 4, parse = TRUE) +
  labs(x = paste(NameX), y = paste(NameY))

我收到错误:所设置的图形属性(aesthetics)长度不兼容:colour。这里出了什么问题?如何让回归线显示正确的颜色?

非常感谢你的帮助!

1 个答案:

答案 0 :(得分:1)

使用您在问题中提供的数据(代码在图的下方)

bam

# Work on complete rows only, so every layer sees the same observations.
df.ML <- na.omit(ML)

# Build a named palette: one hex colour per strain, keyed by strain name.
# scale_colour_manual() matches a *named* vector to the factor levels by name,
# so each strain gets the colour stored next to it in the data. Passing the raw
# Color column instead would hand out colours by row position (the first rows
# all belong to one strain), giving several strains the same, wrong colour.
strain_key <- unique(df.ML[, c("Strain", "Color")])
strain_colours <- setNames(as.character(strain_key$Color),
                           as.character(strain_key$Strain))

ggplot(df.ML, aes(y = y, x = x)) +
  geom_point(aes(colour = Strain), size = 1, alpha = .8) +
  guides(colour = guide_legend(nrow = 16)) +
  # per-strain linear fits; colour is mapped to Strain exactly as for the
  # points, so each line shares its strain's colour
  geom_smooth(method = "lm", se = FALSE,
              aes(colour = Strain, group = Strain)) +
  # overall linear fit across all strains, with confidence band
  geom_smooth(method = "lm", colour = "black", se = TRUE) +
  scale_colour_manual(values = strain_colours)

(我保留了最初的回答,以防它能解答其他人的问题)。

使用您在问题中链接的示例(the example you link to),把 colour = group 添加到第二个 geom_smooth() 之后,我得到了下面这张图(代码在图的下方)

bam2

# Reproducible toy data: five groups (a-e) of ten points each; within a group,
# y is x times one random slope plus noise.
set.seed(1)
library(plyr)
alldata <- ddply(
  data.frame(group = letters[1:5], x = rnorm(50)),
  "group",
  mutate,
  y = runif(1, -1, 1) * x + rnorm(10)
)

library(ggplot2)
ggplot(alldata, aes(x = x, y = y)) +
  geom_point(aes(shape = group, colour = group), alpha = .8, size = 3) +
  # per-group linear fits: colour and linetype both follow `group`, the same
  # variable that colours the points
  geom_smooth(aes(group = group, colour = group, linetype = group),
              method = "lm", se = FALSE, size = 1) +
  # overall linear fit across all groups, drawn in black
  geom_smooth(method = "lm", se = FALSE, colour = "black", size = 1) +
  theme_bw()