在ggplot中进行子集化时的因子顺序

时间:2014-01-09 02:19:43

标签: r ggplot2 subset r-factor

我的x轴是一个因子变量,并且我已经把因子水平按照便于在ggplot中直观阅读的顺序排列好了。这样做效果很好。但是,当我在ggplot的图层中使用subset参数时,原先设定的因子水平顺序会被打乱。是否可以在ggplot中进行子集化的同时保留因子水平的顺序?

以下是数据和代码:

library(ggplot2)
library(plyr)
## Example data (dput-style literal): a 40-row data frame with three columns.
##   SubjectID - factor with 15 levels, "s001" ... "s015"
##   Parameter - factor with 14 levels; note that the levels are stored in an
##               unsorted order here ("(Intercept)", "c0.008", "c0.01", ...)
##   Weight    - numeric value for each SubjectID / Parameter row
## The row names are non-sequential integers; presumably the rows were sampled
## from a larger data frame (not shown here).
dat <- structure(list(SubjectID = structure(c(12L, 4L, 6L, 7L, 12L, 
7L, 5L, 8L, 14L, 1L, 15L, 1L, 7L, 1L, 7L, 5L, 4L, 2L, 9L, 6L, 
7L, 13L, 12L, 2L, 15L, 3L, 5L, 13L, 13L, 10L, 7L, 8L, 10L, 10L, 
1L, 10L, 12L, 7L, 6L, 10L), .Label = c("s001", "s002", "s003", 
"s004", "s005", "s006", "s007", "s008", "s009", "s010", "s011", 
"s012", "s013", "s014", "s015"), class = "factor"), Parameter = structure(c(7L, 
3L, 5L, 3L, 6L, 4L, 6L, 7L, 7L, 4L, 7L, 12L, 8L, 11L, 1L, 4L, 
3L, 4L, 6L, 4L, 6L, 6L, 12L, 5L, 12L, 1L, 7L, 13L, 11L, 1L, 4L, 
1L, 6L, 13L, 10L, 10L, 10L, 13L, 5L, 8L), .Label = c("(Intercept)", 
"c0.008", "c0.01", "c0.015", "c0.02", "c0.03", "PrevCorr1", "PrevFail1", 
"c0.025", "c0.004", "c0.006", "c0.009", "c0.012", "c0.005"), class = "factor"), 
    Weight = c(0.0352725634087837, 1.45546697427904, 2.29457594510248, 
    0.479548914792514, 6.39680995359234, 1.48829600339586, 2.69253113220079, 
    -0.171219812386926, -0.453625394224277, 1.43732884325816, 
    0.742416863226952, 0.256935761466245, -0.29401087047524, 
    0.34653127811481, 0.33120592543102, 2.79213318878505, 2.47047299128637, 
    1.022450287681, 6.92891513416868, 0.648982326396105, 6.58336282626389, 
    6.40600461501379, 1.80062359655524, 3.86658202530889, 1.23833324887194, 
    -0.026560261876089, 0.121670468861011, 0.9290824087063, 0.349104382483186, 
    0.24722583823016, 1.82473621255801, -0.712668411699556, 6.51789901685784, 
    0.74682257127003, 0.0755807984938072, 0.131705709322157, 
    0.246465073382095, 0.876279316248929, 1.83442709571662, -0.579086982613267
    )), .Names = c("SubjectID", "Parameter", "Weight"), row.names = c(2924L, 
784L, 1537L, 1663L, 3138L, 1744L, 1266L, 1996L, 3548L, 86L, 3692L, 
230L, 1613L, 213L, 1627L, 1024L, 832L, 384L, 2418L, 1568L, 1714L, 
3362L, 3200L, 497L, 3632L, 683L, 1020L, 3281L, 3263L, 2779L, 
1632L, 1995L, 2674L, 2753L, 312L, 2638L, 3198L, 1809L, 1569L, 
2589L), class = "data.frame")



## Put the Parameter levels in an order that reads naturally on the x axis:
## the bias terms first ("(Intercept)", "PrevCorr1", "PrevFail1"), followed by
## the contrast terms in sorted order ("c0.004", "c0.005", "c0.006", ...).
paramNames <- levels(dat$Parameter)
contrastNames <- sort(grep("c0", paramNames, value = TRUE))
biasNames <- setdiff(paramNames, contrastNames)
dat$Parameter <- factor(dat$Parameter, levels = c(biasNames, contrastNames))

## Colour group for every row: each bias term gets its own group and all
## remaining parameters are labelled "Contrast".
dat$plotColor <- rep("Contrast", nrow(dat))
dat$plotColor[dat$Parameter %in% "(Intercept)"] <- "Intercept"
dat$plotColor[grepl("PrevCorr", dat$Parameter)] <- "PrevSuccess"
dat$plotColor[grepl("PrevFail", dat$Parameter)] <- "PrevFail"

p <- ggplot(dat, aes(x = Parameter, y = Weight)) +
  # The commented-out geom_line() below would connect the mean weights of the
  # "Contrast" rows. Enabling it is what scrambles the factor level order on
  # the x axis.
  # geom_line(subset=.(plotColor=="Contrast"), aes(group=1), stat="summary", fun.y="mean", color="grey50", size=1) +
  # Large coloured dot at the mean weight of each parameter ...
  geom_point(
    aes(group = Parameter, color = plotColor),
    stat = "summary", fun.y = "mean", size = 5
  ) +
  # ... with a smaller white dot drawn on top of it.
  geom_point(
    aes(group = Parameter),
    stat = "summary", fun.y = "mean", size = 2.5, color = "white"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
print(p)

下面是注释掉geom_line时的图:

下面是取消geom_line注释之后的图:

1 个答案:

答案 0 :(得分:2)

如果调换图层的添加顺序(先用完整数据画点,再添加基于子集的线),问题就会消失:

# Same plot as in the question, but with the layers reordered: the point layer
# built from the full data is added BEFORE the line layer built from the
# "Contrast" subset. Per this answer, adding the subset layer first is what
# scrambles the level order on the x axis.
p <- ggplot(dat, aes(x = Parameter, y = Weight)) +
  # Large coloured dot at the mean weight of each parameter (full data).
  geom_point(aes(group = Parameter, color = plotColor), size = 5, stat = "summary", fun.y = "mean") +
  # Line through the mean weights of the "Contrast" rows only. The layer-level
  # `subset = .(...)` argument is no longer supported since ggplot2 2.0.0, so
  # the subset is passed as the layer's `data` instead; this form works in old
  # and new ggplot2 versions alike.
  geom_line(
    data = subset(dat, plotColor == "Contrast"), aes(group = 1),
    stat = "summary", fun.y = "mean", color = "grey50", size = 1
  ) +
  # Smaller white dot drawn on top of each coloured dot.
  geom_point(aes(group = Parameter), size = 2.5, color = "white", stat = "summary", fun.y = "mean") +
  # Pin the x axis to the factor's own level order (only the levels that occur
  # in the data), so the result does not hinge on layer order alone.
  scale_x_discrete(limits = levels(droplevels(dat$Parameter))) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
print(p)

正确的图

我认为问题在于:如果先绘制子集数据,未出现在子集中的因子水平会被丢弃;之后再添加点图层时,ggplot就不知道该把这些水平放在哪个位置。而先用原始数据绘图时,因子水平(及其顺序)会被保留下来。不过我并不完全确定,这只是我的推测,你可能只能姑且相信我的说法。