在不同变量上使用相同名称排序因子级别 - ggplot2

时间:2016-01-26 15:44:12

标签: r ggplot2

我正在绘制一个时间轴或时间序列的鸟类图,我将无线电发射器连接到,并在繁殖季节的过程中跟踪。时间线显示我第一次标记一只鸟时,以及我何时停止跟踪一只鸟。

每只鸟都标有其无线电标签频率,即6个数字(151.XXX)。两年都使用了2个无线电标签,151.164和151.094。当我使用facet_grid按年绘制时间轴时,这是有问题的。基本上所有的鸟都被绘制了,但是这会扰乱地块的排序(我希望有一个鸟被标记为按顺序排列的初始日期;见下图)。是否可以保持这些因子水平相同但修正排序?

我的数据框如下所示:

dat<-structure(list(freq = structure(c(21L, 1L, 32L, 8L, 11L, 16L, 
5L, 30L, 13L, 26L, 10L, 19L, 22L, 34L, 23L, 4L, 17L, 33L, 36L, 
3L, 14L, 31L, 24L, 35L, 15L, 20L, 27L, 29L, 6L, 18L, 28L, 25L, 
12L, 9L, 7L, 2L), .Label = c("094_1", "094_2", "11", "112", "122", 
"132", "152", "164_1", "164_2", "179", "191", "216", "226", "231", 
"250", "251", "265", "280", "295", "338", "34", "372", "38", 
"385", "429", "46", "475", "53", "558", "57", "71", "72", "831", 
"876", "919", "965"), class = "factor"), site = structure(c(3L, 
4L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 
3L, 3L, 3L), .Label = c("BSLP", "GSPR", "HSGL", "SCFA"), class = "factor"), 
zone = c(18L, 17L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 
18L, 18L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 
2015L, 2015L, 2015L), jstart = c(106L, 113L, 119L, 119L, 
122L, 124L, 125L, 125L, 128L, 131L, 104L, 104L, 104L, 104L, 
104L, 105L, 105L, 105L, 105L, 105L, 106L, 106L, 109L, 109L, 
110L, 110L, 110L, 110L, 111L, 111L, 115L, 121L, 139L, 142L, 
147L, 159L), jend = c(147L, 154L, 133L, 129L, 143L, 170L, 
168L, 170L, 164L, 178L, 153L, 160L, 147L, 156L, 140L, 141L, 
160L, 122L, 166L, 160L, 133L, 160L, 149L, 160L, 162L, 158L, 
155L, 159L, 162L, 163L, 156L, 160L, 194L, 173L, 196L, 202L
)), .Names = c("freq", "site", "zone", "year", "jstart", 
"jend"), class = "data.frame", row.names = c(NA, -36L))

以下是代码:

library(ggplot2)
library(RColorBrewer)

dat$freq<-as.factor(dat$freq)
dat$year<-as.factor(dat$year)

# change the factor names:
levels(dat$freq) #just checking the levels
levels(dat$freq)[levels(dat$freq)=="094_1"] <- "151.094"
levels(dat$freq)[levels(dat$freq)=="094_2"] <- "151.094"
levels(dat$freq)[levels(dat$freq)=="164_1"] <- "151.164"
levels(dat$freq)[levels(dat$freq)=="164_2"] <- "151.164"
levels(dat$freq)[levels(dat$freq)=="34"] <- "151.034"
levels(dat$freq)[levels(dat$freq)=="72"] <- "151.072"
levels(dat$freq)[levels(dat$freq)=="191"] <- "151.191"
levels(dat$freq)[levels(dat$freq)=="251"] <- "151.251"
levels(dat$freq)[levels(dat$freq)=="122"] <- "151.122"
levels(dat$freq)[levels(dat$freq)=="57"] <- "151.057"
levels(dat$freq)[levels(dat$freq)=="226"] <- "151.226"
levels(dat$freq)[levels(dat$freq)=="179"] <- "151.179"
levels(dat$freq)[levels(dat$freq)=="295"] <- "151.295"
levels(dat$freq)[levels(dat$freq)=="372"] <- "151.372"
levels(dat$freq)[levels(dat$freq)=="876"] <- "151.876"
levels(dat$freq)[levels(dat$freq)=="38"] <- "151.038"
levels(dat$freq)[levels(dat$freq)=="112"] <- "151.112"
levels(dat$freq)[levels(dat$freq)=="265"] <- "151.265"
levels(dat$freq)[levels(dat$freq)=="831"] <- "151.831"
levels(dat$freq)[levels(dat$freq)=="965"] <- "151.965"
levels(dat$freq)[levels(dat$freq)=="11"] <- "151.011"
levels(dat$freq)[levels(dat$freq)=="231"] <- "151.231"
levels(dat$freq)[levels(dat$freq)=="71"] <- "151.071"
levels(dat$freq)[levels(dat$freq)=="385"] <- "151.385"
levels(dat$freq)[levels(dat$freq)=="919"] <- "151.919"
levels(dat$freq)[levels(dat$freq)=="250"] <- "151.250"
levels(dat$freq)[levels(dat$freq)=="338"] <- "151.338"
levels(dat$freq)[levels(dat$freq)=="475"] <- "151.475"
levels(dat$freq)[levels(dat$freq)=="558"] <- "151.558"
levels(dat$freq)[levels(dat$freq)=="132"] <- "151.132"
levels(dat$freq)[levels(dat$freq)=="280"] <- "151.280"
levels(dat$freq)[levels(dat$freq)=="53"] <- "151.053"
levels(dat$freq)[levels(dat$freq)=="429"] <- "151.429"
levels(dat$freq)[levels(dat$freq)=="216"] <- "151.216"
levels(dat$freq)[levels(dat$freq)=="152"] <- "151.152"
levels(dat$freq)[levels(dat$freq)=="46"] <- "151.046"

# order the factors:
dat$freq <- factor(dat$freq, levels = rev(dat$freq[order(dat$year,dat$jstart)]))
dat$freq # notice the changed order of factor levels

# plot the data:
ggplot(dat) +
 geom_segment(aes(x = jstart, y = freq, xend = jend, yend = freq, color=site), alpha=0.5, size = 3) +
 scale_color_brewer(palette="Set1", name="Study site") +
 xlab("Date") +
 ylab("Bird ID") +
 theme_bw() +
 theme(legend.position="bottom",
    legend.title = element_text(colour="black", size=16, face="bold"),
    legend.text = element_text(colour="black", size = 14, face = "plain"),
    axis.title.x = element_text(colour="black", size=16, vjust=-0.1, face="bold"),
    axis.text.x  = element_text(angle=0, vjust=1, size=14, colour="black"),
    axis.title.y =element_text(colour="black", size=16, vjust=1.3, face="bold"),
    axis.text.y  = element_text(angle=0, size=14, colour="black"),
    strip.text.y = element_text(size=18)) + 
facet_grid(year ~ ., scales = "free_y", space = "free_y") +
scale_x_continuous(labels = function(x) format(as.Date(as.character(x), "%j"), "%d-%b"))

这两年(151.164和151.094)的因素在2014年没有正确排序。

timelineplot

有没有办法纠正这个?我想保持这些因子级别名称相同(即151.XXX,两年保留151.164和151.094)。

谢谢,杰伊

0 个答案:

没有答案