答案 0 :(得分:2)
下面的代码将数据整理成适合绘图的形式,然后使用geom_segment
来布置序列。为了整理数据,我们希望每列都是一个变量,每行都是一个观察点。
library(tidyverse)
## Clean up the data frame and convert it to long form.
## Each group of three columns is turned into its own data frame, and the
## pieces are stacked on top of each other.
df <- map_df(
  seq(1, ncol(df), 3),
  # Column names appear to be in the first data row, so that row is removed
  # and new column names are supplied.
  ~ setNames(df[-1, .x:(.x + 2)], c("DD", "Start", "End")),
  .id = "Pama" # This argument and the next step add a "Pama" column
) %>%
  mutate(Pama = paste0("Pama", Pama)) %>%
  filter(!DD %in% c("n/a", "")) %>% # Remove empty rows
  # All columns are in character format; convert the numbers to numeric.
  # `across()` replaces the deprecated `mutate_at()` + `funs()` combination.
  mutate(across(matches("^[SE]"), ~ as.numeric(as.character(.x))))
数据框现在如下所示:
   Pama  DD     Start   End
  <chr> <chr>  <dbl> <dbl>
1 Pama1 zf        12    89
2 Pama1 zf       116   199
3 Pama1 PAMANA   280   331
4 Pama2 GGTR     115   195
5 Pama2 T_reg    232   362
6 Pama2 PAMANA   376   577
7 Pama3 GGTR      66   144
# Plot every sequence (Pama) as a grey bar with its domains drawn on top.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line thickness;
# `size` is kept here so the code also runs on older ggplot2 versions.
ggplot(df, aes(y = Pama, yend = Pama)) +
  # Grey background bar per sequence, from the smallest Start to the largest End
  geom_segment(
    data = data.frame(
      Pama = unique(df$Pama),
      x = min(df$Start),
      xend = max(df$End)
    ),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  # One coloured bar per domain, labelled with the domain name at its midpoint
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  geom_text(
    aes(x = (Start + End) / 2, label = DD),
    colour = "white", size = 3, fontface = "bold"
  ) +
  # Start/End coordinates; `pivot_longer()` replaces the superseded `gather()`
  geom_text(
    data = pivot_longer(df, Start:End, names_to = "key", values_to = "value"),
    aes(x = value, label = value, colour = DD), size = 2.5,
    fontface = "bold", position = position_nudge(0, -0.3)
  ) +
  # `guides(colour = FALSE)` is deprecated; "none" is the supported spelling
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )
更新:为了解决您的评论,这里是另一种定位数字的方法,以避免重叠。
# Same plot, but the Start/End numbers are drawn in white and shifted 10 units
# inwards from each bar end so that neighbouring labels do not overlap.
ggplot(df, aes(y = Pama, yend = Pama)) +
  # Grey background bar per sequence, from the smallest Start to the largest End
  geom_segment(
    data = data.frame(
      Pama = unique(df$Pama),
      x = min(df$Start),
      xend = max(df$End)
    ),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  # One coloured bar per domain, labelled with the domain name at its midpoint
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  geom_text(
    aes(x = (Start + End) / 2, label = DD),
    colour = "white", size = 3, fontface = "bold"
  ) +
  # Start labels move right, End labels move left;
  # `pivot_longer()` replaces the superseded `gather()`
  geom_text(
    data = pivot_longer(df, Start:End, names_to = "key", values_to = "value"),
    aes(x = ifelse(key == "Start", value + 10, value - 10), label = value),
    colour = "white", size = 2.8, fontface = "bold",
    position = position_nudge(0, -0.2)
  ) +
  # `guides(colour = FALSE)` is deprecated; "none" is the supported spelling
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )
更新2:为了解决您的第二条评论,我们将添加一个分组列,用它让数字标签在条形的上方和下方交替放置:
# Add grouping variable to alternate high and low labels.
# Rows are ordered by Start, then numbered high/low/high/... within each Pama.
# `rep_len()` builds exactly n() values per group, and `ungroup()` keeps the
# grouping from leaking into later steps.
df <- df %>%
  arrange(Start) %>%
  group_by(Pama) %>%
  mutate(hilow = rep_len(c("high", "low"), n())) %>%
  ungroup()

# Long form of the coordinates (one row per Start/End label), computed once
# and shared by the two label layers below.
coord_labels <- pivot_longer(
  df, Start:End,
  names_to = "key", values_to = "value"
)

ggplot(df, aes(y = Pama, yend = Pama)) +
  # Grey background bar per sequence, from the smallest Start to the largest End
  geom_segment(
    data = data.frame(
      Pama = unique(df$Pama),
      x = min(df$Start),
      xend = max(df$End)
    ),
    aes(x = x, xend = xend), colour = "grey80", size = 10
  ) +
  # One coloured bar per domain, labelled with the domain name at its midpoint
  geom_segment(aes(x = Start, xend = End, colour = DD), size = 20) +
  geom_text(
    aes(x = (Start + End) / 2, label = DD),
    colour = "white", size = 3, fontface = "bold"
  ) +
  # "high" labels are nudged above the bar ...
  geom_text(
    data = filter(coord_labels, hilow == "high"),
    aes(x = value, label = value, colour = DD), hjust = 0.5,
    size = 3, fontface = "bold", position = position_nudge(0, 0.3)
  ) +
  # ... and "low" labels below it
  geom_text(
    data = filter(coord_labels, hilow == "low"),
    aes(x = value, label = value, colour = DD), hjust = 0.5,
    size = 3, fontface = "bold", position = position_nudge(0, -0.3)
  ) +
  # `guides(colour = FALSE)` is deprecated; "none" is the supported spelling
  guides(colour = "none") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  labs(x = "", y = "") +
  theme_classic(base_size = 15) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )
答案 1 :(得分:1)
这是一个奇怪的数据结构。如果您能够以某种方式修改源数据以从头开始获得整洁的dataframe
,那么这可能会更好地发挥作用,其中每列是单个变量,每行都是观察。
我们可以对数据进行整理(wrangle)以得到这样的 dataframe(这里用的是 base R;您也可以用 dplyr 或 data.table 以其他方式实现相同的目标):
# Reshape the wide input into long form: every group of three columns becomes
# a set of rows labelled DD1, DD2, ... The number of groups is derived from
# ncol(df) instead of being hard-coded to three.
group_starts <- seq(1, ncol(df) - 2, by = 3)
df2 <- do.call(rbind, lapply(seq_along(group_starts), function(i) {
  # Skip the first row of df. Negative indexing (rather than 2:nrow(df))
  # also behaves when df has only that one row.
  block <- df[-1, group_starts[i]:(group_starts[i] + 2)]
  setNames(
    cbind(rep(paste0("DD", i), nrow(block)), block),
    c("DD", "Pama", "Start", "End")
  )
}))

# Drop placeholder rows first, then convert the coordinates to numeric.
# NOTE(review): presumably the placeholder rows also hold non-numeric
# Start/End values, in which case filtering first avoids coercion warnings.
df2 <- df2[!df2$Pama %in% c("", "n/a"), ]
df2$Start <- as.numeric(as.character(df2$Start))
df2$End <- as.numeric(as.character(df2$End))
df2
#> DD Pama Start End
#> 2 DD1 zf 12 89
#> 4 DD1 zf 116 199
#> 10 DD1 PAMANA 280 331
#> 51 DD2 GGTR 115 195
#> 81 DD2 T_reg 232 362
#> 91 DD2 PAMANA 376 577
#> 52 DD3 GGTR 66 144
这为我们提供了一个很好的数据集,我们可以将 ggplot2 的美学属性(aesthetics)直接映射到简单的列:
文本定位确实会导致问题,在这个例子中我们不得不减少文本大小以使其有点正确。
绘图代码如下:
library(ggplot2)
# One thick bar per domain, one row per DD group, coloured by domain name.
ggplot(data = df2, mapping = aes(y = DD, colour = Pama)) +
  geom_segment(
    mapping = aes(x = Start, xend = End, yend = DD),
    size = 10
  ) +
  # Start and End coordinates, nudged by -0.15 on the y axis
  geom_text(
    mapping = aes(x = Start, label = Start),
    nudge_y = -0.15, size = 2.5
  ) +
  geom_text(
    mapping = aes(x = End, label = End),
    nudge_y = -0.15, size = 2.5
  ) +
  # Group labels go on the right-hand side
  scale_y_discrete(position = "right") +
  # White panel, no x-axis text, no y ticks, no axis titles
  theme(
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y.right = element_text(size = 14),
    axis.text.x = element_blank(),
    panel.background = element_rect(fill = "white")
  )
以下是基于 ggrepel 包的解决方案:
(强制增加文本大小以显示它不重叠)
PS:是的,这个更新使它更像@ eipi10的回答..这是一个很好的答案,当然我会偷他的:P