我想生成一张图,描绘了我工作的有机体的14个线性染色体,可以在每条染色体的特定位置用彩色条进行扩展。理想情况下,我想使用R,因为这是我遇到的唯一编程语言。
我已经探索了各种方法,例如:使用GenomeGraphs,但我发现这比我想要的更复杂/显示的数据比我所拥有的更多(例如显示细胞色带),并且通常特异于人类染色体。
我基本上想要的是14个以下尺寸的灰色条:
chromosome size
1 640851
2 947102
3 1067971
4 1200490
5 1343557
6 1418242
7 1445207
8 1472805
9 1541735
10 1687656
11 2038340
12 2271494
13 2925236
14 3291936
然后有彩色标记描绘沿染色体长度散布的约150个位置。例如在这些位置标记:
Chromosome Position
3 817702
12 1556936
13 1131566
理想情况下,我还希望能够根据基因座指定几种不同的颜色,例如
Chromosome Position Type
3 817702 A
12 1556936 A
13 1131566 A
5 1041685 B
11 488717 B
14 1776463 B
其中'A'标记为蓝色,'B'标记为绿色,例如。
在这张图片中粘贴了一幅与我想要制作的非常相似的情节(来自Bopp等人,PlOS Genetics 2013; 9(2):e1003293):
有人可以推荐一种方法吗?它不一定必须是生物信息学包,如果有另一种方法我可以使用R生成14条特定比例尺寸的条,在条形图上的指定位置有标记。例如我一直在考虑从ggplot2修改一个简单的条形图,但我不知道如何在特定位置沿着条形图标记。
答案 0 :(得分:8)
只需保存barplot
来电,然后拨打segments
即可在适当的位置制作商标。 E.g:
bp <- barplot(dat$size, border=NA, col="grey80")
with(marks,
segments(
bp[Chromosome,]-0.5,
Position,
bp[Chromosome,]+0.5,
Position,
col=Type,
lwd=2,
lend=1
)
)
使用的数据:
dat <- structure(list(chromosome = 1:14, size = c(640851L, 947102L,
1067971L, 1200490L, 1343557L, 1418242L, 1445207L, 1472805L, 1541735L,
1687656L, 2038340L, 2271494L, 2925236L, 3291936L)), .Names = c("chromosome",
"size"), class = "data.frame", row.names = c(NA, -14L))
marks <- structure(list(Chromosome = c(3L, 12L, 13L, 5L, 11L, 14L), Position = c(817702L,
1556936L, 1131566L, 1041685L, 488717L, 1776463L), Type = structure(c(1L,
1L, 1L, 2L, 2L, 2L), .Label = c("A", "B"), class = "factor")), .Names = c("Chromosome",
"Position", "Type"), class = "data.frame", row.names = c(NA,
-6L))
答案 1 :(得分:1)
以下是绘制这些类型图的一般解决方案,改编自this post。
我选择使用n
,因为它允许对形状大小进行更精细的调整,并允许形状按分辨率缩放;我认为geom_rect
宽度不会扩展。
还要注意使用这种方法,基因改变位置的标记是按比例绘制的,这意味着它们可能会变得很薄,以至于在图上不容易看到;如果需要,您可以自行决定将其调整到最小尺寸。
geom_segment
library("ggplot2") # for the plot
library("ggrepel") # for spreading text labels on the plot, you can replace with `geom_text` if you want
library("scales") # for axis labels notation
# insert your steps to load data from tabular files or other sources here;
# dummy datasets taken directly from files shown in this example
# data with the copy number alterations for the sample
sample_cns <- structure(list(gene = c("AC116366.7", "ANKRD24", "APC", "SNAPC3",
"ARID1A", "ATM", "BOD1L1", "BRCA1", "C11orf65", "CHD5"), chromosome = c("chr5",
"chr19", "chr5", "chr9", "chr1", "chr11", "chr4", "chr17", "chr11",
"chr1"), start = c(131893016L, 4183350L, 112043414L, 15465517L,
27022894L, 108098351L, 13571634L, 41197694L, 108180886L, 6166339L
), end = c(131978056L, 4224502L, 112179823L, 15465578L, 27107247L,
108236235L, 13629211L, 41276113L, 108236235L, 6240083L), cn = c(1L,
1L, 1L, 7L, 1L, 1L, 3L, 3L, 1L, 1L), CNA = c("loss", "loss",
"loss", "gain", "loss", "loss", "gain", "gain", "loss", "loss"
)), .Names = c("gene", "chromosome", "start", "end", "cn", "CNA"
), row.names = c(NA, 10L), class = "data.frame")
# > head(sample_cns)
# gene chromosome start end cn CNA
# 1 AC116366.7 chr5 131893016 131978056 1 loss
# 2 ANKRD24 chr19 4183350 4224502 1 loss
# 3 APC chr5 112043414 112179823 1 loss
# 4 SNAPC3 chr9 15465517 15465578 7 gain
# 5 ARID1A chr1 27022894 27107247 1 loss
# 6 ATM chr11 108098351 108236235 1 loss
# hg19 chromosome sizes
chrom_sizes <- structure(list(chromosome = c("chrM", "chr1", "chr2", "chr3", "chr4",
"chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12",
"chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19",
"chr20", "chr21", "chr22", "chrX", "chrY"), size = c(16571L, 249250621L,
243199373L, 198022430L, 191154276L, 180915260L, 171115067L, 159138663L,
146364022L, 141213431L, 135534747L, 135006516L, 133851895L, 115169878L,
107349540L, 102531392L, 90354753L, 81195210L, 78077248L, 59128983L,
63025520L, 48129895L, 51304566L, 155270560L, 59373566L)), .Names = c("chromosome",
"size"), class = "data.frame", row.names = c(NA, -25L))
# > head(chrom_sizes)
# chromosome size
# 1 chrM 16571
# 2 chr1 249250621
# 3 chr2 243199373
# 4 chr3 198022430
# 5 chr4 191154276
# 6 chr5 180915260
# hg19 centromere locations
centromeres <- structure(list(chromosome = c("chr1", "chr2", "chr3", "chr4",
"chr5", "chr6", "chr7", "chr8", "chr9", "chrX", "chrY", "chr10",
"chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22"), start = c(121535434L,
92326171L, 90504854L, 49660117L, 46405641L, 58830166L, 58054331L,
43838887L, 47367679L, 58632012L, 10104553L, 39254935L, 51644205L,
34856694L, 16000000L, 16000000L, 17000000L, 35335801L, 22263006L,
15460898L, 24681782L, 26369569L, 11288129L, 13000000L), end = c(124535434L,
95326171L, 93504854L, 52660117L, 49405641L, 61830166L, 61054331L,
46838887L, 50367679L, 61632012L, 13104553L, 42254935L, 54644205L,
37856694L, 19000000L, 19000000L, 20000000L, 38335801L, 25263006L,
18460898L, 27681782L, 29369569L, 14288129L, 16000000L)), .Names = c("chromosome",
"start", "end"), class = "data.frame", row.names = c(NA, -24L
))
# > head(centromeres)
# chromosome start end
# 1 chr1 121535434 124535434
# 2 chr2 92326171 95326171
# 3 chr3 90504854 93504854
# 4 chr4 49660117 52660117
# 5 chr5 46405641 49405641
# 6 chr6 58830166 61830166
# create an ordered factor level to use for the chromosomes in all the datasets
chrom_order <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7",
"chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14",
"chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21",
"chr22", "chrX", "chrY", "chrM")
chrom_key <- setNames(object = as.character(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25)),
nm = chrom_order)
chrom_order <- factor(x = chrom_order, levels = rev(chrom_order))
# convert the chromosome column in each dataset to the ordered factor
chrom_sizes[["chromosome"]] <- factor(x = chrom_sizes[["chromosome"]],
levels = chrom_order)
sample_cns[["chromosome"]] <- factor(x = sample_cns[["chromosome"]],
levels = chrom_order)
centromeres[["chromosome"]] <- factor(x = centromeres[["chromosome"]],
levels = chrom_order)
# create a color key for the plot
group.colors <- c(gain = "red", loss = "blue")
答案 2 :(得分:0)
希望这会有所帮助
{ # dataframes
dfChromSize<-read.table(text="chromNumber size
1 640851
2 947102
3 1067971
4 1200490
5 1343557
6 1418242
7 1445207
8 1472805
9 1541735
10 1687656
11 2038340
12 2271494
13 2925236
14 3291936", header=T)
dfChromData<-read.table(text="chromNumber markPos markSize markName
3 817702 50000 type1
12 1556936 50000 type2
13 1131566 50000 type2", header=T, stringsAsFactors=F)
dfMarkColor<-data.frame(markName=c("type1","type2"),
markColor=c("red","green"),
stringsAsFactors = F)
} # dataframes
# reorder by size , optional
{
decOrder<- order(dfChromSize$size, decreasing = T)
dfChromSize$neworder <- decOrder
dfChromData$neworder <- dfChromSize$neworder[match(dfChromData$chromNumber,dfChromSize$chromNumber)]
}
svg("myplot2.svg", width=19.2, height=9.72) # write to file / optional
par(mfrow=c(1,2))
par(las=1, mar=c(2,8,0,0), cex.axis=1.6) # hor, b l t r
separatorx<-1.2 # FIRST plot - left plot
####################################################
#
# plotting chromosomes BOTH PLOTS - MAIN PLOT
#
####################################################
{ n<-nrow(dfChromSize)
croybot<-rep(0, n)
normalizeToOne<-1/max(dfChromSize$size)
chroSizeOrdered<-dfChromSize$size[decOrder]
croytop<- chroSizeOrdered*normalizeToOne
croxleft <-sapply(1:n, function(x) rep(separatorx*(x)) )
croxright<-sapply(1:n, function(x) rep((separatorx*(x))+0.3 ) )
xm <- list(matrix(c(croxleft,croxright,croxright,croxleft), nrow=n ) )
ym <- list(matrix(c(croybot,croybot,croytop,croytop), nrow=n ) )
x <- split(xm[[1]], row(xm[[1]]))
y <- split(ym[[1]], row(ym[[1]]))
plot("",xlim=c(min(unlist(x), na.rm=T),(max(unlist(x), na.rm=T)+0 ) ),
ylim=c(0,(max(unlist(y) , na.rm = T))+.0), ylab = "", xaxt='n',
xlab="", yaxt='n',main = NULL,frame.plot = F)
mapply(function(x,y) polygon(x=((x)), y=((y)),
col="gray", lwd=.5), x=x, y=y)
# y axis labels
axis(side=2, at=c(seq(0,max(dfChromSize$size),length.out = 11)*normalizeToOne ),
labels = format(round(c(seq(0,max(dfChromSize$size),length.out = 11) ),0),nsmall=0
))
# x axis labels (chromosomes)
chrw<-x[[1]][2]-x[[1]][1]
axis(side=1, at=xm[[1]][,1]+chrw/2,
labels = dfChromSize$neworder, pos = 0, tick =F) # chrom name column
# chromosomes plot end
#############################
#
# plotting marks BOTH PLOTS
#
#############################
yinf<-ysup<-yMark<-xMark<-NULL
for (i in 1:nrow(dfChromData)){
yinf<-dfChromData[i,"markPos"]*normalizeToOne
ysup<-dfChromData[i,"markPos"]*normalizeToOne+dfChromData[i,"markSize"]*normalizeToOne
xMark[[i]]<-xm[[1]][dfChromData$neworder[i],] # change name of column if necessary
yMark[[i]]<-c(yinf,yinf,ysup,ysup)
}
mapply(function(x,y,z) polygon(x=(x),
y=(y),
col=z,
lwd=.5),
x=(xMark), y=(yMark), z=c(dfMarkColor$markColor[match(dfChromData$markName, dfMarkColor$markName)])
)
} #
###########################################
#
# plotting labels inline FIRST PLOT
#
###########################################
{xMark2<-do.call("rbind",xMark)
yMark2<-do.call("rbind",yMark)
chrw<-xMark2[1,2]-xMark2[1,1]
text(x=(t(xMark2[1:nrow(dfChromData),1])+separatorx*.1),
y=(t( (c(yMark2[1:nrow(dfChromData),1]+yMark2[1:nrow(dfChromData),3])
)/2 ) ),
labels=dfChromData$markName, cex=0.8, col="black",pos=4)
} # inline labels
########################
# now make right plot
########################
separatorx<-0.4
# run plot again up in:
# { n<-nrow(dfChromSize)
#####################################################
#
# plotting labels externally SECOND PLOT only
#
#####################################################
{maxx<-(max(unlist(x)) )
labelx<-maxx-c(maxx/15,maxx/15*2,maxx/15*2,maxx/15)-1
labelx<-t(replicate(nrow(dfMarkColor),labelx) )
labely<- sapply(c(.1,.1,.12,.12), function(x) x + 1:nrow(dfMarkColor)*.05 )+.5
# boxes
mapply(function(x,y,z) polygon(x=(x),
y=(y),
col=z,
lwd=.5),
x=(split(labelx, row(labelx) )), y=(split(labely, row(labely) )), z=dfMarkColor$markColor)
# text
text(x=(t(labelx[1:nrow(dfMarkColor),1])),
y=(t( (c(labely[1:nrow(dfMarkColor),1]+labely[1:nrow(dfMarkColor),3])
)/2 ) ),
labels=dfMarkColor$markName, cex=1.8, col="black",pos=4)
}
dev.off() # stop saving to .svg