如何在R

时间:2016-06-16 06:14:39

标签: r ggplot2

我有一个名为mymat的矩阵,行数超过70000。我想绘制key.related.sheet(样本),其中Num_Good_SNPs_A列的值将是X轴,IBS_from列将是Y轴。我怎样才能在R中完成这项工作?

    mymat<-structure(c("AOGC-02-0010:AOGC-02-0010", "AOGC-02-0010:AOGC-02-0022", 
"AOGC-02-0010:AOGC-02-0063", "AOGC-02-0010:AOGC-02-0079", "AOGC-02-0010:AOGC-02-0087", 
"AOGC-02-0010:AOGC-02-0105", "AOGC-02-0010:AOGC-02-0108", "AOGC-02-0010:AOGC-02-0112", 
"AOGC-02-0010:AOGC-02-0118", "AOGC-02-0010:AOGC-02-0161", "AOGC-02-0022:AOGC-08-0032", 
"AOGC-02-0022:AOGC-08-0054", "AOGC-02-0022:AOGC-08-0067", "AOGC-02-0022:AOGC-08-0083", 
"AOGC-02-0022:AOGC-08-0090", "AOGC-02-0022:AOGC-08-0097", "AOGC-02-0022:AOGC-08-0125", 
"AOGC-02-0022:AOGC-08-0139", "AOGC-02-0022:AOGC-08-0140", "AOGC-02-0022:AOGC-08-0154", 
"AOGC-02-0022:AOGC-08-0167", "1.12", "-0.0176", "0.0253", "0.0132", 
"-0.00835", "-0.0205", "-0.00759", "-0.0134", "-0.0351", "0.00399", 
"0.00021", "-0.0144", "0.013", "0.0035", "-0.00973", "-0.0109", 
"-0.0000367", "0.0196", "0.00304", "-0.00509", "-0.0224", "16987", 
"13360", "12837", "11836", "12097", "13016", "15128", "14564", 
"13262", "14685", "14944", "13516", "14257", "11146", "13545", 
"15112", "13164", "13343", "10284", "12705", "10599", "1.12", 
"-0.00582", "0.0171", "0.0107", "-0.0132", "-0.0214", "-0.0145", 
"0.00123", "-0.0268", "0.00374", "0.00549", "-0.0215", "0.0285", 
"0.0038", "-0.0133", "-0.0268", "-0.00474", "0.0259", "0.00451", 
"-0.0111", "-0.0234", "14152", "11228", "10739", "9903", "10134", 
"10871", "12597", "12141", "11083", "12246", "12573", "11368", 
"11988", "9410", "11420", "12699", "11084", "11197", "8642", 
"10710", "8957", "1.13", "-0.00634", "0.0192", "0.0232", "-0.0328", 
"-0.0383", "-0.0318", "-0.0104", "-0.000175", "-0.0178", "0.0283", 
"-0.0158", "0.0265", "0.00121", "-0.00154", "-0.0118", "0.00984", 
"-0.0346", "0.00538", "0.0209", "-0.0485", "7158", "5720", "5478", 
"5049", "5178", "5480", "6374", "6133", "5613", "6187", "6358", 
"5852", "6095", "4772", "5835", "6433", "5706", "5658", "4377", 
"5560", "4678", "1.15", "0.0139", "0.0155", "0.0113", "-0.0232", 
"-0.0175", "-0.00998", "-0.00379", "0.00973", "-0.0469", "0.0484", 
"0.000755", "0.0332", "0.00556", "0.0302", "-0.049", "-0.00191", 
"-0.0276", "0.00223", "0.037", "-0.0643", "3759", "2986", "2849", 
"2634", "2681", "2875", "3394", "3237", "2932", "3243", "3313", 
"3023", "3163", "2499", "3047", "3311", "2897", "2966", "2303", 
"2869", "2342"), .Dim = c(21L, 9L), .Dimnames = list(c("1", "2", 
"3", "4", "5", "6", "7", "8", "9", "10", "100", "101", "102", 
"103", "104", "105", "106", "107", "108", "109", "110"), c("key.related.sheet", 
"IBS_from:324711SNPS", "Num_Good_SNPs_A:324711SNPS", "IBS_from:266640SNPS", 
"Num_Good_SNPs_A:266640SNPS", "IBS_from:133224SNPS", "Num_Good_SNPs_A:133224SNPS", 
"IBS_from:66441SNPS", "Num_Good_SNPs_A:66441SNPS")))

以下是我尝试的代码:

mydf <- as.data.frame(mymat)
rownames(mydf) <- mydf[,"key.related.sheet"] 
mydf[, grepl("IBS",colnames(mydf))] <- lapply(mydf[, grepl("IBS",colnames(mydf))], function (x){as.numeric(as.character(x))})
mydf[, grepl("Num_Good",colnames(mydf))] <- lapply(mydf[, grepl("Num_Good",colnames(mydf))], function (x){as.numeric(as.character(x))})
effCis <- grep('^IBS',names(mydf));
find.measurements <- grep("^Num_Good_SNPs", names(mydf))

#xlim <- c(1,length(find.measurements));
#xlim <- range(mydf[,find.measurements],na.rm=T);
xlim <- c(1,length(effCis))
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
yticks <- seq(ylim[1L],ylim[2L],0.1);

xticks <- seq(from = max(mydf[, find.measurements]), to = min(mydf[, find.measurements]), length.out = length(effCis))
#xticks <- seq(from = max(mydf[, find.measurements]), to = min(mydf[, find.measurements]), length.out = 7)

plot(NA,xlim=c(min(xticks), max(xticks)), ylim=ylim,xlab='Number of good SNPs used',ylab='Samples',xaxs='i',yaxs='i',axes=FALSE)
#plot(mydf[,effCis],mydf[,find.measurements])

par(mar=c(5,4,4,5)+.1)
mtext("IBS", side=4, line= 2.5)
#plot(NA,xlim=c(min(xticks), max(xticks)), ylim=ylim,xlab='Numbers of good SNPs used',ylab='IBS',xaxs='i',yaxs='i',axes=TRUE)
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);


axis(side = 1, at = xticks)

##axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
#axis(4L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
axis(4L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.6);

hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=rainbow(nrow(mydf)),stringsAsFactors=F);

#cryptic.col <- data.frame(cryptic=seq_len(nrow(mydf)),col=rainbow(nrow(mydf)),stringsAsFactors=F);
#with((mydf[which(mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9),]), text(xlab ~ ylab, labels = rownames(mydf)[mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9]), pos = 4)

#rownames(mydf)[mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9]


#hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri];
  col <- hybrid.col$col[ri];
  x <- xticks;
  y <- c(as.matrix(mydf[hybrid,effCis]));
  points(x,y,pch=16L,xpd=NA);
  with(spline(x,y,splineN),{
    lines(x,y,col=col,lwd=2,xpd=NA);
    localwin <- which(x > 2 & x < 3);
    tp <- which.min (abs (diff ( y [localwin]) ) );
    if (length (tp) > 0L) points (x[localwin [tp]] , y[localwin[tp]] , col = col,pch=4L);
    localwin <- which (x > 2 & x < 5);
    tp <- which.min (diff (y[localwin]));
    if (length(tp) > 0L) {
      m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
      if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
    };
  });
};

abline(h = 0.5, lwd = 2, lty = 15 , col = "Black");
abline(h = 1.0, lwd = 2, lty = 15, col = "Black");
abline(h = 0, lwd = 2, lty = 15, col = "Black");

1 个答案:

答案 0 :(得分:1)

我们可以使用melt中的data.table将其重新整形为'long'格式,因为数据集中有多个patterns。由于matrix只能包含一个类(当character中有一个matrix元素时,整个数据集将为character矩阵),当我们转换它时到data.table,列将全部为character类。数字列将转换回其原始类,然后使用ggplot根据OP的描述绘制列。

library(data.table)
library(ggplot2)
library(dplyr)
melt(as.data.table(mymat), measure =  patterns("^IBS", "^Num_Good"), 
     value.name = c("IBS", "Num_Good")) %>%
.[, c("IBS", "Num_Good") := lapply(.SD, as.numeric), .SDcols = IBS:Num_Good] %>% 
.[, Grp := cut(IBS, breaks = c(-Inf, 0.3, 0.5, 1.2, Inf))] %>%
  ggplot(., aes(x = Num_Good, y = IBS, col = key.related.sheet)) + 
       geom_line() + 
       facet_wrap(~Grp)