我有一个csv文件,该文件由制表符空间分隔。注意:confidence_interval列的值(上下限)用空格隔开。
Clusters vs regions
#Cluster total_cluster_markers total_gene_list overlap confidence_interval p-value odds_ratio
cluster0_fisher 512 5840 209 1.7182382746801 2.47748441988708 1.481417e-14 2.064883
cluster1_fisher 425 5840 151 1.32891483798265 2.00728239610416 2.765814e-06 1.635712
cluster2_fisher 2339 5840 778 1.39944144525363 1.68467086400037 2.828628e-19 1.535827
cluster3_fisher 745 5840 294 1.68442891430663 2.28721533485484 1.176685e-17 1.96402
cluster4_fisher 960 5840 359 1.57334775537734 2.06762614937187 4.882507e-17 1.804544
cluster5_fisher 1038 5840 401 1.6771709720448 2.17858158866511 7.620883e-22 1.91242
cluster6_fisher 601 5840 258 1.91504755165867 2.67872178784458 3.390846e-21 2.266055
cluster7_fisher 914 5840 365 1.75583321881608 2.31518016906748 8.043077e-23 2.017119
cluster8_fisher 1144 5840 435 1.6468354153442 2.11561394952237 4.589145e-22 1.867252
cluster9_fisher 2390 5840 870 1.64540029553634 1.97088684681019 1.797583e-36 1.801035
cluster10_fisher 2564 5840 952 1.72111579504647 2.04939671061079 2.037412e-44 1.87854
cluster11_fisher 510 5840 212 1.77408302607108 2.55721196987857 7.082665e-16 2.131412
cluster12_fisher 1692 5840 654 1.76451430192618 2.17296718053884 1.0919e-35 1.958701
cluster13_fisher 3083 5840 1134 1.73401721793267 2.03895922438595 2.043825e-51 1.880658
cluster14_fisher 733 5840 276 1.55077148161245 2.11642986931938 1.094299e-13 1.812898
cluster15_fisher 1373 5840 377 0.988223149703785 1.26730522269035 0.07333463 1.119935
cluster16_fisher 703 5840 273 1.62824034204187 2.23248738568041 1.911082e-15 1.908004
我需要使用比值比和CI来制作误差线图,这是我正在使用的代码:
library('ggplot2')
args <- commandArgs(TRUE)
cluster_file <- args[1]
headers=read.csv(cluster_file, skip = 1, sep='\t', header = F, nrows = 1, as.is = T)
boxLabels = c("C0","C1", "C2", "C3",
"C4", "C5", "C6",
"C7", "C8", "C9",
"C10", "C11","C12",
"C13", "C14","C15",
"C16")
dat <- read.csv(cluster_file, header=FALSE, sep='\t' ,skip=2,
stringsAsFactors = FALSE)
colnames(dat)=headers
dat <- cbind(dat, do.call("rbind", strsplit(dat[, 5], " ")))
pdf("rplot.pdf")
boxCILow=c(dat$"1")
boxCIHigh=c(dat$"2")
(p <- ggplot(dat, aes(x = odds_ratio, y = boxLabels)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
geom_errorbarh(aes(xmax = boxCIHigh, xmin = boxCILow), size = .5,
height =
.2, color = "gray50") +
geom_point(size = 2, color = "orange") +
scale_x_continuous(breaks = seq(1.0, 3.0, 0.1), labels = seq(1.0, 3.0, 0.1),
limits = c(0.9,3.0)) +
theme_bw()+
theme(panel.grid.minor = element_blank()) +
ylab("") +
xlab("Odds ratio") +
ggtitle(tools::file_path_sans_ext(cluster_file))
)
dev.plot()
这是我得到的警告: 删除了15个包含缺失值的行(geom_errorbarh)。
图形看起来不正确。我已经使用相同的数据手动将3个向量输入到ggplot2代码,该图看起来正确。所以我想知道我是否已经正确解析了csv文件中的信息?
答案 0 :(得分:-1)
使用library('tidyverse')为我解决了格式问题。