我是 R 的新手。我在 R 中编写了一个程序,它从一个平面文件中提取数据,并将探针 ID(微阵列数据)与另一个包含基因注释(名称、符号、同义词等)的文件进行匹配。我可以取出那些与基因匹配的 ID,但无法找出那些没有匹配的 ID。对于与基因名称/同义词没有任何匹配的 ID,我想打印 "NA"。代码如下。
## Look up every probe ID from GSE42568_probeid.txt in the annotated probe
## table (super.txt) and write the matching rows to probeids_matched.txt.

## Annotated probe table; its probe IDs are normalised to lower case.
probe <- read.delim("super.txt", stringsAsFactors = FALSE, header = TRUE,
                    sep = "\t")
probe$probeid <- tolower(probe$probeid)

## Probe IDs to look up (only the `probeid` column is kept).
## NOTE(review): these IDs are not lower-cased although probe$probeid is;
## confirm the file is already lower case, otherwise matches are missed.
names <- read.delim("GSE42568_probeid.txt", stringsAsFactors = FALSE,
                    header = TRUE)
names <- names$probeid

system.time({
  # One slot per ID, so results are collected without re-copying a growing
  # vector on every iteration.
  hits <- vector("list", length(names))
  # Iterate over the IDs actually read rather than a hard-coded count, so a
  # shorter or longer input file is handled correctly.
  for (i in seq_along(names)) {
    # fixed = TRUE treats the ID as a literal substring, so one ID can hit
    # several rows (any probe ID that contains it).
    index <- grep(names[i], probe$probeid, fixed = TRUE)
    cat("Index of required name is ", index, "\n")
    hits[[i]] <- index
  }
  # Row indices into `probe` of every hit, in lookup order (NULL if there
  # were no IDs to look up).
  vec <- unlist(hits)
})

## Rows of `probe` that matched at least one ID, written tab-separated.
vec_1 <- data.frame(probe[vec, ])
write.table(vec_1, file = "probeids_matched.txt", row.names = FALSE,
            append = FALSE, col.names = TRUE, sep = "\t")
很抱歉提出这么基础的问题,但我尝试了各种办法,都没有成功…… :( 非常感谢。
# dput()-style literal: a 20 x 1 character matrix of class "noquote" with row
# names "1".."20" and an empty column name. Each cell holds one line of R
# function source text. The value is only constructed here; it is not
# assigned to a variable.
# NOTE(review): the text looks like the first lines of utils::data() and
# appears unrelated to the probe-matching code -- confirm before relying on it.
structure(c("function (..., list = character(), package = NULL, lib.loc = NULL, ",
" verbose = getOption(\"verbose\"), envir = .GlobalEnv) ",
"{", " fileExt <- function(x) {", " db <- grepl(\"\\\\.[^.]+\\\\.(gz|bz2|xz)$\", x)",
" ans <- sub(\".*\\\\.\", \"\", x)", " ans[db] <- sub(\".*\\\\.([^.]+\\\\.)(gz|bz2|xz)$\", \"\\\\1\\\\2\", ",
" x[db])", " ans", " }", " names <- c(as.character(substitute(list(...))[-1L]), list)",
" if (!is.null(package)) {", " if (!is.character(package)) ",
" stop(\"'package' must be a character string or NULL\")",
" if (any(package %in% \"base\")) ", " warning(\"datasets have been moved from package 'base' to package 'datasets'\")",
" if (any(package %in% \"stats\")) ", " warning(\"datasets have been moved from package 'stats' to package 'datasets'\")",
" package[package %in% c(\"base\", \"stats\")] <- \"datasets\"",
" }"), .Dim = c(20L, 1L), .Dimnames = list(c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20"), ""), class = "noquote")
当前输出为:
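- Index of required name is  2164 2165
- Index of required name is  2182
- Index of required name is
- Index of required name is
- Index of required name is
- Index of required name is  2751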
需要输出:
答案 0 :(得分:0)
我认为这就是你要找的东西:
# Rows of `probe` that were never matched. The complement is built
# explicitly because negative indexing with an empty `vec` (no matches at
# all) would select zero rows instead of every row.
vec_NA <- data.frame(probe[setdiff(seq_len(nrow(probe)), vec), ])
编辑:要生成您想要的消息,请将代码修改如下:
# Report, for every ID in `names`, the rows of `probe` whose probe ID
# contains it, printing "NA" when there is none. `NoMatchID` collects the
# positions in `names` of the IDs that had no match (NULL if all matched).
NoMatchID <- NULL
# Loop over the IDs actually present instead of a hard-coded count.
for (i in seq_along(names)) {
  # fixed = TRUE: literal substring search, not a regular expression.
  index <- grep(names[i], probe$probeid, fixed = TRUE)
  if (length(index) != 0) {
    cat("Index of", names[i], "is", index, "\n")
  } else {
    cat("Index of", names[i], "is NA \n")
    NoMatchID <- c(NoMatchID, i)
  }
}
请注意,NoMatchID 是 names 向量中的索引。如果您想要数据框 probe 中未匹配行的索引:
# Row indices of `probe` that no ID matched. seq_len() keeps this correct
# for an empty `probe` (1:0 would yield c(1, 0)); setdiff() already ignores
# duplicates in `vec`, so no unique() is needed.
NoMatchID_probe <- setdiff(seq_len(nrow(probe)), vec)