我有一个CSV文件(读入R后记为ref),其中包含10000行如下格式的数据:
ref smbole name
r1 ts table_spoon
r2 kn knife
r3 fr door
... ... ...
以及一个用R从若干文本文件中提取出来的矩阵data(包含60至100行),格式如下:
ref smbole name
r2 kn NA
r3 NA door
NA NA table_spoon
NA NA door
... ... ...
我想根据ref矩阵中的值来填补data矩阵中的NA值,也就是说,把每个NA替换为ref中对应行里的等效值。
我的预期输出是
ref smbole name
r2 kn knife
r3 fr door
r1 ts table_spoon
r3 fr door
我尝试了下面这段代码,但数据没有发生任何变化:
# Asker's attempt, kept verbatim; the notes below only describe what the
# visible code does and flag names that do not resolve.
# Read the tab-separated reference table and coerce it to a matrix.
ref <- as.matrix(read.delim("name.csv", sep = "\t"))
# fun: meant to fill the NA entries of one row (`rowi`) from a reference row.
# NOTE(review): the parameter `r` is never used in the body; the body reads
# the global `ref` instead.
fun <- function(rowi,r) {
# For every row of ref, count how many non-NA values of rowi occur in it.
res <- apply(as.data.frame(ref),1,function(x) {length(na.omit(match(na.omit(rowi),x)))})
# NOTE(review): this line indexes on `data`, not on `res` computed above, so
# `res` is unused -- presumably `which(max(res) == res)[1]` was intended;
# confirm.
IND <- which( max(data) == data )[1]
# NOTE(review): `genemap` is not defined anywhere in the visible code --
# presumably `ref` was meant; confirm.
rowi[is.na(rowi)] <- unlist(genemap[IND,])[is.na(rowi)]
return(rowi)
}
# Apply fun to each row of data (ref is passed as the unused `r` argument),
# then transpose so that each record is a row again.
as.data.frame(t(apply(data, 1, fun, ref))
)
答案 0 :(得分:0)
写法有点绕,但应该可行。
假设数据如下所示:
# Lookup table: every row is a complete (ref, smbole, name) triple.
ref <- data.frame(
  ref = c("r1", "r2", "r3"),
  smbole = c("ts", "kn", "fr"),
  name = c("table_spoon", "knife", "door"),
  stringsAsFactors = FALSE
)
# Incomplete records: NA marks the fields that are missing.
data <- data.frame(
  ref = c("r2", "r3", NA, NA),
  smbole = c("kn", NA, NA, NA),
  name = c(NA, "door", "table_spoon", "door"),
  stringsAsFactors = FALSE
)
您可以尝试:
# Step 1 (Map): for every column, fetch the row of ref whose entry equals
# data's entry in that column; the row is all-NA where data is NA or unmatched.
# Step 2 (Reduce): fold the per-column lookups left to right, filling rows
# that are still entirely NA from the next column's lookup.
Reduce(
  function(filled, candidate) {
    still_empty <- rowSums(!is.na(filled)) == 0
    filled[still_empty, ] <- candidate[still_empty, ]
    filled
  },
  Map(function(ref_col, data_col) ref[match(data_col, ref_col), ], ref, data)
)
给出:
# ref smbole name
#2 r2 kn knife
#3 r3 fr door
#NA r1 ts table_spoon
#NA.1 r3 fr door
答案 1 :(得分:0)
您可以遍历dat的每一行,在ref中找到与之匹配的行,最终结果就是由所有这些匹配行组成的矩阵。
# For each row of dat, use its first non-NA field as the lookup key and return
# the corresponding complete row of ref; t() turns the column-per-row result
# of apply() back into one row per record.
t(
  apply(dat, 1, function(x) {
    ind <- which.max(!is.na(x)) # index of first non-NA field (1 if all are NA)
    # match() yields exactly one row index (NA when the key is absent or NA),
    # so every iteration returns exactly one row and apply() always builds a
    # matrix. A logical `==` mask could select zero or several rows instead.
    ref[match(x[ind], ref[, ind]), ]
  })
)
# ref smbole name
# [1,] "r2" "kn" "knife"
# [2,] "r3" "fr" "door"
# [3,] "r1" "ts" "table_spoon"
# [4,] "r3" "fr" "door"
使用的数据:
# Reference lookup as a character matrix, one named column per field.
ref <- cbind(
  ref = c("r1", "r2", "r3"),
  smbole = c("ts", "kn", "fr"),
  name = c("table_spoon", "knife", "door")
)
# ref smbole name
# [1,] "r1" "ts" "table_spoon"
# [2,] "r2" "kn" "knife"
# [3,] "r3" "fr" "door"
# Incomplete records as a character matrix; NA marks the missing fields.
dat <- cbind(
  ref = c("r2", "r3", NA, NA),
  smbole = c("kn", NA, NA, NA),
  name = c(NA, "door", "table_spoon", "door")
)
# ref smbole name
# [1,] "r2" "kn" NA
# [2,] "r3" NA "door"
# [3,] NA NA "table_spoon"
# [4,] NA NA "door"
答案 2 :(得分:0)
使用data.table更新联接(update join)的解决方案:
library(data.table)
# Convert both inputs to data.tables by reference, keeping strings as
# character. FALSE is spelled out because the shorthand F can be reassigned.
ref <- as.data.frame(ref, stringsAsFactors = FALSE)
setDT(ref)
data <- as.data.frame(data, stringsAsFactors = FALSE)
setDT(data)
# Use every column in turn as the join key (oncol); for each remaining column
# (scol), update data in place: where data's value is NA, take the value from
# the ref row that shares the key, otherwise keep data's own value.
for (oncol in colnames(ref)) {
  for (scol in setdiff(colnames(ref), oncol)) {
    # Inside data[ref, ...], the "i." prefix names ref's copy of the column.
    rcol <- paste0("i.", scol)
    data[ref, (scol) := ifelse(is.na(get(scol)), get(rcol), get(scol)), on = oncol]
  }
}
# > data
# ref smbole name
# 1: r2 kn knife
# 2: r3 fr door
# 3: r1 ts table spoon
# 4: r3 fr door
以下是使用中的数据:
# Example inputs for the update join, built as literal lists whose class
# attribute is set to c("data.table", "data.frame").
# Note that `name` uses "table spoon" (with a space) in both objects.
# Incomplete records (4 rows): NA marks the fields to be filled in.
data <- structure(list(ref = c("r2", "r3", NA, NA), smbole = c("kn",
NA, NA, NA), name = c(NA, "door", "table spoon", "door")), row.names = c(NA,
-4L), class = c("data.table", "data.frame"))
# Reference lookup (3 rows): every row is a complete (ref, smbole, name) triple.
ref <- structure(list(ref = c("r1", "r2", "r3"), smbole = c("ts", "kn",
"fr"), name = c("table spoon", "knife", "door")), row.names = c(NA,
-3L), class = c("data.table", "data.frame"))