我一直坚持这个问题一段时间了。需要一些帮助。 我正在将以下文件(可能比3个文件文件更糟)读入数据帧。 我的输入文件如下所示: 文件1:
someName someMOD someID
A T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P) 1
B S495(P) 2
C S162(P),Q159(D) 3
D S45(P),C47(C),S48(P),S26(P) 4
E S18(P) 5
file2的:
someName someMOD someID
C S162(P),Q159(D) 3
D S45(P),C47(C),S48(P),S26(P) 4
F S182(P) 6
E S18(P) 5
Z Q100(P) 9
A T754(P),M691(O),S694(P),S739(P),S740(P) 1
file3的:
someName someMOD someID
A T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P) 1
B S495(P) 2
D S45(P),C47(C),S48(P),S26(P) 4
E S18(P) 5
F S182(P) 6
L Z182(P) 8
C S162(P),Q159(D) 3
我的代码:
fileList <- dir(pattern="*.xls")
i<-1
j<-1
a<-list()
mybigtable<-data.frame
for (f in 1:length(fileList)){
fileName <- fileList[f]
X <-read.xls(fileName)
if(regexpr("Drug_Rep", fileName)[1]>0){
a[[i]]<-X
}
i=i+1
}
else{
#Don't do anything
}
}
#Now i want to merge my dataframes
mymerge <- function(x, y)
merge(x, y, by=c("someName", "someID"), all=TRUE))
Reduce(mymerge,a) #passing my list of dataframes 'a'
我在'a'列表上输了dput :(
)list(structure(list(someName = structure(c(1L, 2L, 4L, 5L, 6L,
7L, 3L), .Label = c("A", "B", "C", "D", "E", "F", "L"), class = "factor"),
someMOD = structure(c(6L, 5L, 4L, 2L, 3L, 7L, 1L), .Label = c("S162(P),Q159(D)",
"S18(P)", "S182(P)", "S45(P),C47(C),S48(P),S26(P)", "S495(P)",
"T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P)",
"Z182(P)"), class = "factor"), someID = c(1L, 2L, 4L, 5L,
6L, 8L, 3L)), .Names = c("someName", "someMOD", "someID"), class = "data.frame", row.names = c(NA,
-7L)), structure(list(someName = structure(1:5, .Label = c("A",
"B", "C", "D", "E"), class = "factor"), someMOD = structure(c(5L,
4L, 1L, 3L, 2L), .Label = c("S162(P),Q159(D)", "S18(P)", "S45(P),C47(C),S48(P),S26(P)",
"S495(P)", "T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P)"
), class = "factor"), someID = 1:5), .Names = c("someName", "someMOD",
"someID"), class = "data.frame", row.names = c(NA, -5L)), structure(list(
someName = structure(c(2L, 3L, 5L, 4L, 6L, 1L), .Label = c("A",
"C", "D", "E", "F", "Z"), class = "factor"), someMOD = structure(c(2L,
5L, 4L, 3L, 1L, 6L), .Label = c("Q100(P)", "S162(P),Q159(D)",
"S18(P)", "S182(P)", "S45(P),C47(C),S48(P),S26(P)", "T754(P),M691(O),S694(P),S739(P),S740(P)"
), class = "factor"), someID = c(3L, 4L, 6L, 5L, 9L, 1L)), .Names = c("someName",
"someMOD", "someID"), class = "data.frame", row.names = c(NA,
-6L)))
填写列表时我的错误是什么?任何帮助都非常感谢。 我只想尝试以下内容:
答案 0 :(得分:2)
我之前给你的代码的问题是如果有任何重复的列名,{em>和你正在合并3个以上的数据集,merge
会感到困惑。您必须重命名someMOD
列,以免它们发生冲突。 for
循环可以为此目的起作用。
dupvars <- which(!names(a[[1]]) %in% c("someName", "someID"))
for(i in seq_along(a))
names(a[[i]])[dupvars] <- paste0(names(a[[i]])[dupvars], i)
# and then merge
Reduce(mymerge, a)
答案 1 :(得分:1)
也许问题是你实际上并没有在标准意义上尝试merge
,而是reshape
。在这种情况下,您可以在添加“时间”变量后rbind
同时data.frame
所有dcast
,并使用“reshape2”中的rbind
来获取您所追求的内容:
data.frame
加在temp <- do.call(rbind,
lapply(seq_along(a),
function(x) data.frame(a[[x]], time = x)))
head(temp)
# someName someMOD someID time
# 1 A T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P) 1 1
# 2 B S495(P) 2 1
# 3 D S45(P),C47(C),S48(P),S26(P) 4 1
# 4 E S18(P) 5 1
# 5 F S182(P) 6 1
# 6 L Z182(P) 8 1
data.frame
library(reshape2)
dcast(temp, someName + someID ~ time, value.var="someMOD")
# someName someID 1
# 1 A 1 T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P)
# 2 B 2 S495(P)
# 3 C 3 S162(P),Q159(D)
# 4 D 4 S45(P),C47(C),S48(P),S26(P)
# 5 E 5 S18(P)
# 6 F 6 S182(P)
# 7 L 8 Z182(P)
# 8 Z 9 <NA>
# 2
# 1 T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P)
# 2 S495(P)
# 3 S162(P),Q159(D)
# 4 S45(P),C47(C),S48(P),S26(P)
# 5 S18(P)
# 6 <NA>
# 7 <NA>
# 8 <NA>
# 3
# 1 T754(P),M691(O),S694(P),S739(P),S740(P)
# 2 <NA>
# 3 S162(P),Q159(D)
# 4 S45(P),C47(C),S48(P),S26(P)
# 5 S18(P)
# 6 S182(P)
# 7 <NA>
# 8 Q100(P)
从“长”格式转换为“宽”格式{{1}}