我有100个这样的数据帧:
df1
v1 v2 v3
1 1 2 a
2 2 3 b
3 3 4 c
4 4 5 d
5 3 5 e
df2
v1 v2 v3
1 1 2 j
2 2 3 i
3 3 4 t
4 3 5 r
df3
v1 v2 v3
1 2 3 t
2 2 4 g
3 6 7 i
4 8 9 t
现在,我想根据第三列中的值合并它们并形成一个文件;如果某个数据帧的第三列中没有相应的值,那么序列中的对应位置将是0。因此,生成的文件将是:
v1 v2 v3
1 1 2 aj0
2 2 3 bit
3 2 4 00g
4 3 4 ct0
5 3 5 er0
6 4 5 d00
7 6 7 00i
8 8 9 00t
我能够为此提出一个代码,如下所示:
# Full outer joins on the key columns v1/v2; merge() suffixes the clashing
# v3 columns as v3.x (from df1) and v3.y (from df2), df3's stays v3.
df12 <- merge(df1, df2, by = c("v1", "v2"), all = TRUE)
df123 <- merge(df12, df3, by = c("v1", "v2"), all = TRUE)
v3_cols <- c("v3.x", "v3.y", "v3")
# Fill the gaps with "0" BEFORE pasting: substituting the text "NA" in the
# pasted string afterwards would also rewrite real values that contain "NA".
df123[v3_cols] <- lapply(df123[v3_cols], function(col) {
  col <- as.character(col)
  col[is.na(col)] <- "0"
  col
})
# Concatenate the three value columns row by row, then drop the leftovers.
df123$v3 <- do.call(paste0, unname(as.list(df123[v3_cols])))
df123$v3.x <- df123$v3.y <- NULL
但是,我有100个数据帧,怎样才能把这种做法应用到所有数据帧上呢?谢谢!
答案 0 :(得分:4)
使用dplyr
library(dplyr)
library(tidyr)
# Stack the named list into one long frame; `group` records which list
# element (df1, df2, ...) each row came from.
bind_rows(dfs, .id = "group") %>%
  # One column per source frame, keyed by v1/v2; combinations a frame does
  # not contain are filled with the string "0".
  pivot_wider(names_from = group, values_from = v3, values_fill = "0") %>%
  # Sort by the keys explicitly rather than relying on the reshape's row order.
  arrange(v1, v2) %>%
  # Concatenate the per-frame columns into a single v3 string.
  unite(v3, df1:df3, sep = "")
# v1 v2 v3
#1 1 2 aj0
#2 2 3 bit
#3 2 4 00g
#4 3 4 ct0
#5 3 5 er0
#6 4 5 d00
#7 6 7 00i
#8 8 9 00t
或使用data.table
library(data.table)
# Stack the named list into one table; `group` holds the list element name.
dt <- rbindlist(dfs, idcol = "group")
# Spread to one column per source frame, filling absent combinations with "0".
wide <- dcast(dt, v1 + v2 ~ group, value.var = "v3", fill = "0")
# Paste the per-frame columns together for every v1/v2 pair, in list order.
wide[, .(v3 = do.call(paste0, .SD)), by = .(v1, v2), .SDcols = names(dfs)]
# v1 v2 v3
#1: 1 2 aj0
#2: 2 3 bit
#3: 2 4 00g
#4: 3 4 ct0
#5: 3 5 er0
#6: 4 5 d00
#7: 6 7 00i
#8: 8 9 00t
如果您有文件P1.txt、P2.txt等,则可以将这些文件读入一个列表,然后应用上述代码。假设文件在工作目录中:
# Locate the input files (P1.txt, P2.txt, ...) in the working directory.
files <- list.files(pattern = "^P\\d+")
# list.files() sorts names as text (P1, P10, P100, P11, ...); reorder by the
# numeric part so the frames follow the file numbers.
files <- files[order(as.numeric(sub("^P(\\d+).*$", "\\1", files)))]
# Read every file into a list of data frames
# (`fread` from `data.table` is an alternative reader).
dfs <- lapply(files, function(x) {
  read.table(x, header = TRUE, sep = "", stringsAsFactors = FALSE)
})
# Name each frame after its file, dropping everything from the first dot on.
nm1 <- gsub("\\..*", "", files)
dfs <- setNames(dfs, nm1)
您需要将dplyr代码中的df1:df3替换为P1:P100:
# Stack the named list into one long frame; `group` records the source name.
bind_rows(dfs, .id = "group") %>%
  # One column per source, keyed by v1/v2; missing combinations become "0".
  pivot_wider(names_from = group, values_from = v3, values_fill = "0") %>%
  # Sort by the keys explicitly rather than relying on the reshape's row order.
  arrange(v1, v2) %>%
  # Select the per-source columns by name: a positional range such as P1:P100
  # only covers every column when they happen to sit in exactly that order.
  unite(v3, all_of(names(dfs)), sep = "")
###data
# Example data: three frames sharing the integer key columns v1/v2 and a
# character value column v3.
df1 <- data.frame(
  v1 = c(1L, 2L, 3L, 4L, 3L),
  v2 = c(2L, 3L, 4L, 5L, 5L),
  v3 = c("a", "b", "c", "d", "e"),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  v1 = c(1L, 2L, 3L, 3L),
  v2 = 2:5,
  v3 = c("j", "i", "t", "r"),
  stringsAsFactors = FALSE
)
df3 <- data.frame(
  v1 = c(2L, 2L, 6L, 8L),
  v2 = c(3L, 4L, 7L, 9L),
  v3 = c("t", "g", "i", "t"),
  stringsAsFactors = FALSE
)
# Collect the frames by explicit name. A pattern search over the workspace
# would also pick up unrelated objects such as df12 or df123, and would
# return the names in text order rather than numeric order.
dfs <- mget(paste0("df", 1:3))
答案 1 :(得分:3)
您可以对数据框列表使用Reduce来完成此操作:
dfs <- list(df1, df2, df3)
# Give every frame's v3 column its own name before merging; otherwise merge()
# keeps re-using the .x/.y suffixes and produces duplicated column names as
# soon as more than three frames are combined.
dfs <- Map(function(d, i) {
  names(d)[names(d) == "v3"] <- paste0("v3_", i)
  d
}, dfs, seq_along(dfs))
# Successive full outer joins on the key columns v1/v2.
merged <- Reduce(function(x, y) merge(x, y, by = c("v1", "v2"), all = TRUE), dfs)
value_cols <- setdiff(names(merged), c("v1", "v2"))
# Fill the gaps with "0" BEFORE pasting: substituting the text "NA" in the
# pasted string afterwards would also rewrite real values that contain "NA".
merged[value_cols] <- lapply(merged[value_cols], function(col) {
  col <- as.character(col)
  col[is.na(col)] <- "0"
  col
})
# Concatenate the value columns row by row, in the order of `dfs`.
v3 <- do.call(paste0, unname(as.list(merged[value_cols])))
data.frame(v1 = merged$v1, v2 = merged$v2, v3)
# v1 v2 v3
# 1 1 2 aj0
# 2 2 3 bit
# 3 2 4 00g
# 4 3 4 ct0
# 5 3 5 er0
# 6 4 5 d00
# 7 6 7 00i
# 8 8 9 00t