在R中,我有两个由"键=值"对组成的字符向量,如下所示:
# Input: two character vectors whose elements are "key=value" strings.
x <- c("A=5", "B=1", "D=1", "E=1", "F=2", "G=1")
y <- c("A=2", "B=1", "C=3", "D=1", "H=4")
我想将其转换为data.frame,如下所示:
A B C D E F G H
x 5 1 0 1 1 2 1 0
y 2 1 3 1 0 0 0 4
x或y中出现的所有键都应成为列;如果某个键只出现在其中一个向量中,则另一个向量对应的行中该键的值应填为零。
答案 0 :(得分:9)
这是一种基于环境的方法。为x和y各创建一个单独的环境,并在其中对name = val对求值。然后合并它们:
# One environment per input vector; evaluating each "name=val" string inside
# it creates a variable binding with that name and value.
xe <- new.env()
ye <- new.env()
# NOTE: eval(parse()) executes arbitrary R code, so only use this on trusted
# input. Passing `envir` makes the target environment explicit.
eval(parse(text = x), envir = xe)
eval(parse(text = y), envir = ye)
# > ls(envir = ye)
# [1] "A" "B" "C" "D" "H"
# as.list() turns each environment into a named list; sorted = TRUE fixes the
# column order, which is otherwise not guaranteed for environments.
# The constant `.src` column differs between the two inputs, so merge() can
# never collapse them into a single row, even if x and y hold identical pairs.
df1 <- merge(
  data.frame(.src = "x", as.list(xe, sorted = TRUE)),
  data.frame(.src = "y", as.list(ye, sorted = TRUE)),
  all = TRUE, sort = FALSE
)
df1$.src <- NULL
# sort = FALSE keeps the row order with x on top.
A B D E F G C H
1 5 1 1 1 2 1 NA NA
2 2 1 1 NA NA NA 3 4
# Replace every missing cell with 0, then display the result.
df1 <- replace(df1, is.na(df1), 0)
df1
A B D E F G C H
1 5 1 1 1 2 1 0 0
2 2 1 1 0 0 0 3 4
当x和y的内容完全相同时,merge会把两行合并成一行,从而丢失一行;改用reshape::rbind.fill可以解决这个问题。
# rbind.fill() stacks the two one-row data frames and fills columns missing
# from either side with NA, so both rows are always kept.
# It is namespace-qualified so the line works without library(reshape).
df1 <- reshape::rbind.fill(
  as.data.frame(as.list(xe)),
  as.data.frame(as.list(ye))
)
答案 1 :(得分:6)
不是最漂亮的解决方案,但很容易理解:
1)将字符串解析为数据框:
# Split every "key=value" string on "=" and bind the pieces column-wise:
# row 1 holds the keys, row 2 the values (both still character).
df1 <- as.data.frame(
  do.call(cbind, strsplit(x, "=")),
  stringsAsFactors = FALSE
)
结果:
> as.data.frame(sapply(strsplit(x, '='), rbind), stringsAsFactors=FALSE)
V1 V2 V3 V4 V5 V6
1 A B D E F G
2 5 1 1 1 2 1
2)提供标题:
# Promote the first row (the keys) to column names, then drop that row so
# only the row of values remains.
names(df1) <- unlist(df1[1, ], use.names = FALSE)
df1 <- df1[-1, ]
结果:
> df1
A B D E F G
2 5 1 1 1 2 1
3)对其他字符串执行相同的操作:
# Same parsing for y: split on "=", bind column-wise (row 1 = keys,
# row 2 = values), promote the keys to column names and drop the key row.
df2 <- as.data.frame(
  do.call(cbind, strsplit(y, "=")),
  stringsAsFactors = FALSE
)
names(df2) <- unlist(df2[1, ], use.names = FALSE)
df2 <- df2[-1, ]
4)合并:
# Full outer join on the columns the two frames share; columns present on
# only one side are filled with NA. Note: rows that agree on every shared
# column are matched and combined into a single row.
df <- merge(x = df1, y = df2, all.x = TRUE, all.y = TRUE, sort = TRUE)
结果:
> df
A B D E F G C H
1 2 1 1 <NA> <NA> <NA> 3 4
2 5 1 1 1 2 1 <NA> <NA>
更新:根据评论,将上述步骤整合改进为一个完整版本:
> # Parse x into a one-row data frame: row 1 of the split holds the keys,
> # which become the column names; the remaining row holds the values.
> df1 <- as.data.frame(sapply(strsplit(x, "="), rbind), stringsAsFactors = FALSE)
> names(df1) <- df1[1, ]
> df1 <- df1[-1, ]
>
> # Same for y.
> df2 <- as.data.frame(sapply(strsplit(y, "="), rbind), stringsAsFactors = FALSE)
> names(df2) <- df2[1, ]
> df2 <- df2[-1, ]
>
> library(reshape)
> # Stack the two rows; columns missing on one side are filled with NA.
> df <- rbind.fill(df1, df2)
> # The parsed values are still character strings. Convert every column to
> # numeric so the zero fill below yields numbers instead of "0" strings.
> df[] <- lapply(df, as.numeric)
> df[is.na(df)] <- 0
> # Order the columns alphabetically by key.
> df <- df[, order(names(df))]
> df
A B C D E F G H
1 5 1 0 1 1 2 1 0
2 2 1 3 1 0 0 0 4
答案 2 :(得分:6)
这是另一种变体:
x <- c("A=5", "B=1", "D=1", "E=1", "F=2", "G=1")
y <- c("A=2", "B=1", "C=3", "D=1", "H=4")

# Split the pairs: after cbind, row 1 of `m` holds the keys and row 2 the
# values, one column per "key=value" string.
m <- do.call(cbind, strsplit(x, "="))
xn <- m[1, ]
xv <- as.numeric(m[2, ])

m <- do.call(cbind, strsplit(y, "="))
yn <- m[1, ]
yv <- as.numeric(m[2, ])

# Every key seen in either vector, in sorted order.
an <- sort(union(xn, yn))

# Build each row as a named vector of zeros with the known keys overwritten,
# then stack the two rows (x first, y second) into a matrix without row names.
r <- rbind(
  replace(setNames(numeric(length(an)), an), xn, xv),
  replace(setNames(numeric(length(an)), an), yn, yv),
  deparse.level = 0
)

# Show the matrix:
r
#     A B C D E F G H
#[1,] 5 1 0 1 1 2 1 0
#[2,] 2 1 3 1 0 0 0 4

# Convert to a data.frame if a matrix is not what you need:
as.data.frame(r)
#  A B C D E F G H
#1 5 1 0 1 1 2 1 0
#2 2 1 3 1 0 0 0 4
答案 3 :(得分:2)
补充一下我的解决方案。
x <- c("A=11", "B=1", "D=1", "E=1", "F=2", "GZ=1")
y <- c("A=2", "B=1", "C=3", "D=1", "H=4")

# Position of the first "=" in each string. regexpr() returns -1 when a
# string has no "=", which would produce a bogus key/value split below,
# so fail early in that case.
pos.x <- as.numeric(regexpr("=", x, fixed = TRUE))
pos.y <- as.numeric(regexpr("=", y, fixed = TRUE))
stopifnot(all(pos.x > 0), all(pos.y > 0))

# Long-format tables: one row per key, with the value column named after the
# source vector. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
x.1 <- data.frame(
  key = substring(x, 1, pos.x - 1),
  x = as.numeric(substring(x, pos.x + 1)),
  stringsAsFactors = FALSE
)
y.1 <- data.frame(
  key = substring(y, 1, pos.y - 1),
  y = as.numeric(substring(y, pos.y + 1)),
  stringsAsFactors = FALSE
)

# Full outer join on `key`; a key missing from one side gets NA, then 0.
d <- merge(x.1, y.1, all = TRUE)
d[is.na(d)] <- 0

# Move the keys into the row names so that transposing yields one column per
# key and one row per source vector ("x" and "y").
row.names(d) <- d$key
d$key <- NULL
d <- as.data.frame(t(d))
d
class(d)
答案 4 :(得分:1)
使用eval(parse)
# Turn a character vector of "name=value" strings into a named list by
# building the text of a list(...) call and evaluating it.
#
# x: character vector such as c("A=5", "B=1").
# Returns the list produced by evaluating list(<elements of x, comma-joined>).
#
# NOTE: the text is evaluated as R code, so this should only be used on
# trusted input.
vec2list <- function(x) {
  eval(parse(text = sprintf("list( %s )", paste(x, collapse = ","))))
}
# Parse both vectors into named lists, then let ldply() turn each list into a
# one-row data frame and stack the rows. Base lapply() replaces the
# unqualified llply(), which is not found unless plyr has been attached.
plyr::ldply(lapply(list(x, y), vec2list), data.frame)
A B D E F G C H
1 5 1 1 1 2 1 NA NA
2 2 1 1 NA NA NA 3 4