R:将 <key, value> 对转换为 data.frame

时间:2011-11-14 20:45:23

标签: r vector dataframe

在R中,我有两个这样的对向量:

# Two vectors of "key=value" strings. The padding lines up identical keys so
# the gaps show which keys are absent from each vector.
x <- c("A=5", "B=1",        "D=1", "E=1", "F=2", "G=1")
y <- c("A=2", "B=1", "C=3", "D=1",                     "H=4")

我想将其转换为data.frame,如下所示:

  A B C D E F G H
x 5 1 0 1 1 2 1 0
y 2 1 3 1 0 0 0 4

x 或 y 中出现的所有键都应成为列;某个向量中没有出现的键,其对应的值应填为零。

5 个答案:

答案 0 :(得分:9)

这是一种基于环境的方法:为每个向量创建一个单独的环境,在其中对 name=val 对求值,然后把两个环境的内容合并:

# One environment per vector: evaluating the text "A=5" inside an environment
# binds A to 5 there. The strings are executed as R code, so this is only
# appropriate for trusted input.
xe <- new.env()
ye <- new.env()
eval(parse(text = x), envir = xe)
eval(parse(text = y), envir = ye)
# ls(ye) now returns: "A" "B" "C" "D" "H"
# as.list() turns each environment into a named list that merge() accepts.
x_vals <- as.list(xe)
y_vals <- as.list(ye)
df1 <- merge(x_vals, y_vals, all = TRUE, sort = FALSE)
# sort = FALSE is the author's way of keeping the x row on top.
  A B D  E  F  G  C  H
1 5 1 1  1  2  1 NA NA
2 2 1 1 NA NA NA  3  4

 # Keys missing from one vector came out of the merge as NA; report them as 0.
 df1 <- replace(df1, is.na(df1), 0)
 df1
  A B D E F G C H
1 2 1 1 0 0 0 3 4
2 5 1 1 1 2 1 0 0

改用 reshape::rbind.fill 可以避免这样的问题:当两个参数在共有键上的取值完全相同时,merge 会把它们合并成一行,从而丢失一行。

# Stack the two one-row data frames; a column absent from one of them is
# filled with NA in that row.
# NOTE(review): rbind.fill is not base R. Load the package that provides it
# first (the answer names reshape; plyr exports rbind.fill as well).
df1 <- rbind.fill(as.data.frame(as.list(xe)), as.data.frame(as.list(ye)) )

答案 1 :(得分:6)

不是最漂亮的解决方案,但很容易理解:

1)将字符串解析为数据框:

# Split every "key=value" string at "=" and bind the pieces column-wise into a
# 2-row table: row 1 holds the keys, row 2 the values (both as character).
df1 <- as.data.frame(sapply(strsplit(x, '='), rbind), stringsAsFactors=FALSE)

结果:

> as.data.frame(sapply(strsplit(x, '='), rbind), stringsAsFactors=FALSE)
  V1 V2 V3 V4 V5 V6
1  A  B  D  E  F  G
2  5  1  1  1  2  1

2)提供标题:

# Promote the first row (the keys) to column names, then drop that row so only
# the values remain. The values are still character strings at this point.
names(df1) <- df1[1,]
df1 <- df1[-1,]

结果:

> df1
  A B D E F G
2 5 1 1 1 2 1

3)对其他字符串执行相同的操作:

# Same three steps for y: split into a 2-row table, use the key row as the
# column names, then drop that row.
df2 <- as.data.frame(sapply(strsplit(y, '='), rbind), stringsAsFactors=FALSE)
names(df2) <- df2[1,]
df2 <- df2[-1,]

4)合并:

# Full outer join on the columns the two frames share; a key present in only
# one of them becomes NA in the other's row.
df <- merge(df1, df2, all=TRUE, sort=TRUE)

结果:

> df
  A B D    E    F    G    C    H
1 2 1 1 <NA> <NA> <NA>    3    4
2 5 1 1    1    2    1 <NA> <NA>

更新:根据评论,把上述步骤合并为一个完整版本,并做了一些修饰:

> df1 <- as.data.frame(sapply(strsplit(x, '='), rbind), stringsAsFactors=FALSE)
> names(df1) <- df1[1,]
> df1 <- df1[-1,]
> 
> df2 <- as.data.frame(sapply(strsplit(y, '='), rbind), stringsAsFactors=FALSE)
> names(df2) <- df2[1,]
> df2 <- df2[-1,]
> 
> library(reshape)
> df <- rbind.fill(df1,df2)
> df[is.na(df)] <- 0
> df <- df[, order(names(df))]
> df
  A B C D E F G H
1 5 1 0 1 1 2 1 0
2 2 1 3 1 0 0 0 4

答案 2 :(得分:6)

这是另一种变体:

x <- c("A=5", "B=1", "D=1", "E=1", "F=2", "G=1")
y <- c("A=2", "B=1", "C=3", "D=1", "H=4")

# Split each "key=value" string at "=" and bind the pieces column-wise:
# row 1 of each matrix holds the keys, row 2 the values.
kv_x <- do.call("cbind", strsplit(x, "="))
kv_y <- do.call("cbind", strsplit(y, "="))

xn <- kv_x[1, ]
yn <- kv_y[1, ]
xv <- as.numeric(kv_x[2, ])
yv <- as.numeric(kv_y[2, ])

# Every key seen in either vector, in alphabetical order
an <- sort(union(xn, yn))

# Start from an all-zero 2 x K matrix so that absent keys stay 0, then fill
# each row by column name.
r <- matrix(0, nrow = 2, ncol = length(an), dimnames = list(NULL, an))
r[1, xn] <- xv
r[2, yn] <- yv

# Show the matrix:
r
#     A B C D E F G H
#[1,] 5 1 0 1 1 2 1 0
#[2,] 2 1 3 1 0 0 0 4

# The same result as a data.frame rather than a matrix:
as.data.frame(r)
#  A B C D E F G H
#1 5 1 0 1 1 2 1 0
#2 2 1 3 1 0 0 0 4

答案 3 :(得分:2)

只需添加我的解决方案。

x <- c("A=11", "B=1", "D=1", "E=1", "F=2", "GZ=1")
y <- c("A=2", "B=1", "C=3", "D=1", "H=4")

# Position of the first "=" in each string. Everything before it is the key
# and everything after it is the value, so multi-character keys and values
# (e.g. "GZ", "11") are handled.
pos.x <- as.numeric(regexpr("=", x))
pos.y <- as.numeric(regexpr("=", y))

# One long-format table per vector: a shared `key` column plus a numeric value
# column named after the vector. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
x.1 <- data.frame(key = substring(x, 1, pos.x - 1),
                  x = as.numeric(substring(x, pos.x + 1)),
                  stringsAsFactors = FALSE)

y.1 <- data.frame(key = substring(y, 1, pos.y - 1),
                  y = as.numeric(substring(y, pos.y + 1)),
                  stringsAsFactors = FALSE)

# Full outer join on the key: a key present in only one vector gets NA in the
# other vector's column.
d <- merge(x.1, y.1, by = "key", all = TRUE)

# Absent keys are reported as zero.
d[is.na(d)] <- 0

# Move the keys into the row names so that only numeric columns remain ...
row.names(d) <- d$key
d$key <- NULL

# ... then transpose: one row per vector (x, y), one column per key.
d <- as.data.frame(t(d))

d
class(d)

答案 4 :(得分:1)

另一种使用 eval(parse) 的方法:

# Turn a character vector of "key=value" strings into a named list by
# building the text of a list(...) call and evaluating it.
# The strings are executed as R code, so use this on trusted input only.
vec2list <- function(x) {
  eval(parse(text = paste('list(', paste(x, collapse = ","), ')')))
}

# Parse both vectors into named lists, turn each list into a one-row data
# frame and stack them; keys missing from one vector come out as NA.
# llply is namespace-qualified like ldply so the line also works when plyr
# has not been attached with library(plyr).
plyr::ldply(plyr::llply(list(x, y), vec2list), data.frame)

  A B D  E  F  G  C  H
1 5 1 1  1  2  1 NA NA
2 2 1 1 NA NA NA  3  4