有如下分类数据:
# Sample input: one row per id; the remaining four columns hold the
# categorical codes that should be expanded into indicator columns.
data.frame(
  id     = c(1, 2, 3, 4, 5),
  stock1 = c(1, 2, 0, 1, 2),
  stock2 = c(0, 1, 0, 1, 1),
  end    = c(0, 1, 3, 0, 3),
  start  = c(2, 3, 0, 1, 0)
)
  id stock1 stock2 end start
1  1      1      0   0     2
2  2      2      1   1     3
3  3      0      0   3     0
4  4      1      1   0     1
5  5      2      1   3     0
如何将其从长格式转换为宽格式,使每个“列名_取值”组合各自成为一列,并用 0/1 表示该行是否取该值?
预期输出示例:
# Desired result: one 0/1 indicator column per "<column>_<value>" pair.
# NOTE(review): stock2_1 is reproduced as posted, but it does not match the
# sample input, where stock2 equals 1 for ids 2, 4 and 5 -- verify with the
# asker before relying on this column.
data.frame(
  id       = c(1, 2, 3, 4, 5),
  stock1_0 = c(0, 0, 1, 0, 0),
  stock1_1 = c(1, 0, 0, 1, 0),
  stock1_2 = c(0, 1, 0, 0, 1),
  stock2_0 = c(1, 0, 1, 0, 0),
  stock2_1 = c(0, 1, 0, 0, 0),
  end_0    = c(1, 0, 0, 1, 0),
  end_1    = c(0, 1, 0, 0, 0),
  end_3    = c(0, 0, 1, 0, 1),
  start_0  = c(0, 0, 1, 0, 1),
  start_1  = c(0, 0, 0, 1, 0),
  start_2  = c(1, 0, 0, 0, 0),
  start_3  = c(0, 1, 0, 0, 0)
)
  id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0 start_1 start_2 start_3
1  1        0        1        0        1        0     1     0     0       0       0       1       0
2  2        0        0        1        0        1     0     1     0       0       0       0       1
3  3        1        0        0        1        0     0     0     1       1       0       0       0
4  4        0        1        0        0        0     1     0     0       0       1       0       0
5  5        0        0        1        0        0     0     0     1       1       0       0       0
答案 0 :(得分:3)
您可以使用 `model.matrix`。
# One-hot encode every column of `dat` except the first (id) column.
#
# For each remaining column, model.matrix() without an intercept produces one
# 0/1 indicator column per distinct value. The indicator columns are named
# "<column>_<value>" using the factor levels themselves rather than their
# positions, so a column holding 0/1/3 yields *_0, *_1, *_3 (not *_1, *_2, *_3).
data.frame(
  dat[1],
  do.call(cbind, lapply(names(dat)[-1], function(nm) {
    f <- factor(dat[[nm]])
    # `- 1` drops the intercept so that every level gets its own column.
    m <- model.matrix(~ f - 1)
    colnames(m) <- paste(nm, levels(f), sep = "_")
    m
  }))
)
#   id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0
# 1  1        0        1        0        1        0     1     0     0       0
# 2  2        0        0        1        0        1     0     1     0       0
# 3  3        1        0        0        1        0     0     0     1       1
# 4  4        0        1        0        0        1     1     0     0       0
# 5  5        0        0        1        0        1     0     0     1       1
#   start_1 start_2 start_3
# 1       0       1       0
# 2       0       0       1
# 3       0       0       0
# 4       1       0       0
# 5       0       0       0
数据:
# Sample data used by the snippet above: an id column followed by four
# categorical columns stored as numeric codes.
dat <- data.frame(
  id     = c(1, 2, 3, 4, 5),
  stock1 = c(1, 2, 0, 1, 2),
  stock2 = c(0, 1, 0, 1, 1),
  end    = c(0, 1, 3, 0, 3),
  start  = c(2, 3, 0, 1, 0)
)
答案 1 :(得分:2)
一种方法是先把数据转换为长格式,将列名与取值合并成新的列名,然后再转换回宽格式。
library(dplyr)
library(tidyr)
# Reshape to long form (one row per id/column pair), fuse each column name
# with its value into a single "<column>_<value>" key, mark every observed key
# with 1, then spread the keys back into columns, filling the combinations
# that never occur for an id with 0.
df %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "value") %>%
  unite(col = "name", name, value, sep = "_") %>%
  mutate(value = 1) %>%
  pivot_wider(
    names_from = name,
    values_from = value,
    values_fill = list(value = 0)
  )
# A tibble: 5 x 13
# id stock1_1 stock2_0 end_0 start_2 stock1_2 stock2_1 end_1 start_3 stock1_0 end_3 start_0 start_1
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 1 1 1 1 0 0 0 0 0 0 0 0
#2 2 0 0 0 0 1 1 1 1 0 0 0 0
#3 3 0 1 0 0 0 0 0 0 1 1 1 0
#4 4 1 0 1 0 0 1 0 0 0 0 0 1
#5 5 0 0 0 0 1 1 0 0 0 1 1 0
答案 2 :(得分:2)
library(data.table)
# Convert `df` to a data.table by reference, melt it to long form keyed by id,
# and cast it back to wide form with one column per "<column>_<value>" key.
# Aggregating with `length` counts how many times each key occurs per id,
# which gives 0 for keys that do not occur.
setDT(df)
dcast(
  melt(df, id.vars = "id"),
  id ~ paste0(variable, "_", value),
  fun.aggregate = length,
  value.var = "value"
)
# id end_0 end_1 end_3 start_0 start_1 start_2 start_3 stock1_0
# 1: 1 1 0 0 0 0 1 0 0
# 2: 2 0 1 0 0 0 0 1 0
# 3: 3 0 0 1 1 0 0 0 1
# 4: 4 1 0 0 0 1 0 0 0
# 5: 5 0 0 1 1 0 0 0 0
# stock1_1 stock1_2 stock2_0 stock2_1
# 1: 1 0 1 0
# 2: 0 1 0 1
# 3: 0 0 1 0
# 4: 1 0 0 1
# 5: 0 1 0 1