我有一个类似以下的数据集:
Age   Monday  Tuesday  Wednesday
6-9   a       b
6-9   b       a        c
6-9           c        a
9-10  c       c        b
9-10  c       a        b
使用R,我想获取以下数据集/结果(其中1表示元素的存在,0表示元素的不存在):
Age a b c
6-9 1 1 0
6-9 1 1 1
6-9 1 0 1
9-10 0 1 1
9-10 1 1 1
答案 0 :(得分:3)
这可以先用 reshape2 的 `melt` 把数据转换为长格式,再用 `table` 统计每一行中各元素出现的次数来完成:
library(reshape2)
# Build a 0/1 presence table: one row per row of `df`, one column per distinct
# letter found in the day columns, plus the original `Age` column.

# Tag every row with an id so its day columns can be regrouped after melting.
df$New <- row.names(df)
s <- melt(df, id.vars = c("Age", "New"))

# Ignore blank / missing cells so they do not turn into a column of their own.
keep <- !is.na(s$value) & s$value != ""

# Pin the id levels to the original row order. Without this, table() sorts the
# ids as text ("1", "10", "2", ...) and the Age column attached below would
# land on the wrong rows as soon as `df` has 10 or more rows.
row_id <- factor(s$New[keep], levels = df$New)
counts <- table(row_id, as.character(s$value[keep]))

# table() counts occurrences (a letter seen on two days gives 2); collapse the
# counts to a presence flag so every cell is 0 or 1.
s <- as.data.frame.matrix((unclass(counts) > 0) + 0L)
s$Age <- df$Age
s
#   a b c  Age
# 1 1 1 0  6-9
# 2 1 1 1  6-9
# 3 1 0 1  6-9
# 4 0 1 1 9-10
# 5 1 1 1 9-10
答案 1 :(得分:1)
另一种方法是使用 tidyr 的 `pivot_longer` 和 `pivot_wider`:
library(dplyr)
library(tidyr)
# One-hot encode the letters present in each row of `df1` (the example data
# frame constructed further down).
df1 %>%
  # Row id keeps rows that share an Age value apart while reshaping.
  mutate(row_id = row_number()) %>%
  pivot_longer(cols = -c(Age, row_id), values_to = "letter") %>%
  # Blank cells carry no letter.
  filter(letter != "") %>%
  # Keep one record per (row, letter); the day name is no longer needed.
  distinct(Age, row_id, letter) %>%
  mutate(present = 1) %>%
  pivot_wider(
    names_from = letter,
    values_from = present,
    values_fill = list(present = 0)
  ) %>%
  select(-row_id)
# A tibble: 5 x 4
# Age a b c
# <chr> <dbl> <dbl> <dbl>
#1 6-9 1 1 0
#2 6-9 1 1 1
#3 6-9 1 0 1
#4 9-10 0 1 1
#5 9-10 1 1 1
# Example input: one row per observation, one character column per weekday.
# An empty string marks a day with no letter.
df1 <- data.frame(
  Age = rep(c("6-9", "9-10"), times = c(3L, 2L)),
  Monday = c("a", "b", "", "c", "c"),
  Tuesday = c("b", "a", "c", "c", "a"),
  Wednesday = c("", "c", "a", "b", "b"),
  stringsAsFactors = FALSE
)
答案 2 :(得分:1)
借助行 ID 变量的 `data.table` 解决方案:
library(data.table)
library(magrittr)
# Convert `df` to a data.table in place; setDT() works by reference, so no
# re-assignment is needed.
setDT(df)
# Aggregator for dcast(): presence flag for one (row, letter) cell.
#
# x: the values that fall into the cell (possibly empty).
# Returns 1L when the cell received at least one value, 0L when it is empty.
# The result is always a length-one integer, so filled and empty cells share
# one type.
ag <- function(x) {
  as.integer(length(x) > 0L)
}
# Reshape `df` into a 0/1 presence table with one column per letter.
# `idx` is added to `df` by reference so that rows sharing an Age value stay
# separate; `ag` turns each (row, letter) cell into a presence flag.
wide <- df[, idx := .I] %>%
  melt(id.vars = c("Age", "idx")) %>%
  dcast(Age + idx ~ value, fun.aggregate = ag, value.var = "value")

# Drop the helper id and, only if it exists, the column produced by missing
# cells. Selecting by setdiff() avoids depending on an "NA" column that is
# absent when the input has no missing values.
wide[, setdiff(names(wide), c("idx", "NA")), with = FALSE]
Age a b c
1: 6-9 1 1 0
2: 6-9 1 1 1
3: 6-9 1 0 1
4: 9-10 0 1 1
5: 9-10 1 1 1
数据:
# Example input: one row per observation, one column per weekday; NA marks a
# day with no letter. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df <- read.table(
  text = "Age Monday Tuesday Wednesday
6-9 a b NA
6-9 b a c
6-9 NA c a
9-10 c c b
9-10 c a b",
  header = TRUE
)