在 R 中格式化分类数据

时间:2015-03-09 03:24:02

标签: r dataframe categorical-data

# Example data: 13 observations, each carrying a reason code ("v" or "s"),
# a location code ("c" or "h") and a binary 0/1 outcome.
reason <- rep(c("v", "s", "v", "s"), times = c(5, 3, 3, 2))
location <- rep(c("c", "h"), times = c(8, 5))
zero_one <- c(
  1, 1, 0, 1, 1,  # reason v, location c
  1, 1, 0,        # reason s, location c
  1, 0, 0,        # reason v, location h
  1, 0            # reason s, location h
)
df <- data.frame(reason = reason, location = location, zero_one = zero_one)

是否有一种简单的方法可以将 "df" 转换为 "DF",其中 "DF" 具有以下形状:

reason  location  #zeros  #ones  
     v         c       1      4  
     s         c       1      2  
     v         h       2      1  
     s         h       1      1

2 个答案:

答案 0 :(得分:3)

您可以使用 dcast 执行此操作:

# reshape2 provides dcast().
library(reshape2)

# Count the rows for every combination of the remaining columns
# (reason, location), spreading the counts into one column per zero_one
# value. zero_one is first recoded as a factor whose labels ('zeros',
# 'ones') become the names of the count columns.
dcast(
  data = transform(
    df,
    zero_one = factor(zero_one, levels = 0:1, labels = c('zeros', 'ones'))
  ),
  formula = ... ~ zero_one,
  fun.aggregate = length,
  value.var = 'zero_one'
)
#   reason location zeros ones
#1      s        c     1    2
#2      s        h     1    1
#3      v        c     1    4
#4      v        h     2    1

或使用 data.table(与 @jalapic 的方法类似):

# data.table provides setDT() and the [i, j, by] syntax used below.
library(data.table)

# setDT() converts df to a data.table by reference, so df itself is changed.
# For each (reason, location) group, count the rows where zero_one is 0
# (!zero_one is TRUE) and the rows where it is non-zero (!!zero_one is TRUE).
# The trailing [] makes sure the result is printed.
setDT(df)[, list(zeros = sum(!zero_one), ones = sum(!!zero_one)),
          by = .(reason, location)][]
#   reason location zeros ones
#1:      v        c     1    4
#2:      s        c     1    2
#3:      v        h     2    1
#4:      s        h     1    1

base R

 # Base R: sum two logical indicator columns (zero_one is 0 / zero_one is
 # non-zero) within every combination of the other columns of df. The "."
 # on the right-hand side stands for those remaining columns.
 aggregate(
   cbind(zeros = !zero_one, ones = !!zero_one) ~ .,
   data = df,
   FUN = sum
 )
 #  reason location zeros ones
 #1      s        c     1    2
 #2      v        c     1    4
 #3      s        h     1    1
 #4      v        h     2    1

答案 1 :(得分:2)

您可以非常简单地使用dplyr执行此操作:

library(dplyr)

# For each (reason, location) pair, count the rows whose zero_one value is 0
# and the rows whose value is 1. The column in df is named zero_one
# (singular); df has no column called zero_ones.
df %>%
  group_by(reason, location) %>%
  summarize(zeros = sum(zero_one == 0), ones = sum(zero_one == 1)) %>%
  ungroup()  # summarize() only drops the last grouping level; drop the rest

#  reason location zeros ones
#1      s        c     1    2
#2      s        h     1    1
#3      v        c     1    4
#4      v        h     2    1