如何为R中的每个类别创建计数列

时间:2015-11-17 23:24:09

标签: r data.table dplyr reshape2

我有一个这样的数据框:

# Sample data: 20 observations, each with an ID, a measurement type and a
# failure status.
ID <- c(
  "A", "A", "B", "B", "A", "B", "B", "B", "A", "A",
  "A", "A", "B", "B", "A", "A", "A", "B", "B", "B"
)
Measurement <- c(
  "Len", "Len", "Len", "Wid", "Ht", "Ht", "Wid", "Len", "Ht", "Ht",
  "Wid", "Ht", "Len", "Ht", "Wid", "Len", "Wid", "Ht", "Len", "Wid"
)
STATUS <- c(
  "FAIL", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_HIGH", "FAIL#Pts", "FAIL", "FAIL_AVG_LOW", "FAIL",
  "FAIL#Pts", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_LOW", "FAIL", "FAIL_AVG_LOW", "FAIL", "FAIL#Pts"
)

# Column names are taken from the vector names.
df1 <- data.frame(ID, Measurement, STATUS)

# Category key: measurement and ID joined by a single space, e.g. "Len A".
df1$MEAS_ID <- paste(df1$Measurement, df1$ID, sep = " ")

我想创建两列:一列是每种失败状态各自的计数(Count),另一列是每个类别的总失败计数(Count_total)。我想要的输出如下:

   ID Measurement        STATUS Count Count_total MEAS_ID
1   A         Len          FAIL     2           3   Len A
2   A         Len  FAIL_AVG_LOW     1           3   Len A
3   A          Ht          FAIL     3           4    Ht A
4   A          Ht  FAIL_AVG_LOW     1           4    Ht A
5   A         Wid          FAIL     2           3   Wid A
6   A         Wid      FAIL#Pts     1           3   Wid A
7   B         Len          FAIL     2           4   Len B
8   B         Len FAIL_AVG_HIGH     2           4   Len B
9   B          Ht          FAIL     1           3    Ht B
10  B          Ht FAIL_AVG_HIGH     1           3    Ht B
11  B          Ht  FAIL_AVG_LOW     1           3    Ht B
12  B         Wid          FAIL     1           3   Wid B
13  B         Wid      FAIL#Pts     2           3   Wid B

我尝试以这种方式计算计数,但似乎无法按预期工作。

# Attempt from the question: for every row, store the number of distinct
# STATUS values found within that row's MEAS_ID group. Note this is the
# count of distinct statuses per group, not the number of rows per status.
df1$count <- ave(
  df1$STATUS,
  df1$MEAS_ID,
  FUN = function(x) length(unique(x))
)

1 个答案:

答案 0 :(得分:2)

使用dplyr包:

# Rebuild the sample data from the question (20 rows).
ID <- c(
  "A", "A", "B", "B", "A", "B", "B", "B", "A", "A",
  "A", "A", "B", "B", "A", "A", "A", "B", "B", "B"
)
Measurement <- c(
  "Len", "Len", "Len", "Wid", "Ht", "Ht", "Wid", "Len", "Ht", "Ht",
  "Wid", "Ht", "Len", "Ht", "Wid", "Len", "Wid", "Ht", "Len", "Wid"
)
STATUS <- c(
  "FAIL", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_HIGH", "FAIL#Pts", "FAIL", "FAIL_AVG_LOW", "FAIL",
  "FAIL#Pts", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_LOW", "FAIL", "FAIL_AVG_LOW", "FAIL", "FAIL#Pts"
)
df1 <- data.frame(ID, Measurement, STATUS)

# Append the category key ("<Measurement> <ID>") as the last column.
df1[["MEAS_ID"]] <- with(df1, paste(Measurement, ID, sep = " "))


library(dplyr)

# Build one row per (ID, Measurement, STATUS) combination with two counts:
#   N_category - total number of rows in the MEAS_ID category
#   n          - number of rows with that particular STATUS
#
# add_count() attaches the per-category total without leaving the data
# grouped, so the following count() keeps the columns in the order they are
# listed and no ungroup() is needed.
df1 %>%
  add_count(MEAS_ID, name = "N_category") %>%
  count(ID, Measurement, STATUS, MEAS_ID, N_category)


#        ID Measurement        STATUS MEAS_ID N_category     n
#     (fctr)      (fctr)        (fctr)   (chr)      (int) (int)
# 1       A          Ht          FAIL    Ht A          4     3
# 2       A          Ht  FAIL_AVG_LOW    Ht A          4     1
# 3       A         Len          FAIL   Len A          3     2
# 4       A         Len  FAIL_AVG_LOW   Len A          3     1
# 5       A         Wid          FAIL   Wid A          3     2
# 6       A         Wid      FAIL#Pts   Wid A          3     1
# 7       B          Ht          FAIL    Ht B          3     1
# 8       B          Ht FAIL_AVG_HIGH    Ht B          3     1
# 9       B          Ht  FAIL_AVG_LOW    Ht B          3     1
# 10      B         Len          FAIL   Len B          4     2
# 11      B         Len FAIL_AVG_HIGH   Len B          4     2
# 12      B         Wid          FAIL   Wid B          3     1
# 13      B         Wid      FAIL#Pts   Wid B          3     2

另一种方法是使用data.table包:

library(data.table)

# Convert df1 to a data.table by reference (no copy is made).
setDT(df1)

# Add the per-category total as a new column, also by reference.
df1[, N_category := .N, by = "MEAS_ID"]

# Count rows per status within each category; keyby sorts the result by the
# grouping columns.
df1[, .N, keyby = c("ID", "Measurement", "STATUS", "MEAS_ID", "N_category")]

#    ID Measurement        STATUS MEAS_ID N_category N
# 1:  A          Ht          FAIL    Ht A          4 3
# 2:  A          Ht  FAIL_AVG_LOW    Ht A          4 1
# 3:  A         Len          FAIL   Len A          3 2
# 4:  A         Len  FAIL_AVG_LOW   Len A          3 1
# 5:  A         Wid          FAIL   Wid A          3 2
# 6:  A         Wid      FAIL#Pts   Wid A          3 1
# 7:  B          Ht          FAIL    Ht B          3 1
# 8:  B          Ht FAIL_AVG_HIGH    Ht B          3 1
# 9:  B          Ht  FAIL_AVG_LOW    Ht B          3 1
# 10: B         Len          FAIL   Len B          4 2
# 11: B         Len FAIL_AVG_HIGH   Len B          4 2
# 12: B         Wid          FAIL   Wid B          3 1
# 13: B         Wid      FAIL#Pts   Wid B          3 2