我有一个这样的数据框:
# Sample data: 20 failure records, one element per record in each vector.
ID <- c(
  "A", "A", "B", "B", "A", "B", "B", "B", "A", "A",
  "A", "A", "B", "B", "A", "A", "A", "B", "B", "B"
)
Measurement <- c(
  "Len", "Len", "Len", "Wid", "Ht", "Ht", "Wid", "Len", "Ht", "Ht",
  "Wid", "Ht", "Len", "Ht", "Wid", "Len", "Wid", "Ht", "Len", "Wid"
)
STATUS <- c(
  "FAIL", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_HIGH", "FAIL#Pts", "FAIL", "FAIL_AVG_LOW", "FAIL",
  "FAIL#Pts", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_LOW", "FAIL", "FAIL_AVG_LOW", "FAIL", "FAIL#Pts"
)

# Assemble the data frame; column names are taken from the vector names.
df1 <- data.frame(ID, Measurement, STATUS)

# MEAS_ID is the category key: "<Measurement> <ID>", e.g. "Len A".
df1$MEAS_ID <- paste(df1$Measurement, df1$ID, sep = " ")
我想创建两列:一列是每种失败状态(STATUS)各自的计数,另一列是每个类别(MEAS_ID)的失败总数。我期望的输出如下:
ID Measurement STATUS Count Count_total MEAS_ID
1 A Len FAIL 2 3 Len A
2 A Len FAIL_AVG_LOW 1 3 Len A
3 A Ht FAIL 3 4 Ht A
4 A Ht FAIL_AVG_LOW 1 4 Ht A
5 A Wid FAIL 2 3 Wid A
6 A Wid FAIL#Pts 1 3 Wid A
7 B Len FAIL 2 4 Len B
8 B Len FAIL_AVG_HIGH 2 4 Len B
9 B Ht FAIL 1 3 Ht B
10 B Ht FAIL_AVG_HIGH 1 3 Ht B
11 B Ht FAIL_AVG_LOW 1 3 Ht B
12 B Wid FAIL 1 3 Wid B
13 B Wid FAIL#Pts 2 3 Wid B
我尝试用下面的方式计算计数,但结果与预期不符:
# Attempted count column. ave() applies FUN to STATUS within each MEAS_ID
# group and spreads the result back over the rows of that group. FUN here is
# the number of *distinct* STATUS values in the group, not the number of rows
# per STATUS.
df1$count <- ave(
  df1$STATUS,
  df1$MEAS_ID,
  FUN = function(x) length(unique(x))
)
答案 0 :(得分:2)
使用 dplyr 包:
# Rebuild the example data used in the question (20 failure records).
ID <- c(
  "A", "A", "B", "B", "A", "B", "B", "B", "A", "A",
  "A", "A", "B", "B", "A", "A", "A", "B", "B", "B"
)
Measurement <- c(
  "Len", "Len", "Len", "Wid", "Ht", "Ht", "Wid", "Len", "Ht", "Ht",
  "Wid", "Ht", "Len", "Ht", "Wid", "Len", "Wid", "Ht", "Len", "Wid"
)
STATUS <- c(
  "FAIL", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_HIGH", "FAIL#Pts", "FAIL", "FAIL_AVG_LOW", "FAIL",
  "FAIL#Pts", "FAIL", "FAIL_AVG_HIGH", "FAIL", "FAIL",
  "FAIL_AVG_LOW", "FAIL", "FAIL_AVG_LOW", "FAIL", "FAIL#Pts"
)
df1 <- data.frame(ID, Measurement, STATUS)

# Category key combining measurement and ID, separated by a single space.
df1[["MEAS_ID"]] <- with(df1, paste(Measurement, ID, sep = " "))
library(dplyr)

df1 %>%
  # Total number of failure rows in each MEAS_ID category.
  group_by(MEAS_ID) %>%
  mutate(N_category = n()) %>%
  # Number of rows for each distinct combination of the listed columns;
  # count() stores it in a column named `n`.
  count(ID, Measurement, STATUS, MEAS_ID, N_category) %>%
  # Drop the grouping so later verbs operate on the whole table.
  ungroup()
# ID Measurement STATUS MEAS_ID N_category n
# (fctr) (fctr) (fctr) (chr) (int) (int)
# 1 A Ht FAIL Ht A 4 3
# 2 A Ht FAIL_AVG_LOW Ht A 4 1
# 3 A Len FAIL Len A 3 2
# 4 A Len FAIL_AVG_LOW Len A 3 1
# 5 A Wid FAIL Wid A 3 2
# 6 A Wid FAIL#Pts Wid A 3 1
# 7 B Ht FAIL Ht B 3 1
# 8 B Ht FAIL_AVG_HIGH Ht B 3 1
# 9 B Ht FAIL_AVG_LOW Ht B 3 1
# 10 B Len FAIL Len B 4 2
# 11 B Len FAIL_AVG_HIGH Len B 4 2
# 12 B Wid FAIL Wid B 3 1
# 13 B Wid FAIL#Pts Wid B 3 2
另一种方法是使用 data.table 包:
library(data.table)

# Convert df1 to a data.table by reference (no copy is made).
setDT(df1)

# Add the per-category total in place: .N is the row count of each MEAS_ID group.
df1[, N_category := .N, by = .(MEAS_ID)]

# Count rows per (ID, Measurement, STATUS, MEAS_ID, N_category) combination;
# keyby also sorts the result by these columns.
df1[, .N, keyby = .(ID, Measurement, STATUS, MEAS_ID, N_category)]
# ID Measurement STATUS MEAS_ID N_category N
# 1: A Ht FAIL Ht A 4 3
# 2: A Ht FAIL_AVG_LOW Ht A 4 1
# 3: A Len FAIL Len A 3 2
# 4: A Len FAIL_AVG_LOW Len A 3 1
# 5: A Wid FAIL Wid A 3 2
# 6: A Wid FAIL#Pts Wid A 3 1
# 7: B Ht FAIL Ht B 3 1
# 8: B Ht FAIL_AVG_HIGH Ht B 3 1
# 9: B Ht FAIL_AVG_LOW Ht B 3 1
# 10: B Len FAIL Len B 4 2
# 11: B Len FAIL_AVG_HIGH Len B 4 2
# 12: B Wid FAIL Wid B 3 1
# 13: B Wid FAIL#Pts Wid B 3 2