Question

我正在寻找一种方法来添加 "desired_result" 列，最好使用 dplyr 和/或 ave()。请参见下面的数据框，其中分组列是 "section"，而我希望 "desired_result" 列按顺序计数的唯一值位于 "exhibit" 列中：

# Example data from the question, bound to `df` so it can be fed straight
# into a pipeline instead of being an anonymous dput() expression.
#   section        - grouping column
#   exhibit        - factor (levels "a", "b", "c") whose values are counted
#   desired_result - expected output: a counter over `exhibit` that restarts
#                    in every section and repeats for duplicated exhibits
df <- data.frame(
  section = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
  exhibit = factor(c("a", "b", "c", "c", "a", "b", "b", "c")),
  desired_result = c(1L, 2L, 3L, 3L, 1L, 2L, 2L, 3L)
)

Answer 1

我最近向 data.table 添加了一个函数 rleid()（目前在开发版本 1.9.5 中可用），它正好能做到这一点。如果您有兴趣，可以按照 data.table 的安装说明安装开发版本。

# rleid() first appeared in the data.table development version 1.9.5 and has
# shipped in the CRAN releases since 1.9.6.
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the pipeline fail later with "could not find function".
library(data.table)
library(dplyr)

# DF is the data frame from the question. Within each section, rleid() gives
# every run of identical consecutive `exhibit` values its own id, so repeated
# neighbours share one number and the count restarts in the next section.
DF %>%
  group_by(section) %>%
  mutate(desired_results = rleid(exhibit))

#   section exhibit desired_result desired_results
# 1       1       a              1               1
# 2       1       b              2               2
# 3       1       c              3               3
# 4       1       c              3               3
# 5       2       a              1               1
# 6       2       b              2               2
# 7       2       b              2               2
# 8       2       c              3               3

Answer 2

那就用 dense_rank：

library(dplyr)

# Rank the `exhibit` values separately inside every section. dense_rank()
# gives tied values the same rank and leaves no gaps between ranks, so
# duplicated exhibits repeat their number.
df %>%
  group_by(section) %>%
  mutate(desire = dense_rank(exhibit))
#  section exhibit desired_result desire
#1       1       a              1      1
#2       1       b              2      2
#3       1       c              3      3
#4       1       c              3      3
#5       2       a              1      1
#6       2       b              2      2
#7       2       b              2      2
#8       2       c              3      3

Answer 3

如果需要精确的编号，并且希望结果保持一致（即不同 section 中的相同 exhibit 始终得到相同的数字），您可以尝试：

library(dplyr)

df <- data.frame(
  section = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
  exhibit = c("a", "b", "c", "c", "a", "b", "b", "c")
)

# Take the factor levels when `exhibit` is a factor; otherwise fall back to
# its sorted distinct values (levels(factor(df$exhibit)) would work as well).
saveLevels <- levels(df$exhibit)
if (is.null(saveLevels)) {
  saveLevels <- sort(unique(df$exhibit))
}

# The level set is computed once over the whole column, so a given exhibit
# maps to the same integer in every section.
df %>%
  group_by(section) %>%
  mutate(answer = as.integer(factor(exhibit, levels = saveLevels)))
## Source: local data frame [8 x 3]
## Groups: section
##   section exhibit answer
## 1       1       a      1
## 2       1       b      2
## 3       1       c      3
## 4       1       c      3
## 5       2       a      1
## 6       2       b      2
## 7       2       b      2
## 8       2       c      3

如果（或当）后面的 section 中出现新的 exhibit，它们应该获得新的编号。（注意最后一个 exhibit 与之前不同。）

df2 <- data.frame(
  section = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
  exhibit = c("a", "b", "c", "c", "a", "b", "b", "d")
)

# Same level lookup as before: factor levels if present, otherwise the sorted
# distinct values. The new exhibit "d" extends the level set to four entries.
saveLevels2 <- levels(df2$exhibit)
if (is.null(saveLevels2)) {
  saveLevels2 <- sort(unique(df2$exhibit))
}

# Exhibits seen earlier keep their numbers; "d" receives the next one.
df2 %>%
  group_by(section) %>%
  mutate(answer = as.integer(factor(exhibit, levels = saveLevels2)))
## Source: local data frame [8 x 3]
## Groups: section
##   section exhibit answer
## 1       1       a      1
## 2       1       b      2
## 3       1       c      3
## 4       1       c      3
## 5       2       a      1
## 6       2       b      2
## 7       2       b      2
## 8       2       d      4

R - 添加在组内按顺序计数但在重复时重复的列

3 个答案: