我想在每个 Id 分组内,统计第一次出现“C”之前“I”出现的次数。我尝试了下面这段代码,但它统计的是整列中所有的“I”。我尝试过的代码:
library(plyr)
# Per-Id tally of every "I" in the Character column (it does not stop at
# the first "C").
Impres <- ddply(
  df, .(Id), summarize,
  No_of_I_before_First_C = sum(Character == "I", na.rm = TRUE)
)
示例数据
Id Character
1 I
1 I
1 C
1 I
2 I
2 C
输出应该是这样的
Id Count_Of_I_before_First_C
1 2
2 1
答案 0 :(得分:0)
这是一个想法,
# Number of elements of `x` that come before the first occurrence of
# `letter`.
#
# x:      vector of values for one group.
# letter: the value whose first occurrence ends the count.
#
# Returns a numeric scalar. Every element before `letter` is counted,
# whatever its value. If `letter` never occurs, all elements precede it,
# so length(x) is returned instead of NA.
first1 <- function(x, letter) {
  first_pos <- which(x == letter)[1]
  if (is.na(first_pos)) {
    # No match: behave as if `letter` sat just past the end of `x`.
    first_pos <- length(x) + 1L
  }
  first_pos - 1
}
# Run first1() on the Character values of each Id, stopping at the first "C".
aggregate(Character ~ Id, data = df, FUN = first1, letter = "C")
# Id Character
#1 1 2
#2 2 1
稍微概括一下,
# Count how many elements equal to `letter_count` appear before the first
# occurrence of `letter`.
#
# x:            vector of values for one group.
# letter:       the value whose first occurrence ends the count.
# letter_count: the value to count.
#
# Returns an integer scalar. If `letter` never occurs, the whole vector is
# scanned. Values are compared with `==` (exact match), not as a regular
# expression, and NA entries are never counted.
first1 <- function(x, letter, letter_count) {
  first_pos <- which(x == letter)[1]
  # seq_len() copes with zero elements and avoids the `1:NA` error that a
  # missing `letter` would otherwise cause.
  n_before <- if (is.na(first_pos)) length(x) else first_pos - 1L
  sum(x[seq_len(n_before)] == letter_count, na.rm = TRUE)
}
# Per Id: count the "I" values seen up to the first "C".
aggregate(
  Character ~ Id,
  data = df,
  FUN = first1,
  letter = "C",
  letter_count = "I"
)
# Id Character
#1 1 2
#2 2 1
答案 1 :(得分:0)
require(dplyr)
require(magrittr)
# Sample data from the question: four rows for Id 1, two rows for Id 2.
df <- data.frame(
  Id = rep(c(1, 2), times = c(4, 2)),
  Character = c("I", "I", "C", "I", "I", "C")
)
# foo() returns the number of "I" values that occur before the first "C".
#
# character: vector of values for one group.
#
# Returns an integer scalar. If there is no "C", every "I" is counted.
# NA entries count as neither "I" nor "C".
foo <- function(character) {
  # TRUE from the first "C" onwards; %in% never yields NA.
  seen_c <- cumsum(character %in% "C") > 0
  sum(character %in% "I" & !seen_c)
}
然后您可以使用此函数对数据进行汇总
# Apply foo() to the Character values of each Id.
summarise(
  group_by(df, Id),
  Count_Of_I_before_First_C = foo(Character)
)
结果:
# A tibble: 2 × 2
Id Count_Of_I_before_First_C
<dbl> <int>
1 1 2
2 2 1
答案 2 :(得分:0)
以下是 data.table 解决方案:
library(data.table)
# Sample data from the question.
dt <- data.table(Id = c(1,1,1,1,2,2), Character = c('I', 'I', 'C', 'I', 'I', 'C'))
# cnt.c is the running number of "C" rows within each Id, so cnt.c == 0
# marks the rows that come before the first "C".
dt[, cnt.c := cumsum(Character == "C"), by = Id]
# Count only the "I" rows before the first "C". Aggregating over all rows
# (rather than filtering first) keeps Ids that start with "C", with a count of 0.
res <- dt[, .(Count_Of_I_before_First_C = sum(Character == "I" & cnt.c == 0)), by = Id]
答案 3 :(得分:0)
也许:
library(dplyr)
# Count the "I" values that occur before the first "C", using run-length
# encoding.
#
# x: vector of values for one group (a factor is converted to character,
#    because rle() does not accept factors).
#
# Returns an integer scalar: the summed lengths of all "I" runs located
# before the first "C" run. If there is no "C", every "I" run is counted;
# if no "I" precedes the first "C", the result is 0.
rlei <- function(x) {
  r <- rle(as.character(x))
  first_c <- match("C", r$values)
  # Number of runs that precede the first "C" run (all of them if no "C").
  n_runs <- if (is.na(first_c)) length(r$values) else first_c - 1L
  before <- seq_len(n_runs)
  sum(r$lengths[before][r$values[before] %in% "I"])
}
# Refer to the column by its bare name so summarise() hands rlei() the
# values of the current Id only; `.$Character` would pass the whole column
# of the piped data frame to every group.
group_by(df, Id) %>%
  summarise(Count_Of_I_before_First_C = rlei(Character))