计算某个字母在特定字母首次出现之前的出现次数

时间:2016-11-16 07:20:31

标签: r aggregate

我想在每个 Id 组内,计算第一个“C”出现之前“I”的出现次数。我尝试过下面这段代码,但它统计的是该列中所有的“I”。我试过的代码:

# Attach plyr; ddply(), .() and summarize() below come from it.
library(plyr)
# For each Id, count the rows where Character == "I". Note that this tallies
# every "I" in the group, not only the ones before the first "C".
Impres = ddply(df, .(Id), summarize, No_of_I_before_First_C = length(which(Character == "I")))

示例数据

Id  Character
1     I
1     I
1     C
1     I
2     I
2     C

输出应该是这样的

Id  Count_Of_I_before_First_C
1     2
2     1

4 个答案:

答案 0 :(得分:0)

这是一个想法,

# Number of elements of `x` that precede the first occurrence of `letter`.
# Yields NA when `letter` does not occur in `x`.
first1 <- function(x, letter) {
  hits <- which(x == letter)
  hits[1] - 1
}

# Run first1() on the Character values of each Id, searching for "C".
aggregate(Character ~ Id, data = df, FUN = first1, letter = "C")
#  Id Character
#1  1         2
#2  2         1

稍微概括一下,

# Count the elements matching `letter_count` that occur strictly before the
# first occurrence of `letter` in `x`.
#
# x            vector of values to scan (character or factor).
# letter       value marking where counting stops.
# letter_count pattern to count; it is handed to grepl(), so it is treated as
#              a regular expression.
#
# Returns an integer count. When `letter` never occurs (including empty `x`)
# the whole of `x` is scanned instead of failing on `1:NA`.
first1 <- function(x, letter, letter_count) {
  ind <- which(x == letter)[1]
  # which(...)[1] is NA when there is no match; in that case nothing cuts the
  # scan short, so every element counts as "before".
  n_before <- if (is.na(ind)) length(x) else ind - 1L
  # seq_len() is safe for n_before == 0, unlike 1:n_before.
  sum(grepl(letter_count, x[seq_len(n_before)]))
}

# Run first1() on the Character values of each Id: stop at "C", count "I".
aggregate(
  Character ~ Id,
  data = df,
  FUN = first1,
  letter = "C",
  letter_count = "I"
)
#  Id Character
#1  1         2
#2  2         1

答案 1 :(得分:0)

require(dplyr)
require(magrittr)
# Sample data from the question: four rows for Id 1 and two rows for Id 2.
df <- data.frame(
  Id = rep(c(1, 2), times = c(4, 2)),
  Character = c("I", "I", "C", "I", "I", "C")
)

这个函数会给出第一个“C”之前“I”的数量:
# Count the "I" values that occur before the first "C" in `character`.
#
# character  vector of letters (character or factor), in row order.
#
# Returns an integer count. If there is no "C", every "I" is counted.
# NA elements are treated as neither "I" nor "C": %in% never yields NA, so a
# missing value cannot turn the whole count into NA.
foo <- function(character) {
  # The running number of "C" seen so far is 0 only before the first "C".
  is_before_c <- cumsum(character %in% "C") == 0L
  is_i <- character %in% "I"

  sum(is_i & is_before_c)
}

然后您可以使用此函数按 Id 汇总数据

# Group the rows by Id, then reduce each group to its foo() count.
summarise(
  group_by(df, Id),
  Count_Of_I_before_First_C = foo(Character)
)

结果:

# A tibble: 2 × 2
     Id Count_Of_I_before_First_C
  <dbl>                     <int>
1     1                         2
2     2                         1

答案 2 :(得分:0)

以下是data.table解决方案:

library(data.table)
dt <- data.table(Id = c(1,1,1,1,2,2), Character = c('I', 'I', 'C', 'I', 'I', 'C'))
# Running count of "C" within each Id; it is 0 only on rows before the first "C".
dt[, cnt.c := cumsum(Character == "C"), by = Id]
# Sum the "I" rows that precede the first "C". Aggregating over every row,
# instead of filtering to cnt.c == 0 first, keeps an Id whose first row is "C"
# in the result with a count of 0, and rows that are not "I" are not counted.
res <- dt[, .(
  Count_Of_I_before_First_C = sum(Character == "I" & cnt.c == 0)
), by = Id]

答案 3 :(得分:0)

也许:

library(dplyr)

# Count the "I" values that occur before the first "C" in `x`, using the
# run-length encoding of `x`.
#
# x  vector of letters (character or factor), in row order.
#
# Returns an integer count: 0 when "C" comes first or `x` is empty, and the
# total number of "I" when there is no "C" at all.
rlei <- function(x) {
  # rle() rejects factors, so work on the character representation.
  r <- rle(as.character(x))
  first_c <- match("C", r$values)
  # Runs located before the first "C" run; all runs when "C" never occurs.
  n_runs <- if (is.na(first_c)) length(r$values) else first_c - 1L
  before <- seq_len(n_runs)
  # Add up the lengths of every "I" run in that range, not just the first one.
  # %in% keeps NA values from leaking into the index.
  sum(r$lengths[before][r$values[before] %in% "I"])
}

# Refer to the column by its bare name so summarise() evaluates it per group.
# `.$Character` would be the Character column of the entire piped data frame,
# giving every Id the same whole-column result.
group_by(df, Id) %>% 
  summarise(Count_Of_I_before_First_C = rlei(Character))