如何在 R 中对某一列里连续出现的值序列进行分组标记?

时间:2016-07-05 15:45:18

标签: r dplyr

目标

我有一个名为 'State' 的列,其中包含不同的字符串。其中一个取值是 'Following'。我想把第一段连续出现的 'Following'(连续值序列)标记为 '1st',第二段标记为 '2nd',依此类推。以下是示例数据:

数据

# Sample data for the question (looks like dput() output): 37 rows for a single
# vehicle ("3361-588"), frames 2110 to 2146.
#   State - either "Following" or "Approaching-fastveh"
#   grp   - the labels the asker wants: "1st"/"2nd" for the first/second
#           contiguous run of "Following", "." for every other row
# NOTE(review): this expression is not assigned to a name here; the answers
# below presumably refer to this object as `foo` - confirm before running them.
structure(list(Vehicle.ID2 = c("3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588", 
"3361-588", "3361-588", "3361-588", "3361-588"), Frame.ID = 2110:2146, 
    State = c("Following", "Following", "Following", "Following", 
    "Following", "Following", "Following", "Following", "Following", 
    "Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh", 
    "Following", "Following", "Following", "Following", "Following", 
    "Following", "Following", "Following", "Following", "Following", 
    "Following", "Following", "Following", "Following", "Following", 
    "Following", "Following", "Following", "Approaching-fastveh", 
    "Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh", 
    "Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh"
    ), grp = c("1st", "1st", "1st", "1st", "1st", "1st", "1st", 
    "1st", "1st", ".", ".", ".", "2nd", "2nd", "2nd", "2nd", 
    "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", 
    "2nd", "2nd", "2nd", "2nd", "2nd", ".", ".", ".", ".", ".", 
    ".", ".")), .Names = c("Vehicle.ID2", "Frame.ID", "State", 
"grp"), row.names = c(NA, -37L), class = c("tbl_df", "data.frame"
))

期望输出

期望的输出即上面示例数据中的 grp 列。

我尝试了什么

我尝试使用 rle,但它只能计算唯一值。请指导我如何解决这个问题。请注意,在原始数据中,我有多个 Vehicle.ID2,因此我更喜欢使用 dplyr。

4 个答案:

答案 0 :(得分:2)

我们可以使用rle

# Label each contiguous run of "Following" in foo$State with "1", "2", ...
# and every other row with ".". Wrapped in local() so the helper variables
# do not leak into the calling environment; the value is a character vector
# with one element per row of foo.
local({
  runs <- rle(foo$State == "Following")
  is_following <- runs$values
  # Start with "." for every run, then number only the "Following" runs.
  labels <- rep(".", length(is_following))
  labels[is_following] <- seq_along(is_following[is_following])
  # Expand the per-run labels back to one label per row.
  inverse.rle(list(lengths = runs$lengths, values = labels))
})

如果有不同的 'Vehicle.ID2',我们可以使用 ave

# Same labelling as above, but restarted for each Vehicle.ID2: ave() applies
# the function to the "is Following" flags of one vehicle at a time and puts
# the results back in the original row order.
ave(
  foo$State == "Following",
  foo$Vehicle.ID2,
  FUN = function(is_following) {
    runs <- rle(is_following)
    # "." for every run, then number the "Following" runs 1, 2, ...
    labels <- rep(".", length(runs$values))
    labels[runs$values] <- seq_len(sum(runs$values))
    runs$values <- labels
    # Expand per-run labels back to one label per row.
    inverse.rle(runs)
  }
)
#[1] "1" "1" "1" "1" "1" "1" "1" "1" "1" "." "." "." "2" "2" "2" "2" "2" 
#[18] "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "." "." "." "." "." "." "."

答案 1 :(得分:1)

作为 @akrun 答案的替代方案(我正在学习 rle,谢谢!),这里给出一个 dplyr 风格的建议:

# Number the contiguous runs of "Following" within each vehicle.
# group_by(Vehicle.ID2) restarts the counter for every vehicle and stops a run
# at the end of one vehicle from being merged with a run at the start of the
# next; ungroup() returns a plain (ungrouped) tibble afterwards.
foo %>%
  group_by(Vehicle.ID2) %>%
  mutate(
    # A new run starts where State is "Following" and the previous row in the
    # same vehicle was not; cumsum() turns those starts into a run counter.
    grp = cumsum(State == "Following" &
                 State != lag(State, default = "")),
    # Rows that are not "Following" get "-" (this makes grp a character column).
    grp = ifelse(State == "Following", grp, "-")
  ) %>%
  ungroup()
# Source: local data frame [37 x 4]
#    Vehicle.ID2 Frame.ID               State   grp
#          <chr>    <int>               <chr> <chr>
# 1     3361-588     2110           Following     1
# 2     3361-588     2111           Following     1
# 3     3361-588     2112           Following     1
# 4     3361-588     2113           Following     1
# 5     3361-588     2114           Following     1
# ..         ...      ...                 ...   ...

答案 2 :(得分:1)

您也可以使用循环来完成此操作,但它不像此处发布的 rle 或 dplyr 解决方案那样优雅。

# Label each contiguous run of "Following" rows in foo$State with a running
# counter (1, 2, ...); every other row keeps the initial 0.
temp <- numeric(nrow(foo))  # sized from the data rather than a hard-coded 37
label <- 1
now_following <- FALSE
for (i in seq_along(temp)) {  # seq_along() is safe when foo has no rows
  if (foo$State[i] == "Following") {
    temp[i] <- label
    now_following <- TRUE
  } else if (now_following) {
    # A "Following" run has just ended: the next run gets the next label.
    now_following <- FALSE
    label <- label + 1
  }
}
# Append the labels to the data as a new column named `temp`.
df <- cbind(foo, temp)

结果:

> df
   Vehicle.ID2 Frame.ID               State temp
1     3361-588     2110           Following    1
2     3361-588     2111           Following    1
3     3361-588     2112           Following    1
4     3361-588     2113           Following    1
5     3361-588     2114           Following    1
6     3361-588     2115           Following    1
7     3361-588     2116           Following    1
8     3361-588     2117           Following    1
9     3361-588     2118           Following    1
10    3361-588     2119 Approaching-fastveh    0
11    3361-588     2120 Approaching-fastveh    0
12    3361-588     2121 Approaching-fastveh    0
13    3361-588     2122           Following    2
14    3361-588     2123           Following    2
15    3361-588     2124           Following    2
16    3361-588     2125           Following    2
17    3361-588     2126           Following    2
18    3361-588     2127           Following    2
19    3361-588     2128           Following    2
20    3361-588     2129           Following    2
21    3361-588     2130           Following    2
22    3361-588     2131           Following    2
23    3361-588     2132           Following    2
24    3361-588     2133           Following    2
25    3361-588     2134           Following    2
26    3361-588     2135           Following    2
27    3361-588     2136           Following    2
28    3361-588     2137           Following    2
29    3361-588     2138           Following    2
30    3361-588     2139           Following    2
31    3361-588     2140 Approaching-fastveh    0
32    3361-588     2141 Approaching-fastveh    0
33    3361-588     2142 Approaching-fastveh    0
34    3361-588     2143 Approaching-fastveh    0
35    3361-588     2144 Approaching-fastveh    0
36    3361-588     2145 Approaching-fastveh    0
37    3361-588     2146 Approaching-fastveh    0

如果您愿意,可以将其封装成一个函数:

#' Label contiguous runs of a target string
#'
#' Walks through `string_vector` and numbers each contiguous run of elements
#' equal to `string` (1 for the first run, 2 for the second, ...). Elements
#' that do not match, including `NA`, are labelled 0.
#'
#' @param string_vector Vector of strings to scan.
#' @param string The value whose runs are labelled. Defaults to "Following",
#'   so existing calls that pass only `string_vector` keep working.
#' @return A numeric vector the same length as `string_vector`.
label_contiguous_strings <- function(string_vector, string = "Following") {
  labels <- numeric(length(string_vector))
  label <- 1
  in_run <- FALSE
  # seq_along() yields an empty sequence for empty input (1:0 would not).
  for (i in seq_along(string_vector)) {
    # isTRUE() treats an NA comparison as "no match" instead of erroring in if.
    if (isTRUE(string_vector[i] == string)) {
      labels[i] <- label
      in_run <- TRUE
    } else if (in_run) {
      # A run has just ended: the next run gets the next label.
      in_run <- FALSE
      label <- label + 1
    }
  }
  labels
}

结果:

> label_contiguous_strings(foo$State)
 [1] 1 1 1 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0

答案 3 :(得分:1)

以下是使用rle的第二种方法:

# Run-length encode State: one entry per contiguous run of equal values.
myRle <- rle(df$State)
# Number the "Following" runs 1, 2, ... in order of appearance and blank out
# (NA) the runs of any other state.
counts <- replace(
  cumsum(myRle$values == "Following"),
  myRle$values != "Following",
  NA
)
# Expand the per-run labels back to one value per row and store them in grp.
df$grp <- inverse.rle(list(lengths = myRle$lengths, values = counts))