我有一个名为' State'其中包含不同的字符。其中一个角色是'关注'。我想标记'以下'的第一个序列(连续值)。作为' 1st',第二' 2nd'等等。以下是示例数据:
structure(list(Vehicle.ID2 = c("3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588", "3361-588", "3361-588",
"3361-588", "3361-588", "3361-588", "3361-588"), Frame.ID = 2110:2146,
State = c("Following", "Following", "Following", "Following",
"Following", "Following", "Following", "Following", "Following",
"Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh",
"Following", "Following", "Following", "Following", "Following",
"Following", "Following", "Following", "Following", "Following",
"Following", "Following", "Following", "Following", "Following",
"Following", "Following", "Following", "Approaching-fastveh",
"Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh",
"Approaching-fastveh", "Approaching-fastveh", "Approaching-fastveh"
), grp = c("1st", "1st", "1st", "1st", "1st", "1st", "1st",
"1st", "1st", ".", ".", ".", "2nd", "2nd", "2nd", "2nd",
"2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd", "2nd",
"2nd", "2nd", "2nd", "2nd", "2nd", ".", ".", ".", ".", ".",
".", ".")), .Names = c("Vehicle.ID2", "Frame.ID", "State",
"grp"), row.names = c(NA, -37L), class = c("tbl_df", "data.frame"
))
rle
我尝试使用Vehicle.ID2
,但它只能计算唯一值。请指导我如何解决这个问题。请注意,在原始数据中,我有多个dplyr
,因此我更喜欢使用{
"concertTicket" : { ... some valid json object ... }
}
。
答案 0 :(得分:2)
我们可以使用rle
inverse.rle(within.list(rle(foo$State=="Following"), {
values1 <- values
values1[values] <- seq_along(values[values])
values1[!values] <- '.'
values <- values1}))
如果有不同的&#39; Vehicle.ID2&#39;,我们可以使用ave
with(foo, ave(State == "Following", Vehicle.ID2, FUN = function(x) {
inverse.rle(within.list(rle(x), {
values1 <- values
values1[values] <- seq_along(values[values])
values1[!values] <- '.'
values <- values1
}))
}))
#[1] "1" "1" "1" "1" "1" "1" "1" "1" "1" "." "." "." "2" "2" "2" "2" "2"
#[19] "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "2" "." "." "." "." "." "." "."
答案 1 :(得分:1)
作为@ akrun答案的替代方案(我正在学习rle
,谢谢!),这是一个dplyr
- esque建议:
foo %>%
mutate(
grp = cumsum(State == "Following" &
State != lag(State, default="")),
grp = ifelse(State == "Following", grp, "-")
)
# Source: local data frame [37 x 4]
# Vehicle.ID2 Frame.ID State grp
# <chr> <int> <chr> <chr>
# 1 3361-588 2110 Following 1
# 2 3361-588 2111 Following 1
# 3 3361-588 2112 Following 1
# 4 3361-588 2113 Following 1
# 5 3361-588 2114 Following 1
# .. ... ... ... ...
答案 2 :(得分:1)
您可以使用循环执行此操作,但它不像此处发布的rle
和dplyr
解决方案那样优雅。
temp <- rep(0,37)
label <- 1
now_following <- 0
for (i in 1:length(temp)) {
if (foo$State[i] == "Following") {
temp[i] <- label
now_following <- 1
} else {
temp[i] <- 0
if (now_following) {
now_following <- 0
label <- label + 1
}
}
}
df <- cbind(foo,temp)
结果:
> df
Vehicle.ID2 Frame.ID State temp
1 3361-588 2110 Following 1
2 3361-588 2111 Following 1
3 3361-588 2112 Following 1
4 3361-588 2113 Following 1
5 3361-588 2114 Following 1
6 3361-588 2115 Following 1
7 3361-588 2116 Following 1
8 3361-588 2117 Following 1
9 3361-588 2118 Following 1
10 3361-588 2119 Approaching-fastveh 0
11 3361-588 2120 Approaching-fastveh 0
12 3361-588 2121 Approaching-fastveh 0
13 3361-588 2122 Following 2
14 3361-588 2123 Following 2
15 3361-588 2124 Following 2
16 3361-588 2125 Following 2
17 3361-588 2126 Following 2
18 3361-588 2127 Following 2
19 3361-588 2128 Following 2
20 3361-588 2129 Following 2
21 3361-588 2130 Following 2
22 3361-588 2131 Following 2
23 3361-588 2132 Following 2
24 3361-588 2133 Following 2
25 3361-588 2134 Following 2
26 3361-588 2135 Following 2
27 3361-588 2136 Following 2
28 3361-588 2137 Following 2
29 3361-588 2138 Following 2
30 3361-588 2139 Following 2
31 3361-588 2140 Approaching-fastveh 0
32 3361-588 2141 Approaching-fastveh 0
33 3361-588 2142 Approaching-fastveh 0
34 3361-588 2143 Approaching-fastveh 0
35 3361-588 2144 Approaching-fastveh 0
36 3361-588 2145 Approaching-fastveh 0
37 3361-588 2146 Approaching-fastveh 0
如果您愿意,可以将其打包成一个功能:
label_contiguous_strings <- function(string_vector, string) {
# Function returns a vector of labels for the string vector input.
len_vector <- length(string_vector)
temp <- rep(0,len_vector)
label <- 1
now_following <- 0
for (i in 1:len_vector) {
if (string_vector[i] == "Following") {
temp[i] <- label
now_following <- 1
} else {
temp[i] <- 0
if (now_following) {
now_following <- 0
label <- label + 1
}
}
}
return(temp)
}
结果:
> label_contiguous_strings(foo$State)
[1] 1 1 1 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0
答案 3 :(得分:1)
以下是使用rle
的第二种方法:
# get rle
myRle <- rle(df$State)
# get categories
counts <- cumsum(myRle$values == "Following")
# add in missings
is.na(counts) <- myRle$values != "Following"
# build variable
df$grp <- rep(counts, myRle$lengths)