Question

这与我最近提出的一个问题类似，但有所不同。假设我有以下数据：

library(tidyverse)
    # Example data: `x` is the grouping key, `y` is a status label (note the
    # inconsistent leading whitespace, e.g. " sus" vs "  sus"), and `indicator`
    # is a 0/1 flag. The class vector is set by hand so the object is a tibble,
    # exactly as in the dput() output this was taken from.
    df <- data.frame(
      x = rep(
        c("a", "b", "c", "d", "e", "f", "g"),
        times = c(4, 5, 4, 2, 2, 1, 8)
      ),
      y = c(
        " free", " with", "  sus", "  sus",
        "  sus", " free", " free", "  sus", " free",
        " with", " sus", " free", "  sus",
        " free", " free",
        " with", "  sus",
        "  sus",
        " free", " sus", " sus", " sus", " sus", " free", " sus", " free"
      ),
      indicator = c(
        0, 0, 1, 0,
        1, 0, 0, 1, 0,
        0, 1, 0, 1,
        0, 0,
        0, 1,
        1,
        0, 0, 1, 0, 0, 0, 1, 0
      ),
      stringsAsFactors = FALSE
    )
    class(df) <- c("tbl_df", "tbl", "data.frame")
    df
       x     y       indicator
       <chr> <chr>       <dbl>
     1 a     " free"         0
     2 a     " with"         0
     3 a     "  sus"         1
     4 a     "  sus"         0
     5 b     "  sus"         1
     6 b     " free"         0
     7 b     " free"         0
     8 b     "  sus"         1
     9 b     " free"         0
    10 c     " with"         0
    11 c     " sus"          1
    12 c     " free"         0
    13 c     "  sus"         1
    14 d     " free"         0
    15 d     " free"         0
    16 e     " with"         0
    17 e     "  sus"         1
    18 f     "  sus"         1
    19 g     " free"         0
    20 g     " sus"          0
    21 g     " sus"          1
    22 g     " sus"          0
    23 g     " sus"          0
    24 g     " free"         0
    25 g     " sus"          1
    26 g     " free"         0

我想创建一个变量：按变量 x 分组，当 indicator==1 时，分别向前和向后查找最近的一个不等于 sus 的 y 值；只有当前后这两个值都等于 free 时，新变量才取 1。因此，如果在前一个或后一个 free 之前先遇到了 with，新变量就不取 1。如果 indicator==1 的行位于组的第一行或最后一行，则假定缺失的那一侧（上一行或下一行）不是 with，例如组 b、c、f。我想要的输出是：

       x     y       indicator newvariable
       <chr> <chr>       <dbl>       <dbl>
     1 a     " free"         0           0
     2 a     " with"         0           0
     3 a     "  sus"         1           0
     4 a     "  sus"         0           0
     5 b     "  sus"         1           1
     6 b     " free"         0           0
     7 b     " free"         0           0
     8 b     "  sus"         1           1
     9 b     " free"         0           0
    10 c     " with"         0           0
    11 c     " sus"          1           0
    12 c     " free"         0           0
    13 c     "  sus"         1           1
    14 d     " free"         0           0
    15 d     " free"         0           0
    16 e     " with"         0           0
    17 e     "  sus"         1           0
    18 f     "  sus"         1           1
    19 g     " free"         0           0
    20 g     " sus"          0           0
    21 g     " sus"          1           1
    22 g     " sus"          0           0
    23 g     " sus"          0           0
    24 g     " free"         0           0
    25 g     " sus"          1           1
    26 g     " free"         0           0

我想要一种灵活的方法，它可以遍历许多行（在 free 之前可以有许多 sus，并且像组 g 一样，每个组可以有多个 indicator==1）。我在想以下内容，但我希望 lag 和 lead 能够遍历许多先前和随后的行：

    df %>% 
      group_by(x) %>% 
      mutate(newvariable = as.integer(indicator == 1 & lag(y[y != "sus"]) =='free' & lead(y[y != "sus"]) == 'free' )) 
    #taken idea from previous question
    #mutate(newvariable = as.integer(last(y) == 'sus' & last(y[y != "sus"]) == 'with')

我认为我无法采用与上一个问题相同的方法，但是如果有人有类似的想法，能否分享一下？也许可以用 last？

Answer 1

这似乎可以完成工作。我使用的技巧是建立一个临时数据帧，在该数据帧中，连续的相同y值序列会折叠成一条记录。

# Add `fixed_indicator`: within each group of `x`, a row keeps its flag (1)
# only when `indicator == 1` and neither the run of identical `y` labels just
# before it nor the run just after it is "with". A missing neighbour at the
# start or end of a group is treated as "free".
#
# Labels are compared after trimws(), so values that differ only in padding
# (e.g. " sus" and "  sus") belong to the same run and " with" is matched
# regardless of how many spaces precede it. As a consequence the helper
# columns `prev_distinct_y` / `next_distinct_y` hold trimmed labels.
#
# Columns added: i, subgroup, prev_distinct_y, next_distinct_y,
# fixed_indicator. The result is returned ungrouped.

# Step 1: number the rows and label each run of consecutive equal `y` values.
# `subgroup` is constant within a run of records with constant x and y.
df <- df %>%
  group_by(x) %>%
  mutate(
    i = row_number(),
    y_label = trimws(y),
    subgroup = cumsum(y_label != lag(y_label, default = ""))
  ) %>%
  ungroup()

# Step 2: collapse every run to a single record in a temporary data frame,
# look one record back and one record ahead there, and join the neighbouring
# labels back onto the full data with explicit keys.
df <- df %>%
  left_join(
    df %>%
      distinct(x, subgroup, y_label) %>%
      group_by(x) %>%
      mutate(
        prev_distinct_y = lag(y_label, default = "free"),
        next_distinct_y = lead(y_label, default = "free")
      ) %>%
      ungroup(),
    by = c("x", "subgroup", "y_label")
  ) %>%
  mutate(
    # as.numeric() keeps the result a double, like the `indicator` column.
    fixed_indicator = as.numeric(
      indicator == 1 &
        prev_distinct_y != "with" &
        next_distinct_y != "with"
    )
  ) %>%
  select(-y_label)

查看组R

1 个答案: