我想创建一个新列:按3个分组变量分组后,每组的第一行和最后一行取值为2,其余行为NA

时间:2019-09-27 02:26:02

标签: r dataframe

我有3个小组

         group1     group2     group3     time
            1          1         1         3:0
            1          1         1         4:0
            1          1         1         9:0
            1          2         1         6:0
            1          2         2         5:0
            1          2         2         2:0      
            1          2         2         1:0
            2          1         1         3:0
            2          3         2         1:0

新列

         group1     group2     group3     time      new
            1          1         1         3:0       2
            1          1         1         4:0       NA
            1          1         1         9:0       2
            1          2         1         6:0       2
            1          2         2         5:0       2
            1          2         2         2:0       NA
            1          2         2         1:0       2
            2          1         1         3:0       2
            2          3         2         1:0       2

按group_by(group1, group2, group3)分组后,每组的第一行和最后一行为2,其他行为NA。我知道可以通过slice和mutate实现,但找不到正确的写法。

3 个答案:

答案 0 :(得分:2)

# Group on every column except `time`, then build `new` per group:
# start from an all-NA numeric vector of the group's length and
# overwrite its first and last positions with 2.
d %>%
    group_by_at(vars(-time)) %>%
    mutate(new = replace(rep(NA_real_, n()), c(1L, n()), 2))
## A tibble: 9 x 5
## Groups:   group1, group2, group3 [5]
#  group1 group2 group3 time    new
#   <int>  <int>  <int> <chr> <dbl>
#1      1      1      1 3:0       2
#2      1      1      1 4:0      NA
#3      1      1      1 9:0       2
#4      1      2      1 6:0       2
#5      1      2      2 5:0       2
#6      1      2      2 2:0      NA
#7      1      2      2 1:0       2
#8      2      1      1 3:0       2
#9      2      3      2 1:0       2

答案 1 :(得分:1)

在ifelse中检查row_number()

library(dplyr)

# Within each group1/group2/group3 combination, set `new` to 2 on the
# first and last row and to NA on every other row.
df %>%
  group_by(group1, group2, group3) %>%
  mutate(
    new = ifelse(row_number() == 1L | row_number() == n(), 2, NA)
  )
  # Alternative from @d.b:
  # mutate(new = ifelse(row_number() %in% range(row_number()), 2, NA))

#  group1 group2 group3 time    new
#   <int>  <int>  <int> <fct> <dbl>
#1      1      1      1 3:0       2
#2      1      1      1 4:0      NA
#3      1      1      1 9:0       2
#4      1      2      1 6:0       2
#5      1      2      2 5:0       2
#6      1      2      2 2:0      NA
#7      1      2      2 1:0       2
#8      2      1      1 3:0       2
#9      2      3      2 1:0       2

我们可以在base R或data.table中实现相同的逻辑

# Base R version: ave() applies FUN to each group1/group2/group3 subset.
# `group1` is only passed as the value vector to give each subset its
# length; FUN ignores the values and looks at positions alone.
df$new <- ave(
  df$group1, df$group1, df$group2, df$group3,
  FUN = function(v) {
    pos <- seq_along(v)
    # 2 on the first and last position of the subset, NA elsewhere.
    ifelse(pos == 1L | pos == length(v), 2, NA)
  }
)


library(data.table)
# Convert `df` to a data.table by reference, then add `new` per group:
# 2 where the within-group position is 1 or .N (the group size),
# NA otherwise.
setDT(df)[, new := ifelse(seq_len(.N) %in% c(1L, .N), 2, NA),
          by = .(group1, group2, group3)]

数据

# Example data: three integer grouping columns and a factor `time`.
df <- data.frame(
  group1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L),
  group2 = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 3L),
  group3 = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L),
  time = factor(
    c("3:0", "4:0", "9:0", "6:0", "5:0", "2:0", "1:0", "3:0", "1:0"),
    levels = c("1:0", "2:0", "3:0", "4:0", "5:0", "6:0", "9:0")
  )
)

答案 2 :(得分:0)

这是使用data.table的`.I`的一种方法,应该更高效

library(data.table)
# Names of the grouping columns: "group" followed by one or more digits.
nm1 <- names(df1)[grepl("^group\\d+$", names(df1))]
# Convert to a data.table by reference, then collect the row numbers (.I)
# of the first and last row of every group.
setDT(df1)
i1 <- df1[, .I[c(1L, .N)], by = nm1][["V1"]]
# Assign 2 to those rows by reference and print; the remaining rows show
# NA in the printed result below.
df1[i1, new := 2][]
#   group1 group2 group3 time new
#1:      1      1      1  3:0   2
#2:      1      1      1  4:0  NA
#3:      1      1      1  9:0   2
#4:      1      2      1  6:0   2
#5:      1      2      2  5:0   2
#6:      1      2      2  2:0  NA
#7:      1      2      2  1:0   2
#8:      2      1      1  3:0   2
#9:      2      3      2  1:0   2

或使用dplyr

library(dplyr)
# Group on every column whose name starts with "group". The exponent is
# TRUE for rows that are neither first nor last in their group:
# NA^TRUE is NA and NA^FALSE is 1, so `new` is 2 on the first and last
# row and NA on the others.
df1 %>%
  group_by_at(vars(starts_with('group'))) %>%
  mutate(new = 2 * NA^(!(row_number() %in% c(1, n()))))
# A tibble: 9 x 5
# Groups:   group1, group2, group3 [5]
#  group1 group2 group3 time    new
#   <int>  <int>  <int> <fct> <dbl>
#1      1      1      1 3:0       2
#2      1      1      1 4:0      NA
#3      1      1      1 9:0       2
#4      1      2      1 6:0       2
#5      1      2      2 5:0       2
#6      1      2      2 2:0      NA
#7      1      2      2 1:0       2
#8      2      1      1 3:0       2
#9      2      3      2 1:0       2

数据

# Example data: three integer grouping columns and a factor `time`.
df1 <- data.frame(
  group1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L),
  group2 = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 3L),
  group3 = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L),
  time = factor(
    c("3:0", "4:0", "9:0", "6:0", "5:0", "2:0", "1:0", "3:0", "1:0"),
    levels = c("1:0", "2:0", "3:0", "4:0", "5:0", "6:0", "9:0")
  )
)