根据另一列中的值创建列

时间:2021-04-21 11:30:51

标签: r

我试图根据数据集中 Jar 列的值创建一个新列 measurement_type。如果当前行的 Jar 为 blank,并且 Jar 的前一个值(上一行)是 8,那么我希望 measurement_type 从这一行起切换到下一个类型(例如从 "a" 变为 "b")。

我已经试过了,但我无法让它工作

# Append a `measurement_type` column to `df`, derived from its `Jar` column.
#
# The type starts at "a" and moves on to the next entry of `measurement`
# ("a" -> "b" -> ... -> "e") on every row whose Jar is "blank" while the Jar
# of the row directly above is "8". Rows that fall beyond the last available
# type receive NA.
#
# Args:
#   df: a data frame containing a `Jar` column (character or factor).
#
# Returns:
#   `df` with the additional character column `measurement_type`.
addmeasurementVector <- function(df) {
  if (!is.data.frame(df) || !("Jar" %in% names(df))) {
    stop("`df` must be a data frame with a `Jar` column", call. = FALSE)
  }

  measurement <- c("a", "b", "c", "d", "e")

  jar <- as.character(df[["Jar"]])

  # Jar value of the row above; the first row has no predecessor, so it gets
  # the sentinel "0", which can never satisfy the "8" test below.
  previous_jar <- c("0", jar)[seq_along(jar)]

  # TRUE exactly on the rows where a new measurement type begins.
  # `%in%` is used instead of `==` so that NA values count as "no change"
  # rather than propagating NA through the cumulative sum.
  starts_new_type <- jar %in% "blank" & previous_jar %in% "8"

  # The running count of changes is a 0-based index into `measurement`.
  df$measurement_type <- measurement[cumsum(starts_new_type) + 1]

  df
}

有什么想法吗?

df 看起来像这样


# Example input: five repetitions of one ten-row cycle of jar labels, where
# each cycle holds a "blank" before jars 1-4 and another before jars 5-8.
df <- data.frame(
  Jar = rep(
    c("blank", "1", "2", "3", "4", "blank", "5", "6", "7", "8"),
    times = 5
  ),
  stringsAsFactors = FALSE
)

预期的 df 看起来像这样

# Expected result: the same 50 jar labels as the input, plus a
# `measurement_type` column that holds one letter per ten-row cycle.
expected_df <- data.frame(
  Jar = rep(
    c("blank", "1", "2", "3", "4", "blank", "5", "6", "7", "8"),
    times = 5
  ),
  measurement_type = rep(c("a", "b", "c", "d", "e"), each = 10),
  stringsAsFactors = FALSE
)

1 个答案:

答案 0 :(得分:2)

这个就行了

Base R 语法

# Assign one letter per group; a new group begins on every "blank" row that
# directly follows an "8" row.
df$measurement_type <- local({
  # Jar shifted down by one row; "1" pads the first row, which has no
  # predecessor and therefore never counts as following an "8".
  jar_above <- c("1", df$Jar[-nrow(df)])
  group_starts <- df$Jar == "blank" & jar_above == "8"
  # The running count of group starts is 0-based, so add 1 to index `letters`.
  letters[cumsum(group_starts) + 1]
})

     Jar measurement_type
1  blank                a
2      1                a
3      2                a
4      3                a
5      4                a
6  blank                a
7      5                a
8      6                a
9      7                a
10     8                a
11 blank                b
12     1                b
13     2                b
14     3                b
15     4                b
16 blank                b
17     5                b
18     6                b
19     7                b
20     8                b
21 blank                c
22     1                c
23     2                c
24     3                c
25     4                c
26 blank                c
27     5                c
28     6                c
29     7                c
30     8                c
31 blank                d
32     1                d
33     2                d
34     3                d
35     4                d
36 blank                d
37     5                d
38     6                d
39     7                d
40     8                d
41 blank                e
42     1                e
43     2                e
44     3                e
45     4                e
46 blank                e
47     5                e
48     6                e
49     7                e
50     8                e

dplyr 语法

library(dplyr)

# Running count of "blank" rows that directly follow an "8" row, giving a
# 0-based group index. `default = "1"` pads the first row, which has no
# previous Jar value.
df %>%
  mutate(
    measurement_type = cumsum(
      lag(Jar, default = "1") == "8" & Jar == "blank"
    )
  )

     Jar measurement_type
1  blank                0
2      1                0
3      2                0
4      3                0
5      4                0
6  blank                0
7      5                0
8      6                0
9      7                0
10     8                0
11 blank                1
12     1                1
13     2                1
14     3                1
15     4                1
16 blank                1
17     5                1
18     6                1
19     7                1
20     8                1
21 blank                2
22     1                2
23     2                2
24     3                2
25     4                2
26 blank                2
27     5                2
28     6                2
29     7                2
30     8                2
31 blank                3
32     1                3
33     2                3
34     3                3
35     4                3
36 blank                3
37     5                3
38     6                3
39     7                3
40     8                3
41 blank                4
42     1                4
43     2                4
44     3                4
45     4                4
46 blank                4
47     5                4
48     6                4
49     7                4
50     8                4

# Same running count of "blank"-after-"8" rows, shifted by one so that it
# indexes `letters` and yields "a", "b", "c", ... per group.
df %>%
  mutate(
    measurement_type = letters[
      cumsum(lag(Jar, default = "1") == "8" & Jar == "blank") + 1
    ]
  )

     Jar measurement_type
1  blank                a
2      1                a
3      2                a
4      3                a
5      4                a
6  blank                a
7      5                a
8      6                a
9      7                a
10     8                a
11 blank                b
12     1                b
13     2                b
14     3                b
15     4                b
16 blank                b
17     5                b
18     6                b
19     7                b
20     8                b
21 blank                c
22     1                c
23     2                c
24     3                c
25     4                c
26 blank                c
27     5                c
28     6                c
29     7                c
30     8                c
31 blank                d
32     1                d
33     2                d
34     3                d
35     4                d
36 blank                d
37     5                d
38     6                d
39     7                d
40     8                d
41 blank                e
42     1                e
43     2                e
44     3                e
45     4                e
46 blank                e
47     5                e
48     6                e
49     7                e
50     8                e