按组的多条件语句

时间:2015-03-12 15:25:14

标签: r

我有一个简单的数据集。

# dput() output of the example data: evaluating this expression rebuilds the
# 13-row data frame printed below. Columns: ID, Primrely, Primset, Primvalue,
# Secrely, Secset, Secvalue (all integer) and Desired (a factor with levels
# "Primary"/"Secondary", NA where no label applies).
structure(list(ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 4L, 
4L, 5L, 5L), Primrely = c(0L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 4L, 
3L, 1L, 2L, 2L), Primset = c(-4L, -3L, 1L, 2L, -4L, 5L, 3L, 1L, 
2L, -4L, -2L, -3L, 3L), Primvalue = c(45L, 5L, 6L, 15L, 53L, 
45L, 44L, 65L, 1L, 5L, 1L, 12L, 5L), Secrely = c(5L, 7L, 2L, 
1L, 2L, 0L, 4L, 5L, 1L, 1L, 1L, 0L, 2L), Secset = c(-3L, 1L, 
2L, -2L, -3L, 2L, 5L, 7L, 7L, 4L, 3L, 2L, 1L), Secvalue = c(38L, 
-2L, -1L, 8L, 46L, 38L, 37L, 58L, -6L, -2L, -6L, 5L, -2L), Desired = structure(c(NA, 
1L, NA, NA, 2L, 2L, NA, NA, NA, NA, NA, 1L, 1L), .Label = c("Primary", 
"Secondary"), class = "factor")), .Names = c("ID", "Primrely", 
"Primset", "Primvalue", "Secrely", "Secset", "Secvalue", "Desired"
), class = "data.frame", row.names = c(NA, -13L))

   ID Primrely Primset Primvalue Secrely Secset Secvalue   Desired
1   1        0      -4        45       5     -3       38      <NA>
2   1        2      -3         5       7      1       -2   Primary
3   1        1       1         6       2      2       -1      <NA>
4   1        1       2        15       1     -2        8      <NA>
5   2        1      -4        53       2     -3       46 Secondary
6   2        1       5        45       0      2       38 Secondary
7   2        3       3        44       4      5       37      <NA>
8   3        4       1        65       5      7       58      <NA>
9   4        4       2         1       1      7       -6      <NA>
10  4        3      -4         5       1      4       -2      <NA>
11  4        1      -2         1       1      3       -6      <NA>
12  5        2      -3        12       0      2        5   Primary
13  5        2       3         5       2      1       -2   Primary

对于每个ID,我想选择符合条件的行(Prim = 主要,Sec = 次要):如果Primrely为0或2,且Primset在-3:3范围内,则选择该ID中所有满足条件的行。如果给定ID没有符合主要条件的行,则选择该ID中符合次要条件(Secrely为0或2,且Secset在-3:3范围内)的行。理想情况下,我想添加一列(Desired)来表明符合的是哪个条件(primary/secondary/NA)。

我一直在尝试使用R中的ifelse函数,但没有太大进展,主要是因为我不知道如何把“给定ID已经满足主要标准”这一条件写进去(例如ID 1也符合次要标准,但并不需要它,因为它已经符合主要标准)。换句话说,如果“主要”出现在给定的ID中,它将优先于该ID所满足的所有“次要”标准。我很感激任何建议。

2 个答案:

答案 0 :(得分:2)

如果我现在正确理解你:

(我保留了中间步骤,以便向您展示每一步在做什么;您可以删除它们,或者如果您愿意,也可以合并成一步完成)

# Example data from the question: 13 rows spread over 5 IDs. The integer
# columns hold the primary (Prim*) and secondary (Sec*) measurements;
# `Desired` is the asker's expected label, NA where no label applies.
dat <- data.frame(
  ID        = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 4L, 4L, 5L, 5L),
  Primrely  = c(0L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 4L, 3L, 1L, 2L, 2L),
  Primset   = c(-4L, -3L, 1L, 2L, -4L, 5L, 3L, 1L, 2L, -4L, -2L, -3L, 3L),
  Primvalue = c(45L, 5L, 6L, 15L, 53L, 45L, 44L, 65L, 1L, 5L, 1L, 12L, 5L),
  Secrely   = c(5L, 7L, 2L, 1L, 2L, 0L, 4L, 5L, 1L, 1L, 1L, 0L, 2L),
  Secset    = c(-3L, 1L, 2L, -2L, -3L, 2L, 5L, 7L, 7L, 4L, 3L, 2L, 1L),
  Secvalue  = c(38L, -2L, -1L, 8L, 46L, 38L, 37L, 58L, -6L, -2L, -6L, 5L, -2L),
  # Codes 1/2 map onto the two labels; NA codes stay missing.
  Desired   = factor(
    c(NA, 1L, NA, NA, 2L, 2L, NA, NA, NA, NA, NA, 1L, 1L),
    levels = 1:2,
    labels = c("Primary", "Secondary")
  )
)


# Label each row with the criterion it satisfies, then keep, per ID, only the
# labels of the best criterion met in that ID (primary beats secondary).
# The result is printed, not assigned; `dat` itself is left untouched.
within(dat, {
  # Row-level code: 1 = primary criteria met, 2 = secondary criteria met,
  # 3 = neither. Lower codes take precedence, which is what min() relies on.
  Desired_step1 <- ifelse(
    Primrely %in% c(0L, 2L) & Primset %in% -3:3,
    1,
    ifelse(Secrely %in% c(0L, 2L) & Secset %in% -3:3, 2, 3)
  )
  # Within each ID keep a row's code only when it equals the ID's minimum.
  # Only codes 1 and 2 are declared as levels, so IDs where nothing matched
  # (code 3 throughout) become a real missing value rather than a level
  # spelled "NA" -- is.na(Desired_new) is then TRUE for those rows.
  Desired_new <- factor(
    ave(Desired_step1, ID, FUN = function(x) ifelse(x == min(x), x, NA)),
    levels = 1:2,
    labels = c("Primary", "Secondary")
  )
  # Replace the numeric row-level code by its label (3 -> NA) for display.
  Desired_step1 <- c("Primary", "Secondary", NA)[Desired_step1]
})


#    ID Primrely Primset Primvalue Secrely Secset Secvalue   Desired Desired_new Desired_step1
# 1   1        0      -4        45       5     -3       38      <NA>        <NA>          <NA>
# 2   1        2      -3         5       7      1       -2   Primary     Primary       Primary
# 3   1        1       1         6       2      2       -1      <NA>        <NA>     Secondary
# 4   1        1       2        15       1     -2        8      <NA>        <NA>          <NA>
# 5   2        1      -4        53       2     -3       46 Secondary   Secondary     Secondary
# 6   2        1       5        45       0      2       38 Secondary   Secondary     Secondary
# 7   2        3       3        44       4      5       37      <NA>        <NA>          <NA>
# 8   3        4       1        65       5      7       58      <NA>        <NA>          <NA>
# 9   4        4       2         1       1      7       -6      <NA>        <NA>          <NA>
# 10  4        3      -4         5       1      4       -2      <NA>        <NA>          <NA>
# 11  4        1      -2         1       1      3       -6      <NA>        <NA>          <NA>
# 12  5        2      -3        12       0      2        5   Primary     Primary       Primary
# 13  5        2       3         5       2      1       -2   Primary     Primary       Primary

答案 1 :(得分:0)

这是我的一个快速而粗糙(quick & dirty)的解决方案,假设您的data.frame名为df。您可以自己完善它:

# Row-level tests for the two sets of criteria (rely is 0 or 2, set in [-3, 3]).
is_primary <- df$Primrely %in% c(0L, 2L) & df$Primset >= -3 & df$Primset <= 3
is_secondary <- df$Secrely %in% c(0L, 2L) & df$Secset >= -3 & df$Secset <= 3

# TRUE for every row whose ID has at least one primary match. The secondary
# label must only be used for IDs without any primary row, so this has to be
# decided per ID rather than per row.
id_has_primary <- as.logical(
  ave(is_primary, df$ID, FUN = function(x) any(x, na.rm = TRUE))
)

# Start from all-missing, then fill in the labels. which() drops NA tests, so
# rows with missing inputs simply stay NA instead of propagating into the index.
df$Desired <- rep(NA_character_, nrow(df))
df$Desired[which(is_secondary & !id_has_primary)] <- "Secondary"
df$Desired[which(is_primary)] <- "Primary"