根据嵌套for循环中的条件创建新列

时间:2016-06-24 23:49:41

标签: r for-loop nested subset lapply

我正在尝试在数据集中创建一个新列,用来标明每个产品(按 Revenue 类型分组)在这 3 个月中的 Value 是部分月份为 0、全部 3 个月都为 0,还是 3 个月都不为 0。

我提供了NewColumn作为我希望结果的样子。

# Attempt from the question: initialise two result columns, then loop over the
# distinct product ids and revenue types to label the zeros found in Value.
data$ZEROES <- 0
data$ZEROES2 <- 0
for (i in unique(data$product_id)){
    for (j in unique(data$Revenue)){
        # NOTE(review): the tests below read the whole data$Value column; i and
        # j are never used to subset it, so each iteration yields the same label.
        # NOTE(review): `n` is not created anywhere in this snippet.
        n[j] <-ifelse(all(data$Value == 0)," ALL 0", 
        ifelse(any(data$Value == 0),"Some 0", 
        ifelse(all(data$Value != 0), "None 0", "Blank")))
    # NOTE(review): j is a Revenue value and i a product_id value, yet both are
    # used here as element indices into the columns rather than as group filters.
    data$ZEROES[j] <-n[j]
    # NOTE(review): `long` is not defined in this snippet - possibly `data` was
    # intended; confirm.
    data$ZEROES2[i] <-long$ZEROES[j]
    }
} 

product_id  Date         Revenue           Value    NewColumn
1           January       in               0           Some 0   
1           February      in               1           Some 0 
1           March         in               0           Some 0 
1           January       out              0           All 0 
1           February      out              0           All 0 
1           March         out              0           All 0 
2           January       in               1           No 0 
2           February      in               2           No 0 
2           March         in               3           No 0 
2           January       out              1           Some 0 
2           February      out              1           Some 0 
2           March         out              0           Some 0 

数据

# Example data from the question (originally bare dput() output). It is bound
# to `dat` because the ave(), by() and dplyr answers refer to the data by that
# name; the data.table answer calls the same data `df1`.
# NewColumn holds the expected label for each product_id/Revenue group.
dat <- data.frame(
  product_id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
  Date = c("January", "February", "March", "January", "February", "March",
           "January", "February", "March", "January", "February", "March"),
  Revenue = c("in", "in", "in", "out", "out", "out",
              "in", "in", "in", "out", "out", "out"),
  Value = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 3L, 1L, 1L, 0L),
  NewColumn = c("Some 0", "Some 0", "Some 0", "All 0", "All 0", "All 0",
                "No 0", "No 0", "No 0", "Some 0", "Some 0", "Some 0"),
  # Keep the text columns as character on R < 4.0 as well.
  stringsAsFactors = FALSE
)

3 个答案:

答案 0 :(得分:5)

在基础R中,您可以创建自定义函数,然后使用ave在每个组中进行计算:

# Encode a logical vector as a level code:
#   3 when every element is TRUE, 2 when at least one is TRUE, 1 otherwise.
f <- function(x) {
  if (all(x)) {
    return(3)
  }
  if (any(x)) {
    return(2)
  }
  1
}
# Run f on the "Value is zero" flags within each product_id/Revenue group and
# use the resulting 1/2/3 codes to pick the matching label.
c("None", "Some", "All")[
  ave(dat$Value == 0, dat$product_id, dat$Revenue, FUN = f)
]
# [1] "Some" "Some" "Some" "All"  "All"  "All"  "None" "None" "None" "Some"
#[11] "Some" "Some"

答案 1 :(得分:3)

@thelatemail 的解决方案很简洁。以下是两种替代解决方案:

  1. 基础 R:

    # Split dat by product_id and Revenue, label each piece according to how
    # many of its Value entries are zero, then stack the pieces back together.
    do.call("rbind", by(dat, with(dat, list(product_id, Revenue)), FUN = function(grp) {
      is_zero <- grp$Value == 0
      grp$NewColumn <- ifelse(all(is_zero), "All 0",
                              ifelse(all(!is_zero), "No 0", "Some 0"))
      grp
    }))
    #    product_id     Date Revenue Value NewColumn
    # 1           1  January      in     0    Some 0
    # 2           1 February      in     1    Some 0
    # 3           1    March      in     0    Some 0
    # 7           2  January      in     1      No 0
    # 8           2 February      in     2      No 0
    # 9           2    March      in     3      No 0
    # 4           1  January     out     0     All 0
    # 5           1 February     out     0     All 0
    # 6           1    March     out     0     All 0
    # 10          2  January     out     1    Some 0
    # 11          2 February     out     1    Some 0
    # 12          2    March     out     0    Some 0
    
  2. 使用dplyr

    library(dplyr)
    # Label every product_id/Revenue group by how many Value entries are zero.
    # The result stays grouped, as in the printed output.
    dat %>%
      group_by(product_id, Revenue) %>%
      mutate(
        NewColumn = ifelse(
          all(Value == 0),
          "All 0",
          ifelse(any(Value == 0), "Some 0", "No 0")
        )
      )
    # Source: local data frame [12 x 5]
    # Groups: product_id, Revenue [4]
    #    product_id     Date Revenue Value NewColumn
    #         <int>    <chr>   <chr> <int>     <chr>
    # 1           1  January      in     0    Some 0
    # 2           1 February      in     1    Some 0
    # 3           1    March      in     0    Some 0
    # 4           1  January     out     0     All 0
    # 5           1 February     out     0     All 0
    # 6           1    March     out     0     All 0
    # 7           2  January      in     1      No 0
    # 8           2 February      in     2      No 0
    # 9           2    March      in     3      No 0
    # 10          2  January     out     1    Some 0
    # 11          2 February     out     1    Some 0
    # 12          2    March     out     0    Some 0
    

答案 2 :(得分:1)

使用data.table

library(data.table)
# Convert df1 to a data.table by reference and add NewColumn per
# product_id/Revenue group. any() + all() on the zero flags gives 0, 1 or 2,
# which (plus one) indexes into the label vector.
setDT(df1)[
  ,
  NewColumn := {
    is_zero <- Value == 0
    c("No 0", "Some 0", "All 0")[1 + any(is_zero) + all(is_zero)]
  },
  by = .(product_id, Revenue)
]
#    product_id     Date Revenue Value NewColumn
# 1:          1  January      in     0    Some 0
# 2:          1 February      in     1    Some 0
# 3:          1    March      in     0    Some 0
# 4:          1  January     out     0     All 0
# 5:          1 February     out     0     All 0
# 6:          1    March     out     0     All 0
# 7:          2  January      in     1      No 0
# 8:          2 February      in     2      No 0
# 9:          2    March      in     3      No 0
#10:          2  January     out     1    Some 0
#11:          2 February     out     1    Some 0
#12:          2    March     out     0    Some 0