我正在尝试在我的数据集中创建一个新列,用来说明每个产品(按 Revenue 类型分组)在 3 个月的 Value 中是部分为 0、全部为 0,还是全都不为 0。
我在下面的示例中提供了 NewColumn 列,
用来展示我期望得到的结果。
# Label every (product_id, Revenue) group of `data` according to how many of
# its `Value` entries are 0, and write that label on each row of the group.
#
# Reads:  data$product_id, data$Revenue, data$Value
# Writes: data$ZEROES and data$ZEROES2 (both receive the same group label)
#
# Labels: "All 0"  - every value in the group is 0
#         "Some 0" - at least one, but not every, value is 0
#         "None 0" - no value in the group is 0
#         "Blank"  - the group contains a missing Value, so it cannot be
#                    classified reliably
data$ZEROES <- NA_character_
data$ZEROES2 <- NA_character_
for (i in unique(data$product_id)) {
  for (j in unique(data$Revenue)) {
    # Row positions of the current group; the labels must be written to these
    # rows, not to positions named after the group keys.
    rows <- which(data$product_id == i & data$Revenue == j)
    if (length(rows) == 0L) {
      next # this product/revenue combination does not occur in the data
    }
    # Test only this group's values, never the whole column.
    is_zero <- data$Value[rows] == 0
    label <- if (anyNA(is_zero)) {
      "Blank"
    } else if (all(is_zero)) {
      "All 0"
    } else if (any(is_zero)) {
      "Some 0"
    } else {
      "None 0"
    }
    data$ZEROES[rows] <- label
    data$ZEROES2[rows] <- label
  }
}
product_id Date Revenue Value NewColumn
1 January in 0 Some 0
1 February in 1 Some 0
1 March in 0 Some 0
1 January out 0 All 0
1 February out 0 All 0
1 March out 0 All 0
2 January in 1 No 0
2 February in 2 No 0
2 March in 3 No 0
2 January out 1 Some 0
2 February out 1 Some 0
2 March out 0 Some 0
数据
# Example data: products 1 and 2, each with three months of "in" and "out"
# revenue, plus the expected label for every row in NewColumn.
data.frame(
  product_id = rep(1:2, each = 6L),
  Date = rep(c("January", "February", "March"), times = 4L),
  Revenue = rep(rep(c("in", "out"), each = 3L), times = 2L),
  Value = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 3L, 1L, 1L, 0L),
  NewColumn = rep(c("Some 0", "All 0", "No 0", "Some 0"), each = 3L),
  stringsAsFactors = FALSE
)
答案 0 :(得分:5)
在基础R中,您可以创建自定义函数,然后使用ave
在每个组中进行计算:
# Classify a logical vector: 3 when every element is TRUE, 2 when only some
# are TRUE, 1 when none are. The codes are used as positions in a label vector.
f <- function(x) {
  if (all(x)) {
    return(3)
  }
  if (any(x)) 2 else 1
}
# For each product_id/Revenue group, ave() spreads the code returned by f over
# the group's rows; the code then picks the matching label.
with(
  dat,
  c("None", "Some", "All")[ave(Value == 0, product_id, Revenue, FUN = f)]
)
# [1] "Some" "Some" "Some" "All" "All" "All" "None" "None" "None" "Some"
#[11] "Some" "Some"
答案 1 :(得分:3)
@thelatemail 的解决方案很简洁。以下是两种替代解决方案:
基础 R:
# Split dat into product_id/Revenue groups, attach the group's label to every
# row of each piece, then stack the pieces back into one data frame.
do.call(rbind, by(dat, list(dat$product_id, dat$Revenue), function(piece) {
  zero <- piece$Value == 0
  piece$NewColumn <- ifelse(all(zero), "All 0",
                            ifelse(all(!zero), "No 0", "Some 0"))
  piece
}))
# product_id Date Revenue Value NewColumn
# 1 1 January in 0 Some 0
# 2 1 February in 1 Some 0
# 3 1 March in 0 Some 0
# 7 2 January in 1 No 0
# 8 2 February in 2 No 0
# 9 2 March in 3 No 0
# 4 1 January out 0 All 0
# 5 1 February out 0 All 0
# 6 1 March out 0 All 0
# 10 2 January out 1 Some 0
# 11 2 February out 1 Some 0
# 12 2 March out 0 Some 0
使用dplyr
# Attach dplyr for %>%, group_by() and mutate().
library(dplyr)
# Label each product_id/Revenue group of dat by how many of its Value entries
# are 0. The result is still grouped (see the "Groups:" line in the output).
dat %>%
group_by(product_id, Revenue) %>%
mutate(
# "All 0" is tested first; otherwise "No 0" when no value in the group is 0,
# and "Some 0" in the remaining case. The single label fills the whole group.
NewColumn = ifelse(all(Value == 0), "All 0",
ifelse(all(Value != 0), "No 0", "Some 0"))
)
# Source: local data frame [12 x 5]
# Groups: product_id, Revenue [4]
# product_id Date Revenue Value NewColumn
# <int> <chr> <chr> <int> <chr>
# 1 1 January in 0 Some 0
# 2 1 February in 1 Some 0
# 3 1 March in 0 Some 0
# 4 1 January out 0 All 0
# 5 1 February out 0 All 0
# 6 1 March out 0 All 0
# 7 2 January in 1 No 0
# 8 2 February in 2 No 0
# 9 2 March in 3 No 0
# 10 2 January out 1 Some 0
# 11 2 February out 1 Some 0
# 12 2 March out 0 Some 0
答案 2 :(得分:1)
使用data.table
# Attach data.table for setDT(), := and .().
library(data.table)
# Convert df1 to a data.table and add NewColumn per product_id/Revenue group.
# !Value is TRUE exactly where Value is 0, so all(!Value) + any(!Value) is
# 0 when no value is 0, 1 when only some are, and 2 when all are; adding 1
# turns that count into a position in the label vector.
setDT(df1)[, NewColumn := c("No 0", "Some 0", "All 0")[(all(!Value) +
any(!Value))+1], .(product_id, Revenue)]
# product_id Date Revenue Value NewColumn
# 1: 1 January in 0 Some 0
# 2: 1 February in 1 Some 0
# 3: 1 March in 0 Some 0
# 4: 1 January out 0 All 0
# 5: 1 February out 0 All 0
# 6: 1 March out 0 All 0
# 7: 2 January in 1 No 0
# 8: 2 February in 2 No 0
# 9: 2 March in 3 No 0
#10: 2 January out 1 Some 0
#11: 2 February out 1 Some 0
#12: 2 March out 0 Some 0