我见过类似的问题,但无法将其应用到我自己的数据上。我想按 product_id 和 Revenue 对 Value、Value2 和 Value3 进行汇总,其中 Value 需要求和;但对于 Value2 和 Value3,我只希望重复项显示其中一个值(而不是求和)。
这是我的代码:
# NOTE: this is the asker's original attempt, kept verbatim. It is not a valid
# aggregate() call: a formula takes a single left-hand-side expression, so
# several response columns must be combined with cbind(), e.g.
# cbind(Value, Value2, Value3) ~ product_id + Revenue.
aggregate(Value, Value2, Value3 ~product_id + Revenue, dat,sum)
数据:
# Example data: 2 products x 2 revenue directions ("in"/"out") x 3 months.
# Value2 and Value3 repeat a single value within each product_id/Revenue group.
dat <- data.frame(
  product_id = rep(1:2, each = 6L),
  Date = rep(c("January", "February", "March"), times = 4L),
  Revenue = rep(rep(c("in", "out"), each = 3L), times = 2L),
  Value = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 3L, 0L, 0L, 0L),
  Value2 = rep(1:4, each = 3L),
  Value3 = rep(1:4, each = 3L),
  stringsAsFactors = FALSE
)
我希望得到的结果如下所示:
product_id Revenue Value Value2 Value3
1 in 1 1 1
2 in 6 3 3
1 out 0 2 2
2 out 0 4 4
答案 0 :(得分:0)
# Rebuild the example data from the question so this answer runs on its own.
dat <- structure(
  list(
    product_id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
    Date = c(
      "January", "February", "March", "January", "February", "March",
      "January", "February", "March", "January", "February", "March"
    ),
    Revenue = c(
      "in", "in", "in", "out", "out", "out",
      "in", "in", "in", "out", "out", "out"
    ),
    Value = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 3L, 0L, 0L, 0L),
    Value2 = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L),
    Value3 = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L)
  ),
  .Names = c("product_id", "Date", "Revenue", "Value", "Value2", "Value3"),
  class = "data.frame",
  row.names = c(NA, -12L)
)

# Grouping columns shared by both aggregations below.
group_keys <- c("product_id", "Revenue")

# Value is summed within each product_id/Revenue group.
totals <- aggregate(dat["Value"], by = dat[group_keys], FUN = sum)

# Value2 and Value3 repeat one value per group in this data, so summing them
# would multiply that value by the group size. Keep the first occurrence
# instead, which yields the single de-duplicated value the question asks for.
firsts <- aggregate(
  dat[c("Value2", "Value3")],
  by = dat[group_keys],
  FUN = function(x) x[1L]
)

# Both aggregate() calls use the same `by`, so their rows are in the same
# group order and the columns can be bound side by side.
res <- cbind(totals, firsts[c("Value2", "Value3")])
colnames(res) <- c("product_id", "Revenue", "Value", "Value 2", "Value 3")
res
#   product_id Revenue Value Value 2 Value 3
# 1          1      in     1       1       1
# 2          2      in     6       3       3
# 3          1     out     0       2       2
# 4          2     out     0       4       4