我希望在计算 Time 的平均值时排除其中的负值,但仍需保留这些行对应的 cost 和 number,以便计算 cost 和 number 各自的平均值。请问应该如何修改下面的代码?示例数据如下:
# Example data: 18 rows with a grouping factor `ID` (levels R1-R6) and three
# integer columns. `Time` contains negative values and NAs; `number` has NAs.
df <- data.frame(
  ID = factor(
    c(1L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 5L, 6L),
    levels = 1:6,
    labels = c("R1", "R2", "R3", "R4", "R5", "R6")
  ),
  cost = c(
    123L, 12L, 14L, 15L, 16L, 17L, 18L, 9L, 6L,
    6L, 7L, 8L, 1L, 111L, 121L, 141L, 181L, 1611L
  ),
  Time = c(
    123L, -12L, NA, -15L, NA, 17L, 18L, -9L, 88L,
    666L, 7L, 78L, 188L, 111L, 121L, 141L, 189L, 161L
  ),
  number = c(
    14L, 12L, 15L, 17L, NA, 17L, 22L, 95L, NA,
    67L, 7L, 8L, 7L, 118L, NA, 140L, 180L, 1611L
  )
)
代码:
# Per-ID summary (plyr): for each of cost, Time and number, report the count
# of non-missing values and the NA-excluding mean rounded to two decimals.
# Negative Time values are still included in Time_Average here.
A <- ddply(
  df,
  .(ID),
  summarise,
  N.cost = sum(!is.na(cost)),
  Cost_Average = round(mean(cost, na.rm = TRUE), digits = 2),
  N.Time = sum(!is.na(Time)),
  Time_Average = round(mean(Time, na.rm = TRUE), digits = 2),
  N.n = sum(!is.na(number)),
  number_Average = round(mean(number, na.rm = TRUE), digits = 2)
)
答案 0 :(得分:2)
我们可以尝试用 dplyr:按 ID 分组,在计算平均值时只保留非负且非缺失的 Time:
library(dplyr)
# Per-ID Time summary that leaves negative Time values out of the average.
# The grouping column is `ID`: R names are case-sensitive and `df` has no
# column called `id`, so grouping by `id` fails.
df %>%
  group_by(ID) %>%
  summarise(
    # Count of non-missing Time values; negative values are still counted.
    N.Time = sum(!is.na(Time)),
    # Mean over non-missing, non-negative Time only. A group with no such
    # value yields NaN, because the mean of an empty vector is NaN.
    Time_Average = round(mean(Time[Time >= 0 & !is.na(Time)]), 2)
  )