我正在尝试每小时提取0到40分钟之间的所有变量的平均值。
dput(head(df))
structure(list(DateTime = structure(c(1563467460, 1563468060,
1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), date = structure(c(1563467460, 1563468060,
1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), Date = structure(c(18095, 18095, 18095,
18095, 18095, 18095), class = "Date"), TimeCtr = structure(c(1563467460,
1563468060, 1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), MassConc = c(0.397627, 0.539531, 0.571902,
0.608715, 0.670382, 0.835773), VolConc = c(175.038, 160.534,
174.386, 183.004, 191.074, 174.468), NumbConc = c(234.456, 326.186,
335.653, 348.996, 376.018, 488.279), MassD = c(101.426, 102.462,
101.645, 102.145, 101.255, 101.433)), .Names = c("DateTime",
"date", "Date", "TimeCtr", "MassConc", "VolConc", "NumbConc",
"MassD"), row.names = c(NA, 6L), class = "data.frame")
到目前为止,我已经尝试过了。
hourly_mean<-mydata %>%
filter(between(as.numeric(format(DateTime, "%M")), 0, 40)) %>%
group_by(DateTime=format(DateTime, "%Y-%m-%d %H")) %>%
summarise(variable1_mean=mean(variable1))
但是它给了我整个时期的平均值。任何帮助都非常欢迎。
答案 0 :(得分:1)
我们可以转换DateTime
,以小时单位使用ceiling_date
舍入Datetime
,从DateTime
和filter
中提取分钟数,以保持小于40,group_by
hour
并取mean
个值。
library(lubridate)
library(dplyr)
df %>%
dplyr::mutate(DateTime = ymd_hm(DateTime),
hour = ceiling_date(DateTime, "hour"),
minutes = minute(DateTime)) %>%
filter(minutes <= 40) %>%
group_by(hour) %>%
summarise_at(vars(ends_with("Conc")), mean)
数据
df <- structure(list(DateTime = structure(1:7, .Label = c("2019-08-0810:07",
"2019-08-0810:17", "2019-08-0810:27", "2019-08-0810:37", "2019-08-0810:47",
"2019-08-0810:57", "2019-08-0811:07"), class = "factor"), MassConc = c(0.556398,
1.06868, 0.777654, 0.87289, 0.789704, 0.51948, 0.416676), NumbConc = c(588.069,
984.018, 964.634, 997.678, 1013.52, 924.271, 916.357), VolConc = c(582.887,
979.685, 963.3, 994.178, 1009.52, 922.104, 916.856), Conc = c(281.665,
486.176, 420.058, 422.101, 429.841, 346.539, 330.282)), class =
"data.frame", row.names = c(NA, -7L))