我有一个包含以下数据的数据框(时间戳7.50和7.40的平均值应该是我的时间戳7.45的A值)
Date_Time | A
7/28/2017 8:00| 443.75
7/28/2017 7:50| 440.75
7/28/2017 7:45| NA
7/28/2017 7:40| 447.5
7/28/2017 7:30| 448.75
7/28/2017 7:20| 444.5
7/28/2017 7:15| NA
7/28/2017 7:10| 440.25
7/28/2017 7:00| 447.5
我希望它使用mean:
转换为15分钟的间隔,如下所示Date / Time | Object Value
7/28/2017 8:00| 465
7/28/2017 7:45| 464.875
7/28/2017 7:30| 464.75
7/28/2017 7:15| 464.875
7/28/2017 7:00| 465
答案 0 :(得分:1)
OP改变他或她想要的输出。由于我没有时间更新我的答案,我将保留我的答案。请参阅我在原帖中的评论,了解如何使用log4j
填写缺失值。
此解决方案假设您根据na.interpolation
,8:00
和7:30
中的平均值计算平均值。
7:00
数据强>
library(dplyr)
library(tidyr)
library(lubridate)
library(imputeTS)
dt2 <- dt %>%
mutate(Date.Time = mdy_hm(Date.Time)) %>%
filter(Date.Time %in% seq(min(Date.Time), max(Date.Time), by = "15 min")) %>%
complete(Date.Time = seq(min(Date.Time), max(Date.Time), by = "15 min")) %>%
mutate(Object.Value = na.interpolation(Object.Value)) %>%
fill(Object.Name) %>%
arrange(desc(Date.Time))
dt2
# A tibble: 5 x 3
Date.Time Object.Name Object.Value
<dttm> <chr> <dbl>
1 2017-07-28 08:00:00 a 465.000
2 2017-07-28 07:45:00 a 464.875
3 2017-07-28 07:30:00 a 464.750
4 2017-07-28 07:15:00 a 464.875
5 2017-07-28 07:00:00 a 465.000
答案 1 :(得分:0)
如果以10分钟为间隔测量的值是该时间段内的时间积分平均值,则可以合理地将它们平均到另一个时间段。如果这些是瞬时测量,则按照其他人的建议对它们进行平滑处理是更合理的。
要获取按10分钟时间表测量的时间积分平均值并将其平均值按15分钟时间表测量,可以使用intervalaverage软件包:
library(data.table)
library(intervalaverage)
x <- structure(list(time = c("7/28/2017 8:00", "7/28/2017 7:50", "7/28/2017 7:45",
"7/28/2017 7:40", "7/28/2017 7:30", "7/28/2017 7:20", "7/28/2017 7:15",
"7/28/2017 7:10", "7/28/2017 7:00"), A = c(443.75, 440.75, NA,
447.5, 448.75, 444.5, NA, 440.25, 447.5)), row.names = c(NA,
-9L), class = "data.frame")
y <- structure(list(time = c("7/28/2017 8:00", "7/28/2017 7:45", "7/28/2017 7:30",
"7/28/2017 7:15", "7/28/2017 7:00")), row.names = c(NA, -5L), class = "data.frame")
setDT(x)
setDT(y)
x
#> time A
#> 1: 7/28/2017 8:00 443.75
#> 2: 7/28/2017 7:50 440.75
#> 3: 7/28/2017 7:45 NA
#> 4: 7/28/2017 7:40 447.50
#> 5: 7/28/2017 7:30 448.75
#> 6: 7/28/2017 7:20 444.50
#> 7: 7/28/2017 7:15 NA
#> 8: 7/28/2017 7:10 440.25
#> 9: 7/28/2017 7:00 447.50
y
#> time
#> 1: 7/28/2017 8:00
#> 2: 7/28/2017 7:45
#> 3: 7/28/2017 7:30
#> 4: 7/28/2017 7:15
#> 5: 7/28/2017 7:00
x[, time:=as.POSIXct(time,format='%m/%d/%Y %H:%M',tz = "UTC")]
setnames(x, "time","start_time")
x[, start_time_integer:=as.integer(start_time)]
y[, time:=as.POSIXct(time,format='%m/%d/%Y %H:%M',tz = "UTC")]
setnames(y, "time","start_time")
y[, start_time_integer:=as.integer(start_time)]
setkey(y, start_time)
setkey(x, start_time)
##drop time times at 15 and 45
x <- x[!start_time %in% as.POSIXct(c("2017-07-28 07:45:00","2017-07-28 07:15:00"),tz="UTC")]
x[, end_time_integer:=as.integer(start_time)+60L*10L-1L]
x[, end_time:=as.POSIXct(end_time_integer,origin="1969-12-31 24:00:00",tz = "UTC")]
y[, end_time_integer:=as.integer(start_time)+60L*15L-1L]
y[, end_time:=as.POSIXct(end_time_integer,origin="1969-12-31 24:00:00",tz = "UTC")]
x
#> start_time A start_time_integer end_time_integer
#> 1: 2017-07-28 07:00:00 447.50 1501225200 1501225799
#> 2: 2017-07-28 07:10:00 440.25 1501225800 1501226399
#> 3: 2017-07-28 07:20:00 444.50 1501226400 1501226999
#> 4: 2017-07-28 07:30:00 448.75 1501227000 1501227599
#> 5: 2017-07-28 07:40:00 447.50 1501227600 1501228199
#> 6: 2017-07-28 07:50:00 440.75 1501228200 1501228799
#> 7: 2017-07-28 08:00:00 443.75 1501228800 1501229399
#> end_time
#> 1: 2017-07-28 07:09:59
#> 2: 2017-07-28 07:19:59
#> 3: 2017-07-28 07:29:59
#> 4: 2017-07-28 07:39:59
#> 5: 2017-07-28 07:49:59
#> 6: 2017-07-28 07:59:59
#> 7: 2017-07-28 08:09:59
y
#> start_time start_time_integer end_time_integer end_time
#> 1: 2017-07-28 07:00:00 1501225200 1501226099 2017-07-28 07:14:59
#> 2: 2017-07-28 07:15:00 1501226100 1501226999 2017-07-28 07:29:59
#> 3: 2017-07-28 07:30:00 1501227000 1501227899 2017-07-28 07:44:59
#> 4: 2017-07-28 07:45:00 1501227900 1501228799 2017-07-28 07:59:59
#> 5: 2017-07-28 08:00:00 1501228800 1501229699 2017-07-28 08:14:59
out <- intervalaverage(x,y,interval_vars=c("start_time_integer","end_time_integer"),value_vars="A")
out[, start_time:=as.POSIXct(start_time_integer,origin="1969-12-31 24:00:00",tz="UTC")]
out[, end_time:=as.POSIXct(end_time_integer,origin="1969-12-31 24:00:00",tz="UTC")]
out[, list(start_time,end_time, A)]
#> start_time end_time A
#> 1: 2017-07-28 07:00:00 2017-07-28 07:14:59 445.0833
#> 2: 2017-07-28 07:15:00 2017-07-28 07:29:59 443.0833
#> 3: 2017-07-28 07:30:00 2017-07-28 07:44:59 448.3333
#> 4: 2017-07-28 07:45:00 2017-07-28 07:59:59 443.0000
#> 5: 2017-07-28 08:00:00 2017-07-28 08:14:59 NA
#Note that this just equivalent to taking weighted.mean:
weighted.mean(c(447.5,440.25),w=c(10,5))
#> [1] 445.0833
weighted.mean(c(440.25,444.5),w=c(5,10))
#> [1] 443.0833
#etc
请注意,intervalaverage包需要整数列来定义封闭的间隔,因此转换为整数。出于可读性考虑,将整数转换回日期时间(POSIXct)。