我又在这里
我有一个为期3个月的数据集,其中包含每小时的数据。 我需要按周计算一天中每个小时的平均值。 也就是说,不是整个一周的单一平均值,而是每一周内一天中各个小时的平均值。
我还没有做任何尝试,因为我不知道该从何入手。 我能做的就是与您共享我的数据集的一部分。
# dput() output for the first six rows of the data set (29 columns):
#   Hourtime   - POSIXct timestamps, roughly one hour apart
#   HOF        - values 0 through 5 in this sample
#   H_flux, LE_flux, Turbulence and 24 mz*_flux columns - numeric measurements
# The expression is not assigned here; assign it to a name (the answers below
# use `df` or `dat`) before running their code.
structure(list(Hourtime = structure(c(1527804000, 1527807600.73559,
1527811201.47119, 1527814802.20678, 1527818402.94238, 1527822003.67797
), class = c("POSIXct", "POSIXt"), tzone = ""), HOF = c(0, 1,
2, 3, 4, 5), H_flux = c(-7.9856017965, -5.9197070475, -8.3727508595,
-17.317657695, -20.81087357, -7.6067714585), LE_flux = c(-0.0788009009557579,
-1.90920163435432, 0.251986931688322, -1.25918680530234, 0.497851355551565,
10.6053213610874), Turbulence = c(0.1061918215, 0.08405, 0.1211055835,
0.208830021, 0.2305439105, 0.219717154), mz31_flux = c(0.02342,
-0.008085, 0.01424, 0.02375, -0.01505, 0.03235), mz33_flux = c(0.0361,
-0.0239, -0.1048, -0.0205, 0.2685, 0.2255), mz39_flux = c(-0.057,
-0.00199999999999999, 0.2345, 0.3745, 0.029, -0.3645), mz42_flux = c(2e-04,
0.0119, 0.00655, -0.00495, 0.0064, -0.004), mz45_flux = c(0.06575,
0.028, -0.05065, 0.1115, 0.0844, 0.08305), mz47_flux = c(-0.046,
0.00685, 0.02795, 0.06215, -0.01425, -0.0383), mz59_flux = c(0.0474,
0.03845, -0.03475, -0.00784999999999999, 0.07285, -0.10705),
mz61_flux = c(-0.01585, 0.01135, 0.03077, 0.01605, -0.0579,
0.01725), mz69_flux = c(0.02105, 0.001225, -0.01625, 0.0074,
-0.0062, 0.000949999999999998), mz71_flux = c(0.000545, 0.00335,
0.00221, -0.01115, 0.00195, -0.0021), mz75_flux = c(-0.00202500000000001,
0.00011, 0.0051385, 0.000277500000000003, -0.0012705, -0.00884999999999998
), mz79_flux = c(0.010005, 0.00919, -0.0072, -0.02325, -0.0045,
-0.03495), mz85_flux = c(-0.007545, -0.00196, -0.013675,
0.0037, 0.010395, -0.02955), mz87_flux = c(0.01014, 0.00746,
-0.003515, 0.01265, -0.00256, -0.01645), mz93_flux = c(0.01165,
0.031, 0.0224, 0.029325, 0.02195, 0.0736), mz99_flux = c(0.00022,
0.000495, -0.003895, -0.00068, 0.008325, 0.009685), mz101_flux = c(0.008145,
-0.00175, 0.0108, 0.0148, -0.0132, 0.00495), mz107_flux = c(-0.02735,
0.0189, 0.0144, 0.0093, -0.00525, -0.0037), mz111_flux = c(0.002505,
0.00135, 0.004185, -0.00274, 0.00484, -0.005175), mz113_flux = c(0.00215,
0.0012235, 0.00277, 0.002775, -0.00438, -0.00568), mz135_flux = c(-0.00801,
0.004815, 0.014065, -0.002315, 0.00317, -0.0119), mz137_flux = c(0.02895,
0.008273, -0.03515, 0.00471, 0.014485, 3.73594), mz149_flux = c(-0.00256,
0.0001485, 0.004081, -0.00187, -0.00153, 0.002755), mz155_flux = c(-0.000105,
0.0005345, -6.435e-05, 0.000846, 1988.94262555, 0.00012)), row.names = c(NA,
6L), class = "data.frame")
答案 0 :(得分:1)
请考虑计算每个时间戳与起始时间(即 `min(df$Hourtime)`)之间相差的周数,并使用 `format()` 提取小时,然后用 `aggregate()` 求平均值:
df <- within(df, {
  # Whole weeks elapsed since the earliest timestamp. The difftime units are
  # requested explicitly rather than relying on the unit that
  # `Hourtime - min(Hourtime)` happens to pick automatically.
  week <- floor(as.numeric(
    difftime(Hourtime, min(Hourtime, na.rm = TRUE), units = "weeks")
  ))
  # Hour of day (0-23) as formatted from Hourtime
  hour <- as.integer(format(Hourtime, "%H"))
})
# Mean of every remaining column for each week/hour combination.
# na.action = na.pass keeps rows that have a missing value in some column
# (the formula method's default, na.omit, drops such rows entirely), and
# na.rm = TRUE is forwarded to mean() so each column ignores only its own NAs.
agg <- aggregate(. ~ week + hour, data = df, FUN = mean,
                 na.rm = TRUE, na.action = na.pass)
输出
agg
# week hour Hourtime HOF H_flux LE_flux Turbulence mz31_flux mz33_flux mz39_flux mz42_flux mz45_flux mz47_flux mz59_flux mz61_flux mz69_flux mz71_flux
# 1 0 17 1527804000 0 -7.985602 -0.0788009 0.1061918 0.023420 0.0361 -0.0570 0.00020 0.06575 -0.04600 0.04740 -0.01585 0.021050 0.000545
# 2 0 18 1527807601 1 -5.919707 -1.9092016 0.0840500 -0.008085 -0.0239 -0.0020 0.01190 0.02800 0.00685 0.03845 0.01135 0.001225 0.003350
# 3 0 19 1527811201 2 -8.372751 0.2519869 0.1211056 0.014240 -0.1048 0.2345 0.00655 -0.05065 0.02795 -0.03475 0.03077 -0.016250 0.002210
# 4 0 20 1527814802 3 -17.317658 -1.2591868 0.2088300 0.023750 -0.0205 0.3745 -0.00495 0.11150 0.06215 -0.00785 0.01605 0.007400 -0.011150
# 5 0 21 1527818403 4 -20.810874 0.4978514 0.2305439 -0.015050 0.2685 0.0290 0.00640 0.08440 -0.01425 0.07285 -0.05790 -0.006200 0.001950
# 6 0 22 1527822004 5 -7.606771 10.6053214 0.2197172 0.032350 0.2255 -0.3645 -0.00400 0.08305 -0.03830 -0.10705 0.01725 0.000950 -0.002100
# mz75_flux mz79_flux mz85_flux mz87_flux mz93_flux mz99_flux mz101_flux mz107_flux mz111_flux mz113_flux mz135_flux mz137_flux mz149_flux mz155_flux
# 1 -0.0020250 0.010005 -0.007545 0.010140 0.011650 0.000220 0.008145 -0.02735 0.002505 0.0021500 -0.008010 0.028950 -0.0025600 -0.00010500
# 2 0.0001100 0.009190 -0.001960 0.007460 0.031000 0.000495 -0.001750 0.01890 0.001350 0.0012235 0.004815 0.008273 0.0001485 0.00053450
# 3 0.0051385 -0.007200 -0.013675 -0.003515 0.022400 -0.003895 0.010800 0.01440 0.004185 0.0027700 0.014065 -0.035150 0.0040810 -0.00006435
# 4 0.0002775 -0.023250 0.003700 0.012650 0.029325 -0.000680 0.014800 0.00930 -0.002740 0.0027750 -0.002315 0.004710 -0.0018700 0.00084600
# 5 -0.0012705 -0.004500 0.010395 -0.002560 0.021950 0.008325 -0.013200 -0.00525 0.004840 -0.0043800 0.003170 0.014485 -0.0015300 1988.94262555
# 6 -0.0088500 -0.034950 -0.029550 -0.016450 0.073600 0.009685 0.004950 -0.00370 -0.005175 -0.0056800 -0.011900 3.735940 0.0027550 0.00012000
答案 1 :(得分:0)
将您在上面提供的数据命名为 `df`,并使用 `dplyr` 和 `lubridate` 包:
library(dplyr)
library(lubridate)
# Derive the week and hour-of-day grouping keys directly inside group_by(),
# then average every remaining column within each (week, hour) group.
df %>%
  group_by(
    week = lubridate::week(Hourtime),
    hour = lubridate::hour(Hourtime)
  ) %>%
  summarise_all(mean)
答案 2 :(得分:0)
这是使用 `data.table` 的解决方案,其中将您在上面提供的输出命名为 `dat`。
# Placeholder: replace `...` with the dput() output from the question.
dat <- structure(...)
library(data.table)
## Make the dataset non-trivial
# Append three weeks (24 * 7 * 3 = 504) of hourly timestamps, starting at the
# fifth original timestamp. The measurement columns are filled by cycling
# through original rows 2, 3, 4, 5, 1. Everything is built in one step
# instead of calling rbind() once per new row.
to_add <- seq(dat$Hourtime[5], by = "hour", length.out = 24 * 7 * 3)
src_rows <- seq_along(to_add) %% 5 + 1
filler <- data.frame(to_add, dat[src_rows, -1])
names(filler) <- names(dat)
# Drop the row names inherited from the repeated source rows
rownames(filler) <- NULL
dat <- rbind(dat, filler)
dim(dat)
#> [1] 510 29
## Main answer begins here
dt <- data.table(dat)
# Grouping keys: week and hour of each timestamp
dt[, c("Week", "Hour") := .(week(Hourtime), hour(Hourtime))]
# Columns to average: every name ending in "_flux". The pattern is a proper
# anchored regex; the earlier '*_flux' was glob syntax whose leading `*` has
# nothing to repeat.
flux_cols <- grep("_flux$", names(dt), value = TRUE)
# One row per Week/Hour pair holding the mean of each flux column
means <- dcast(
  dt, Week + Hour ~ .,
  value.var = flux_cols,
  fun.aggregate = mean
)
means[, .(Week, Hour, H_flux)]
#> Week Hour H_flux
#> 1: 22 0 -12.845204
#> 2: 22 1 -19.064266
#> 3: 22 2 -14.398238
#> 4: 22 3 -6.952654
#> 5: 22 4 -7.146229
#> 6: 22 5 -12.845204
#> 7: 22 6 -19.064266
#> 8: 22 7 -14.398238
#> 9: 22 8 -6.952654
#> 10: 22 9 -7.146229
#> 11: 22 10 -12.845204
#> 12: 22 11 -19.064266
#> 13: 22 12 -14.398238
#> 14: 22 13 -6.952654
#> 15: 22 14 -7.146229
#> 16: 22 15 -12.845204
#> 17: 22 16 -19.064266
#> 18: 22 17 -14.398238
#> 19: 22 18 -7.296970
#> 20: 22 19 -6.737388
#> 21: 22 20 -11.354386
#> 22: 22 21 -18.482063
#> 23: 22 22 -13.881764
#> 24: 22 23 -7.471208
#> 25: 23 0 -10.615986
#> 26: 23 1 -10.671293
#> 27: 23 2 -12.299571
#> 28: 23 3 -14.076446
#> 29: 23 4 -12.743295
#> 30: 23 5 -10.615986
#> 31: 23 6 -10.671293
#> 32: 23 7 -12.299571
#> 33: 23 8 -14.076446
#> 34: 23 9 -12.743295
#> 35: 23 10 -10.615986
#> 36: 23 11 -10.671293
#> 37: 23 12 -12.299571
#> 38: 23 13 -14.076446
#> 39: 23 14 -12.743295
#> 40: 23 15 -10.615986
#> 41: 23 16 -10.671293
#> 42: 23 17 -12.299571
#> 43: 23 18 -14.076446
#> 44: 23 19 -12.743295
#> 45: 23 20 -10.615986
#> 46: 23 21 -10.671293
#> 47: 23 22 -12.299571
#> 48: 23 23 -14.076446
#> 49: 24 0 -14.076446
#> 50: 24 1 -12.743295
#> 51: 24 2 -10.615986
#> 52: 24 3 -10.671293
#> 53: 24 4 -12.299571
#> 54: 24 5 -14.076446
#> 55: 24 6 -12.743295
#> 56: 24 7 -10.615986
#> 57: 24 8 -10.671293
#> 58: 24 9 -12.299571
#> 59: 24 10 -14.076446
#> 60: 24 11 -12.743295
#> 61: 24 12 -10.615986
#> 62: 24 13 -10.671293
#> 63: 24 14 -12.299571
#> 64: 24 15 -14.076446
#> 65: 24 16 -12.743295
#> 66: 24 17 -10.615986
#> 67: 24 18 -10.671293
#> 68: 24 19 -12.299571
#> 69: 24 20 -14.076446
#> 70: 24 21 -12.743295
#> 71: 24 22 -10.615986
#> 72: 24 23 -10.671293
#> 73: 25 0 -12.081318
#> 74: 25 1 -12.081318
#> 75: 25 2 -12.081318
#> 76: 25 3 -12.081318
#> 77: 25 4 -12.081318
#> 78: 25 5 -12.081318
#> 79: 25 6 -12.081318
#> 80: 25 7 -12.081318
#> 81: 25 8 -12.081318
#> 82: 25 9 -12.081318
#> 83: 25 10 -12.081318
#> 84: 25 11 -12.081318
#> 85: 25 12 -12.081318
#> 86: 25 13 -12.081318
#> 87: 25 14 -12.081318
#> 88: 25 15 -12.081318
#> 89: 25 16 -12.081318
#> 90: 25 17 -12.081318
#> 91: 25 18 -12.081318
#> 92: 25 19 -12.081318
#> 93: 25 20 -12.081318
#> 94: 25 21 -12.081318
#> 95: 25 22 -13.105247
#> 96: 25 23 -13.621721
#> Week Hour H_flux
由reprex package(v0.3.0)于2019-10-03创建