使用dplyr计算标准差时,返回了不应该出现的NA

时间:2017-04-12 04:08:33

标签: r dplyr standard-deviation

我试图针对整个日期范围内的收益率数据,计算证券S和证券B每分钟收益率的标准差。

对于证券S,样本中所有日期在15:41这一时刻的收益率如下:

sample

c(9.78237288670086e-05, -0.000478679433439075, -0.000815849476806222, 
-0.00104531810077364, 0.000991518042062172, -0.000481762633530326, 
0.000103264062935107, 0.000533498558109242, 0.00013655059028412, 
0.000684017572494667, -0.0010666543999283, 0.00111305447657944, 
0.000350943499215542, -0.000728452559245173, -0.000133010630777755, 
0.000273805385288854, -0.000541815253997811)

我应该得到标准偏差:

sd(sample)
[1] 0.0006778196 

同样,在12:02也出现了另一个错误的NA:

c(6.60974283750572e-05, 0.000136481483259815, -6.6884541045211e-05, 
3.45265989371524e-07, 0.000262426448938174, 6.59361301702748e-05, 
0.000129839556949415, 0.000548861044701233, 0.000131773159828252, 
-0.000336677148988292)

我应该得到:

sd(sample)
[1] 0.0002264425

对于12:04,类似的NA现象:

c(-0.000511510506030053, -6.36748185365645e-05, -0.000461914296267199, 
0.000498827890900754, -0.000407637171003328, -0.000344290866374583, 
-0.000170414237452937, 0.00012470163477781, -0.00025976973379323, 
-6.84333430222517e-05, 6.74028653020233e-05, 0.000349203389118181, 
1.73806217228455e-07)

而实际上

sd(sample)
[1] 0.0003077007

12:04数据的dput输出:如果对其运行dplyr命令,它应返回完全有效的标准偏差

structure(list(DATETIME = structure(1:13, .Label = c("2007-06-06 12:04:00", 
"2007-06-27 12:04:00", "2007-07-25 12:04:00", "2007-08-03 12:04:00", 
"2007-08-27 12:04:00", "2007-08-29 12:04:00", "2007-09-11 12:04:00", 
"2007-09-26 12:04:00", "2007-10-29 12:04:00", "2007-11-13 12:04:00", 
"2007-11-14 12:04:00", "2007-11-26 12:04:00", "2007-12-13 12:04:00"
), class = "factor"), MINUTE = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L), HOUR = c(12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L), DAY = c(6L, 27L, 25L, 3L, 
27L, 29L, 11L, 26L, 29L, 13L, 14L, 26L, 13L), MONTH = c(6L, 6L, 
7L, 8L, 8L, 8L, 9L, 9L, 10L, 11L, 11L, 11L, 12L), YEAR = c(2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L), RET.B = c(0, -2.17485863410644e-06, 0, 
0, 0, 0.000430714747194296, 0.000652460524786482, -0.000224157179885068, 
0, 0.000431560597189706, 0, -0.000522420548541596, 0), RET.S = c(-0.000511510506030053, 
-6.36748185365645e-05, -0.000461914296267199, 0.000498827890900754, 
-0.000407637171003328, -0.000344290866374583, -0.000170414237452937, 
0.00012470163477781, -0.00025976973379323, -6.84333430222517e-05, 
6.74028653020233e-05, 0.000349203389118181, 1.73806217228455e-07
)), class = "data.frame", row.names = c(NA, -13L), .Names = c("DATETIME", 
"MINUTE", "HOUR", "DAY", "MONTH", "YEAR", "RET.B", "RET.S"))

但是,因为我的大样本包含更多的分钟和天数,所以我通过以下dplyr命令计算了它们的标准偏差

data_original %>%  
    group_by(HOUR, MINUTE) %>% 
    summarise(STD_DEV_S = sd(RET.S), 
              STD_DEV_B = sd(RET.B))

注意:我也尝试过na.rm = TRUE参数,结果没有变化,没有任何NA被去除。

data_original <- data_original %>% 
    group_by(HOUR, MINUTE) %>% 
    summarise(STD_DEV_S = sd(RET.S, na.rm = TRUE), 
              STD_DEV_BIL = sd(RET.B, na.rm = TRUE))

以下是标准偏差数据集的dput

structure(list(HOUR = c(12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L), 
    MINUTE = c(2L, 4L, 8L, 14L, 22L, 29L, 44L, 46L, 47L, 48L, 
    49L, 51L, 52L, 54L, 56L, 58L, 0L, 5L, 9L, 16L, 18L, 19L, 
    21L, 25L, 28L, 30L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 41L, 
    42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 57L, 58L, 
    59L, 3L, 4L, 6L, 7L, 8L, 13L, 14L, 15L, 16L, 19L, 24L, 27L, 
    28L, 29L, 30L, 37L, 41L, 43L, 51L, 53L, 54L, 55L, 56L, 57L, 
    59L, 1L, 2L, 3L, 10L, 12L, 14L, 15L, 29L, 33L, 34L, 35L, 
    37L, 39L, 41L, 42L, 44L, 45L, 47L, 48L, 50L, 52L, 53L, 55L, 
    56L, 57L, 59L), STD_DEV_S = c(NA, NA, NA, NA, NA, NA, NA, 
    1.00694568830708e-10, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 0.000193132875381868, NA, NA, NA, NA, 0.000238078900543023, 
    NA, NA, NA, 9.53251626570527e-05, 9.12458748630885e-05, 3.68329210829957e-05, 
    NA, 8.26407656897388e-05, NA, NA, 0.000292533661987067, NA, 
    NA, 0.000302477582215417, NA, 0.00014151757269228, NA, 1.47800176921126e-06, 
    NA, 0.000177633950322518, NA, NA, 0.000246543106829263, NA, 
    NA, NA, 0.000882128914387174, NA, 0.0111616060713996, NA, 
    NA, NA, NA, NA, NA, NA, NA, 0.000333828124024393, NA, 6.17648758558693e-05, 
    NA, 0.000175379264691811, NA, 0.00172685329635406, NA, NA, 
    0.00173851454042975, NA, NA, NA, NA, NA, 0.000713775044911004, 
    0.000608137130111404, 0.000148678119710893, NA, NA, NA, NA, 
    NA, 0.000340832680361768, NA, 7.60599434434376e-06, NA, NA, 
    NA, 0.00015470058433227, 0.000870316976462816, 0.000280759320556483, 
    NA, NA, 0.000303553445174538), STD_DEV_B = c(NA, NA, NA, 
    NA, NA, NA, NA, 0.000152478664167725, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, 1.53786063292825e-06, NA, NA, 
    NA, NA, 0.000310723454012154, NA, NA, NA, 0.000155594306042618, 
    0.000154289064190793, 0.000251349703608842, NA, 0.000925936422330737, 
    NA, NA, 0.000424250083757898, NA, NA, 0.000324016266256633, 
    NA, 0.000924893664437753, NA, 1.54020544841043e-06, NA, 0.000154255406018086, 
    NA, NA, 0.000347279332142245, NA, NA, NA, 7.44041572526506e-05, 
    NA, 0.000685450210200628, NA, NA, NA, NA, NA, NA, NA, NA, 
    0.000617156256763284, NA, 1.10975260021509e-06, NA, 0.000108866030344832, 
    NA, 0.000227892543844934, NA, NA, 0.000616618078209316, NA, 
    NA, NA, NA, NA, 0.000771310776315698, 0.000240271526606721, 
    0.000154120920049348, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, 
    NA, 1.54087335195036e-06, 0.000129386204897517, 0.000227858493926251, 
    NA, NA, 0.0002286421278086), TIME = structure(c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), month = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hour = c(12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
    15, 15, 15, 15, 15, 15), minute = c(2, 4, 8, 14, 22, 29, 
    44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 0, 5, 9, 16, 18, 
    19, 21, 25, 28, 30, 32, 33, 34, 35, 36, 37, 38, 41, 42, 43, 
    44, 45, 46, 47, 48, 49, 50, 51, 57, 58, 59, 3, 4, 6, 7, 8, 
    13, 14, 15, 16, 19, 24, 27, 28, 29, 30, 37, 41, 43, 51, 53, 
    54, 55, 56, 57, 59, 1, 2, 3, 10, 12, 14, 15, 29, 33, 34, 
    35, 37, 39, 41, 42, 44, 45, 47, 48, 50, 52, 53, 55, 56, 57, 
    59), class = structure("Period", package = "lubridate"))), class = "data.frame", row.names = c(NA, 
-98L), .Names = c("HOUR", "MINUTE", "STD_DEV_S", "STD_DEV_B", 
"TIME"))

正如您所看到的,在15:41,S的标准偏差返回为NA;前7个NA值也是如此:12:02、12:04、12:08、12:14、12:22、12:29、12:44

前7个NA值所对应数据的dput输出:如果对其运行dplyr命令,它应该返回完全有效的标准偏差

structure(list(DATETIME = structure(1:83, .Label = c("2007-06-06 12:04:00", 
"2007-06-12 12:14:00", "2007-06-27 12:04:00", "2007-07-12 12:29:00", 
"2007-07-13 12:29:00", "2007-07-20 12:22:00", "2007-07-20 12:29:00", 
"2007-07-25 12:02:00", "2007-07-25 12:04:00", "2007-07-30 12:08:00", 
"2007-07-31 12:08:00", "2007-08-03 12:02:00", "2007-08-03 12:04:00", 
"2007-08-03 12:08:00", "2007-08-03 12:14:00", "2007-08-06 12:08:00", 
"2007-08-08 12:02:00", "2007-08-09 12:14:00", "2007-08-13 12:08:00", 
"2007-08-13 12:14:00", "2007-08-14 12:29:00", "2007-08-14 12:44:00", 
"2007-08-16 12:08:00", "2007-08-23 12:29:00", "2007-08-27 12:04:00", 
"2007-08-28 12:29:00", "2007-08-29 12:04:00", "2007-08-30 12:22:00", 
"2007-08-30 12:44:00", "2007-08-31 12:08:00", "2007-08-31 12:29:00", 
"2007-09-05 12:08:00", "2007-09-05 12:14:00", "2007-09-05 12:22:00", 
"2007-09-07 12:08:00", "2007-09-11 12:02:00", "2007-09-11 12:04:00", 
"2007-09-13 12:22:00", "2007-09-13 12:29:00", "2007-09-14 12:29:00", 
"2007-09-18 12:08:00", "2007-09-18 12:29:00", "2007-09-24 12:14:00", 
"2007-09-24 12:29:00", "2007-09-25 12:44:00", "2007-09-26 12:04:00", 
"2007-09-28 12:02:00", "2007-09-28 12:08:00", "2007-10-05 12:08:00", 
"2007-10-09 12:22:00", "2007-10-11 12:44:00", "2007-10-12 12:14:00", 
"2007-10-15 12:08:00", "2007-10-17 12:29:00", "2007-10-19 12:02:00", 
"2007-10-29 12:04:00", "2007-10-30 12:14:00", "2007-10-30 12:44:00", 
"2007-10-31 12:02:00", "2007-11-07 12:08:00", "2007-11-07 12:14:00", 
"2007-11-13 12:04:00", "2007-11-14 12:04:00", "2007-11-14 12:22:00", 
"2007-11-19 12:22:00", "2007-11-20 12:08:00", "2007-11-20 12:44:00", 
"2007-11-21 12:14:00", "2007-11-21 12:22:00", "2007-11-26 12:04:00", 
"2007-11-28 12:02:00", "2007-11-28 12:22:00", "2007-11-29 12:22:00", 
"2007-11-30 12:14:00", "2007-12-06 12:08:00", "2007-12-10 12:02:00", 
"2007-12-10 12:08:00", "2007-12-11 12:22:00", "2007-12-13 12:04:00", 
"2007-12-17 12:08:00", "2007-12-18 12:14:00", "2007-12-26 12:22:00", 
"2007-12-27 12:02:00"), class = "factor"), MINUTE = c(4L, 14L, 
4L, 29L, 29L, 22L, 29L, 2L, 4L, 8L, 8L, 2L, 4L, 8L, 14L, 8L, 
2L, 14L, 8L, 14L, 29L, 44L, 8L, 29L, 4L, 29L, 4L, 22L, 44L, 8L, 
29L, 8L, 14L, 22L, 8L, 2L, 4L, 22L, 29L, 29L, 8L, 29L, 14L, 29L, 
44L, 4L, 2L, 8L, 8L, 22L, 44L, 14L, 8L, 29L, 2L, 4L, 14L, 44L, 
2L, 8L, 14L, 4L, 4L, 22L, 22L, 8L, 44L, 14L, 22L, 4L, 2L, 22L, 
22L, 14L, 8L, 2L, 8L, 22L, 4L, 8L, 14L, 22L, 2L), HOUR = c(12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L), DAY = c(6L, 12L, 27L, 12L, 13L, 20L, 20L, 
25L, 25L, 30L, 31L, 3L, 3L, 3L, 3L, 6L, 8L, 9L, 13L, 13L, 14L, 
14L, 16L, 23L, 27L, 28L, 29L, 30L, 30L, 31L, 31L, 5L, 5L, 5L, 
7L, 11L, 11L, 13L, 13L, 14L, 18L, 18L, 24L, 24L, 25L, 26L, 28L, 
28L, 5L, 9L, 11L, 12L, 15L, 17L, 19L, 29L, 30L, 30L, 31L, 7L, 
7L, 13L, 14L, 14L, 19L, 20L, 20L, 21L, 21L, 26L, 28L, 28L, 29L, 
30L, 6L, 10L, 10L, 11L, 13L, 17L, 18L, 26L, 27L), MONTH = c(6L, 
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), YEAR = c(2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L
), RET.B = c(0, 0, -2.17485863410644e-06, -0.000218102508178801, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000436681222707492, 2.18198163199602e-06, 
0, 0.000435872294594679, -0.000302850759441319, 0, 0, 0.000217533173808963, 
0.00043506634761808, 0, 0, 0.000430714747194296, 0.000217344055640189, 
-0.000434593654932706, -0.000165261940175256, -0.00021625478379565, 
0, 0, -0.000436300174520138, 0.00052142420804618, -0.000654164849542109, 
0.000652460524786482, -2.17959895386486e-06, 2.17960370452681e-06, 
0, 0, -0.000749446149323345, 0, 0, 0, -0.000224157179885068, 
-0.00108832889442356, 0, -0.000654450261780129, 0, 2.18055425319755e-06, 
0.000122125684776186, 0, 0, -0.000871032643209069, 0, 0, -0.00012189812799306, 
-0.000653073037201933, -0.000322828325131771, 0, 0.000431560597189706, 
0, -0.000436109899694636, 0, -0.000653452406882902, 0, -0.00043529919659603, 
0, -0.000522420548541596, 0.000871080139372804, 0, 0, -0.00050891692040007, 
0, 5.66975013847502e-05, -6.10553859572739e-05, 0.000881139961039179, 
0, 0.00108984711796917, 0.000436014824504101, -0.000217722621380318, 
-0.000169786678275926), RET.S = c(-0.000511510506030053, 1.50798142768921e-07, 
-6.36748185365645e-05, 0.000325971641531768, 7.85447691849372e-08, 
0.00026122942181018, 0.000262895511244587, 6.60974283750572e-05, 
-0.000461914296267199, -0.000342324311186784, 0.000337606595481509, 
0.000136481483259815, 0.000498827890900754, 0.000989293539454498, 
0.000346599311788321, 0.000972662223917925, -6.6884541045211e-05, 
0.000880016198865045, 0.000137655341835112, -0.000204591530646723, 
0.000624446736636471, 0.000486106996184283, -0.00151417908785445, 
0.000342468047420744, -0.000407637171003328, 0.000137868635859495, 
-0.000344290866374583, 0.00205224248761815, 0.000136403986823039, 
-0.000541716313992435, 0.000135301702489429, 6.79286451856945e-05, 
0.000347546504406297, -0.000203091423563585, 0.00013714072091746, 
3.45265989371524e-07, -0.000170414237452937, 0.0002013455858478, 
-6.70192974376605e-05, 2.71444000775529e-08, 6.70935200322906e-05, 
0.000201204290073773, 0.000131185179643344, 0.000459150003030885, 
0.00019864260854958, 0.00012470163477781, 0.000262426448938174, 
0.000203931745297139, 0.000135250743026116, 0.000128641053195872, 
-0.000189957905754624, 0.000320705988239717, 0.00012917558184182, 
-0.000389339259032602, 6.59361301702748e-05, -0.00025976973379323, 
0.000587406401071317, -5.08900143169655e-05, 0.000129839556949415, 
-0.000199444260467586, -0.000399388070952602, -6.84333430222517e-05, 
6.74028653020233e-05, 0.000591590208533922, 0.000138780968301319, 
0.00020831228937301, -0.00104333906891781, 0.000844275593893371, 
-0.000266904506005042, 0.000349203389118181, 0.000548861044701233, 
-0.000205223882777962, -0.000135251988252783, -0.000269565436487676, 
-0.000400512638518217, 0.000131773159828252, 6.59282282715629e-05, 
0.000131838729908868, 1.73806217228455e-07, 0.000205075655030559, 
-0.00131395883722512, 0.000268767524013111, -0.000336677148988292
)), class = "data.frame", row.names = c(NA, -83L), .Names = c("DATETIME", 
"MINUTE", "HOUR", "DAY", "MONTH", "YEAR", "RET.B", "RET.S"))

当我使用样本中的所有数据时,我不明白为什么会得到NA。 知道为什么会这样吗?手动更新所有标准偏差会很痛苦......

2 个答案:

答案 0 :(得分:0)

尝试在sd()调用中添加na.rm=TRUE。例如:

sd()

摘自该函数帮助文档的Description部分:

  

此函数计算x中值的标准偏差。 如果na.rm为TRUE,则在计算进行之前删除缺失值。

答案 1 :(得分:0)

将样本数据集用作测试数据集

 test_data <- structure(list(DATETIME = structure(1:83, .Label = c("2007-06-06 12:04:00", 
"2007-06-12 12:14:00", "2007-06-27 12:04:00", "2007-07-12 12:29:00", 
"2007-07-13 12:29:00", "2007-07-20 12:22:00", "2007-07-20 12:29:00", 
"2007-07-25 12:02:00", "2007-07-25 12:04:00", "2007-07-30 12:08:00", 
"2007-07-31 12:08:00", "2007-08-03 12:02:00", "2007-08-03 12:04:00", 
"2007-08-03 12:08:00", "2007-08-03 12:14:00", "2007-08-06 12:08:00", 
"2007-08-08 12:02:00", "2007-08-09 12:14:00", "2007-08-13 12:08:00", 
"2007-08-13 12:14:00", "2007-08-14 12:29:00", "2007-08-14 12:44:00", 
"2007-08-16 12:08:00", "2007-08-23 12:29:00", "2007-08-27 12:04:00", 
"2007-08-28 12:29:00", "2007-08-29 12:04:00", "2007-08-30 12:22:00", 
"2007-08-30 12:44:00", "2007-08-31 12:08:00", "2007-08-31 12:29:00", 
"2007-09-05 12:08:00", "2007-09-05 12:14:00", "2007-09-05 12:22:00", 
"2007-09-07 12:08:00", "2007-09-11 12:02:00", "2007-09-11 12:04:00", 
"2007-09-13 12:22:00", "2007-09-13 12:29:00", "2007-09-14 12:29:00", 
"2007-09-18 12:08:00", "2007-09-18 12:29:00", "2007-09-24 12:14:00", 
"2007-09-24 12:29:00", "2007-09-25 12:44:00", "2007-09-26 12:04:00", 
"2007-09-28 12:02:00", "2007-09-28 12:08:00", "2007-10-05 12:08:00", 
"2007-10-09 12:22:00", "2007-10-11 12:44:00", "2007-10-12 12:14:00", 
"2007-10-15 12:08:00", "2007-10-17 12:29:00", "2007-10-19 12:02:00", 
"2007-10-29 12:04:00", "2007-10-30 12:14:00", "2007-10-30 12:44:00", 
"2007-10-31 12:02:00", "2007-11-07 12:08:00", "2007-11-07 12:14:00", 
"2007-11-13 12:04:00", "2007-11-14 12:04:00", "2007-11-14 12:22:00", 
"2007-11-19 12:22:00", "2007-11-20 12:08:00", "2007-11-20 12:44:00", 
"2007-11-21 12:14:00", "2007-11-21 12:22:00", "2007-11-26 12:04:00", 
"2007-11-28 12:02:00", "2007-11-28 12:22:00", "2007-11-29 12:22:00", 
"2007-11-30 12:14:00", "2007-12-06 12:08:00", "2007-12-10 12:02:00", 
"2007-12-10 12:08:00", "2007-12-11 12:22:00", "2007-12-13 12:04:00", 
"2007-12-17 12:08:00", "2007-12-18 12:14:00", "2007-12-26 12:22:00", 
"2007-12-27 12:02:00"), class = "factor"), MINUTE = c(4L, 14L, 
4L, 29L, 29L, 22L, 29L, 2L, 4L, 8L, 8L, 2L, 4L, 8L, 14L, 8L, 
2L, 14L, 8L, 14L, 29L, 44L, 8L, 29L, 4L, 29L, 4L, 22L, 44L, 8L, 
29L, 8L, 14L, 22L, 8L, 2L, 4L, 22L, 29L, 29L, 8L, 29L, 14L, 29L, 
44L, 4L, 2L, 8L, 8L, 22L, 44L, 14L, 8L, 29L, 2L, 4L, 14L, 44L, 
2L, 8L, 14L, 4L, 4L, 22L, 22L, 8L, 44L, 14L, 22L, 4L, 2L, 22L, 
22L, 14L, 8L, 2L, 8L, 22L, 4L, 8L, 14L, 22L, 2L), HOUR = c(12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L), DAY = c(6L, 12L, 27L, 12L, 13L, 20L, 20L, 
25L, 25L, 30L, 31L, 3L, 3L, 3L, 3L, 6L, 8L, 9L, 13L, 13L, 14L, 
14L, 16L, 23L, 27L, 28L, 29L, 30L, 30L, 31L, 31L, 5L, 5L, 5L, 
7L, 11L, 11L, 13L, 13L, 14L, 18L, 18L, 24L, 24L, 25L, 26L, 28L, 
28L, 5L, 9L, 11L, 12L, 15L, 17L, 19L, 29L, 30L, 30L, 31L, 7L, 
7L, 13L, 14L, 14L, 19L, 20L, 20L, 21L, 21L, 26L, 28L, 28L, 29L, 
30L, 6L, 10L, 10L, 11L, 13L, 17L, 18L, 26L, 27L), MONTH = c(6L, 
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), YEAR = c(2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L
), RET.B = c(0, 0, -2.17485863410644e-06, -0.000218102508178801, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.000436681222707492, 2.18198163199602e-06, 
0, 0.000435872294594679, -0.000302850759441319, 0, 0, 0.000217533173808963, 
0.00043506634761808, 0, 0, 0.000430714747194296, 0.000217344055640189, 
-0.000434593654932706, -0.000165261940175256, -0.00021625478379565, 
0, 0, -0.000436300174520138, 0.00052142420804618, -0.000654164849542109, 
0.000652460524786482, -2.17959895386486e-06, 2.17960370452681e-06, 
0, 0, -0.000749446149323345, 0, 0, 0, -0.000224157179885068, 
-0.00108832889442356, 0, -0.000654450261780129, 0, 2.18055425319755e-06, 
0.000122125684776186, 0, 0, -0.000871032643209069, 0, 0, -0.00012189812799306, 
-0.000653073037201933, -0.000322828325131771, 0, 0.000431560597189706, 
0, -0.000436109899694636, 0, -0.000653452406882902, 0, -0.00043529919659603, 
0, -0.000522420548541596, 0.000871080139372804, 0, 0, -0.00050891692040007, 
0, 5.66975013847502e-05, -6.10553859572739e-05, 0.000881139961039179, 
0, 0.00108984711796917, 0.000436014824504101, -0.000217722621380318, 
-0.000169786678275926), RET.S = c(-0.000511510506030053, 1.50798142768921e-07, 
-6.36748185365645e-05, 0.000325971641531768, 7.85447691849372e-08, 
0.00026122942181018, 0.000262895511244587, 6.60974283750572e-05, 
-0.000461914296267199, -0.000342324311186784, 0.000337606595481509, 
0.000136481483259815, 0.000498827890900754, 0.000989293539454498, 
0.000346599311788321, 0.000972662223917925, -6.6884541045211e-05, 
0.000880016198865045, 0.000137655341835112, -0.000204591530646723, 
0.000624446736636471, 0.000486106996184283, -0.00151417908785445, 
0.000342468047420744, -0.000407637171003328, 0.000137868635859495, 
-0.000344290866374583, 0.00205224248761815, 0.000136403986823039, 
-0.000541716313992435, 0.000135301702489429, 6.79286451856945e-05, 
0.000347546504406297, -0.000203091423563585, 0.00013714072091746, 
3.45265989371524e-07, -0.000170414237452937, 0.0002013455858478, 
-6.70192974376605e-05, 2.71444000775529e-08, 6.70935200322906e-05, 
0.000201204290073773, 0.000131185179643344, 0.000459150003030885, 
0.00019864260854958, 0.00012470163477781, 0.000262426448938174, 
0.000203931745297139, 0.000135250743026116, 0.000128641053195872, 
-0.000189957905754624, 0.000320705988239717, 0.00012917558184182, 
-0.000389339259032602, 6.59361301702748e-05, -0.00025976973379323, 
0.000587406401071317, -5.08900143169655e-05, 0.000129839556949415, 
-0.000199444260467586, -0.000399388070952602, -6.84333430222517e-05, 
6.74028653020233e-05, 0.000591590208533922, 0.000138780968301319, 
0.00020831228937301, -0.00104333906891781, 0.000844275593893371, 
-0.000266904506005042, 0.000349203389118181, 0.000548861044701233, 
-0.000205223882777962, -0.000135251988252783, -0.000269565436487676, 
-0.000400512638518217, 0.000131773159828252, 6.59282282715629e-05, 
0.000131838729908868, 1.73806217228455e-07, 0.000205075655030559, 
-0.00131395883722512, 0.000268767524013111, -0.000336677148988292
)), class = "data.frame", row.names = c(NA, -83L), .Names = c("DATETIME", 
"MINUTE", "HOUR", "DAY", "MONTH", "YEAR", "RET.B", "RET.S"))

当我将dplyr代码设置为

test_data %>%  
    group_by(HOUR, MINUTE) %>% 
    summarise(STD_DEV_S = sd(RET.S), 
              STD_DEV_B = sd(RET.B))

它可以根据需要返回完美的结果,如下所示

  HOUR MINUTE    STD_DEV_S    STD_DEV_B
  <int>  <int>        <dbl>        <dbl>
1    12      2 0.0002264425 0.0005735081
2    12      4 0.0003077007 0.0002993752
3    12      8 0.0005494082 0.0004086279
4    12     14 0.0006074754 0.0002532825
5    12     22 0.0006206074 0.0003373157
6    12     29 0.0002667720 0.0002694854
7    12     44 0.0005264716 0.0001746570

您的情况也是这样吗?还是只有在将这段代码应用于整个数据集时才会遇到问题?

我建议您在运行这段dplyr代码之前,先检查数据框的数据结构。