我想每隔5分钟将数据划分一次,并将其与每月平均值和标准偏差进行比较,以检测任何异常情况。我每隔5分钟按以下方式剪切数据:
# Bin the timestamps into consecutive 5-minute intervals.
# NOTE(review): assumes month.data is a date-time (POSIXt) vector -- confirm.
# With a "5 min" spec the first break is derived from the earliest
# observation, so the bins are not aligned to clock multiples of 5 minutes
# unless the data happens to start on one.
cut(month.data, breaks = "5 min")
dput(monthlydata)
structure(list(time = c("00:00:00", "00:05:00", "00:10:00", "00:15:00", "00:20:00", "00:25:00", "00:30:00", "00:35:00", "00:40:00", "00:45:00", "00:50:00", "00:55:00", "01:00:00", "01:05:00", "01:10:00", "01:15:00", "01:25:00", "01:30:00", "01:35:00", "01:40:00", "01:45:00", "01:55:00", "02:00:00", "02:20:00", "02:25:00", "02:40:00", "02:45:00", "02:50:00", "03:05:00", "03:15:00", "03:25:00", "03:30:00", "03:35:00", "03:40:00", "03:45:00", "03:50:00", "03:55:00", "04:05:00", "04:25:00", "04:30:00", "04:50:00", "04:55:00", "05:05:00", "05:10:00", "05:15:00", "05:20:00", "05:30:00", "05:40:00", "05:50:00", "05:55:00", "06:00:00", "06:05:00", "06:10:00", "06:15:00", "06:20:00", "06:30:00", "06:35:00", "06:40:00", "06:45:00", "06:55:00", "07:00:00", "07:05:00", "07:10:00", "07:15:00", "07:20:00", "07:25:00", "07:30:00", "07:35:00", "07:40:00", "07:45:00", "07:50:00", "07:55:00", "08:00:00", "08:05:00", "08:10:00", "08:15:00", "08:20:00", "08:25:00", "08:30:00", "08:35:00", "08:40:00", "08:45:00", "08:50:00", "08:55:00", "09:00:00", "09:05:00", "09:10:00", "09:15:00", "09:20:00", "09:25:00", "09:30:00", "09:35:00", "09:40:00", "09:45:00", "09:50:00", "09:55:00", "10:00:00", "10:05:00", "10:10:00", "10:15:00", "10:20:00", "10:25:00", "10:30:00", "10:35:00", "10:40:00", "10:45:00", "10:50:00"), avg = c(50.5510560619622, 43.0189344993435, 50.3126451810161, 51.7984139398903, 44.1216815428764, 54.6443574865021, 50.065771120662, 50.9085361977819, 59.7024337563552, 44.6504863019322, 51.3800349930107, 47.2281110758541, 39.1562855847908, 47.3556824171027, 54.4776585774653, 64.9968044851706, 47.878888210121, 44.7561800618865, 45.0695456307952, 56.1759044802863, 50.8227417957758, 52.6309915011542, 62.7342370217067, 54.4257866432874, 54.3226457929837, 40.1938479668371, 54.5403748037875, 51.8463279336394, 53.8173320832895, 49.9889812414321, 50.5827916556644, 53.5500871960216, 37.0209877205586, 55.2612198888207, 55.1863860227875, 58.2740366768661, 50.5885573635039, 48.1051996319848, 
47.7634397043728, 46.5863241465071, 54.1810631439095, 53.7906152156406, 52.6409719258956, 54.81269123308, 44.0537724370726, 47.1977242746078, 46.5010741707819, 45.8433966693518, 42.3623605036368, 43.0730189148746, 58.4167050044254, 49.7799961792657, 53.0755779045083, 52.0552481180891, 42.0602921415756, 52.5126950828788, 55.0870481980705, 44.4144434705709, 42.392966543036, 47.1807241560313, 44.6884956183158, 47.2896481418499, 57.8319708553495, 46.6397655826931, 52.4873442246903, 51.6324293101077, 55.0908694414676, 43.8596455462562, 48.8941181950083, 48.7514252330684, 49.310070422837,48.5234755805063, 58.5542276809981, 47.7720993402378,49.9405735614802, 52.8780543357139, 58.2557463154677,52.9242756783794, 55.4983029733778, 44.2299478555713,57.3379257421419, 50.1951095071188, 53.0553561602009, 50.3109504601222, 44.9316581536335, 47.7568227989573, 50.3093864093436, 50.0461546149579, 48.2373271954793, 50.2943295283144, 46.8054846556807, 48.7084493434669, 46.5421233124519, 50.1222951953386, 49.4207933535255, 46.1521667031027, 51.8644029994928, 53.4831581582472, 51.4972139096679, 51.5181187952616, 49.1998856564675, 50.3454476017966, 49.7587298896826, 45.6040364435812, 49.7466159629413, 51.0983492421099, 52.2935123336372), sd =c(9.32931925004817, 11.6047417906884, 9.80771691435559, 10.2308327194904, 9.67431773674866, 10.5323558825585, 9.1920111408028, 10.0745961985324, 9.21246056157269, 7.96228334027313, 9.89384474113651, 12.3284041772698, 13.5489518864705, 9.96803285037014, 8.85300006821126, 10.9832318078379, 13.0909163134817, 9.7261635496657, 10.3208509302825, 8.74704188744148, 9.44853223258545, 7.68266078719723, 8.78660429415339, 10.1981152232186,10.0617639380203, 9.70826609005244, 10.4405793131911, 10.0294741532956, 10.5843898470973, 10.0678114702352, 9.72005734098214, 8.71988580635692, 13.0339431549482, 8.85263899155544, 11.1140174478773, 10.6801908843647, 11.0442113270832, 9.6576794753704, 11.193361191756, 9.05129333547447, 11.1414887813967, 10.5985803957382, 
10.2422686622522, 11.2629702278102, 10.4774845098793, 9.82520095973172, 10.6775347630735, 8.97029695502126, 8.28221177072086, 11.0689605695813, 10.4298020842373, 9.57012379689429, 10.1587613403527, 11.2343452027682, 9.09417849538438, 10.3529463918792, 8.89434012398308, 7.86166740352018, 8.67472741747663, 10.7631616313607, 11.4634738459674, 10.3773119423003, 10.1261492697498, 9.51153382612954, 8.8445878796955, 9.99640290007654, 9.33226675473664, 10.2637002156788, 9.44339377944955, 10.5843418581127, 9.00425609052502, 10.2822169680166, 10.5184763916409, 10.3495044419935, 9.99843947958033, 8.67736455800308, 10.1508472078283, 9.3450979185795, 9.84066593055499, 9.95417999414617, 9.2622985333717, 11.4445218170255, 9.72729168049685, 9.36415135782777, 10.2272372991057, 10.4800042675175, 10.0067598340318, 10.6127856103593, 8.91026514253112, 10.3451668931764, 9.86818949901026, 10.736479643411, 8.6424538599602, 9.53874170759392, 10.5484021386586, 10.7130372286846, 10.2509775336419, 10.7454025452377, 9.63372860813097, 9.35982465121709, 9.47486350661184, 10.311507962123, 10.0338294237329, 9.50474631714252, 10.1746281369343, 9.75040171260143, 10.8675383447689)), class = c("tbl_df", "tbl", "data.frame"), .Names = c("time", "avg", "sd"), row.names = c(NA, -107L))
但是,如果我的currday数据的开始时间不在整5分钟的刻度上,cut就会根据数据中最早的条目来计算时间间隔。例如,如果第一个条目是00:01:00,则间隔是00:01:00至00:06:00,以此类推。而我希望时间间隔为00:00:00-00:05:00,以此类推,以便与每月数据的间隔保持一致。有没有办法在cut语句中设置时间间隔的起始下限?或者有没有其他方法可以确保两个序列使用相同的分钟间隔?
一天的数据
dput(currday)
structure(c(1533769260, 1533769320, 1533769320, 1533769380, 1533769380, 1533769380, 1533769440, 1533769500, 1533769680, 1533769740, 1533769740,1533769740, 1533769800, 1533769920, 1533769920, 1533769920,1533769980,1533770100, 1533770100, 1533770220, 1533770280, 1533770340, 1533770400, 1533770460, 1533770460, 1533770460, 1533770520, 1533770520, 1533770580, 1533770580, 1533770640, 1533770880, 1533771060, 1533771060, 1533771120, 1533771300, 1533771300, 1533771300, 1533771360, 1533771360, 1533771540, 1533771660, 1533771660, 1533771960, 1533772260, 1533772260, 1533772260, 1533772440, 1533772440, 1533772560, 1533772620, 1533772620, 1533772800, 1533772980, 1533773160, 1533773400, 1533773400, 1533773460, 1533773760, 1533773880, 1533774480, 1533774540, 1533774900, 1533774900, 1533774960, 1533775080, 1533775140, 1533775320, 1533775380, 1533775500,1533775560, 1533775680, 1533776340, 1533776520, 1533776520,1533776580, 1533777660, 1533777900, 1533777900, 1533778200, 1533779100, 1533779100, 1533779220, 1533779460, 1533779640, 1533780360, 1533780960, 1533781740, 1533782040, 1533782340, 1533782700, 1533782880, 1533783120, 1533783600, 1533784020, 1533784140, 1533785280, 1533785280, 1533785400, 1533785460, 1533786660), class = c("POSIXct", "POSIXt"), tzone = "Europe/London")
我查看了其他问题,只找到一个关于在R中创建以5分钟为间隔的24小时向量的问题,但这不是我想要的。非常感谢您的帮助。
答案 0 :(得分:0)
按照@AleBdC的建议,我检查了链接:Grouping every n minutes with dplyr,发现@MikeyHarper的建议非常有用。
因此,我首先把当天数据(currday)中的时间向下取整到五分钟间隔,然后将日期和时间拆分为两列,再将时间列转换为因子,用于分组和汇总。
library(lubridate)

# Align every observation in `currday` to the start of its 5-minute clock
# interval (00:00, 00:05, ...), so the bins do not depend on when the first
# observation of the day occurs.
#
# `currday` is already POSIXct, so it is floored directly rather than being
# round-tripped through character parsing with ymd_hms().
by5 <- floor_date(
  with_tz(currday, tzone = "Europe/London"),
  unit = "5 minutes"
)

# Split the floored timestamp into separate date and time columns and keep
# the result. format() always emits the time part, even for a bin that
# starts exactly at midnight, so no whitespace-based splitting is needed.
x <- data.frame(
  date = format(by5, "%Y-%m-%d"),
  time = format(by5, "%H:%M:%S"),
  stringsAsFactors = FALSE
)

# Time of day as a factor: the key used for grouping and summarising.
x$time <- as.factor(x$time)
我还使用相同的方法在每月数据中创建了五分钟间隔,并计算了平均值和标准差,并将其与每日数据进行了比较。