我有一个名为 df 的数据框,其 dput 输出如下:
# Console transcript: `dput(df)` followed by the expression it printed.
# The structure(...) literal describes a 6-row data.frame whose columns
# timestamp, app, qmanager and qname are factors and whose columns que and
# mas are integer vectors. To recreate the data, assign the structure(...)
# expression to `df`.
dput(df)
structure(list(timestamp = structure(c(1L, 1L, 2L, 2L, 2L, 3L
), .Label = c("6/17/2016 11:58", "6/17/2016 12:00", "6/17/2016 15:30"
), class = "factor"), app = structure(c(2L, 2L, 2L, 2L, 1L, 1L
), .Label = c("db", "web"), class = "factor"), qmanager = structure(c(3L,
3L, 3L, 4L, 1L, 2L), .Label = c("db01", "db02", "web01", "web04"
), class = "factor"), qname = structure(c(3L, 3L, 4L, 2L, 1L,
1L), .Label = c("dbtest101", "test02", "test101", "test102"), class = "factor"),
que = c(500L, 600L, 66L, 12000L, 4000L, 666L), mas = c(15000L,
50000L, 15000L, 175000L, 5000L, 15000L)), .Names = c("timestamp",
"app", "qmanager", "qname", "que", "mas"), class = "data.frame", row.names = c(NA,
-6L))
我需要按 qname、qmanager 和 app 分组,
并以 5 分钟为时间间隔、取最大值来聚合这些数据。我正在尝试这样做:
# Parse the timestamp strings (month/day/year hour:minute) into POSIXct.
# NOTE(review): no tz is passed, so the session time zone is presumably
# used -- confirm that is acceptable.
df$timestamp <- as.POSIXct(df$timestamp, format="%m/%d/%Y %H:%M")
library(xts)
# Align every timestamp to a 5-minute (60 * 5 seconds) boundary.
# NOTE(review): xts::align.time is documented as moving times to the NEXT
# boundary -- confirm that rounding up is the intended direction.
df$timestamp<-align.time(df$timestamp, n=60*5)
# NOTE(review): the call below does not parse: there is no comma between
# `df$app` and `df[setdiff(names(df), "timestamp")]`. Even with the comma,
# the grouping columns are passed as extra positional arguments; with the
# formula interface they presumably belong on the right-hand side of the
# formula together with `data = df` (e.g. que ~ interval + qname +
# qmanager + app) -- verify against the aggregate() documentation.
df<-aggregate(que ~ cut(df$timestamp, "5 min"), df$qname,df$qmanager, df$app df[setdiff(names(df), "timestamp")], max)
但我无法让它正常运行,有什么建议吗?
答案 0 :(得分:2)
像这样可以吗?
# Requires dplyr; install once with: install.packages("dplyr")
library(dplyr)

# Bucket each row into a 5-minute interval, then report the largest `que`
# and `mas` per (interval, qname, qmanager, app) combination.
# The interval labels produced by cut() start from the earliest timestamp
# in the data (11:58 in the printed result), not from a clock-aligned
# boundary.
df %>%
  mutate(
    min5_interval = cut(
      as.POSIXct(timestamp, format = "%m/%d/%Y %H:%M"),
      "5 min"
    )
  ) %>%
  group_by(min5_interval, qname, qmanager, app) %>%
  dplyr::summarise(
    max_que = max(que),
    max_mas = max(mas)
  )
Source: local data frame [5 x 6]
Groups: min5_interval, qname, qmanager [?]
min5_interval qname qmanager app max_que max_mas
(fctr) (fctr) (fctr) (fctr) (int) (int)
1 2016-06-17 11:58:00 dbtest101 db01 db 4000 5000
2 2016-06-17 11:58:00 test02 web04 web 12000 175000
3 2016-06-17 11:58:00 test101 web01 web 600 50000
4 2016-06-17 11:58:00 test102 web01 web 66 15000
5 2016-06-17 15:28:00 dbtest101 db02 db 666 15000