我有一个名为df的数据框:
# dput()-style literal of the question's data frame: 12 rows with a POSIXct
# column `Date` (stored as seconds since the epoch; tzone = "" means it is
# shown in the session's local time zone) and a numeric column `sCpu`.
# Note that the value is not assigned to any name here.
structure(list(Date = structure(c(1468555240, 1468555242, 1468555246,
1468569649, 1468555251, 1468555257, 1468641020, 1468641021, 1468641021,
1468641021, 1468641021, 1468641021), tzone = "", class = c("POSIXct",
"POSIXt")), sCpu = c(7.28602, 9.49307, 7.70778, 8.51675, 6.97994,
8.46983, 4.14684, 2.51154, 3.27359, 1.84363, 2.47815, 3.29061
)), .Names = c("Date", "sCpu"), row.names = c(NA, -12L), class = "data.frame")
我想获取中午12点到晚上23点59分之间的数据点,不想保留午夜到中午12点之间的任何数据点。我怎样才能在R中做到这一点?
我尝试创建一个名为T的时间列并执行此操作:
# Question's attempted filter on a time-of-day column named `T`
# (presumably built with chron::times() -- TODO confirm). The closing
# parenthesis of subset() was missing, which made the call a syntax error.
subset(df, T < times("23:59:00") & T > times("12:00:00"))
但04:00的数据仍然出现在我最终的子集中,请问应该怎样正确实现?
答案 0 :(得分:2)
这应该这样做:
library(lubridate)
# Keep only the rows whose hour of day is 12 or later (noon through 23:59).
is_after_noon <- hour(df$Date) >= 12
dfsub <- df[is_after_noon, ]
但是,您的数据在该时间范围内没有值。这是一个包含新数据的工作示例:
# Example data: one timestamp per hour from 2014-07-17 12:00 to
# 2014-07-18 11:00 (24 rows), so hours both before and after noon occur.
hourly_stamps <- seq(
  from = ymd_hms("2014-07-17 12:00:00"),
  to = ymd_hms("2014-07-18 11:00:00"),
  by = "hours"
)
dat <- data.frame(datetime = hourly_stamps, misc = seq_len(24))
# Only rows at 12:00 or later survive the filter.
datsub <- dat[hour(dat$datetime) >= 12, ]
答案 1 :(得分:1)
由于小时只能取0到23之间的值,我们只需检查小时是否大于或等于12即可。无需使用任何额外的包。
# Base R only: POSIXlt exposes the hour of day (0-23) as its `hour` field.
hours_of_day <- as.POSIXlt(df$Date)$hour
df[hours_of_day > 11, ]
或
# Same filter via subset(): `Date` is looked up inside `df`.
subset(df, subset = as.POSIXlt(Date)$hour > 11)
或
# format(..., "%H") returns the hour as a character string ("00".."23").
# Convert it to integer before comparing: comparing the string directly with
# 12 coerces 12 to "12" and performs a string comparison, which only gives the
# right answer because %H happens to be zero-padded.
df[as.integer(format(df$Date, "%H")) >= 12L, ]
或
# Same filter via subset(). The "%H" string is converted to integer so the
# comparison with 12 is numeric rather than an implicit string comparison
# (character vs. numeric coerces the number to "12").
subset(df, as.integer(format(Date, "%H")) >= 12L)
答案 2 :(得分:0)
这个怎么样:
# Convert once to POSIXlt so the hour-of-day field can be read directly.
ps <- as.POSIXlt(df$Date)
# TRUE for timestamps whose hour is in 12..23 (noon up to midnight).
in_window <- ps$hour >= 12 & ps$hour <= 23
subset(df, in_window)
# OR
df[in_window, ]
# NOTE(review): the sample output listed below omits rows in the 12:xx hour
# (e.g. row 1 would print as 12:00:00 given row 2 at 14:01:21), although this
# filter keeps them -- it looks like it was produced with `hour > 12`; confirm.
# Date val
# 2 2013-08-02 14:01:21 2
# 3 2013-08-02 16:02:43 3
# 4 2013-08-02 18:04:05 4
# 5 2013-08-02 20:05:27 5
# 6 2013-08-02 22:06:49 6
# 14 2013-08-03 14:17:43 14
# 15 2013-08-03 16:19:05 15
# 16 2013-08-03 18:20:27 16
# 17 2013-08-03 20:21:49 17
# 18 2013-08-03 22:23:10 18
# 26 2013-08-04 14:34:05 26
# 27 2013-08-04 16:35:27 27
# 28 2013-08-04 18:36:49 28
# 29 2013-08-04 20:38:10 29
# 30 2013-08-04 22:39:32 30
# 38 2013-08-05 14:50:27 38
# 39 2013-08-05 16:51:49 39
# 40 2013-08-05 18:53:10 40
# 41 2013-08-05 20:54:32 41
# 42 2013-08-05 22:55:54 42
数据
# dput()-style literal of the example data used in this answer: 45 rows with
# a POSIXct column `Date` (seconds since the epoch, evenly spaced about
# 7281.8 s apart; tzone = "" means local-time display) and an integer column
# `val` running 1..45. The value is not assigned to any name here.
structure(list(Date = structure(c(1375437600, 1375444881.81818,
1375452163.63636, 1375459445.45455, 1375466727.27273, 1375474009.09091,
1375481290.90909, 1375488572.72727, 1375495854.54545, 1375503136.36364,
1375510418.18182, 1375517700, 1375524981.81818, 1375532263.63636,
1375539545.45455, 1375546827.27273, 1375554109.09091, 1375561390.90909,
1375568672.72727, 1375575954.54545, 1375583236.36364, 1375590518.18182,
1375597800, 1375605081.81818, 1375612363.63636, 1375619645.45455,
1375626927.27273, 1375634209.09091, 1375641490.90909, 1375648772.72727,
1375656054.54545, 1375663336.36364, 1375670618.18182, 1375677900,
1375685181.81818, 1375692463.63636, 1375699745.45455, 1375707027.27273,
1375714309.09091, 1375721590.90909, 1375728872.72727, 1375736154.54545,
1375743436.36364, 1375750718.18182, 1375758000), class = c("POSIXct",
"POSIXt"), tzone = ""), val = 1:45), .Names = c("Date", "val"
), row.names = c(NA, -45L), class = "data.frame")
答案 3 :(得分:0)
# Rebuild the question's data frame and bind it to `df`: 12 rows with a
# POSIXct column `Date` (seconds since the epoch; tzone = "" means it is
# shown in the session's local time zone) and a numeric column `sCpu`.
df <- structure(list(Date = structure(c(1468555240, 1468555242, 1468555246,
1468569649, 1468555251, 1468555257, 1468641020, 1468641021, 1468641021,
1468641021, 1468641021, 1468641021), tzone = "", class = c("POSIXct",
"POSIXt")), sCpu = c(7.28602, 9.49307, 7.70778, 8.51675, 6.97994,
8.46983, 4.14684, 2.51154, 3.27359, 1.84363, 2.47815, 3.29061
)), .Names = c("Date", "sCpu"), row.names = c(NA, -12L), class = "data.frame")
# NOTE(review): `Date` is already constructed as POSIXct, so this re-parse
# looks redundant -- confirm before removing.
df$Date <- lubridate::ymd_hms(df$Date)
# Hour of day (0-23) for each timestamp.
df$hour <- lubridate::hour(df$Date)
# Between noon and midnight: use `>= 12` so the 12:00-12:59 hour is kept
# (the previous `> 12` silently dropped every timestamp in that hour).
df[df$hour >= 12 & df$hour <= 23, ] # note - if there were more than 1 date you'd use ymd_hms for the filter
                  Date    sCpu hour
7  2016-07-15 23:50:20 4.14684   23
8  2016-07-15 23:50:21 2.51154   23
9  2016-07-15 23:50:21 3.27359   23
10 2016-07-15 23:50:21 1.84363   23
11 2016-07-15 23:50:21 2.47815   23
12 2016-07-15 23:50:21 3.29061   23