如何在R中对数据框取子集

时间:2016-07-15 21:06:57

标签: r

我有一个名为df的数据框:

# Data frame from the question (not assigned to a name in this snippet):
# 12 rows, a POSIXct `Date` column (tzone = "") and a numeric `sCpu` column.
structure(list(Date = structure(c(1468555240, 1468555242, 1468555246, 
1468569649, 1468555251, 1468555257, 1468641020, 1468641021, 1468641021, 
1468641021, 1468641021, 1468641021), tzone = "", class = c("POSIXct", 
"POSIXt")), sCpu = c(7.28602, 9.49307, 7.70778, 8.51675, 6.97994, 
8.46983, 4.14684, 2.51154, 3.27359, 1.84363, 2.47815, 3.29061
)), .Names = c("Date", "sCpu"), row.names = c(NA, -12L), class = "data.frame")

我想获取中午12点到晚上23点59分之间的数据点,不想要午夜到中午12点之间的任何数据点。我怎样才能在R中做到这一点?

我尝试创建一个名为T的时间列并执行此操作:

# Asker's attempt: keep rows whose time-of-day column `T` lies strictly between
# 12:00:00 and 23:59:00. The closing parenthesis of subset() was missing.
# NOTE(review): times() is not base R — presumably chron::times; confirm.
subset(df, T < times(c("23:59:00")) & T > times(c("12:00:00")))

但04:00的数据仍然出现在我最终的子集中,有什么办法可以解决这个问题吗?

4 个答案:

答案 0 :(得分:2)

这应该这样做:

library(lubridate)

# Keep only the rows whose hour-of-day is 12 or later (noon up to midnight).
dfsub <- df[hour(df$Date) >= 12, ]

但是,您的数据在该时间范围内没有值。这是一个包含新数据的工作示例:

# Worked example: hourly timestamps from noon on 2014-07-17 through 11:00 the
# next day, paired with a running counter in `misc`.
dat <- data.frame(
  datetime = seq(
    ymd_hms("2014-07-17 12:00:00"),
    ymd_hms("2014-07-18 11:00:00"),
    by = "hours"
  ),
  misc = seq(1, 24)
)
# Same filter as before: rows at hour 12 or later.
datsub <- dat[hour(dat$datetime) >= 12, ]

答案 1 :(得分:1)

由于小时只能取0到23,我们只需检查小时是否大于等于12即可。无需使用任何包。

# Base R only: four equivalent ways to keep rows at hour 12 or later.

# POSIXlt carries the hour as a numeric component, so it compares with a
# number directly.
df[as.POSIXlt(df$Date)$hour >= 12, ]

subset(df, as.POSIXlt(Date)$hour >= 12)

# format() returns character. Convert to integer so the comparison is numeric;
# comparing the string with 12 would coerce 12 to "12" and compare as text,
# which only works because "%H" is zero-padded.
df[as.integer(format(df$Date, "%H")) >= 12, ]

subset(df, as.integer(format(Date, "%H")) >= 12)

答案 2 :(得分:0)

这个怎么样:

# Convert once to POSIXlt so the hour component can be read with `$hour`.
ps <- as.POSIXlt(df$Date)

# Variant 1: subset() with the hour window 12 through 23.
subset(df, ps$hour >= 12 & ps$hour <= 23)

# Variant 2: the same window using logical row indexing.
df[ps$hour >= 12 & ps$hour <= 23, ]

                  # Date val
# 2  2013-08-02 14:01:21   2
# 3  2013-08-02 16:02:43   3
# 4  2013-08-02 18:04:05   4
# 5  2013-08-02 20:05:27   5
# 6  2013-08-02 22:06:49   6
# 14 2013-08-03 14:17:43  14
# 15 2013-08-03 16:19:05  15
# 16 2013-08-03 18:20:27  16
# 17 2013-08-03 20:21:49  17
# 18 2013-08-03 22:23:10  18
# 26 2013-08-04 14:34:05  26
# 27 2013-08-04 16:35:27  27
# 28 2013-08-04 18:36:49  28
# 29 2013-08-04 20:38:10  29
# 30 2013-08-04 22:39:32  30
# 38 2013-08-05 14:50:27  38
# 39 2013-08-05 16:51:49  39
# 40 2013-08-05 18:53:10  40
# 41 2013-08-05 20:54:32  41
# 42 2013-08-05 22:55:54  42

数据

# Example data for this answer (not assigned to a name in this snippet):
# 45 rows, a POSIXct `Date` column (tzone = "") and an integer `val` = 1:45.
structure(list(Date = structure(c(1375437600, 1375444881.81818, 
1375452163.63636, 1375459445.45455, 1375466727.27273, 1375474009.09091, 
1375481290.90909, 1375488572.72727, 1375495854.54545, 1375503136.36364, 
1375510418.18182, 1375517700, 1375524981.81818, 1375532263.63636, 
1375539545.45455, 1375546827.27273, 1375554109.09091, 1375561390.90909, 
1375568672.72727, 1375575954.54545, 1375583236.36364, 1375590518.18182, 
1375597800, 1375605081.81818, 1375612363.63636, 1375619645.45455, 
1375626927.27273, 1375634209.09091, 1375641490.90909, 1375648772.72727, 
1375656054.54545, 1375663336.36364, 1375670618.18182, 1375677900, 
1375685181.81818, 1375692463.63636, 1375699745.45455, 1375707027.27273, 
1375714309.09091, 1375721590.90909, 1375728872.72727, 1375736154.54545, 
1375743436.36364, 1375750718.18182, 1375758000), class = c("POSIXct", 
"POSIXt"), tzone = ""), val = 1:45), .Names = c("Date", "val"
), row.names = c(NA, -45L), class = "data.frame")

答案 3 :(得分:0)

# Rebuild the question's data frame: 12 rows with a POSIXct `Date` column and a
# numeric `sCpu` column.
df <- structure(list(Date = structure(c(1468555240, 1468555242, 1468555246, 
                                  1468569649, 1468555251, 1468555257, 1468641020, 1468641021, 1468641021, 
                                  1468641021, 1468641021, 1468641021), tzone = "", class = c("POSIXct", 
                                 "POSIXt")), sCpu = c(7.28602, 9.49307, 7.70778, 8.51675, 6.97994, 
                                                      8.46983, 4.14684, 2.51154, 3.27359, 1.84363, 2.47815, 3.29061
                                 )), .Names = c("Date", "sCpu"), row.names = c(NA, -12L), class = "data.frame")


# `Date` is already POSIXct (see the class attribute above); the re-parse is
# kept from the original answer.
# NOTE(review): presumably redundant — confirm before removing.
df$Date <- lubridate::ymd_hms(df$Date)
# Hour of day for each timestamp, stored as a helper column for filtering.
df$hour <- lubridate::hour(df$Date)

# Between noon and midnight
# Use `>= 12` so the 12:00-12:59 hour is included; `> 12` silently dropped it.
df[df$hour >= 12 & df$hour <= 23,] # note - if there were more than 1 date you'd use ymd_hms for the filter
                  Date    sCpu hour
7  2016-07-15 23:50:20 4.14684   23
8  2016-07-15 23:50:21 2.51154   23
9  2016-07-15 23:50:21 3.27359   23
10 2016-07-15 23:50:21 1.84363   23
11 2016-07-15 23:50:21 2.47815   23
12 2016-07-15 23:50:21 3.29061   23