使用filter()子集数据帧

时间:2018-08-31 00:27:00

标签: r dplyr

这是我的数据框:

structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 
1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 1976L, 1976L), Month = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L), A = c(0.419727177, 
0.411522634, 0.415627598, 0.425350915, 0.431778929, 0.455892409, 
0.464252553, 0.473933649, 0.48947626, 0.481231954, 0.495049505, 
0.49419323, 0.4927322, 0.493778392), log.S.grb = c(-0.86815035734372, 
-0.887891256732457, -0.877965616764487, -0.854840768392932, -0.839841560146749, 
-0.785498442482632, -0.767326579572197, -0.746687948097975, -0.714419316655485, 
-0.731405892265036, -0.703097511313113, -0.704828684428783, -0.707789457396815, 
-0.705668461635217), D = c(10.2641, 9.7704, 9.3694, 9.2403, 9.4459, 
9.4826, 10.4272, 10.3805, 10.4835, 11.4103, 10.988, 10.7708, 
9.2987, 8.6161), E = c(5.68, 5.4, 5.53, 5.5, 5.2, 5.86, 6.25, 
6.36, 6.58, 5.51, 5.54, 5.2, 4.73, 5), J = c(1.15663289, 1.05923536, 
0.938740721, 0.890710069, 1.012043355, 0.843618397, 0.850583558, 
0.957856493, 0.888553262, 1.391339534, 1.309574432, 1.322714922, 
1.247746749, 0.894350421), st3_st.grb = c(NA, NA, NA, 1.33095889507878, 
4.80496965857083, 9.24671742818556, 8.75141888207357, 9.31536120487736, 
7.10791258271466, 3.59206873071611, 4.35904367848617, 0.959063222670167, 
2.36164348682203, -0.257095032210358), idiff_3m.grb = c(1.08206512614859, 
1.03439401057093, 0.909556730440464, 0.886569121738501, 1.00676358283349, 
0.856864819463321, 0.983497311932147, 0.946390153374566, 0.917819573656709, 
1.38813518035337, 1.28343598984861, 1.31491300418167, 1.08566149047631, 
0.860507535403032), EXCESS = c("NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")), row.names = c(NA, 
14L), class = "data.frame")

我使用此命令过滤了03/1975至01/1976之间的数据,但无法正常工作。

library(dplyr)

datawork %>%  filter(Year>=1975 & Month>=5 & Year <=1976 & Month <=1 ) 

我在做什么错了?

1 个答案:

答案 0 :(得分:3)

给出YearMonth只是整数列,与Month相关的逻辑谓词将返回一个空数据帧。一种解决方案是将其转换为日期格式,并在dplyr::between行中使用filter。 (请参见下面的可复制示例)。

如@TCZhang所述,如果不进行日期转换,则可以将Year = 1976 Month = 3视为Year = 1976 + 3/12 Years。因此,通过将分数相乘可以得到(1976 * 12)+ 3,您可以直接在dplyr::filter

中使用
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

 datawork <- structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 
1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 1976L, 1976L), Month = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L), A = c(0.419727177, 
0.411522634, 0.415627598, 0.425350915, 0.431778929, 0.455892409, 
0.464252553, 0.473933649, 0.48947626, 0.481231954, 0.495049505, 
0.49419323, 0.4927322, 0.493778392), log.S.grb = c(-0.86815035734372, 
-0.887891256732457, -0.877965616764487, -0.854840768392932, -0.839841560146749, 
-0.785498442482632, -0.767326579572197, -0.746687948097975, -0.714419316655485, 
-0.731405892265036, -0.703097511313113, -0.704828684428783, -0.707789457396815, 
-0.705668461635217), D = c(10.2641, 9.7704, 9.3694, 9.2403, 9.4459, 
9.4826, 10.4272, 10.3805, 10.4835, 11.4103, 10.988, 10.7708, 
9.2987, 8.6161), E = c(5.68, 5.4, 5.53, 5.5, 5.2, 5.86, 6.25, 
6.36, 6.58, 5.51, 5.54, 5.2, 4.73, 5), J = c(1.15663289, 1.05923536, 
0.938740721, 0.890710069, 1.012043355, 0.843618397, 0.850583558, 
0.957856493, 0.888553262, 1.391339534, 1.309574432, 1.322714922, 
1.247746749, 0.894350421), st3_st.grb = c(NA, NA, NA, 1.33095889507878, 
4.80496965857083, 9.24671742818556, 8.75141888207357, 9.31536120487736, 
7.10791258271466, 3.59206873071611, 4.35904367848617, 0.959063222670167, 
2.36164348682203, -0.257095032210358), idiff_3m.grb = c(1.08206512614859, 
1.03439401057093, 0.909556730440464, 0.886569121738501, 1.00676358283349, 
0.856864819463321, 0.983497311932147, 0.946390153374566, 0.917819573656709, 
1.38813518035337, 1.28343598984861, 1.31491300418167, 1.08566149047631, 
0.860507535403032), EXCESS = c("NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")), row.names = c(NA, 
14L), class = "data.frame")

datawork %>% 
  mutate(date = make_date(year = Year, month = Month)) %>% 
  filter(between(date, as.Date("1975-03-01"), as.Date("1976-01-01")))
#>    Year Month         A  log.S.grb       D    E         J st3_st.grb
#> 1  1975     3 0.4156276 -0.8779656  9.3694 5.53 0.9387407         NA
#> 2  1975     4 0.4253509 -0.8548408  9.2403 5.50 0.8907101  1.3309589
#> 3  1975     5 0.4317789 -0.8398416  9.4459 5.20 1.0120434  4.8049697
#> 4  1975     6 0.4558924 -0.7854984  9.4826 5.86 0.8436184  9.2467174
#> 5  1975     7 0.4642526 -0.7673266 10.4272 6.25 0.8505836  8.7514189
#> 6  1975     8 0.4739336 -0.7466879 10.3805 6.36 0.9578565  9.3153612
#> 7  1975     9 0.4894763 -0.7144193 10.4835 6.58 0.8885533  7.1079126
#> 8  1975    10 0.4812320 -0.7314059 11.4103 5.51 1.3913395  3.5920687
#> 9  1975    11 0.4950495 -0.7030975 10.9880 5.54 1.3095744  4.3590437
#> 10 1975    12 0.4941932 -0.7048287 10.7708 5.20 1.3227149  0.9590632
#> 11 1976     1 0.4927322 -0.7077895  9.2987 4.73 1.2477467  2.3616435
#>    idiff_3m.grb EXCESS       date
#> 1     0.9095567     NA 1975-03-01
#> 2     0.8865691     NA 1975-04-01
#> 3     1.0067636     NA 1975-05-01
#> 4     0.8568648     NA 1975-06-01
#> 5     0.9834973     NA 1975-07-01
#> 6     0.9463902     NA 1975-08-01
#> 7     0.9178196     NA 1975-09-01
#> 8     1.3881352     NA 1975-10-01
#> 9     1.2834360     NA 1975-11-01
#> 10    1.3149130     NA 1975-12-01
#> 11    1.0856615     NA 1976-01-01

reprex package(v0.2.0)于2018-08-31创建。