使用dplyr

时间:2018-05-22 15:11:09

标签: r date dplyr tidyverse

我正在尝试过滤我的数据集,只保留1996-2015年的行,并且只保留orgid、stdate、locid、charnam这几列。整个数据集包含1988年至2015年的数据,以及大量其他的列。我刚刚了解了dplyr软件包,并认为这将是最佳选择。但是我一直收到下面这个错误,不明白为什么会出现。错误信息是:Error in is_character(x) : object 'rlang_is_character' not found

到目前为止,这是我的代码:

    ########## download necessary packages to make script run #########################################################################
# Install pacman on first use. requireNamespace() only checks whether the
# package is available (without attaching it), which is all that is needed
# because pacman is called below via `pacman::`.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Load (installing if necessary) the tidyverse, which provides dplyr and %>%.
pacman::p_load("tidyverse")

#### Read in the necessary data ######
roadsalt_data <- read.table(
  "QADportaldata_1988-2015.tsv",
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE
)

# Convert date column from a character class to a date class so ggplot can
# display as a continuous variable ###
roadsalt_data$stdate <- as.Date(roadsalt_data$stdate)

## Filter dataset to only contain years 1996-2015 ########
# Keep the four columns of interest, then keep rows whose `stdate` falls in
# the date window. between() must be given the date *column* (`stdate`);
# passing the whole data frame, as before, cannot produce a row-wise filter.
roadsalt_data_sub <- roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  filter(between(stdate, as.Date("1996-01-01"), as.Date("2015-07-01")))

下面是数据集的预览:

structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"
), actid = c("nwisnj.01.01300274", "nwisnj.01.01300273", "nwisnj.01.01300247", 
"nwisnj.01.01300242", "nwisnj.01.01300238", "nwisnj.01.01300237", 
"nwisnj.01.01300189", "nwisnj.01.01300189", "nwisnj.01.01300189", 
"nwisnj.01.01300190"), actyp = c("Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine"
), stdate = structure(c(15755, 15755, 15748, 15748, 15748, 15748, 
15740, 15740, 15740, 15740), class = "Date"), sttime = c("09:30:00", 
"11:00:00", "10:30:00", "12:00:00", "11:00:00", "11:30:00", "09:25:00", 
"09:25:00", "09:25:00", "09:30:00"), actdep = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), actdepun = c("", "", "", "", "", "", "", "", "", ""
), locid = c("USGS-01407760", "USGS-01445030", "USGS-01380075", 
"USGS-01368820", "USGS-01409815", "USGS-01411400", "USGS-01458570", 
"USGS-01458570", "USGS-01458570", "USGS-01445160"), actcom = c("A-0520044 TPCN = 64mL filtered", 
"A-0520046 TPCN = 124mL filtered", "A-0460036 TPCN = 124mL filtered L-0460036 Received February 14, 2013", 
"A-0460025 TPCN = 125mL filtered L-0460025 Received February 14, 2013", 
"A-0460027 TPCN = 64mL filtered.  ATTN: H.Ardourel, LL ANC and LL pH L-0460027 Received February 14, 2013", 
"A-0460028 TPCN = 125mL filtered. L-0460028 Received February 14, 2013", 
"A-0370012 TPCN = 125mL filtered", "A-0370012 TPCN = 125mL filtered", 
"A-0370012 TPCN = 125mL filtered", "A-0370011 TPCN = 125mL filtered"
), hydcond = c("Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, normal stage", "Stable, high stage", 
"Falling stage", "Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, high stage"), hydev = c("Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample"), metcont = c("USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398"), metnam = c("Multiple verticals", 
"Multiple verticals", "Multiple verticals", "Multiple verticals", 
"Single vertical", "Multiple verticals", "Grab sample  (dip)", 
"Grab sample  (dip)", "Grab sample  (dip)", "Multiple verticals"
), detcond = c("", "", "", "", "Not Detected", "", "", "", "", 
""), charnam = c("Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Inorganic nitrogen (nitrate and nitrite)", "Phosphorus", "Phosphorus", 
"Kjeldahl nitrogen"), samfrac = c("Dissolved", "Dissolved", "Dissolved", 
"Dissolved", "Dissolved", "Dissolved", "Dissolved", "Total", 
"Dissolved", "Dissolved"), val = c("0.84", "1.2", "0.46", "0.28", 
"", "0.66", "3.10", "0.032", "0.028", "0.21"), valunit = c("mg/l", 
"mg/l", "mg/l", "mg/l", "", "mg/l", "mg/l as N", "mg/l as P", 
"mg/l as P", "mg/l as N"), valqual = c("", "", "", "", "", "", 
"", "", "", ""), valstat = c("Accepted", "Accepted", "Accepted", 
"Accepted", "Accepted", "Accepted", "Accepted", "Accepted", "Accepted", 
"Accepted"), statcode = c("", "", "", "", "", "", "", "", "", 
""), valtype = c("Actual", "Actual", "Actual", "Actual", "Actual", 
"Actual", "Actual", "Actual", "Actual", "Actual"), precval = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), valcom = c("", "", "", "", "", "", "Report level code updated Oct., Nov. 2015. Reference: NWQL Rapi-Note 2011-21 (RLC: IRL => LT-MDL)", 
"", "", ""), valdep = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    valdepun = c("", "", "", "", "", "", "", "", "", ""), valmetnam = c("Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "NO3+NO2, wf, FCC,NaR, DA", 
    "P, wu, WCA, persulfate CF color", "P, wf, FCA, persulfate CF color", 
    "NH3+org-N, wf, FCA, Kjeldahl, CF"), metdesc = c("NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "", "", "", 
    "USGS OF 00-170"), labcom = c("", "", "", "", "", "", "", 
    "", "", ""), detlimnam = c("", "", "", "", "Historical Lower Reporting Limit", 
    "", "Long Term Method Detection Level", "Long Term Method Detection Level", 
    "Long Term Method Detection Level", "Long Term Method Detection Level"
    ), detlimval = c("", "", "", "", "0.23", "", "0.04", "0.004", 
    "0.0040", "0.07"), detlimun = c("", "", "", "", "mg/l", "", 
    "mg/l as N", "mg/l as P", "mg/l as P", "mg/l as N"), V63 = c("NWIS", 
    "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", 
    "NWIS")), .Names = c("orgid", "actid", "actyp", "stdate", 
"sttime", "actdep", "actdepun", "locid", "actcom", "hydcond", 
"hydev", "metcont", "metnam", "detcond", "charnam", "samfrac", 
"val", "valunit", "valqual", "valstat", "statcode", "valtype", 
"precval", "valcom", "valdep", "valdepun", "valmetnam", "metdesc", 
"labcom", "detlimnam", "detlimval", "detlimun", "V63"), row.names = c(NA, 
10L), class = "data.frame")

任何帮助将不胜感激!提前谢谢!

2 个答案:

答案 0 :(得分:1)

如果你厌倦了与tidyverse依赖地狱作战,你可以试试data.table

library(data.table)

# Turn the existing data.frame into a data.table by reference
setDT(roadsalt_data)

# i: rows whose stdate lies within the date window
# j: only the four columns of interest
roadsalt_data[
  between(stdate, as.Date("1996-01-01"), as.Date("2015-07-01")),
  list(orgid, stdate, locid, charnam)
]

#       orgid     stdate         locid                                                      charnam
#  1: USGS-NJ 2013-02-19 USGS-01407760 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  2: USGS-NJ 2013-02-19 USGS-01445030 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  3: USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  4: USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  5: USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  6: USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  7: USGS-NJ 2013-02-04 USGS-01458570                     Inorganic nitrogen (nitrate and nitrite)
#  8: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
#  9: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
# 10: USGS-NJ 2013-02-04 USGS-01445160                                            Kjeldahl nitrogen

答案 1 :(得分:1)

如果我没记错的话,即使将`left`和`right`参数都用`as.Date()`转换,`between()`也无法用于`Date`。

library(dplyr)

roadsalt_data <- as_tibble(roadsalt_data) # not necessary, just convenient console output

roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  filter(stdate >= "2013-02-04", stdate <= "2013-02-12")
#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam
#>   <chr>   <date>     <chr>         <chr>
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  filter(between(stdate, as.Date("2013-02-04"), as.Date("2013-02-12")))
#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam
#>   <chr>   <date>     <chr>         <chr>
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# How I would've done it
library(lubridate)

roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  # filter(between(year(stdate), 1996, 2015)) # for years instead of days
  filter(between(day(stdate), 4, 12))
#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam
#>   <chr>   <date>     <chr>         <chr>
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# If {lubridate} isn't installed, this is all year() and day() do...
get_day <- function(date) as.POSIXlt(date, tz = tz(date))$mday
# get_year <- function(date) as.POSIXlt(date, tz = tz(date))$year + 1900 # for years instead of days

roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  # filter(between(get_year(stdate), 1996, 2015)) # for years instead of days
  filter(between(get_day(stdate), 4, 12))
#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam
#>   <chr>   <date>     <chr>         <chr>
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# Base R
roadsalt_data <- roadsalt_data[, c("orgid", "stdate", "locid", "charnam")]

roadsalt_data[roadsalt_data$stdate >= as.Date("2013-02-04") &
                roadsalt_data$stdate <= as.Date("2013-02-12") ,]
#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam
#>   <chr>   <date>     <chr>         <chr>
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

以下是一些替代方案。由于您的所有样本数据都在指定年份之间,因此这些示例改为筛选2013-02-04和2013-02-12之间的日期。请根据您的实际需要相应调整。

// Switch the held object's Rigidbody to kinematic if it is not already.
if (_currentlyHeldObject != null) {
    var heldBody = _currentlyHeldObject.GetComponent<Rigidbody>();
    if (!heldBody.isKinematic) {
        heldBody.isKinematic = true;
    }
}

reprex package(v0.2.0)创建于2018-05-23。

=============================================== ===============

如果这些方法都不起作用,那么一定是其他地方出了问题。