# Hour-of-day labels (0..24), one per interval between consecutive breaks.
lab_var_num <- 0:24
# Interval breaks on the clock-time scale: 0, 100, ..., 2500 (26 breaks, 25 bins).
times_var <- seq(from = 0, to = 2500, by = 100)
# Load the combined logger readings.
all_files_ls <- read_csv("~/Desktop/bioinformatic_work/log_parse_files/sorted_by_habitat/all_trap/all_files_la_selva_log.csv")
# Split off implausible readings: rows whose temp is strictly between 10 and 50
# are kept in all_files_ls; everything else (including missing temps, which
# subset() would otherwise drop silently) is captured in all_files_ls_bad.
all_files_ls_bad <- subset(all_files_ls, is.na(temp) | temp <= 10 | temp >= 50)
all_files_ls <- subset(all_files_ls, temp > 10 & temp < 50)
# Convert the character dates to POSIXct.
# The raw dates must all be in a consistent %m/%d/%Y format before coercion;
# values that do not match the format come out as NA.
# strptime() returns POSIXlt, which should not be stored in a data-frame
# column, so it is converted to POSIXct in the same step.
all_files_ls$date <- as.POSIXct(strptime(all_files_ls$date, format = "%m/%d/%Y"))
# Season subsets for 2015 and 2016: `_w` holds readings dated 1 May to
# 31 December, `_s` holds readings dated 1 January to 30 April.
# The character bounds are coerced to date-times by the comparison operators.
all_files_ls_w <- subset(
  all_files_ls,
  (date >= "2015-05-01" & date <= "2015-12-31") |
    (date >= "2016-05-01" & date <= "2016-12-31")
)
all_files_ls_s <- subset(
  all_files_ls,
  (date >= "2015-01-01" & date <= "2015-4-30") |
    (date >= "2016-01-01" & date <= "2016-04-30")
)
# Within each season, separate canopy ("c"/"C") from understory ("u"/"U").
all_files_ls_s_c <- subset(all_files_ls_s, canopy_understory %in% c("c", "C"))
all_files_ls_s_u <- subset(all_files_ls_s, canopy_understory %in% c("u", "U"))
all_files_ls_w_c <- subset(all_files_ls_w, canopy_understory %in% c("c", "C"))
all_files_ls_w_u <- subset(all_files_ls_w, canopy_understory %in% c("u", "U"))
# Summarise temperature per hour-of-day bin for one season/stratum subset.
#
# Arguments:
#   data         data frame with a `time` column (clock readings such as 600
#                or 1601; coerced with as.numeric()) and a numeric `temp`.
#   hour_breaks  interval breaks passed to cut(); defaults to `times_var`.
#   hour_labels  one label per interval; defaults to `lab_var_num`.
#
# Returns a tibble with one row per occupied bin and the columns `time`
# (factor of bin labels), `standard_deviation` and `mean`.
#
# The binning happens *before* grouping so that every reading falling in the
# same hour contributes to a single mean/sd. Summarising by raw timestamp and
# relabelling afterwards would leave several rows per hour. Intervals are
# left-closed (right = FALSE) so that a reading at exactly 600 is labelled
# hour 6 rather than hour 5; include.lowest = TRUE closes the last interval.
.summarise_temp_by_hour <- function(data,
                                    hour_breaks = times_var,
                                    hour_labels = lab_var_num) {
  data %>%
    group_by(
      time = cut(
        as.numeric(time),
        breaks = hour_breaks,
        labels = hour_labels,
        right = FALSE,
        include.lowest = TRUE
      )
    ) %>%
    summarise(
      standard_deviation = sd(temp, na.rm = TRUE),
      mean = mean(temp, na.rm = TRUE)
    )
}

all_files_ls_s_c_summ <- .summarise_temp_by_hour(all_files_ls_s_c)
all_files_ls_s_u_summ <- .summarise_temp_by_hour(all_files_ls_s_u)
all_files_ls_w_c_summ <- .summarise_temp_by_hour(all_files_ls_w_c)
all_files_ls_w_u_summ <- .summarise_temp_by_hour(all_files_ls_w_u)
trap serial_no file_name canopy_understory date time temp humidity
1 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 600 20.1 <NA>
2 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 800 25.5 <NA>
3 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 1000 29.0 <NA>
4 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 1200 28.0 <NA>
5 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 1400 28.5 <NA>
6 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 1601 27.5 <NA>
7 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 1803 25.5 <NA>
8 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 2001 23.5 <NA>
9 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-28 2200 22.5 <NA>
10 LS_trap_10c 7C000000395C1641 trap10c_7C000000395C1641_150809.csv c 2015-05-29 000 21.5 <NA>
11 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 0159 23.6 <NA>
12 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 0359 24.1 <NA>
13 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 0559 24.1 <NA>
14 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 0759 24.6 <NA>
15 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 0959 24.6 <NA>
16 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 1159 26.1 <NA>
17 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 1359 26.6 <NA>
18 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 1559 25.6 <NA>
19 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 1759 24.1 <NA>
20 LS_trap_10u 9F00000039641541 trap10u_9F00000039641541_160110.csv u 2016-01-01 1959 24.1 <NA>
从每个日期提取月份(在基础 R 中可以使用 `strptime(df$date, '%Y-%m-%d')$mon + 1`)
(这将导致 R >= 3.4.0 中的错误)。设置好所有分组变量后,汇总就很简单,并且符合 DRY 原则。
# Mean and standard deviation of temp for every combination of
# canopy/understory, season and hour bin, computed in a single pipeline.
df %>%
  group_by(
    # Group by canopy/understory factor.
    canopy_understory,
    # Extract numeric month from date. If less than 5, make `season` "s"
    # else "w", and group by it.
    season = ifelse(lubridate::month(date) < 5, 's', 'w'),
    # Cut time by 0,100,200,...,2400, and group by the factor returned.
    # Intervals are right-closed, so a reading at 600 lands in (500,600].
    hour = cut(time, seq(0, 2400, 100), include.lowest = TRUE)
  ) %>%
  # For each group, calc mean and sd of temp. Missing temps are ignored so a
  # single NA does not turn the whole group's statistics into NA.
  summarise(
    temp_mean = mean(temp, na.rm = TRUE),
    temp_sd = sd(temp, na.rm = TRUE)
  )
#> # A tibble: 20 x 5
#> # Groups: canopy_understory, season [?]
#> canopy_understory season hour temp_mean temp_sd
#> <fctr> <chr> <fctr> <dbl> <dbl>
#> 1 c w [0,100] 21.5 NA
#> 2 c w (500,600] 20.1 NA
#> 3 c w (700,800] 25.5 NA
#> 4 c w (900,1e+03] 29.0 NA
#> 5 c w (1.1e+03,1.2e+03] 28.0 NA
#> 6 c w (1.3e+03,1.4e+03] 28.5 NA
#> 7 c w (1.6e+03,1.7e+03] 27.5 NA
#> 8 c w (1.8e+03,1.9e+03] 25.5 NA
#> 9 c w (2e+03,2.1e+03] 23.5 NA
#> 10 c w (2.1e+03,2.2e+03] 22.5 NA
#> 11 u s (100,200] 23.6 NA
#> 12 u s (300,400] 24.1 NA
#> 13 u s (500,600] 24.1 NA
#> 14 u s (700,800] 24.6 NA
#> 15 u s (900,1e+03] 24.6 NA
#> 16 u s (1.1e+03,1.2e+03] 26.1 NA
#> 17 u s (1.3e+03,1.4e+03] 26.6 NA
#> 18 u s (1.5e+03,1.6e+03] 25.6 NA
#> 19 u s (1.7e+03,1.8e+03] 24.1 NA
#> 20 u s (1.9e+03,2e+03] 24.1 NA
# Reproducible 20-row sample of the logger data: ten readings from trap
# LS_trap_10c followed by ten from LS_trap_10u. Text columns are factors with
# explicitly ordered levels, and row names are the character strings "1".."20".
df <- data.frame(
  trap = factor(
    rep(c("LS_trap_10c", "LS_trap_10u"), each = 10L),
    levels = c("LS_trap_10c", "LS_trap_10u")
  ),
  serial_no = factor(
    rep(c("7C000000395C1641", "9F00000039641541"), each = 10L),
    levels = c("7C000000395C1641", "9F00000039641541")
  ),
  file_name = factor(
    rep(
      c("trap10c_7C000000395C1641_150809.csv",
        "trap10u_9F00000039641541_160110.csv"),
      each = 10L
    ),
    levels = c("trap10c_7C000000395C1641_150809.csv",
               "trap10u_9F00000039641541_160110.csv")
  ),
  canopy_understory = factor(
    rep(c("c", "u"), each = 10L),
    levels = c("c", "u")
  ),
  date = factor(
    rep(c("2015-05-28", "2015-05-29", "2016-01-01"), times = c(9L, 1L, 10L)),
    levels = c("2015-05-28", "2015-05-29", "2016-01-01")
  ),
  time = c(
    600L, 800L, 1000L, 1200L, 1400L, 1601L, 1803L, 2001L, 2200L, 0L,
    159L, 359L, 559L, 759L, 959L, 1159L, 1359L, 1559L, 1759L, 1959L
  ),
  temp = c(
    20.1, 25.5, 29, 28, 28.5, 27.5, 25.5, 23.5, 22.5, 21.5,
    23.6, 24.1, 24.1, 24.6, 24.6, 26.1, 26.6, 25.6, 24.1, 24.1
  ),
  # The literal string "<NA>" is the only level; these are not real NA values.
  humidity = factor(rep("<NA>", 20L), levels = "<NA>"),
  row.names = as.character(1:20)
)