我有这样的数据
Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1
我想计算每种化学物质每年超过限值的次数(请注意,每种限值都不同)。所以我想得到这样的东西
Year A B C
2016 2 1 1
2017 1 2 0
最后是每年的所有超标
Year exceedances
2016 4
2017 3
我不确定如何在R中实现这一点,希望您能提供帮助。
答案 0 :(得分:1)
使用tidyverse
和reshape2
,您可以执行以下操作:
# Count, per year and chemical, how many measurements exceed their limit,
# then reshape to one row per year and one column per chemical.
library(tidyverse)
library(reshape2)

df %>%
  # characters 7-10 of the date string hold the four-digit year
  mutate(date = substr(date, 7, 10)) %>%
  group_by(date, Chemical) %>%
  # summing a logical vector counts its TRUE values
  summarise(temp = sum(concentration > limit)) %>%
  # drop the leftover grouping before handing the table to reshape2
  ungroup() %>%
  dcast(date ~ Chemical, value.var = "temp")
date A B C
1 2016 2 1 1
2 2017 1 2 0
答案 1 :(得分:1)
另一个tidyverse
选项,
library(tidyverse)
# Keep only the exceedances, derive the year from the date string, count the
# remaining rows per chemical and year, and spread the chemicals into columns
# (combinations without any exceedance are filled with 0).
df %>%
  filter(concentration > limit) %>%
  mutate(
    grp = format(as.POSIXct(date, format = '%m-%d-%Y'), format = '%Y')
  ) %>%
  group_by(Chemical, grp) %>%
  count() %>%
  spread(Chemical, n, fill = 0)
给出,
# A tibble: 2 x 4
# Groups:   grp [2]
  grp       A     B     C
  <chr> <dbl> <dbl> <dbl>
1 2016      2     1     1
2 2017      1     2     0
答案 2 :(得分:1)
还有另一种可能性:
library(dplyr)
library(tidyr)
#library(lubridate) # you can choose to import it or not
# Count exceedances per year and chemical, one column per chemical.
dat %>%
  # parse the day-month-year text directly; wrapping it in format() first
  # does nothing for character/factor input
  mutate(year = lubridate::year(lubridate::dmy(as.character(date)))) %>%
  group_by(year, Chemical) %>%
  # each TRUE (concentration above its limit) counts as 1
  summarise(tot_exceed = sum(concentration > limit)) %>%
  spread(Chemical, tot_exceed) # spread the results by Chemical
# # A tibble: 2 x 4
# # Groups: year [2]
# year A B C
# <dbl> <int> <int> <int>
# 1 2016 2 1 1
# 2 2017 1 2 0
数据:
# Example data as whitespace-separated text; the first line holds the
# column names.
tt <- " Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1"
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
dat <- read.table(text = tt, header = TRUE)
答案 3 :(得分:0)
以下是使用dplyr软件包的解决方案:
/**
 * Installs an `oninput` handler on `ibanData` and returns a Promise that
 * resolves (with no value) once the handler runs while `ibanData.value`
 * has a length of exactly 12. The promise is never rejected here.
 *
 * @param ibanData Object whose `oninput` property is overwritten and whose
 *   `value.length` is inspected on every input event.
 * @returns {Promise} Resolves when the length check passes.
 */
const inputAsPromise = (ibanData) =>
  new Promise((resolve, reject) => {
    ibanData.oninput = (_event) => {
      if (ibanData.value.length === 12) {
        return resolve();
      }
      return '';
    };
    //@todo: onerror should be reject
  });
df_2:
library(dplyr)
# Example data: three chemicals, four sampling dates each.
chemical <- rep(c("A", "B", "C"), each = 4)
date <- rep(
  c("01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017"),
  times = 3
)
# Derive the year from the `date` vector above; the data frame `df` is only
# created further down, so it cannot be referenced here.
year <- format(as.Date(date, format = "%m-%d-%Y"), "%Y")
concentration <- c(0.2, 0.2, 0.005, 0.2, 0.3, 0.05, 0.2, 0.2, 1.2, 0.8, 0.9, 0.9)
limit <- rep(c(0.01, 0.1, 1), each = 4)
df <- data.frame(chemical, date, year, concentration, limit)

# Keep only the measurements whose concentration exceeds the limit.
df_1 <- df %>%
  mutate(exceed = concentration > limit) %>%
  filter(exceed)

# Number of exceedances per chemical and year.
df_2 <- df_1 %>%
  group_by(chemical, year) %>%
  count(exceed)
df_2的输出:
chemical year exceed n
<fct> <fct> <lgl> <int>
1 A 2016 TRUE 2
2 A 2017 TRUE 1
3 B 2016 TRUE 1
4 B 2017 TRUE 2
5 C 2016 TRUE 1
# Total exceedances per year. df_2 already holds one row per chemical and
# year with its count in `n`, so weight by `n` explicitly: an unweighted
# count() tallies rows, and whether dplyr weights by an existing `n` column
# on its own has differed between releases.
df_3 <- df_2 %>%
  group_by(year) %>%
  count(exceed, wt = n)
答案 4 :(得分:0)
使用data.table
,我们将'data.frame'转换为'data.table'(setDT(df1)
),按year
(由转换为Date
类的'date'得到)和'Chemical'分组,获取逻辑向量的sum
并dcast
转换为“宽”格式
library(data.table)
library(lubridate)
# setDT() turns df1 into a data.table by reference. Exceedances are summed per
# year and chemical into a named column, which is passed to dcast() explicitly
# so the value column does not have to be guessed.
dcast(
  setDT(df1)[, .(exceedances = sum(concentration > limit)),
             by = .(date = year(dmy(date)), Chemical)],
  date ~ Chemical,
  value.var = "exceedances"
)
# date A B C
#1: 2016 2 1 1
#2: 2017 1 2 0
或将base R
与xtabs
一起使用
# Base R: flag each row that exceeds its limit, reduce the date to its year
# (characters 7-10), and let xtabs() sum the flags per year and chemical.
xtabs(
  cond ~ date + Chemical,
  data = within(df1, {
    cond <- concentration > limit
    date <- substr(date, 7, 10)
  })
)
# Chemical
#date A B C
# 2016 2 1 1
# 2017 1 2 0
# Example data: chemicals A, B and C, each measured on the same four dates,
# with one limit per chemical.
df1 <- data.frame(
  Chemical = rep(c("A", "B", "C"), each = 4),
  date = rep(
    c("01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017"),
    times = 3
  ),
  concentration = c(
    0.2, 0.2, 0.005, 0.2,
    0.3, 0.05, 0.2, 0.2,
    1.2, 0.8, 0.9, 0.9
  ),
  limit = rep(c(0.01, 0.1, 1), each = 4),
  stringsAsFactors = FALSE
)
答案 5 :(得分:0)
将tidyverse
与@akrun的数据一起使用:
library(tidyverse)
# Exceedances per year and chemical: keep the rows above their limit, take
# the year from characters 7-10 of the date, count, and widen by chemical
# (missing combinations become 0).
df1 %>%
  filter(concentration > limit) %>%
  mutate(Year = substr(date, 7, 10)) %>%
  count(Chemical, Year) %>%
  spread(Chemical, n, fill = 0)
# # A tibble: 2 x 4
# Year A B C
# * <chr> <dbl> <dbl> <dbl>
# 1 2016 2 1 1
# 2 2017 1 2 0
# Total exceedances per year, across all chemicals.
df1 %>%
  filter(concentration > limit) %>%
  mutate(Year = substr(date, 7, 10)) %>%
  count(Year)
# A tibble: 2 x 2
# Year n
# <chr> <int>
# 1 2016 4
# 2 2017 3