按年份统计数值,并以另一列(限值)作为判断条件

时间:2018-12-13 14:21:55

标签: r date count sum

我有这样的数据

  Chemical  date      concentration  limit
   A     01-01-2016     0.2         0.01
   A     01-02-2016     0.2         0.01
   A     01-01-2017     0.005       0.01
   A     01-02-2017     0.2         0.01
   B     01-01-2016     0.3         0.1
   B     01-02-2016     0.05        0.1
   B     01-01-2017     0.2         0.1
   B     01-02-2017     0.2         0.1
   C     01-01-2016     1.2         1
   C     01-02-2016     0.8         1
   C     01-01-2017     0.9         1
   C     01-02-2017     0.9         1

我想计算每种化学物质每年超过限值的次数(请注意,每种化学物质的限值各不相同)。我希望得到如下结果:

  Year   A     B    C
  2016   2     1    1
  2017   1     2    0

最后,再得到每年所有化学物质的超标总次数:

 Year exceedances
 2016   4
 2017   3

我不确定如何在 R 中执行此操作,希望能得到您的帮助。

6 个答案:

答案 0 :(得分:1)

使用 tidyverse 和 reshape2,您可以执行以下操作:

# Count, per year and chemical, how many measurements exceed their limit, and
# reshape the counts to one row per year and one column per chemical.
library(dplyr)     # %>%, mutate(), group_by(), summarise(), ungroup()
library(reshape2)  # dcast()

df %>%
  # The year is taken as characters 7-10 of the "xx-xx-YYYY" date string.
  mutate(date = substr(date, 7, 10)) %>%
  group_by(date, Chemical) %>%
  # Summing a logical vector counts its TRUE values; no ifelse() is needed.
  summarise(temp = sum(concentration > limit)) %>%
  # Drop the grouping left over from summarise() before reshaping.
  ungroup() %>%
  dcast(date ~ Chemical, value.var = "temp")

  date A B C
1 2016 2 1 1
2 2017 1 2 0

答案 1 :(得分:1)

另一个tidyverse选项,

library(tidyverse)

# Keep only the measurements above their limit, tally them per chemical and
# year, and spread the tallies into one column per chemical.
# Combinations that have no exceedance are missing after the filter; `fill = 0`
# puts a zero into those cells of the wide table.
df %>%
  filter(concentration > limit) %>%
  mutate(
    grp = format(as.POSIXct(date, format = "%m-%d-%Y"), format = "%Y")
  ) %>%
  group_by(Chemical, grp) %>%
  count() %>%
  spread(key = Chemical, value = n, fill = 0)

给出,

# A tibble: 2 x 4
# Groups:   grp [2]
  grp       A     B     C
  <chr> <dbl> <dbl> <dbl>
1 2016      2     1     1
2 2017      1     2     0

答案 2 :(得分:1)

还有另一种可能性:

library(dplyr)
library(tidyr)
#library(lubridate) # you can choose to import it or not


# Count exceedances per year and chemical, one column per chemical.
dat %>%
  # The dates are text such as "01-02-2016". format() does not parse text, so
  # convert to character explicitly and let dmy() do the parsing.
  # NOTE(review): dmy() assumes day-month-year order; only the year is used
  # below, so the day/month order does not change the result here.
  mutate(date = lubridate::dmy(as.character(date))) %>%
  mutate(year = lubridate::year(date)) %>% # extract the year
  group_by(year, Chemical) %>%
  mutate(exceed = concentration > limit) %>% # TRUE/FALSE per measurement
  summarise(tot_exceed = sum(exceed)) %>% # each TRUE counts as 1
  spread(Chemical, tot_exceed) # one column per chemical
# # A tibble: 2 x 4
# # Groups:   year [2]
# year     A     B     C
# <dbl> <int> <int> <int>
# 1  2016     2     1     1
# 2  2017     1     2     0

数据:

# Example data as whitespace-separated text: one row per measurement with the
# chemical, the date string, the measured concentration and its limit.
tt <- "  Chemical  date      concentration  limit
   A     01-01-2016     0.2         0.01
A     01-02-2016     0.2         0.01
A     01-01-2017     0.005       0.01
A     01-02-2017     0.2         0.01
B     01-01-2016     0.3         0.1
B     01-02-2016     0.05        0.1
B     01-01-2017     0.2         0.1
B     01-02-2017     0.2         0.1
C     01-01-2016     1.2         1
C     01-02-2016     0.8         1
C     01-01-2017     0.9         1
C     01-02-2017     0.9         1"

# The first line of the text holds the column names. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
dat <- read.table(text = tt, header = TRUE)

答案 3 :(得分:0)

以下是使用dplyr软件包的解决方案:

// NOTE(review): this JavaScript snippet appears unrelated to the surrounding
// R answer (likely a paste/extraction artifact) - confirm before relying on it.
//
// Returns a Promise that resolves the first time the `oninput` handler runs
// while `ibanData.value` is exactly 12 characters long. The handler is
// installed by assigning `ibanData.oninput`.
const inputAsPromise = (ibanData) =>
  new Promise((resolve, reject) => {
    ibanData.oninput = (e) => {
      if (ibanData.value.length === 12) {
        return resolve();
      }
      return '';
    };
    //@todo: onerror should be reject
  });

df_2:

library(dplyr)
# Build the example data column by column.
chemical <- c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C")
date <- c(
  "01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017",
  "01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017",
  "01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017"
)
# Derive the year from the `date` vector above. The data frame `df` is only
# created further down, so it cannot be referenced at this point.
year <- format(as.Date(date, format = "%m-%d-%Y"), "%Y")
concentration <- c(0.2, 0.2, 0.005, 0.2, 0.3, 0.05, 0.2, 0.2, 1.2, 0.8, 0.9, 0.9)
limit <- c(0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 1, 1, 1, 1)

df <- data.frame(chemical, date, year, concentration, limit)

# Flag each measurement that is above its limit and keep only those rows.
# `exceed` is already logical, so it is used directly as the filter condition
# instead of being compared against `T`.
df_1 <- df %>%
  mutate(exceed = concentration > limit) %>%
  filter(exceed)

# Number of exceedances per chemical and year (stored in column `n`).
df_2 <- df_1 %>%
  group_by(chemical, year) %>%
  count(exceed)

df_2 的结果(随后的代码再由它计算 df_3):

  chemical year  exceed     n
  <fct>    <fct> <lgl>  <int>
1 A        2016  TRUE       2
2 A        2017  TRUE       1
3 B        2016  TRUE       1
4 B        2017  TRUE       2
5 C        2016  TRUE       1

# Total exceedances per year. df_2 holds one row per chemical/year with the
# count in `n`, so the yearly total is the sum of `n`. Counting the rows of
# df_2 would only give the number of chemicals with at least one exceedance.
df_3 <- df_2 %>%
  group_by(year, exceed) %>%
  summarise(n = sum(n))

答案 4 :(得分:0)

使用 data.table:先用 setDT(df1) 将 'data.frame' 转换为 'data.table',再按 'date'(转换为 Date 类后取年份 year)和 'Chemical' 分组,对逻辑向量求和(sum),最后用 dcast 转换为“宽”格式

library(data.table)
library(lubridate)
# setDT() converts df1 to a data.table; the exceedances are then summed per
# year and chemical, and dcast() turns the result into one column per chemical.
dcast(
  data = setDT(df1)[
    , sum(concentration > limit),
    by = .(date = year(dmy(date)), Chemical)
  ],
  formula = date ~ Chemical
)
#   date A B C
#1: 2016 2 1 1
#2: 2017 1 2 0

或者使用 base R 的 xtabs:

# Cross-tabulate the exceedance flag by year and chemical. `cond` is TRUE for
# a measurement above its limit, and xtabs() sums it within each cell.
xtabs(
  formula = cond ~ date + Chemical,
  data = transform(
    df1,
    cond = concentration > limit,
    date = substr(date, 7, 10) # characters 7-10 of the date string
  )
)
#      Chemical
#date   A B C
#  2016 2 1 1
#  2017 1 2 0

数据

# Example data: four measurements for each of the chemicals A, B and C, with
# the date kept as a character string and one limit per chemical.
df1 <- data.frame(
  Chemical = rep(c("A", "B", "C"), each = 4),
  date = rep(
    c("01-01-2016", "01-02-2016", "01-01-2017", "01-02-2017"),
    times = 3
  ),
  concentration = c(
    0.2, 0.2, 0.005, 0.2,
    0.3, 0.05, 0.2, 0.2,
    1.2, 0.8, 0.9, 0.9
  ),
  limit = rep(c(0.01, 0.1, 1), each = 4),
  stringsAsFactors = FALSE
)

答案 5 :(得分:0)

使用 tidyverse,数据采用 @akrun 提供的 df1:

library(tidyverse)
# Exceedances per year (rows) and chemical (columns).
df1 %>%
  filter(concentration > limit) %>%
  # The year is taken as characters 7-10 of the date string.
  mutate(Year = substr(date, 7, 10)) %>%
  count(Chemical, Year) %>%
  spread(key = Chemical, value = n, fill = 0)
# # A tibble: 2 x 4
#    Year     A     B     C
# * <chr> <dbl> <dbl> <dbl>
# 1  2016     2     1     1
# 2  2017     1     2     0

# Total number of exceedances per year, across all chemicals.
df1 %>%
  filter(concentration > limit) %>%
  mutate(Year = substr(date, 7, 10)) %>%
  count(Year)
# A tibble: 2 x 2
#    Year     n
#   <chr> <int>
# 1  2016     4
# 2  2017     3