根据每日数据计算年平均值、最小值、最大值、第 90 百分位数和第 10 百分位数

时间:2019-01-31 04:39:04

标签: r

我有每日数据,想按年计算年平均值、年最小值、年最大值、第 90 百分位数和第 10 百分位数。

我的数据如下:

Station Date    Month  Day Year    MaxTemp MinTemp MaxDewPoint MinDewPoint
    ORD 1/1/1948    1   1   1948    35.6    26.6    34.16         -27.4
    ORD 1/2/1948    1   2   1948    -2      -16     -16.96       -27.04
    ORD 1/3/1948    1   3   1948    -4      -26     -12            -26
    ORD 1/4/1948    1   4   1948    -5      -26     -15             -26
    ORD 1/5/1948    1   5   1948    8       -25     3               NaN
    ORD 1/6/1948    1   6   1948    -11     -25     -24            -25
    ORD 1/7/1948    1   7   1948    1       -23     NaN            -23
    ORD 1/8/1948    1   8   1948    1       -22     -9              NaN
    ORD 1/9/1948    1   9   1948    NaN     -22     -5             -22
    ORD 1/10/1948   1   10  1948    10      NaN     -2              -22
    ORD 1/11/1948   1   11  1948    -11     -21    -23              -21
    ORD 1/12/1948   1   12  1948    3       -12     -7.96        -20.92
    ORD 1/13/1948   1   13  1948    6.98    -7.6    -7.6         -20.2
    ORD 1/14/1948   1   14  1948    3.92    -9.4    -11.2        NaN
    ORD 1/15/1948   1   15  1948    6        -7    -5.98         NaN
    ORD 1/16/1948   1   16  1948    3       -11     -7.96       -20.02

到目前为止,我有以下代码:

# Make dplyr available: install it, then attach it to the session.
install.packages("dplyr")
library(dplyr)

# Switch to the assignment folder so the CSV can be opened by bare file name.
setwd("F:/Climate Data Analysis/Asignment 1")

# Load the CSV; read.csv() already defaults to header = TRUE and sep = ",",
# so those arguments are left implicit.
data <- read.csv("chiacagost.csv")
dframe <- data.frame(data)

我不知道之后如何进行

1 个答案:

答案 0 :(得分:0)

您可以使用 `summarise_all`(每个站点、每年汇总为一行)或 `mutate_all`(保留每日数据行,并附加年度统计列):

library(dplyr)

# Rebuild the question's sample as a data frame from inline text.
# Fields are whitespace-separated, the first line supplies the column names,
# strings stay character (no factors), and the literal token "NaN" is read
# as a missing value (NA).
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = "NaN",
  text = "Station Date    Month  Day Year    MaxTemp MinTemp MaxDewPoint MinDewPoint
    ORD 1/1/1948    1   1   1948    35.6    26.6    34.16         -27.4
    ORD 1/2/1948    1   2   1948    -2      -16     -16.96       -27.04
    ORD 1/3/1948    1   3   1948    -4      -26     -12            -26
    ORD 1/4/1948    1   4   1948    -5      -26     -15             -26
    ORD 1/5/1948    1   5   1948    8       -25     3               NaN
    ORD 1/6/1948    1   6   1948    -11     -25     -24            -25
    ORD 1/7/1948    1   7   1948    1       -23     NaN            -23
    ORD 1/8/1948    1   8   1948    1       -22     -9              NaN
    ORD 1/9/1948    1   9   1948    NaN     -22     -5             -22
    ORD 1/10/1948   1   10  1948    10      NaN     -2              -22
    ORD 1/11/1948   1   11  1948    -11     -21    -23              -21
    ORD 1/12/1948   1   12  1948    3       -12     -7.96        -20.92
    ORD 1/13/1948   1   13  1948    6.98    -7.6    -7.6         -20.2
    ORD 1/14/1948   1   14  1948    3.92    -9.4    -11.2        NaN
    ORD 1/15/1948   1   15  1948    6        -7    -5.98         NaN
    ORD 1/16/1948   1   16  1948    3       -11     -7.96       -20.02"
)

# Yearly summary: collapse the daily rows to one row per Station/Year.
# Date, Month and Day are dropped first so that every remaining non-grouping
# column (MaxTemp, MinTemp, MaxDewPoint, MinDewPoint) gets summarised.
# The statistics are passed as a named list of formula lambdas instead of
# funs(), which is deprecated since dplyr 0.8.0. Each list name is appended
# to the column name (e.g. MaxTemp_yr_max), so the result columns keep the
# same names and order as before.
# na.rm = TRUE makes every statistic ignore missing daily readings.
stat_df <- df %>%
  select(-Date, -Month, -Day) %>%
  group_by(Station, Year) %>%
  summarise_all(
    list(
      yr_max = ~ max(., na.rm = TRUE),
      yr_min = ~ min(., na.rm = TRUE),
      yr_avg = ~ mean(., na.rm = TRUE),
      yr_qt1 = ~ quantile(., probs = 0.1, na.rm = TRUE), # 10th percentile
      yr_qt9 = ~ quantile(., probs = 0.9, na.rm = TRUE)  # 90th percentile
    )
  )
stat_df
#> # A tibble: 1 x 22
#> # Groups:   Station [?]
#>   Station  Year MaxTemp_yr_max MinTemp_yr_max MaxDewPoint_yr_~
#>   <chr>   <int>          <dbl>          <dbl>            <dbl>
#> 1 ORD      1948           35.6           26.6             34.2
#> # ... with 17 more variables: MinDewPoint_yr_max <dbl>,
#> #   MaxTemp_yr_min <dbl>, MinTemp_yr_min <dbl>, MaxDewPoint_yr_min <dbl>,
#> #   MinDewPoint_yr_min <dbl>, MaxTemp_yr_avg <dbl>, MinTemp_yr_avg <dbl>,
#> #   MaxDewPoint_yr_avg <dbl>, MinDewPoint_yr_avg <dbl>,
#> #   MaxTemp_yr_qt1 <dbl>, MinTemp_yr_qt1 <dbl>, MaxDewPoint_yr_qt1 <dbl>,
#> #   MinDewPoint_yr_qt1 <dbl>, MaxTemp_yr_qt9 <dbl>, MinTemp_yr_qt9 <dbl>,
#> #   MaxDewPoint_yr_qt9 <dbl>, MinDewPoint_yr_qt9 <dbl>


# Row-preserving variant: keep every daily row and append the yearly
# statistics of its Station/Year group as extra columns.
# Date, Month and Day are dropped first so that only the measurement columns
# (MaxTemp, MinTemp, MaxDewPoint, MinDewPoint) receive statistics.
# The statistics are passed as a named list of formula lambdas instead of
# funs(), which is deprecated since dplyr 0.8.0. Each list name is appended
# to the column name (e.g. MaxTemp_yr_max), so the added columns keep the
# same names and order as before.
# na.rm = TRUE makes every statistic ignore missing daily readings.
stat_df <- df %>%
  select(-Date, -Month, -Day) %>%
  group_by(Station, Year) %>%
  mutate_all(
    list(
      yr_max = ~ max(., na.rm = TRUE),
      yr_min = ~ min(., na.rm = TRUE),
      yr_avg = ~ mean(., na.rm = TRUE),
      yr_qt1 = ~ quantile(., probs = 0.1, na.rm = TRUE), # 10th percentile
      yr_qt9 = ~ quantile(., probs = 0.9, na.rm = TRUE)  # 90th percentile
    )
  )
stat_df
#> # A tibble: 16 x 26
#> # Groups:   Station, Year [1]
#>    Station  Year MaxTemp MinTemp MaxDewPoint MinDewPoint MaxTemp_yr_max
#>    <chr>   <int>   <dbl>   <dbl>       <dbl>       <dbl>          <dbl>
#>  1 ORD      1948   35.6     26.6       34.2        -27.4           35.6
#>  2 ORD      1948   -2      -16        -17.0        -27.0           35.6
#>  3 ORD      1948   -4      -26        -12          -26             35.6
#>  4 ORD      1948   -5      -26        -15          -26             35.6
#>  5 ORD      1948    8      -25          3           NA             35.6
#>  6 ORD      1948  -11      -25        -24          -25             35.6
#>  7 ORD      1948    1      -23         NA          -23             35.6
#>  8 ORD      1948    1      -22         -9           NA             35.6
#>  9 ORD      1948   NA      -22         -5          -22             35.6
#> 10 ORD      1948   10       NA         -2          -22             35.6
#> 11 ORD      1948  -11      -21        -23          -21             35.6
#> 12 ORD      1948    3      -12         -7.96       -20.9           35.6
#> 13 ORD      1948    6.98    -7.6       -7.6        -20.2           35.6
#> 14 ORD      1948    3.92    -9.4      -11.2         NA             35.6
#> 15 ORD      1948    6       -7         -5.98        NA             35.6
#> 16 ORD      1948    3      -11         -7.96       -20.0           35.6
#> # ... with 19 more variables: MinTemp_yr_max <dbl>,
#> #   MaxDewPoint_yr_max <dbl>, MinDewPoint_yr_max <dbl>,
#> #   MaxTemp_yr_min <dbl>, MinTemp_yr_min <dbl>, MaxDewPoint_yr_min <dbl>,
#> #   MinDewPoint_yr_min <dbl>, MaxTemp_yr_avg <dbl>, MinTemp_yr_avg <dbl>,
#> #   MaxDewPoint_yr_avg <dbl>, MinDewPoint_yr_avg <dbl>,
#> #   MaxTemp_yr_qt1 <dbl>, MinTemp_yr_qt1 <dbl>, MaxDewPoint_yr_qt1 <dbl>,
#> #   MinDewPoint_yr_qt1 <dbl>, MaxTemp_yr_qt9 <dbl>, MinTemp_yr_qt9 <dbl>,
#> #   MaxDewPoint_yr_qt9 <dbl>, MinDewPoint_yr_qt9 <dbl>

由 reprex 包(v0.2.1.9000)于 2019-01-30 创建