我有一份逐日数据,想按年计算平均值、最小值、最大值,以及第90百分位数和第10百分位数。
我的数据如下:
Station Date Month Day Year MaxTemp MinTemp MaxDewPoint MinDewPoint
ORD 1/1/1948 1 1 1948 35.6 26.6 34.16 -27.4
ORD 1/2/1948 1 2 1948 -2 -16 -16.96 -27.04
ORD 1/3/1948 1 3 1948 -4 -26 -12 -26
ORD 1/4/1948 1 4 1948 -5 -26 -15 -26
ORD 1/5/1948 1 5 1948 8 -25 3 NaN
ORD 1/6/1948 1 6 1948 -11 -25 -24 -25
ORD 1/7/1948 1 7 1948 1 -23 NaN -23
ORD 1/8/1948 1 8 1948 1 -22 -9 NaN
ORD 1/9/1948 1 9 1948 NaN -22 -5 -22
ORD 1/10/1948 1 10 1948 10 NaN -2 -22
ORD 1/11/1948 1 11 1948 -11 -21 -23 -21
ORD 1/12/1948 1 12 1948 3 -12 -7.96 -20.92
ORD 1/13/1948 1 13 1948 6.98 -7.6 -7.6 -20.2
ORD 1/14/1948 1 14 1948 3.92 -9.4 -11.2 NaN
ORD 1/15/1948 1 15 1948 6 -7 -5.98 NaN
ORD 1/16/1948 1 16 1948 3 -11 -7.96 -20.02
到目前为止,我有以下代码:
# Install dplyr only when it is missing, so re-running the script does not
# re-download and re-install the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# NOTE(review): setwd() ties the script to one machine; it is kept because the
# relative file name below is resolved against this directory.
setwd("F:/Climate Data Analysis/Asignment 1")

# read.csv() already defaults to header = TRUE and sep = ",".
data <- read.csv("chiacagost.csv")

# read.csv() returns a data.frame, so wrapping it in data.frame() again was a
# no-op; the second name is kept for any later code that refers to `dframe`.
dframe <- data
但我不知道接下来该怎么做。
答案 0 :(得分:0)
您可以使用 summarise_all(每个站点、每年汇总为一行)
或 mutate_all(保留每日记录,并把年度统计量作为新列附加到每一行)。
library(dplyr)
# Build the example data frame from inline text.
# The first line of the text supplies the column names, the literal string
# "NaN" is read as a missing value (NA), and character columns (Station, Date)
# stay character instead of being converted to factors.
df <- read.table(
  header = TRUE,
  na.strings = "NaN",
  stringsAsFactors = FALSE,
  text = "Station Date Month Day Year MaxTemp MinTemp MaxDewPoint MinDewPoint
ORD 1/1/1948 1 1 1948 35.6 26.6 34.16 -27.4
ORD 1/2/1948 1 2 1948 -2 -16 -16.96 -27.04
ORD 1/3/1948 1 3 1948 -4 -26 -12 -26
ORD 1/4/1948 1 4 1948 -5 -26 -15 -26
ORD 1/5/1948 1 5 1948 8 -25 3 NaN
ORD 1/6/1948 1 6 1948 -11 -25 -24 -25
ORD 1/7/1948 1 7 1948 1 -23 NaN -23
ORD 1/8/1948 1 8 1948 1 -22 -9 NaN
ORD 1/9/1948 1 9 1948 NaN -22 -5 -22
ORD 1/10/1948 1 10 1948 10 NaN -2 -22
ORD 1/11/1948 1 11 1948 -11 -21 -23 -21
ORD 1/12/1948 1 12 1948 3 -12 -7.96 -20.92
ORD 1/13/1948 1 13 1948 6.98 -7.6 -7.6 -20.2
ORD 1/14/1948 1 14 1948 3.92 -9.4 -11.2 NaN
ORD 1/15/1948 1 15 1948 6 -7 -5.98 NaN
ORD 1/16/1948 1 16 1948 3 -11 -7.96 -20.02"
)
# Yearly summary: collapse the daily records to one row per Station/Year.
# Every remaining measurement column gets five statistics, named
# <column>_<statistic> (e.g. MaxTemp_yr_max). NA values are ignored.
#
# funs() is deprecated since dplyr 0.8.0; a named list of lambdas is the
# supported replacement and yields the same column names in the same order.
stat_df <- df %>%
  select(-Date, -Month, -Day) %>%   # day-level identifiers are not summarised
  group_by(Station, Year) %>%
  summarise_all(list(
    yr_max = ~ max(., na.rm = TRUE),
    yr_min = ~ min(., na.rm = TRUE),
    yr_avg = ~ mean(., na.rm = TRUE),
    yr_qt1 = ~ quantile(., probs = 0.1, na.rm = TRUE),  # 10th percentile
    yr_qt9 = ~ quantile(., probs = 0.9, na.rm = TRUE)   # 90th percentile
  ))
stat_df
#> # A tibble: 1 x 22
#> # Groups: Station [?]
#> Station Year MaxTemp_yr_max MinTemp_yr_max MaxDewPoint_yr_~
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 ORD 1948 35.6 26.6 34.2
#> # ... with 17 more variables: MinDewPoint_yr_max <dbl>,
#> # MaxTemp_yr_min <dbl>, MinTemp_yr_min <dbl>, MaxDewPoint_yr_min <dbl>,
#> # MinDewPoint_yr_min <dbl>, MaxTemp_yr_avg <dbl>, MinTemp_yr_avg <dbl>,
#> # MaxDewPoint_yr_avg <dbl>, MinDewPoint_yr_avg <dbl>,
#> # MaxTemp_yr_qt1 <dbl>, MinTemp_yr_qt1 <dbl>, MaxDewPoint_yr_qt1 <dbl>,
#> # MinDewPoint_yr_qt1 <dbl>, MaxTemp_yr_qt9 <dbl>, MinTemp_yr_qt9 <dbl>,
#> # MaxDewPoint_yr_qt9 <dbl>, MinDewPoint_yr_qt9 <dbl>
# Same yearly statistics, but keep every daily row: each statistic is computed
# within its Station/Year group and appended as a new column named
# <column>_<statistic> (e.g. MaxTemp_yr_max). NA values are ignored.
#
# funs() is deprecated since dplyr 0.8.0; a named list of lambdas is the
# supported replacement and yields the same column names in the same order.
stat_df <- df %>%
  select(-Date, -Month, -Day) %>%   # day-level identifiers are dropped
  group_by(Station, Year) %>%
  mutate_all(list(
    yr_max = ~ max(., na.rm = TRUE),
    yr_min = ~ min(., na.rm = TRUE),
    yr_avg = ~ mean(., na.rm = TRUE),
    yr_qt1 = ~ quantile(., probs = 0.1, na.rm = TRUE),  # 10th percentile
    yr_qt9 = ~ quantile(., probs = 0.9, na.rm = TRUE)   # 90th percentile
  ))
stat_df
#> # A tibble: 16 x 26
#> # Groups: Station, Year [1]
#> Station Year MaxTemp MinTemp MaxDewPoint MinDewPoint MaxTemp_yr_max
#> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ORD 1948 35.6 26.6 34.2 -27.4 35.6
#> 2 ORD 1948 -2 -16 -17.0 -27.0 35.6
#> 3 ORD 1948 -4 -26 -12 -26 35.6
#> 4 ORD 1948 -5 -26 -15 -26 35.6
#> 5 ORD 1948 8 -25 3 NA 35.6
#> 6 ORD 1948 -11 -25 -24 -25 35.6
#> 7 ORD 1948 1 -23 NA -23 35.6
#> 8 ORD 1948 1 -22 -9 NA 35.6
#> 9 ORD 1948 NA -22 -5 -22 35.6
#> 10 ORD 1948 10 NA -2 -22 35.6
#> 11 ORD 1948 -11 -21 -23 -21 35.6
#> 12 ORD 1948 3 -12 -7.96 -20.9 35.6
#> 13 ORD 1948 6.98 -7.6 -7.6 -20.2 35.6
#> 14 ORD 1948 3.92 -9.4 -11.2 NA 35.6
#> 15 ORD 1948 6 -7 -5.98 NA 35.6
#> 16 ORD 1948 3 -11 -7.96 -20.0 35.6
#> # ... with 19 more variables: MinTemp_yr_max <dbl>,
#> # MaxDewPoint_yr_max <dbl>, MinDewPoint_yr_max <dbl>,
#> # MaxTemp_yr_min <dbl>, MinTemp_yr_min <dbl>, MaxDewPoint_yr_min <dbl>,
#> # MinDewPoint_yr_min <dbl>, MaxTemp_yr_avg <dbl>, MinTemp_yr_avg <dbl>,
#> # MaxDewPoint_yr_avg <dbl>, MinDewPoint_yr_avg <dbl>,
#> # MaxTemp_yr_qt1 <dbl>, MinTemp_yr_qt1 <dbl>, MaxDewPoint_yr_qt1 <dbl>,
#> # MinDewPoint_yr_qt1 <dbl>, MaxTemp_yr_qt9 <dbl>, MinTemp_yr_qt9 <dbl>,
#> # MaxDewPoint_yr_qt9 <dbl>, MinDewPoint_yr_qt9 <dbl>
由reprex package(v0.2.1.9000)于2019-01-30创建