我想将数据分组为多个季节,以便我的季节是冬天:12月-2月;春季:3月-5月;夏季:6月至8月,秋季:9月至11月。然后,我想对冬季和春季的季节性数据进行箱线绘图,将A与B进行比较,然后将A与C进行比较。我希望能有一种有效的数据分组和绘图方法。
library(tidyverse)
library(reshape2)
Dates30s = data.frame(seq(as.Date("2011-01-01"), to= as.Date("2040-12-31"),by="day"))
colnames(Dates30s) = "date"
FakeData = data.frame(A = runif(10958, min = 0.5, max = 1.5), B = runif(10958, min = 1.6, max = 2), C = runif(10958, min = 0.8, max = 1.8))
myData = data.frame(Dates30s, FakeData)
myData = separate(myData, date, sep = "-", into = c("Year", "Month", "Day"))
myData$Year = as.numeric(myData$Year)
myData$Month = as.numeric(myData$Month)
SeasonalData = myData %>% group_by(Year, Month) %>% summarise_all(funs(mean)) %>% select(Year, Month, A, B, C)
Spring = SeasonalData %>% filter(Month == 3 | Month == 4 |Month == 5)
Winter1 = SeasonalData %>% filter(Month == 12)
Winter1$Year = Winter1$Year+1
Winter2 = SeasonalData %>% filter(Month == 1 | Month == 2 )
Winter = rbind(Winter1, Winter2) %>% filter(Year >= 2012 & Year <= 2040) %>% group_by(Year) %>% summarise_all(funs(mean)) %>% select(-"Month")
BoxData = gather(Winter, key = "Variable", value = "value", -Year )
ggplot(BoxData, aes(x=Variable, y=value,fill=factor(Variable)))+
geom_boxplot() + labs(title="Winter") +facet_wrap(~Variable)
我想有两个数字:图1分为两部分;一个用于冬季,一个用于夏季(请参见BoxPlot 1),一个用于月度年度平均值,代表整个时间段(2011 -2040年)的平均月度值,请参见Boxplot 2
答案 0 :(得分:1)
这是我通常要做的。所有计算和绘图均基于water year (WY
) or hydrologic year from October to September。
library(tidyverse)
library(lubridate)
set.seed(123)
Dates30s <- data.frame(seq(as.Date("2011-01-01"), to = as.Date("2040-12-31"), by = "day"))
colnames(Dates30s) <- "date"
FakeData <- data.frame(A = runif(10958, min = 0.3, max = 1.5),
B = runif(10958, min = 1.2, max = 2),
C = runif(10958, min = 0.6, max = 1.8))
### Calculate Year, Month then Water year (WY) and Season
myData <- data.frame(Dates30s, FakeData) %>%
mutate(Year = year(date),
MonthNr = month(date),
Month = month(date, label = TRUE, abbr = TRUE)) %>%
mutate(WY = case_when(MonthNr > 9 ~ Year + 1,
TRUE ~ Year)) %>%
mutate(Season = case_when(MonthNr %in% 9:11 ~ "Fall",
MonthNr %in% c(12, 1, 2) ~ "Winter",
MonthNr %in% 3:5 ~ "Spring",
TRUE ~ "Summer")) %>%
select(-date, -MonthNr, -Year) %>%
as_tibble()
myData
#> # A tibble: 10,958 x 6
#> A B C Month WY Season
#> <dbl> <dbl> <dbl> <ord> <dbl> <chr>
#> 1 0.645 1.37 1.51 Jan 2011 Winter
#> 2 1.25 1.79 1.71 Jan 2011 Winter
#> 3 0.791 1.35 1.68 Jan 2011 Winter
#> 4 1.36 1.97 0.646 Jan 2011 Winter
#> 5 1.43 1.31 1.60 Jan 2011 Winter
#> 6 0.355 1.52 0.708 Jan 2011 Winter
#> 7 0.934 1.94 0.825 Jan 2011 Winter
#> 8 1.37 1.89 1.03 Jan 2011 Winter
#> 9 0.962 1.75 0.632 Jan 2011 Winter
#> 10 0.848 1.94 0.883 Jan 2011 Winter
#> # ... with 10,948 more rows
按WY计算季节和月平均值
### Seasonal Avg by WY
SeasonalAvg <- myData %>%
select(-Month) %>%
group_by(WY, Season) %>%
summarise_all(mean, na.rm = TRUE) %>%
ungroup() %>%
gather(key = "State", value = "MFI", -WY, -Season)
SeasonalAvg
#> # A tibble: 366 x 4
#> WY Season State MFI
#> <dbl> <chr> <chr> <dbl>
#> 1 2011 Fall A 0.939
#> 2 2011 Spring A 0.907
#> 3 2011 Summer A 0.896
#> 4 2011 Winter A 0.909
#> 5 2012 Fall A 0.895
#> 6 2012 Spring A 0.865
#> 7 2012 Summer A 0.933
#> 8 2012 Winter A 0.895
#> 9 2013 Fall A 0.879
#> 10 2013 Spring A 0.872
#> # ... with 356 more rows
### Monthly Avg by WY
MonthlyAvg <- myData %>%
select(-Season) %>%
group_by(WY, Month) %>%
summarise_all(mean, na.rm = TRUE) %>%
ungroup() %>%
gather(key = "State", value = "MFI", -WY, -Month) %>%
mutate(Month = factor(Month))
MonthlyAvg
#> # A tibble: 1,080 x 4
#> WY Month State MFI
#> <dbl> <ord> <chr> <dbl>
#> 1 2011 Jan A 1.00
#> 2 2011 Feb A 0.807
#> 3 2011 Mar A 0.910
#> 4 2011 Apr A 0.923
#> 5 2011 May A 0.888
#> 6 2011 Jun A 0.876
#> 7 2011 Jul A 0.909
#> 8 2011 Aug A 0.903
#> 9 2011 Sep A 0.939
#> 10 2012 Jan A 0.903
#> # ... with 1,070 more rows
绘制季节和月度数据
### Seasonal plot
s1 <- ggplot(SeasonalAvg, aes(x = Season, y = MFI, color = State)) +
geom_boxplot(position = position_dodge(width = 0.7)) +
geom_point(position = position_jitterdodge(seed = 123))
s1
### Monthly plot
m1 <- ggplot(MonthlyAvg, aes(x = Month, y = MFI, color = State)) +
geom_boxplot(position = position_dodge(width = 0.7)) +
geom_point(position = position_jitterdodge(seed = 123))
m1
奖金
### https://stackoverflow.com/a/58369424/786542
# if (!require(devtools)) {
# install.packages('devtools')
# }
# devtools::install_github('erocoar/gghalves')
library(gghalves)
s2 <- ggplot(SeasonalAvg, aes(x = Season, y = MFI, color = State)) +
geom_half_boxplot(nudge = 0.05) +
geom_half_violin(aes(fill = State),
side = "r", nudge = 0.01) +
theme_light() +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 1))
s2
s3 <- ggplot(SeasonalAvg, aes(x = Season, y = MFI, color = State)) +
geom_half_boxplot(nudge = 0.05, outlier.color = NA) +
geom_dotplot(aes(fill = State),
binaxis = "y", method = "histodot",
dotsize = 0.35,
stackdir = "up", position = PositionDodge) +
theme_light() +
theme(legend.position = "bottom") +
guides(color = guide_legend(nrow = 1))
s3
#> `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
由reprex package(v0.3.0)于2019-10-16创建