在 ggplot2 中使用 `stat_summary` 时添加 LOESS 平滑曲线

时间:2017-11-12 11:31:28

标签: r ggplot2 statistics

我正在尝试为一些数据绘制 LOESS(局部加权回归)平滑曲线。

以下是一些代码:

library(ggplot2)

###############################################################################

## Reproducible test data: a shuffled 1..m vector paired with sorted years
m <- 200
set.seed(123)
Aboard <- sample(seq_len(m), m)
## Whole-number years drawn uniformly between 1931 and 1990, ascending
Years <- sort(trunc(runif(m, min = 1931, max = 1990)))

df <- data.frame(Aboard, Years)

## Base plot: one point per year, sized by the yearly sum of Aboard
graph <- ggplot(df, aes(Years, Aboard)) +
  stat_summary(fun.y = sum, geom = "point", aes(size = ..y..)) +
  theme_bw()

## The layer this question is about; kept as its own statement, unchanged
graph <- graph + stat_summary(fun.y=sum, geom="smooth", method="loess", alpha=0.01)

## Text sizes, centred title, axis labels, and a line through the yearly sums
graph <- graph +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 20)
  ) +
  labs(
    title = "Some information that is here, from a computer,\nJune 2004",
    x = "Year",
    y = "Information"
  ) +
  stat_summary(fun.y = sum, geom = "line")

graph

以上代码的输出如下:

enter image description here

我期待这一行

# Quoted from the script above: the layer that was expected to draw a LOESS curve.
# NOTE(review): stat_summary() plots the per-year sums themselves, so the "smooth"
# geom just joins them; presumably `method` is ignored here - confirm in ggplot2 docs.
graph <- graph + stat_summary(fun.y=sum, geom="smooth", method="loess", alpha=0.01)

能绘制出一条 LOESS 平滑曲线,但实际画出的线却精确地穿过了每一个数据点。

编辑

如果可能,我希望得到一个不依赖 tidyverse / dplyr 的解决方案,因为我没有在使用这些包。

1 个答案:

答案 0 :(得分:3)

library(ggplot2)
library(dplyr)

# Reproducible test data (same as in the question)
m <- 200
set.seed(123)
Aboard <- sample(1:m, m)
Years <- sort(trunc(runif(m, min = 1931, max = 1990)))
df <- data.frame(Aboard, Years)

# Sums by years, one row per year.
# summarise() is used instead of mutate() so that each year contributes a
# single (Years, ysum) point to the smoother; with mutate() every yearly sum
# is repeated once per observation, which over-weights busy years in the
# LOESS fit and makes its confidence band too narrow. `df` itself is left
# as a plain, ungrouped data frame.
df2 <- df %>%
  group_by(Years) %>%
  summarise(ysum = sum(Aboard))

graph <- ggplot(df, aes(Years, Aboard))
graph <- graph + stat_summary(fun.y = sum, geom = "point", aes(size = ..y..))
graph <- graph + theme_bw()

# Use geom_smooth in place of stat_summary, fitted on the yearly sums
graph <- graph + geom_smooth(data = df2, aes(x = Years, y = ysum), alpha = 0.5)

graph <- graph + theme(text = element_text(size = 16))
graph <- graph + labs(title = "Some information that is here, from a computer,\nJune 2004")
graph <- graph + theme(plot.title = element_text(hjust = 0.5))
graph <- graph + theme(plot.title = element_text(size = 20))
graph <- graph + labs(x = "Year") + labs(y = "Information")
# Line connecting the yearly sums, drawn on top of the smoother
graph <- graph + stat_summary(fun.y = sum, geom = "line")
graph

如果您需要避免使用 dplyr 包:

# Rebuild the same reproducible test data
m <- 200
set.seed(123)
Aboard <- sample(seq_len(m), m)
Years <- sort(trunc(runif(m, min = 1931, max = 1990)))
df <- data.frame(Aboard, Years)

# Yearly totals with base R only: one row per year, columns Years and ysum
df2 <- setNames(
  aggregate(x = df$Aboard, by = list(df$Years), FUN = sum),
  c("Years", "ysum")
)

# Layers are added in the original order: points, theme, smoother, styling, line
graph <- ggplot(df, aes(Years, Aboard)) +
  stat_summary(fun.y = sum, geom = "point", aes(size = ..y..)) +
  theme_bw() +
  # geom_smooth on the yearly totals takes the place of stat_summary
  geom_smooth(data = df2, aes(Years, ysum), alpha = 0.5) +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 20)
  ) +
  labs(
    title = "Some information that is here, from a computer,\nJune 2004",
    x = "Year",
    y = "Information"
  ) +
  stat_summary(fun.y = sum, geom = "line")

graph

enter image description here