很抱歉问这样一个看似基础的问题,但我已经反复尝试并在网上搜索过,仍然卡住了。数据如下:
NULL
下面先生成数据 “mydata”;随后的循环会针对其中的每个变量构建 3 个探索性图形:
# Per-row parameters for the simulation: 100 rows whose mean and sd both run
# over the odd numbers 1, 3, ..., 199.
temp <- data.frame(
  mean = seq(1, 200, by = 2),
  sd = seq(1, 200, by = 2)
)

# Draw `n` normal variables with row-specific parameters.
#
# Args:
#   n:    number of variables (columns) to simulate.
#   mean: numeric vector of row means; its length sets the number of rows.
#   sd:   numeric vector of row standard deviations; either a single value or
#         one value per element of `mean`.
#
# Returns a length(mean) x n numeric matrix; every entry in row i is drawn
# from a normal distribution with mean mean[i] and sd sd[i].
normv <- function(n, mean, sd) {
  # rnorm() recycles `mean`/`sd` silently; lengths that do not line up would
  # produce rows with mixed parameters, so reject them up front.
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0,
    length(sd) %in% c(1L, length(mean))
  )
  n_rows <- length(mean)
  out <- rnorm(n * n_rows, mean = mean, sd = sd)
  # Column-major fill with an explicit row count keeps draw k aligned with
  # mean[k] on every pass through the recycled parameter vectors.
  matrix(out, nrow = n_rows, ncol = n, byrow = FALSE)
}

set.seed(1)
# Preview draw (auto-printed at top level). It also advances the RNG, so
# `mydata` below is built from the second batch of draws after the seed.
normv(5, temp$mean, temp$sd) # 5 variables, from V1 to V5
mydata <- as.data.frame(normv(5, temp$mean, temp$sd))
我希望得到的最终图像的示意图如下: 请注意,如果在循环之外运行 geom_histogram,它可以正常工作;不使用 ggplot2 而改用基础 R 绘制直方图时,也一切正常。但是,一旦在循环中运行,我仍然会收到错误:
# Exploratory plots for every column of `mydata`, written to "Explore1.pdf":
# a ggplot2 density histogram with overlays, a base-R normal Q-Q plot, and a
# labelled boxplot (Boxplot() is presumably car::Boxplot -- confirm).
require(ggplot2)
require(car)
# NOTE(review): no matching dev.off() is visible in this snippet, so the PDF
# device opened here is not closed by the code shown.
pdf(paste("Explore",1,".pdf",sep=""))
# 2 x 2 layout filled by column: panels 1 and 2 stacked on the left, panel 3
# spanning the right column.
# NOTE(review): layout() arranges base-graphics plots; the ggplot printed
# below is drawn with grid and presumably does not honour it -- confirm.
layout(matrix(c(1,2,3,3), 2, 2, byrow = FALSE))
lst1<- lapply(names(mydata),function(i)
{
# `i` is a column NAME (character string) taken from names(mydata).
print (
# NOTE(review): aes(i) maps x to the string held in `i` rather than to the
# values of that column, so x is presumably treated as a discrete constant;
# this looks like the cause of the reported histogram error. Mapping the
# column itself (e.g. aes(.data[[i]]) or aes_string(i)) is the likely fix.
ggplot(mydata, aes(i)) +
# Histogram scaled to density so the density/normal overlays share its y axis.
geom_histogram(aes(y = ..density..),
fill = 'yellow',
alpha = 0.7,
col = 'black') +
geom_density(colour="blue", lwd = 1, fill="lightyellow", alpha=0.5) +
# Normal curve parameterised by this column's sample mean and sd.
stat_function(fun = dnorm,
args = list(mean = mean(mydata[,i], na.rm=T), sd = sd(mydata[,i], na.rm=T)),
lwd = 1,
col = 'red') +
# Vertical reference lines: solid light-blue at the mean, dashed purple at
# the median.
geom_vline(xintercept = mean(mydata[,i], na.rm=TRUE),col="lightblue", lty=1, lwd = 1) +
geom_vline(xintercept = median(mydata[,i], na.rm=TRUE),col="purple", lty=2, lwd = 1) +
theme_bw() +
labs(title="Blue Line: Mean, Purple Line: Median") +
theme(axis.text.x=element_text(size=14), axis.title.x=element_text(size=16),
axis.text.y=element_text(size=14), axis.title.y=element_text(size=16))
)
# Base-graphics normal Q-Q plot of the column, drawn without axes.
qqnorm(mydata[,i], axes=FALSE)
# Boxplot of the column with row names supplied as point labels; id.n=Inf
# presumably asks for every flagged point to be labelled -- confirm.
# Being the last expression, its value is what lapply() collects in `lst1`.
Boxplot(mydata[,i],
labels=rownames(mydata), id.n=Inf,
col="royalblue",
axes=TRUE,
ylab=i,
horizontal=FALSE)
})
另外请注意,我很清楚直方图与条形图之间的区别:在这种情况下,数据显然是连续的而不是离散的,我需要的确实是直方图。出于试验目的,我也尝试过改用 geom_bar:错误消失了,但得到的图(不出所料)没有意义。
非常感谢任何帮助!
答案 0 :(得分:2)
这里涉及的内容不少——我根据你的代码推测,这就是你想要的图形类型。在这里,我用 gather 把各个变量收集成长格式,这样我们就可以按那个令人头疼的 “variable” 列进行分面(facet)。一般来说,如果某件事做起来很费劲,而你开始求助于循环,那么通常会有更好的方法。
library(tidyverse)
library(car)
# Per-row parameters for the simulation: 100 rows whose mean and sd both run
# over the odd numbers 1, 3, ..., 199.
temp <- data.frame(
  mean = seq(1, 200, by = 2),
  sd = seq(1, 200, by = 2)
)

# Draw `n` normal variables with row-specific parameters.
#
# Args:
#   n:    number of variables (columns) to simulate.
#   mean: numeric vector of row means; its length sets the number of rows.
#   sd:   numeric vector of row standard deviations; either a single value or
#         one value per element of `mean`.
#
# Returns a length(mean) x n numeric matrix; every entry in row i is drawn
# from a normal distribution with mean mean[i] and sd sd[i].
normv <- function(n, mean, sd) {
  # rnorm() recycles `mean`/`sd` silently; lengths that do not line up would
  # produce rows with mixed parameters, so reject them up front.
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0,
    length(sd) %in% c(1L, length(mean))
  )
  n_rows <- length(mean)
  out <- rnorm(n * n_rows, mean = mean, sd = sd)
  # Column-major fill with an explicit row count keeps draw k aligned with
  # mean[k] on every pass through the recycled parameter vectors.
  matrix(out, nrow = n_rows, ncol = n, byrow = FALSE)
}
# Seed the RNG so the simulated data are reproducible.
set.seed(1)
# Preview draw, auto-printed at top level; it also consumes random numbers
# before `mydata` is generated.
normv(5, temp$mean, temp$sd) # 5 variables, from V1 to V5
# Second draw, reshaped to long format: one row per (variable, value) pair.
mydata <- gather(
  as.data.frame(normv(5, temp$mean, temp$sd)),
  "variable", "value", V1:V5
)
# Faceted density histograms, one panel per simulated variable, with vertical
# reference lines at each variable's mean and median.
ggplot(data = mydata, mapping = aes(value)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    colour = "black",
    fill = "yellow",
    alpha = 0.7
  ) +
  geom_density(lwd = 1, colour = "blue", fill = "lightyellow", alpha = 0.5) +
  facet_grid(. ~ variable) +
  geom_vline(
    # Per-variable summary reshaped to one row per (variable, stat), so that
    # `stat` can drive the line colour.
    data = mydata %>%
      group_by(variable) %>%
      summarise(mean = mean(value), median = median(value)) %>%
      gather("stat", "summarised_value", mean:median),
    mapping = aes(xintercept = summarised_value, color = stat),
    size = 1
  ) +
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14)
  )
针对完整的问题,下面的代码应该足以让你上手:
我仍然先把变量收集成长格式,因为这样更便于绘图。在绘图之前,我只取出数据框中与当前所关心的变量对应的子集。
library(tidyverse)
library(car)
library(cowplot)
# Per-row parameters for the simulation: 100 rows whose mean and sd both run
# over the odd numbers 1, 3, ..., 199.
temp <- data.frame(
  mean = seq(1, 200, by = 2),
  sd = seq(1, 200, by = 2)
)

# Draw `n` normal variables with row-specific parameters.
#
# Args:
#   n:    number of variables (columns) to simulate.
#   mean: numeric vector of row means; its length sets the number of rows.
#   sd:   numeric vector of row standard deviations; either a single value or
#         one value per element of `mean`.
#
# Returns a length(mean) x n numeric matrix; every entry in row i is drawn
# from a normal distribution with mean mean[i] and sd sd[i].
normv <- function(n, mean, sd) {
  # rnorm() recycles `mean`/`sd` silently; lengths that do not line up would
  # produce rows with mixed parameters, so reject them up front.
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0,
    length(sd) %in% c(1L, length(mean))
  )
  n_rows <- length(mean)
  out <- rnorm(n * n_rows, mean = mean, sd = sd)
  # Column-major fill with an explicit row count keeps draw k aligned with
  # mean[k] on every pass through the recycled parameter vectors.
  matrix(out, nrow = n_rows, ncol = n, byrow = FALSE)
}
# Seed the RNG so the simulated data are reproducible.
set.seed(1)
# Preview draw, auto-printed at top level; it also consumes random numbers
# before `mydata` is generated.
normv(5, temp$mean, temp$sd) # 5 variables, from V1 to V5
# Second draw, reshaped to long format: one row per (variable, value) pair.
mydata <- gather(
  as.data.frame(normv(5, temp$mean, temp$sd)),
  "variable", "value", V1:V5
)
# Build the composite exploratory figure for a single variable.
#
# Args:
#   variable_name: value of the `variable` column selecting the rows to plot.
#   data:          long-format data frame with columns `variable` and `value`.
#                  Defaults to the script-level `mydata`, so existing calls of
#                  the form make_plot("V1") keep working.
#   binwidth:      histogram bin width (default 50, the value previously
#                  hard-coded).
#
# Returns the object produced by plot_grid(): the histogram stacked above the
# Q-Q plot in the left column, and the boxplot in the right column.
make_plot <- function(variable_name, data = mydata, binwidth = 50) {
  data_subset <- data %>%
    filter(variable == variable_name)

  # Fail early with a clear message instead of letting the stats choke on an
  # empty data frame.
  if (nrow(data_subset) == 0) {
    stop("No rows found for variable '", variable_name, "'.", call. = FALSE)
  }

  # Density-scaled histogram with a density overlay and mean/median lines.
  hist_g <- ggplot(data_subset, aes(value)) +
    geom_histogram(
      aes(y = ..density..),
      binwidth = binwidth,
      fill = "yellow",
      alpha = 0.7,
      col = "black"
    ) +
    geom_density(
      colour = "#00000040", lwd = 1, fill = "lightyellow", alpha = 0.5
    ) +
    geom_vline(
      aes(xintercept = summarised_value, color = stat),
      size = 1,
      # A function given as layer data is applied to the plot's own data;
      # here it reduces the subset to one row per statistic.
      data = function(d) {
        d %>%
          summarise(mean = mean(value), median = median(value)) %>%
          gather("stat", "summarised_value", mean:median)
      }
    ) +
    scale_color_manual(values = c("blue", "red")) +
    theme_bw() +
    theme(
      axis.text.x = element_text(size = 14),
      axis.title.x = element_text(size = 16),
      axis.text.y = element_text(size = 14),
      axis.title.y = element_text(size = 16),
      legend.position = c(.95, .95),
      legend.justification = c(1, 1),
      legend.background = element_rect(color = "black")
    )

  qq <- ggplot(data_subset, aes(sample = value)) +
    stat_qq()

  # `variable_name` is a constant string here, giving a single box labelled
  # with the variable's name.
  bp <- ggplot(data_subset, aes(x = variable_name, y = value)) +
    geom_boxplot()

  # arrange three in a grid
  plot_grid(
    plot_grid(hist_g, qq, nrow = 2),
    bp,
    ncol = 2
  )
}
# One composite figure per distinct variable, in order of first appearance.
figures_list <- map(.x = unique(mydata$variable), .f = make_plot)

# Lay the five composites out side by side in a single row.
all_figures <- plot_grid(plotlist = figures_list, ncol = 5, nrow = 1)

# Write the combined figure to disk; `ncol` scales the output width so each
# of the five panels keeps the requested aspect ratio and height.
save_plot(
  filename = "out.png",
  plot = all_figures,
  ncol = 5,
  base_height = 7,
  base_aspect_ratio = 0.9
)
我用 `all_figures <- plot_grid(plotlist = figures_list, ...)` 把所有图排成了一行,但您也可以把它们分别保存,例如将列表逐个映射(map)到 `ggsave` 等。