循环遍历数据子集以在R中生成其他图

时间:2017-04-30 16:52:38

标签: r loops ggplot2 subset

(抱歉我的 R 代码写得不太规范,我是新手)

我有一系列颜色,我的数据格式化为:

# Colour values that are later handed to scale_colour_manual().
colors <- c(
  "red",
  "blue",
  "orange",
  "turquoise4",
  "green3"
)

我正在生成一张按颜色/政党分组的图:

# Aesthetic mapping shared by the point layers: colour follows Party, and
# both point size and weight are set to 1 / Error.
main_aes <- aes(
  x = Date,
  y = Popular_Support,
  colour = Party,
  size = 1 / Error,
  weight = 1 / Error
)

# `plot` is the bare base plot (data only, no mapping); it is reused later.
plot <- ggplot(polls)

# `plot2` adds the poll points and the manual colour scale on top of it.
plot2 <- plot +
  geom_point(main_aes) +
  scale_colour_manual(values = colors)

我添加了一条趋势线:

# Add a smoother over `polls` (span 0.35) as the second layer of the plot.
# NOTE(review): `plot` was created without a mapping and `main_aes` is only
# passed to geom_point(), so this layer appears to receive no x/y aesthetics;
# confirm against the working script.
plot_smooth <- plot2 +
  stat_smooth(data = polls, span = 0.35)

想要为每种颜色设置不同的趋势和色带,需要在绘制之前操纵平滑的数据,因此我提取平滑的数据:

# Build the plot and keep the data computed for its second layer, i.e. the
# stat_smooth() layer (geom_point() is the first layer).
built <- ggplot_build(plot_smooth)
smooth_data <- built$data[[2]]
# do some custom manipulations of smooth_data here

然后我手动为每个政党创建趋势线和置信带。我正是希望在这一部分用循环来代替重复的代码:

# Split the smoothed data into one data frame per colour: each subset keeps
# the rows of `smooth_data` whose `colour` column equals the k-th entry of
# `colors`.
party_trend.1 <- subset(smooth_data, colour == colors[1])
party_trend.2 <- subset(smooth_data, colour == colors[2])
party_trend.3 <- subset(smooth_data, colour == colors[3])
party_trend.4 <- subset(smooth_data, colour == colors[4])
party_trend.5 <- subset(smooth_data, colour == colors[5])

# One semi-transparent ribbon per subset, spanning ymin..ymax along x.
# No fill or colour is specified for the ribbons here.
plot <- plot + geom_ribbon(data = party_trend.1, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.2, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.3, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.4, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.5, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)

# One trend line per subset, drawn in the matching entry of `colors`
# (set as a constant, not mapped through the colour scale).
plot <- plot + geom_line(data = party_trend.1, colour=colors[1], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.2, colour=colors[2], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.3, colour=colors[3], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.4, colour=colors[4], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.5, colour=colors[5], aes(x = x, y = y))

我假设如果我可以为party_trend创建一个数组,其他2个循环将很容易,我尝试过这样的事情:

# Attempt to collect the per-colour subsets in a single container.
# `party_trend` starts out as the length-one numeric vector 0, not a list.
party_trend <- 0
# `i` takes the colour names themselves ("red", "blue", ...), not 1..5.
for(i in colors) {
    # `colors[i]` therefore indexes the unnamed `colors` vector by name, which
    # yields NA, and `[<-` tries to store a whole data frame in one element;
    # the latter is what raises the "number of items to replace" warning.
    # Filling a list with `[[<-`, or using lapply() over `colors`, avoids both.
    party_trend[i] <- subset(smooth_data, colour == colors[i])
}

但我在这里使用它之前无法弄清楚如何创建/初始化party_trend数组。这给了我错误:

警告讯息:

1: In party_trend[i] <- subset(smooth_data, colour == colors[i]) :   number of items to replace is not a multiple of replacement length 
2: In party_trend[i] <- subset(smooth_data, colour == colors[i]) :   number of items to replace is not a multiple of replacement length
3...5

这是working fiddle

编辑:补充背景。这与循环问题本身无关,但可能有助于解释为什么我不直接使用默认的绘图方式。我之所以提取数据并手动重绘,是因为有时我希望趋势线和色带的计算用到那些不会被绘制出来的数据。因此我先用整个数据集做默认计算,提取结果,对其进行裁剪,然后只绘制我想要的部分。这个 fiddle 中并没有这样做(不过你可以在第107-108行看到遗留的痕迹),但你可以在 here 看到结果。请注意,图的开头呈现出特有的“喇叭”形状,这是因为趋势线所用的数据比实际显示的更多,一直向左延伸。

1 个答案:

答案 0 :(得分:0)

如果您需要为 Party 的每个水平分别绘制一条线和一条色带,则无需逐个单独创建。您可以把该因子放进 ggplot(data=polls, aes(x=Date, y=Popular_Support, color=Party)) 的映射中,然后调用 geom_smooth():

# Map Party to colour once at the plot level so that every layer inherits it;
# geom_smooth() then draws its line and ribbon separately for each Party.
plot <- ggplot(polls, aes(x = Date, y = Popular_Support, colour = Party))
plot <- plot + geom_point()
plot <- plot + geom_smooth()

所以你可以删除它:

# Hand-written per-colour trend layers built from `smooth_data`.
#Do this in a loop!!!!
# NOTE: this loop iterates over the colour names, so `colors[i]` indexes an
# unnamed vector by name, and `[<-` cannot store a data frame in one element
# of the numeric vector `party_trend`.
party_trend <- 0
for(i in colors) {
  party_trend[i] <- subset(smooth_data, colour == colors[i])
}

# One subset of the smoothed data per entry of `colors`.
party_trend.1 <- subset(smooth_data, colour == colors[1])
party_trend.2 <- subset(smooth_data, colour == colors[2])
party_trend.3 <- subset(smooth_data, colour == colors[3])
party_trend.4 <- subset(smooth_data, colour == colors[4])
party_trend.5 <- subset(smooth_data, colour == colors[5])

# Semi-transparent ymin..ymax ribbon for each subset.
plot <- plot + geom_ribbon(data = party_trend.1, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.2, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.3, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.4, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
plot <- plot + geom_ribbon(data = party_trend.5, aes(x=x, ymin=ymin, ymax = ymax), alpha = .25)
# Trend line for each subset, drawn in the matching entry of `colors`.
plot <- plot + geom_line(data = party_trend.1, colour=colors[1], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.2, colour=colors[2], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.3, colour=colors[3], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.4, colour=colors[4], aes(x = x, y = y))
plot <- plot + geom_line(data = party_trend.5, colour=colors[5], aes(x = x, y = y))

#print(plot)
# Manual colour scale, then the poll points on top of the trend layers.
plot <- plot + scale_colour_manual(values = colors)
plot <- plot +  geom_point(main_aes, alpha=0.8)

最后,您可以添加其他绘图功能:

# Finishing layers, scales and theme for the poll chart.
# Expects `plot`, `colors`, `main_aes` and `LastElection` to exist already.
#
# Changes from the earlier version of this snippet:
#   * `show_guide = F` -> `show.legend = FALSE` (`show_guide` is deprecated in
#     ggplot2, and `F` is an ordinary variable that can be reassigned).
#   * `lim =` -> `limits =` (no reliance on partial argument matching).
#   * the four separate theme() calls are merged into one.

# Manual party colours, then the poll points drawn on top of earlier layers.
plot <- plot +
  scale_colour_manual(values = colors) +
  geom_point(main_aes, alpha = 0.8)

# Size legend: breaks at 20..60, labelled with the squares of those values.
# NOTE(review): guide_legend() documents `order` as a positive integer below
# 99; confirm that -1 behaves as intended with the installed ggplot2.
plot <- plot +
  scale_size_area(
    max_size = 3,
    breaks = seq(20, 60, 10),
    labels = seq(20, 60, 10)^2,
    name = "Sample Size"
  ) +
  guides(color = guide_legend(order = -1))

# last election: an open diamond (shape 5) with a smaller point on top and a
# value label, all kept out of the legends.
plot <- plot +
  geom_point(main_aes, data = LastElection, size = 3, shape = 5,
             show.legend = FALSE) +
  geom_point(main_aes, data = LastElection, size = 2, show.legend = FALSE) +
  geom_text(
    aes(x = Date, y = Popular_Support, label = Popular_Support),
    data = LastElection,
    show.legend = FALSE,
    size = 3,
    hjust = -0.2,
    vjust = -0.4
  )

# this election
#plot <- plot + geom_point(data=ThisElection, size=3, shape=5, show.legend=FALSE, main_aes) +
#  geom_point(data=ThisElection, size=2, show.legend=FALSE, main_aes) +
#  geom_text(data=ThisElection, show.legend=FALSE,
#            aes(x = Date, y = Popular_Support, label = Popular_Support), size=3, hjust=-.2, vjust=-0.4)

# Axes. The x limits and breaks are plain numbers (presumably serial day
# numbers; confirm against how `Date` is stored in `polls`): major breaks
# every 90 units, minor breaks every 30.
plot <- plot +
  scale_x_continuous(
    name = "Date",
    limits = c(42291, 43759),
    minor_breaks = seq(42291, 43759, by = 30),
    breaks = seq(42291, 43759, by = 90)
  ) +
  #minor_breaks = seq(42216, 42296, by=1), breaks = seq(42291, 43759, by=90))
  scale_y_continuous(
    name = "% Popular Support",
    limits = c(0, 56),
    expand = c(0, 0)
  )

# Theme: vertical x tick labels, no x axis title, 11 pt axis text and title.
plot <- plot +
  theme(
    axis.text.x = element_text(size = 11, vjust = 0.5, angle = 90,
                               colour = "#333333"),
    axis.title.x = element_blank(),
    axis.text.y = element_text(size = 11),
    axis.title.y = element_text(size = 11, angle = 90, colour = "#333333")
  )
#theme(legend.justification=c(1,1), legend.position=c(1,1))

更新:

您可以在 geom_smooth() 中使用另一个数据框。下面是一个最小示例,演示如何用不同于散点所用的数据框来计算并绘制趋势线和色带:

# Two independent 100-row samples sharing the same x values and a/b labels:
# `dat` is drawn around 0 and `dat1` around 2.
n <- rnorm(100)
dat <- data.frame(
  x = seq_len(100),
  y = n,
  lab = rep(c("a", "b"), each = 50)
)

n1 <- rnorm(100, mean = 2)
dat1 <- data.frame(
  x = seq_len(100),
  y = n1,
  lab = rep(c("a", "b"), each = 50)
)

# Points come from `dat`; the smoother is computed and drawn from `dat1`.
ggplot(dat, aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(data = dat1)

结果如下:

enter image description here