ggplot绘制聚合总和除以列

时间:2018-04-27 10:33:15

标签: r ggplot2 group-by sum

我试图通过ggplot绘制一个组合点和线图。

我想把 x 轴设为月份,并在 y 轴上绘制按月汇总的 minuten 之和除以 anzahl 之和的结果。

测试数据:

# Test data: ten observations spread over four months.
minuten <- runif(10)
# Counts are stored as strings; "" marks a missing count.
anzahl <- c("", "", "", "23", "", "3", "", "", "5", "56")
time <- factor(c("Jan", "Jan", "Jan", "Jan", "Feb", "Feb", "Mar", "Mar", "Mar", "Apr"))


# The month vector is called `time`, and the plot maps x = time, so that is
# the object that has to go into the data frame.
df2 <- data.frame(time, anzahl, minuten)

我的尝试看起来像这样:

# Asker's original attempt, kept verbatim; it does not run as written:
#  - the first line has one more ")" than "(", so it fails to parse;
#  - anzahl is a character vector in the test data, and sum() errors on
#    character input.
# NOTE(review): sum() inside aes() is presumably evaluated over the whole
# column rather than once per month -- confirm against the ggplot2 docs.
g = ggplot(data = df2, aes(x=time, y=sum(minuten , na.rm = TRUE)/sum(anzahl))))+
  geom_point(aes(group=time))

# Print the plot object.
g

我想展示每个月的 minuten / anzahl。有人知道该如何解决吗?

祝你好运

3 个答案:

答案 0 :(得分:0)

在 base R 中,您可以使用例如 by()

# Example data from the question.
minuten <- runif(10)
anzahl <- c("", "", "", "23", "", "3", "", "", "5", "56")
time <- factor(c("Jan", "Jan", "Jan", "Jan", "Feb", "Feb", "Mar", "Mar", "Mar", "Apr"))

# as.numeric("") yields NA; treat a missing count as 0 so it does not
# poison the per-month sums.
anzahl <- as.numeric(anzahl)
anzahl[is.na(anzahl)] <- 0

df2 <- data.frame(time, anzahl, minuten)

# Per-month ratio sum(minuten) / sum(anzahl). split() names each piece by its
# factor level, so every value stays attached to its own month. Grouped
# results come back in level order (alphabetical here), which is not the
# order of first appearance, so they must not be paired positionally with
# unique(df2$time).
ratio_by_month <- vapply(
  split(df2, df2$time),
  function(x) sum(x$minuten) / sum(x$anzahl),
  numeric(1)
)

# Months in order of first appearance (Jan, Feb, Mar, Apr); used both to
# order the rows and as factor levels so the x axis is not alphabetical.
month_order <- unique(as.character(df2$time))

df3 <- data.frame(value = unname(ratio_by_month[month_order]),
                  time = factor(month_order, levels = month_order))

# Point plot of the aggregated ratio: month on the x axis, ratio on the y axis.
g <- ggplot(df3, aes(time, value)) +
  geom_point(aes(group = time))

enter image description here

答案 1 :(得分:0)

基础R解决方案

library(ggplot2)
# Sum of minuten and anzahl per month.
# cbind() on the left-hand side aggregates the two columns separately;
# `minuten + anzahl` would add them row-wise into a single column, leaving no
# `minuten` or `anzahl` column in the result.
# na.action = na.pass keeps rows whose anzahl is NA so their minuten are
# still counted; na.rm = TRUE is forwarded to sum() to skip those NAs.
my_df_agg <- aggregate(cbind(minuten, anzahl) ~ time, data = my_df,
                       FUN = sum, na.rm = TRUE, na.action = na.pass)
# Ratio of the monthly sums
my_df_agg$average <- my_df_agg$minuten / my_df_agg$anzahl
# Plot
ggplot(data = my_df_agg, aes(x = time, y = average)) +
  geom_point(aes(group = time))

使用dplyr解决方案

library(dplyr)
library(ggplot2)
# One row per month holding sum(minuten) / sum(anzahl), then plotted as points.
# summarise() collapses each group to a single row; mutate() would keep all
# ten rows and draw duplicate points on top of each other.
my_df %>%
  group_by(time) %>%
  summarise(average = sum(minuten, na.rm = TRUE) / sum(anzahl, na.rm = TRUE)) %>%
  ggplot(aes(x = time, y = average)) +
  geom_point(aes(group = time))

输出

enter image description here

数据

set.seed(1) # for reproducibility
# stringsAsFactors = FALSE keeps anzahl as character on every R version.
# Before R 4.0 the default turned it into a factor, and as.numeric() on a
# factor returns the level codes instead of the numbers in the labels.
my_df <- data.frame(time = factor(c("Jan", "Jan", "Jan", "Jan", "Feb", "Feb",
                                    "Mar", "Mar", "Mar", "Apr")),
                    anzahl = c("", "", "", "23", "", "3", "", "", "5", "56"),
                    minuten = runif(10),
                    stringsAsFactors = FALSE)
# Numeric, so we can calculate with it; the empty strings become NA.
my_df$anzahl <- as.numeric(my_df$anzahl)

答案 2 :(得分:0)

以下是使用 dplyr 的解决方案。

library(dplyr)
library(ggplot2)

df2 %>%
  group_by(time) %>%
  mutate(anzahl = sum(as.numeric(anzahl), na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(time, anzahl) %>%
  summarise(minuten = mean(minuten)) %>%
  mutate(ratio = minuten / anzahl) %>%
  ungroup() %>%
  mutate(time = factor(time, levels = month.abb[1:4], labels = month.abb[1:4])) %>%
  ggplot(aes(time, ratio)) +
  geom_point() +
  ylab("minuten / anzahl")

# Example data for this answer: month (factor with alphabetically ordered
# levels), count as a string ("" where missing), and minutes.
df2 <- data.frame(
  time = factor(
    c("Jan", "Jan", "Jan", "Jan", "Feb", "Feb", "Mar", "Mar", "Mar", "Apr"),
    levels = c("Apr", "Feb", "Jan", "Mar")
  ),
  anzahl = c("", "", "", "23", "", "3", "", "", "5", "56"),
  minuten = c(
    0.051252949051559, 0.749002492986619, 0.0514915327075869,
    0.20246379589662, 0.16418539150618, 0.785793941700831,
    0.841768049867824, 0.255166659131646, 0.0798644754104316,
    0.00516700255684555
  ),
  stringsAsFactors = FALSE
)

enter image description here

数据

{{1}}