假设以下数据框
head(df, 9)
Day variable value
1 2015-10-18 Number_Flows.minimum 401.0000
2 2015-10-18 Number_Flows.maximum 2068.0000
3 2015-10-18 Number_Flows.average 1578.9474
4 2015-10-18 Number_srcaddr.minimum 95.0000
5 2015-10-18 Number_srcaddr.maximum 292.0000
6 2015-10-18 Number_srcaddr.average 222.6316
7 2015-10-18 Number_dstaddr.minimum 65.0000
8 2015-10-18 Number_dstaddr.maximum 411.0000
9 2015-10-18 Number_dstaddr.average 202.5789
我想要做的是为每个minimum
,maximum
等制作average
,Number_Flows
,Number_srcaddr
,我宁可这样做。有条形显示值,但我也可以使用其他方法,只要我得到(例如下面公布的可重复示例)总共22个图表(每天11个)。
我尝试了各种各样的东西,但没有运气。
library(dplyr)
library(ggplot2)
ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_bar(stat = 'identity')+facet_wrap(~group)
ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_bar(stat = 'identity')+facet_wrap(~group)
ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_line()+facet_wrap(~group)
数据
dput(df)
structure(list(Day = structure(c(1445115600, 1445115600, 1445115600,
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600,
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600,
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600,
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600,
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600,
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000,
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000,
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000,
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000,
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000,
1445202000, 1445202000, 1445202000), class = c("POSIXct", "POSIXt"
), tzone = ""), variable = c("Number_Flows.minimum", "Number_Flows.maximum",
"Number_Flows.average", "Number_srcaddr.minimum", "Number_srcaddr.maximum",
"Number_srcaddr.average", "Number_dstaddr.minimum", "Number_dstaddr.maximum",
"Number_dstaddr.average", "Sum_packets.minimum", "Sum_packets.maximum",
"Sum_packets.average", "Sum_duration_nannosecs.minimum", "Sum_duration_nannosecs.maximum",
"Sum_duration_nannosecs.average", "Average_Duration.minimum",
"Average_Duration.maximum", "Average_Duration.average", "Average_Bytes.minimum",
"Average_Bytes.maximum", "Average_Bytes.average", "Bytes_per_packet.minimum",
"Bytes_per_packet.maximum", "Bytes_per_packet.average", "Sum_of_Bytes.minimum",
"Sum_of_Bytes.maximum", "Sum_of_Bytes.average", "Actual_Batch_Duration_secs.minimum",
"Actual_Batch_Duration_secs.maximum", "Actual_Batch_Duration_secs.average",
"packets_per_second.minimum", "packets_per_second.maximum", "packets_per_second.average",
"Number_Flows.minimum", "Number_Flows.maximum", "Number_Flows.average",
"Number_srcaddr.minimum", "Number_srcaddr.maximum", "Number_srcaddr.average",
"Number_dstaddr.minimum", "Number_dstaddr.maximum", "Number_dstaddr.average",
"Sum_packets.minimum", "Sum_packets.maximum", "Sum_packets.average",
"Sum_duration_nannosecs.minimum", "Sum_duration_nannosecs.maximum",
"Sum_duration_nannosecs.average", "Average_Duration.minimum",
"Average_Duration.maximum", "Average_Duration.average", "Average_Bytes.minimum",
"Average_Bytes.maximum", "Average_Bytes.average", "Bytes_per_packet.minimum",
"Bytes_per_packet.maximum", "Bytes_per_packet.average", "Sum_of_Bytes.minimum",
"Sum_of_Bytes.maximum", "Sum_of_Bytes.average", "Actual_Batch_Duration_secs.minimum",
"Actual_Batch_Duration_secs.maximum", "Actual_Batch_Duration_secs.average",
"packets_per_second.minimum", "packets_per_second.maximum", "packets_per_second.average"
), value = c(401, 2068, 1578.94736842105, 95, 292, 222.631578947368,
65, 411, 202.578947368421, 4181, 130567, 33860.2631578947, 2647278,
10876533, 5438303.63157895, 1543.937984, 20335.58603, 4202.062837,
692.4193548, 77207.90476, 14689.4305788105, 231.6654261, 943.7592654,
465.315475931579, 1244970, 123223816, 19865244, 9, 30, 27.1578947368421,
179, 4352, 1265.94736842105, 609, 2352, 1578.94736842105, 89,
299, 219.105263157895, 92, 402, 193.578947368421, 1124, 60473,
19022.6842105263, 944317, 20088618, 5254959.84210526, 1550.602627,
9749.356239, 3236.99523905263, 258.9441708, 17451.96293, 5789.86937011053,
140.2998221, 717.4807734, 424.926870810526, 157697, 33505216,
9510806.21052632, 5, 30, 24.9473684210526, 114, 2179, 772.947368421053
)), .Names = c("Day", "variable", "value"), row.names = c(NA,
66L), class = "data.frame")
答案 0 :(得分:7)
我喜欢使用线条来表示趋势,而使用彩带来显示数值范围。
与@docendo相似我先separate
,但我会spread
之后:
library(tidyverse)
df %>%
separate(variable, c("type", "var"), sep = "\\.") %>%
spread(var, value) %>%
ggplot(aes(Day)) +
geom_line(aes(y = average), size = 1) +
geom_ribbon(aes(ymin = minimum, ymax = maximum), alpha = 0.2) +
facet_wrap(~type, scales = 'free_y') +
theme(axis.text.x=element_text(angle = 90, vjust = 0.5))
如果你有更多的日子,这会更好看。
答案 1 :(得分:5)
我首先要分离"变量"绘图前的专栏:
library(dplyr)
library(ggplot2)
library(tidyr)
df %>%
separate(variable, c("type", "var"), sep = "\\.") %>%
ggplot(aes(x = Day, y = value, color = var)) +
geom_point() +
facet_wrap(~type) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
通过使用免费的y刻度,条形代替点等,您可以轻松地提供更多信息。
答案 2 :(得分:4)
base
R解决方案(或几乎:它使用reshape2
):
首先创建变量"type"
和"stat"
,然后按日split
data.frame
创建变量,然后reshape
创建数据框。形状,最后plot
与barplot
(我让你自定义barplot
)。
您可以使用lapply
的名称修改list
次呼叫来保留日期(稍后将其用作主标题)。
df$type <- sub("([^.]+)\\..+", "\\1", df$variable)
df$stat <- sub("[^.]+\\.(.+)", "\\1", df$variable)
l_df <- split(df, df$Day)
library(reshape2)
par(mfrow=c(2, 1))
lapply(l_df, function(df_day){
df_resh <- dcast(type~stat, value.var="value", data=df_day)
row.names(df_resh) <- df_resh$type
barplot(t(df_resh[, -1]), beside=TRUE, legend=TRUE, col=c("green", "blue", "red"))})
答案 3 :(得分:3)