我的数据集中,预测是按分组(商店,Store)分别进行的。数据如下:
# Sample data for the question, written as a structure() literal (looks like
# dput() output): a data.frame with 84 rows (row.names = c(NA, -84L)).
# Columns: Store, DayOfWeek, Date, Sales, Promo, LinearRegressionForecast, Type.
# Date, Sales, LinearRegressionForecast and Type are stored as factors: the
# integer vectors below are level codes and the actual values are the strings
# in each .Label. Date labels are "dd.mm.yyyy" strings; Sales and
# LinearRegressionForecast labels are numeric strings; Type labels are
# "Forecast" and "obser".
df=structure(list(Store = c(173L, 173L, 173L, 173L, 173L, 173L,
173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 174L, 174L, 174L,
174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L,
173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L,
173L, 173L, 173L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L,
174L, 174L, 174L, 174L, 174L, 174L, 173L, 173L, 173L, 173L, 173L,
173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 173L, 174L, 174L,
174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L, 174L,
174L), DayOfWeek = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("15.07.2015", "16.07.2015", "17.07.2015"
), class = "factor"), Sales = structure(c(27L, 12L, 16L, 18L,
9L, 4L, 26L, 23L, 10L, 19L, 7L, 20L, 25L, 5L, 17L, 2L, 11L, 8L,
3L, 22L, 15L, 14L, 28L, 6L, 1L, 24L, 13L, 21L, 27L, 12L, 16L,
18L, 9L, 4L, 26L, 23L, 10L, 19L, 7L, 20L, 25L, 5L, 17L, 2L, 11L,
8L, 3L, 22L, 15L, 14L, 28L, 6L, 1L, 24L, 13L, 21L, 27L, 12L,
16L, 18L, 9L, 4L, 26L, 23L, 10L, 19L, 7L, 20L, 25L, 5L, 17L,
2L, 11L, 8L, 3L, 22L, 15L, 14L, 28L, 6L, 1L, 24L, 13L, 21L), .Label = c("10318.344",
"10725.268", "10765.647", "13546.236", "3418.328", "3939.406",
"4089.442", "4377.643", "5196.012", "5487.437", "5778.296", "6200.403",
"6216.929", "6331.589", "6404.693", "6472.833", "6693.678", "6751.922",
"6770.161", "7510.433", "7736.447", "7743.879", "8107.569", "8119.046",
"9087.104", "9326.839", "9718.452", "9855.327"), class = "factor"),
Promo = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), LinearRegressionForecast = structure(c(22L,
9L, 15L, 14L, 8L, 1L, 26L, 20L, 6L, 17L, 3L, 18L, 25L, 2L,
11L, 27L, 7L, 5L, 24L, 13L, 10L, 12L, 23L, 4L, 28L, 21L,
16L, 19L, 22L, 9L, 15L, 14L, 8L, 1L, 26L, 20L, 6L, 17L, 3L,
18L, 25L, 2L, 11L, 27L, 7L, 5L, 24L, 13L, 10L, 12L, 23L,
4L, 28L, 21L, 16L, 19L, 22L, 9L, 15L, 14L, 8L, 1L, 26L, 20L,
6L, 17L, 3L, 18L, 25L, 2L, 11L, 27L, 7L, 5L, 24L, 13L, 10L,
12L, 23L, 4L, 28L, 21L, 16L, 19L), .Label = c("10672.724",
"2286.724", "2940.339", "3038.273", "3265.624", "3387.729",
"3475.001", "3568.385", "4527.949", "5042.683", "5131.816",
"5196.835", "5204.855", "5239.113", "5572.545", "5605.564",
"5656.971", "6216.276", "6510.814", "6749.251", "6901.256",
"7248.194", "7310.538", "7549.539", "7585.489", "7842.506",
"8371.118", "8487.823"), class = "factor"), Type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Forecast", "obser"
), class = "factor")), .Names = c("Store", "DayOfWeek", "Date",
"Sales", "Promo", "LinearRegressionForecast", "Type"), class = "data.frame", row.names = c(NA,
-84L))
数据中有一列 Type,用于指示该行的类型(预测值 Forecast 或观测值 obser)。每个商店的预测是分别进行的。我需要为每个商店(173 和 174)分别绘制一张预测图,在同一张图上叠加显示预测线和观测线(横轴为 Date,纵轴为 Sales)。该怎么做?
抱歉,示意图是我用画图(Paint)软件随手画的。
答案 0 :(得分:1)
在绘图之前,需要先对数据集做一些整理:
首先,因子列必须转换为数值,日期必须转换为日期对象,这一步我用 mutate 完成;
然后按 Store 和 Date 分组,并用 summarise 计算每日总销售额和每日总预测值;
最后用 gather 把数据表整理成整洁(tidy)的长格式,得到 clean_df:
# Packages this snippet relies on: dplyr (%>% and the data verbs),
# tidyr (gather) and ggplot2 (the plot).
library(dplyr)
library(tidyr)
library(ggplot2)

# Build a long-format table with one row per Store, Date and measure
# (total_daily_sales / total_daily_forecast), ready for plotting.
clean_df <- df %>%
  mutate(
    Date = as.Date(Date, format = "%d.%m.%Y"),
    # Sales and LinearRegressionForecast are factors. Calling as.numeric()
    # directly on a factor returns its internal level codes, not the values
    # shown in the labels, so convert to character first.
    Sales = as.numeric(as.character(Sales)),
    LinearRegressionForecast = as.numeric(as.character(LinearRegressionForecast))
  ) %>%
  select(Date, Sales, LinearRegressionForecast, Store) %>%
  group_by(Store, Date) %>%
  summarise(
    total_daily_sales = sum(Sales, na.rm = TRUE),
    total_daily_forecast = sum(LinearRegressionForecast, na.rm = TRUE)
  ) %>%
  # summarise() only drops the last grouping level; remove the remaining
  # Store grouping so clean_df is a plain table for later use.
  ungroup() %>%
  gather(type, value, -Date, -Store)

# One panel per store, with one line per measure over time.
clean_df %>%
  ggplot(aes(x = Date, y = value, col = type)) +
  geom_line() +
  facet_wrap(~Store)
代码在这里