时间序列及其可视化

时间:2017-10-05 21:39:00

标签: r ggplot2 time-series

我有大约25万条事件记录,数据如下所示:

                 Slot Anzahl Nutzung TimeSlotNr WochenSlots Tag
1 2011-01-01 00:00:00      2   Firma          1         242   1
2 2011-01-01 00:00:00     50  Privat          1         242   1
3 2011-01-01 00:30:00      1   Firma          2         243   1
4 2011-01-01 00:30:00     49  Privat          2         243   1
5 2011-01-01 01:00:00      1   Firma          3         244   1
6 2011-01-01 01:00:00     48  Privat          3         244   1

一个时间槽(Slot)代表半小时,"Anzahl"是一个时间槽中的事件数,第一个时间槽开始于 2011-01-01 00:00:00。"WochenSlots"是 TimeSlotNr %% 336,并在星期六00:00:00开始。所以我希望看到事件在一周内的分布情况。

You see the development of bookings since 2011 by customer type. The peaks and holes are errors.

And the same data aggregated to weeks (Monday 00:00 - Sunday 23:00)

我现在想做的是:

  • 在x轴刻度上显示星期几(星期一00:00 - 星期日24:00)
  • 绘制包络线(envelope),用来表示x%的事件所处的分布范围。

我不知道怎么做。

  # Scatter plot of event counts (Anzahl) against the slot-of-week index
  # (WochenSlots), coloured by usage type (Nutzung).
  ggplot(PB2, aes(x = WochenSlots, y = Anzahl, colour = Nutzung)) +
    geom_point(alpha = 0.6) +
    # y-axis tick labels are formatted with scales::percent.
    scale_y_continuous(labels = scales::percent) +
    # One panel per Nutzung value, laid out in two rows, each panel with its
    # own y-axis range.
    facet_wrap(
      ~ Nutzung,
      nrow = 2,
      scales = "free_y",
      shrink = TRUE
    )
  

dput(PB2 [1:100,])   结构(列表(Slot = structure(c)(1293840000,1293840000,1293841800,   1293841800,1293843600,1293843600,1293845400,1293845400,1293847200,   1293847200,1293849000,1293849000,1293850800,1293850800,1293852600,   1293852600,1293854400,1293854400,1293856200,1293856200,1293858000,   1293858000,1293859800,1293859800,1293861600,1293861600,1293863400,   1293863400,12293865200,12293865200,1293867000,1293867000,1293868800,   1293868800,12293870600,1293870600,1293872400,1293872400,1293874200,   1293874200,1293876000,1293876000,1293877800,1293877800,1293879600,   1293879600,1293881400,1293881400,1293883200,1293883200,1293885000,   1293885000,1293886800,1293886800,1293888600,1293888600,1293890400,   1293890400,1293892200,1293892200,1293894000,1293894000,1293895800,   1293895800,1293897600,1293897600,1293899400,1293899400,1293901200,   1293901200,1293903000,1293903000,1293904800,1293904800,1293906600,   1293906600,1293908400,1293908400,12293910200,12293910200,1293912000,   1293912000,12293913800,1229913800,1229915600,1293915600,1293917400,   1293917400,12293919200,12293919200,12293921000,1293921000,1293922800,   1293922800,12293924600,1293924600,12293926400,12293926400,1293928200,   1293928200),class = c(" POSIXct"," POSIXt"),tzone =" UTC"),       Anzahl = c(2L,50L,1L,49L,1L,48L,1L,43L,1L,43L,1L,       30L,1L,27L,0L,22L,0L,19L,0L,20L,0L,18L,0L,17L,       0L,17L,0L,17L,0L,17L,0L,18L,0L,19L,2L,19L,2L,       19L,2L,20L,2L,21L,2L,21L,2L,20L,2L,18L,2L,22L,       2L,24L,3L,25L,1L,28L,1L,30L,1L,33L,1L,32L,1L,       28L,2L,24L,2L,25L,2L,25L,2L,22L,2L,20L,1L,15L,       2L,14L,1L,13L,1L,11L,1L,12L,1L,11L,1L,9L,1L,       8L,1L,7L,1L,5L,1L,4L,1L,3L,0L,3L),Nutzung = c(" Firma",       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," 
Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;,       " Privat"," Firma"," Privat"),TimeSlotNr = c(1L,1L,2L,       2L,3L,3L,4L,4L,5L,5L,6L,6L,7L,7L,8L,8L,9L,9L,       10L,10L,11L,11L,12L,12L,13L,13L,14L,14L,15L,15L,       16L,16L,17L,17L,18L,18L,19L,19L,20L,20L,21L,21L,       22L,22L,23L,23L,24L,24L,25L,25L,26L,26L,27L,27L,       28L,28L,29L,29L,30L,30L,31L,31L,32L,32L,33L,33L,       34L,34L,35L,35L,36L,36L,37L,37L,38L,38L,39L,39L,       40L,40L,41L,41L,42L,42L,43L,43L,44L,44L,45L,45L,       46L,46L,47L,47L,48L,48L,49L,49L,50L,50L),WochenSlots = c(242,       242,243,243,244,244,245,245,246,246,247,247,248,       248,249,249,250,250,251,251,252,252,253,253,254,       254,255,255,256,256,257,257,258,258,259,259,260,       260,261,261,262,262,263,263,264,264,265,265,266,       266,267,267,268,268,269,269,270,270,271,271,272,       272,273,273,274,274,275,275,276,276,277,277,278,       278,279,279,280,280,281,281,282,282,283,283,284,       284,285,285,286,286,287,287,288,288,289,289,290,       290,291,291),Tag = c(1L,1L,1L,1L,1L,1L,1L,1L,1L,       1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,       1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,       1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,       
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,       1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,       1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,2L,2L,2L,       2L)),. Name = c(" Slot"," Anzahl"," Nutzung"," TimeSlotNr",   " WochenSlots"," Tag"),row.names = c(NA,100L),class =" data.frame")

1 个答案:

答案 0 :(得分:1)

看起来分位数回归可能正是你所需要的。您发布的数据样本在每个时间点只有一个观察值,所以我创建了一些虚构数据用于说明。在下图中,我们使用灵活的样条函数作为回归函数,并在数据的第25和第75百分位处绘制回归线。如果这正是您想要的效果,请告诉我。

library(ggplot2)
library(quantreg)
library(splines)

# Fake data: 10,000 points scattered around a cosine curve.
# The seed is fixed so the example is reproducible.
set.seed(2)
dat <- data.frame(x = runif(1e4, 0, 20))
# rnorm()'s second argument is the mean, so the noise is centred on 2
# (default sd = 1), shifting the curve up in addition to the constant 10.
dat$y <- cos(dat$x) + 10 + rnorm(1e4, mean = 2)

# Scatter plot of the raw points with quantile-regression lines at the
# 25th and 75th percentiles. The regression function is a natural spline
# in x (ns(x, 10)), so the lines can follow the curvature of the data.
ggplot(dat, aes(x, y)) +
  geom_point(alpha = 0.1, colour = "blue", size = 0.5) +
  # `linewidth` replaces `size` for line geoms since ggplot2 3.4.0.
  geom_quantile(
    formula = y ~ ns(x, 10),
    quantiles = c(0.25, 0.75),
    colour = "red",
    linewidth = 1
  ) +
  theme_classic()

enter image description here