我有 25 万个事件,数据如下所示:
Slot Anzahl Nutzung TimeSlotNr WochenSlots Tag
1 2011-01-01 00:00:00 2 Firma 1 242 1
2 2011-01-01 00:00:00 50 Privat 1 242 1
3 2011-01-01 00:30:00 1 Firma 2 243 1
4 2011-01-01 00:30:00 49 Privat 2 243 1
5 2011-01-01 01:00:00 1 Firma 3 244 1
6 2011-01-01 01:00:00 48 Privat 3 244 1
一个时隙(Slot)代表半小时,"Anzahl" 是一个时隙中的事件数,第一个时隙开始于 2011-01-01 00:00:00。"WochenSlots" 是 TimeSlotNr %% 336,并在星期六 00:00:00 开始。所以我希望看到一周内的分布情况。
我现在想做的是:
我不知道怎么做。
# Scatter plot of the event count (Anzahl) against the weekly slot index
# (WochenSlots), coloured by usage type (Nutzung) and drawn in one panel per
# usage type. Each panel gets its own y-axis range; y labels are formatted
# with scales::percent.
ggplot(PB2) +
  geom_point(
    aes(x = WochenSlots, y = Anzahl, colour = Nutzung),
    alpha = 0.6
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(
    ~ Nutzung,
    nrow = 2,
    scales = "free_y",
    shrink = TRUE
  )
dput(PB2 [1:100,]) 结构(列表(Slot = structure(c)(1293840000,1293840000,1293841800, 1293841800,1293843600,1293843600,1293845400,1293845400,1293847200, 1293847200,1293849000,1293849000,1293850800,1293850800,1293852600, 1293852600,1293854400,1293854400,1293856200,1293856200,1293858000, 1293858000,1293859800,1293859800,1293861600,1293861600,1293863400, 1293863400,12293865200,12293865200,1293867000,1293867000,1293868800, 1293868800,12293870600,1293870600,1293872400,1293872400,1293874200, 1293874200,1293876000,1293876000,1293877800,1293877800,1293879600, 1293879600,1293881400,1293881400,1293883200,1293883200,1293885000, 1293885000,1293886800,1293886800,1293888600,1293888600,1293890400, 1293890400,1293892200,1293892200,1293894000,1293894000,1293895800, 1293895800,1293897600,1293897600,1293899400,1293899400,1293901200, 1293901200,1293903000,1293903000,1293904800,1293904800,1293906600, 1293906600,1293908400,1293908400,12293910200,12293910200,1293912000, 1293912000,12293913800,1229913800,1229915600,1293915600,1293917400, 1293917400,12293919200,12293919200,12293921000,1293921000,1293922800, 1293922800,12293924600,1293924600,12293926400,12293926400,1293928200, 1293928200),class = c(" POSIXct"," POSIXt"),tzone =" UTC"), Anzahl = c(2L,50L,1L,49L,1L,48L,1L,43L,1L,43L,1L, 30L,1L,27L,0L,22L,0L,19L,0L,20L,0L,18L,0L,17L, 0L,17L,0L,17L,0L,17L,0L,18L,0L,19L,2L,19L,2L, 19L,2L,20L,2L,21L,2L,21L,2L,20L,2L,18L,2L,22L, 2L,24L,3L,25L,1L,28L,1L,30L,1L,33L,1L,32L,1L, 28L,2L,24L,2L,25L,2L,25L,2L,22L,2L,20L,1L,15L, 2L,14L,1L,13L,1L,11L,1L,12L,1L,11L,1L,9L,1L, 8L,1L,7L,1L,5L,1L,4L,1L,3L,0L,3L),Nutzung = c(" Firma", " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " 
Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"," Firma"," Privat"," Firma& #34 ;, " Privat"," Firma"," Privat"),TimeSlotNr = c(1L,1L,2L, 2L,3L,3L,4L,4L,5L,5L,6L,6L,7L,7L,8L,8L,9L,9L, 10L,10L,11L,11L,12L,12L,13L,13L,14L,14L,15L,15L, 16L,16L,17L,17L,18L,18L,19L,19L,20L,20L,21L,21L, 22L,22L,23L,23L,24L,24L,25L,25L,26L,26L,27L,27L, 28L,28L,29L,29L,30L,30L,31L,31L,32L,32L,33L,33L, 34L,34L,35L,35L,36L,36L,37L,37L,38L,38L,39L,39L, 40L,40L,41L,41L,42L,42L,43L,43L,44L,44L,45L,45L, 46L,46L,47L,47L,48L,48L,49L,49L,50L,50L),WochenSlots = c(242, 242,243,243,244,244,245,245,246,246,247,247,248, 248,249,249,250,250,251,251,252,252,253,253,254, 254,255,255,256,256,257,257,258,258,259,259,260, 260,261,261,262,262,263,263,264,264,265,265,266, 266,267,267,268,268,269,269,270,270,271,271,272, 272,273,273,274,274,275,275,276,276,277,277,278, 278,279,279,280,280,281,281,282,282,283,283,284, 284,285,285,286,286,287,287,288,288,289,289,290, 290,291,291),Tag = c(1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, 1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,2L,2L,2L, 2L)),. Name = c(" Slot"," Anzahl"," Nutzung"," TimeSlotNr", " WochenSlots"," Tag"),row.names = c(NA,100L),class =" data.frame")
答案 0 :(得分:1)
看起来分位数回归可能正是您所需要的。您发布的数据样本在每个时间点只有一个观测值,所以我创建了一些模拟数据用于说明。在下图中,我们使用灵活的样条函数作为回归函数,并在数据的第 25 和第 75 百分位处绘制回归线。如果这正是您想要的,请告诉我。
library(ggplot2)
library(quantreg)
library(splines)
# Simulated data for illustration: 10,000 x values drawn uniformly from
# [0, 20]; y is cos(x) shifted up by 10 plus normal noise (mean 2, sd 1).
# The seed is fixed so the example is reproducible.
set.seed(2)
dat <- data.frame(x = runif(1e4, min = 0, max = 20))
dat$y <- cos(dat$x) + 10 + rnorm(1e4, mean = 2)

# Raw points in faint blue, overlaid with quantile-regression curves at the
# 25th and 75th percentiles. The regression function is a natural cubic
# spline in x with 10 degrees of freedom.
ggplot(dat, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 0.5, alpha = 0.1) +
  geom_quantile(
    formula = y ~ ns(x, df = 10),
    quantiles = c(0.25, 0.75),
    colour = "red",
    size = 1
  ) +
  theme_classic()