R中用户登录时间的图表

时间:2018-09-21 10:32:49

标签: r ggplot2 ggplotly

我有三列数据:ID、用户登录的时刻,以及本次登录的持续时长(以秒为单位)。下面是其中的一个示例(实际用户总数超过4000):

# Sample of the login data as produced by dput(): 17 rows, three columns.
#   id         - character user identifier ("id_1" ... "id_17")
#   time_start - login time stored as an hms/difftime vector, units = "secs"
#   time_sec   - integer session length in seconds
structure(
  list(
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

我正在尝试创建一个图表,按固定的时间间隔(例如每5分钟)显示每个时刻有多少用户在线。
例如:9:00 时有 X 个用户在线,
9:05 时有 Y 个用户在线。
也就是用直方图或类似的图形,显示任意给定时刻有多少用户处于连接状态。
我的主要难点是:用户注销之后,要把他们从计数中去掉。

我觉得答案应该就在某处,但我一直没有找到,也不知道该用什么关键词去搜索。

2 个答案:

答案 0 :(得分:3)

这是一个简单的方案:

library(ggplot2)
library(RColorBrewer)

# Colour interpolator built on the 11-class "Spectral" brewer palette.
myPalette <- colorRampPalette(brewer.pal(11, "Spectral"))

# Sample sessions: login time (hms, in seconds) and session length in seconds.
data <- structure(
  list(
    id = paste0("id_", 1:17),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312,
        30849, 38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L,
      3350L, 3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Use the order of appearance as the factor level order, so the y axis
# follows the data instead of sorting the ids alphabetically.
data$id <- factor(data$id, levels = data$id)

# One horizontal segment per user, running from login to logout and
# coloured by session length.
ggplot(data) +
  geom_segment(
    aes(
      x = time_start, xend = time_start + time_sec,
      y = id, yend = id,
      colour = time_sec
    ),
    size = 2
  ) +
  theme_bw() +
  scale_colour_gradientn(name = "Duration", colours = rev(myPalette(100))) +
  scale_y_discrete(name = "Users") +
  scale_x_time(name = "Connexion time")

编辑:在理解了您的问题之后,下面是一种实现您想要的效果的简单方法。

# Count how many sessions are active at each point of a fine time grid.
# Session bounds are computed once instead of on every grid point.
session_start <- data$time_start
session_end <- data$time_start + data$time_sec

# 1000 evenly spaced instants from the first login to the last logout.
time_grid <- seq(
  from = min(session_start), to = max(session_end), length.out = 1000
)

# A session is active on the half-open interval [login, logout): a user
# counts from the exact instant they log in and stops counting at logout.
# vapply() guarantees an integer vector whatever the input looks like.
n_active <- vapply(
  time_grid,
  FUN = function(instant) {
    sum(session_start <= instant & session_end > instant)
  },
  FUN.VALUE = integer(1)
)

# Column names are kept as `time` and `sum` for the plot below; the locals
# above no longer shadow base::sum() and stats::time().
data2 <- data.frame(time = time_grid, sum = n_active)

ggplot(data2) +
  geom_line(aes(x = time, y = sum)) +
  scale_x_time() +
  theme_bw()

答案 1 :(得分:3)

data.table 中的 foverlaps() 是另一种选择。

# Sample login data bound to `xdf`: one row per user session.
#   id         - character user identifier
#   time_start - login time as an hms/difftime vector, units = "secs"
#   time_sec   - integer session length in seconds
# Assigned with a leading `<-` so the binding is visible at the top of the
# literal rather than hidden after its closing parenthesis.
xdf <- structure(
  list(
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  class = c("tbl_df", "tbl", "data.frame")
)

library(hrbrthemes) # devtools::install_git("https://gitlab.com/hrbrmstr/hrbrthemes")
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)

# Overview plot: one horizontal bar per user from login to logout.
# Rows are sorted by login time, and the factor levels are reversed so the
# earliest login is drawn at the top of the y axis.
xdf %>%
  arrange(time_start) %>%
  mutate(id = factor(id, levels = rev(id))) %>%
  ggplot(aes(x = time_start, y = id)) +
  geom_segment(
    aes(xend = time_start + time_sec, yend = id),
    size = 1.5,
    color = ft_cols$slate
  ) +
  labs(title = "Login/Usage Overlap Overview", x = NULL, y = NULL) +
  theme_ipsum_rc(grid = "X")

enter image description here

# Width of each counting bin in seconds (5 minutes).
bin_secs <- 300

# Session bounds as plain numeric seconds, so every interval column handed
# to foverlaps() has the same type.
session_start <- as.numeric(xdf$time_start)
session_end <- session_start + as.numeric(xdf$time_sec)

# Get the range: from the first login (rounded to 10 s) to the last LOGOUT.
# Using login times only would stop the bins at the last login and never
# show users dropping off after they log out.
rng <- c(round(min(session_start), -1), max(session_end))

# 5-minute intervals
ranges <- tibble(
  start = seq(rng[1], rng[2], bin_secs),
  end = start + bin_secs - 1
)

# the function we need requires a temporary conversion to a data.table
ranges <- data.table(ranges)
setkey(ranges, start, end)

# create start/end for original data
xdf$start <- session_start
xdf$end <- session_end
xdf <- data.table(xdf)

# foverlaps() pairs every session with every bin it overlaps; counting the
# rows per bin start then gives the number of users active in that bin.
foverlaps(xdf, ranges) %>% # the magic is in this function
  as_tibble() %>% # back to normal
  count(start) %>%
  # bins that no session overlaps get an explicit count of zero
  complete(start = ranges$start, fill = list(n = 0)) %>%
  ggplot(aes(start, n)) +
  geom_line() + # there are other ways to show this data, too
  geom_label(aes(label = n)) +
  labs(
    x = NULL, y = "# Users concurrently logged-in",
    title = "Active Users per-5-minute Interval"
  ) +
  theme_ipsum_rc()

enter image description here