Question

我不熟悉 R，在整理数据时有些不知所措。我的数据集如下所示（typeof(mData) 返回 "list"）：

       key_as_string                key     doc_count  colA   colB          types.buckets
2017-01-30T13:33:30.000+01:00 1.485780e+12     28       0      0        type1, type2, type3, 18, 5, 5
2017-01-30T13:34:00.000+01:00 1.485780e+12    175       0      0        type2, type1, type3, type4, 138, 19, 17, 1
...

我希望绘制计数随时间的变化，即一张这样的图表：x 轴是时间戳（POSIX 格式），y 轴是计数，每种类型用一种颜色表示（另外再加上 doc_count 列的总计）。我发现很多答案都说必须先对数据进行 melt（融合），但所有示例使用的都是如下这样的简单格式（最好是数据框）：

timestamp(as POSIX)             doc_count   type1 type2 ... type n
2017-01-30T13:33:30.000+01:00       28      18      5       ...
2017-01-30T13:34:00.000+01:00       175     19      138     ...
...

问题在于 mData[["types"]][["buckets"]][[x]][[1]] 中的数据是不对称的（asymmetric），即我事先不知道每一行中类型的顺序和数量（顺序对绘图来说并不重要）。如果某一行缺少某种类型，我希望在相应的列中填入 0。

如何将 mData 转换为与上面类似的形式并绘制出来？这正是我卡住的地方。

这是result[["types"]][["buckets"]]的示例输出：

[[1]]
          key doc_count
1      type1        18
2      type2         5
3      type3         5

[[2]]
          key doc_count
1       type2       138
2       type1        19
3       type3        17
4       type4         1

绘制总计 doc_count 随时间的变化可以正常工作：

# `key` is divided by 1000 before conversion because as.POSIXct() counts
# seconds from `origin` (assumes `key` is epoch milliseconds, as the sample
# values suggest).
dates <- as.POSIXct(mData[["key"]] / 1000, origin = "1970-01-01")

# Alternative: parse the ISO-8601 strings instead. The earlier attempt returned
# NA because its format required the literal "+01:00 CET" directly after the
# seconds, while the data reads ".000+01:00". %OS consumes the fractional
# seconds; the fixed "+01:00" offset is matched literally and supplied via `tz`
# ("Etc/GMT-1" is UTC+1 -- the sign is inverted in POSIX zone names).
# dates <- as.POSIXct(mData[["key_as_string"]],
#                     format = "%Y-%m-%dT%H:%M:%OS+01:00", tz = "Etc/GMT-1")

# Select the count column by name rather than by position so the code does not
# break if the column order of mData changes.
mDf <- data.frame(date = dates, doc_count = mData[["doc_count"]])

# Total events over time as a line plus filled area.
# NOTE(review): a bare integer colour is presumably interpreted as a palette
# index rather than an RGB value -- confirm; use rgb() or a hex string if a
# truly random colour is intended.
ggplot(mDf, aes(date, doc_count)) +
  geom_line(colour = sample(1:255255255, 1)) +
  xlab("") +
  ylab("Events") +
  scale_x_datetime(
    date_breaks = "24 hours",
    date_labels = "%d.%m.%Y %H:%M:%S"
  ) +
  theme(
    axis.title = element_text(size = 24, face = "bold"),
    axis.text.y = element_text(angle = 90, hjust = 0.5)
  ) +
  geom_area(fill = sample(1:255255255, 1))

修改

下面的代码应该可以重现我的数据样本：

# Sample data as a structure() literal (presumably dput() output).
# The expression is not assigned here; the surrounding code refers to the
# object as `mData`, i.e. use `mData <- structure(...)` to reproduce it.
#
# Layout: a 6-row data frame with columns
#   key_as_string - timestamp strings such as "2017-01-30T13:33:30.000+01:00"
#   key           - the same instants as numbers (looks like epoch milliseconds)
#   doc_count     - integer total per row
#   types         - a nested data frame with columns colA, colB and `buckets`;
#                   `buckets` is a list holding one data frame per row, each
#                   with columns `key` (type name) and `doc_count`. The buckets
#                   differ in row order and in which types are present.
structure(list(key_as_string = c("2017-01-30T13:33:30.000+01:00", 
"2017-01-30T13:34:00.000+01:00", "2017-01-30T13:34:30.000+01:00", 
"2017-01-30T13:35:00.000+01:00", "2017-01-30T13:35:30.000+01:00", 
"2017-01-30T13:36:00.000+01:00"), key = c(1485779610000, 1485779640000, 
1485779670000, 1485779700000, 1485779730000, 1485779760000), 
doc_count = c(28L, 175L, 122L, 526L, 160L, 1306L), types = structure(list(
    colA = c(0L, 0L, 0L, 0L, 0L, 0L
    ), colB = c(0L, 0L, 0L, 0L, 0L, 0L), buckets = list(
        structure(list(key = c("type1", "type2", "type3"
        ), doc_count = c(18L, 5L, 5L)), .Names = c("key", 
        "doc_count"), class = "data.frame", row.names = c(NA, 
        3L)), structure(list(key = c("type2", "type1", 
        "type3", "type4"), doc_count = c(138L, 19L, 17L, 
        1L)), .Names = c("key", "doc_count"), class = "data.frame", row.names = c(NA, 
        4L)), structure(list(key = c("type2", "type1", 
        "type3"), doc_count = c(60L, 42L, 20L)), .Names = c("key", 
        "doc_count"), class = "data.frame", row.names = c(NA, 
        3L)), structure(list(key = c("type1", "type2", 
        "type3", "type4"), doc_count = c(379L, 128L, 
        18L, 1L)), .Names = c("key", "doc_count"), class = "data.frame", row.names = c(NA, 
        4L)), structure(list(key = c("type2", "type3", 
        "type1"), doc_count = c(87L, 61L, 12L)), .Names = c("key", 
        "doc_count"), class = "data.frame", row.names = c(NA, 
        3L)), structure(list(key = c("type1", "type2", 
        "type3", "type4"), doc_count = c(1139L, 146L, 
        20L, 1L)), .Names = c("key", "doc_count"), class = "data.frame", row.names = c(NA, 
        4L)))), .Names = c("colA", 
"colB", "buckets"), row.names = c(NA, 6L), class = "data.frame")), .Names = c("key_as_string", 
"key", "doc_count", "types"), row.names = c(NA, 6L), class = "data.frame")

Answer 1

这里我提取各类型的明细数据，并将其与对应的测量时间一起放入一个数据框。这样就得到了便于用 ggplot 绘制的长格式数据：

library(dplyr)
library(ggplot2)

# Per-timestamp bucket tables (columns `key`, `doc_count`), one per row of
# mData.
tb <- mData[["types"]][["buckets"]]
# Convert `key` to POSIXct so the x axis is a real date-time scale instead of
# a bare number (assumes `key` is epoch milliseconds, hence the / 1000).
dt <- as.POSIXct(mData[["key"]] / 1000, origin = "1970-01-01")

# Every type that occurs in any bucket, so absent types can be filled with 0.
all_keys <- unique(unlist(lapply(tb, function(bucket) bucket[["key"]])))

# Long format: one row per (timestamp, type). A type missing from a bucket gets
# an explicit doc_count of 0; otherwise geom_line() would join the neighbouring
# points straight across the gap and hide the drop to zero.
plot_df <- bind_rows(lapply(seq_along(tb), function(i) {
  bucket <- tb[[i]]
  absent <- setdiff(all_keys, bucket[["key"]])
  filler <- data.frame(
    key = absent,
    doc_count = rep(0L, length(absent)),
    stringsAsFactors = FALSE
  )
  mutate(bind_rows(bucket, filler), date = dt[i])
}))

# One coloured line per type.
ggplot(plot_df, aes(x = date, y = doc_count, colour = key)) +
  geom_line()

使用R在同一图表中绘制多个列

1 个答案: