重塑/重建数据框以创建标准化条形图和饼图

时间:2013-05-17 13:22:34

标签: r ggplot2

我有以下数据框(data frame)结构,它是从 csv 文件中读取的(数据已附在下文)。基本上,它记录了每个操作员(A、M、D、L、J)的评分:Excellent(优秀)、Good(好)、OK(一般)、Poor(差)或 Terrible(糟糕)。其他字段是日期(Date)和评分员(Scorer)(我打算稍后使用,但目前不需要)。

我遇到的困难是:如何把这些数据整理成便于绘制条形图(通过除以每个操作员的总计数进行归一化)和饼图的格式。如何将此数据框汇总成类似下面的结构,以便更好地使用 geom_bar?

Operator Score Count
A        Good  11
A        Poor  5
A        Ok    3
A        Terrible 0
A        Excellent 0
D        Good  36
D        Poor  50
D        Ok    10
D        Terrible 1
D        Excellent 0

我知道可以按操作员(Operator)对初始数据框取子集,然后从 summary 的输出中读取各项计数:

# Keep only operator A's rows, then print per-column summaries of that subset.
dfA <- subset(df, subset = Operator == "A")
summary(dfA)

但我想自动化这个过程(即自动将数据框重新构建到上面的结构中,我可以使用ggplot2来显示结果)。但是,我不知道从哪里开始这个问题

   structure(list(Operator = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
3L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 5L, 2L, 2L, 2L, 
2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 1L, 1L, 1L, 5L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 2L, 
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 
3L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 4L, 4L, 4L, 4L, 
3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 
2L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 
3L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "D", "J", "L", "M"), class = "factor"), 
    ROI_Score = structure(c(3L, 1L, 1L, 2L, 1L, 3L, 1L, 3L, 3L, 
    2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 
    3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 
    1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 
    3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 
    3L, 1L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 
    1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 
    1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
    3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 
    1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 
    1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 2L, 
    1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 
    3L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
    3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 
    1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 
    1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 
    1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 3L, 1L, 
    3L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 
    3L, 2L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 
    4L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 2L, 
    3L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
    1L, 1L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L, 
    2L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 
    2L, 3L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("Good", 
    "OK", "Poor", "Terrible"), class = "factor"), Date = structure(c(3L, 
    3L, 5L, 5L, 5L, 7L, 3L, 3L, 9L, 9L, 9L, 11L, 11L, 3L, 3L, 
    5L, 5L, 5L, 7L, 7L, 7L, 11L, 11L, 11L, 3L, 15L, 15L, 21L, 
    13L, 17L, 17L, 19L, 21L, 13L, 13L, 13L, 15L, 15L, 17L, 17L, 
    17L, 19L, 19L, 19L, 21L, 21L, 30L, 30L, 23L, 25L, 25L, 25L, 
    27L, 27L, 27L, 29L, 29L, 29L, 23L, 23L, 25L, 25L, 25L, 27L, 
    27L, 27L, 30L, 30L, 30L, 30L, 30L, 32L, 32L, 36L, 2L, 36L, 
    36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L, 36L, 36L, 39L, 39L, 
    2L, 2L, 32L, 34L, 34L, 36L, 41L, 41L, 41L, 43L, 1L, 38L, 
    38L, 41L, 42L, 43L, 38L, 38L, 41L, 41L, 41L, 42L, 42L, 42L, 
    43L, 43L, 1L, 1L, 1L, 38L, 42L, 42L, 42L, 42L, 1L, 1L, 1L, 
    3L, 3L, 7L, 3L, 3L, 3L, 5L, 7L, 11L, 3L, 3L, 3L, 3L, 5L, 
    5L, 5L, 7L, 7L, 7L, 9L, 9L, 11L, 11L, 11L, 13L, 15L, 17L, 
    19L, 19L, 21L, 21L, 13L, 21L, 13L, 13L, 13L, 15L, 17L, 17L, 
    17L, 19L, 19L, 21L, 21L, 21L, 29L, 29L, 29L, 30L, 23L, 25L, 
    29L, 29L, 23L, 23L, 23L, 25L, 25L, 25L, 27L, 27L, 30L, 30L, 
    30L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L, 34L, 
    34L, 34L, 36L, 36L, 2L, 2L, 2L, 43L, 1L, 38L, 41L, 41L, 42L, 
    42L, 42L, 43L, 43L, 1L, 1L, 43L, 1L, 42L, 1L, 1L, 1L, 32L, 
    32L, 36L, 2L, 36L, 36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L, 
    36L, 36L, 39L, 39L, 2L, 2L, 32L, 34L, 34L, 36L, 10L, 4L, 
    6L, 6L, 10L, 10L, 10L, 12L, 4L, 4L, 12L, 12L, 6L, 6L, 6L, 
    8L, 8L, 8L, 12L, 12L, 14L, 16L, 14L, 14L, 18L, 20L, 14L, 
    18L, 18L, 18L, 14L, 14L, 14L, 16L, 16L, 16L, 22L, 22L, 22L, 
    28L, 28L, 31L, 28L, 28L, 28L, 31L, 31L, 31L, 33L, 33L, 33L, 
    35L, 35L, 35L, 37L, 37L, 37L, 33L, 33L, 33L, 35L, 37L, 37L, 
    40L, 40L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L, 
    34L, 34L, 34L, 36L, 36L, 2L, 2L, 2L, 6L, 6L, 10L, 10L, 10L, 
    10L, 4L, 4L, 6L, 6L, 8L, 8L, 8L, 10L, 10L, 12L, 4L, 8L, 8L, 
    8L, 8L, 12L, 4L, 4L, 4L, 4L, 8L, 12L, 16L, 16L, 14L, 16L, 
    18L, 18L, 20L, 20L, 20L, 14L, 14L, 20L, 20L, 22L, 22L, 14L, 
    16L, 18L, 18L, 18L, 18L, 24L, 24L, 24L, 26L, 26L, 31L, 31L, 
    24L, 26L, 26L, 26L, 26L, 24L, 24L, 24L, 24L, 31L, 31L, 40L, 
    37L, 33L, 33L, 33L, 33L, 35L, 35L, 35L, 37L, 37L, 37L, 37L, 
    40L), .Label = c("01/02/2013", "01/03/2013", "04/02/2013", 
    "04/03/2013", "05/02/2013", "05/03/2013", "06/02/2013", "06/03/2013", 
    "07/02/2013", "07/03/2013", "08/02/2013", "08/03/2013", "11/02/2013", 
    "11/03/2013", "12/02/2013", "12/03/2013", "13/02/2013", "13/03/2013", 
    "14/02/2013", "14/03/2013", "15/02/2013", "15/03/2013", "18/02/2013", 
    "18/03/2013", "19/02/2013", "19/03/2013", "20/02/2013", "20/03/2013", 
    "21/02/2013", "22/02/2013", "22/03/2013", "25/02/2013", "25/03/2013", 
    "26/02/2013", "26/03/2013", "27/02/2013", "27/03/2013", "28/01/2013", 
    "28/02/2013", "28/03/2013", "29/01/2013", "30/01/2013", "31/01/2013"
    ), class = "factor"), Scorer = structure(c(2L, 2L, 3L, 3L, 
    2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 
    3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 
    3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 
    2L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
    3L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 3L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 
    1L, 1L, 1L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 
    2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    1L, 3L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 
    1L, 3L, 3L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 1L, 2L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 
    2L, 3L, 3L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 
    3L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 1L, 
    3L, 2L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 3L, 3L, 
    1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 1L, 
    2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L), .Label = c("", "B", "G"), class = "factor")), .Names = c("Operator", 
"ROI_Score", "Date", "Scorer"), row.names = c(NA, -412L), class = "data.frame")

2 个答案:

答案 0 :(得分:1)

以下是使用data.table准备您的数据:

# library() stops with an error if data.table is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(data.table)

# Build a long table `oo` with one row per (Operator, ROI_Score) pair and the
# number of observations N for that pair.
dt <- data.table(df)
ops <- as.character(unique(dt$Operator))
scr <- as.character(unique(dt$ROI_Score))

# Count the rows of every pair that actually occurs, keyed for the join below.
counts <- dt[, .N, by = .(Operator, ROI_Score)]
setkey(counts, Operator, ROI_Score)

# Join against the full cross product of operators and scores so that pairs
# never observed still get a row; their N comes back NA and is set to 0.
oo <- counts[CJ(ops, scr)][is.na(N), N := 0L]

以下是使用此数据获取标准化条形图的方法:

# Add column N.norm: each count divided by the total count of its operator,
# so the values within one operator sum to 1.
oo[, N.norm := N / sum(N), by = "Operator"]

绘制此图的一种方法是使用 x = Operator:

# library() fails loudly if ggplot2 is not installed, unlike require().
library(ggplot2)

# One stacked bar per operator, filled by score. The heights are the
# precomputed N.norm shares, hence stat = "identity" (no counting by ggplot).
# The argument is spelled `position`; a misspelt name is ignored with a warning.
ggplot(data = oo, aes(x = Operator, y = N.norm)) +
  geom_bar(aes(fill = ROI_Score), stat = "identity", position = "stack")

enter image description here

答案 1 :(得分:1)

您可以简单地执行以下操作来准备数据:

# Cross-tabulate operator against score, then flatten the contingency table
# into long format: one row per (Operator, Score) pair with its count in Freq.
as.data.frame(with(df, table(Operator = Operator, Score = ROI_Score)))

给出了:

   Operator    Score Freq
1         A     Good   11
2         D     Good   36
3         J     Good   54
4         L     Good   44
5         M     Good   28
6         A       OK    3
7         D       OK   10
8         J       OK    9
9         L       OK    4
10        M       OK    7
11        A     Poor    5
12        D     Poor   50
13        J     Poor   56
14        L     Poor   67
15        M     Poor   27
16        A Terrible    0
17        D Terrible    1
18        J Terrible    0
19        L Terrible    0
20        M Terrible    0