无法在R中合并两个数据帧

时间:2018-09-17 01:36:04

标签: r shiny dplyr rbind

我正在尝试使用 rbind 合并两个数据帧。以下是这两个数据帧(dput 输出):

ab1

# dput() dump of the first data frame (called `ab1` in the text).
# `Product` is a character column; every month column is a factor whose
# labels are the values stored as strings ("0", "0.02", ...), plus the
# month name itself as an extra level.
# Because these columns are factors, sum() cannot be applied to them, and
# as.numeric() alone would return the integer level codes; the values have
# to be recovered with as.numeric(as.character(x)).
# NOTE(review): the month-name level suggests a header row was read in as
# data at some point - confirm against how ab1 was built.
structure(list(Product = c("Black Menthol", "Gold ", "Green ", 
"Red "), `Apr 2017` = structure(c(`Black Menthol` = 2L, `Gold ` = 3L, 
`Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", "0.07", 
"0.09", "Apr 2017"), class = "factor"), `May 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.08", "May 2017"), class = "factor"), `Jun 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 4L, `Green ` = 1L, `Red ` = 3L), .Label = c("0", "0.02", 
"0.07", "0.08", "Jun 2017"), class = "factor"), `Jul 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.09", "Jul 2017"), class = "factor"), `Aug 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.09", "Aug 2017"), class = "factor"), `Sep 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.09", "Sep 2017"), class = "factor"), `Oct 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.08", "Oct 2017"), class = "factor"), `Nov 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.02", 
"0.07", "0.09", "Nov 2017"), class = "factor"), `Dec 2017` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 4L), .Label = c("0", "0.03", 
"0.07", "0.08", "Dec 2017"), class = "factor"), `Jan 2018` = structure(c(`Black Menthol` = 2L, 
`Gold ` = 3L, `Green ` = 1L, `Red ` = 3L), .Label = c("0", "0.03", 
"0.06", "Jan 2018"), class = "factor")), row.names = c(NA, -4L
), class = "data.frame")

ab2

# dput() dump of the second data frame (called `ab2` in the text).
# Same layout as the first dump: a character `Product` column followed by
# one factor column per month, Apr 2017 through Jan 2018. The factor labels
# are the values stored as strings, plus the month name as an extra level.
# The product set differs from the first frame: it has "Black Non-Menthol"
# and no "Gold " row. "Green " and "Red " carry a trailing space here too.
structure(list(Product = c("Black Menthol", "Black Non-Menthol", 
"Green ", "Red "), `Apr 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.07", "0.17", "0.23", "Apr 2017"), class = "factor"), `May 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.06", "0.17", "0.24", "May 2017"), class = "factor"), `Jun 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.07", "0.18", "0.25", "Jun 2017"), class = "factor"), `Jul 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.06", "0.17", "0.24", "Jul 2017"), class = "factor"), `Aug 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.06", "0.16", "0.23", "Aug 2017"), class = "factor"), `Sep 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.06", "0.16", "0.23", "Sep 2017"), class = "factor"), `Oct 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.07", "0.14", "0.22", "Oct 2017"), class = "factor"), `Nov 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.02", 
"0.08", "0.15", "0.22", "Nov 2017"), class = "factor"), `Dec 2017` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.03", 
"0.09", "0.16", "0.20", "Dec 2017"), class = "factor"), `Jan 2018` = structure(c(`Black Menthol` = 1L, 
`Black Non-Menthol` = 3L, `Green ` = 2L, `Red ` = 4L), .Label = c("0.03", 
"0.08", "0.16", "0.22", "Jan 2018"), class = "factor")), row.names = c(NA, 
-4L), class = "data.frame")

下面是我用来组合它们的代码:

library(shiny)
library(dplyr)

# Row-bind the two data frames inside a reactive expression.
ab3 <- reactive({
    rbind(ab1, ab2)
})

# Sum every non-grouping column per Product.
# A reactive expression is a function: its current value is read with
# ab3(), not by referring to the bare object ab3.
# NOTE: the month columns of ab1/ab2 are factors, so sum() still fails on
# them here; they must be converted to numeric before summing.
ab4 <- reactive({
    ab3() %>%
        group_by(Product) %>%
        summarize_all(funs(sum))
})

执行此操作时,出现以下错误:Evaluation error: ‘sum’ not meaningful for factors

我想得到的输出是:

  Product Apr.2017 May.2017 Jun.2017 Jul.2017 Aug.2017 Sep.2017 Oct.2017
#  <chr>      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#1 Black …     0.04     0.04     0.04    0.04      0.04     0.04     0.04
#2 Black …     0.17     0.17     0.18    0.17      0.16     0.16     0.14
#3 Gold        0.07     0.07     0.08    0.07      0.07     0.07     0.07
#4 Green       0.07     0.06     0.07    0.06      0.06     0.06     0.07
#5 Red         0.32     0.32     0.32    0.330     0.32     0.32     0.3

1 个答案:

答案 0 :(得分:2)

这是你想要的结果吗?

library(tidyverse)
# Stack the two data frames, then total every remaining column per Product.
list(ab1, ab2) %>%
    bind_rows() %>%
    group_by(Product) %>%
    summarise_all(sum)
## A tibble: 5 x 9
#  Product Apr.2017 May.2017 Jun.2017 Jul.2017 Aug.2017 Sep.2017 Oct.2017
#  <chr>      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#1 Black …     0.04     0.04     0.04    0.04      0.04     0.04     0.04
#2 Black …     0.17     0.17     0.18    0.17      0.16     0.16     0.14
#3 Gold        0.07     0.07     0.08    0.07      0.07     0.07     0.07
#4 Green       0.07     0.06     0.07    0.06      0.06     0.06     0.07
#5 Red         0.32     0.32     0.32    0.330     0.32     0.32     0.3
## ... with 1 more variable: Nov.2017 <dbl>

这假设 ab1 和 ab2 具有相同的列结构。


样本数据

# Sample data for ab1, parsed from an inline whitespace-separated table.
# The header line has one field fewer than the data rows, so read.table()
# uses the first field of each row (1..4) as row names. The quoted month
# headers are made syntactic by the default check.names = TRUE
# (e.g. "Apr.2017").
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
ab1 <- read.table(text =
    "        Product 'Apr 2017' 'May 2017' 'Jun 2017' 'Jul 2017' 'Aug 2017' 'Sep 2017' 'Oct 2017' 'Nov 2017'
1 'Black Menthol'     0.02     0.02     0.02     0.02     0.02     0.02     0.02     0.02
2         Gold      0.07     0.07     0.08     0.07     0.07     0.07     0.07     0.07
3        Green         0        0        0        0        0        0        0        0
4          Red      0.09     0.08     0.07     0.09     0.09     0.09     0.08     0.09
", header = TRUE)
# Sample data for ab2, parsed the same way as ab1: the leading 1..4 field
# of each row becomes the row name, and the month headers become syntactic
# names (e.g. "Apr.2017").
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
ab2 <- read.table(text =
    "            Product 'Apr 2017' 'May 2017' 'Jun 2017' 'Jul 2017' 'Aug 2017' 'Sep 2017' 'Oct 2017' 'Nov 2017'
1     'Black Menthol'     0.02     0.02     0.02     0.02     0.02     0.02     0.02     0.02
2 'Black Non-Menthol'     0.17     0.17     0.18     0.17     0.16     0.16     0.14     0.15
3            Green      0.07     0.06     0.07     0.06     0.06     0.06     0.07     0.08
4              Red      0.23     0.24     0.25     0.24     0.23     0.23     0.22     0.22
", header = TRUE)

更新

要解决 factor 问题:在绑定行之前,先将 Product 以外的所有列转换为 numeric(必须先转成字符再转成数值,否则得到的是因子的内部整数编码,而不是原始数值)。

# Both inputs store their month columns as factors. Convert every column
# except Product to numeric (via character, so the labels are used rather
# than the level codes), then stack the frames and total each column per
# Product.
list(ab1, ab2) %>%
    lapply(
        mutate_at,
        vars(-Product),
        function(col) as.numeric(as.character(col))
    ) %>%
    bind_rows() %>%
    group_by(Product) %>%
    summarise_all(sum)
## A tibble: 5 x 11
#  Product `Apr 2017` `May 2017` `Jun 2017` `Jul 2017` `Aug 2017` `Sep 2017`
#  <chr>        <dbl>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
#1 Black …       0.04       0.04       0.04      0.04        0.04       0.04
#2 Black …       0.17       0.17       0.18      0.17        0.16       0.16
#3 "Gold "       0.07       0.07       0.08      0.07        0.07       0.07
#4 "Green…       0.07       0.06       0.07      0.06        0.06       0.06
#5 "Red "        0.32       0.32       0.32      0.330       0.32       0.32
## ... with 4 more variables: `Oct 2017` <dbl>, `Nov 2017` <dbl>, `Dec
##   2017` <dbl>, `Jan 2018` <dbl>