我有以下数据表:
# Six-row sample of the table: class c("data.table", "data.frame") with a
# `sorted = "Date"` attribute.
# NOTE(review): the trailing `.internal.selfref = <pointer: ...>` is a printed
# pointer, not parseable R -- remove that argument before evaluating this.
structure(list(Date = c("2015-04-01", "2015-04-01", "2015-04-01",
"2015-04-01", "2015-04-01", "2015-04-01"), Category = structure(c(4L,
4L, 4L, 5L, 5L, 6L), .Label = c("Bakery ", "Branded goods", "Breakfast ",
"Canned/Packaged ", "Cooking essentials ", "Household ", "NO CATEGORY",
"Personal care", "Stationary ", "Vehicle accessories"), class = "factor"),
Sub_Category = c("carbonated drink ", "carbonated drink ",
"carbonated drink ", "Dairy ", "Dairy ", "Stationary "),
Product = c("soft drink", "soft drink", "soft drink", "Butter ",
"Butter ", "A4 paper"), Brand = c("7 up ", "7 up ", "7 up ",
"Amul", "Amul", "NO BRAND"), Day = c(1L, 1L, 1L, 1L, 1L,
1L), Month = c(4L, 4L, 4L, 4L, 4L, 4L), Year = c(2015L, 2015L,
2015L, 2015L, 2015L, 2015L), MRP = c("55", "25", "70", "37",
"37", "0.5"), Quantity = c(1, 1, 1, 1, 1, 20), Sales = c(55,
25, 70, 37, 37, 10), Wday = c("Wednesday", "Wednesday", "Wednesday",
"Wednesday", "Wednesday", "Wednesday"), Week = c(13L, 13L,
13L, 13L, 13L, 13L), X = c(NA, NA, NA, NA, NA, NA), X. = c(NA,
NA, NA, NA, NA, NA)), .Names = c("Date", "Category", "Sub_Category",
"Product", "Brand", "Day", "Month", "Year", "MRP", "Quantity",
"Sales", "Wday", "Week", "X", "X."), sorted = "Date", class = c("data.table",
"data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x00000000001b0788>)
我希望按日期(Date 列)显示每个类别(Category)的 sum(Quantity)(数量总和)或 sum(Sales)(销售额总和)。
我试过了:
# Attempt 1: group the table by date and sum the columns with summarise_each().
data2 <- data %>% group_by(data$Date) %>% summarise_each(funs(sum))
但我得到了以下错误:
Error in is_list(x) : object 'rlang_is_list' not found(找不到对象 'rlang_is_list')
也尝试过:
# Attempt 2: sum Category, Sales and Quantity per Date with aggregate().
# NOTE(review): Category is a factor, so cbind() presumably contributes its
# integer codes rather than its labels -- likely why the result looks wrong.
aggregate(cbind(data$Category,data$Sales,data$Quantity)~data$Date,
data=data,FUN=sum)
但这会产生与预期完全不同的输出。
有没有办法完成这项工作?
即使在卸载rlang,dplyr和ggplot2包之后,错误仍然存在。有办法解决这个问题吗?
提前致谢
答案 0 :(得分:3)
对于您提供的数据,使用 data.table 和 reshape2 有一种更简单的方法:
# Attach the packages in the same order as before (data.table last).
library(reshape2)
library(data.table)

# Make sure `data` is a data.table.
setDT(data)

# Inner call: total Quantity and Sales for every Category/Date pair.
# dcast(): reshape those totals so each Date is a row and the Category
# values become columns, for both Quantity and Sales.
data2 <- dcast(
  data[, .(Quantity = sum(Quantity), Sales = sum(Sales)),
       by = .(Category, Date)],
  Date ~ Category,
  value.var = c("Quantity", "Sales"),
  fun.aggregate = sum
)
这样您就可以同时得到销售额(Sales)和数量(Quantity)的汇总。
答案 1 :(得分:1)
您可以仅对 Sales 和 Quantity 使用 summarise_at:
# Sum Sales and Quantity for each Date/Category pair, then derive Total.
library(dplyr)
data2 <- data %>%
  group_by(Date, Category) %>%
  # na.rm = TRUE is forwarded to sum() for both columns.
  summarise_at(.vars = vars(Sales, Quantity), .funs = sum, na.rm = TRUE) %>%
  # NOTE(review): in the sample rows Sales already looks like MRP * Quantity
  # (e.g. 0.5 * 20 = 10), so multiplying the summed Sales by the summed
  # Quantity may not be a meaningful total -- confirm the intent.
  mutate(Total = Sales * Quantity)
data2
#> # A tibble: 10 x 5
#> # Groups: Date [1]
#> Date Category Sales Quantity Total
#> <chr> <fct> <dbl> <dbl> <dbl>
#> 1 2015-04-01 "Bakery " 0 0 0
#> 2 2015-04-01 Branded goods 0 0 0
#> 3 2015-04-01 "Breakfast " 0 0 0
#> 4 2015-04-01 "Canned/Packaged " 150 3 450
#> 5 2015-04-01 "Cooking essentials " 74 2 148
#> 6 2015-04-01 "Household " 10 20 200
#> 7 2015-04-01 NO CATEGORY 0 0 0
#> 8 2015-04-01 Personal care 0 0 0
#> 9 2015-04-01 "Stationary " 0 0 0
#> 10 2015-04-01 Vehicle accessories 0 0 NA
然后使用tidyr::spread
library(tidyr)
# Drop the two summed columns, then turn each Category value into its own
# column holding Total. Despite the `_long` suffix, the result has one
# column per Category.
data2_long <- data2 %>%
  select(-c(Quantity, Sales)) %>%
  spread(key = Category, value = Total)
data2_long
# A tibble: 1 x 11
# Groups: Date [1]
Date `Bakery ` `Branded goods` `Breakfast ` `Canned/Packaged ` `Cooking essentials ` `Household ` `NO CATEGORY`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2015-04-01 0 0 0 450 148 200 0
# ... with 3 more variables: `Personal care` <dbl>, `Stationary ` <dbl>, `Vehicle accessories` <dbl>
使用的数据:
# Sample data used by the snippets above: six rows for 2015-04-01, built as a
# plain data.frame (class = c("data.frame")). Category is a factor with ten
# levels, of which only three occur in these rows.
data <- structure(list(Date = c("2015-04-01", "2015-04-01", "2015-04-01",
"2015-04-01", "2015-04-01", "2015-04-01"), Category = structure(c(4L,
4L, 4L, 5L, 5L, 6L), .Label = c("Bakery ", "Branded goods", "Breakfast ",
"Canned/Packaged ", "Cooking essentials ", "Household ", "NO CATEGORY",
"Personal care", "Stationary ", "Vehicle accessories"), class = "factor"),
Sub_Category = c("carbonated drink ", "carbonated drink ",
"carbonated drink ", "Dairy ", "Dairy ", "Stationary "),
Product = c("soft drink", "soft drink", "soft drink", "Butter ",
"Butter ", "A4 paper"), Brand = c("7 up ", "7 up ", "7 up ",
"Amul", "Amul", "NO BRAND"), Day = c(1L, 1L, 1L, 1L, 1L,
1L), Month = c(4L, 4L, 4L, 4L, 4L, 4L), Year = c(2015L, 2015L,
2015L, 2015L, 2015L, 2015L), MRP = c("55", "25", "70", "37",
"37", "0.5"), Quantity = c(1, 1, 1, 1, 1, 20), Sales = c(55,
25, 70, 37, 37, 10), Wday = c("Wednesday", "Wednesday", "Wednesday",
"Wednesday", "Wednesday", "Wednesday"), Week = c(13L, 13L,
13L, 13L, 13L, 13L), X = c(NA, NA, NA, NA, NA, NA), X. = c(NA,
NA, NA, NA, NA, NA)), .Names = c("Date", "Category", "Sub_Category",
"Product", "Brand", "Day", "Month", "Year", "MRP", "Quantity",
"Sales", "Wday", "Week", "X", "X."), sorted = "Date",
class = c("data.frame"),
row.names = c(NA, -6L))
P.S.:关于解决 object 'rlang_is_list' not found 的问题,可以按照此回答(answer)的做法,先尝试从终端/控制台会话启动 R:
# Start R from a terminal with --vanilla (no saved workspace or startup files).
R --vanilla
然后卸载并重新安装 rlang,例如:
# Remove the installed rlang package, then install it again.
remove.packages("rlang")
install.packages("rlang")