我正在使用钻石数据集。
> dput(diamonds_2[1:100,])
structure(list(carat = structure(c(4L, 2L, 4L, 10L, 12L, 5L,
5L, 7L, 3L, 4L, 11L, 4L, 3L, 12L, 1L, 13L, 11L, 11L, 11L, 11L,
11L, 4L, 4L, 12L, 12L, 4L, 5L, 11L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 12L, 7L, 14L, 14L, 14L, 7L, 7L, 13L, 10L, 13L, 13L, 6L,
10L, 5L, 4L, 13L, 3L, 3L, 11L, 11L, 11L, 11L, 11L, 16L, 11L,
11L, 11L, 23L, 9L, 13L, 12L, 12L, 5L, 5L, 11L, 11L, 11L, 11L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 19L, 7L, 5L, 5L, 5L, 5L, 13L,
45L, 61L, 45L, 46L, 53L, 45L, 45L, 71L, 48L, 55L), .Label = c("0.2",
"0.21", "0.22", "0.23", "0.24", "0.25", "0.26", "0.27", "0.28",
"0.29", "0.3", "0.31", "0.32", "0.33", "0.34", "0.35", "0.36",
"0.37", "0.38", "0.39", "0.4", "0.41", "0.42", "0.43", "0.5",
"0.51", "0.52", "0.53", "0.54", "0.55", "0.56", "0.57", "0.58",
"0.59", "0.6", "0.61", "0.62", "0.63", "0.64", "0.65", "0.66",
"0.67", "0.68", "0.69", "0.7", "0.71", "0.72", "0.73", "0.74",
"0.75", "0.76", "0.77", "0.78", "0.79", "0.8", "0.81", "0.82",
"0.83", "0.84", "0.85", "0.86", "0.87", "0.88", "0.89", "0.9",
"0.91", "0.92", "0.93", "0.94", "0.95", "0.96", "0.97", "0.98",
"0.99", "1", "1.01", "1.02", "1.03", "1.04", "1.05", "1.06",
"1.07", "1.08", "1.09", "1.1", "1.11", "1.12", "1.13", "1.14",
"1.15", "1.16", "1.17", "1.18", "1.19", "1.2", "1.21", "1.22",
"1.23", "1.24", "1.25", "1.27", "1.28", "1.29", "1.31", "1.5",
"1.51", "1.52"), class = "factor"), color = structure(c(2L, 2L,
2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L, 7L,
7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L, 2L,
1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L, 5L,
3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 4L,
6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L, 3L,
2L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"),
clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 3L, 4L,
5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 3L,
3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L,
2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L,
7L, 7L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L,
7L, 7L, 7L, 7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L,
3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
price = c(481, 481, 492, 558, 568, 579, 579, 590, 590, 601,
610, 621, 642, 660, 671, 671, 700, 729, 729, 729, 729, 740,
750, 750, 750, 761, 772, 793, 793, 793, 951, 951, 951, 951,
951, 951, 951, 951, 952, 952, 952, 952, 952, 952, 952, 952,
952, 952, 953, 953, 953, 953, 953, 953, 953, 954, 954, 954,
954, 954, 958, 958, 958, 958, 958, 959, 959, 959, 959, 959,
959, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960,
960, 960, 960, 960, 960, 960, 960, 960, 1, 1, 1, 2, 2, 2,
2, 2, 3, 3), cut_new = structure(c(1L, 1L, 2L, 1L, 2L, 3L,
3L, 3L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 3L, 2L,
2L, 2L, 3L, 1L), .Label = c("Above average", "Below average",
"Very Good"), class = "factor")), .Names = c("carat", "color",
"clarity", "price", "cut_new"), row.names = c(NA, 100L), class = "data.frame")
现在我想计算平均价格,使每颗钻石在其价格旁边同时显示对应的平均价格(按 color 和 cut_new 的组合分组计算)。
我尝试过以下代码,但都无法得到正确的结果:
尝试1:
# Asker's first attempt: compute group means with tapply(), then merge them
# back onto the data. Kept verbatim; the notes below explain why it fails.
head(diamonds_2)
# NOTE(review): `x` is assigned here but never used in the lines shown.
diamonds_2 <- x <- as.data.frame(diamonds_2)
diamonds_2$price <- as.numeric(diamonds_2$price)
# With two grouping factors, tapply() returns a color x cut_new matrix of
# means (groups as row/column names), not a data frame with key columns.
# NOTE(review): storing the result in a variable named `mean` shadows the
# base function name; re-running this line would then pass the matrix as the
# function argument -- confirm and rename.
mean <- tapply(diamonds_2$price, list(diamonds_2$color, diamonds_2$cut_new), mean, na.rm = T)
# NOTE(review): this merge cannot give the intended result: the tapply()
# matrix has no "color" column to use as by.y, and the keys pair cut_new with
# color instead of joining on both color and cut_new.
combine <- merge (diamonds_2, mean, by.x = "cut_new", by.y= "color")
尝试2:
# Asker's second attempt, kept verbatim.
# NOTE(review): summaryBy() is not base R -- presumably from the doBy package,
# which is not loaded in the lines shown; confirm.
# The formula price ~ color groups by color only (cut_new is not included),
# and the result is a per-group summary rather than a column added to each
# diamond's row.
results <- summaryBy(price~color, data= diamonds_2, FUN = mean)
有什么办法能让其中一种方法正常工作吗?
谢谢
答案 0 :(得分:-1)
假设您的输入数据集为 df,您可以使用以下代码段,按 color 和 cut_new 变量分组计算钻石的平均价格:
library(dplyr)

# Per-diamond result (what the question asks for): a grouped mutate() keeps
# every row of df and adds the mean price of its (color, cut_new) group as a
# new column next to price. ungroup() drops the grouping so later operations
# on df_with_avg are not silently computed per group.
df_with_avg <- df %>%
  group_by(color, cut_new) %>%
  mutate(AvgPrice = mean(price, na.rm = TRUE)) %>%
  ungroup()

# Summary table of the same group means: one row per (color, cut_new)
# combination. This is the expression whose printed output is shown below.
df %>%
  group_by(color, cut_new) %>%
  summarise(AvgPrice = mean(price, na.rm = TRUE))
# # A tibble: 20 x 3
# color cut_new AvgPrice
# <fctr> <fctr> <dbl>
# 1 1 Above average 956.7500
# 2 1 Below average 952.0000
# 3 1 Very Good 933.5714
# 4 2 Above average 647.1250
# 5 2 Below average 499.3333
# 6 2 Very Good 720.0000
# 7 3 Above average 797.0000
# 8 3 Below average 318.3333
# 9 3 Very Good 921.6000
# 10 4 Above average 766.4000
# 11 4 Very Good 574.0000
# 12 5 Above average 800.5000
# 13 5 Below average 953.7500
# 14 5 Very Good 801.0000
# 15 6 Above average 886.3333
# 16 6 Below average 841.5000
# 17 6 Very Good 829.0000
# 18 7 Above average 796.7500
# 19 7 Below average 659.0000
# 20 7 Very Good 720.2000