使用合并函数基于条件组合数据帧 - R

时间:2017-07-12 09:18:22

标签: r dataframe merge

我有一个具有钻石特征的数据框:

# Sample data as a structure() literal (apparently dput() output): a data frame
# with 100 rows and the columns carat, color, clarity, price and cut_new.
# carat is a character vector, price is numeric, and color, clarity and cut_new
# are factors stored as integer codes plus .Label level names.
# The expression is not assigned here; assign it to a name to use it.
structure(list(carat = c("Above average", "Above average", "Below average", 
"Above average", "Below average", "Very Good", "Very Good", "Very Good", 
"Below average", "Very Good", "Below average", "Above average", 
"Above average", "Above average", "Above average", "Above average", 
"Above average", "Below average", "Below average", "Very Good", 
"Below average", "Very Good", "Very Good", "Very Good", "Very Good", 
"Very Good", "Above average", "Very Good", "Very Good", "Very Good", 
"Very Good", "Very Good", "Very Good", "Very Good", "Very Good", 
"Below average", "Below average", "Below average", "Very Good", 
"Above average", "Above average", "Above average", "Below average", 
"Below average", "Below average", "Above average", "Very Good", 
"Below average", "Very Good", "Very Good", "Very Good", "Above average", 
"Above average", "Above average", "Above average", "Above average", 
"Above average", "Very Good", "Very Good", "Below average", "Above average", 
"Above average", "Above average", "Above average", "Above average", 
"Above average", "Above average", "Very Good", "Above average", 
"Above average", "Very Good", "Very Good", "Above average", "Above average", 
"Below average", "Very Good", "Very Good", "Very Good", "Very Good", 
"Very Good", "Very Good", "Very Good", "Above average", "Above average", 
"Below average", "Above average", "Above average", "Above average", 
"Above average", "Above average", "Above average", "Below average", 
"Above average", "Very Good", "Very Good", "Below average", "Below average", 
"Below average", "Very Good", "Above average"), color = structure(c(2L, 
2L, 2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L, 
7L, 7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L, 
2L, 1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L, 
5L, 3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 
4L, 6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L, 
1L, 2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L, 
3L, 2L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"), 
    clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 3L, 4L, 
    5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 3L, 
    3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 
    2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 
    4L, 2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L, 
    7L, 7L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L, 
    7L, 7L, 7L, 7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L, 
    3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), 
    price = c(481, 481, 492, 558, 568, 579, 579, 590, 590, 601, 
    610, 621, 642, 660, 671, 671, 700, 729, 729, 729, 729, 740, 
    750, 750, 750, 761, 772, 793, 793, 793, 951, 951, 951, 951, 
    951, 951, 951, 951, 952, 952, 952, 952, 952, 952, 952, 952, 
    952, 952, 953, 953, 953, 953, 953, 953, 953, 954, 954, 954, 
    954, 954, 958, 958, 958, 958, 958, 959, 959, 959, 959, 959, 
    959, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 
    960, 960, 960, 960, 960, 960, 960, 960, 1, 1, 1, 2, 2, 2, 
    2, 2, 3, 3), cut_new = structure(c(1L, 1L, 2L, 1L, 2L, 3L, 
    3L, 3L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
    2L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L, 
    1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 3L, 2L, 
    2L, 2L, 3L, 1L), .Label = c("Above average", "Below average", 
    "Very Good"), class = "factor")), .Names = c("carat", "color", 
"clarity", "price", "cut_new"), row.names = c(NA, 100L), class = "data.frame")

对于 color 和 cut_new 的每个组合,我计算了平均价格:

# Table of means as a structure() literal (apparently dput() output): a 6 x 3
# numeric matrix whose rows are labelled "1" to "6" and whose columns are
# labelled "Above average", "Below average" and "Very Good".
# NOTE(review): presumably rows are color levels and columns are cut_new
# levels -- confirm; the data frame's color factor declares seven levels.
structure(c(484.29290617849, 417.560131795717, 463.579787234043, 
514.823308270677, 534.805128205128, 574.193661971831, 605.398230088496, 
470.268456375839, 482.971830985916, 541.042253521127, 542.464788732394, 
504.777777777778, 461.622857142857, 469.469465648855, 485.88986784141, 
540.725490196078, 494.36, 640.603448275862), .Dim = c(6L, 3L), .Dimnames = list(
    c("1", "2", "3", "4", "5", "6"), c("Above average", "Below average", 
    "Very Good")))

现在我想创建一个数据框,其中包含钻石的原有信息,并额外增加一列,使每颗钻石的价格旁边都显示对应的平均价格(基于 color 和 cut_new 的组合)。

我尝试使用函数merge,但收到错误:

# Asker's attempt (reported to raise an error): join the diamonds data with
# the table of means, using cut_new as the left key and color as the right key.
results <- merge(
  x = diamonds_2,
  y = mean,
  by.x = "cut_new",
  by.y = "color"
)

任何提示?

编辑:我希望结果看起来像这样:(图片:enter image description here)

感谢

1 个答案:

答案 0 :(得分:1)

下面分别是 tidyverse 和 base R 的示例,用于按 color 和 cut_new 分组计算平均价格。我相信这就是你想要的结果,而且不需要合并。假设你的数据框名为“df”。

# tidyverse
# Attach the mean price of each (color, cut_new) group to every row.
# mutate() keeps all original rows and columns, so no merge is required.
library(dplyr)
# ungroup() at the end removes the grouping left by group_by(); without it,
# later dplyr verbs applied to df2 would silently operate per group.
df2 <- df %>%
  group_by(color, cut_new) %>%
  mutate(price.m = mean(price)) %>%
  ungroup()

# Base R
# ave() applies FUN to price within each color x cut_new group and returns a
# vector aligned with the rows of df, which transform() adds as price.m.
df2 <- transform(
  df,
  price.m = ave(x = price, color, cut_new, FUN = mean)
)