在行之间应用函数,按变量分组,计算其他列

时间:2017-02-24 17:17:38

标签: r dataframe

我在R中有一个很大的data.frame,它的一个高度简化的版本如下所示(真正的data.frame在“Color”列中有20种颜色,在“Number”列中有10种不同的数字):

Color   Number  Y
blue    1       5
blue    2       3
blue    3       2
red     1       5
red     2       8
red     3       2
green   1       2
green   2       9
green   3       3

对于“Color”列中的每种颜色,我想通过比较“Y”列中的对应值,在“Number”列的所有数字组合之间应用一个函数。让我们以一个简单的函数为例:

if x >= y, print 1, else print 0 # where x and y represent the first and second values to be compared, respectively 

我希望得到如下的输出data.frame:
Color   Comparison  Y
blue    1_vs_2      1
blue    1_vs_3      1
blue    2_vs_1      0
blue    2_vs_3      1
blue    3_vs_1      0
blue    3_vs_2      0
red     1_vs_2      0
red     1_vs_3      1
red     2_vs_1      1
red     2_vs_3      1
red     3_vs_1      0
red     3_vs_2      0
green   1_vs_2      0
green   1_vs_3      0
green   2_vs_1      1
green   2_vs_3      1
green   3_vs_1      1
green   3_vs_2      0

4 个答案:

答案 0 :(得分:5)

您考虑过SQL吗?您可以将数据与其自身连接(自连接)。如果限定Color相同且Number不同,就能得到所需的每一组成对比较。这与@Psidom的回答思路相同 - 他只是用data.table的连接(join)实现的。

# Self-join approach: pair every row of `df` with the other rows that share
# its Color, using SQL through the sqldf package.
library(sqldf)

# The join keeps pairs with equal Color and different Number; the CASE
# expression yields 1 when the left-hand Y is >= the right-hand Y, else 0.
pair_query <- "SELECT     l.Color, l.Number as l_number, r.Number as r_number,
                          case when l.Y >= r.Y then 1 else 0 end as Y
              FROM       df as l
              INNER JOIN df as r
              ON         l.Color = r.Color AND
                         l.Number != r.Number
             "
res <- sqldf(pair_query)

# Label each pair as "<left Number>_vs_<right Number>".
res[["comparison"]] <- paste(
  res[["l_number"]], res[["r_number"]],
  sep = "_vs_"
)

res

   Color l_number r_number Y comparison
1   blue        1        2 1     1_vs_2
2   blue        1        3 1     1_vs_3
3   blue        2        1 0     2_vs_1
4   blue        2        3 1     2_vs_3
5   blue        3        1 0     3_vs_1
6   blue        3        2 0     3_vs_2
7    red        1        2 0     1_vs_2
8    red        1        3 1     1_vs_3
9    red        2        1 1     2_vs_1
10   red        2        3 1     2_vs_3
11   red        3        1 0     3_vs_1
12   red        3        2 0     3_vs_2
13 green        1        2 0     1_vs_2
14 green        1        3 0     1_vs_3
15 green        2        1 1     2_vs_1
16 green        2        3 1     2_vs_3
17 green        3        1 1     3_vs_1
18 green        3        2 0     3_vs_2

答案 1 :(得分:3)

您可以尝试使用此data.table方法:

library(data.table)
# `dt` is the input table with columns Color, Number and Y; setDT() converts
# it to a data.table by reference.
setDT(dt)

# For each Color, enumerate every ordered pair of rows (lhs, rhs) and keep the
# pairs whose Numbers differ. The Numbers are compared directly rather than
# pattern-matching the pasted label: an unanchored regex such as
# "(\\d+)_vs_\\1" also matches labels like "1_vs_10" or "11_vs_1" and would
# wrongly drop those rows once Number has two or more digits.
dt[, {
  # lhs varies slowest, so pairs come out as 1-1, 1-2, 1-3, 2-1, ...
  lhs <- rep(seq_len(.N), each = .N)
  rhs <- rep(seq_len(.N), times = .N)
  # which() also discards pairs whose comparison is NA
  keep <- which(Number[lhs] != Number[rhs])
  lhs <- lhs[keep]
  rhs <- rhs[keep]
  .(
    Comparison = paste(Number[lhs], Number[rhs], sep = "_vs_"),
    # 1 when the first Y is >= the second Y, otherwise 0
    Y = as.numeric(Y[lhs] >= Y[rhs])
  )
}, by = .(Color)]

#    Color Comparison Y
# 1:  blue     1_vs_2 1
# 2:  blue     1_vs_3 1
# 3:  blue     2_vs_1 0
# 4:  blue     2_vs_3 1
# 5:  blue     3_vs_1 0
# 6:  blue     3_vs_2 0
# 7:   red     1_vs_2 0
# 8:   red     1_vs_3 1
# 9:   red     2_vs_1 1
#10:   red     2_vs_3 1
#11:   red     3_vs_1 0
#12:   red     3_vs_2 0
#13: green     1_vs_2 0
#14: green     1_vs_3 0
#15: green     2_vs_1 1
#16: green     2_vs_3 1
#17: green     3_vs_1 1
#18: green     3_vs_2 0

答案 2 :(得分:2)

使用dplyr

# dplyr supplies %>%, left_join(), filter(), mutate() and select(); without
# attaching it the pipeline below fails with "could not find function".
library(dplyr)

# Sample data from the question.
df <- data.frame(
  Color = rep(c("blue", "red", "green"), each = 3),
  Number = rep(1:3, times = 3),
  Y = c(5, 3, 2, 5, 8, 2, 2, 9, 3)
)

# Join the table to itself on Color so every row meets every row of the same
# Color, drop the self-pairs, then label and score each remaining pair.
# NOTE: dplyr 1.1.1 and later warn about a many-to-many relationship here;
# that fan-out is intended, and `relationship = "many-to-many"` silences it
# on those versions.
df %>%
  left_join(df, by = "Color") %>%
  filter(Number.x != Number.y) %>%
  mutate(
    Comparison = sprintf("%s_vs_%s", Number.x, Number.y),
    # 1 when the left Y is >= the right Y, otherwise 0
    Y = as.numeric(Y.x >= Y.y)
  ) %>%
  select(Color, Comparison, Y)

   Color Comparison Y
1   blue     1_vs_2 1
2   blue     1_vs_3 1
3   blue     2_vs_1 0
4   blue     2_vs_3 1
5   blue     3_vs_1 0
6   blue     3_vs_2 0
7    red     1_vs_2 0
8    red     1_vs_3 1
9    red     2_vs_1 1
10   red     2_vs_3 1
11   red     3_vs_1 0
12   red     3_vs_2 0
13 green     1_vs_2 0
14 green     1_vs_3 0
15 green     2_vs_1 1
16 green     2_vs_3 1
17 green     3_vs_1 1
18 green     3_vs_2 0

答案 3 :(得分:1)

# For every Color, build all ordered pairs of distinct Numbers and flag whether
# the Y value belonging to the first Number is >= the Y value belonging to the
# second. `df` is the input data.frame with columns Color, Number and Y.
df2 <- do.call(rbind, lapply(split(df, df$Color), function(x) {
  # Row-index pairs within this Color; `lhs` varies slowest, so the pairs come
  # out in the order 1-1, 1-2, 1-3, 2-1, ...
  idx <- expand.grid(rhs = seq_len(nrow(x)), lhs = seq_len(nrow(x)))
  # Drop self-comparisons (same Number on both sides)
  idx <- idx[which(x$Number[idx$lhs] != x$Number[idx$rhs]), ]
  data.frame(
    X1 = x$Color[idx$lhs],
    X2 = x$Number[idx$lhs],
    X3 = x$Number[idx$rhs],
    # Compare the Y values of the two rows, not the Numbers themselves
    Y = as.numeric(x$Y[idx$lhs] >= x$Y[idx$rhs])
  )
}))
# rbind() on a named list prefixes the row names with the group name; reset
# them to plain 1..n. Rows are already ordered by Color, then by pair.
rownames(df2) <- NULL
# Create comparison column if that is necessary
df2$comparison <- paste(df2$X2, df2$X3, sep = "_vs_")

df2
#      X1 X2 X3 Y comparison
#1   blue  1  2 1     1_vs_2
#2   blue  1  3 1     1_vs_3
#3   blue  2  1 0     2_vs_1
#4   blue  2  3 1     2_vs_3
#5   blue  3  1 0     3_vs_1
#6   blue  3  2 0     3_vs_2
#7  green  1  2 0     1_vs_2
#8  green  1  3 0     1_vs_3
#9  green  2  1 1     2_vs_1
#10 green  2  3 1     2_vs_3
#11 green  3  1 1     3_vs_1
#12 green  3  2 0     3_vs_2
#13   red  1  2 0     1_vs_2
#14   red  1  3 1     1_vs_3
#15   red  2  1 1     2_vs_1
#16   red  2  3 1     2_vs_3
#17   red  3  1 0     3_vs_1
#18   red  3  2 0     3_vs_2