我在R中有一个很大的data.frame,它的过度简化版本看起来像这样(真正的data.frame在“Color”列中有20种颜色,在“Number”列中有10种不同的数字):
Color Number Y
blue 1 5
blue 2 3
blue 3 2
red 1 5
red 2 8
red 3 2
green 1 2
green 2 9
green 3 3
对于“Color”列中的每种颜色,我想对“Number”列中所有数字的两两组合应用一个函数,该函数比较它们在“Y”列中对应的值。让我们以一个简单的函数为例:
if x >= y, print 1, else print 0 # where x and y represent the first and second values to be compared, respectively
我希望得到如下的输出data.frame:
Color Comparison Y
blue 1_vs_2 1
blue 1_vs_3 1
blue 2_vs_1 0
blue 2_vs_3 1
blue 3_vs_1 0
blue 3_vs_2 0
red 1_vs_2 0
red 1_vs_3 1
red 2_vs_1 1
red 2_vs_3 1
red 3_vs_1 0
red 3_vs_2 0
green 1_vs_2 0
green 1_vs_3 0
green 2_vs_1 1
green 2_vs_3 1
green 3_vs_1 1
green 3_vs_2 0
答案 0 :(得分:5)
您考虑过SQL吗?您可以将数据与自身连接(self-join)。如果您限制`Color`相同且`Number`不同,就能得到所需的每一对比较。这与@Psidom的回答思路相同——他只是用`data.table`的连接实现的。
# Self-join df on Color so that every row is paired with every row of the
# same colour that has a different Number; the CASE expression scores each
# ordered pair as 1 when the left Y is >= the right Y, otherwise 0.
library(sqldf)
pairwise_query <- "SELECT l.Color, l.Number as l_number, r.Number as r_number,
case when l.Y >= r.Y then 1 else 0 end as Y
FROM df as l
INNER JOIN df as r
ON l.Color = r.Color AND
l.Number != r.Number
"
res <- sqldf(pairwise_query)
# Label each ordered pair as "<left>_vs_<right>".
res[["comparison"]] <- with(res, paste0(l_number, "_vs_", r_number))
res
Color l_number r_number Y comparison
1 blue 1 2 1 1_vs_2
2 blue 1 3 1 1_vs_3
3 blue 2 1 0 2_vs_1
4 blue 2 3 1 2_vs_3
5 blue 3 1 0 3_vs_1
6 blue 3 2 0 3_vs_2
7 red 1 2 0 1_vs_2
8 red 1 3 1 1_vs_3
9 red 2 1 1 2_vs_1
10 red 2 3 1 2_vs_3
11 red 3 1 0 3_vs_1
12 red 3 2 0 3_vs_2
13 green 1 2 0 1_vs_2
14 green 1 3 0 1_vs_3
15 green 2 1 1 2_vs_1
16 green 2 3 1 2_vs_3
17 green 3 1 1 3_vs_1
18 green 3 2 0 3_vs_2
答案 1 :(得分:3)
您可以尝试使用下面的data.table方法:
library(data.table)
setDT(dt)  # convert dt to a data.table by reference
# For each Color, enumerate every ordered pair of rows (left index varies
# slowest, matching CJ(..., sorted = FALSE)), drop the pairs whose Number is
# identical, and score each remaining pair as 1 when the left Y is >= the
# right Y, otherwise 0.
# The self-comparison filter works on the Number values themselves rather than
# on a regex over the pasted label: an unanchored "(\\d+)_vs_\\1" pattern also
# matches labels such as "1_vs_10" or "11_vs_1" and would silently drop them.
dt[, {
  lhs <- rep(seq_len(.N), each = .N)
  rhs <- rep(seq_len(.N), times = .N)
  keep <- which(Number[lhs] != Number[rhs])
  lhs <- lhs[keep]
  rhs <- rhs[keep]
  list(Comparison = paste(Number[lhs], Number[rhs], sep = "_vs_"),
       Y = as.numeric(Y[lhs] >= Y[rhs]))
}, by = .(Color)]
# Color Comparison Y
# 1: blue 1_vs_2 1
# 2: blue 1_vs_3 1
# 3: blue 2_vs_1 0
# 4: blue 2_vs_3 1
# 5: blue 3_vs_1 0
# 6: blue 3_vs_2 0
# 7: red 1_vs_2 0
# 8: red 1_vs_3 1
# 9: red 2_vs_1 1
#10: red 2_vs_3 1
#11: red 3_vs_1 0
#12: red 3_vs_2 0
#13: green 1_vs_2 0
#14: green 1_vs_3 0
#15: green 2_vs_1 1
#16: green 2_vs_3 1
#17: green 3_vs_1 1
#18: green 3_vs_2 0
答案 2 :(得分:2)
使用dplyr:
# dplyr must be attached: it provides %>%, left_join(), filter(), mutate()
# and select() used below.
library(dplyr)

# Example data: three colours, each observed at Number 1..3.
df <- data.frame(
  Color = rep(c("blue", "red", "green"), each = 3),
  Number = rep(1:3, times = 3),
  Y = c(5, 3, 2, 5, 8, 2, 2, 9, 3)
)

# Self-join on Color to pair every row with every row of the same colour
# (columns from the left copy get the suffix .x, from the right copy .y),
# drop the self-pairs, then score each ordered pair as 1 when the left Y is
# >= the right Y, otherwise 0.
# NOTE: the self-join is intentionally many-to-many; dplyr >= 1.1.0 warns
# about that unless relationship = "many-to-many" is passed to left_join().
df %>%
  left_join(df, by = "Color") %>%
  filter(Number.x != Number.y) %>%
  mutate(
    Comparison = sprintf("%s_vs_%s", Number.x, Number.y),
    Y = as.numeric(Y.x >= Y.y)
  ) %>%
  select(Color, Comparison, Y)
Color Comparison Y
1 blue 1_vs_2 1
2 blue 1_vs_3 1
3 blue 2_vs_1 0
4 blue 2_vs_3 1
5 blue 3_vs_1 0
6 blue 3_vs_2 0
7 red 1_vs_2 0
8 red 1_vs_3 1
9 red 2_vs_1 1
10 red 2_vs_3 1
11 red 3_vs_1 0
12 red 3_vs_2 0
13 green 1_vs_2 0
14 green 1_vs_3 0
15 green 2_vs_1 1
16 green 2_vs_3 1
17 green 3_vs_1 1
18 green 3_vs_2 0
答案 3 :(得分:1)
# Build, for each colour, every ordered pair of distinct rows and compare
# their Y values (1 when the first Y is >= the second Y, otherwise 0).
# Pairs are enumerated by row position, so a colour with a single row simply
# contributes no pairs.
df2 <- do.call(rbind, lapply(split(df, df$Color), function(grp) {
  n <- nrow(grp)
  first <- rep(seq_len(n), each = n)
  second <- rep(seq_len(n), times = n)
  keep <- first != second
  first <- first[keep]
  second <- second[keep]
  data.frame(
    X1 = as.character(grp$Color[first]),
    X2 = grp$Number[first],
    X3 = grp$Number[second],
    # Compare the Y values of the two rows, not their Number labels.
    Y = as.numeric(grp$Y[first] >= grp$Y[second]),
    stringsAsFactors = FALSE
  )
}))
# Sort by colour, then by the two numbers being compared.
df2 <- df2[order(df2$X1, df2$X2, df2$X3), ]
rownames(df2) <- NULL
# Create the comparison label column.
df2$comparison <- paste(df2$X2, df2$X3, sep = "_vs_")
df2
# X1 X2 X3 Y comparison
#1 blue 1 2 1 1_vs_2
#2 blue 1 3 1 1_vs_3
#3 blue 2 1 0 2_vs_1
#4 blue 2 3 1 2_vs_3
#5 blue 3 1 0 3_vs_1
#6 blue 3 2 0 3_vs_2
#7 green 1 2 0 1_vs_2
#8 green 1 3 0 1_vs_3
#9 green 2 1 1 2_vs_1
#10 green 2 3 1 2_vs_3
#11 green 3 1 1 3_vs_1
#12 green 3 2 0 3_vs_2
#13 red 1 2 0 1_vs_2
#14 red 1 3 1 1_vs_3
#15 red 2 1 1 2_vs_1
#16 red 2 3 1 2_vs_3
#17 red 3 1 0 3_vs_1
#18 red 3 2 0 3_vs_2