跨数据帧按组重新缩放

时间:2014-06-04 07:39:30

标签: r dplyr

我有两个数据框

# Example data: 20 rows in six (g1, g2) groups; val2 restarts at 1 in each group.
df1 <- data.frame(
  g1 = factor(rep(c("A", "B"), each = 10L), levels = c("A", "B")),
  g2 = factor(
    rep(c("a", "b", "c", "a", "b", "c"), times = c(4L, 3L, 3L, 3L, 4L, 3L)),
    levels = c("a", "b", "c")
  ),
  val1 = 1:20,
  val2 = c(
    1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L,
    1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L
  )
)


# Example data: 17 rows over the same (g1, g2) groups; val3 supplies the
# per-group target range.
df2 <- data.frame(
  g1 = factor(rep(c("A", "B"), times = c(8L, 9L)), levels = c("A", "B")),
  g2 = factor(
    rep(c("a", "b", "c", "a", "b", "c"), times = c(3L, 3L, 3L, 3L, 3L, 2L)),
    levels = c("a", "b", "c")
  ),
  val3 = c(
    5L, 6L, 7L, 3L, 4L, 5L, 2L, 3L, 4L,
    8L, 9L, 10L, 4L, 5L, 6L, 5L, 6L
  )
)

> df1
   g1 g2 val1 val2
1   A  a    1    1
2   A  a    2    2
3   A  a    3    3
4   A  a    4    4
5   A  b    5    1
6   A  b    6    2
7   A  b    7    3
8   A  c    8    1
9   A  c    9    2
10  A  c   10    3
11  B  a   11    1
12  B  a   12    2
13  B  a   13    3
14  B  b   14    1
15  B  b   15    2
16  B  b   16    3
17  B  b   17    4
18  B  c   18    1
19  B  c   19    2
20  B  c   20    3

> df2
   g1 g2 val3
1   A  a    5
2   A  a    6
3   A  a    7
4   A  b    3
5   A  b    4
6   A  b    5
7   A  c    2
8   A  c    3
9   B  c    4
10  B  a    8
11  B  a    9
12  B  a   10
13  B  b    4
14  B  b    5
15  B  b    6
16  B  c    5
17  B  c    6

我的目标是重新缩放 df1$val2,使其取值落在 df2$val3 在对应分组(g1、g2)中的最小值与最大值之间。

我试过了:

library(dplyr)
# Attempt: rescale val2 within each (g1, g2) group of df1.
# NOTE: only df1 is grouped here. max(df2$val3) and min(df2$val3) read the
# whole df2 column, so every group is mapped onto the same overall range of
# df2$val3 rather than the range of its own matching group.
# NOTE(review): `%.%` appears to be the early dplyr chaining operator, later
# replaced by `%>%` -- confirm against the installed dplyr version.
df1 <- df1 %.% group_by(g1, g2) %.% mutate(rescaled=(max(df2$val3)-min(df2$val3))*(val2-min(val2))/(max(val2)-min(val2))+min(df2$val3))

但输出与我的预期不同。问题在于,由于这两个数据框的行数不同,我既不能用 cbind 按列拼接,也不能直接用 merge 合并它们。有什么建议吗?

1 个答案:

答案 0 :(得分:1)

这有用吗?

library(plyr)
## Target range for each (g1, g2) group: the min and max of df2$val3.
df3 <- ddply(df2, .(g1, g2), summarize,
             max.val = max(val3), min.val = min(val3))
## Left join so every row of df1 carries its group's target range; groups
## that are absent from df2 get NA for min.val / max.val.
merged.df <- merge(df1, df3, by = c("g1", "g2"), all.x = TRUE)
## Linearly map val2 within each group from [min(val2), max(val2)] onto
## [min.val, max.val]. A group whose val2 values are all equal yields NaN
## (0 / 0), since there is no range to stretch.
merged.df <- ddply(merged.df, .(g1, g2), transform,
                   rescaled = (max.val - min.val) * (val2 - min(val2)) /
                     (max(val2) - min(val2)) + min.val)