按因子索引/检查矩阵的值

时间:2017-06-01 19:06:51

标签: r indexing boolean

给出一个df:

DF1:

(此处的表格内容在原文中缺失)

我想根据另一个 df 中针对每个组给出的要求,对它逐组进行比较:

DF2:

        x1  x2  x3  x4  x5  x6  x7  x8  x9  x10 x11 x12
Group 1 0   0   0   0   0   0   0   0   0   0   0   0
Group 1 0   0.7 5   15  16  1   0   0   0   0   0   0
Group 1 0   1   5   5   61  1   0   0   0   0   0   0
Group 1 0   1.2 28  6   100 0   0   0   0   0   0   0
Group 1 0   1.2 44  10  66  0   0   0   0   0   0   0
Group 1 0   1.2 6   8   59  0   0   0   0   0   0   0
Group 1 0   1.4 23  18  38  5   0   0   0   0   0   0
Group 1 0   2   16  16  107 1   0   0   0   0   0   0
Group 2 0   0   0   0   0   0   0   0   0   0   0   0
Group 2 0   2.9 0   3   63  4   2   1   0   2   0   0
Group 2 0   3.2 1   5   35  5   1   1   2   0   0   0
Group 2 0   4.3 1   9   68  7   4   1   0   0   0   0
Group 2 0   4.6 0   7   51  5   4   3   1   1   1   1
Group 2 0   4.6 1   6   22  4   2   4   1   1   1   1
Group 3 0   0   0   0   0   0   0   0   0   0   0   0
Group 3 0   1.5 0   1   25  2   2   1   0   0   0   0
Group 3 0   2   0   2   99  3   0   0   0   1   0   0
Group 3 0   2.2 0   2   44  2   1   2   1   2   2   2
Group 3 0   2.6 0   1   58  2   1   1   0   0   0   0
Group 3 0   2.9 5   5   83  0   1   2   0   0   0   0
Group 3 0   3.3 1   2   36  3   1   2   0   0   0   0
Group 3 0   3.4 3   4   78  7   3   1   0   1   0   0
Group 3 0   3.5 0   5   87  4   4   2   0   0   0   0
Group 3 0   4.6 0   4   52  3   1   2   2   1   0.5 0.5
Group 4 0   2.9 1   5   104 5   4   1   0   0   0   0
Group 4 0   3.1 1   1   60  0   2   1   2   0   0   0
Group 4 0   3.6 0   5   55  4   4   2   1   0   0   0
Group 4 0   5   1   8   36  6   3   2   1   0   0   0

例如,df1 < df2 会为 df1 中的每个值生成一个布尔矩阵,表示该值与 df2 中其所属组的对应值比较后是否满足条件——例如 df1:

        x1  x2  x3  x4  x5  x6  x7  x8  x9  x10 x11 x12
Group 1 0   3.5 100 2   1   1   0.5 0.5 0   0   0   0
Group 2 0   4   90  2.5 1.5 1   0.6 0.6 0   0   0   0
Group 3 0   4   60  3   2   1   0.7 0.7 0   0   0   0
Group 4 0   5   50  4   3   2   1   1   0   0   0   0

问题在于每个组的成员数量各不相同,因此需要能够处理大小不固定的组。

第一列也可以是行名或独立列。

1 个答案:

答案 0 :(得分:1)

这是一种可能的方法:

# Check every observation in DF1 against the requirement row of its group in
# DF2: the result is TRUE where the observed value is strictly below the
# group's requirement.
#
# Expects DF1 and DF2 to exist already, each with the key column 'Group'
# first and the same value columns after it.
# Creates DF3 (merged table), bools (logical matrix) and DFResult.
# NOTE: the renaming step modifies DF2 in place, so recreate DF2 before
# running this block a second time (otherwise the prefix is added twice).

# prepend DF2 column names with "R." so they stay distinct from DF1's columns
names(DF2)[-1] <- paste0("R.", names(DF2)[-1])
# merge DF1 and DF2; all.x = TRUE keeps DF1 rows whose group has no entry in
# DF2 (their comparisons come out as NA). merge() sorts the result by 'Group'.
DF3 <- merge(DF1, DF2, by = "Group", all.x = TRUE)
# compute the matrix of requirements satisfaction; drop = FALSE keeps both
# sides as data frames even when there is only a single value column, so the
# result is always a matrix carrying the original column names
bools <- DF3[, names(DF1)[-1], drop = FALSE] <
  DF3[, paste0("R.", names(DF1)[-1]), drop = FALSE]
# construct the final DF
DFResult <- data.frame(Group = DF3$Group, bools)

DFResult:

> DFResult
     Group    x1    x2   x3    x4    x5    x6    x7    x8    x9   x10   x11   x12
1  Group 1 FALSE  TRUE TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
2  Group 1 FALSE  TRUE TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
3  Group 1 FALSE  TRUE TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
4  Group 1 FALSE  TRUE TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
5  Group 1 FALSE  TRUE TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
6  Group 1 FALSE  TRUE TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
7  Group 1 FALSE  TRUE TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
8  Group 1 FALSE  TRUE TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
9  Group 2 FALSE  TRUE TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
10 Group 2 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
11 Group 2 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
12 Group 2 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
13 Group 2 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
14 Group 2 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
15 Group 3 FALSE  TRUE TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
16 Group 3 FALSE  TRUE TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
17 Group 3 FALSE  TRUE TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
18 Group 3 FALSE  TRUE TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
19 Group 3 FALSE  TRUE TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
20 Group 3 FALSE  TRUE TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
21 Group 3 FALSE  TRUE TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
22 Group 3 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
23 Group 3 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
24 Group 3 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
25 Group 4 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
26 Group 4 FALSE  TRUE TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
27 Group 4 FALSE  TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
28 Group 4 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

重新创建输入数据的代码:

# Observed values: one row per observation, keyed by the 'Group' column and
# followed by the measurements x1..x12. The groups have differing row counts.
DF1 <- utils::read.csv(
  header = TRUE,
  text =
"Group,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12
Group 1,0,0,0,0,0,0,0,0,0,0,0,0
Group 1,0,0.7,5,15,16,1,0,0,0,0,0,0
Group 1,0,1,5,5,61,1,0,0,0,0,0,0
Group 1,0,1.2,28,6,100,0,0,0,0,0,0,0
Group 1,0,1.2,44,10,66,0,0,0,0,0,0,0
Group 1,0,1.2,6,8,59,0,0,0,0,0,0,0
Group 1,0,1.4,23,18,38,5,0,0,0,0,0,0
Group 1,0,2,16,16,107,1,0,0,0,0,0,0
Group 2,0,0,0,0,0,0,0,0,0,0,0,0
Group 2,0,2.9,0,3,63,4,2,1,0,2,0,0
Group 2,0,3.2,1,5,35,5,1,1,2,0,0,0
Group 2,0,4.3,1,9,68,7,4,1,0,0,0,0
Group 2,0,4.6,0,7,51,5,4,3,1,1,1,1
Group 2,0,4.6,1,6,22,4,2,4,1,1,1,1
Group 3,0,0,0,0,0,0,0,0,0,0,0,0
Group 3,0,1.5,0,1,25,2,2,1,0,0,0,0
Group 3,0,2,0,2,99,3,0,0,0,1,0,0
Group 3,0,2.2,0,2,44,2,1,2,1,2,2,2
Group 3,0,2.6,0,1,58,2,1,1,0,0,0,0
Group 3,0,2.9,5,5,83,0,1,2,0,0,0,0
Group 3,0,3.3,1,2,36,3,1,2,0,0,0,0
Group 3,0,3.4,3,4,78,7,3,1,0,1,0,0
Group 3,0,3.5,0,5,87,4,4,2,0,0,0,0
Group 3,0,4.6,0,4,52,3,1,2,2,1,0.5,0.5
Group 4,0,2.9,1,5,104,5,4,1,0,0,0,0
Group 4,0,3.1,1,1,60,0,2,1,2,0,0,0
Group 4,0,3.6,0,5,55,4,4,2,1,0,0,0
Group 4,0,5,1,8,36,6,3,2,1,0,0,0"
)

# Requirement values: a single row per group, keyed by the 'Group' column and
# followed by one threshold for each of the columns x1..x12.
DF2 <- utils::read.csv(
  header = TRUE,
  text =
"Group,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12
Group 1,0,3.5,100,2,1,1,0.5,0.5,0,0,0,0
Group 2,0,4,90,2.5,1.5,1,0.6,0.6,0,0,0,0
Group 3,0,4,60,3,2,1,0.7,0.7,0,0,0,0
Group 4,0,5,50,4,3,2,1,1,0,0,0,0"
)