Question

场景如下：我有一个样本，其中每个受试者被分到三个组中的某一组。接下来，从每个组中各取一名受试者组合在一起，形成若干个“三联体”（每个三联体由来自三个组的各一名受试者组成）。我想计算来自给定组（1、2 或 3）的每个受试者与来自其他原始组的受试者 i 被分在同一个三联体中的次数。

这是一个简单的代码示例：

# Toy data: nine subjects with IDs 1-9, three per group in ID order.
# cbind() of numbers and strings gives a character matrix, so ID is stored
# as text rather than as a number.
data <- data.frame(
  cbind(1:9, rep(c("Group 1", "Group 2", "Group 3"), each = 3))
)
names(data) <- c("ID", "Group")

# One triplet per row: column 1 holds the Group 1 member, column 2 the
# Group 2 member and column 3 the Group 3 member. A subject may appear in
# several triplets.
groups.of.3 <- data.frame(matrix(
  c(1, 4, 7,
    2, 4, 7,
    2, 5, 7,
    3, 6, 8,
    3, 6, 9),
  ncol = 3, byrow = TRUE
))

N <- nrow(data)
n1 <- sum(data$Group == "Group 1")
n2 <- sum(data$Group == "Group 2")
n3 <- sum(data$Group == "Group 3")

# Count how often each subject (row) shares a triplet with each member of a
# given group (column). Rows belonging to the column's own group stay at 0.
M1 <- matrix(0, nrow = N, ncol = n1)
M2 <- matrix(0, nrow = N, ncol = n2)
M3 <- matrix(0, nrow = N, ncol = n3)

from_g1 <- groups.of.3[, 1]
from_g2 <- groups.of.3[, 2]
from_g3 <- groups.of.3[, 3]

for (subject in seq_len(N)) {
  subject_group <- data$Group[subject]

  if (subject_group != "Group 1") {
    # Group 1 members carry the IDs 1..n1.
    for (k in seq_len(n1)) {
      M1[subject, k] <- sum(
        from_g1 == k & (from_g2 == subject | from_g3 == subject)
      )
    }
  }

  if (subject_group != "Group 2") {
    # Group 2 members carry the IDs (n1 + 1)..(n1 + n2).
    for (k in seq_len(n2)) {
      M2[subject, k] <- sum(
        from_g2 == (n1 + k) & (from_g1 == subject | from_g3 == subject)
      )
    }
  }

  if (subject_group != "Group 3") {
    # Group 3 members carry the IDs (n1 + n2 + 1)..N.
    for (k in seq_len(n3)) {
      M3[subject, k] <- sum(
        from_g3 == (n1 + n2 + k) & (from_g1 == subject | from_g2 == subject)
      )
    }
  }
}

也就是说，我有 9 名受试者，每组 3 名。然后将来自各组的受试者组合在一起（允许同一受试者重复出现在多个三联体中）。当受试者数量增多时，这段代码的耗时会明显变长，我想知道是否有更快的替代方案，可以避免使用 for 循环。

例如，矩阵 M1 记录了第 1 组中的每个受试者与来自其他组的各个受试者被分在同一个三联体中的次数：

M1
      [,1] [,2] [,3]
 [1,]    0    0    0
 [2,]    0    0    0
 [3,]    0    0    0
 [4,]    1    1    0
 [5,]    0    1    0
 [6,]    0    0    2
 [7,]    1    2    0
 [8,]    0    0    1
 [9,]    0    0    1

因此，3 列分别代表第 1 组的 3 名受试者，行代表全部受试者；每个条目是第 1 组的某个受试者与另一名受试者被分在同一个三联体中的次数（例如，根据 groups.of.3，受试者 3 与受试者 6 同组两次，受试者 1 与受试者 7 同组一次）。

感谢您的帮助！

Answer 1

这样的东西？

library(tidyr)
library(dplyr)
# Make ID numeric so it can be joined against the numeric subject IDs stored
# in groups.of.3. Going through as.character() works whether ID is a factor
# (the data.frame() default before R 4.0) or a character column, and it is
# harmless to re-run on an ID that is already numeric. The previous
# as.numeric(levels(ID))[ID] returns only NA unless ID is a factor.
data <- data %>%
  mutate(ID = as.numeric(as.character(ID)))

# Long format: one row per (triplet, member) with the member's group attached.
# `rowname` identifies the triplet, i.e. the row of groups.of.3; it is built
# with row_number() because add_rownames() is deprecated.
tmp <- groups.of.3 %>%
  mutate(rowname = as.character(row_number())) %>%
  gather("X", "Person", -rowname) %>%
  inner_join(data, by = c("Person" = "ID"))

# Self-join on the triplet to enumerate every ordered pair of members, drop the
# pairs whose two members are in the same group (this removes each member paired
# with itself), then count per person and partner group, one column per group.
tmp %>%
  inner_join(tmp, by = "rowname") %>%
  filter(Group.x != Group.y) %>%
  group_by(Person.x, Group.x, Group.y) %>%
  summarise(N = n()) %>%
  ungroup() %>%
  spread(key = Group.y, value = N, fill = 0)

  Person.x Group.x Group 1 Group 2 Group 3
     (dbl)  (fctr)   (dbl)   (dbl)   (dbl)
1        1 Group 1       0       1       1
2        2 Group 1       0       2       2
3        3 Group 1       0       2       2
4        4 Group 2       2       0       2
5        5 Group 2       1       0       1
6        6 Group 2       2       0       2
7        7 Group 3       3       3       0
8        8 Group 3       1       1       0
9        9 Group 3       1       1       0

Answer 2

for 循环本身并不慢：

# Count, for every pair of subjects, how often they share a triplet by
# cross-tabulating each pair of columns of groups.of.3 with table().

# Turn each column into a factor over the full set of subject IDs, so every
# cross-tabulation below is an N x N table whose rows and columns follow the
# order of data$ID. factor() is required: as.factor() has no `levels` argument.
# The factors are kept in a separate list; groups.of.3 itself is left untouched.
ids <- as.character(data$ID)
members <- lapply(
  groups.of.3,
  function(column) factor(as.character(column), levels = ids)
)

n_subjects <- length(ids)
n_positions <- length(members)
M <- matrix(0, n_subjects, n_subjects)
for (i in seq_len(n_positions - 1)) {
  for (j in (i + 1):n_positions) {
    pair_counts <- unclass(table(members[[i]], members[[j]]))
    # table() only counts (column i member, column j member) in that direction;
    # adding the transpose makes M symmetric, so any subject can be looked up
    # either as a row or as a column.
    M <- M + pair_counts + t(pair_counts)
  }
}
dimnames(M) <- NULL

# Columns of M that belong to each group. Comparing the labels directly works
# for both factor and character Group columns.
M1 <- M[, data$Group == "Group 1", drop = FALSE]
M2 <- M[, data$Group == "Group 2", drop = FALSE]
M3 <- M[, data$Group == "Group 3", drop = FALSE]

Answer 3

我来回答自己的问题，这里对 Thierry 的答案稍作修改：

library(tidyr)
library(dplyr)

# Make ID numeric so it matches the numeric subject IDs in groups.of.3.
# as.character() first makes this work for factor, character and
# already-numeric ID columns alike; as.numeric(levels(ID))[ID] yields only NA
# unless ID is a factor (e.g. on R >= 4.0, or once ID has been converted).
data <- data %>%
  mutate(ID = as.numeric(as.character(ID)))

# One row per (triplet, member), with the member's group joined on.
# `rowname` is the index of the triplet within groups.of.3; it is built with
# row_number() because add_rownames() is deprecated.
tmp <- groups.of.3 %>%
  mutate(rowname = as.character(row_number())) %>%
  gather("X", "Person", -rowname) %>%
  inner_join(data, by = c("Person" = "ID"))

# Pair up the members of each triplet, keep pairs from different groups, and
# count how often each person meets each other person. Spreading on Person.y
# gives one column per subject.
tmp %>%
  inner_join(tmp, by = "rowname") %>%
  filter(Group.x != Group.y) %>%
  group_by(Person.x, Group.x, Person.y) %>%
  summarise(N = n()) %>%
  ungroup() %>%
  spread(key = Person.y, value = N, fill = 0)

这会给出如下输出，其中包含了前面 for 循环得到的 M1、M2 和 M3，它们按列并排拼接在一起。

Source: local data frame [9 x 11]

  Person.x Group.x     1     2     3     4     5     6     7     8     9
     (dbl)  (fctr) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl)
1        1 Group 1     0     0     0     1     0     0     1     0     0
2        2 Group 1     0     0     0     1     1     0     2     0     0
3        3 Group 1     0     0     0     0     0     2     0     1     1
4        4 Group 2     1     1     0     0     0     0     2     0     0
5        5 Group 2     0     1     0     0     0     0     1     0     0
6        6 Group 2     0     0     2     0     0     0     0     1     1
7        7 Group 3     1     2     0     2     1     0     0     0     0
8        8 Group 3     0     0     1     0     0     1     0     0     0
9        9 Group 3     0     0     1     0     0     1     0     0     0

R

3 个答案: