将虚拟编码矩阵转换为邻接矩阵

时间:2021-03-07 09:45:24

标签: r tidyverse igraph adjacency-matrix

大家,我有这个二元矩阵

rownames <- c("gene1", "gene2", "gene3", "gene4")
colnames <- c("A", "B", "C", "D")

data <- matrix(c(1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1),nrow = 4, ncol = 4, byrow = TRUE, dimnames = list(rownames, colnames))

data
#      A B C D
#gene1 1 0 1 0
#gene2 1 1 0 0
#gene3 1 0 1 0
#gene4 0 1 0 1

我想将数据转化为这个邻接矩阵,用于网络 igraph 可视化。

   A B C D
A  0 1 2 0
B  1 0 0 1
C  2 0 0 0
D  0 1 0 0

说明:从 A 中,gene2 与 B 共享分数“1”,这就是为什么我得到 1 分。 从A,gene1和gene3与C共享分数“total 2”,这就是为什么它得到2分

可以把矩阵改成这个吗?我应该使用迭代吗?但我不知道。 Tidyverse 方法将非常有帮助。

谢谢

3 个答案:

答案 0 :(得分:3)

您可以使用 outer 函数。

count1s <- function(x, y) colSums(x == 1 & y == 1)
n <- 1:ncol(data)
mat <- outer(n, n, function(x, y) count1s(data[, x], data[, y]))
diag(mat) <- 0
dimnames(mat) <- list(colnames(data), colnames(data))
mat

#  A B C D
#A 0 1 2 0
#B 1 0 0 1
#C 2 0 0 0
#D 0 1 0 0

答案 1 :(得分:2)

一个 tidyverse 方法 - 欢迎任何关于使其更简洁的反馈

library(tidyverse)

data_tibble <- as_tibble(data)
calculate_adjacency <- function(data, col_name) {
  column_names <- names(data)
  data %>%
    filter(!!sym(col_name) ==1) %>%
    select(!matches(col_name)) %>%
    pivot_longer(cols = everything(), names_to = "name", values_to = "value") %>%
    group_by(name) %>%
    summarize(value = sum(value)) %>%
    pivot_wider(names_from = name, values_from = value) %>%
    mutate("{col_name}" := 0) %>%
    select(all_of(column_names))
}

map_df(colnames(data_tibble), calculate_adjacency, data = data_tibble) %>%
  mutate(key = colnames(.)) %>%
  relocate(key, .before = 1)

# A tibble: 4 x 5
  key       A     B     C     D
  <chr> <dbl> <dbl> <dbl> <dbl>
1 A         0     1     2     0
2 B         1     0     0     1
3 C         2     0     0     0
4 D         0     1     0     0

答案 2 :(得分:2)

更简单的 tidyverse 方法

data2 <- as.data.frame(data) %>% rownames_to_column("id") %>%
  pivot_longer(cols = -id) %>% filter(value != 0)

merge(data2, data2, by = "id", all = T) %>%
  filter(name.x != name.y) %>%
  group_by(name.x, name.y) %>%
  summarise(val = n()) %>%
  pivot_wider(names_from = name.y, values_from = val, values_fill = 0, names_sort = T) %>%
  column_to_rownames("name.x")

  A B C D
A 0 1 2 0
B 1 0 0 1
C 2 0 0 0
D 0 1 0 0

更简单的方法是

data2 <- as.data.frame(data) %>% rownames_to_column("id") %>%
  pivot_longer(cols = -id) %>% filter(value != 0)

inter <- crossprod(xtabs(~id+name, data2), xtabs(~id+name, data2))
diag(inter) <- 0
inter

    name
name A B C D
   A 0 1 2 0
   B 1 0 0 1
   C 2 0 0 0
   D 0 1 0 0

在链接最简单的问题后我意识到

inter <- crossprod(data)
diag(inter) <- 0
inter
  A B C D
A 0 1 2 0
B 1 0 0 1
C 2 0 0 0
D 0 1 0 0