Question

我有一个矩阵或数据框，希望按行和按列统计相邻值之间的转换总数（忽略转换的方向）。理想情况下，结果还应包括理论上可能、但实际并未发生的转换。小规模的例子：

# Example input: nine values arranged as a 3 x 3 matrix (matrix() fills
# column by column by default; the printed result follows).
mat <- matrix(c(2, 1, 2, 1, 3, 1, 2, 1, 2), nrow = 3)

     [,1] [,2] [,3]
[1,]    2    1    2
[2,]    1    3    1
[3,]    2    1    2

期望的结果如下：

cat1 cat2 n
 1    1   0
 1    2   8
 1    3   4   
 2    2   0
 2    3   0
 3    3   0

例如：第二行中的 1-3-1 加上第二列中的 1-3-1，一共产生 4 次“1 - 3”转换。

非常感谢！

Answer 1

这是一种方法：

library(dplyr)


#' Collect every left-to-right transition of a matrix
#'
#' Pairs each cell with its right-hand neighbour in the same row.
#'
#' @param m A matrix.
#' @return A two-column matrix with columns `cat1` (the left cell) and `cat2`
#'   (its right-hand neighbour), one row per adjacent pair. A matrix with
#'   fewer than two columns yields a 0 x 2 matrix.
left_to_right_transitions <- function(m)
{
    nc <- ncol(m)
    pair_names <- list(NULL, c('cat1', 'cat2'))

    # With fewer than two columns there is no neighbour to pair with. The
    # colon sequences 1:(nc - 1) and 2:nc would count backwards here and
    # index a non-existent column, so return an empty result instead.
    if (nc < 2) {
        return(matrix(m[0], nrow = 0, ncol = 2, dimnames = pair_names))
    }

    left <- seq_len(nc - 1)
    matrix(
        c(m[, left], m[, left + 1]),
        ncol = 2,
        dimnames = pair_names
    )
}


#' Count unordered transitions between adjacent cells of a matrix
#'
#' Tallies every pair of horizontally or vertically adjacent values in `m`,
#' ignoring direction (a 1 -> 2 step and a 2 -> 1 step are the same
#' transition). Pairs of observed categories that never sit next to each
#' other are reported with a count of 0.
#'
#' @param m A matrix, or an object `as.matrix()` can convert (for example a
#'   data frame).
#' @return A data frame with columns `cat1` and `cat2` (one row per unordered
#'   pair of the sorted distinct values of `m`, with `cat1` not after `cat2`)
#'   and `n`, the number of adjacent cell pairs holding those two values.
count_transitions <- function(m)
{
    m <- as.matrix(m)
    pair_names <- c('cat1', 'cat2')

    # Use the values actually present rather than assuming labels 1:k.
    # sort() drops NA, so NA is not treated as a category.
    categories <- sort(unique(as.vector(m)))

    # The original matrix plus three mirror reflections, so that one
    # left-to-right pass over each yields the transitions in all four
    # directions. drop = FALSE keeps single-row and single-column inputs
    # as matrices.
    mt <- t(m)
    orientations <- list(
        m,
        m[, rev(seq_len(ncol(m))), drop = FALSE],
        mt,
        mt[, rev(seq_len(ncol(mt))), drop = FALSE]
    )
    # An orientation with fewer than two columns has no neighbouring cells.
    orientations <- Filter(function(x) ncol(x) >= 2, orientations)

    # Assemble a two column matrix that contains every transition that occurs.
    # The empty seed matrix keeps the result well-formed (0 x 2, same type as
    # `m`) when no orientation contributes any transitions.
    no_transitions <- matrix(
        m[0],
        nrow = 0,
        ncol = 2,
        dimnames = list(NULL, pair_names)
    )
    transitions <- do.call(
        rbind,
        c(list(no_transitions), lapply(orientations, left_to_right_transitions))
    )

    # Count each kind of transition that occurs. Keeping only rows with
    # cat1 <= cat2 retains one of the two directions of every state change.
    observed <-
        transitions %>%
        as.data.frame(stringsAsFactors = FALSE) %>%
        filter(cat1 <= cat2) %>%
        count(cat1, cat2)

    # Table of all possible unordered pairs, ordered by cat1 and then cat2.
    grid <- expand.grid(j = seq_along(categories), i = seq_along(categories))
    grid <- grid[grid$i <= grid$j, ]
    all_pairs <- data.frame(
        cat1 = categories[grid$i],
        cat2 = categories[grid$j],
        stringsAsFactors = FALSE
    )

    # Join the observed counts onto the full table to get the complete result.
    all_pairs %>%
        left_join(observed, by = pair_names) %>%
        mutate(
            n = ifelse(is.na(n), 0, n),
            # A transition without a state change passes the cat1 <= cat2
            # filter in both directions, so it was counted twice:
            n = ifelse(cat1 == cat2, n / 2, n)
        )
}

上面的思路是先编写一个函数，生成一个两列矩阵，其中包含输入矩阵 m 中所有从左到右的转换。然后，把这个函数应用于 m 的各个镜像反射，就能得到从右到左、从上到下和从下到上的转换。接着，我们将四个转换矩阵按行绑定，并用一些 dplyr 函数去除转换的重复计数、统计每种转换的次数。最后，我们把转换计数表连接到包含所有可能转换的完整表格上。

现在让我们将count_transitions应用于几个例子：

# Fix the RNG seed so the randomly generated examples are repeatable.
# NOTE(review): R >= 3.6.0 changed the default sample() algorithm, so a
# current R session may produce a different m2 than the one printed in this
# answer; RNGkind(sample.kind = "Rounding") should restore the older
# behaviour - confirm before comparing outputs.
set.seed(1)
# m1: the 3 x 3 example matrix from the question.
m1 <- matrix(c(2, 1, 2, 1, 3, 1, 2, 1, 2), nrow = 3)
# m2: a random 4 x 4 matrix drawn from the categories 1:4.
m2 <- matrix(sample(1:4, size = 16, replace = TRUE), nrow = 4)
# m3: a random matrix with 1e3 rows holding 1e6 values drawn from 1:9.
m3 <- matrix(sample(1:9, size = 1e6, replace = TRUE), nrow = 1e3)

# Print m1 and its transition table; the commented lines are console output.
m1
#      [,1] [,2] [,3]
# [1,]    2    1    2
# [2,]    1    3    1
# [3,]    2    1    2
count_transitions(m1)
#   cat1 cat2 n
# 1    1    1 0
# 2    1    2 8
# 3    1    3 4
# 4    2    2 0
# 5    2    3 0
# 6    3    3 0

# Same again for the random matrix m2; here same-value neighbours occur too.
m2
#      [,1] [,2] [,3] [,4]
# [1,]    2    1    3    3
# [2,]    2    4    1    2
# [3,]    3    4    1    4
# [4,]    4    3    1    2
count_transitions(m2)
#    cat1 cat2 n
# 1     1    1 2
# 2     1    2 3
# 3     1    3 3
# 4     1    4 4
# 5     2    2 1
# 6     2    3 2
# 7     2    4 3
# 8     3    3 1
# 9     3    4 4
# 10    4    4 1

count_transitions 函数的运行速度似乎也相当快：

# Time 10 evaluations of count_transitions() on the large matrix m3.
# The figures below are the recorded output of one run; absolute timings
# presumably depend on the machine and package versions.
library(microbenchmark)
microbenchmark(count_transitions(m3), times = 10)
# Unit: milliseconds
#                   expr      min       lq     mean  median       uq      max neval
#  count_transitions(m3) 333.3395 334.3705 338.0282 335.945 337.0059 359.5586    10

计算矩阵中类别/值之间的所有行和列的转换，忽略顺序

1 个答案: