分析R中的排列

时间:2014-10-22 15:12:08

标签: r permutation

我可以通过以下方式获得字母a-e的所有排列:

> library(gtools)
> permutations(5,5, letters[1:5])
       [,1] [,2] [,3] [,4] [,5]
  [1,] "a"  "b"  "c"  "d"  "e" 
  [2,] "a"  "b"  "c"  "e"  "d" 
  [3,] "a"  "b"  "d"  "c"  "e" 
  [4,] "a"  "b"  "d"  "e"  "c" 
  [5,] "a"  "b"  "e"  "c"  "d" 
  [6,] "a"  "b"  "e"  "d"  "c" 
  [7,] "a"  "c"  "b"  "d"  "e" 
  [8,] "a"  "c"  "b"  "e"  "d" 
.....

但我怎么知道,对于每一行,该行中有多少个字母在适当的位置?

编辑: 谢谢你的回复。我用microbenchmark看速度:

> n=7
> f1 <- function() {A= permutations(n,n, letters[1:n]); table(apply(A, 1, function(u) sum( u == letters[1:n] )))}
> f2 <- function() {A= permutations(n,n, letters[1:n]); rowSums(A==letters[1:n][col(A)])}
> f3 <- function() {A <-permutations(n,n); rowSums(A==col(A))}
> library(microbenchmark)
> microbenchmark(f1(), f2(), f3(), unit="relative")
Unit: relative
 expr      min       lq     mean   median       uq      max neval
 f1() 1.426850 1.426050 1.424601 1.428461 1.410686 1.727781   100
 f2() 1.076791 1.074002 1.071137 1.065674 1.069168 1.071190   100
 f3() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000   100

我接受了使用 apply 的答案,因为它清晰易懂。

把 permutations() 的调用移到被计时的函数之外(只比较计数部分的速度):

> n=8
> library(gtools)
> A= permutations(n,n, letters[1:n]);
> B <-permutations(n,n); 
> f1 <- function() {table(apply(A, 1, function(u) sum( u == letters[1:n] )))}
> f2 <- function() {rowSums(A==letters[1:n][col(A)])}
> f3 <- function() {rowSums(B==col(B))}
> library(microbenchmark)
> microbenchmark(f1(), f2(), f3(), unit="relative")
Unit: relative
 expr       min        lq      mean    median        uq      max neval
 f1() 79.426189 72.674500 40.296357 68.896710 43.559159 6.269005   100
 f2()  3.440729  3.614968  2.807806  3.589499  2.740272 1.349151   100
 f3()  1.000000  1.000000  1.000000  1.000000  1.000000 1.000000   100
> 

2 个答案:

答案 0 :(得分:3)

这样的事可能有用:

library(gtools)
A <- permutations(5,5, letters[1:5])
apply(A, 1, function(u) sum( u == letters[1:5] ))

我们获得

head(data.frame(A,n=apply(A, 1, function(u) sum( u == letters[1:5] ))))
#   X1 X2 X3 X4 X5 n
# 1  a  b  c  d  e 5
# 2  a  b  c  e  d 3
# 3  a  b  d  c  e 3
# 4  a  b  d  e  c 2
# 5  a  b  e  c  d 2
# 6  a  b  e  d  c 3

答案 1 :(得分:2)

你也可以这样做(灵感来自 @J.R. 的评论):

  A <- permutations(5,5, letters[1:5])
  rowSums(A==letters[1:5][col(A)])
  #[1] 5 3 3 2 2 3 3 1 2 1 1 2 2 1 3 2 1 1 1 2 2 3 1 1 3 1 1 0 0 1 2 0 1 0 0 1 1
  #[38] 0 2 1 0 0 0 1 1 2 0 0 2 0 1 0 0 1 3 1 2 1 1 2 1 0 1 0 0 0 0 1 0 1 0 0 1 0
  #[75] 2 1 0 0 2 1 3 2 1 1 1 0 1 0 0 0 0 0 0 0 1 1 0 1 1 2 0 0 1 2 2 3 1 1 0 1 0
  #[112] 1 0 0 0 0 0 0 1 1

解释

为方便起见,我使用 `A` 的一个子集:

  A1 <- head(A)
  col(A1)
  #     [,1] [,2] [,3] [,4] [,5]
  #[1,]    1    2    3    4    5
  #[2,]    1    2    3    4    5
  #[3,]    1    2    3    4    5
  #[4,]    1    2    3    4    5
  #[5,]    1    2    3    4    5
  #[6,]    1    2    3    4    5

`letters[1:5][col(A1)]` 与 `rep(letters[1:5], each=nrow(A1))` 类似:`col(A1)` 生成的数字索引加上按列进行的操作,确保 `letters[1:5]` 的第一个元素(即 `a`)被复制到 `col(A1)` 中所有值为 `1` 的位置;同样,`b` 被复制到 `col(A1)` 中所有值为 `2` 的位置,依此类推。

  letters[1:5][col(A1)]
  #[1] "a" "a" "a" "a" "a" "a" "b" "b" "b" "b" "b" "b" "c" "c" "c" "c" "c" "c" "d"
  #[20] "d" "d" "d" "d" "d" "e" "e" "e" "e" "e" "e"

  rep(letters[1:5], each=nrow(A1))
 # [1] "a" "a" "a" "a" "a" "a" "b" "b" "b" "b" "b" "b" "c" "c" "c" "c" "c" "c" "d"
 #[20] "d" "d" "d" "d" "d" "e" "e" "e" "e" "e" "e"

在比较 `A1==letters[1:5][col(A1)]` 中,`==` 的 rhs 和 lhs 具有相同的 `length`(即 `length(A1)` 为 `#[1] 30`,`length(letters[1:5][col(A1)])` 也为 `#[1] 30`),所以比较是逐元素进行的,你得到结果

  A1==letters[1:5][col(A1)]
  #    [,1] [,2]  [,3]  [,4]  [,5]
  #[1,] TRUE TRUE  TRUE  TRUE  TRUE
  #[2,] TRUE TRUE  TRUE FALSE FALSE
  #[3,] TRUE TRUE FALSE FALSE  TRUE
  #[4,] TRUE TRUE FALSE FALSE FALSE
  #[5,] TRUE TRUE FALSE FALSE FALSE
  #[6,] TRUE TRUE FALSE  TRUE FALSE

现在,`A1==letters[1:5]` 之所以给出不同的结果,是因为元素回收(recycling)的方式:较短的 `letters[1:5]` 会沿着矩阵按列的存储顺序循环重复,而不是按行对齐。

  A1==letters[1:5]
  #      [,1]  [,2]  [,3]  [,4]  [,5]
  #[1,]  TRUE  TRUE  TRUE  TRUE  TRUE
  #[2,] FALSE FALSE FALSE  TRUE FALSE
  #[3,] FALSE FALSE FALSE FALSE FALSE
  #[4,] FALSE FALSE FALSE FALSE  TRUE
  #[5,] FALSE FALSE FALSE  TRUE  TRUE
  #[6,]  TRUE  TRUE FALSE  TRUE FALSE

上述比较类似于将下面这个按列展开的向量与循环重复使用的 `letters[1:5]` 逐元素比较:

  c(A1)

如果您想使用回收,您可以这样做:

rep(letters[1:5], nrow(A1))