Question

我正在研究一种算法，它需要连续强制执行N次测试。测试的排列对结果很重要。

问题：当一些规则适用时，我需要能够限制组合搜索空间。例如：

排列“1,2,3”使得以下测试无效。所以我不再需要像“1,2,3,4”或“1,2,3,5”等那样的排列。所以我写了一些代码，自己做排列，但我很慢。

我可以做些什么来加快这段代码的速度？或者我错过了那里的包裹吗？我应该自己在C中实现这个吗？有多种简单方法可以多线程吗？有没有一种简单的方法来预测第N个排列？（这将是简洁的，以简单的方式实现并行计算;）

非常感谢！马克

# Example of permu.with.check.
# 02.05.2014; Marc Giesmann

# Set if needed Recursion limit
# options(expressions=1e5)

permu.with.check <- function(perm = c(1,2,3), current = NULL, fun){

  #Optional: Calculate all variants
  #if(is.null(current)){
  #  all.permutations <- 2* (sum(gamma(perm + 1)) - 1)
  #}

  for(i in 1: length(perm)){

    fix  <- perm[i]   # calculated elements; fix at this point
    rest <- perm[-i]  # elements yet to permutate

    #If this is a recursive call, use
    #"current" to complement current fix value
    if(!is.null(current)){
      fix <- c(current,fix)
    }

    #Call callback.
    #If callback returns "FALSE" don't calculate 
    #further permutations with this "fix". Skip i.
    if(fun(x=fix)){

      #if this is the call with the last
      #value (the deepest,recursive call), stop recursion
      if(length(rest) > 0){
        permu.with.check( rest, fix,fun ) #recursive. 
      }
    }
  }

}

# Callback for permu.with.check
# Ignores 3
perm.callback <- function(x){

  #CALCULATE STUFF HERE
  #cat(counter, ". permutation: ",x, "\n")
  counter <<- counter + 1

  #TEST - EXAMPLE:
  # if new number equals 3, we don't need further testing
  if(x[length(x)] == 3){
    return(FALSE)
  }else{
    return(TRUE)
  }

} 

########## MAIN ################

counter <- 0
permu.with.check(perm=1:8, fun=perm.callback)

#Compare with permutations from package Combinations
# counter (from permu.with.check) == 27399
# nrow(permutations(8))           == 40320

#OPTIONAL: Try out Combinations package
#if(!require(Combinations)){
#  install.packages("Combinations", repos = "http://www.omegahat.org/R")
#  require(Combinations)
#}

#nrow(permutations(8))

Answer 1

Marc，根据您最近的评论，这是一个建议的实施。

这是一个非常迭代的解决方案，并且效率不高产生排列。它假设计算在 testfunc比排列生成贵得多。

基本设置：

set.seed(123)
opts <- 1:5
library(combinat)
## a little inefficient but functional
permn.lim <- function(x, m=length(x)) {
    tmp <- permn(x)
    if (m >= length(x)) tmp
    else unique(lapply(tmp, `[`, 1:m))
}
testfunc <- function(...) list(results=list(), continue=(runif(1) < 0.3))

运行3元组的第一次迭代。

doe3 <- permn.lim(opts, 3)
length(doe3)
## [1] 60
str(head(doe3, n=2))
## List of 2
##  $ : int [1:3] 1 2 3
##  $ : int [1:3] 1 2 5
tmp3 <- lapply(doe3, testfunc)
str(head(tmp3, n=2))
## List of 2
##  $ :List of 2
##   ..$ results : list()
##   ..$ continue: logi TRUE
##  $ :List of 2
##   ..$ results : list()
##   ..$ continue: logi FALSE
results3 <- sapply(tmp3, function(zz) zz$results)
continue3 <- sapply(tmp3, function(zz) zz$continue)
head(continue3, n=2)
## [1]  TRUE FALSE
length(doe3.continue <- doe3[continue3])
## [1] 19

results3是每个实际测试结果的列表（据称是在中捕获的） testfunc），而continue3是bool的向量，表明是否继续使用相应的3元组是合理的。用于查找然后，我们会将doe3过滤为doe3.continue。

然后我们生成下一系列实验（在这种情况下为4），和过滤基于前一个成功测试的存储过程在doe3.continue。

doe4.all <- permn.lim(opts, 4)
length(doe4.all)
## [1] 120
doe4.filtered <- Filter(function(zz) list(zz[1:3]) %in% doe3.continue, doe4.all)
length(doe4.filtered)
## [1] 38
tmp4 <- lapply(doe4.filtered, testfunc)
results4 <- sapply(tmp4, function(zz) zz$results)
continue4 <- sapply(tmp4, function(zz) zz$continue)
doe4.continue <- doe4[continue4]
length(doe4.continue)
## [1] 35

对于opts中的多个元素，可以重复此过程。如果这是固定数量的水平，然后它不难维护目前的形式。如果你将以不同的方式重复这一点数量级别，那么要做到这一点并不难尾递归函数，也许更精致一点。

Answer 2

我需要的是：具有回调的置换算法，可以决定

如果此时可以修剪排列
如果应该保存排列

和

能够进行多线程处理。

到目前为止，我所拥有的是一个带有冗余的复杂代码，但到目前为止它还能正常工作。我仍然不满意，因为在多线程模式中无法向用户提供反馈。这是我的代码，希望有人可以重用它。

如果有人知道如何优化它，请继续。如果我对全局/部分全局变量的想法正常工作，我仍然不确定。

附加代码是一个工作示例，如果“3”是当前排列中的最后一个数字，则修剪，如果当前排列的数字之和在此时最高，则仅保存。多线程的缺点：它保存了许多冗余值，因为“最高的数字总和”不能沿线程共享，这在这一点上非常不幸。

此致马克

# Example of permu.new # 05.05.2014; Marc Giesmann # Set if needed Recursion limit # options(expressions=1e5) require(compiler) compilePKGS(enable=TRUE) enableJIT(3) require(doMC) CONST_SKIP <- 1 CONST_SAVE <- 2 CONST_VAL <- 3 #--------------------- permu.new <- function(perm,fun, values = 0, savemax = 1000){ #DEFINE INTERNAL FUNCTIONS permu.worker.save.max <- savemax permu.worker.save.count <- 1 permu.worker.global.savelist <- vector(mode="list",length = permu.worker.save.max) #Saves permutation. If there are more to save than in savemax defined, #it primitlively appends a entry to the list permu.worker.save <- function(permutation, values){ if(permu.worker.save.count > permu.worker.save.max){ permu.worker.global.savelist[[length(permu.worker.global.savelist)+1]] <<- list(perm=permutation,values=values) }else{ permu.worker.global.savelist[[permu.worker.save.count]] <<- list(perm=permutation,values=values) } permu.worker.save.count <<- permu.worker.save.count + 1 } #CREATES RESULTOBJECT robj <- function(vals){ return(vector(mode="numeric",length=2+vals)) } #WORKERBEE. Does the funpart of recursion and calling the callbacks permu.worker <- function(perm, current, resultobject, fun){ #resultobject<- robj.reset(resultobject) #reset internal values. resultobject[1:2] <- 0 #reset internal values. for(i in 1: length(perm)){ fix <- c(current,perm[i]) # calculated elements; fix at this point rest <- perm[-i] # elements yet to permutate #Call callback. resultobject <- fun(x=fix, resultobject = resultobject) #Save permutation? if(resultobject[CONST_SAVE]){ permu.worker.save(fix, resultobject[CONST_VAL]) } #if this is the call with the last #value (the deepest,recursive call) or object wanted #to skip next iterations stop recursion if(length(rest) && !resultobject[CONST_SKIP]){ resultobject <- permu.worker(rest, fix, resultobject, fun) } }#end for return(resultobject) } #DEFINE INTERNAL END #BEGIN FUNCTION resultobject <- robj(values) #vector(mode="numeric", length=2+values) #for(i in 1: length(perm)){ i<-0 res<-foreach(i=1: length(perm), .combine=c) %dopar% { #calculate the first permutation manually resultobject <- permu.worker(perm[i], NULL, resultobject, fun) #now do the funny, recursive stuff resultobject <- permu.worker(perm[-i], perm[i], resultobject, fun) # Now we're ready for the next permutation. # Save all the things we need return(permu.worker.global.savelist[1:permu.worker.save.count-1]) }#end foreach return(res) } #---------------------------------------------------------------- #EXAMPLE CALLBACK # Prunes, if 3 is last number in permutation # Saves only, if sum() of permutation is the highes found yet. # IMPORTANT: return has to be a "resultobject", which is provided # through the parameters. # Use # resultobject[CONST_SKIP] <- TRUE/FALSE (prune after this permutation T/F) # resultobject[CONST_SAVE] <- TRUE/FALSE (return this permutation, save it T/F) # resultobject[CONST_VAL] <- NUMERIC (use this to save something for the process) #----------------------------------------------------------------- perm.callback <- function(x,resultobject){ #CALCULATE STUFF HERE; #Example a global counter;(works only singlethreaded) counter <<- counter + 1 #SKIP EXAMPLE #Skip this one? skip next permutations if the last number is 3 resultobject[CONST_SKIP] <- (x[length(x)] == 3) if(resultobject[CONST_SKIP]){ #another global counter (works only singlethreaded) skipped <<- skipped + 1 } #SAVE EXAMPLE #Should we save this permutation? #Save only, if sum of permutation is bigger than own value s <- sum(x) if(s > resultobject[CONST_VAL]){ resultobject[CONST_VAL] <- s resultobject[CONST_SAVE] <-TRUE #yet another example-counter. (works only singlethreaded) saved <<- saved + 1 }else{ resultobject[CONST_SAVE] <-FALSE } return(resultobject) } #---------- MAIN #counter/skipped/saved are working in singlethreading mode, #See usage in perm.callback(). # #Variables show, how many... counter <- 0 # ...permutations have been calculated skipped <- 0 # ... have been skipped (last digit was 3) saved <- 0 # ... were saved and returned #registerDoMC(4) #uncomment for multithreading stime <- system.time(gcFirst = TRUE, expr ={ result <- permu.new(perm=1:10, fun=perm.callback,values=1) }) cat(as.double(stime[3]), "seconds; ~", (counter / as.double(stime[3])), " calculations/second")

排列：加速，预测和/或多线程

2 个答案: