Question

您好，我的模拟速度非常快。我遇到的问题是加快模拟生成的数据的报告速度。

#Load in relevant libraries
library(splitstackshape)
library(foreach)
library(doParallel)

#sample data for simulation

set.seed(100)
input <- data.frame(JobNum = seq(1:200)
    ,HangsPerWeek = sample(1:50, 200,replace=T)
    ,DS.CT = sample(c(38,41,43),200,replace=T)
    ,C1.CT = sample(c(40,41,42),200,replace=T)
    ,C2.CT = sample(c(36,41),200,replace=T)
    ,C3.CT = sample(c(38,39,40),200,replace=T)
    ,C4.CT = sample(c(40,27),200,replace=T)
    ,C5D5.CT = sample(c(20,21,22),200,replace=T)
    ,C6D6.CT = sample(c(20,21,22),200,replace=T)
    ,C5D7.CT = sample(c(20,21,22),200,replace=T)
    ,C6D8.CT = sample(c(9,22,23),200,replace=T)
    ,C7CD.CT = sample(c(40,41),200,replace=T))

input$JobNum<-as.character(input$JobNum)

#expand input file to have a single row per part
partsList<-expandRows(input, "HangsPerWeek")

#Set up cluster using all but one core on machine 
#this runs the simulation in parallel
cores=detectCores()
cl <- makeCluster(cores[1]-1)
registerDoParallel(cl)

#Initialize variables
partsOrder <- list()
numSim <- 10

#start simulation
SimResults <- foreach(j=1:numSim) %dopar%{

RobotSimulation <- function(){

#randomize the dataset of parts and record the order
set.seed(100)
parts <- partsList[sample(nrow(partsList)),]
partsOrder <- list(parts$JobNum)

#choose a random sample of parts to populate the conveyor belts
#this random assignment will be constant across all iterations of the simulation
set.seed(101)
LineParts <- partsList[sample(nrow(partsList),234,replace=FALSE),]

#pass parts through system one at a time and record cycle times at each dip
LineParts_dfList <- lapply(seq(nrow(parts)), function(i){      
    #Index line
    LinePartsTemp <- parts[1,]
    LinePartsTemp[2:nrow(LineParts),] <- LineParts[1:nrow(LineParts)-1,]

    #put new part into system
    LinePartsTemp[1,] <- parts[i,]

    #update the list of parts on the line
    LineParts <<- LinePartsTemp      
})

otherstations_veclist <- 
  list(
    DS = vapply(LineParts_dfList, function(df) df[1,'DS.CT'], numeric(1)),
    D1 = vapply(LineParts_dfList, function(df) df[10,'C1.CT'], numeric(1)),
    D2 = vapply(LineParts_dfList, function(df) df[26,'C2.CT'], numeric(1)),
    D3 = vapply(LineParts_dfList, function(df) df[42,'C3.CT'], numeric(1)),
    D4 = vapply(LineParts_dfList, function(df) df[57,'C4.CT'], numeric(1)),
    D5 = vapply(LineParts_dfList, function(df) df[85,'C5D5.CT'], numeric(1)),
    D6 = vapply(LineParts_dfList, function(df) df[120,'C6D6.CT'], numeric(1)),
    D7 = vapply(LineParts_dfList, function(df) df[167,'C5D7.CT'], numeric(1)),
    D8 = vapply(LineParts_dfList, function(df) df[210,'C6D8.CT'], numeric(1)),
    D9 = vapply(LineParts_dfList, function(df) df[216,'C7CD.CT'], numeric(1))
  )

jobstations_veclist <- 
  list(
    DS = vapply(LineParts_dfList, function(df) df[1,'JobNum'], character(1)),
    D1 = vapply(LineParts_dfList, function(df) df[10,'JobNum'], character(1)),
    D2 = vapply(LineParts_dfList, function(df) df[26,'JobNum'], character(1)),
    D3 = vapply(LineParts_dfList, function(df) df[42,'JobNum'], character(1)),
    D4 = vapply(LineParts_dfList, function(df) df[57,'JobNum'], character(1)),
    D5 = vapply(LineParts_dfList, function(df) df[85,'JobNum'], character(1)),
    D6 = vapply(LineParts_dfList, function(df) df[120,'JobNum'], character(1)),
    D7 = vapply(LineParts_dfList, function(df) df[167,'JobNum'], character(1)),
    D8 = vapply(LineParts_dfList, function(df) df[210,'JobNum'], character(1)),
    D9 = vapply(LineParts_dfList, function(df) df[216,'JobNum'], character(1))
  )

#record results
result <- list(partsOrder = partsOrder, CT = otherstations_veclist, JobNum = jobstations_veclist)
return(result)
}

RobotSimulation()

}

#stop using all cores
stopCluster(cl)

在此运行之后，我创建了4个不同的图形，其中包含双循环以完成模拟的每次迭代。有没有办法继续使用R中的列表来加速计算？或者for循环是唯一的选择吗？

#For every time we add a new part to the line for every simulation 
#find the felt cycle time and the bottlenecks

ProblemJob <- c()
FeltCT <- c()
BottleNeck <- c()
CTs <- c()

for(s in 1:numSim){
  for(p in 1:dim(partsList)[1]){
    CT <- c(SimResults[[s]][[2]][[1]][[p]],
      SimResults[[s]][[2]][[2]][[p]],
      SimResults[[s]][[2]][[3]][[p]],
      SimResults[[s]][[2]][[4]][[p]],
      SimResults[[s]][[2]][[5]][[p]],
      max(SimResults[[s]][[2]][[6]][[p]],SimResults[[s]][[2]][[7]][[p]])+max(SimResults[[s]][[2]][[8]][[p]], SimResults[[s]][[2]][[9]][[p]]),
      SimResults[[s]][[2]][[10]][[p]]
    )
    FeltCT <- append(FeltCT,max(CT))
    BottleNeck <- append(BottleNeck,which(CT==max(CT)))
    CTs <- append(CTs,CT[which(CT==max(CT))])
    ProblemJob <- append(ProblemJob,if(which(CT==max(CT))==1){paste('DS',SimResults[[s]][[3]][[1]][[p]],sep=' ')}
    else if(which(CT==max(CT))==2){paste('R1',SimResults[[s]][[3]][[2]][[p]],sep=' ')}
    else if(which(CT==max(CT))==3){paste('R2',SimResults[[s]][[3]][[3]][[p]],sep=' ')}
    else if(which(CT==max(CT))==4){paste('R3',SimResults[[s]][[3]][[4]][[p]],sep=' ')}
    else if(which(CT==max(CT))==5){paste('R4',SimResults[[s]][[3]][[5]][[p]],sep=' ')}
    else if(which(CT==max(CT))==6){c(
                    if(SimResults[[s]][[2]][[6]][[p]] >= SimResults[[s]][[2]][[7]][[p]]){paste('R5D5',SimResults[[s]][[3]][[6]][[p]],sep=' ')}else{paste('R6D6',SimResults[[s]][[3]][[7]][[p]],sep=' ')}
                    ,if(SimResults[[s]][[2]][[8]][[p]] >= SimResults[[s]][[2]][[9]][[p]]){paste('R5D7',SimResults[[s]][[3]][[8]][[p]],sep=' ')}else{paste('R6D8',SimResults[[s]][[3]][[9]][[p]],sep=' ')}
                    ,paste(if(SimResults[[s]][[2]][[6]][[p]] >= SimResults[[s]][[2]][[7]][[p]]){SimResults[[s]][[3]][[6]][[p]]}else{SimResults[[s]][[3]][[7]][[p]]},
                        if(SimResults[[s]][[2]][[8]][[p]] >= SimResults[[s]][[2]][[9]][[p]]){SimResults[[s]][[3]][[8]][[p]]}else{SimResults[[s]][[3]][[9]][[p]]},sep='/')
                    )}
    else if(which(CT==max(CT))==7){paste('R7',SimResults[[s]][[3]][[10]][[p]],sep=' ')}
      )
  }
}

BottleNeckPercent <- 100*tabulate(BottleNeck)/length(BottleNeck)
RobotAvg<-aggregate(CTs~BottleNeck,FUN=mean)
base <- data.frame(BottleNeck=seq(1:7),CTs=rep(0,7))
RobotAvg <- merge(base,RobotAvg,by='BottleNeck',all=TRUE)

par(mfrow=c(2,2))
bp <- barplot(RobotAvg$CTs.y,
    names.arg=c('DS','R1','R2','R3','R4','R5/R6','R7'),
    col="lightblue",
    main="Average Cycle Time per Robot",
    xlab="Robot",ylab="Seconds")
text(bp,RobotAvg$CTs.y,round(RobotAvg$CTs.y),pos=1)

hist(FeltCT,col='yellow',main=paste('Avg Cycle Time:',round(mean(FeltCT),1),sep=' '))

barplot(head(table(ProblemJob)[order(-table(ProblemJob))],20),las=2,col='red',main='Top 20 Problem Jobs and Location')
bp2<-barplot(BottleNeckPercent
    ,col='green'
    ,names=c('DS','R1','R2','R3','R4','R5/R6','R7')
    ,main='% Cause of Bottleneck'
    ,xlab='Robot'
    ,ylab='%')

text(bp2,BottleNeckPercent,paste(round(BottleNeckPercent,2),'%',sep=''),pos=1)

结果如下：

Answer 1

好的，所以我认为我最初的想法是，要想达到你想要的东西并不太难，在我去吃晚餐之前这将是一个快速的想法。与此不同的是，我对解决方案仍然不满意，但它主要是ProblemJob变量让它变得痛苦。我的笔记本电脑上的计时解决方案为我的解决方案提供了大约1.5 - 1.7秒的时间，而当前的解决方案为36-39秒。我相信它可以提高效率，但我现在需要吃。我的答案与你的答案完全相同，但{OPO评论中讨论的ProblemJob除外。任何方式没有进一步的ado：

让你慢下来的一件事就是在双循环中随处附加向量。删除它会产生巨大的差异。问题在于您提前知道的一些变量，预先分配了多少结果。但是，您在列表和lapply函数上使用purrr::pmap进行了大量计算。还有一些地方你多次计算同样的事情。

我们可以一次性预先计算所有CT次迭代，这样可以在嵌套迭代结构中减少一些，并创建可能用于ProblemJob的所有标签：

library(purrr)
simlist = transpose(SimResults)[[2]] %>% lapply(.,function(x) do.call(cbind,x))
labels = transpose(SimResults)[[3]] %>% lapply(.,function(x) do.call(cbind,x))
CT_list = lapply(simlist, function(x) cbind(x[,1:5], pmax(x[,6],x[,7]) + pmax(x[,8],x[,9]),x[,10,drop = FALSE]))
bool1 = lapply(simlist, function(x) x[,6] > x[,7])
bool2 = lapply(simlist, function(x) x[,8] > x[,9])
special_labels = pmap(list(labels,bool1,bool2), function(x,y,z){
  paste(ifelse(y,x[,6],x[,7]), ifelse(z, x[,8],x[,9]),sep = "/")
})  

labels = lapply(labels, function(x) {
  x = t(x)
  x[] = paste(c("DS","R1","R2","R3","R4","R5D5", "R6D6","R5D7","R6D8","R7"), x)
  t(x)
  }
)

由于您反复从SimResults列表中提取第二个和第三个组件，因此使用purrr:transpose一次性执行此操作是合理的，然后lapply在结果列表中执行此操作再次给出一个更好的形状。 pmax这里是Base R的一部分，并且是并行最大值，本质上是一个向量化的max函数。

由于比较运算符是矢量化的，我们可以沿着列表执行此操作以创建正在检查ProblemJob的布尔条件。就此处的原始代码而言，simlist的组件是循环中s的10个值，每个组件的行是循环中p的5000+值， 10列是您在每次迭代时计算的CT的值。 ifelse是if(...){} else {}

的矢量化版本

预先计算了所有这些，我们现在需要在列表上进行映射，以创建所有输出，这是ProblemJob变量导致问题的位置，因为我无法想到一个简洁的方法空腹去除嵌套的lapply

out_list = pmap(list(CT_list, special_labels, labels, bool1, bool2), function(x,sl,z,b1,b2){
  lapply(1:nrow(x), function(i){
    y = x[i,]
    m = max(y)
    ix = which(y == m)
    # only need to do something special when ix contains a 6
    if(6 %in% ix){
     temp1 = ifelse(b1[i],z[i,6], z[i,7])  
     temp2 = ifelse(b2[i], z[i,8],z[i,9])
     lab = c(temp1,temp2,sl[i], z[i,ix[ix != 6]])
    }else{
      lab = z[i,ix]
    }
    list(FeltCT = m, BottleNeck = ix, CTs = y[ix], ProblemJob = lab)
  }) %>% transpose %>% simplify_all()
}) %>% transpose %>% simplify_all()

我们在transpose %>% simplify_all()这里使用purrr来基本上将相同的命名组件连接到结果列表列表中。您可以通过从out_list

中提取结果来检查3个变量是否得到相同的解决方案

all(out_list$FeltCT == FeltCT)
all(out_list$BottleNeck == BottleNeck)
all(out_list$CTs == CTs)

在这里使用lapply pmap并不完全满意，但希望它有所帮助，它肯定比原来的更快。我现在需要吃饭。

在报告生成中加快循环加倍。也许有列表？

1 个答案: