您好,我的模拟速度非常快。我遇到的问题是加快模拟生成的数据的报告速度。
#Load in relevant libraries
library(splitstackshape)
library(foreach)
library(doParallel)
#sample data for simulation
set.seed(100)
input <- data.frame(JobNum = seq(1:200)
,HangsPerWeek = sample(1:50, 200,replace=T)
,DS.CT = sample(c(38,41,43),200,replace=T)
,C1.CT = sample(c(40,41,42),200,replace=T)
,C2.CT = sample(c(36,41),200,replace=T)
,C3.CT = sample(c(38,39,40),200,replace=T)
,C4.CT = sample(c(40,27),200,replace=T)
,C5D5.CT = sample(c(20,21,22),200,replace=T)
,C6D6.CT = sample(c(20,21,22),200,replace=T)
,C5D7.CT = sample(c(20,21,22),200,replace=T)
,C6D8.CT = sample(c(9,22,23),200,replace=T)
,C7CD.CT = sample(c(40,41),200,replace=T))
input$JobNum<-as.character(input$JobNum)
#expand input file to have a single row per part
partsList<-expandRows(input, "HangsPerWeek")
#Set up cluster using all but one core on machine
#this runs the simulation in parallel
cores=detectCores()
cl <- makeCluster(cores[1]-1)
registerDoParallel(cl)
#Initialize variables
partsOrder <- list()
numSim <- 10
#start simulation
SimResults <- foreach(j=1:numSim) %dopar%{
RobotSimulation <- function(){
#randomize the dataset of parts and record the order
set.seed(100)
parts <- partsList[sample(nrow(partsList)),]
partsOrder <- list(parts$JobNum)
#choose a random sample of parts to populate the conveyor belts
#this random assignment will be constant across all iterations of the simulation
set.seed(101)
LineParts <- partsList[sample(nrow(partsList),234,replace=FALSE),]
#pass parts through system one at a time and record cycle times at each dip
LineParts_dfList <- lapply(seq(nrow(parts)), function(i){
#Index line
LinePartsTemp <- parts[1,]
LinePartsTemp[2:nrow(LineParts),] <- LineParts[1:nrow(LineParts)-1,]
#put new part into system
LinePartsTemp[1,] <- parts[i,]
#update the list of parts on the line
LineParts <<- LinePartsTemp
})
otherstations_veclist <-
list(
DS = vapply(LineParts_dfList, function(df) df[1,'DS.CT'], numeric(1)),
D1 = vapply(LineParts_dfList, function(df) df[10,'C1.CT'], numeric(1)),
D2 = vapply(LineParts_dfList, function(df) df[26,'C2.CT'], numeric(1)),
D3 = vapply(LineParts_dfList, function(df) df[42,'C3.CT'], numeric(1)),
D4 = vapply(LineParts_dfList, function(df) df[57,'C4.CT'], numeric(1)),
D5 = vapply(LineParts_dfList, function(df) df[85,'C5D5.CT'], numeric(1)),
D6 = vapply(LineParts_dfList, function(df) df[120,'C6D6.CT'], numeric(1)),
D7 = vapply(LineParts_dfList, function(df) df[167,'C5D7.CT'], numeric(1)),
D8 = vapply(LineParts_dfList, function(df) df[210,'C6D8.CT'], numeric(1)),
D9 = vapply(LineParts_dfList, function(df) df[216,'C7CD.CT'], numeric(1))
)
jobstations_veclist <-
list(
DS = vapply(LineParts_dfList, function(df) df[1,'JobNum'], character(1)),
D1 = vapply(LineParts_dfList, function(df) df[10,'JobNum'], character(1)),
D2 = vapply(LineParts_dfList, function(df) df[26,'JobNum'], character(1)),
D3 = vapply(LineParts_dfList, function(df) df[42,'JobNum'], character(1)),
D4 = vapply(LineParts_dfList, function(df) df[57,'JobNum'], character(1)),
D5 = vapply(LineParts_dfList, function(df) df[85,'JobNum'], character(1)),
D6 = vapply(LineParts_dfList, function(df) df[120,'JobNum'], character(1)),
D7 = vapply(LineParts_dfList, function(df) df[167,'JobNum'], character(1)),
D8 = vapply(LineParts_dfList, function(df) df[210,'JobNum'], character(1)),
D9 = vapply(LineParts_dfList, function(df) df[216,'JobNum'], character(1))
)
#record results
result <- list(partsOrder = partsOrder, CT = otherstations_veclist, JobNum = jobstations_veclist)
return(result)
}
RobotSimulation()
}
#stop using all cores
stopCluster(cl)
在此运行之后,我创建了4个不同的图形,其中包含双循环以完成模拟的每次迭代。有没有办法继续使用R中的列表来加速计算?或者for循环是唯一的选择吗?
#For every time we add a new part to the line for every simulation
#find the felt cycle time and the bottlenecks
ProblemJob <- c()
FeltCT <- c()
BottleNeck <- c()
CTs <- c()
for(s in 1:numSim){
for(p in 1:dim(partsList)[1]){
CT <- c(SimResults[[s]][[2]][[1]][[p]],
SimResults[[s]][[2]][[2]][[p]],
SimResults[[s]][[2]][[3]][[p]],
SimResults[[s]][[2]][[4]][[p]],
SimResults[[s]][[2]][[5]][[p]],
max(SimResults[[s]][[2]][[6]][[p]],SimResults[[s]][[2]][[7]][[p]])+max(SimResults[[s]][[2]][[8]][[p]], SimResults[[s]][[2]][[9]][[p]]),
SimResults[[s]][[2]][[10]][[p]]
)
FeltCT <- append(FeltCT,max(CT))
BottleNeck <- append(BottleNeck,which(CT==max(CT)))
CTs <- append(CTs,CT[which(CT==max(CT))])
ProblemJob <- append(ProblemJob,if(which(CT==max(CT))==1){paste('DS',SimResults[[s]][[3]][[1]][[p]],sep=' ')}
else if(which(CT==max(CT))==2){paste('R1',SimResults[[s]][[3]][[2]][[p]],sep=' ')}
else if(which(CT==max(CT))==3){paste('R2',SimResults[[s]][[3]][[3]][[p]],sep=' ')}
else if(which(CT==max(CT))==4){paste('R3',SimResults[[s]][[3]][[4]][[p]],sep=' ')}
else if(which(CT==max(CT))==5){paste('R4',SimResults[[s]][[3]][[5]][[p]],sep=' ')}
else if(which(CT==max(CT))==6){c(
if(SimResults[[s]][[2]][[6]][[p]] >= SimResults[[s]][[2]][[7]][[p]]){paste('R5D5',SimResults[[s]][[3]][[6]][[p]],sep=' ')}else{paste('R6D6',SimResults[[s]][[3]][[7]][[p]],sep=' ')}
,if(SimResults[[s]][[2]][[8]][[p]] >= SimResults[[s]][[2]][[9]][[p]]){paste('R5D7',SimResults[[s]][[3]][[8]][[p]],sep=' ')}else{paste('R6D8',SimResults[[s]][[3]][[9]][[p]],sep=' ')}
,paste(if(SimResults[[s]][[2]][[6]][[p]] >= SimResults[[s]][[2]][[7]][[p]]){SimResults[[s]][[3]][[6]][[p]]}else{SimResults[[s]][[3]][[7]][[p]]},
if(SimResults[[s]][[2]][[8]][[p]] >= SimResults[[s]][[2]][[9]][[p]]){SimResults[[s]][[3]][[8]][[p]]}else{SimResults[[s]][[3]][[9]][[p]]},sep='/')
)}
else if(which(CT==max(CT))==7){paste('R7',SimResults[[s]][[3]][[10]][[p]],sep=' ')}
)
}
}
BottleNeckPercent <- 100*tabulate(BottleNeck)/length(BottleNeck)
RobotAvg<-aggregate(CTs~BottleNeck,FUN=mean)
base <- data.frame(BottleNeck=seq(1:7),CTs=rep(0,7))
RobotAvg <- merge(base,RobotAvg,by='BottleNeck',all=TRUE)
par(mfrow=c(2,2))
bp <- barplot(RobotAvg$CTs.y,
names.arg=c('DS','R1','R2','R3','R4','R5/R6','R7'),
col="lightblue",
main="Average Cycle Time per Robot",
xlab="Robot",ylab="Seconds")
text(bp,RobotAvg$CTs.y,round(RobotAvg$CTs.y),pos=1)
hist(FeltCT,col='yellow',main=paste('Avg Cycle Time:',round(mean(FeltCT),1),sep=' '))
barplot(head(table(ProblemJob)[order(-table(ProblemJob))],20),las=2,col='red',main='Top 20 Problem Jobs and Location')
bp2<-barplot(BottleNeckPercent
,col='green'
,names=c('DS','R1','R2','R3','R4','R5/R6','R7')
,main='% Cause of Bottleneck'
,xlab='Robot'
,ylab='%')
text(bp2,BottleNeckPercent,paste(round(BottleNeckPercent,2),'%',sep=''),pos=1)
答案 0 :(得分:1)
好的,所以我认为我最初的想法是,要想达到你想要的东西并不太难,在我去吃晚餐之前这将是一个快速的想法。与此不同的是,我对解决方案仍然不满意,但它主要是ProblemJob
变量让它变得痛苦。我的笔记本电脑上的计时解决方案为我的解决方案提供了大约1.5 - 1.7秒的时间,而当前的解决方案为36-39秒。我相信它可以提高效率,但我现在需要吃。我的答案与你的答案完全相同,但{OPO评论中讨论的ProblemJob
除外。任何方式没有进一步的ado:
让你慢下来的一件事就是在双循环中随处附加向量。删除它会产生巨大的差异。问题在于您提前知道的一些变量,预先分配了多少结果。但是,您在列表和lapply
函数上使用purrr::pmap
进行了大量计算。还有一些地方你多次计算同样的事情。
我们可以一次性预先计算所有CT
次迭代,这样可以在嵌套迭代结构中减少一些,并创建可能用于ProblemJob
的所有标签:
library(purrr)
simlist = transpose(SimResults)[[2]] %>% lapply(.,function(x) do.call(cbind,x))
labels = transpose(SimResults)[[3]] %>% lapply(.,function(x) do.call(cbind,x))
CT_list = lapply(simlist, function(x) cbind(x[,1:5], pmax(x[,6],x[,7]) + pmax(x[,8],x[,9]),x[,10,drop = FALSE]))
bool1 = lapply(simlist, function(x) x[,6] > x[,7])
bool2 = lapply(simlist, function(x) x[,8] > x[,9])
special_labels = pmap(list(labels,bool1,bool2), function(x,y,z){
paste(ifelse(y,x[,6],x[,7]), ifelse(z, x[,8],x[,9]),sep = "/")
})
labels = lapply(labels, function(x) {
x = t(x)
x[] = paste(c("DS","R1","R2","R3","R4","R5D5", "R6D6","R5D7","R6D8","R7"), x)
t(x)
}
)
由于您反复从SimResults
列表中提取第二个和第三个组件,因此使用purrr:transpose
一次性执行此操作是合理的,然后lapply
在结果列表中执行此操作再次给出一个更好的形状。 pmax
这里是Base R的一部分,并且是并行最大值,本质上是一个向量化的max
函数。
由于比较运算符是矢量化的,我们可以沿着列表执行此操作以创建正在检查ProblemJob
的布尔条件。就此处的原始代码而言,simlist
的组件是循环中s
的10个值,每个组件的行是循环中p
的5000+值, 10列是您在每次迭代时计算的CT
的值。 ifelse
是if(...){} else {}
预先计算了所有这些,我们现在需要在列表上进行映射,以创建所有输出,这是ProblemJob
变量导致问题的位置,因为我无法想到一个简洁的方法空腹去除嵌套的lapply
out_list = pmap(list(CT_list, special_labels, labels, bool1, bool2), function(x,sl,z,b1,b2){
lapply(1:nrow(x), function(i){
y = x[i,]
m = max(y)
ix = which(y == m)
# only need to do something special when ix contains a 6
if(6 %in% ix){
temp1 = ifelse(b1[i],z[i,6], z[i,7])
temp2 = ifelse(b2[i], z[i,8],z[i,9])
lab = c(temp1,temp2,sl[i], z[i,ix[ix != 6]])
}else{
lab = z[i,ix]
}
list(FeltCT = m, BottleNeck = ix, CTs = y[ix], ProblemJob = lab)
}) %>% transpose %>% simplify_all()
}) %>% transpose %>% simplify_all()
我们在transpose %>% simplify_all()
这里使用purrr
来基本上将相同的命名组件连接到结果列表列表中。您可以通过从out_list
all(out_list$FeltCT == FeltCT)
all(out_list$BottleNeck == BottleNeck)
all(out_list$CTs == CTs)
在这里使用lapply
pmap
并不完全满意,但希望它有所帮助,它肯定比原来的更快。我现在需要吃饭。