使用for循环矢量化索引

时间:2017-03-02 21:40:33

标签: r for-loop vector vectorization

我正在尝试模拟装配线。我有一份零件清单以及他们在每个车站花了多少时间。我试图一次一个地通过装配线发送零件,并记录每个工位的时间。但是,我的工作方式是将for循环嵌套在for循环中。必须有更好的方法来做到这一点。

parts <- data.frame(JobNum = sample(c('a','b','c','d'),400,replace=TRUE)
,DS.CT = sample.int(10,400,replace=TRUE)
,C1.CT = sample.int(10,400,replace=TRUE)
,C2.CT = sample.int(10,400,replace=TRUE)
,C3.CT = sample.int(10,400,replace=TRUE)
,C4.CT = sample.int(10,400,replace=TRUE)
,C5D5.CT = sample.int(10,400,replace=TRUE)
,C6D6.CT = sample.int(10,400,replace=TRUE)
,C5D7.CT = sample.int(10,400,replace=TRUE)
,C6D8.CT = sample.int(10,400,replace=TRUE)
,C7CD.CT = sample.int(10,400,replace=TRUE)
)

LineParts <- parts[sample(nrow(parts),234,replace=FALSE),]

#Initialize Dip collecting variables
DS <- c()
D1 <- c()
D2 <- c()
D3 <- c()
D4 <- c()
D5 <- c()
D6 <- c()
D7 <- c()
D8 <- c()
D9 <- c()

for(i in 1:dim(parts)[1]){

#Create temporary dataframe for use in indexing line
LinePartsTemp <- data.frame(matrix("",nrow=234,ncol=11))
colnames(LinePartsTemp)=names(LineParts)
LinePartsTemp$JobNum <- as.character(LinePartsTemp$JobNum)
LinePartsTemp$DS.CT <- as.integer(LinePartsTemp$DS.CT)
LinePartsTemp$C1.CT <- as.integer(LinePartsTemp$C1.CT)
LinePartsTemp$C2.CT <- as.integer(LinePartsTemp$C2.CT)
LinePartsTemp$C3.CT <- as.integer(LinePartsTemp$C3.CT)
LinePartsTemp$C4.CT <- as.integer(LinePartsTemp$C4.CT)
LinePartsTemp$C5D5.CT <- as.integer(LinePartsTemp$C5D5.CT)
LinePartsTemp$C6D6.CT <- as.integer(LinePartsTemp$C6D6.CT)
LinePartsTemp$C5D7.CT <- as.integer(LinePartsTemp$C5D7.CT)
LinePartsTemp$C6D8.CT <- as.integer(LinePartsTemp$C6D8.CT)
LinePartsTemp$C7CD.CT <- as.integer(LinePartsTemp$C7CD.CT)

#Index line
for(j in 1:dim(LineParts)[1]){
    LinePartsTemp[j+1,] <- LineParts[j,]
}

#put new part into system
LinePartsTemp[1,] <- parts[i,]

#update the list of parts on the line
LineParts <- LinePartsTemp

#Append CT values at stations
DS <- append(DS,LineParts[1,'DS.CT'])
D1 <- append(D1,LineParts[10,'C1.CT'])
D2 <- append(D2,LineParts[26,'C2.CT'])
D3 <- append(D3,LineParts[42,'C3.CT'])
D4 <- append(D4,LineParts[57,'C4.CT'])
D5 <- append(D5,LineParts[85,'C5D5.CT'])
D6 <- append(D6,LineParts[120,'C6D6.CT'])
D7 <- append(D7,LineParts[167,'C5D7.CT'])
D8 <- append(D8,LineParts[210,'C6D8.CT'])
D9 <- append(D9,LineParts[234,'C7CD.CT'])

}

编辑:添加样本数据

1 个答案:

答案 0 :(得分:1)

考虑与列表交互,避免初始化空容器,然后使用大量独立的环境对象附加到它们。除了输入之外,下面只使用了两个对象。

  1. 首先,构建一个 LineParts 数据框列表, LineParts_dfList
  2. 然后,将所需的数据点提取到矢量列表 stations_veclist 中。
  3. 您会注意到数据框列表的lapply使用<<-运算符来更新全局对象(本地函数的范围之外),因为 LineParts 需要重新使用更新的值:

    LineParts_dfList <- lapply(seq(nrow(parts)), function(i){      
      #Index line
      LinePartsTemp <- parts[1,]
      LinePartsTemp[2:nrow(LineParts),] <- LineParts[1:nrow(LineParts)-1,]
    
      #put new part into system
      LinePartsTemp[1,] <- parts[i,]
    
      #update the list of parts on the line
      LineParts <<- LinePartsTemp      
    })
    
    # Extract CT values at stations
    stations_veclist <- 
      list(
        DS = vapply(LineParts_dfList, function(df) df[1,'DS.CT'], numeric(1)),
        D1 = vapply(LineParts_dfList, function(df) df[10,'C1.CT'], numeric(1)),
        D2 = vapply(LineParts_dfList, function(df) df[26,'C2.CT'], numeric(1)),
        D3 = vapply(LineParts_dfList, function(df) df[42,'C3.CT'], numeric(1)),
        D4 = vapply(LineParts_dfList, function(df) df[57,'C4.CT'], numeric(1)),
        D5 = vapply(LineParts_dfList, function(df) df[85,'C5D5.CT'], numeric(1)),
        D6 = vapply(LineParts_dfList, function(df) df[120,'C6D6.CT'], numeric(1)),
        D7 = vapply(LineParts_dfList, function(df) df[167,'C5D7.CT'], numeric(1)),
        D8 = vapply(LineParts_dfList, function(df) df[210,'C6D8.CT'], numeric(1)),
        D9 = vapply(LineParts_dfList, function(df) df[234,'C7CD.CT'], numeric(1))
      )
    

    为了避免许多vapply次调用,请考虑将所有 LineParts 数据框项目绑定到一个大型数据框 LinePartsAll (N = 93,600 obs for 234 X 400 ),然后按行顺序提取值:

    LinePartsAll <- do.call(rbind, LineParts_dfList)
    
    otherstations_veclist <- 
      list(
        DS = LinePartsAll[seq(1,93600, by=234),'DS.CT'],
        D1 = LinePartsAll[seq(10,93600, by=234),'C1.CT'],
        D2 = LinePartsAll[seq(26,93600, by=234),'C2.CT'],
        D3 = LinePartsAll[seq(42,93600, by=234),'C3.CT'],
        D4 = LinePartsAll[seq(57,93600, by=234),'C4.CT'],
        D5 = LinePartsAll[seq(85,93600, by=234), 'C5D5.CT'],
        D6 = LinePartsAll[seq(120,93600, by=234), 'C6D6.CT'],
        D7 = LinePartsAll[seq(167,93600, by=234),'C5D7.CT'],
        D8 = LinePartsAll[seq(210,93600, by=234), 'C6D8.CT'],
        D9 = LinePartsAll[seq(234,93600, by=234), 'C7CD.CT']
      )
    

    要检查,这个更新的,更快的方法会重新生成与原始的双for循环过程相同的结束值。要使用发布的示例数据进行测试,您必须在部分 LineParts 分配之前设置示例种子set.seed(###),以重新运行相同的随机数:

    all.equal(DS, stationsList$DS)
    # [1] TRUE
    all.equal(D1, stationsList$D1)
    # [1] TRUE
    all.equal(D9, stationsList$D9)
    # [1] TRUE
    
    all.equal(stations_veclist, otherstations_veclist)
    # [1] TRUE