如何加快R中PLS-DA自举(bootstrap)的运行速度

时间:2014-07-30 12:49:32

标签: r cross-validation statistics-bootstrap

我正在尝试为PLS-DA分类进行自举(bootstrap)交叉验证。我必须针对不同的数据集,分别用六(6)种不同的缩放方法重复这一过程。问题是每次运行都需要2个多小时才能完成。如果有办法提高速度,请帮忙。以下是我写的代码。谢谢。

注意:X是一个33 x 160的数据矩阵,包含健康样本和患病样本,分别归类为"1"和"2"。

    # Class label for each of the 33 rows of X, in row order:
    # the first 18 samples belong to class 1, the remaining 15 to class 2.
    CLASS <- rep(c(1, 2), times = c(18, 15))

# Ask the user which scaling method to apply. The chosen code is stored in S
# and later handed to DoLOOCVa2() and pretreat().
cat("\n select the scaling....")
cat("0 = Raw; 1 = Mean-centre; 2 = Auto-scale; 3 = Range-scale (-1 to 1); 4 = Range-       scale; 5 = Normalise; 6 = pareto .\n\n")
S <- as.numeric(readline("Enter the scaling method, e.g.,0 :"))

# Fail fast on unusable input. readline() returns "" when R is not running
# interactively, and as.numeric() turns "" or any non-numeric text into NA;
# without this check that NA would only surface deep inside the resampling.
if (is.na(S) || !(S %in% 0:6)) {
  stop("Scaling method must be one of 0, 1, 2, 3, 4, 5 or 6.", call. = FALSE)
}

# Repeated random hold-out validation of the PLS-DA classifier.
#
# For each of n_boot resamplings: draw a random 70% training subset of X,
# choose the number of latent variables (LVs) by leave-one-out CV on the
# training rows, scale training/test data with method S, classify the held-out
# rows and record the performance figures below (all in percent).
#
# Reads:  X, CLASS, S, and the helpers DoLOOCVa2(), pretreat(), pls.lda().
# Writes: ACC, SPEC, SENS, NPV, PPV, FDR, LVn (one entry per resampling).
#
# NOTE(review): the run time is most likely dominated by DoLOOCVa2(), which is
# not visible here (LOOCV over 20 candidate LVs, repeated n_boot times) --
# profile it to confirm before optimising anything else.

n_boot <- 150 # number of random train/test resamplings

ACC <- numeric(n_boot)  # accuracy
SPEC <- numeric(n_boot) # specificity
SENS <- numeric(n_boot) # sensitivity
NPV <- numeric(n_boot)  # negative predictive value
PPV <- numeric(n_boot)  # positive predictive value
FDR <- numeric(n_boot)  # false discovery rate
LVn <- numeric(n_boot)  # number of LVs selected in each resampling

# Switch the byte-code JIT on once. It was previously re-enabled on every pass
# of the per-sample counting loop, which is pure overhead.
invisible(compiler::enableJIT(3))

# Split sizes do not change between resamplings, so compute them once.
n_obs <- nrow(X)
## 70% of the sample size
smp_size <- floor(0.70 * n_obs)

for (i in seq_len(n_boot)) {

  # Split data
  train_ind <- sample(seq_len(n_obs), size = smp_size)
  print(train_ind)

  TR <- X[train_ind, , drop = FALSE]   # Training Dataset
  TST <- X[-train_ind, , drop = FALSE] # Testing Dataset
  CTR <- CLASS[train_ind]              # Training classes
  CTST <- CLASS[-train_ind]            # Testing classes

  # Do Leave One Out Cross-Validation to determine best LV for PLSDA
  # Classification (candidate LVs: 1 to 20)
  OptLV <- DoLOOCVa2(TR, CTR, S, 1, seq(1, 20, 1))

  # Scale the training and test sets with the selected method
  C1 <- pretreat(TR, TST, S)
  C2 <- C1$trDATAscaled
  C3 <- C1$tstDATAscaled

  # Determine the predicted classes with the optimal LV
  C4 <- pls.lda(C2, CTR, C3, OptLV$OptLVs) # Perform the classification

  # Extract the predicted class.
  # NOTE(review): assumes as.numeric() on predclass yields the same 1/2 codes
  # used in CLASS -- confirm against the pls.lda() return value.
  C5 <- as.numeric(C4$predclass)
  C6 <- C5 - CTST # 0 where the prediction matches the true class

  # Confusion-matrix counts, with class 2 as "positive" and class 1 as
  # "negative". Vectorised sums replace the former element-by-element loop.
  is_correct <- C6 == 0
  TN <- sum(is_correct & CTST == 1)
  TP <- sum(is_correct & CTST == 2)
  FP <- sum(!is_correct & CTST == 1)
  FN <- sum(!is_correct & CTST == 2)

  # A zero denominator (e.g. no positive predictions) yields NaN, not an error.
  ACC[i] <- 100 * (TN + TP) / (TN + TP + FP + FN)
  SPEC[i] <- 100 * TN / (TN + FP) # Percentage Specificity
  SENS[i] <- 100 * TP / (TP + FN) # Percentage Sensitivity
  NPV[i] <- 100 * TN / (TN + FN)  # Percentage Negative Predictive Value
  PPV[i] <- 100 * TP / (TP + FP)  # Percentage Positive Predictive Value
  FDR[i] <- 100 * FP / (TP + FP)  # Percentage False Discovery Rate
  LVn[i] <- OptLV$OptLVs

}

0 个答案:

没有答案