使R中的LOOCV函数运行得更快

时间:2014-07-31 12:32:39

标签: r matlab

我编写了下面这个函数,用留一法交叉验证(LOOCV)对数据集中的样本进行分类。问题是它运行时间太长,而我需要在150次迭代的自举(bootstrap)中反复调用它。我怎样才能让它运行得更快?

DoLOOCVa2 <- function(X, Class, S, MTHD, LV) {
  #
  # Leave-one-out cross-validation (LOO-CV) of a classifier.
  #
  # Arguments:
  #   X     input matrix, one sample per row
  #   Class class vector with one numeric label per row of X
  #   S     scaling code handed to pretreat():
  #         0 = None; 1 = MC; 2 = AS; 3 = RS1; 4 = RS2; 5 = Norm
  #   MTHD  classifier: 1 = pls.lda, 2 = svm, 3 = randomForest
  #   LV    vector of settings to try: number of latent variables for
  #         MTHD = 1, number of trees for MTHD = 3 (= 0 if SVM is used)
  #
  # Returns:
  #   MTHD = 1: list(OptLVs = first index into LV with the highest success
  #             rate, Overall = success rate (%) for every element of LV)
  #   MTHD = 3: list(OptLVs = first value of LV with the highest success
  #             rate, Overall = as above)
  #   MTHD = 2: the success-rate vector only
  # NOTE(review): for MTHD = 1 the index and the LV value coincide only when
  # LV is 1:k -- confirm that callers rely on that.
  #
  # Differs from DoLOOCV2 in that only the minimum LV is outputted
  # corresponding to the maximum %Overall.
  #
  # pls.lda, svm and randomForest must already be attached by the caller
  # (presumably from plsgenomics, e1071 and randomForest -- TODO confirm).
  #

  # Source pretreat() only when it is not already defined. Re-sourcing the
  # file on every call is pure overhead when this function is called
  # repeatedly (e.g. from a bootstrap loop).
  if (!exists("pretreat", mode = "function")) {
    source("pretreat.r")
  }

  n_obs <- nrow(X)
  if (is.null(n_obs) || n_obs < 2L) {
    stop("X must be a matrix with at least two rows", call. = FALSE)
  }
  if (length(Class) != n_obs) {
    stop("Class must have one element per row of X", call. = FALSE)
  }
  if (length(MTHD) != 1L || !(MTHD %in% c(1, 2, 3))) {
    stop("MTHD must be 1 (pls.lda), 2 (svm) or 3 (randomForest)",
         call. = FALSE)
  }
  if (length(LV) < 1L) {
    stop("LV must contain at least one value", call. = FALSE)
  }

  # Convert a prediction to its numeric class label. as.numeric() on a factor
  # returns the level index, which equals the label only when the training
  # labels are exactly 1..K; going through as.character() recovers the label.
  as_label <- function(pred) {
    if (is.factor(pred)) {
      as.numeric(as.character(pred))
    } else {
      as.numeric(pred)
    }
  }

  # Fit the selected classifier on the training fold and return the predicted
  # label of the single held-out sample.
  classify <- function(train_x, train_y, test_x, setting) {
    if (MTHD == 1) {
      # PLS-LDA
      fit <- pls.lda(train_x, train_y, test_x, setting)
      as_label(fit$predclass)
    } else if (MTHD == 2) {
      # SVM (the LV setting is not used)
      model <- svm(train_x, as.factor(train_y), scale = FALSE,
                   type = "C-classification", kernel = "radial")
      as_label(predict(model, test_x))
    } else {
      # Random forests (the LV setting is the number of trees)
      fit <- randomForest(x = train_x, y = as.factor(train_y),
                          xtest = test_x, ntree = setting)
      as_label(fit$test$predicted)
    }
  }

  # The pre-treated folds do not depend on LV, so they are computed on the
  # first pass and reused for the remaining LV values. This assumes pretreat()
  # is a pure function of its arguments -- TODO confirm. The cache holds about
  # n_obs copies of X, so it is skipped when that would exceed ~512 MB.
  max_cache_bytes <- 512 * 1024^2
  reuse_folds <- length(LV) > 1L &&
    as.numeric(utils::object.size(X)) * n_obs <= max_cache_bytes
  folds <- vector("list", n_obs)

  errors <- numeric(n_obs)       # predicted minus actual class, per sample
  C6 <- numeric(length(LV))      # overall success rate (%), per LV value

  cat(paste("\nSetting number of LVs to ", LV, "...\n"))
  for (i in seq_along(LV)) {
    cat(paste("\nLOO-CV for LV",i,"\n"))
    for (j in seq_len(n_obs)) {
      fold <- if (reuse_folds) folds[[j]] else NULL
      if (is.null(fold)) {
        # Row j is the "test set"; every other row is the "training set".
        # drop = FALSE keeps the training set a matrix for small inputs.
        scaled <- pretreat(X[-j, , drop = FALSE], t(as.matrix(X[j, ])), S)
        fold <- list(train = scaled$trDATAscaled,
                     test = scaled$tstDATAscaled)
        if (reuse_folds) {
          folds[[j]] <- fold
        }
      }
      errors[j] <- classify(fold$train, Class[-j], fold$test, LV[i]) -
        Class[j]
    }
    # A sample is correctly classified when the difference is exactly zero;
    # NA differences count as misclassified.
    C6[i] <- 100 * sum(errors == 0, na.rm = TRUE) / n_obs
  }
  cat("\nThe %success classified was:\n")

  show(C6)
  if (MTHD == 2) {
    return(C6)
  }

  best <- which(C6 == max(C6))   # every setting that attains the maximum
  if (MTHD == 1) {
    cat("\nThe suggested optimum LV(s) are: \n")
    OptLVs <- best
  } else {
    cat("\nThe suggested optimum TREE(s) are: \n")
    OptLVs <- LV[best]
  }
  show(OptLVs)
  list(OptLVs = OptLVs[1], Overall = C6)
}   # End function

我设法对该函数做了性能分析(profiling),结果如下:

total.time total.pct self.time self.pct
"DoLOOCVa2"        17.90    100.00      0.04     0.22
"pls.lda"           9.62     53.74      0.02     0.11
"pretreat"          8.20     45.81      0.02     0.11
"source"            6.02     33.63      1.24     6.93
"lda"               3.98     22.23      0.02     0.11
"pretreat1"         3.96     22.12      0.04     0.22
"lda.formula"       3.96     22.12      0.00     0.00
"FUN"               3.48     19.44      1.62     9.05

0 个答案:

没有答案