我编写了这个函数来执行留一法交叉验证(LOOCV),以对数据集中的样本进行分类。问题是它运行时间太长,而我需要用它来进行150次迭代的自助法(bootstrap)。我怎样才能让它运行得更快?
DoLOOCVa2 <- function(X,Class,S,MTHD,LV) {
  #
  # Leave-one-out cross validation (LOO-CV) of a classifier.
  #
  # Arguments:
  #   X     input matrix, one sample per row (a data frame is coerced with
  #         as.matrix())
  #   Class class vector, one entry per row of X; it is subtracted from the
  #         numeric prediction, so it must be numeric
  #   S     scaling code, passed unchanged to pretreat():
  #         0 = None; 1 = MC; 2 = AS; 3 = RS1; 4 = RS2; 5 = Norm
  #   MTHD  classification function:
  #         1 = pls.lda, 2 = svm (radial kernel), 3 = randomForest
  #   LV    vector of settings to evaluate, one full LOO-CV run per element:
  #         the ncomp argument for pls.lda, ntree for randomForest, and
  #         ignored by svm (pass 0)
  #
  # Value:
  #   MTHD 1 or 3: list(OptLVs = <first best setting>, Overall = <% correct
  #                per element of LV>)
  #   MTHD 2:      the numeric vector of % correct only
  #
  # Differs from DoLOOCV2 in that only the first optimum is returned, i.e. the
  # one corresponding to the maximum %Overall.
  #
  # NOTE(review): for MTHD == 1 OptLVs is the *position* in LV of the best
  # result, whereas for MTHD == 3 it is the LV *value* at that position. The
  # two only agree when LV is 1:k. Kept as-is because callers may rely on it.
  #
  if (length(MTHD) != 1L || !(MTHD %in% 1:3)) {
    stop("MTHD must be 1 (pls.lda), 2 (svm) or 3 (randomForest)", call. = FALSE)
  }
  if (is.data.frame(X)) {
    # X[j, ] on a data frame is a one-row data frame, not a vector, which the
    # fold extraction below is not written for; work on a plain matrix.
    X <- as.matrix(X)
  }
  n_obs <- nrow(X)
  if (is.null(n_obs) || n_obs < 2L) {
    stop("X must be a matrix with at least two rows", call. = FALSE)
  }
  if (length(Class) != n_obs) {
    stop("Class must have one entry per row of X", call. = FALSE)
  }
  if (length(LV) < 1L) {
    stop("LV must contain at least one value", call. = FALSE)
  }

  # Only read pretreat.r when pretreat() is not already available. Sourcing it
  # unconditionally re-parses the file on every call, which adds up when this
  # function is run repeatedly (e.g. inside a bootstrap).
  if (!exists("pretreat", mode = "function")) {
    source("pretreat.r")
  }

  # Preallocate the per-sample and per-setting results instead of growing them.
  diffs <- numeric(n_obs)
  overall <- numeric(length(LV))

  cat(paste("\nSetting number of LVs to ", LV, "...\n"))
  # One complete LOO-CV run per element of LV
  for (i in seq_along(LV)) {
    cat(paste("\nLOO-CV for LV",i,"\n"))
    # NOTE(review): the pre-treated folds do not depend on i, so pretreat() is
    # repeated length(LV) times per sample. If pretreat() is deterministic
    # (not visible from here), computing the folds once and reusing them
    # would save that work.
    for (j in seq_len(n_obs)) {
      # drop = FALSE keeps matrices even with a single column or only two rows
      train_x <- X[-j, , drop = FALSE] # "training set": everything but row j
      train_y <- Class[-j]
      test_x <- X[j, , drop = FALSE] # "test set": row j as a 1 x p matrix
      rownames(test_x) <- NULL

      scaled <- pretreat(train_x, test_x, S) # Apply data pre-treatment
      train_scaled <- scaled$trDATAscaled
      test_scaled <- scaled$tstDATAscaled

      # NOTE(review): as.numeric() on a factor yields the level index, so the
      # predictions below presumably match Class only when the classes are
      # coded 1..K and every class is still present after removing row j.
      if (MTHD == 1) {
        # PLS-LDA
        fit <- pls.lda(train_scaled, train_y, test_scaled, LV[i])
        predicted <- as.numeric(fit$predclass)
      } else if (MTHD == 2) {
        # SVM (LV is not used)
        MODEL <- svm(train_scaled, as.factor(train_y), scale = FALSE,
                     type = "C-classification", kernel = "radial")
        predicted <- as.numeric(predict(MODEL, test_scaled))
      } else {
        # Random Forests (the LV value is the number of trees)
        fit <- randomForest(x = train_scaled, y = as.factor(train_y),
                            xtest = test_scaled, ntree = LV[i])
        predicted <- as.numeric(fit$test$predicted)
      }
      # Zero means the predicted class equals the actual class
      diffs[j] <- predicted - Class[j]
    }
    # Overall success rate of classification for setting i
    overall[i] <- 100 * sum(diffs == 0, na.rm = TRUE) / n_obs
  }

  cat("\nThe %success classified was:\n")
  show(overall)

  if (MTHD == 2) {
    return(overall)
  }

  best <- which(overall == max(overall)) # Position(s) of the best success rate
  if (MTHD == 1) {
    cat("\nThe suggested optimum LV(s) are: \n")
    OptLVs <- best
  } else {
    cat("\nThe suggested optimum TREE(s) are: \n")
    OptLVs <- LV[best]
  }
  show(OptLVs)
  return(list(OptLVs=OptLVs[1],Overall=overall))
} # End function
我设法对它进行了性能分析,结果如下:
total.time total.pct self.time self.pct
"DoLOOCVa2" 17.90 100.00 0.04 0.22
"pls.lda" 9.62 53.74 0.02 0.11
"pretreat" 8.20 45.81 0.02 0.11
"source" 6.02 33.63 1.24 6.93
"lda" 3.98 22.23 0.02 0.11
"pretreat1" 3.96 22.12 0.04 0.22
"lda.formula" 3.96 22.12 0.00 0.00
"FUN" 3.48 19.44 1.62 9.05