我正在尝试为PLS-DA分类进行自举(bootstrap)交叉验证。我必须针对不同的数据集,用六(6)种不同的缩放方法分别重复这一流程。问题是每次运行都需要2个多小时才能完成。如果有办法提高速度,请帮忙指点。代码如下。谢谢。
注意:X是一个33 x 160的数据矩阵,包含健康样本和患病样本,类别分别标记为"1"和"2"。
# Class labels for the 33 samples: the first 18 belong to class 1 and the
# remaining 15 to class 2.
CLASS <- rep(c(1, 2), times = c(18, 15))

# Show the scaling menu and read the user's choice as a number.
cat(
  "\n select the scaling....",
  "0 = Raw; 1 = Mean-centre; 2 = Auto-scale; 3 = Range-scale (-1 to 1); 4 = Range- scale; 5 = Normalise; 6 = pareto .\n\n",
  sep = ""
)
S <- as.numeric(readline("Enter the scaling method, e.g.,0 :"))

# Preallocate one slot per resampling run for every performance metric and
# for the number of latent variables selected in that run.
ACC <- SPEC <- SENS <- NPV <- PPV <- FDR <- LVn <- numeric(150)
# Repeated random 70/30 hold-out validation of the PLS-DA classifier.
# Each run draws a fresh training subset, selects the number of latent
# variables (LVs) on it, classifies the held-out samples and stores the
# resulting performance metrics (in percent) at position i of the
# preallocated result vectors.
#
# NOTE(review): the run time is presumably dominated by DoLOOCVa2 (a
# leave-one-out search over 20 candidate LV counts, repeated every run);
# profile it (e.g. with Rprof) before optimising anything else here.

# Set the JIT level once instead of once per test sample; the namespace
# prefix makes the call work even when `compiler` is not attached.
compiler::enableJIT(3)

# Training-set size (70% of the samples) does not change between runs.
smp_size <- floor(0.70 * nrow(X))

for (i in seq_along(ACC)) {
  # Random split into training and test rows.
  train_ind <- sample(seq_len(nrow(X)), size = smp_size)
  print(train_ind)

  TR <- X[train_ind, ]       # Training dataset
  TST <- X[-train_ind, ]     # Testing dataset
  CTR <- CLASS[train_ind]    # Training classes
  CTST <- CLASS[-train_ind]  # Testing classes

  # Leave-one-out cross-validation on the training data to determine the
  # best number of LVs for the PLS-DA classification.
  OptLV <- DoLOOCVa2(TR, CTR, S, 1, seq(1, 20, 1))

  # Scale training and test data with the selected method.
  C1 <- pretreat(TR, TST, S)
  C2 <- C1$trDATAscaled
  C3 <- C1$tstDATAscaled

  # Classify the test samples with the optimal number of LVs.
  C4 <- pls.lda(C2, CTR, C3, OptLV$OptLVs)

  # Go through character so that a factor result yields its labels rather
  # than its internal level codes; a numeric result is left unchanged.
  C5 <- as.numeric(as.character(C4$predclass))
  C6 <- C5 - CTST  # 0 where the prediction matches the true class

  # Confusion-matrix counts; class 1 is treated as negative and class 2 as
  # positive.
  correct <- C6 == 0
  TN <- sum(correct & CTST == 1)
  TP <- sum(correct & CTST == 2)
  FP <- sum(!correct & CTST == 1)
  FN <- sum(!correct & CTST == 2)

  # A metric is NaN when its denominator is zero, e.g. when no class-1
  # sample was drawn into the test set.
  ACC[i] <- 100 * (TN + TP) / (TN + TP + FP + FN)  # Percentage accuracy
  SPEC[i] <- 100 * TN / (TN + FP)  # Percentage specificity
  SENS[i] <- 100 * TP / (TP + FN)  # Percentage sensitivity
  NPV[i] <- 100 * TN / (TN + FN)   # Percentage negative predictive value
  PPV[i] <- 100 * TP / (TP + FP)   # Percentage positive predictive value
  FDR[i] <- 100 * FP / (TP + FP)   # Percentage false discovery rate
  LVn[i] <- OptLV$OptLVs           # Number of LVs used in this run
}