Question

我正在使用R中的caret包进行PLS-DA（偏最小二乘判别分析）。我的目标是使用牛奶的中红外光谱预测奶牛的状态（0或1）。我想比较系数，以了解哪些光谱点对预测的贡献最大。如果使用caret中的“preProc”选项对数据进行中心化和缩放（标准化），得到的系数是标准化的还是非标准化的？我应该使用标准化系数还是非标准化系数来识别重要变量？

这是我在R中的代码：

# Sample of the data set: 10 rows and 11 columns.
#   status    - integer cow status to be predicted (0 or 1)
#   X67..X87  - 10 numeric spectral points (predictors)
# Row names are the default 1..10.

data<-structure(list(status = c(1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 
1L, 1L), X67 = c(0.0621632561087608, 0.0607260726392266, 0.0625290125608445, 
0.0630319677293305, 0.0647925734519963, 0.0652132406830785, 0.0642152130603786, 
0.0632935389876363, 0.0630211532115932, 0.0653552338480952), 
    X69 = c(0.0412880629301071, 0.0411925278604031, 0.0422371216118336, 
    0.0428165234625339, 0.0449542962014675, 0.0450734049081802, 
    0.0436325967311859, 0.0429527163505555, 0.0416706018149853, 
    0.0443116500973701), X80 = c(-0.0138179995119572, -0.0144830904901028, 
    -0.0144161432981491, -0.013074841350317, -0.0167389884591103, 
    -0.0159232392907143, -0.0143161900341511, -0.0138954184949399, 
    -0.0147733353078366, -0.0134175941348076), X81 = c(-0.0134893320500851, 
    -0.0142031051218509, -0.0142825171351433, -0.0127705596387386, 
    -0.0168376825749874, -0.0157066956162453, -0.0140237100422382, 
    -0.0135413259267807, -0.0144432000815868, -0.0128327533602714
    ), X82 = c(-0.0121541880071164, -0.0126882530748845, -0.0128774531185627, 
    -0.0113642327487468, -0.0153397060930729, -0.0141194649040699, 
    -0.012583240866661, -0.0119865834712983, -0.0129532031714916, 
    -0.0113681443035603), X83 = c(-0.0100522302091122, -0.0100194588303566, 
    -0.0103428065776825, -0.0090412348508835, -0.012253813445568, 
    -0.0112871341407299, -0.01005644723773, -0.00953329727053641, 
    -0.0103608369827271, -0.0091012343764305), X84 = c(-0.0068778395652771, 
    -0.006202656775713, -0.0066996179521084, -0.0058559291064739, 
    -0.0077349841594696, -0.0070663541555404, -0.00633592158555989, 
    -0.0060042813420295, -0.0066741779446602, -0.0058899037539959
    ), X85 = c(-0.0028718337416649, -0.0016290470957756, -0.0022227615118026, 
    -0.0018559470772744, -0.00229159742593769, -0.00200331956148141, 
    -0.0016555078327656, -0.0015552900731563, -0.0021986812353134, 
    -0.00184135138988499), X86 = c(0.001129399985075, 0.0027880594134331, 
    0.002091933041811, 0.0021506287157535, 0.0029507651925087, 
    0.00285872071981429, 0.00294967368245119, 0.0027530193328858, 
    0.0021884441375732, 0.0023270919919014), X87 = c(0.0043732412159443, 
    0.0061031468212605, 0.0052727945148945, 0.0052438415586949, 
    0.00681021064519879, 0.0065649412572384, 0.0064241252839565, 
    0.00608809292316441, 0.0055649057030678, 0.0056490488350391
    )), row.names = c(NA, 10L), class = "data.frame")



# Fit a PLS-DA classifier with caret on the spectral data in `data` and
# extract the regression coefficients of the final model.
library(caret) # createFolds(), trainControl(), train(), twoClassSummary()

# Response: two-level factor built from the 0/1 status column. Raw value 1
# becomes the first level ("status_1") and raw value 0 becomes "status_2";
# caret's twoClassSummary() treats the first level as the event of interest.
# (The former `names(Ycalib) <- c("y")` only attached the element names
# "y", NA, NA, ... to the factor and has been dropped.)
Ycalib <- factor(
  data$status,
  levels = c("1", "0"),
  labels = c("status_1", "status_2")
)

# Predictors: every column except the status column.
Xcalib <- data[, -1, drop = FALSE]

# ROC needs both classes in every held-out fold. createFolds() stratifies on
# the factor, so capping k at the size of the smallest class guarantees that;
# with enough data this is still the original 10-fold cross-validation.
n_folds <- min(10L, min(table(Ycalib)))
stopifnot(n_folds >= 2L)

set.seed(1001)
folds <- createFolds(Ycalib, k = n_folds, list = TRUE, returnTrain = TRUE)

# A single set of folds is supplied through `index`, so this is plain
# (non-repeated) k-fold cross-validation. trainControl() draws no random
# numbers, so no seed is needed before it.
ctrl <- trainControl(
  method = "cv",
  number = n_folds,
  index = folds,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  savePredictions = TRUE
)

set.seed(1001)
plsda <- train(
  x = Xcalib, # spectral data
  y = Ycalib, # factor vector
  method = "pls", # PLS-DA algorithm
  tuneLength = 20, # maximum number of candidate component counts
  trControl = ctrl, # cross-validation settings
  preProc = c("center", "scale"), # predictors are centered and scaled
  metric = "ROC" # model selection metric for the 2-class problem
)

# NOTE(review): caret applies the "center"/"scale" pre-processing before the
# model is fitted, so `finalModel` is trained on standardized predictors and
# these coefficients should be on the standardized scale (directly comparable
# across spectral points) -- confirm against the caret preProcess docs.
# varImp(plsda) is an alternative way to rank the predictors.
coefficients <- coef(plsda$finalModel)

使用R中的caret包识别PLS-DA模型中的重要变量：系数是否标准化？

0 个答案: