如何在R中修复“ x [which(data $ Status == 1),]中的错误:不正确的维数”?

时间:2019-05-21 15:13:09

标签: r logistic-regression genetic-algorithm cross-validation

我想用遗传算法进行逻辑回归,以进行5层交叉验证分层选择变量。我运行我的r脚本,但是出现以下错误: x [which(data $ Status == 1),]中的错误:错误的维数。如何解决此错误?

我的数据由2类和6个自变量的因变量组成。

#DATA
Status = sample(c(1,0), 50, replace = TRUE)
col1 = sample(c(0,1), 50, replace = TRUE)
col2 = sample(c(0,1), 50, replace = TRUE)
col3 = sample(c(0,1), 50, replace = TRUE)
col4 = sample(c(0,1), 50, replace = TRUE)
col5 = sample(c(0,1), 50, replace = TRUE)
col6 = sample(31:80)
data <- data.frame(Status, col1, col2, col3, col4, col5, col6)

#CONVERT TO FACTOR
for(k in 1:6) {
  data[, k] <- as.factor(data[, k])
}

我使用as.factor更改数据类型

#Randomly shuffle the data
data<-data[sample(nrow(data)),]
datax=data[,2:7]
datay=data[,1]
datay0=data[which(data$Status==1),]
datay0=datay0[,1]
datay1=data[which(data$Status==0),]
datay1=datay1[,1]
n0=length(datay0)
n1=length(datay1)
#Create 5 equally size folds
folds0 <- cut(seq(1,length(datay0)),breaks=5,labels=FALSE)
folds1 <- cut(seq(1,length(datay1)),breaks=5,labels=FALSE)

library(caret)
#Perform 5 fold cross validation
fitness <- function(string)
  {
  inc <- which(string==1)
  x <- datax[,inc]
  datax0=x[which(data$Status==1),]
  datax1=x[which(data$Status==0),]
  akurasi<- rep(0,5)
  sensi<- rep(0,5)
  speci<-rep(0,5)
  auc<- rep(0,5)
  for(i in 1:5){
    #Segement your data by fold using the which() function 
    testIndexes0 <- which(folds0==i,arr.ind=TRUE)
    testIndexes1 <- which(folds1==i,arr.ind=TRUE)
    trainx0 <- datax0[-testIndexes0, ]
    trainx1 <- datax1[-testIndexes1, ]
    trainy0 <- datay0[-testIndexes0]
    trainy1 <- datay1[-testIndexes1]
    testx0 <- datax0[testIndexes0, ]
    testx1 <- datax1[testIndexes1, ]
    testy0 <- datay0[testIndexes0]
    testy1 <- datay1[testIndexes1]
    if(ncol(x)==1){
      xtrain <- data.frame(c(trainx0,trainx1)-1)
      colnames(xtrain) = "x"
      xtest <- data.frame(c(testx0,testx1)-1)
      colnames(xtest) = "x"
    } else {
      xtrain <- rbind(trainx0,trainx1)
      xtest <- rbind(testx0,testx1)
    }
    ytrain <- c(trainy0, trainy1)-1
    ytest <- c(testy0,testy1)-1
    #Use the test and train data partitions however you desire...
    b <- data.frame(cbind(ytrain,xtrain))
    model <- glm(as.factor(ytrain)~.,data=b, family=binomial(link='logit'))
    predtest=as.factor(ifelse(predict(model,xtest,type='response')>0.5,1,0))
    cm <- confusionMatrix(predtest, as.factor(ytest))
    akurasi[i] <- cm$overall['Accuracy']
    sensi[i] <- cm$byClass['Sensitivity']
    speci[i] <- cm$byClass['Specificity']
    auc[i] <- 0.5*(sensi[i]+speci[i])
  }
  mean.auc=mean(auc)
  return(mean.auc)
  }
library(GA)
gaControl("binary" = list(selection = "ga_rwSelection"))
GA_REGLOG <- ga("binary", fitness = fitness, nBits = ncol(datax),
                  names = colnames(datax), 
                  selection = gaControl("binary")$selection,
                  popSize = 100, pcrossover = 0.8, pmutation = 0.1, maxFitness = 1)

我认为,这里有问题

datax0=x[which(data$Status==1),]
datax1=x[which(data$Status==0),]

0 个答案:

没有答案