我想用遗传算法进行逻辑回归,以进行5层交叉验证分层选择变量。我运行我的r脚本,但是出现以下错误: x [which(data $ Status == 1),]中的错误:错误的维数。如何解决此错误?
我的数据由2类和6个自变量的因变量组成。
#DATA
Status = sample(c(1,0), 50, replace = TRUE)
col1 = sample(c(0,1), 50, replace = TRUE)
col2 = sample(c(0,1), 50, replace = TRUE)
col3 = sample(c(0,1), 50, replace = TRUE)
col4 = sample(c(0,1), 50, replace = TRUE)
col5 = sample(c(0,1), 50, replace = TRUE)
col6 = sample(31:80)
data <- data.frame(Status, col1, col2, col3, col4, col5, col6)
#CONVERT TO FACTOR
for(k in 1:6) {
data[, k] <- as.factor(data[, k])
}
我使用as.factor更改数据类型
#Randomly shuffle the data
data<-data[sample(nrow(data)),]
datax=data[,2:7]
datay=data[,1]
datay0=data[which(data$Status==1),]
datay0=datay0[,1]
datay1=data[which(data$Status==0),]
datay1=datay1[,1]
n0=length(datay0)
n1=length(datay1)
#Create 5 equally size folds
folds0 <- cut(seq(1,length(datay0)),breaks=5,labels=FALSE)
folds1 <- cut(seq(1,length(datay1)),breaks=5,labels=FALSE)
library(caret)
#Perform 5 fold cross validation
fitness <- function(string)
{
inc <- which(string==1)
x <- datax[,inc]
datax0=x[which(data$Status==1),]
datax1=x[which(data$Status==0),]
akurasi<- rep(0,5)
sensi<- rep(0,5)
speci<-rep(0,5)
auc<- rep(0,5)
for(i in 1:5){
#Segement your data by fold using the which() function
testIndexes0 <- which(folds0==i,arr.ind=TRUE)
testIndexes1 <- which(folds1==i,arr.ind=TRUE)
trainx0 <- datax0[-testIndexes0, ]
trainx1 <- datax1[-testIndexes1, ]
trainy0 <- datay0[-testIndexes0]
trainy1 <- datay1[-testIndexes1]
testx0 <- datax0[testIndexes0, ]
testx1 <- datax1[testIndexes1, ]
testy0 <- datay0[testIndexes0]
testy1 <- datay1[testIndexes1]
if(ncol(x)==1){
xtrain <- data.frame(c(trainx0,trainx1)-1)
colnames(xtrain) = "x"
xtest <- data.frame(c(testx0,testx1)-1)
colnames(xtest) = "x"
} else {
xtrain <- rbind(trainx0,trainx1)
xtest <- rbind(testx0,testx1)
}
ytrain <- c(trainy0, trainy1)-1
ytest <- c(testy0,testy1)-1
#Use the test and train data partitions however you desire...
b <- data.frame(cbind(ytrain,xtrain))
model <- glm(as.factor(ytrain)~.,data=b, family=binomial(link='logit'))
predtest=as.factor(ifelse(predict(model,xtest,type='response')>0.5,1,0))
cm <- confusionMatrix(predtest, as.factor(ytest))
akurasi[i] <- cm$overall['Accuracy']
sensi[i] <- cm$byClass['Sensitivity']
speci[i] <- cm$byClass['Specificity']
auc[i] <- 0.5*(sensi[i]+speci[i])
}
mean.auc=mean(auc)
return(mean.auc)
}
library(GA)
gaControl("binary" = list(selection = "ga_rwSelection"))
GA_REGLOG <- ga("binary", fitness = fitness, nBits = ncol(datax),
names = colnames(datax),
selection = gaControl("binary")$selection,
popSize = 100, pcrossover = 0.8, pmutation = 0.1, maxFitness = 1)
我认为,这里有问题
datax0=x[which(data$Status==1),]
datax1=x[which(data$Status==0),]