我正在使用列车和验证变量之间的GLM进行回归建模。所有变量都是数值。但是当我尝试与错误运行相关时出现错误:'y'必须是数字..代码如下:
data <- read.csv(file="fielddatabase.csv", sep=",", header=T)
attach(data)
summary(data)
###############################
## Prepare Bootstrap samples
###############################
set.seed(550)
# create empty lists in which subsets can be stored
train <- list()
validation <- list()
# set the bootstrap parameters
N = length(data[,9]) # N° of observations
B = 500 # N° of bootstrap iterations
# start loop
for(i in 1:B){
# create random numbers with replacement to select samples from each group
idx = sample(1:N, N, replace=TRUE)
# select subsets of the five groups based on the random numbers
train[[i]] <- data[idx,]
validation[[i]] <- data[-idx,]
}
######################################
## start regression modelling with GLM
######################################
# create empty lists in which the models accuracies can be stored
# Total
ID.nb<-list()
Obs.nb<-list()
Pred.nb<-list()
r2.nb<-list()
rmse.nb<-list()
Nrmse.nb<-list()
imp.nb<-list()
bias.nb <- list()
# Run GLM
for(i in 1:B)
TRAIN <- train[[i]]
VALIDATION <- validation[[i]]
len<-length( VALIDATION[,1])
# store and select the observations
ID<-VALIDATION$ID
ID.nb [[i]]<-ID
obs <- VALIDATION[,2]
Obs.nb[[i]]<-obs
# run the GLM using Negative Binomial family.
GLM_total <- glm(Biomass ~ hmax + Nmax + hsd + hskewness + hkurtosis, data=TRAIN, family=negative.binomial(theta=1 , link="log"))
# predict Biomass values
Pred<-stats:::predict(GLM_total, newdata=VALIDATION, type="response")
# store the model accuracies
Pred.nb[[i]]<-Pred
r2.nb[[i]]<-(cor(Pred, obs, method="pearson"))^2
这里的错误显示:
Error in cor(Pred, obs, method = "pearson") : 'y' must be numeric