当我尝试运行R代码时:
fit <- train(V16~., data=credit,
method="glm",
family="binomial",
trControl=ctrl,
preProcess = c("center", "scale"))
...但是我遇到了以下错误:
1)
Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut = 10, :
These variables have zero variances: V7.o
There were 50 or more warnings (use warnings() to see the first 50)
2)
> warnings()
Mensagens de aviso:
1: In train.default(x, y, weights = w, ...) :
You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.
2: glm.fit: fitted probabilities numerically 0 or 1 occurred
3: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
4: glm.fit: fitted probabilities numerically 0 or 1 occurred
5: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
6: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
7: glm.fit: fitted probabilities numerically 0 or 1 occurred
8: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
9: glm.fit: fitted probabilities numerically 0 or 1 occurred
10: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
11: glm.fit: fitted probabilities numerically 0 or 1 occurred
12: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
13: glm.fit: fitted probabilities numerically 0 or 1 occurred
14: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
15: glm.fit: fitted probabilities numerically 0 or 1 occurred
16: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
17: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
18: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
19: glm.fit: fitted probabilities numerically 0 or 1 occurred
20: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
21: glm.fit: fitted probabilities numerically 0 or 1 occurred
22: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
23: glm.fit: fitted probabilities numerically 0 or 1 occurred
24: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
25: glm.fit: fitted probabilities numerically 0 or 1 occurred
26: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
27: glm.fit: fitted probabilities numerically 0 or 1 occurred
28: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
29: glm.fit: fitted probabilities numerically 0 or 1 occurred
30: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
31: glm.fit: fitted probabilities numerically 0 or 1 occurred
32: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
33: glm.fit: fitted probabilities numerically 0 or 1 occurred
34: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
35: glm.fit: fitted probabilities numerically 0 or 1 occurred
36: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
37: glm.fit: fitted probabilities numerically 0 or 1 occurred
38: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
39: glm.fit: fitted probabilities numerically 0 or 1 occurred
40: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
41: glm.fit: fitted probabilities numerically 0 or 1 occurred
42: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
43: glm.fit: fitted probabilities numerically 0 or 1 occurred
44: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
45: glm.fit: fitted probabilities numerically 0 or 1 occurred
46: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
47: glm.fit: fitted probabilities numerically 0 or 1 occurred
48: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
49: glm.fit: fitted probabilities numerically 0 or 1 occurred
50: In predict.lm(object, newdata, se.fit, scale = 1, type = if (type == ... :
prediction from a rank-deficient fit may be misleading
我对代码进行了很多修改,但是我没有找到错误所在。
我已经尝试过了,但是没有成功:
install.packages("ROCR")
install.packages("dummies")
install.packages("caret")
install.packages("e1071")
library(ROCR)
library(dummies)
library(caret) # for Cross Validation functions
library(e1071)
# Leia os dados de
# https://archive.ics.uci.edu/ml/machine-learning-databases/credit-
screening/crx.data
#
# dica: os dados nao possuem cabecalho
credit = read.csv("https://archive.ics.uci.edu/ml/machine-learning-
databases/credit-screening/crx.data", header=FALSE)
cat("credit - rows and columns dataset:", nrow(credit) ," rows ",
ncol(credit), "columns ", "\n")
# Elimine as linha com valores ausentes "?"
#
# dica: primeiro troque os valores "?" por NA e em seguida use na.omit()
credit[credit[]=="?"] <- NA
credit = na.omit(credit)
# Valores numericos com NA aparecem como caracteres. Converta esses
valores
# para numerico com as.numeric (V2, V14)
#
credit$V2 = as.numeric(credit$V2)
credit$V14 = as.numeric(credit$V14)
# Converta o atributo de classe para valores 0 e 1
credit$V16 = as.numeric(credit$V16)
credit$V16 = credit$V16=replace(credit$V16,credit$V16==2,0)
cat("credit - rows and columns after NA omit:", nrow(credit) ," rows ",
ncol(credit), "columns ", "\n")
# Aplique o dummy encode para todos os atributos categoricos
# dica: empregue o comando dummy da library(dummies)
credit = dummy.data.frame(credit, names =
c("V1","V4","V5","V6","V7","V9","V10","V12","V13"), sep = ".")
cat("credit - rows and columns after dummy encode:", nrow(credit) ," rows
", ncol(credit), "columns ", "\n")
head(credit)
####inserido aqui após pesquisa no stackoverflow####
#converte o atributo de classe para factor
credit$V16 = as.factor(credit$V16)
credit$V16 = ifelse(credit$V16==1, "No", "Yes")
# Logit com Cross Validation
# dica: empregue o codigo da tarefa 2 como exemplo
# Crie o arquivo de controle para 20 particoes dos dados e 5 repeticoes
ctrl <- trainControl(method="repeatedcv", number= 20, repeats=5)
# Faca o treinamento logistico, nao esquecer de empregar preProcess =
c("center", "scale")
fit <- train(V16~., data=credit,
method="glm",
family="binomial",
trControl=ctrl,
preProcess = c("center", "scale"))
fit
# conjunto de treinamento e teste
T = sample(1:nrow(credit), round(0.3*nrow(credit)))
credit_test=credit[T,]
credit_train=credit[-T,]
# Faca a predicao para todos os valores de credit
predict_test = predict(fit, newdata=credit_test, type="raw")
predict_test
# Converte predict_test para valores 0 e 1
predict_test = as.numeric(predict_test)
predict_test = predict_test=replace(predict_test,predict_test==2,0)
# Construa a matriz de confusao
c_matrix=table(credit_test$V16, predict_test)
print(c_matrix)
# Calcula a acuracidade
cat('Accuracy: ', sum(diag(c_matrix))/sum(c_matrix)*100, ' %', "\n")
# Plot da curva ROC
pr=prediction(as.numeric(predict_test),credit_test$V16)
prf=performance(pr, measure="tpr", x.measure="fpr")
plot(prf,colorize=TRUE)
# Calcule a area sob a curva ROC
auc=performance(pr, measure="auc")
auc=auc@y.values[[1]]
auc
第1项中描述的两个错误篮子。