我感兴趣的是找到MNIST数据集的随机森林分类器在R中错误预测的数字。尤其是每个数字(0-9)的示例。以下是我尝试过的。但是,我不断收到错误Error in y + 1 : non-numeric argument to binary operator
,但不确定如何解决该错误。我认为以下是通常只获取最差数字的方法,因此,是否有可能通过更改此方法来获取每个数字的示例?
# The below code is copied directly from https://gist.github.com/daviddalpiaz/ae62ae5ccd0bada4b9acd6dbc9008706
# and https://gist.github.com/brendano/39760
# download data from http://yann.lecun.com/exdb/mnist/
download.file("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
"train-images-idx3-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
"train-labels-idx1-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
"t10k-images-idx3-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
"t10k-labels-idx1-ubyte.gz")
# gunzip the files
R.utils::gunzip("train-images-idx3-ubyte.gz")
R.utils::gunzip("train-labels-idx1-ubyte.gz")
R.utils::gunzip("t10k-images-idx3-ubyte.gz")
R.utils::gunzip("t10k-labels-idx1-ubyte.gz")
# load image files
load_image_file = function(filename) {
ret = list()
f = file(filename, 'rb')
readBin(f, 'integer', n = 1, size = 4, endian = 'big')
n = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
nrow = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
ncol = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
x = readBin(f, 'integer', n = n * nrow * ncol, size = 1, signed = FALSE)
close(f)
data.frame(matrix(x, ncol = nrow * ncol, byrow = TRUE))
}
# load label files
load_label_file = function(filename) {
f = file(filename, 'rb')
readBin(f, 'integer', n = 1, size = 4, endian = 'big')
n = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
y = readBin(f, 'integer', n = n, size = 1, signed = FALSE)
close(f)
y
}
# load images
train = load_image_file("train-images-idx3-ubyte")
test = load_image_file("t10k-images-idx3-ubyte")
# load labels
train$y = as.factor(load_label_file("train-labels-idx1-ubyte"))
test$y = as.factor(load_label_file("t10k-labels-idx1-ubyte"))
library(randomForest)
# testing classification on subset of training data
model.rf<-randomForest(y ~ ., data = train[1:1000, ])
model.rf$confusion
#Using model on test set
predict.rf = predict(model.rf, test)
mean(predict.rf == test$y)
table(predicted = predict.rf, actual = test$y)
iset <- sample(which(predict.rf != ytest),7*7)
par(mar=c(0,0,0,0))
par(mfrow=c(7,7))
for (j in iset) {
y <- matrix(as.matrix(test[j,-1]),16,16,byrow=TRUE)
y <- 1 - (y + 1)*0.5
plot(0,0,xlab="",ylab="",axes=FALSE)
rasterImage(y,-1,-1,1,1)
box()
text(-0.8,-0.7, test[j,1], cex=3, col="red")
text(0.8,-0.7, predict.rf[j], cex=3, col="blue")
}