Question

我正在尝试通过计算图像的 Hu 矩（Hu Moments）来构建一个 SVM 系统，用于识别手写数字（使用 MNIST 数据集：http://yann.lecun.com/exdb/mnist/）。由于某些原因，在对数据集进行计算时，同一个数字的不同图像会得到不同的矩值。我使用的是 scikit-learn 库中现成的 SVM 算法。最初，我用经过矩计算的训练数据来训练算法（即每张图像变成一个由 7 个 Hu 矩值组成的向量）。此外，我还尝试先对这些向量取以 10 为底的对数，再用它们来训练系统。

由于某种原因，该算法的准确率很低（大约 30%）。训练数据共有 6 万张图像。我尝试了多种 gamma 和 C 的取值。

有人有什么想法吗？准确率这么低可能是什么原因？

我附上了自己的代码：

import importData
import numpy as np

def main():
    """Train the Hu-moment SVM on MNIST and dump predictions for inspection.

    Loads the data through ``importData.load_data``, trains a classifier on
    60000 training images, predicts ``num_test=100`` test images, and writes
    the predicted labels to ``predict.txt`` and the matching ground-truth
    labels to ``tag.txt`` in the current working directory.
    """
    train_images, train_labels, test_images, test_labels = importData.load_data()

    classifier1 = importData.train_model(
        num_train=60000,
        gamma_arg=2,
        Ci=20,
        images=train_images,
        tag=train_labels,
        iter=50000,
    )
    prediction1, test_label = importData.predict(
        clf=classifier1,
        num_test=100,
        images=test_images,
        tag=test_labels,
    )

    # Predictions and true labels go to separate files, one value per line,
    # so they can be compared side by side.
    np.savetxt("predict.txt", prediction1)
    np.savetxt("tag.txt", test_label)

# Entry-point guard: run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

importData.py：

import cv2
import numpy as np
from mlxtend.data import loadlocal_mnist
from numpy import *
from sklearn import svm


def load_data(data_dir='D:/Python Projects/MNIST_With_Moments/mnist_data'):
    """Load the MNIST training and test sets from their IDX files.

    Args:
        data_dir: Directory holding the four MNIST files
            (``train-images.idx3-ubyte``, ``train-labels.idx1-ubyte``,
            ``t10k-images.idx3-ubyte``, ``t10k-labels.idx1-ubyte``).
            Defaults to the location the project originally hard-coded.

    Returns:
        A tuple ``(train_images, train_labels, test_images, test_labels)``
        exactly as returned by ``loadlocal_mnist``.
        NOTE(review): per the mlxtend documentation each image comes back as
        a flattened row of pixel values, not a 2-D array -- confirm before
        passing images to 2-D image routines.
    """
    def _path(file_name):
        # Join with "/" so the default directory yields the original paths
        # byte-for-byte.
        return '/'.join((data_dir, file_name))

    train_images, train_labels = loadlocal_mnist(
        images_path=_path('train-images.idx3-ubyte'),
        labels_path=_path('train-labels.idx1-ubyte'))

    test_images, test_labels = loadlocal_mnist(
        images_path=_path('t10k-images.idx3-ubyte'),
        labels_path=_path('t10k-labels.idx1-ubyte'))

    return train_images, train_labels, test_images, test_labels

def train_model(num_train ,gamma_arg, iter,Ci,images, tag):

     train_label_list = []
     train_images_list = []

     for k in range(0, num_train):
          temp = cv2.HuMoments(cv2.moments(images[k])).flatten()
          temp1 = -np.sign(temp)*np.log10(np.abs(temp))
          index = isnan(temp1)
          temp1[index] = 0
          train_images_list.append(temp1)
          train_label_list.append(tag[k])

     classifier = svm.SVC(C = Ci, kernel='rbf', gamma = gamma_arg, cache_size=8000, probability=False, max_iter=iter)
     classifier.fit(train_images_list, train_label_list)
     return classifier


def predict(clf, num_test ,images, tag):
     test_images_list = []
     test_label_list = []

     for i in range(0, num_test):
          temp2 = cv2.HuMoments(cv2.moments(images[i])).flatten()
          temp22 = -np.sign(temp2) * np.log10(np.abs(temp2))
          index = isnan(temp22)
          temp22[index] = 0
          test_images_list.append(temp22)
          test_label_list.append(tag[i])


     return clf.predict(test_images_list) , test_label_list

使用SVM和HuMoments进行的MNIST错误预测

0 个答案: