This week I have been trying some classification exercises, and I started with an MNIST handwritten digit recognizer. I took some files from the internet that recognize digits written on paper, but now I am trying to train my own model with a neural network.
I have to pickle the trained model into a .pkl file so that the recognizer can read it. I train the model with Keras and then take the trained model. At first my problem was that I could not pickle it, but after adding the "make keras picklable" helper the pickling succeeded.
Now the problem is that when I try to load the pickled model, I get an EOFError. I think I am using pickle correctly, so the problem is probably on the Keras side. I am using Ubuntu 16.04 and Python 2.7. The code files are included below:
OwnClassificationtrain file:
import types
import tempfile
import pickle
import numpy
import keras
import keras.models
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.set_image_dim_ordering('th')
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][pixels][width][height]
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')
# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
# define the larger model
def larger_model():
    # create model
    model = Sequential()
    model.add(Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def make_keras_picklable():
    def __getstate__(self):
        model_str = ""
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            keras.models.save_model(self, fd.name, overwrite=True)
            model_str = fd.read()
        d = {'model_str': model_str}
        return d
    def __setstate__(self, state):
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            fd.write(state['model_str'])
            fd.flush()
            model = keras.models.load_model(fd.name)
        self.__dict__ = model.__dict__
    cls = keras.models.Model
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__
model = larger_model()
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=1, batch_size=200)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Large CNN Error: %.2f%%" % (100-scores[1]*100))
make_keras_picklable()
with open('filename.pkl', 'wb') as file:
    pickle.dump(model, file, protocol=pickle.HIGHEST_PROTOCOL)
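One check I have not added yet, but which might help tell whether the dump itself is intact, is loading the file straight back at the end of the same training script (just a sketch):
with open('filename.pkl', 'rb') as file:
    reloaded = pickle.load(file)
print(reloaded.predict(X_test[:1]))  # should give a 1x10 array of class probabilities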
Recognition file:
# Import the modules
import cv2
import cPickle
from skimage.feature import hog
import numpy as np
# Load the classifier
with open('traineddata.pkl', 'rb') as f:  # loading worked with the original file, using clf = joblib.load('digits_cls.pkl') (from sklearn.externals import joblib)
    clf = cPickle.load(f)
print(clf)
# Read the input image
im = cv2.imread("photo10.jpg")
# Convert to grayscale and apply Gaussian filtering
im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)
# Threshold the image
ret, im_th = cv2.threshold(im_gray, 100, 255, cv2.THRESH_BINARY_INV)
# Find contours in the image
image, ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Get rectangles contains each contour
rects = [cv2.boundingRect(ctr) for ctr in ctrs]
# For each rectangular region, calculate HOG features and predict
# the digit using Linear SVM.
for rect in rects:
    # Draw the rectangles
    cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
    # Make the rectangular region around the digit
    leng = int(rect[3] * 1.6)
    pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
    pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
    roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
    # Resize the image
    roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
    roi = cv2.dilate(roi, (3, 3))
    # Calculate the HOG features
    roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)
    nbr = clf.predict(np.array([roi_hog_fd], 'float64'))
    cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]), cv2.FONT_HERSHEY_DUPLEX, 2, (128, 255, 0), 3)
imS = cv2.resize(im, (960, 540))
cv2.imshow("Resulting Image with Rectangular ROIs", imS)
cv2.waitKey()
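One thing I am not sure about: as far as I understand, pickle only stores a reference to the class, so the patched __setstate__ from make_keras_picklable() is not saved inside the .pkl file and would have to be applied again in this script before loading. Something like this (a sketch, assuming the helper is copied over from the training file):
make_keras_picklable()  # same helper as in the training script, copied into this file
with open('traineddata.pkl', 'rb') as f:
    clf = cPickle.load(f)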
Originaltrain file:
# Import the modules
from sklearn.externals import joblib
from sklearn import datasets
from skimage.feature import hog
from sklearn.svm import LinearSVC
import numpy as np
from collections import Counter
# Load the dataset
dataset = datasets.fetch_mldata("MNIST Original")
# Extract the features and labels
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')
# Extract the hog features
list_hog_fd = []
for feature in features:
    fd = hog(feature.reshape((28, 28)), orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')
print "Count of digits in dataset", Counter(labels)
# Create an linear SVM object
clf = LinearSVC()
# Perform the training
clf.fit(hog_features, labels)
# Save the classifier
joblib.dump(clf, "digits_cls.pkl", compress=3)
Link to photo10.jpg: http://imgur.com/a/iXt5z I think I should not be mixing Keras and sklearn like this, but I do not know how to replace the sklearn calls in the recognition file.
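In case it matters: as far as I can tell, the sklearn classifier was trained on HOG features, while my Keras model was trained on the raw 28x28 pixel values scaled to 0-1, so the prediction lines inside the loop would probably have to change too. This is roughly what I imagine replacing the HOG part with (just an untested sketch, assuming clf is the loaded Keras model):
roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
# same shape and scaling as in training: (samples, 1, 28, 28), values in 0-1
roi_input = roi.reshape(1, 1, 28, 28).astype('float32') / 255
probs = clf.predict(roi_input)  # 10 softmax outputs
nbr = np.argmax(probs, axis=1)
cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]), cv2.FONT_HERSHEY_DUPLEX, 2, (128, 255, 0), 3)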