Question

我正在尝试自学不同的机器学习技术，所以我想我将开始研究光学字符识别。我让这个工作在八度音，但我想利用python因为我认为它会更快。

当我运行我的代码＆＃34; cost_fcn（）＆＃34;和＆＃34; gradient_descent（）＆＃34;自己正常工作，但当我尝试使用优化器来训练它时，它会出错。我在下面附上了追溯，并在我的代码下面。

我正在使用pjreddie重新编写的mnist训练集：https://pjreddie.com/projects/mnist-in-csv/

Traceback (most recent call last):
  File "./test_code.py", line 115, in <module>
    optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
  File "/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py", line 441, in minimize
    return _minimize_bfgs(fun, x0, args, jac, callback, **options)
  File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 847, in _minimize_bfgs
    gfk = myfprime(x0)
  File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 289, in function_wrapper
    return function(*(wrapper_args + args))
  File "./test_code.py", line 87, in gradient_descent
    grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
ValueError: could not broadcast input array from shape (5) into shape (1)

现在我的代码：

import csv
from PIL import Image
import sys
import numpy as np
import math
import cmath
import scipy.optimize as opt

size = 28           # images are 28x28 pixels

test_images = []            # 10000 images
test_images_labels = []     # 10000 labels

train_images = []           # 60000 images
train_images_labels = []    # 60000 labels

# open and read train images file
with open('mnist_train.csv','r') as f:
    mnist_train = csv.reader(f,delimiter=',')
    for image in mnist_train:
        train_images.append(list(map(int,image[1:])))
        train_images_labels.append(image[0])

print("Image training set loaded... ")

# open and read test images file
with open('mnist_test.csv','r') as f:
    mnist_test = csv.reader(f,delimiter=',')
    for image in mnist_test:
        test_images.append(image[1:])
        test_images_labels.append(image[0])

print("Image test set loaded... ")

##########################################
# Multi-Class Logistic Classifier
#
#


# Define the activation function
def activation_fcn(z):
    g = np.divide(1.0, (1.0 + np.exp(-z)))
    return g

# Define the cost function
def cost_fcn(theta, X, y, lambda_term): 
    m = len(y)      # number of examples
    J = 0           # initialize cost term

    h = activation_fcn(np.matmul(X,theta))
    theta_sq = np.power(theta[1:],2)
    abs_log = np.log(np.abs(np.subtract(1,h)))

    J_unreg_nonsum = np.multiply(y, np.log(h)) + np.multiply((np.subtract(1,y)), abs_log)
    J_unreg = (-1/m)*J_unreg_nonsum.sum(axis=0)

    sum_theta_sq = theta_sq.sum(axis=0)
    reg_term = (lambda_term/(2*m))*sum_theta_sq
    J = J_unreg + reg_term

    return J

def gradient_descent(theta, X, y, lambda_term):
    m = len(y)
    grad_size = X.shape
    grad = np.zeros((grad_size[1],1), float)

    h = activation_fcn(np.matmul(X,theta))  
    grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
    grad[1:] = (1/m)*np.matmul(X[:,1:].conj().transpose(),(np.subtract(h,y))) + (lambda_term/m)*theta[1:,:]

    return grad.flatten()


###############################################################################################################
### test using the numpy matrix object 

## initializations
theta_t = np.matrix('-2; -1; 1; 2')
X_t = np.matrix('1.0 0.1 0.6 1.1; 1.0 0.2 0.7 1.2; 1.0 0.3 0.8 1.3; 1.0 0.4 0.9 1.4; 1.0 0.5 1.0 1.5')
y_t = np.matrix('1; 0; 1; 0; 1')
lambda_t = 3

## test regularized cost func (IT WORKS!)
test_cost = cost_fcn(theta_t, X_t, y_t, lambda_t)
print("The cost is: %d" % test_cost)

## test gradient descent (IT WORKS!)
test_grad = gradient_descent(theta_t, X_t, y_t, lambda_t)
print("The gradient is: ")
print(test_grad)

print("made it to optimizations")
## optimization of theta
m,n = X_t.shape
initial_theta = np.zeros(n, float)
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)

print(optimization)
###############################################################################################################

我真的很感激这方面的一些帮助。我相信这个错误与我如何为＆＃34; opt.minimize（）＆＃34;提供参数有关。但我没有成功解决这个问题。

使用scipy.optimize为OCR

0 个答案: