当我运行我的代码" cost_fcn()"和" gradient_descent()"自己正常工作,但当我尝试使用优化器来训练它时,它会出错。我在下面附上了追溯,并在我的代码下面。
Traceback (most recent call last):
File "./test_code.py", line 115, in <module>
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
File "/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py", line 441, in minimize
return _minimize_bfgs(fun, x0, args, jac, callback, **options)
File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 847, in _minimize_bfgs
gfk = myfprime(x0)
File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 289, in function_wrapper
return function(*(wrapper_args + args))
File "./test_code.py", line 87, in gradient_descent
grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
ValueError: could not broadcast input array from shape (5) into shape (1)
import csv
from PIL import Image
import sys
import numpy as np
import math
import cmath
import scipy.optimize as opt
size = 28 # images are 28x28 pixels
test_images = [] # 10000 images
test_images_labels = [] # 10000 labels
train_images = [] # 60000 images
train_images_labels = [] # 60000 labels
# open and read train images file
with open('mnist_train.csv','r') as f:
mnist_train = csv.reader(f,delimiter=',')
for image in mnist_train:
print("Image training set loaded... ")
# open and read test images file
with open('mnist_test.csv','r') as f:
mnist_test = csv.reader(f,delimiter=',')
for image in mnist_test:
print("Image test set loaded... ")
# Multi-Class Logistic Classifier
# Define the activation function
def activation_fcn(z):
g = np.divide(1.0, (1.0 + np.exp(-z)))
return g
# Define the cost function
def cost_fcn(theta, X, y, lambda_term):
m = len(y) # number of examples
J = 0 # initialize cost term
h = activation_fcn(np.matmul(X,theta))
theta_sq = np.power(theta[1:],2)
abs_log = np.log(np.abs(np.subtract(1,h)))
J_unreg_nonsum = np.multiply(y, np.log(h)) + np.multiply((np.subtract(1,y)), abs_log)
J_unreg = (-1/m)*J_unreg_nonsum.sum(axis=0)
sum_theta_sq = theta_sq.sum(axis=0)
reg_term = (lambda_term/(2*m))*sum_theta_sq
J = J_unreg + reg_term
return J
def gradient_descent(theta, X, y, lambda_term):
m = len(y)
grad_size = X.shape
grad = np.zeros((grad_size[1],1), float)
h = activation_fcn(np.matmul(X,theta))
grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
grad[1:] = (1/m)*np.matmul(X[:,1:].conj().transpose(),(np.subtract(h,y))) + (lambda_term/m)*theta[1:,:]
return grad.flatten()
### test using the numpy matrix object
## initializations
theta_t = np.matrix('-2; -1; 1; 2')
X_t = np.matrix('1.0 0.1 0.6 1.1; 1.0 0.2 0.7 1.2; 1.0 0.3 0.8 1.3; 1.0 0.4 0.9 1.4; 1.0 0.5 1.0 1.5')
y_t = np.matrix('1; 0; 1; 0; 1')
lambda_t = 3
## test regularized cost func (IT WORKS!)
test_cost = cost_fcn(theta_t, X_t, y_t, lambda_t)
print("The cost is: %d" % test_cost)
## test gradient descent (IT WORKS!)
test_grad = gradient_descent(theta_t, X_t, y_t, lambda_t)
print("The gradient is: ")
print("made it to optimizations")
## optimization of theta
m,n = X_t.shape
initial_theta = np.zeros(n, float)
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
我真的很感激这方面的一些帮助。我相信这个错误与我如何为&#34; opt.minimize()&#34;提供参数有关。但我没有成功解决这个问题。