从Coursera的深度学习课程中,我实现了逻辑回归:
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
def sigmoid(z):
s = 1 / (1 + np.exp(-z))
return s
def initialize_with_zeros(dim):
w = np.zeros(shape=(dim, 1))
b = 0
return w, b
def propagate(w, b, X, Y):
m = X.shape[1]
A = sigmoid(np.dot(w.T, X) + b) # compute activation
cost = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A))) # compute cost
dw = (1 / m) * np.dot(X, (A - Y).T)
db = (1 / m) * np.sum(A - Y)
cost = np.squeeze(cost)
grads = {"dw": dw,
"db": db}
return grads, cost
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
costs = []
for i in range(num_iterations):
grads, cost = propagate(w, b, X, Y)
dw = grads["dw"]
db = grads["db"]
w = w - learning_rate * dw # need to broadcast
b = b - learning_rate * db
if i % 100 == 0:
costs.append(cost)
# Print the cost every 100 training examples
if print_cost and i % 100 == 0:
print ("Cost after iteration %i: %f" % (i, cost))
params = {"w": w,
"b": b}
grads = {"dw": dw,
"db": db}
return params, grads, costs
def predict(w, b, X):
m = X.shape[1]
Y_prediction = np.zeros((1, m))
w = w.reshape(X.shape[0], 1)
A = sigmoid(np.dot(w.T, X) + b)
for i in range(A.shape[1]):
# Convert probabilities a[0,i] to actual predictions p[0,i]
### START CODE HERE ### (≈ 4 lines of code)
print(A)
Y_prediction[0, i] = 1 if A[0, i] > 0.5 else 0
### END CODE HERE ###
assert(Y_prediction.shape == (1, m))
return Y_prediction
print ("sigmoid(0) = " + str(sigmoid(0)))
print ("sigmoid(9.2) = " + str(sigmoid(9.2)))
dim = 2
w, b = initialize_with_zeros(dim)
print ("w = " + str(w))
print ("b = " + str(b))
w, b, X, Y = np.array([[1], [2]]), 2, np.array([[-1,-2], [3,4]]), np.array([[1, 0]])
grads, cost = propagate(w, b, X, Y)
print ("dw = " + str(grads["dw"]))
print ("db = " + str(grads["db"]))
print ("cost = " + str(cost))
params, grads, costs = optimize(w, b, X, Y, num_iterations= 10000, learning_rate = 0.01, print_cost = False)
print ("w = " + str(params["w"]))
print ("b = " + str(params["b"]))
print ("dw = " + str(grads["dw"]))
print ("db = " + str(grads["db"]))
print("predictions = " + str(predict(w, b, X)))
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
w, b = initialize_with_zeros(X_train.shape[0])
parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
w = parameters["w"]
b = parameters["b"]
Y_prediction_test = predict(w, b, X_test)
Y_prediction_train = predict(w, b, X_train)
print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
d = {"costs": costs,
"Y_prediction_test": Y_prediction_test,
"Y_prediction_train" : Y_prediction_train,
"w" : w,
"b" : b,
"learning_rate" : learning_rate,
"num_iterations": num_iterations}
return d
我正在尝试使用包含5个样本的通用数据集,其中每个样本均包含4个元素:
train_set_x = np.array([[1,2,3,4],[4,3,2,1],[1,2,3,4],[4,3,2,1],[1,2,3,4]])
train_set_y = np.array([1,0,1,0,1])
test_set_x = np.array([[1,2,3,4],[4,3,2,1],[1,2,3,4],[4,3,2,1],[1,2,3,4]])
test_set_y = np.array([1,0,1,0,1])
train_set_x , train_set_y , test_set_x , test_set_y
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
但是会引发以下错误:
<ipython-input-409-bd4e233a8f4e> in propagate(w, b, X, Y)
18
19 A = sigmoid(np.dot(w.T, X) + b) # compute activation
---> 20 cost = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A))) # compute cost
21
22 dw = (1 / m) * np.dot(X, (A - Y).T)
ValueError: operands could not be broadcast together with shapes (5,) (1,4)
我需要更改重量尺寸以计算成本值吗?
更新:
使用修改:
A = sigmoid(np.dot(X , w) + b) # compute activation
导致错误:
<ipython-input-546-7a7980550834> in propagate(w, b, X, Y)
20 m = X.shape[1]
21
---> 22 A = sigmoid(np.dot(X , w) + b) # compute activation
23 print('w.T' , w.T , 'w' , w, 'X' , X , 'Y' , Y , 'A' , A)
24 cost = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A))) # compute cost
ValueError: shapes (5,4) and (5,1) not aligned: 4 (dim 1) != 5 (dim 0)