I'm new to deep neural networks and am trying to implement one from scratch in Python. I've tried a lot of things, but I can't find the mistake in my implementation. Whenever I use the predict function, it always outputs 0. I also tested every function you'll see in the code below with random arrays of the same shapes as x and y, and they all seem to work correctly. I had also cleaned the data beforehand.
import os
os.chdir(r'path where my data is stored')  # change the working directory to where the data set is stored
Create the data frame and assign values to the input and target vectors
import pandas as pd
import numpy as np
df = pd.read_csv('clean_data.csv')
X = df[['radius_mean', 'texture_mean', 'perimeter_mean',
'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
'fractal_dimension_se', 'radius_worst', 'texture_worst',
'perimeter_worst', 'area_worst', 'smoothness_worst',
'compactness_worst', 'concavity_worst', 'concave points_worst',
'symmetry_worst', 'fractal_dimension_worst']].values
Y = df['diagnosis'].values
Y = Y.reshape(569,1)
Split the data into training and test sets (x and y are the training set, xt and yt are the test set)
from sklearn.model_selection import train_test_split
x, xt, y, yt = train_test_split(X, Y, test_size = 0.2, random_state = 40)
x, xt, y, yt = x.T, xt.T, y.T, yt.T
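After the transpose, examples sit in columns rather than rows. A quick shape check (a sketch of my own; the counts assume the 569-row data set splits 455/114 at test_size = 0.2):

print(x.shape, y.shape)    # (30, 455), (1, 455): features x examples
print(xt.shape, yt.shape)  # (30, 114), (1, 114)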
Initialize the parameters
def iniparams(layer_dims):
    params = {}
    for l in range(1, len(layer_dims)):
        params['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1])*0.01
        params['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return params
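For instance, a quick check on the initializer (a sketch, using the layer sizes passed to model further below) confirms each weight matrix maps layer l-1 to layer l:

params = iniparams([30, 8, 5, 4, 4, 3, 1])
print(params['W1'].shape)  # (8, 30): rows = units in layer 1, columns = inputs
print(params['b1'].shape)  # (8, 1)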
Helper function #1
def sigmoid(Z):
    return 1/(1 + np.exp(-Z)), Z
#2
def relu(Z):
    return np.maximum(0, Z), Z
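A minimal version of the random-array test mentioned at the top (a sketch, not from the original post) could look like this:

Z = np.random.randn(3, 4)
A_sig, _ = sigmoid(Z)
A_rel, _ = relu(Z)
print(((A_sig > 0) & (A_sig < 1)).all())  # True: sigmoid maps into (0, 1)
print((A_rel >= 0).all())                 # True: relu zeroes out negatives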
Linear forward
def linearfwd(W, A, b):
    Z = np.dot(W, A) + b
    linear_cache = (W, A, b)
    return Z, linear_cache
Forward activation
def fwdactivation(W, A_prev, b, activation):
    if activation == 'sigmoid':
        Z, linear_cache = linearfwd(W, A_prev, b)
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        Z, linear_cache = linearfwd(W, A_prev, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
Forward model
def fwdmodel(x, params):
    caches = []
    L = len(params)//2
    A = x
    for l in range(1, L):
        A_prev = A
        A, cache = fwdactivation(params['W' + str(l)], A_prev, params['b' + str(l)], 'relu')
        caches.append(cache)
    AL, cache = fwdactivation(params['W' + str(L)], A, params['b' + str(L)], 'sigmoid')
    caches.append(cache)
    return AL, caches
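As a sanity check (a sketch with random inputs, assuming 30 input features), AL should come out with one sigmoid output per example column:

AL, caches = fwdmodel(np.random.randn(30, 5), iniparams([30, 8, 5, 4, 4, 3, 1]))
print(AL.shape)  # (1, 5)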
Compute the cost
def J(AL, y):
    return -np.sum(np.multiply(np.log(AL), y) + np.multiply(np.log(1 - AL), (1 - y)))/y.shape[1]
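One caveat: if AL ever reaches exactly 0 or 1, np.log returns -inf and the cost turns into nan. A common guard (my addition, not part of the original code) is to clip AL first:

def J_safe(AL, y, eps=1e-8):
    AL = np.clip(AL, eps, 1 - eps)  # keep both log() calls finite
    return -np.sum(y*np.log(AL) + (1 - y)*np.log(1 - AL))/y.shape[1]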
Backward sigmoid
def sigmoidbkwd(dA, cache):
    Z = cache
    s = 1/(1 + np.exp(-Z))
    dZ = dA*s*(1 - s)
    return dZ
Backward relu
def relubkwd(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0  # the relu gradient is 0 wherever the input was non-positive
    return dZ
Linear backward
def linearbkwd(dZ, cache):
    W, A_prev, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T)/m
    db = np.sum(dZ, axis=1, keepdims=True)/m
    dA_prev = np.dot(W.T, dZ)
    return dW, dA_prev, db
Backward activation
def bkwdactivation(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoidbkwd(dA, activation_cache)
        dW, dA_prev, db = linearbkwd(dZ, linear_cache)
    elif activation == 'relu':
        dZ = relubkwd(dA, activation_cache)
        dW, dA_prev, db = linearbkwd(dZ, linear_cache)
    return dW, dA_prev, db
Backward model
def bkwdmodel(AL, y, cache):
    grads = {}
    L = len(cache)
    dAL = -(np.divide(y, AL) - np.divide(1 - y, 1 - AL))
    current_cache = cache[L - 1]
    grads['dW' + str(L)], grads['dA' + str(L - 1)], grads['db' + str(L)] = bkwdactivation(dAL, current_cache, 'sigmoid')
    for l in reversed(range(L - 1)):
        current_cache = cache[l]
        dW_temp, dA_prev_temp, db_temp = bkwdactivation(grads['dA' + str(l + 1)], current_cache, 'relu')
        grads['dW' + str(l + 1)] = dW_temp
        grads['dA' + str(l)] = dA_prev_temp
        grads['db' + str(l + 1)] = db_temp
    return grads
Optimize the parameters with gradient descent
def optimize(grads, params, alpha):
    L = len(params)//2
    for l in range(1, L + 1):
        params['W' + str(l)] = params['W' + str(l)] - alpha*grads['dW' + str(l)]
        params['b' + str(l)] = params['b' + str(l)] - alpha*grads['db' + str(l)]
    return params
The neural network model
def model(x, y, layer_dims, iters):
    costs = []
    params = iniparams(layer_dims)
    for i in range(1, iters):
        AL, caches = fwdmodel(x, params)
        cost = J(AL, y)
        costs.append(cost)
        grads = bkwdmodel(AL, y, caches)
        params = optimize(grads, params, 1.2)
        if i % 100 == 0:
            print('Cost after', i, 'iterations is:', cost)
    return costs, params
Run the training (the cost does decrease; see the Cost vs Iterations (Y, X) curve)
costs, params = model(x, y, [30,8,5,4,4,3,1], 3000)
The predict function
def predict(x, params):
    AL, cache = fwdmodel(x, params)
    predictions = AL >= 0.5
    return predictions
Finally, when I do
predictions = predict(xt,params)
predictions
I get:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0]])
Please tell me where I went wrong.
Here's the link to the dataset.
Please help me :D
Answer 0 (score: 0)
I don't understand why you transpose the train_test_split outputs. Why use xt.T and x.T at all? You should try printing the params (array) output and the xt (array) output and look at their state. Are they similar? Does your params output give correct results? Check all of this.
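For example (a sketch, reusing the variable names from the question):

print(xt.shape)  # expect (30, 114) after the transpose
for name, arr in params.items():
    print(name, arr.shape, np.abs(arr).mean())  # do the learned weights look reasonable?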
Answer 1 (score: 0)
My problem was that my neural network was too deep. That's a mistake newbies like me tend to make. I found this great resource that helped me realize the error: http://theorangeduck.com/page/neural-network-not-working
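Concretely (a sketch, keeping the model and predict functions above unchanged), a single hidden layer trains far more easily under the plain 0.01-scaled random initialization:

# One hidden layer instead of five, so the signal no longer dies out
# layer by layer during the forward and backward passes.
costs, params = model(x, y, [30, 8, 1], 3000)
predictions = predict(xt, params)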