Question

我正在尝试利用deeplearning.ai的课程中的代码来分析我自己的一个数据集。

当我尝试实现代码时，我认为适当的改动，我看到每个时代后我的成本不会改变（成本= 1.459）。

我尝试通过删除小批量来改变代码以使其更简单，但成本仍然相同。顺便说一句，当我在课程中这样做时，它按预期运行，但又在不同的数据集上运行。

以下代码：

import tensorflow as tf
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from math import floor, ceil

from sklearn.model_selection import train_test_split
# import data

X_main = pd.read_csv(r"C:\Users\Mike\Desktop\Radiation Oncology\Glioma\CSV data\glioma DB X.csv")

Y_main = pd.read_csv(r"C:\Users\Mike\Desktop\Radiation Oncology\Glioma\CSV data\glioma DB Y.csv")
# Need to split data into train, validate, test

# partition with sklearn (still as dataframe)

x_train, x_test, y_train, y_test = train_test_split(X_main, Y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# Normalize training data; will want to have the same mu and sigma for test

def normalize_features(dataset):
    mu = np.mean(dataset, axis = 0) # columns
    sigma = np.std(dataset, axis = 0)
    norm_parameters = {'mu': mu,
                'sigma': sigma}
    return (dataset-mu)/(sigma+1e-10), norm_parameters

# Normal X data; using same mu and sigma from test set; then transposed

x_train, norm_parameters = normalize_features(x_train)

x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)

x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_train = np.transpose(x_train)
x_val = np.transpose(x_val)
x_test = np.transpose(x_test)

y_train = np.transpose(y_train)
y_val = np.transpose(y_val)
y_test = np.transpose(y_test)
# converting values from database to matrix
x_train = x_train.as_matrix()
x_val = x_val.as_matrix()
x_test = x_test.as_matrix()

y_train = y_train.as_matrix()
y_val = y_val.as_matrix()
y_test = y_test.as_matrix()
# testing shape

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

# convert y to array per value so 3 = [0 0 1]

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y

y_train = convert_to_one_hot(y_train, 4)
y_val = convert_to_one_hot(y_val, 4)
y_test = convert_to_one_hot(y_test, 4)
print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))

# minibatches for later

def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Creates a list of random minibatches from (X, Y)

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    mini_batch_size - size of the mini-batches, integer
    seed -- this is only for the purpose of grading, so that you're "random minibatches are the same as ours.

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """

    m = X.shape[1]                  # number of training examples
    mini_batches = []

    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0],m))

    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    return mini_batches


# starting TF graph
# Create X and Y placeholders
def create_xy_placeholder(n_x, n_y):
    X = tf.placeholder(tf.float32, shape = [n_x, None], name = 'X')
    Y = tf.placeholder(tf.float32, shape = [n_y, None], name = 'Y')

    return X, Y
X, Y = create_xy_placeholder(x_train.shape[0],y_train.shape[0])
print('X = ' + str(X))
print('Y = ' + str(Y))

# initialize parameters for 3 hidden layer network
def initialize_parameters():
    W1 = tf.get_variable('W1', [50, 67], initializer = tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", [50,1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable('W2', [40, 50], initializer = tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable("b2", [40,1], initializer = tf.zeros_initializer())
    W3 = tf.get_variable('W3', [4, 40], initializer = tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable("b3", [1,1], initializer = tf.zeros_initializer())

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}

    return parameters
tf.reset_default_graph()
with tf.Session() as sess:
    parameters = initialize_parameters()
    print("W1 = " + str(parameters["W1"]))
    print("b1 = " + str(parameters["b1"]))
    print("W2 = " + str(parameters["W2"]))
    print("b2 = " + str(parameters["b2"]))

# forward propagation
def forward_propagation(X, parameters):   
    # Retrieve the parameters from the dictionary "parameters" 
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']


    Z1 = tf.matmul(W1, X) + b1  
    A1 = tf.nn.relu(Z1)                                            
    Z2 = tf.matmul(W2, A1) + b2                           
    A2 = tf.nn.relu(Z2)                                              
    Z3 = tf.matmul(W3, A2) + b3                                      

    return Z3
tf.reset_default_graph()

with tf.Session() as sess:
    X, Y = create_xy_placeholder(x_train.shape[0], y_train.shape[0])
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    print("Z3 = " + str(Z3))

# compute cost
def compute_cost(Z3, Y):

    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, 
                                                                  labels = labels))
    return cost
tf.reset_default_graph()

with tf.Session() as sess:
    X, Y = create_xy_placeholder(x_train.shape[0], y_train.shape[0])
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    print("cost = " + str(cost))

def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001,
          num_epochs = 1500, minibatch_size = 32, print_cost = True):

    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    (n_x, m) = X_train.shape                          # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                        # To keep track of the cost

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_xy_placeholder(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Start the session to compute the tensorflow graph
    with tf.Session(config=config) as sess:
        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).

                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict = {X: minibatch_X, Y: minibatch_Y})

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))


        return parameters

parameters = model(x_train, y_train, x_test, y_test)

编辑：在遵循评论中提到的跟踪之后，我能够获得改进的学习

在Tensorflow中评估不变成本函数的任何提示

0 个答案: