Question

我正在尝试构建一个多任务深度神经网络，其中包含用于复杂性调整的配置文件功能。我的目标是使用此功能训练网络，并根据三个不同范围（0％ - 20％），（20％ - 40％）和（40）的通道配置文件百分比计算三种不同的损失及其相应的训练精度％ - 100％）如下面的代码所示。请问我正在做的是正确的，还是有关于我能做到最好的建议？

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import math
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Network Parameters
n_1 = 100               # 1st layer number of neurons
n_2 = 100               # 2nd layer number of neurons
n_input = 784           #MNIST data input (img shape: 28*28)
n_classes = 10          # MNIST total classes (0-9 digits)
learning_rate = 0.0008
training_epochs = 20
batch_size = 30
display_step = 1


np.random.seed(1)
# tf Graph input
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

# Store layers weight & bias
def initialize_param(n_input, n_1, n_2, n_class):
    tf.set_random_seed(1)
    W1 = tf.get_variable("W1", shape = [n_input, n_1], 
        initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", shape = [n_1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", shape = [n_1, n_2], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", shape = [n_2], initializer = tf.zeros_initializer())
    W3 = tf.get_variable("W3", shape = [n_2, n_class], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b3 = tf.get_variable("b3", shape = [n_class], initializer = tf.zeros_initializer())

    parameters = {"W1": W1,"b1": b1,"W2": W2,"b2": b2,"W3": W3,"b3": b3}
    return parameters
parameters = initialize_param(784, 100, 100, 10)

def linear_func(n):
    return[np.float32(1.0 - 1.0 * i/n) for i in range(1, n + 1)]
L = linear_func(100)

def linear_profile(lp, n_1):
    p_L = tf.constant(L, shape = [1, 100])
    L_11 = tf.constant(1.0, shape = [1, int(np.round((lp) * n_1))])
    L_12 = tf.zeros(shape = [1, int(np.round((1 - lp) * n_1))])
    L1 = tf.concat((L_11, L_12), axis = 1)
    p_L1 = tf.multiply(L1, p_L)
    return p_L1

# Creating Multiple Profile
pc1 = np.linspace(0, 0.2, 100)
pc2 = np.linspace(0.2, 0.4, 100)
pc3 = np.linspace(0.4, 1.0, 100)
prof = {"p1": pc1, "p2": pc2, "p3":pc3}

profile_1 = []
profile_2 = []
profile_3 = []

for i, v in enumerate(prof):   
    if i == 0:
        pc1 = prof[v]
        for j in pc1:
            p_L1 = linear_profile(j, 100)
            profile = tf.stack(p_L1, axis = 0) 
            profile_1.append(profile)
        profile1 = tf.convert_to_tensor(profile_1, dtype=tf.float32) 

    elif i == 1:
        pc2 = prof[v]
        for j in pc2:
            p_L1 = linear_profile(j, 100)
            profile = tf.stack(p_L1, axis = 0) 
            profile_2.append(profile)
        profile2 = tf.convert_to_tensor(profile_2, dtype=tf.float32)

    elif i == 2:
        pc3 = prof[v]
        for j in pc3:
            p_L1 = linear_profile(j, 100)
            profile = tf.stack(p_L1, axis = 0) 
            profile_3.append(profile)
        profile3 = tf.convert_to_tensor(profile_3, dtype=tf.float32)

def mlp_1(x, profile_type):
    for j in range(len(pc1)):
        Z_ML11 = tf.add(tf.matmul(x, parameters['W1']), parameters['b1'])  
        A_ML11 = tf.nn.relu(Z_ML11)
        P_ML11 = tf.multiply(profile_type[j], A_ML11)
        Z_ML12 = tf.add(tf.matmul(P_ML11, parameters['W2']), parameters['b2'])  
        A_ML12 = tf.nn.relu(Z_ML12)
        P_ML12 = tf.multiply(profile_type[j], A_ML12)
        out_layer = tf.add(tf.matmul(P_ML12, parameters['W3']), parameters['b3'])
        return out_layer

logits_1 = mlp_1(X, profile1)
logits_2 = mlp_1(X, profile2)
logits_3 = mlp_1(X, profile3)

# Define loss and optimizer
loss_op_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_1, labels = Y))
loss_op_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_2, labels = Y))
loss_op_3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_3, labels = Y))
optimizer_1 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_1)
optimizer_2 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_2)
optimizer_3 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_3)

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Training Loop
    cost_1 = []
    cost_2 = []
    cost_3 = []
    for epoch in range(training_epochs):
        avg_cost1 = 0.
        avg_cost2 = 0.
        avg_cost3 = 0.
        total_batch = int(mnist.train.num_examples/batch_size)

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c_1 = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})
            _, c_2 = sess.run([loss_op_2, optimizer_2], feed_dict = {X: batch_x, Y: batch_y})
            _, c_3 = sess.run([loss_op_3, optimizer_3], feed_dict = {X: batch_x, Y: batch_y})

            # Compute average losses
            avg_cost1 += c_1 / total_batch
            avg_cost2 += c_2 / total_batch
            avg_cost3 += c_3 / total_batch
            cost_1.append(avg_cost1)
            cost_2.append(avg_cost2)
            cost_3.append(avg_cost3)

            logits_list = [logits_1, logits_2, logits_3]
            train_accuracy = []
            for r in logits_list:
                if i % 5000 == 0:
                    pred = tf.nn.softmax(logits_list[r])  # Apply softmax to logits
                    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    trian_accuracy_1 = accuracy.eval({X: mnist.train.images, Y: mnist.train.labels})
                    train.append(trian_accuracy_1)

        # Display logs per epoch step
        avg_cost = [avg_cost1, avg_cost1, avg_cost1]
        for l in avg_cost:
            if epoch % display_step == 0:
                print("Epoch:", '%03d' % (epoch + 1), "cost = {:.9f}".format(avg_cost[l]))
    sess.close()

运行代码时，出现以下错误：

ValueError: TypeError                                 Traceback (most recent call last)
<ipython-input-19-411b2efd4af7> in <module>()
    134 
    135             # Compute average losses
--> 136             avg_cost1 += c_1 / total_batch
    137             avg_cost2 += c_2 / total_batch
    138             avg_cost3 += c_3 / total_batch

TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'

Answer 1

c_1是NoneType，因此无法进行计算。问题在于这一行（看不到行号）：

_, c_1 = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})

因为为了将损失值恢复到c_1，您不应该要求优化器返回值，而应该从损失本身loss_op_1请求。因此该行应该是：

c_1, _ = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})

对于c_2和c_3，这将是相同的。

这只是为了修复列出的TypeError，我还没有审核您的代码以解决任何其他问题。

Answer 2

除了修复实际错误之外，您可能还需要添加一些错误处理来捕获这些错误，并使用event.request.userAttributes context.authorizer块处理代码中出现None值的可能性。

try/except

如何执行多任务深度神经网络训练

2 个答案: