I am very new to TensorFlow. Currently I have built a neural network to solve an ODE (the particular application is not important). The code looks something like this:
# Routine to train the neural network for solving -u'(x) = f(x)
def train_neural_network_batch(x_ph, predict=False):
    prediction = neural_network_model(x_ph)
    dpred = tf.gradients(prediction, x_ph)

    cost = tf.reduce_mean(tf.square( tf.add(dpred,f_ph) ))
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(cost)
I train the network with mini-batch stochastic gradient descent as follows:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Train in each epoch with mini batch
    for epoch in range(hm_epochs):
        epoch_loss = 0
        for step in range(N//batch_size):
            for inputX, inputY in get_batch(x, y, batch_size):
                _, l = sess.run([optimizer,cost], feed_dict={x_ph:inputX, y_ph:inputY})
                epoch_loss += l
        if epoch % 10 == 0:
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
The get_batch function is defined as follows:
# Feed batch data
def get_batch(inputX, inputY, batch_size):
    duration = len(inputX)
    for i in range(0, duration//batch_size):
        returnX = np.random.uniform(0, 1, batch_size)

        # returnY is some function of x
        returnY = ...

        yield returnX, returnY
However, I am now trying to do something considerably more involved with the cost function. In each batch returned by get_batch I need to sample from an arbitrary number of subdomains, so suppose returnX is partitioned into N pieces, each of which corresponds to a different part of my cost function. In particular, I want to do something like this:
for i in range(0, N):
    # compute the ith contribution to the cost function using [start:end] indices of the batch data
    cost += tf.reduce_mean( <some function of dpred[start:end]> )
I know that accessing a placeholder in the way written above makes no sense, because a placeholder is just a placeholder, but I hope the idea of what I am trying to do is clear: the batch data comes back split into partitions, and each partition needs to be used in a different way when computing the cost function. So, given that these partitions are just slices of a placeholder, how do I access them when forming the cost function? A more concrete sketch of what I have in mind follows.
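For concreteness, if every partition happened to have the same fixed length, the kind of construction I am after would look roughly like the sketch below. This is a sketch only; `subdomain_term` is a purely hypothetical stand-in for whatever function the i-th subdomain actually requires.

    part_size = batch_size            # assume each partition has the same fixed length
    cost = 0.0
    for i in range(0, N):
        start = i * part_size
        end = (i + 1) * part_size
        # slice the gradient tensor for this subdomain and add its contribution
        cost += tf.reduce_mean(tf.square(subdomain_term(i, dpred[0][start:end])))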
EDIT: I have attached my full code below.
# Load modules
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
from scipy import special
######################################################################
# Routine to solve -u''(x) = f(x), u(0)=A, u'(0)=B
######################################################################
# Create the arrays x and y, where x is a discretization of the domain (a,b) and y is the source term f(x)
N = 100
a = 0.0
b = 1.0
x = np.arange(a, b, (b-a)/N).reshape((N,1))
y = x
# Boundary conditions
A = 0.0
B = 0.0
# Define the number of neurons in each layer
n_nodes_hl1 = 40
n_nodes_hl2 = 40
n_nodes_hl3 = 40
n_nodes_hl4 = 40
n_nodes_hl5 = 40
n_nodes_hl6 = 40
n_nodes_hl7 = 40
n_nodes_hl8 = 40
n_nodes_hl9 = 40
n_nodes_hl10 = 40
n_nodes_hl11 = 40
n_nodes_hl12 = 40
n_nodes_hl13 = 40
# Define the number of outputs and the learning rate
n_classes = 1
learn_rate = 0.00004
# Define input / output placeholders
x_ph = tf.placeholder('float', [None, 1],name='input')
y_ph = tf.placeholder('float')
w_ph = tf.placeholder('float')
phi_ph = tf.placeholder('float')
# Routine to compute the neural network (13 hidden layers)
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(name='w_h1',initial_value=tf.glorot_uniform_initializer()((1,n_nodes_hl1))),
                      'biases': tf.Variable(name='b_h1',initial_value=0.0)}
    hidden_2_layer = {'weights': tf.Variable(name='w_h2',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl1,n_nodes_hl2))),
                      'biases': tf.Variable(name='b_h2',initial_value=0.0)}
    hidden_3_layer = {'weights': tf.Variable(name='w_h3',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl2,n_nodes_hl3))),
                      'biases': tf.Variable(name='b_h3',initial_value=0.0)}
    hidden_4_layer = {'weights': tf.Variable(name='w_h4',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl3,n_nodes_hl4))),
                      'biases': tf.Variable(name='b_h4',initial_value=0.0)}
    hidden_5_layer = {'weights': tf.Variable(name='w_h5',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl4,n_nodes_hl5))),
                      'biases': tf.Variable(name='b_h5',initial_value=0.0)}
    hidden_6_layer = {'weights': tf.Variable(name='w_h6',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl5,n_nodes_hl6))),
                      'biases': tf.Variable(name='b_h6',initial_value=0.0)}
    hidden_7_layer = {'weights': tf.Variable(name='w_h7',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl6,n_nodes_hl7))),
                      'biases': tf.Variable(name='b_h7',initial_value=0.0)}
    hidden_8_layer = {'weights': tf.Variable(name='w_h8',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl7,n_nodes_hl8))),
                      'biases': tf.Variable(name='b_h8',initial_value=0.0)}
    hidden_9_layer = {'weights': tf.Variable(name='w_h9',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl8,n_nodes_hl9))),
                      'biases': tf.Variable(name='b_h9',initial_value=0.0)}
    hidden_10_layer = {'weights': tf.Variable(name='w_h10',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl9,n_nodes_hl10))),
                       'biases': tf.Variable(name='b_h10',initial_value=0.0)}
    hidden_11_layer = {'weights': tf.Variable(name='w_h11',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl10,n_nodes_hl11))),
                       'biases': tf.Variable(name='b_h11',initial_value=0.0)}
    hidden_12_layer = {'weights': tf.Variable(name='w_h12',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl11,n_nodes_hl12))),
                       'biases': tf.Variable(name='b_h12',initial_value=0.0)}
    hidden_13_layer = {'weights': tf.Variable(name='w_h13',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl12,n_nodes_hl13))),
                       'biases': tf.Variable(name='b_h13',initial_value=0.0)}
    output_layer = {'weights': tf.Variable(name='w_o',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl13,n_classes))),
                    'biases': tf.Variable(name='b_o',initial_value=0.0)}

    # (input_data * weights) + biases
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.tanh(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.tanh(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.tanh(l3)
    l4 = tf.add(tf.matmul(l3, hidden_4_layer['weights']), hidden_4_layer['biases'])
    l4 = tf.nn.tanh(l4)
    l5 = tf.add(tf.matmul(l4, hidden_5_layer['weights']), hidden_5_layer['biases'])
    l5 = tf.nn.tanh(l5)
    l6 = tf.add(tf.matmul(l5, hidden_6_layer['weights']), hidden_6_layer['biases'])
    l6 = tf.nn.tanh(l6)
    l7 = tf.add(tf.matmul(l6, hidden_7_layer['weights']), hidden_7_layer['biases'])
    l7 = tf.nn.tanh(l7)
    l8 = tf.add(tf.matmul(l7, hidden_8_layer['weights']), hidden_8_layer['biases'])
    l8 = tf.nn.tanh(l8)
    l9 = tf.add(tf.matmul(l8, hidden_9_layer['weights']), hidden_9_layer['biases'])
    l9 = tf.nn.tanh(l9)
    l10 = tf.add(tf.matmul(l9, hidden_10_layer['weights']), hidden_10_layer['biases'])
    l10 = tf.nn.tanh(l10)
    l11 = tf.add(tf.matmul(l10, hidden_11_layer['weights']), hidden_11_layer['biases'])
    l11 = tf.nn.tanh(l11)
    l12 = tf.add(tf.matmul(l11, hidden_12_layer['weights']), hidden_12_layer['biases'])
    l12 = tf.nn.tanh(l12)
    l13 = tf.add(tf.matmul(l12, hidden_13_layer['weights']), hidden_13_layer['biases'])
    l13 = tf.nn.relu(l13)

    output = tf.add(tf.matmul(l13, output_layer['weights']), output_layer['biases'], name='output')
    return output
batch_size = 16
nints = 2
h = 1.0/nints
# Feed batch data
def get_batch(inputX, inputY, batch_size):
    duration = len(inputX)
    for i in range(0, duration//batch_size):
        idx = i*batch_size + np.random.randint(0,10,(1))[0]

        # test points
        xTest = np.zeros([nints*batch_size+2,1])
        for j in range(0, nints):
            xTest[1+j*batch_size:j*batch_size+batch_size+1] = np.random.uniform(a+j*h, a+(j+1)*h, [batch_size,1])
        # append boundary points
        xTest[0] = a
        xTest[-1] = b

        # source term
        returnY = xTest

        # weights
        cost_weights = np.ones((nints*batch_size+2,1))
        cost_weights[0] = 0.0
        cost_weights[-1] = 0.0

        returnPhi = np.zeros([nints*batch_size+2,1])
        for j in range(0, nints*batch_size+2):
            if (xTest[j] < 0.5):
                returnPhi[j] = (xTest[j]-a)/h
            else:
                returnPhi[j] = (b-xTest[j])/h

        yield xTest, returnY, returnPhi, cost_weights
# Routine to train the neural network
def train_neural_network_batch(x_ph, predict=False):
    prediction = neural_network_model(x_ph)
    pred_dx = tf.gradients(prediction, x_ph)
    pred_dx2 = tf.gradients(tf.gradients(prediction, x_ph), x_ph)

    # initial residuals
    r = np.zeros([nints-1,1])

    # try computing with indexed placeholder as a test
    for i in range(0, nints-1):
        r[i] = tf.reduce_mean(y_ph[:, 1:2, :])

    # boundary terms
    cost += 20.0*((A-u[0])**2 + (B-u[-1])**2)/2.0

    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(cost)

    # cycles feed forward + backprop
    hm_epochs = 500

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train in each epoch with the whole data
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for step in range(N//batch_size):
                for inputX, inputY, inputPhi, weights in get_batch(x, y, batch_size):
                    _, l = sess.run([optimizer,cost], feed_dict={x_ph:inputX, y_ph:inputY, w_ph:weights, phi_ph:inputPhi})
                    epoch_loss += l
            if epoch % 10 == 0:
                print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)

        # Predict a new input by adding a random number, to check whether the network has actually learned
        x_valid = x + 0.0*np.random.normal(scale=0.1, size=(1))
        return sess.run(tf.squeeze(prediction), {x_ph: x_valid}), x_valid
# Train network
tf.set_random_seed(42)
pred, time = train_neural_network_batch(x_ph)
mypred = pred.reshape(N,1)
u = mypred
# exact solution
ue = (x-x**3)/6.0
# Numerical solution vs. exact solution
fig = plt.figure()
plt.plot(time, u, label='NN solution')
plt.plot(time, ue, label='Exact solution')
plt.show()
fig = plt.figure()
plt.plot(time, abs(u - ue))
plt.xlabel('$x$')
plt.ylabel('$|u_{N}(x) - u_{exact}(x)|$')
plt.title('Pointwise Error of NN Approx')
plt.show()
Answer 0 (score: 1)
I am not sure I fully understand your question, but let me give it a try. First, I assume that by partitioning you mean something like the following: if x has shape [_batch_, a, b] and the partitioning is along the 'a' axis, then elements a_0 through a_c form the first partition, a_{c+1} through a_d form the second, and so on. Is that right?
In that case you can index into the placeholder. Matching my example above and your code snippet, that would look like
tf.reduce_mean(some_function(x[:, i:i+c, :]))
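To make that concrete for your 2-D [batch, 1] placeholders: if the partition boundaries are fixed when the graph is built, the partial costs could be assembled along the lines below. This is only a sketch; `some_function` still stands for whatever each subdomain needs, and I am reusing the nints/batch_size layout from your get_batch.

    part_size = batch_size                   # assumed fixed length per partition
    partial_costs = []
    for i in range(nints):
        start = 1 + i * part_size            # skip the prepended boundary point
        end = start + part_size
        # slicing a tensor built from a placeholder is an ordinary graph op
        partial_costs.append(tf.reduce_mean(some_function(pred_dx[0][start:end])))
    cost = tf.add_n(partial_costs)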
The other question is how the partitioning comes about in the first place. Could you simply have your model output the partitions as separate variables? What does your model look like? Does the "arbitrary number of subdomains" stay constant for a given model, or can it change between iterations?
One last thing: since you are new to TensorFlow, it may be worth starting with TensorFlow 2.0. A lot has changed there, and starting with it will save you from having to learn both tf v1 and tf v2 within a short period of time.
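For instance, in TF 2.x the same kind of per-partition loss can be written eagerly with a GradientTape. Here is a rough sketch under the assumption that `model` is a callable (e.g. a Keras model) and that each subdomain uses the residual u'(x) + f(x) from your first snippet, with f(x) = x as in your full code:

    import tensorflow as tf  # TF 2.x

    def partitioned_loss(model, x_batch, part_size, n_parts):
        # x_batch has shape [n_parts * part_size, 1]
        with tf.GradientTape() as tape:
            tape.watch(x_batch)
            u = model(x_batch)
        du_dx = tape.gradient(u, x_batch)     # u'(x) at every sample point
        losses = []
        for i in range(n_parts):
            start, end = i * part_size, (i + 1) * part_size
            # each subdomain contributes its own mean-squared residual
            losses.append(tf.reduce_mean(tf.square(du_dx[start:end] + x_batch[start:end])))
        return tf.add_n(losses)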