I am very new to TensorFlow. Currently I have built a neural network to solve an ODE (the particular application is not important). The code looks something like this:
# Routine to train the neural network for solving -u'(x) = f(x)
def train_neural_network_batch(x_ph, predict=False):
    prediction = neural_network_model(x_ph)
    dpred = tf.gradients(prediction, x_ph)

    cost = tf.reduce_mean(tf.square( tf.add(dpred,f_ph) ))
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(cost)
I train the network with mini-batch stochastic gradient descent as follows:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Train in each epoch with mini batch
    for epoch in range(hm_epochs):
        epoch_loss = 0
        for step in range(N//batch_size):
            for inputX, inputY in get_batch(x, y, batch_size):
                _, l = sess.run([optimizer,cost], feed_dict={x_ph:inputX, y_ph:inputY})
                epoch_loss += l
        if epoch % 10 == 0:
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
The get_batch function is defined as follows:
# Feed batch data
def get_batch(inputX, inputY, batch_size):
    duration = len(inputX)
    for i in range(0, duration//batch_size):
        returnX = np.random.uniform(0, 1, batch_size)

        # returnY is some function of x
        returnY = ...

        yield returnX, returnY
However, I am now trying to do something considerably more involved with the cost function. In each batch returned by get_batch I need to sample from an arbitrary number of subdomains, so suppose returnX is partitioned into N pieces, each of which corresponds to a different part of my cost function. In particular, I want to do something like this:
for i in range(0, N):
    # compute the ith contribution to the cost function using [start:end] indices of the batch data
    cost += tf.reduce_mean( <some function of dpred[start:end]> )
I know that accessing a placeholder in the way written above makes no sense, because a placeholder is just a placeholder, but I hope the idea of what I am trying to do is clear: the batch data comes back split into partitions, and each partition needs to be used in a different way when computing the cost function. So, given that these partitions are just slices of a placeholder, how do I access them when forming the cost function? A more concrete sketch of what I have in mind follows.
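For concreteness, if every partition happened to have the same fixed length, the kind of construction I am after would look roughly like the sketch below. This is a sketch only; `subdomain_term` is a purely hypothetical stand-in for whatever function the i-th subdomain actually requires.

    part_size = batch_size            # assume each partition has the same fixed length
    cost = 0.0
    for i in range(0, N):
        start = i * part_size
        end = (i + 1) * part_size
        # slice the gradient tensor for this subdomain and add its contribution
        cost += tf.reduce_mean(tf.square(subdomain_term(i, dpred[0][start:end])))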
EDIT: I have attached my full code below.
# Load modules
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
from scipy import special
######################################################################
# Routine to solve -u''(x) = f(x), u(0)=A, u'(0)=B
######################################################################
# Create the arrays x and y, where x is a discretization of the domain (a,b) and y is the source term f(x)
N = 100
a = 0.0
b = 1.0
x = np.arange(a, b, (b-a)/N).reshape((N,1))
y = x
# Boundary conditions
A = 0.0
B = 0.0
# Define the number of neurons in each layer
n_nodes_hl1 = 40
n_nodes_hl2 = 40
n_nodes_hl3 = 40
n_nodes_hl4 = 40
n_nodes_hl5 = 40
n_nodes_hl6 = 40
n_nodes_hl7 = 40
n_nodes_hl8 = 40
n_nodes_hl9 = 40
n_nodes_hl10 = 40
n_nodes_hl11 = 40
n_nodes_hl12 = 40
n_nodes_hl13 = 40
# Define the number of outputs and the learning rate
n_classes = 1
learn_rate = 0.00004
# Define input / output placeholders
x_ph = tf.placeholder('float', [None, 1],name='input')
y_ph = tf.placeholder('float')
w_ph = tf.placeholder('float')
phi_ph = tf.placeholder('float')
# Routine to compute the neural network (13 hidden layers)
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(name='w_h1',initial_value=tf.glorot_uniform_initializer()((1,n_nodes_hl1))),
                      'biases': tf.Variable(name='b_h1',initial_value=0.0)}
    hidden_2_layer = {'weights': tf.Variable(name='w_h2',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl1,n_nodes_hl2))),
                      'biases': tf.Variable(name='b_h2',initial_value=0.0)}
    hidden_3_layer = {'weights': tf.Variable(name='w_h3',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl2,n_nodes_hl3))),
                      'biases': tf.Variable(name='b_h3',initial_value=0.0)}
    hidden_4_layer = {'weights': tf.Variable(name='w_h4',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl3,n_nodes_hl4))),
                      'biases': tf.Variable(name='b_h4',initial_value=0.0)}
    hidden_5_layer = {'weights': tf.Variable(name='w_h5',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl4,n_nodes_hl5))),
                      'biases': tf.Variable(name='b_h5',initial_value=0.0)}
    hidden_6_layer = {'weights': tf.Variable(name='w_h6',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl5,n_nodes_hl6))),
                      'biases': tf.Variable(name='b_h6',initial_value=0.0)}
    hidden_7_layer = {'weights': tf.Variable(name='w_h7',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl6,n_nodes_hl7))),
                      'biases': tf.Variable(name='b_h7',initial_value=0.0)}
    hidden_8_layer = {'weights': tf.Variable(name='w_h8',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl7,n_nodes_hl8))),
                      'biases': tf.Variable(name='b_h8',initial_value=0.0)}
    hidden_9_layer = {'weights': tf.Variable(name='w_h9',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl8,n_nodes_hl9))),
                      'biases': tf.Variable(name='b_h9',initial_value=0.0)}
    hidden_10_layer = {'weights': tf.Variable(name='w_h10',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl9,n_nodes_hl10))),
                       'biases': tf.Variable(name='b_h10',initial_value=0.0)}
    hidden_11_layer = {'weights': tf.Variable(name='w_h11',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl10,n_nodes_hl11))),
                       'biases': tf.Variable(name='b_h11',initial_value=0.0)}
    hidden_12_layer = {'weights': tf.Variable(name='w_h12',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl11,n_nodes_hl12))),
                       'biases': tf.Variable(name='b_h12',initial_value=0.0)}
    hidden_13_layer = {'weights': tf.Variable(name='w_h13',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl12,n_nodes_hl13))),
                       'biases': tf.Variable(name='b_h13',initial_value=0.0)}
    output_layer = {'weights': tf.Variable(name='w_o',initial_value=tf.glorot_uniform_initializer()((n_nodes_hl13,n_classes))),
                    'biases': tf.Variable(name='b_o',initial_value=0.0)}

    # (input_data * weights) + biases
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.tanh(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.tanh(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.tanh(l3)
    l4 = tf.add(tf.matmul(l3, hidden_4_layer['weights']), hidden_4_layer['biases'])
    l4 = tf.nn.tanh(l4)
    l5 = tf.add(tf.matmul(l4, hidden_5_layer['weights']), hidden_5_layer['biases'])
    l5 = tf.nn.tanh(l5)
    l6 = tf.add(tf.matmul(l5, hidden_6_layer['weights']), hidden_6_layer['biases'])
    l6 = tf.nn.tanh(l6)
    l7 = tf.add(tf.matmul(l6, hidden_7_layer['weights']), hidden_7_layer['biases'])
    l7 = tf.nn.tanh(l7)
    l8 = tf.add(tf.matmul(l7, hidden_8_layer['weights']), hidden_8_layer['biases'])
    l8 = tf.nn.tanh(l8)
    l9 = tf.add(tf.matmul(l8, hidden_9_layer['weights']), hidden_9_layer['biases'])
    l9 = tf.nn.tanh(l9)
    l10 = tf.add(tf.matmul(l9, hidden_10_layer['weights']), hidden_10_layer['biases'])
    l10 = tf.nn.tanh(l10)
    l11 = tf.add(tf.matmul(l10, hidden_11_layer['weights']), hidden_11_layer['biases'])
    l11 = tf.nn.tanh(l11)
    l12 = tf.add(tf.matmul(l11, hidden_12_layer['weights']), hidden_12_layer['biases'])
    l12 = tf.nn.tanh(l12)
    l13 = tf.add(tf.matmul(l12, hidden_13_layer['weights']), hidden_13_layer['biases'])
    l13 = tf.nn.relu(l13)

    output = tf.add(tf.matmul(l13, output_layer['weights']), output_layer['biases'], name='output')
    return output
batch_size = 16
nints = 2
h = 1.0/nints
# Feed batch data
def get_batch(inputX, inputY, batch_size):
    duration = len(inputX)
    for i in range(0, duration//batch_size):
        idx = i*batch_size + np.random.randint(0,10,(1))[0]

        # test points
        xTest = np.zeros([nints*batch_size+2,1])
        for j in range(0, nints):
            xTest[1+j*batch_size:j*batch_size+batch_size+1] = np.random.uniform(a+j*h, a+(j+1)*h, [batch_size,1])
        # append boundary points
        xTest[0] = a
        xTest[-1] = b

        # source term
        returnY = xTest

        # weights
        cost_weights = np.ones((nints*batch_size+2,1))
        cost_weights[0] = 0.0
        cost_weights[-1] = 0.0

        returnPhi = np.zeros([nints*batch_size+2,1])
        for j in range(0, nints*batch_size+2):
            if (xTest[j] < 0.5):
                returnPhi[j] = (xTest[j]-a)/h
            else:
                returnPhi[j] = (b-xTest[j])/h

        yield xTest, returnY, returnPhi, cost_weights
# Routine to train the neural network
def train_neural_network_batch(x_ph, predict=False):
    prediction = neural_network_model(x_ph)
    pred_dx = tf.gradients(prediction, x_ph)
    pred_dx2 = tf.gradients(tf.gradients(prediction, x_ph), x_ph)

    # initial residuals
    r = np.zeros([nints-1,1])

    # try computing with indexed placeholder as a test
    for i in range(0, nints-1):
        r[i] = tf.reduce_mean(y_ph[:, 1:2, :])

    # boundary terms
    cost += 20.0*((A-u[0])**2 + (B-u[-1])**2)/2.0

    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(cost)

    # cycles feed forward + backprop
    hm_epochs = 500

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train in each epoch with the whole data
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for step in range(N//batch_size):
                for inputX, inputY, inputPhi, weights in get_batch(x, y, batch_size):
                    _, l = sess.run([optimizer,cost], feed_dict={x_ph:inputX, y_ph:inputY, w_ph:weights, phi_ph:inputPhi})
                    epoch_loss += l
            if epoch % 10 == 0:
                print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)

        # Predict a new input by adding a random number, to check whether the network has actually learned
        x_valid = x + 0.0*np.random.normal(scale=0.1, size=(1))
        return sess.run(tf.squeeze(prediction), {x_ph: x_valid}), x_valid
# Train network
tf.set_random_seed(42)
pred, time = train_neural_network_batch(x_ph)
mypred = pred.reshape(N,1)
u = mypred
# exact solution
ue = (x-x**3)/6.0
# Numerical solution vs. exact solution
fig = plt.figure()
plt.plot(time, u, label='NN solution')
plt.plot(time, ue, label='Exact solution')
plt.show()
fig = plt.figure()
plt.plot(time, abs(u - ue))
plt.xlabel('$x$')
plt.ylabel('$|u_{N}(x) - u_{exact}(x)|$')
plt.title('Pointwise Error of NN Approx')
plt.show()
Answer 0 (score: 1)
I am not sure I fully understand your question, but let me give it a try. First, I assume that by partitioning you mean something like the following: if x has shape [_batch_, a, b] and the partitioning is along the 'a' axis, then elements a_0 through a_c form the first partition, a_{c+1} through a_d form the second, and so on. Is that right?
In that case you can index into the placeholder. Matching my example above and your code snippet, that would look like
tf.reduce_mean(some_function(x[:, i:i+c, :]))
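To make that concrete for your 2-D [batch, 1] placeholders: if the partition boundaries are fixed when the graph is built, the partial costs could be assembled along the lines below. This is only a sketch; `some_function` still stands for whatever each subdomain needs, and I am reusing the nints/batch_size layout from your get_batch.

    part_size = batch_size                   # assumed fixed length per partition
    partial_costs = []
    for i in range(nints):
        start = 1 + i * part_size            # skip the prepended boundary point
        end = start + part_size
        # slicing a tensor built from a placeholder is an ordinary graph op
        partial_costs.append(tf.reduce_mean(some_function(pred_dx[0][start:end])))
    cost = tf.add_n(partial_costs)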
The other question is how the partitioning comes about in the first place. Could you simply have your model output the partitions as separate variables? What does your model look like? Does the "arbitrary number of subdomains" stay constant for a given model, or can it change between iterations?
One last thing: since you are new to TensorFlow, it may be worth starting with TensorFlow 2.0. A lot has changed there, and starting with it will save you from having to learn both tf v1 and tf v2 within a short period of time.
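For instance, in TF 2.x the same kind of per-partition loss can be written eagerly with a GradientTape. Here is a rough sketch under the assumption that `model` is a callable (e.g. a Keras model) and that each subdomain uses the residual u'(x) + f(x) from your first snippet, with f(x) = x as in your full code:

    import tensorflow as tf  # TF 2.x

    def partitioned_loss(model, x_batch, part_size, n_parts):
        # x_batch has shape [n_parts * part_size, 1]
        with tf.GradientTape() as tape:
            tape.watch(x_batch)
            u = model(x_batch)
        du_dx = tape.gradient(u, x_batch)     # u'(x) at every sample point
        losses = []
        for i in range(n_parts):
            start, end = i * part_size, (i + 1) * part_size
            # each subdomain contributes its own mean-squared residual
            losses.append(tf.reduce_mean(tf.square(du_dx[start:end] + x_batch[start:end])))
        return tf.add_n(losses)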