The X and Y placeholders are each fed a numpy array of shape [7, 36, 3]: 7 datasets (configurations), 36 data points (atoms) per dataset, and 3 input features. The Z placeholder holds the y data to be fitted and is a [7, 1] numpy array.
My problem is that, as the number of epochs increases, the predicted values all tend towards 0. In the limit of many epochs (epochs > 1000) the output prediction is a [7, 1] numpy array of zeros, and it stays that way no matter how many more epochs I add. In other words, more training makes the predictions worse.
I cannot work out what is causing this lack of convergence.
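For concreteness, the arrays being fed look roughly like this (dummy zeros shown only to illustrate the dimensions; the real data comes from the pickle files loaded below):

import numpy as np
# 7 configurations x 36 atoms x 3 input features per atom
ideal_Tensor = np.zeros((7, 36, 3), dtype=np.float32)      # fed into the X placeholder
displaced_Tensor = np.zeros((7, 36, 3), dtype=np.float32)  # fed into the Y placeholder
# one target value per configuration
Vcoup_Tensor = np.zeros((7, 1), dtype=np.float32)          # fed into the Z placeholder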
My code is below:
#!/usr/bin/python
import tensorflow as tf
import numpy as np
from tensorflow.contrib import learn
from sklearn import cross_validation
from matplotlib import pyplot as plt
from testdfreader import get_dataset
import pickle
#Loading data
with open('ideal_Tensor_data','rb') as handle:
    ideal_Tensor = pickle.load(handle)[:]
with open('displaced_Tensor_data','rb') as handle:
    displaced_Tensor = pickle.load(handle)[:]
with open('Vcoup_Tensor_data','rb') as handle:
    Vcoup_Tensor = pickle.load(handle)[:]
#hyperparameters
n_configs = displaced_Tensor.shape[0]
n_atoms = displaced_Tensor.shape[1]
n_input = displaced_Tensor.shape[2] # should scale to a [energy,force] list
n_hidden1 = 21
n_hidden2 = 21
n_output = 1 # Vnn
#n_output=50
keep_prob = 0.8
learning_rate=0.05
initializer = tf.contrib.layers.xavier_initializer()
#weights
with tf.name_scope("Weights_and_Biases"):
# weights = {
# 'W1': tf.Variable(tf.random_normal([n_input, n_hidden1], 0, 0.1), name = "W1"), #when using tanh activation - use Xavier_Initializer
# 'W2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2], 0, 0.1), name = "W2"),
# 'W3': tf.Variable(tf.random_normal([n_hidden2, n_output], 0, 0.1), name = "W3")
# }
weights = {
'W1': tf.Variable(initializer([n_input,n_hidden1])),
'W2': tf.Variable(initializer([n_hidden1,n_hidden2])),
'W3': tf.Variable(initializer([n_hidden2,n_output]))
}
#bias
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden1], 0, 0.1), name = "b1"),
'b2': tf.Variable(tf.random_normal([n_hidden2], 0, 0.1), name = "b2"),
'b3': tf.Variable(tf.random_normal([n_output], 0, 0.1), name = "b3")
}
with tf.name_scope("Training_Neural_Network"):
#Training Computation
def training_multilayer_perceptron(X, weights, biases): #dropout should only be used during training, not during evaluation
with tf.name_scope("Layer1"):
layer_1=tf.einsum('ijk,kl->ijl)', X,weights['W1'])
layer_1=tf.add(layer_1,biases['b1'])
layer_1 = tf.nn.tanh(layer_1)
#layer_1 = tf.nn.dropout(layer_1,keep_prob)
with tf.name_scope("Layer2"):
layer_2=tf.einsum('ijk,kl->ijl)', layer_1,weights['W2'])
layer_2=tf.add(layer_2,biases['b2'])
layer_2 = tf.nn.tanh(layer_2)
#layer_2 = tf.nn.dropout(layer_2,keep_prob)
with tf.name_scope("Layer3"):
out_layer=tf.einsum('ijk,kl->ijl)', layer_2,weights['W3'])
out_layer=tf.add(out_layer,biases['b3'])
return out_layer, layer_1, layer_2
X = tf.placeholder("float",[None,None,n_input], name = "ideal_data")
Y = tf.placeholder("float",[None,None,n_input], name = "displaced_data")
Z = tf.placeholder("float",[None,1],name = "Vcoup")
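# X and Y accept [n_configs, n_atoms, n_input] batches; Z holds one target (Vcoup) value per configuration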
predicted_ideal,ideal_l1,ideal_l2 = training_multilayer_perceptron(X, weights, biases)# ideal_l1 and l2 for debugging, as below for displaced
predicted_disp,disp_l1,disp_l2 = training_multilayer_perceptron(Y, weights, biases)
predicted = tf.reduce_sum(predicted_disp - predicted_ideal, axis=1) # sum over Natom axis
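# out_layer from the perceptron is [n_configs, n_atoms, 1], so the reduce_sum over axis=1 leaves predicted as [n_configs, 1], matching Z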
print predicted.shape
print Z.shape
cost = tf.reduce_sum(tf.square(Z - predicted))
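# cost is the total (un-averaged) squared error summed over all configurations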
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)
    for epoch in range(1):
        p, c, o = session.run([predicted, cost, optimizer], feed_dict={X: ideal_Tensor, Y: displaced_Tensor, Z: Vcoup_Tensor})
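For completeness, this is a sketch of the kind of monitoring loop I can wrap around the same graph to watch the predictions collapse; it only uses tensors already defined above (including the debug output ideal_l1 returned by training_multilayer_perceptron), and the epoch count and print interval are arbitrary:

with tf.Session() as session:
    session.run(init)
    feed = {X: ideal_Tensor, Y: displaced_Tensor, Z: Vcoup_Tensor}
    for epoch in range(2000):
        _, c = session.run([optimizer, cost], feed_dict=feed)
        if epoch % 100 == 0:
            # fetch the prediction and a hidden-layer activation to see where things saturate
            p, l1 = session.run([predicted, ideal_l1], feed_dict=feed)
            print('epoch %d, cost %.6f' % (epoch, c))
            print('predicted: %s' % p.ravel())
            print('layer-1 activation range (ideal input): %.4f to %.4f' % (l1.min(), l1.max()))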