TensorFlow regression NN: as the number of epochs increases, all predictions -> 0

Asked: 2018-02-08 13:49:18

Tags: python tensorflow

The inputs to both the X and Y placeholders are numpy arrays of dimension [7, 36, 3]: 7 is the number of datasets, 36 the number of data points, and 3 the number of input features. My problem is this: as the number of epochs increases, the predicted values all tend towards 0. The Z placeholder, which holds the y data to be fitted, is a [7, 1] numpy array.
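For reference, a minimal sketch of the shapes being fed in, using hypothetical random arrays in place of the real pickled data (the variable names mirror those used in the code below):

import numpy as np

# Hypothetical stand-ins for the pickled tensors, purely to illustrate the shapes.
ideal_Tensor = np.random.rand(7, 36, 3).astype(np.float32)      # fed to X: [n_configs, n_atoms, n_input]
displaced_Tensor = np.random.rand(7, 36, 3).astype(np.float32)  # fed to Y: same shape as the ideal data
Vcoup_Tensor = np.random.rand(7, 1).astype(np.float32)          # fed to Z: [n_configs, 1] regression targets

print(ideal_Tensor.shape, displaced_Tensor.shape, Vcoup_Tensor.shape)
# (7, 36, 3) (7, 36, 3) (7, 1)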

As the number of epochs increases this reaches a limit, at roughly epochs > 1000: the output prediction becomes a [7, 1] numpy array of zeros, and it stays that way if the number of epochs is increased further. In other words, with more training the predictions get worse.

I don't know what is causing this lack of convergence.

My code is as follows:

#!/usr/bin/python

import tensorflow as tf
import numpy as np
from tensorflow.contrib import learn
from sklearn import cross_validation
from matplotlib import pyplot as plt
from testdfreader import get_dataset
import pickle

#Loading data
with open('ideal_Tensor_data','rb') as handle:
  ideal_Tensor = pickle.load(handle)[:]

with open('displaced_Tensor_data','rb') as handle:
  displaced_Tensor = pickle.load(handle)[:]

with open('Vcoup_Tensor_data','rb') as handle:
  Vcoup_Tensor = pickle.load(handle)[:]



#hyperparameters
n_configs = displaced_Tensor.shape[0]
n_atoms = displaced_Tensor.shape[1]
n_input = displaced_Tensor.shape[2] # should scale to a [energy,force] list
n_hidden1 = 21
n_hidden2 = 21
n_output = 1 # Vnn
#n_output=50
keep_prob = 0.8
learning_rate=0.05
initializer = tf.contrib.layers.xavier_initializer()

#weights
with tf.name_scope("Weights_and_Biases"):
#  weights = {
#    'W1': tf.Variable(tf.random_normal([n_input, n_hidden1], 0, 0.1), name = "W1"), #when using tanh activation - use Xavier_Initializer
#    'W2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2], 0, 0.1), name = "W2"),
#    'W3': tf.Variable(tf.random_normal([n_hidden2, n_output], 0, 0.1), name = "W3")
#  }

  weights = {
    'W1': tf.Variable(initializer([n_input,n_hidden1])),
    'W2': tf.Variable(initializer([n_hidden1,n_hidden2])),
    'W3': tf.Variable(initializer([n_hidden2,n_output]))
  }

#bias
  biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden1], 0, 0.1), name = "b1"),
    'b2': tf.Variable(tf.random_normal([n_hidden2], 0, 0.1), name = "b2"),
    'b3': tf.Variable(tf.random_normal([n_output], 0, 0.1), name = "b3")
  }


with tf.name_scope("Training_Neural_Network"):
#Training Computation
  def training_multilayer_perceptron(X, weights, biases): #dropout should only be used during training, not during evaluation
    with tf.name_scope("Layer1"):
      layer_1 = tf.einsum('ijk,kl->ijl', X, weights['W1'])  # [n_configs, n_atoms, n_input] x [n_input, n_hidden1] -> [n_configs, n_atoms, n_hidden1]
      layer_1 = tf.add(layer_1, biases['b1'])
      layer_1 = tf.nn.tanh(layer_1)
      #layer_1 = tf.nn.dropout(layer_1, keep_prob)
    with tf.name_scope("Layer2"):
      layer_2 = tf.einsum('ijk,kl->ijl', layer_1, weights['W2'])
      layer_2 = tf.add(layer_2, biases['b2'])
      layer_2 = tf.nn.tanh(layer_2)
      #layer_2 = tf.nn.dropout(layer_2, keep_prob)
    with tf.name_scope("Layer3"):
      out_layer = tf.einsum('ijk,kl->ijl', layer_2, weights['W3'])
      out_layer = tf.add(out_layer, biases['b3'])
      return out_layer, layer_1, layer_2

X = tf.placeholder("float",[None,None,n_input], name = "ideal_data")
Y = tf.placeholder("float",[None,None,n_input], name = "displaced_data")
Z = tf.placeholder("float",[None,1],name = "Vcoup")




predicted_ideal,ideal_l1,ideal_l2 = training_multilayer_perceptron(X, weights, biases)# ideal_l1 and l2 for debugging, as below for displaced
predicted_disp,disp_l1,disp_l2 = training_multilayer_perceptron(Y, weights, biases)
predicted = tf.reduce_sum(predicted_disp - predicted_ideal, axis=1) # sum over Natom axis
print(predicted.shape)
print(Z.shape)

cost = tf.reduce_sum(tf.square(Z - predicted))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()



with tf.Session() as session:

  session.run(init)

  for epoch in range(1):
    p,c,o = session.run([predicted,cost,optimizer], feed_dict = {X: ideal_Tensor, Y: displaced_Tensor, Z: Vcoup_Tensor})
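As posted, the loop above only runs a single training step; the collapse to zeros described in the question appears after many more epochs. Below is a rough monitoring sketch, not a confirmed fix, that reuses the tensors defined above to log the cost and inspect the intermediate tanh layers while training runs. The 2000-epoch count and the 100-step logging interval are arbitrary choices for illustration:

with tf.Session() as session:
  session.run(init)
  for epoch in range(2000):
    p, c, _ = session.run([predicted, cost, optimizer],
                          feed_dict={X: ideal_Tensor, Y: displaced_Tensor, Z: Vcoup_Tensor})
    if epoch % 100 == 0:
      # Watch whether the cost actually decreases and whether the tanh layers saturate towards +/-1.
      l1, l2 = session.run([disp_l1, disp_l2], feed_dict={Y: displaced_Tensor})
      print(epoch, c, np.abs(l1).max(), np.abs(l2).max(), p.ravel()[:3])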

0 Answers:

No answers yet