我是Tensorflow的新手,我想知道为什么在每个时期都得到成本,W和b的nan值?我正在设置一个交通游戏,我想训练一个模型,以便能够根据以前的奖励和以前的绿灯持续时间来预测最佳的绿灯持续时间。我尝试按照this guide进行设置,但似乎没有用。有任何想法吗?这应该可以复制我遇到的问题,并且我添加了许多印刷品可以帮助比我更有经验的人。
import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings
warnings.simplefilter(action='once', category=FutureWarning) # future warnings annoy me
# add in a couple of rewards and light durations
current_reward = [-1000,-900,-950]
current_green = [10,12,12]
current_reward = np.array(current_reward)
current_green = np.array(current_green)
# Pass in reward and green_light
def green_light_duration_new(current_reward, current_green):
# Predicting the best light duration based on previous rewards.
# predict the best duration based on previous step's reward value, using simple linear regression model
x = current_reward
y = current_green
n = len(x)
# Plot of Training Data
plt.scatter(x, y)
plt.ylabel('Green Light Duration')
plt.title("Training Data")
X = tf.placeholder("float")
Y = tf.placeholder("float")
W = tf.Variable(np.random.randn(), name = "W")
b = tf.Variable(np.random.randn(), name = "b")
learning_rate = 0.01
training_epochs = 500
# Hypothesis
y_pred = tf.add(tf.multiply(X, W), b)
print('y_pred : ', y_pred)
print('y_pred dtype : ', y_pred.dtype)
# Mean Squared Error Cost Function
cost = tf.reduce_sum(tf.pow(y_pred-Y, 2)) / (2 * n)
print('cost : ', cost)
print('cost dtype: ', cost.dtype)
# Gradient Descent Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Global Variables Initializer
init = tf.global_variables_initializer()
# Starting the Tensorflow Session
with tf.Session() as sess:
# Initializing the Variables
# Iterating through all the epochs
for epoch in range(training_epochs):
# Feeding each data point into the optimizer using Feed Dictionary
for (_x, _y) in zip(x, y):
print('_x : ',_x)
print('_y : ',_y)
sess.run(optimizer, feed_dict = {X : _x, Y : _y})
# Displaying the result after every 50 epochs
if (epoch + 1) % 50 == 0:
# Calculating the cost a every epoch
c = sess.run(cost, feed_dict = {X : x, Y : y})
print('c : ', c)
print('c dtype : ', c.dtype)
print("Epoch", (epoch + 1), ": cost =", c, "W =", sess.run(W), "b =", sess.run(b))
# Storing necessary values to be used outside the Session
training_cost = sess.run(cost, feed_dict ={X: x, Y: y})
print('training_cost : ', training_cost)
print('training_cost dtype : ', training_cost.dtype)
weight = sess.run(W)
print('weight : ', weight)
print('weight : ', weight.dtype)
bias = sess.run(b)
print('bias : ', bias)
print('biad dtype : ', bias.dtype)
# Calculating the predictions
green_light_duration_new = weight * x + bias
print("Training cost =", training_cost, "Weight =", weight, "bias =", bias, '\n')
# Plotting the Results
plt.plot(x, y, 'ro', label ='Original data')
plt.plot(x, green_light_duration_new, label ='Fitted line')
plt.title('Linear Regression Result')
return green_light_duration_new
# Go to the training function
new_green_dur = green_light_duration_new(current_reward, current_green)
# Append the predicted green light to its list
# Go on to run the rest of the simulation with the new green light duration,
# and append its subsequent reward to current_reward list to run again later.
通过以下解决方案更新图片 使用下面提供的解决方案,它仅绘制一个数据点,而不是三个I输入,并且没有最佳拟合线,并且第二个绘图底部的轴坐标不能反映一个数据点的真实位置。
答案 0 :(得分:2)
首先有两个错误,请使用MinMaxScaler缩放数据。在计算过程中,当数字超出范围时,会弹出NAN 2. Append在numpy数组中不起作用。