Tensorflow的新手,并尝试将分类器数据分为5个类。如果我运行代码,我得到了:
ERROR:tensorflow:Model diverged with loss = NaN.
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.contrib.learn.python.learn.monitors.NanLossDuringTrainingError'>, NaN loss during training.
Traceback (most recent call last):
File "net_version2.py", line 83, in <module>
tf.app.run()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/platform/app.py", line 30, in run
sys.exit(main(sys.argv))
File "net_version2.py", line 79, in main
train_and_eval()
File "net_version2.py", line 72, in train_and_eval
m.fit(input_fn=lambda: input_fn(df_train, True), steps=200)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 219, in fit
max_steps=max_steps)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 507, in _train_model
max_steps=max_steps)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3530, in get_controller
yield default
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 507, in _train_model
max_steps=max_steps)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3004, in device
yield
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 507, in _train_model
max_steps=max_steps)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 282, in _supervised_train
return loss
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3530, in get_controller
yield default
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 281, in _supervised_train
None)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/supervised_session.py", line 318, in run
run_metadata=run_metadata)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/recoverable_session.py", line 54, in run
run_metadata=run_metadata)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/coordinated_session.py", line 70, in run
self._coord.join(self._coordinated_threads_to_join)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/training/coordinator.py", line 383, in join
six.reraise(*self._exc_info_to_raise)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/six.py", line 686, in reraise
raise value
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/coordinated_session.py", line 66, in run
return self._sess.run(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/monitored_session.py", line 107, in run
induce_stop = monitor.step_end(monitors_step, monitor_outputs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 396, in step_end
return self.every_n_step_end(step, output)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/monitors.py", line 1086, in every_n_step_end
raise NanLossDuringTrainingError
tensorflow.contrib.learn.python.learn.monitors.NanLossDuringTrainingError: NaN loss during training.
我的代码是:
import pandas as pd
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.DEBUG)
CATEGORICAL_COLUMNS = []
CONTINUOUS_COLUMNS = ["a", "b", "c", "d", "e", "f", "g"]
GOAL_COLUMN = "goal"
def build_estimator(model_dir):
"""Build an estimator."""
a = tf.contrib.layers.real_valued_column("a")
b = tf.contrib.layers.real_valued_column("b")
c = tf.contrib.layers.real_valued_column("c")
d = tf.contrib.layers.real_valued_column("d")
e = tf.contrib.layers.real_valued_column("e")
f = tf.contrib.layers.real_valued_column("f")
g = tf.contrib.layers.real_valued_column("g")
wide_columns = [a, d, b, c, e, f, g]
deep_columns = [a, d, b, c, e, f, g]
return tf.contrib.learn.DNNLinearCombinedClassifier(
linear_feature_columns=wide_columns,
dnn_feature_columns=deep_columns,
dnn_hidden_units=[20, 10, 5],
n_classes=3)
def input_fn(df, train=False):
"""Input builder function."""
continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
feature_cols = dict(continuous_cols)
if train:
label = tf.constant(df[GOAL_COLUMN].values)
return feature_cols, label
else:
return feature_cols
def train_and_eval():
"""Train and evaluate the model."""
df_train = pd.read_csv(
tf.gfile.Open("./train.csv"),
skipinitialspace=True)
df_test = pd.read_csv(
tf.gfile.Open("./test.csv"),
skipinitialspace=True)
model_dir = "./models"
print("model directory = %s" % model_dir)
m = build_estimator(model_dir)
m.fit(input_fn=lambda: input_fn(df_train, True), steps=200)
print (m.predict(input_fn=lambda: input_fn(df_test)))
results = m.evaluate(input_fn=lambda: input_fn(df_train, True), steps=1)
for key in sorted(results):
print("%s: %s" % (key, results[key]))
def main(_):
train_and_eval()
if __name__ == "__main__":
tf.app.run()
我的csv的一个例子是:
a,b,goal,c,d,e,f,g
21,93895,1,18161,1362,0,-1,-1
21,93929,1,5706,1485,134,-1,-1
如果我将 n_classes - 参数更改为2(默认值),我得到的结果张量为[1, 1, ..., 1, 1]
,平均值为1.0。每个上面的类设置都会抛出上面的错误。
return tf.contrib.learn.DNNLinearCombinedClassifier(
linear_feature_columns=wide_columns,
dnn_feature_columns=deep_columns,
dnn_hidden_units=[20, 10, 5],
n_classes=3)
同样如果我只使用DNNClassifier。如果我使用一个回归量,我得到的值介于-5和10之间,可能的目标值为1到5。
希望有人有想法解决这个问题。