I am training a neural network model, and after a while the loss stops decreasing. The loss does go down at first, but not far enough for the model to make accurate predictions. I have tried several things, but none of them made much of a difference.
All of my attempts end the same way: the loss flattens out, as shown in the figure below; only the number of steps it takes to flatten out varies between runs:
Here is what I have already tried:
Lowering the learning rate. I swept the learning rate from 1e-3 all the way down to 1e-8, and every setting ended up at the same loss value. In the current code I even use tf.train.exponential_decay() so that the learning rate decreases gradually, but that does not help either (see the sketch after this list for how that schedule behaves).
Changing the number of layers (I tried 2 to 4 hidden layers) and the number of nodes per layer. This did not lower the final loss value either.
Changing the batch size. This actually did help. I swept the batch size from 20000 down to 1 and picked the one that reached the lowest loss value fastest, which was a batch size of 1000. But again, that lowest loss value is the number I showed, and it is still not good enough.
Changing the regularization coefficient ("beta" in the code). This made little difference, because my problem is not overfitting; it is underfitting.
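To make the learning-rate schedule concrete, here is a minimal sketch of what tf.train.exponential_decay() with staircase=True computes, using the base rate 0.001, decay step 1e6 and decay rate 0.96 from my code below. It is plain NumPy for illustration only (the helper name staircase_decay is made up) and is not part of the training script:

import numpy as np

def staircase_decay(step, base_lr=0.001, decay_steps=1e6, decay_rate=0.96):
    # Equivalent to tf.train.exponential_decay(..., staircase=True):
    #   lr = base_lr * decay_rate ** floor(step / decay_steps)
    return base_lr * decay_rate ** np.floor(step / decay_steps)

for step in [0, 5e5, 1e6, 5e6, 1e7]:
    print("step %.0e -> learning rate %.6f" % (step, staircase_decay(step)))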
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import with_statement
#import scipy
from sklearn import preprocessing
from pdb import set_trace as bp
import argparse
import sys
import numpy as np
import csv
import pandas as pd
import tensorflow as tf
beta = 0.001
training_batch_size = 1000
train_predict_file = "./Data/training_data_3Param.csv"
CV_file = "./Data/CV_data_3Param.csv"
result_file = "hyper_param_result_LRdecay.txt"
def model_fn(features, labels, mode, params):
    """Model function for Estimator."""
    # Four fully connected hidden layers with leaky-ReLU activations, 100 nodes each.
    first_hidden_layer = tf.layers.dense(features["x"], 100, activation=tf.nn.leaky_relu)
    second_hidden_layer = tf.layers.dense(first_hidden_layer, 100, activation=tf.nn.leaky_relu)
    third_hidden_layer = tf.layers.dense(second_hidden_layer, 100, activation=tf.nn.leaky_relu)
    fourth_hidden_layer = tf.layers.dense(third_hidden_layer, 100, activation=tf.nn.leaky_relu)
    output_layer = tf.layers.dense(fourth_hidden_layer, 3)
    predictions = tf.reshape(output_layer, [-1, 3])
    if labels is not None:
        labels = tf.reshape(labels, [-1, 3])
    var = [v for v in tf.trainable_variables() if "kernel" in v.name]
    # Provide an estimator spec for `ModeKeys.PREDICT`.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"par": predictions})
    # Calculate loss using mean squared error
    regularizer = tf.nn.l2_loss(var[0]) + tf.nn.l2_loss(var[1]) + tf.nn.l2_loss(var[2])
    loss = tf.losses.mean_squared_error(labels, predictions)
    # Learning-rate schedule, driven by the global training step (scaled to examples seen).
    learning_rate = tf.train.exponential_decay(
        0.001,  # base learning rate
        tf.train.get_global_step() * training_batch_size,
        1e6,    # decay step
        0.96,   # decay rate
        staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(
        loss=loss + (beta / 2) * regularizer, global_step=tf.train.get_global_step())
    # Calculate root mean squared error as additional eval metric
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            tf.cast(labels, tf.float32), predictions)
    }
    # Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
def main(unused_argv):
    train_predict_data_interim = []
    CV_data_interim = []
    with open(train_predict_file) as f:
        csvreader = csv.reader(f)
        for row in csvreader:
            train_predict_data_interim.append(row)
    with open(CV_file) as f:
        csvreader = csv.reader(f)
        for row in csvreader:
            CV_data_interim.append(row)
    train_predict_data_interim = pd.DataFrame(train_predict_data_interim)
    CV_data_interim = pd.DataFrame(CV_data_interim)
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    train_predict_data_transformed = min_max_scaler.fit_transform(train_predict_data_interim)
    CV_data_transformed = min_max_scaler.transform(CV_data_interim)  # reuse the scaler fitted on the training data
    train_predict_data_transformed = pd.DataFrame(train_predict_data_transformed)
    CV_data_transformed = pd.DataFrame(CV_data_transformed)
    train_data_interim = train_predict_data_transformed.sample(frac=0.9996875)
    predict_data_interim = train_predict_data_transformed.loc[~train_predict_data_transformed.index.isin(train_data_interim.index), :]
    a = len(train_predict_data_transformed.columns)
    train_labels_interim = train_data_interim.iloc[:, a-3:a]
    train_features_interim = train_data_interim.iloc[:, :a-3]
    train_features_numpy = np.asarray(train_features_interim, dtype=np.float32)
    train_labels_numpy = np.asarray(train_labels_interim, dtype=np.float32)
    # Instantiate Estimator
    nn = tf.estimator.Estimator(model_fn=model_fn, model_dir="/tmp/nmos_3Param_LRdecay_bs%s_beta%s" % (training_batch_size, beta))
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_numpy},
        y=train_labels_numpy,
        batch_size=training_batch_size,
        num_epochs=None,
        shuffle=True)
    # Train
    nn.train(input_fn=train_input_fn, steps=448000)
    test_features_interim = CV_data_transformed.iloc[:, :a-3]
    test_features_numpy = np.asarray(test_features_interim, dtype=np.float32)
    test_labels_interim = CV_data_transformed.iloc[:, a-3:a]
    test_labels_numpy = np.asarray(test_labels_interim, dtype=np.float32)
    # Score accuracy
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_numpy},
        y=test_labels_numpy,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    ev = nn.evaluate(input_fn=test_input_fn)
    print("Loss: %s" % ev["loss"])
    print("Root Mean Squared Error: %s" % ev["rmse"])
    prediction_features_interim = predict_data_interim.iloc[:, :a-3]
    prediction_features_numpy = np.asarray(prediction_features_interim, dtype=np.float32)
    prediction_labels_interim = predict_data_interim.iloc[:, a-3:a]
    prediction_labels_numpy = np.asarray(prediction_labels_interim, dtype=np.float32)
    print('-' * 30, 'prediction_labels_numpy', '-' * 30)
    print(prediction_labels_numpy)
    prediction_labels_str = np.char.mod('%f', prediction_labels_numpy)
    with open(result_file, 'a') as f:
        f.write('-' * 50)
        f.write('\n')
        f.write("batch_size = %s beta = %s\n" % (training_batch_size, beta))
        f.write("Loss: %s\n" % ev["loss"])
        f.write("Root Mean Squared Error: %s\n" % ev["rmse"])
        f.write("%s\n" % (prediction_labels_str))
    # Print out predictions
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": prediction_features_numpy},
        num_epochs=1,
        batch_size=1,
        shuffle=False)
    predictions = nn.predict(input_fn=predict_input_fn)  # , yield_single_examples=False)
    for i, p in enumerate(predictions):
        print("Prediction %s: %s" % (i + 1, p["par"]))
        with open(result_file, 'a') as f:
            f.write("Prediction %s: %s \n" % (i + 1, p["par"]))
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
A large part of main() is normalizing the data to the (-1, 1) range, for both the training data and the cross-validation data.
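In isolation, the scaling step looks roughly like this. This is a minimal sketch with made-up toy arrays (train_raw and cv_raw are placeholders, not my real data); the point is that the scaler is fitted on the training data and the same scaler is then reused for the cross-validation data:

import numpy as np
from sklearn import preprocessing

# Toy stand-ins for the real CSV contents.
train_raw = np.array([[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]])
cv_raw = np.array([[2.5, 15.0], [7.5, 25.0]])

scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
train_scaled = scaler.fit_transform(train_raw)  # fit the min/max on the training data only
cv_scaled = scaler.transform(cv_raw)            # reuse the same min/max for the CV data

print(train_scaled)
print(cv_scaled)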
The raw training data looks like this:
Basically, one row of the data corresponds to one curve in the figure. The first 65 columns are the features, and the last three columns are the labels, which are not plotted. So this is an NN regression problem with 65 inputs and 3 outputs.
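To spell out the layout, each row therefore has 68 columns and is split like this (a tiny illustrative sketch; the numbers are placeholders, not real data):

import numpy as np

row = np.arange(68, dtype=np.float32)  # one CSV row: 65 features followed by 3 labels
features = row[:65]                    # columns 0..64 feed the network inputs
labels = row[65:]                      # columns 65..67 are the regression targets

print(features.shape, labels.shape)    # (65,) (3,)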
There are still other things I could try, but I would be glad to have someone more experienced look at this case first; maybe I am missing something obvious here. Please share your thoughts; I appreciate any discussion. Thanks!