鲍鱼(Abalone)数据集准确率低

时间:2018-04-28 00:12:15

标签: python tensorflow machine-learning

我正在尝试使用鲍鱼(Abalone)数据集来练习如何在 TensorFlow 中使用 Estimator。然而,即使我尝试了不同的训练步数、批量大小、激活函数、学习率和网络结构,评估准确率仍然只有 20% 左右。我用相同的网络结构在鸢尾花(Iris)数据集上训练,准确率超过 90%。我想知道鲍鱼数据集本身是否能够达到较高的准确率?还是应该使用 DNN 以外的其他模型进行训练?我的代码附在下面。非常感谢!

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import tensorflow as tf

import Aba_data

# Command-line options for the training run; both flags are integers.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--batch_size',
    type=int,
    default=1000,
    help='batch size',
)
parser.add_argument(
    '--train_steps',
    type=int,
    default=100000,
    help='number of training steps',
)

def my_model(features, labels, mode, params):
    """Model function for a fully connected classifier used with tf.estimator.

    Builds one sigmoid-activated dense layer per entry in
    ``params['hidden_units']``, followed by a linear layer that emits
    ``params['n_classes']`` logits. No dropout is applied.

    Args:
        features: Input features, converted to a dense tensor through
            ``tf.feature_column.input_layer``.
        labels: Targets for the sparse softmax cross-entropy loss and the
            accuracy metric. Not read in PREDICT mode.
            NOTE(review): the sparse loss presumably requires integer class
            ids in ``[0, n_classes)`` -- confirm against the data loader.
        mode: A ``tf.estimator.ModeKeys`` value (PREDICT, EVAL or TRAIN).
        params: Dict providing ``'feature_columns'``, ``'hidden_units'`` and
            ``'n_classes'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` populated for the requested mode.

    Raises:
        AssertionError: If ``mode`` is not PREDICT, EVAL or TRAIN.
    """
    # Stack the hidden layers on top of the feature-column input layer.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.sigmoid)

    # Compute logits (1 per class).
    logits = tf.layers.dense(net, params['n_classes'], activation=None)

    # Compute predictions.
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Compute loss.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Compute evaluation metrics. accuracy is a (value, update_op) pair; the
    # second element is what gets written to the summary.
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name='acc_op')
    metrics = {'accuracy': accuracy}
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    # Create training op. Only TRAIN can reach this point.
    assert mode == tf.estimator.ModeKeys.TRAIN

    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)


def main(argv):
    """Train the custom estimator on the abalone data and print test accuracy.

    Args:
        argv: Full argument vector. ``argv[0]`` is skipped and the remainder
            is parsed with the module-level ``parser``.
    """
    args = parser.parse_args(argv[1:])

    # Fetch the data.
    # NOTE(review): 0.7 is presumably the train/test split fraction --
    # confirm against Aba_data.load_data.
    (train_x, train_y), (test_x, test_y) = Aba_data.load_data(0.7)

    # Feature columns describe how to use the input. 'Sex' is categorical and
    # is one-hot encoded; every other key is treated as a numeric column.
    my_feature_columns = []
    for key in train_x.keys():
        if key == 'Sex':
            my_feature_columns.append(tf.feature_column.indicator_column(
                tf.feature_column.categorical_column_with_vocabulary_list(
                    key=key, vocabulary_list=["M", "F", "I"])))
        else:
            my_feature_columns.append(
                tf.feature_column.numeric_column(key=key))

    # Build the estimator around the custom model function.
    classifier = tf.estimator.Estimator(
        model_fn=my_model,
        params={
            'feature_columns': my_feature_columns,
            # Two hidden layers of 10 nodes each.
            'hidden_units': [10, 10],
            # The model must choose between 30 classes.
            # NOTE(review): presumably sized to cover every label value
            # Aba_data returns -- confirm against the label range.
            'n_classes': 30,
        })

    # Train the Model.
    classifier.train(
        input_fn=lambda: Aba_data.train_input_fn(
            train_x, train_y, args.batch_size),
        steps=args.train_steps)

    # Evaluate the model.
    eval_result = classifier.evaluate(
        input_fn=lambda: Aba_data.eval_input_fn(
            test_x, test_y, args.batch_size))

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

# Generate predictions from the model
# expected = ['Setosa', 'Versicolor', 'Virginica']
# predict_x = {
#     'SepalLength': [5.1, 5.9, 6.9],
#     'SepalWidth': [3.3, 3.0, 3.1],
#     'PetalLength': [1.7, 4.2, 5.4],
#     'PetalWidth': [0.5, 1.5, 2.1],
# }
#
# predictions = classifier.predict(
#     input_fn=lambda:iris_data.eval_input_fn(predict_x,
#                                             labels=None,
#                                             batch_size=args.batch_size))
#
# for pred_dict, expec in zip(predictions, expected):
#     template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')
#
#     class_id = pred_dict['class_ids'][0]
#     probability = pred_dict['probabilities'][class_id]
#
#     print(template.format(iris_data.SPECIES[class_id],
#                           100 * probability, expec))


if __name__ == '__main__':
    # Show INFO-level TensorFlow log output while the script runs.
    tf.logging.set_verbosity(tf.logging.INFO)
    # Hand control to TensorFlow's app runner, which invokes main.
    tf.app.run(main)

0 个答案:

没有答案