如何配置`train_and_evaluate`以将训练集上的度量保存到tensorboard?

时间:2017-11-09 14:23:14

标签: python tensorflow tensorboard

我有以下代码在虹膜数据集上训练模型,并使用准确度作为评估模型的度量标准:

# This is based on the complete code for the following blogpost:
# https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html


import tensorflow as tf
import os
from tensorflow.contrib.learn import Experiment, RunConfig
from urllib.request import urlopen
from tensorflow.contrib.learn import RunConfig

()

PATH = "./tf_dataset_and_estimator_apis"

# Fetch and store Training and Test dataset files
PATH_DATASET = PATH + os.sep + "dataset"
FILE_TRAIN = PATH_DATASET + os.sep + "iris_training.csv"
FILE_TEST = PATH_DATASET + os.sep + "iris_test.csv"
URL_TRAIN = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST = "http://download.tensorflow.org/data/iris_test.csv"


def downloadDataset(url, file):
    if not os.path.exists(PATH_DATASET):
        os.makedirs(PATH_DATASET)
    if not os.path.exists(file):
        data = urlopen(url).read()
        with open(file, "wb") as f:
            f.write(data)
            f.close()


downloadDataset(URL_TRAIN, FILE_TRAIN)
downloadDataset(URL_TEST, FILE_TEST)

# The CSV features in our training & test data
feature_names = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth']


# Create an input function reading a file using the Dataset API
# Then provide the results to the Estimator API


def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1:]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but last elements are the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.contrib.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    # dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.repeat()
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels


next_batch = my_input_fn(FILE_TRAIN, True)  # Will return 32 random elements

# Create the feature_columns, which specifies the input to our model
# All our input features are numeric, so use numeric_column for each one
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]

# Create a deep neural network regression classifier
# Use the DNNClassifier pre-made estimator
config = RunConfig(save_checkpoints_steps=50, save_summary_steps=50)
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,  # The input features to our model
    hidden_units=[10, 10],  # Two layers, each with 10 neurons
    n_classes=3,
    model_dir=PATH + '/model_dir',
    config=config)  # Path to where checkpoints etc are stored

experiment = Experiment(
    estimator=classifier,
    train_input_fn=lambda: my_input_fn(FILE_TRAIN, True, 8),
    eval_input_fn=lambda: my_input_fn(FILE_TEST, False, 4),
    eval_metrics=None,
    train_steps=1000,
    min_eval_frequency=50,
    eval_delay_secs=0
)

experiment.train_and_evaluate()

这是张量板结果:

enter image description here

是否有办法配置Experiment或DNNClassifier类,以便在训练集上计算精度度量并在张量板中显示?

1 个答案:

答案 0 :(得分:0)

您可以使用tf.summary.scalar在tensorboard中显示训练准确度,例如:

accuracy = tf.metrics.mean(some_tensor)
if mode == tf.estimator.ModeKeys.TRAIN:
   tf.summary.scalar('accuracy', accuracy[1])