I have a small dataset that fits comfortably in GPU RAM. My goal is to make better use of my GPU (currently at around 70% utilization) and thereby reduce training time, using the new Dataset API in TensorFlow v1.4.
I would like to increase GPU utilization without adding more layers or increasing the batch size. How can I achieve this with the Dataset API?
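For example, I am not sure whether a change along these lines would even help. It is only a rough sketch (it reuses the variable names from the code below and assumes Dataset.cache() is available in this TensorFlow version):

# Rough sketch of the kind of pipeline change I have in mind (not benchmarked).
# Since the whole training set fits in memory, cache it and shuffle over the full set.
train_set = (tf.data.Dataset.from_tensor_slices((train_set_x, train_set_y))
             .cache()                              # keep the slices in memory after the first pass
             .shuffle(buffer_size=TRAIN_SET_SIZE)  # shuffle over the whole set instead of 1000 samples
             .batch(TRAIN_BATCH_SIZE)
             .prefetch(1))                         # prepare the next batch while the current one trains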
Below is a simplified example of my current implementation:
import numpy as np
from time import time
import tensorflow as tf
"""
Simple regression example with Dataset API.
The training and validation sets are small enough to fit in GPU RAM.
"""
TRAIN_SET_SIZE = 130000
VAL_SET_SIZE = 30000
TRAIN_BATCH_SIZE = 100
VAL_BATCH_SIZE = 1000
TRAIN_PREFETCH = 200
VAL_PREFETCH = 1
INPUT_FEATURES = 120
LAYERS = [500, 500, 500, 500, 1] # last layer size should be 1
def fc_layer(in_tensor, in_dim, out_dim, name, act_fun=tf.nn.relu):
    with tf.variable_scope(name):
        sd = 1.0 / np.sqrt(in_dim)
        W_fc = tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=sd), name='weights')
        b_fc = tf.Variable(tf.truncated_normal([out_dim], stddev=sd), name='bias')
        z_fc = tf.matmul(in_tensor, W_fc) + b_fc
        if act_fun is None:
            return z_fc
        else:
            return act_fun(z_fc)
# Create dummy data
train_set_x = np.random.uniform(low=-1, high=1, size=(TRAIN_SET_SIZE, INPUT_FEATURES)).astype(np.float32)
train_set_y = np.random.uniform(low=-1, high=2, size=(TRAIN_SET_SIZE)).astype(np.float32)
val_set_x = np.random.uniform(low=-1, high=1, size=(VAL_SET_SIZE, INPUT_FEATURES)).astype(np.float32)
val_set_y = np.random.uniform(low=-1, high=2, size=(VAL_SET_SIZE)).astype(np.float32)
# Reset graph
tf.reset_default_graph()
with tf.device('/gpu:0'):
    # Dummy train data
    train_set = tf.data.Dataset.from_tensor_slices((train_set_x, train_set_y))
    # TODO: batch first and then prefetch, or the other way around?
    # TODO: TRAIN_PREFETCH value?
    train_set = train_set.shuffle(buffer_size=1000).batch(TRAIN_BATCH_SIZE).prefetch(TRAIN_PREFETCH)
    # Dummy val data
    val_set = tf.data.Dataset.from_tensor_slices((val_set_x, val_set_y))
    # TODO: VAL_PREFETCH value?
    val_set = val_set.batch(VAL_BATCH_SIZE).prefetch(VAL_PREFETCH)
    # Iterator
    iterator = tf.data.Iterator.from_structure(train_set.output_types, train_set.output_shapes)
    train_init_op = iterator.make_initializer(train_set)
    val_init_op = iterator.make_initializer(val_set)
    x, truth = iterator.get_next()
    # Build graph
    activations = []
    activations.append(fc_layer(x,
                                INPUT_FEATURES,
                                LAYERS[0],
                                name='fc0'))
    for layer_ix in range(1, len(LAYERS) - 1):
        activations.append(fc_layer(activations[-1],
                                    LAYERS[layer_ix - 1],
                                    LAYERS[layer_ix],
                                    name='fc' + str(layer_ix)))
    activations.append(fc_layer(activations[-1],
                                LAYERS[-2],
                                LAYERS[-1],
                                act_fun=None,
                                name='fc' + str(len(LAYERS) - 1)))
    prediction = activations[-1]
    # Squeeze prediction from (batch, 1) to (batch,) so it matches truth and avoids accidental broadcasting
    loss = tf.reduce_mean(tf.square(truth - tf.squeeze(prediction, axis=1)))
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_step = optimizer.minimize(loss, global_step=global_step, name='train_step')
sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                        log_device_placement=True,
                                        allow_soft_placement=True))
sess.run(tf.global_variables_initializer())
for e in range(1, 6):  # epochs
    epoch_start_time = time()
    # Train set
    sess.run(train_init_op)
    print('\nTrain init op time: %.4f' % (time() - epoch_start_time))
    while True:
        try:
            batch_start_time = time()
            batch_loss, step, _ = sess.run([loss, global_step, train_step])
            # if step % 1000 == 0:
            #     print('Step: %5d Loss: %.2f, Batch Time : %.5f sec' % (step, batch_loss, time() - batch_start_time))
        except tf.errors.OutOfRangeError:
            break
    # print('Epoch time (without computing val set loss): %.2f' % (time() - epoch_start_time))
    # Val set
    sess.run(val_init_op)
    pred_err = np.ndarray([VAL_SET_SIZE])
    ix = 0
    while True:
        try:
            p, t = sess.run([prediction, truth])
            pred_err[ix:ix + VAL_BATCH_SIZE] = p.reshape([-1]) - t
            ix += VAL_BATCH_SIZE
        except tf.errors.OutOfRangeError:
            val_loss = np.mean(pred_err ** 2)
            print('Epoch: %2d, Loss: %.2f, Epoch time: %.2f sec' % (e, val_loss, time() - epoch_start_time))
            break
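Another variant I have considered, sketched below but not benchmarked: build the training pipeline with repeat() so the iterator only has to be initialized once, and drive the epochs by step count instead of catching OutOfRangeError. The names n_epochs and steps_per_epoch are only for illustration; the constants and the iterator come from the code above.

# Rough sketch: one initialization for all epochs via repeat().
n_epochs = 5
steps_per_epoch = TRAIN_SET_SIZE // TRAIN_BATCH_SIZE
train_set_repeated = (tf.data.Dataset.from_tensor_slices((train_set_x, train_set_y))
                      .shuffle(buffer_size=TRAIN_SET_SIZE)   # data is small, shuffle over the full set
                      .repeat(n_epochs)                      # stream all epochs without re-initializing
                      .batch(TRAIN_BATCH_SIZE)
                      .prefetch(TRAIN_PREFETCH))
# With the reinitializable iterator above, this would replace the per-epoch train_init_op:
# sess.run(iterator.make_initializer(train_set_repeated))
# for step in range(n_epochs * steps_per_epoch):
#     sess.run(train_step)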