这个 Cifar10 模型是在 Cifar10 教程的基础上修改而来的。它运行时似乎先出现一次尖峰（spike），然后挂起，再也不会把程序的控制权交还给解释器。有人认为这是因为没有调用 .start_queue_runners。
但是，原始 Cifar10 教程中似乎没有任何代码调用 .start_queue_runners，
而它运行得很好。（我使用的是单 GPU 训练代码 cifar10_train.py
……多 GPU 版本的代码可能会调用该方法。）
import os
import tensorflow as tf
import functools
from urllib.request import urlretrieve
import zipfile
import tarfile
import sys
# Side length, in pixels, of the square image produced by
# Cifar10Record.undistorted_input_branch. Note that this differs from the
# original CIFAR image size of 32 x 32. If one alters this number, then the
# entire model architecture will change and any model would need to be
# retrained.
IMAGE_SIZE = 24
# Global constants describing the CIFAR-10 data set.
# Number of label classes in the data set.
NUM_CLASSES = 10
# Examples per epoch in the training and evaluation splits; the script at the
# bottom passes these to Cifar10Record.batched_input to size the example queue.
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def _print_download_progress(count, block_size, total_size):
    """Report download progress on a single, continuously rewritten line.

    Used as the ``reporthook`` of ``urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size of the download in bytes; zero or negative
            when the server did not report a size.
    """
    transferred = count * block_size
    if total_size > 0:
        # The last block is usually partial, so clamp at 100%.
        fraction = min(1.0, float(transferred) / total_size)
        msg = "\r- Download progress: {0:.1%}".format(fraction)
    else:
        msg = "\r- Download progress: {0} bytes".format(transferred)
    sys.stdout.write(msg)
    sys.stdout.flush()


def maybe_download_and_extract():
    """Make sure the CIFAR-10 binary files exist and return their paths.

    If ``../data/cifar_10/`` (relative to the current working directory) does
    not exist, the CIFAR-10 binary archive is downloaded into ``../data/``,
    extracted, the extracted ``cifar-10-batches-bin`` directory is renamed to
    ``cifar_10`` and the archive is deleted.

    Returns:
        A 2-tuple ``(train_files, test_files)``: the list of the five
        ``data_batch_N.bin`` paths and a one-element list holding the
        ``test_batch.bin`` path.
    """
    main_directory = "../data/"
    cifar_10_directory = main_directory + "cifar_10/"
    os.makedirs(main_directory, exist_ok=True)
    if not os.path.exists(cifar_10_directory):
        url = "http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"
        archive_path = os.path.join(main_directory, url.split('/')[-1])
        archive_path, _ = urlretrieve(
            url=url,
            filename=archive_path,
            reporthook=_print_download_progress)
        print()
        print("Download finished. Extracting files.")
        # NOTE(review): the archive is fetched over plain HTTP and extracted
        # without member-path filtering; consider verifying a checksum or
        # using tarfile's extraction filters where available.
        if archive_path.endswith(".zip"):
            with zipfile.ZipFile(file=archive_path, mode="r") as archive:
                archive.extractall(main_directory)
        elif archive_path.endswith((".tar.gz", ".tgz")):
            with tarfile.open(name=archive_path, mode="r:gz") as archive:
                archive.extractall(main_directory)
        print("Done.")
        os.rename(main_directory + "cifar-10-batches-bin", cifar_10_directory)
        os.remove(archive_path)
    train_files = [
        os.path.join(cifar_10_directory, 'data_batch_{}.bin'.format(i))
        for i in range(1, 6)
    ]
    test_files = [os.path.join(cifar_10_directory, 'test_batch.bin')]
    return train_files, test_files
class Cifar10Record(list):
    """Graph-building reader for CIFAR-10 binary record files.

    Constructing an instance adds the ops that read one fixed-length record
    at a time from ``filenames`` via a filename queue, decode it into a
    one-hot label and a float32 image, and crop/standardize the image.  The
    resulting tensors are exposed as attributes (``label``, ``uint8image``,
    ``float32image``, ``image``); ``batched_input`` groups them into batches.

    The pipeline is queue based: the tensors only yield data once the
    graph's queue runners have been started (e.g. with
    ``tf.train.start_queue_runners``).

    NOTE(review): the ``list`` base class is never used by this class; it is
    kept only so that existing ``isinstance`` checks keep working.
    """
    label_bytes = 1  # 2 for CIFAR-100
    height = 32
    width = 32
    depth = 3
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    image_bytes = height * width * depth
    record_bytes = label_bytes + image_bytes

    def __init__(self, filenames):
        """Build the ops that read and decode a single record.

        Args:
            filenames: List of paths to CIFAR-10 binary batch files.
        """
        super().__init__()
        # Queue of file names, consumed by a fixed-length record reader.
        self.filename_queue = tf.train.string_input_producer(filenames)
        self.reader = tf.FixedLengthRecordReader(
            record_bytes=Cifar10Record.record_bytes)
        self.key, value = self.reader.read(self.filename_queue)
        # Convert from a string to a vector of uint8 that is record_bytes long.
        record = tf.decode_raw(value, tf.uint8)
        # The first bytes represent the label, which we convert from
        # uint8->int32 and then to a one-hot vector of shape [1, NUM_CLASSES].
        label = tf.cast(
            tf.strided_slice(record, [0], [Cifar10Record.label_bytes]),
            tf.int32)
        label.set_shape([1])
        self.label = tf.one_hot(
            label, NUM_CLASSES, on_value=1.0, off_value=0.0)
        # The remaining bytes after the label represent the image, which we
        # reshape from [depth * height * width] to [depth, height, width].
        depth_major = tf.reshape(
            tf.strided_slice(record, [Cifar10Record.label_bytes],
                             [Cifar10Record.record_bytes]),
            [Cifar10Record.depth, Cifar10Record.height, Cifar10Record.width])
        # Convert from [depth, height, width] to [height, width, depth].
        self.uint8image = tf.transpose(depth_major, [1, 2, 0])
        self.float32image = tf.cast(self.uint8image, tf.float32)
        # No distorted branch is built; see batched_input(distorted=True).
        self.image = self.undistorted_input_branch()

    def undistorted_input_branch(self):
        """Return the centrally cropped, standardized version of the image.

        Returns:
            A float32 tensor of shape [IMAGE_SIZE, IMAGE_SIZE, 3].
        """
        height = IMAGE_SIZE
        width = IMAGE_SIZE
        # Crop the central [height, width] of the image.
        resized_image = tf.image.resize_image_with_crop_or_pad(
            self.float32image, height, width)
        # Standardize the pixels of this image (zero mean, unit variance).
        float_image = tf.image.per_image_standardization(resized_image)
        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        return float_image

    def batched_input(self, num_examples_per_epoch, batch_size, shuffle=True, distorted=False):
        """Construct a queued batch of images and labels.

        Args:
            num_examples_per_epoch: Number of examples in one epoch; 40% of
                it is used as the minimum fill level of the example queue.
            batch_size: Number of examples per batch.
            shuffle: If true (default) batches are drawn from a shuffling
                queue; otherwise examples are batched in reading order.
            distorted: If true, batch ``self.distorted_image`` instead of
                ``self.image``.

        Returns:
            images: 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
            labels: 2D one-hot tensor of [batch_size, NUM_CLASSES] size.

        Raises:
            AttributeError: If ``distorted`` is true but this instance has no
                ``distorted_image`` tensor.
        """
        if distorted:
            if not hasattr(self, "distorted_image"):
                raise AttributeError(
                    "distorted=True was requested but this Cifar10Record "
                    "has no 'distorted_image' tensor")
            image = self.distorted_image
        else:
            image = self.image
        label = self.label
        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)
        num_preprocess_threads = 16
        print(batch_size)
        queue_capacity = min_queue_examples + 3 * batch_size
        if shuffle:
            # Shuffling queue: keeps at least min_queue_examples buffered so
            # that dequeued batches are well mixed.
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=num_preprocess_threads,
                capacity=queue_capacity,
                min_after_dequeue=min_queue_examples)
        else:
            images, label_batch = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=num_preprocess_threads,
                capacity=queue_capacity)
        # Each label is [1, NUM_CLASSES]; drop the singleton axis.
        return images, tf.reshape(label_batch, [batch_size, NUM_CLASSES])
# Build the training and evaluation input pipelines, then try to pull one
# training batch.
train_files, test_files = maybe_download_and_extract()
train_data = Cifar10Record(train_files).batched_input(
    NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN, 100)
test_data = Cifar10Record(test_files).batched_input(
    NUM_EXAMPLES_PER_EPOCH_FOR_EVAL, 100)
with tf.Session() as sess:
    # NOTE: no queue runners are started before this run call; this is the
    # hang the question describes (the queues feeding train_data stay empty).
    sess.run(fetches=train_data)
    input("foo!")
答案 0 :(得分:0)
根据 @de1 的评论，原因在于 MonitoredTrainingSession
会调用 start_queue_runners
或做类似的事情。以下是能够正常启动队列运行器（queue runner）的可用代码：
import os
import tensorflow as tf
import functools
from urllib.request import urlretrieve
import zipfile
import tarfile
import sys
# Side length, in pixels, of the square image produced by
# Cifar10Record.undistorted_input_branch. Note that this differs from the
# original CIFAR image size of 32 x 32. If one alters this number, then the
# entire model architecture will change and any model would need to be
# retrained.
IMAGE_SIZE = 24
# Global constants describing the CIFAR-10 data set.
# Number of label classes in the data set.
NUM_CLASSES = 10
# Examples per epoch in the training and evaluation splits; the script at the
# bottom passes these to Cifar10Record.batched_input to size the example queue.
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def _print_download_progress(count, block_size, total_size):
    """Report download progress on a single, continuously rewritten line.

    Used as the ``reporthook`` of ``urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size of the download in bytes; zero or negative
            when the server did not report a size.
    """
    transferred = count * block_size
    if total_size > 0:
        # The last block is usually partial, so clamp at 100%.
        fraction = min(1.0, float(transferred) / total_size)
        msg = "\r- Download progress: {0:.1%}".format(fraction)
    else:
        msg = "\r- Download progress: {0} bytes".format(transferred)
    sys.stdout.write(msg)
    sys.stdout.flush()


def maybe_download_and_extract():
    """Make sure the CIFAR-10 binary files exist and return their paths.

    If ``../data/cifar_10/`` (relative to the current working directory) does
    not exist, the CIFAR-10 binary archive is downloaded into ``../data/``,
    extracted, the extracted ``cifar-10-batches-bin`` directory is renamed to
    ``cifar_10`` and the archive is deleted.

    Returns:
        A 2-tuple ``(train_files, test_files)``: the list of the five
        ``data_batch_N.bin`` paths and a one-element list holding the
        ``test_batch.bin`` path.
    """
    main_directory = "../data/"
    cifar_10_directory = main_directory + "cifar_10/"
    os.makedirs(main_directory, exist_ok=True)
    if not os.path.exists(cifar_10_directory):
        url = "http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"
        archive_path = os.path.join(main_directory, url.split('/')[-1])
        archive_path, _ = urlretrieve(
            url=url,
            filename=archive_path,
            reporthook=_print_download_progress)
        print()
        print("Download finished. Extracting files.")
        # NOTE(review): the archive is fetched over plain HTTP and extracted
        # without member-path filtering; consider verifying a checksum or
        # using tarfile's extraction filters where available.
        if archive_path.endswith(".zip"):
            with zipfile.ZipFile(file=archive_path, mode="r") as archive:
                archive.extractall(main_directory)
        elif archive_path.endswith((".tar.gz", ".tgz")):
            with tarfile.open(name=archive_path, mode="r:gz") as archive:
                archive.extractall(main_directory)
        print("Done.")
        os.rename(main_directory + "cifar-10-batches-bin", cifar_10_directory)
        os.remove(archive_path)
    train_files = [
        os.path.join(cifar_10_directory, 'data_batch_{}.bin'.format(i))
        for i in range(1, 6)
    ]
    test_files = [os.path.join(cifar_10_directory, 'test_batch.bin')]
    return train_files, test_files
class Cifar10Record(list):
    """Graph-building reader for CIFAR-10 binary record files.

    Constructing an instance adds the ops that read one fixed-length record
    at a time from ``filenames`` via a filename queue, decode it into a
    one-hot label and a float32 image, and crop/standardize the image.  The
    resulting tensors are exposed as attributes (``label``, ``uint8image``,
    ``float32image``, ``image``); ``batched_input`` groups them into batches.

    The pipeline is queue based: the tensors only yield data once the
    graph's queue runners have been started (e.g. with
    ``tf.train.start_queue_runners``).

    NOTE(review): the ``list`` base class is never used by this class; it is
    kept only so that existing ``isinstance`` checks keep working.
    """
    label_bytes = 1  # 2 for CIFAR-100
    height = 32
    width = 32
    depth = 3
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    image_bytes = height * width * depth
    record_bytes = label_bytes + image_bytes

    def __init__(self, filenames):
        """Build the ops that read and decode a single record.

        Args:
            filenames: List of paths to CIFAR-10 binary batch files.
        """
        super().__init__()
        # Queue of file names, consumed by a fixed-length record reader.
        self.filename_queue = tf.train.string_input_producer(filenames)
        self.reader = tf.FixedLengthRecordReader(
            record_bytes=Cifar10Record.record_bytes)
        self.key, value = self.reader.read(self.filename_queue)
        # Convert from a string to a vector of uint8 that is record_bytes long.
        record = tf.decode_raw(value, tf.uint8)
        # The first bytes represent the label, which we convert from
        # uint8->int32 and then to a one-hot vector of shape [1, NUM_CLASSES].
        label = tf.cast(
            tf.strided_slice(record, [0], [Cifar10Record.label_bytes]),
            tf.int32)
        label.set_shape([1])
        self.label = tf.one_hot(
            label, NUM_CLASSES, on_value=1.0, off_value=0.0)
        # The remaining bytes after the label represent the image, which we
        # reshape from [depth * height * width] to [depth, height, width].
        depth_major = tf.reshape(
            tf.strided_slice(record, [Cifar10Record.label_bytes],
                             [Cifar10Record.record_bytes]),
            [Cifar10Record.depth, Cifar10Record.height, Cifar10Record.width])
        # Convert from [depth, height, width] to [height, width, depth].
        self.uint8image = tf.transpose(depth_major, [1, 2, 0])
        self.float32image = tf.cast(self.uint8image, tf.float32)
        # No distorted branch is built; see batched_input(distorted=True).
        self.image = self.undistorted_input_branch()

    def undistorted_input_branch(self):
        """Return the centrally cropped, standardized version of the image.

        Returns:
            A float32 tensor of shape [IMAGE_SIZE, IMAGE_SIZE, 3].
        """
        height = IMAGE_SIZE
        width = IMAGE_SIZE
        # Crop the central [height, width] of the image.
        resized_image = tf.image.resize_image_with_crop_or_pad(
            self.float32image, height, width)
        # Standardize the pixels of this image (zero mean, unit variance).
        float_image = tf.image.per_image_standardization(resized_image)
        # Set the shapes of tensors.
        float_image.set_shape([height, width, 3])
        return float_image

    def batched_input(self, num_examples_per_epoch, batch_size, shuffle=True, distorted=False):
        """Construct a queued batch of images and labels.

        Args:
            num_examples_per_epoch: Number of examples in one epoch; 40% of
                it is used as the minimum fill level of the example queue.
            batch_size: Number of examples per batch.
            shuffle: If true (default) batches are drawn from a shuffling
                queue; otherwise examples are batched in reading order.
            distorted: If true, batch ``self.distorted_image`` instead of
                ``self.image``.

        Returns:
            images: 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
            labels: 2D one-hot tensor of [batch_size, NUM_CLASSES] size.

        Raises:
            AttributeError: If ``distorted`` is true but this instance has no
                ``distorted_image`` tensor.
        """
        if distorted:
            if not hasattr(self, "distorted_image"):
                raise AttributeError(
                    "distorted=True was requested but this Cifar10Record "
                    "has no 'distorted_image' tensor")
            image = self.distorted_image
        else:
            image = self.image
        label = self.label
        # Ensure that the random shuffling has good mixing properties.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)
        num_preprocess_threads = 16
        print(batch_size)
        queue_capacity = min_queue_examples + 3 * batch_size
        if shuffle:
            # Shuffling queue: keeps at least min_queue_examples buffered so
            # that dequeued batches are well mixed.
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=num_preprocess_threads,
                capacity=queue_capacity,
                min_after_dequeue=min_queue_examples)
        else:
            images, label_batch = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=num_preprocess_threads,
                capacity=queue_capacity)
        # Each label is [1, NUM_CLASSES]; drop the singleton axis.
        return images, tf.reshape(label_batch, [batch_size, NUM_CLASSES])
# Build the training and evaluation input pipelines.
train_files, test_files = maybe_download_and_extract()
train_queue = Cifar10Record(train_files)
train_data = train_queue.batched_input(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN, 100)
test_queue = Cifar10Record(test_files)
test_data = test_queue.batched_input(NUM_EXAMPLES_PER_EPOCH_FOR_EVAL, 100)
with tf.Session() as sess:
    # The queue-based pipeline produces nothing until its runner threads are
    # started; without this call sess.run(train_data) blocks forever.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        sess.run(train_data)
        input("foo!")
    finally:
        # Stop and join the runner threads before the session is closed so
        # they do not keep enqueueing into a closed session.
        coord.request_stop()
        coord.join(threads)
答案 1 :(得分:0)
任何有兴趣以“新的方式”（即使用 Dataset API）读取 Cifar10 记录的人，可能会从这个答案中受益：
import os
import tensorflow as tf
import functools
from urllib.request import urlretrieve
import zipfile
import tarfile
import sys
# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
# NOTE(review): not referenced by the code visible in this snippet.
IMAGE_SIZE = 24
# Size units, expressed in bytes.
BYTE = 1
KB = 1024*BYTE
MB = 1024*KB
# Global constants describing the CIFAR-10 data set.
# NOTE(review): the three constants below are not referenced by the code
# visible in this snippet.
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def print_download_progress(count, block_size, total_size):
    """Report download progress on a single, continuously rewritten line.

    Has the signature of a ``urlretrieve`` reporthook.

    Args:
        count: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size of the download in bytes; zero or negative
            when the server did not report a size.
    """
    transferred = count * block_size
    if total_size > 0:
        # The last block is usually partial, so clamp at 100%.
        pct_complete = min(1.0, float(transferred) / total_size)
        msg = "\r- Download progress: {0:.1%}".format(pct_complete)
    else:
        # Unknown size: a percentage is meaningless, show bytes instead.
        msg = "\r- Download progress: {0} bytes".format(transferred)
    sys.stdout.write(msg)
    sys.stdout.flush()
def maybe_download_and_extract():
    """Make sure the CIFAR-10 binary files exist and return their paths.

    If ``../data/cifar_10/`` (relative to the current working directory) does
    not exist, the CIFAR-10 binary archive is downloaded into ``../data/``,
    extracted, the extracted ``cifar-10-batches-bin`` directory is renamed to
    ``cifar_10`` and the archive is deleted.

    Returns:
        A 2-tuple ``(train_files, test_files)``: the list of the five
        ``data_batch_N.bin`` paths and a one-element list holding the
        ``test_batch.bin`` path.
    """
    main_directory = "../data/"
    cifar_10_directory = main_directory + "cifar_10/"
    os.makedirs(main_directory, exist_ok=True)
    if not os.path.exists(cifar_10_directory):
        url = "http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"
        archive_path = os.path.join(main_directory, url.split('/')[-1])
        archive_path, _ = urlretrieve(
            url=url,
            filename=archive_path,
            reporthook=print_download_progress)
        print()
        print("Download finished. Extracting files.")
        # NOTE(review): the archive is fetched over plain HTTP and extracted
        # without member-path filtering; consider verifying a checksum or
        # using tarfile's extraction filters where available.
        # Context managers close the archive handles, which the previous
        # one-liner calls left open.
        if archive_path.endswith(".zip"):
            with zipfile.ZipFile(file=archive_path, mode="r") as archive:
                archive.extractall(main_directory)
        elif archive_path.endswith((".tar.gz", ".tgz")):
            with tarfile.open(name=archive_path, mode="r:gz") as archive:
                archive.extractall(main_directory)
        print("Done.")
        os.rename(main_directory + "cifar-10-batches-bin", cifar_10_directory)
        os.remove(archive_path)
    train_files = [
        os.path.join(cifar_10_directory, 'data_batch_{}.bin'.format(i))
        for i in range(1, 6)
    ]
    test_files = [os.path.join(cifar_10_directory, 'test_batch.bin')]
    return train_files, test_files
class Cifar10Record(object):
    """tf.data input pipeline over CIFAR-10 binary record files.

    Constructing an instance builds a dataset that reads fixed-length
    records from ``filenames``, shuffles them, decodes each into a
    ``(float32 image, one-hot label)`` pair and batches the result.  The
    pipeline is exposed through ``dataset``, ``iterator`` (initializable; run
    ``iterator.initializer`` first) and ``next_input``.
    """
    label_bytes = 1  # 2 for CIFAR-100
    height = 32
    width = 32
    depth = 3
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    image_bytes = height * width * depth
    record_bytes = label_bytes + image_bytes

    def __init__(self, filenames, batch_size=100, shuffle_buffer_size=None):
        """Build the dataset, its iterator and the next-element tensors.

        Args:
            filenames: List of paths to CIFAR-10 binary batch files.
            batch_size: Number of examples per batch (default 100).
            shuffle_buffer_size: Size of the shuffle buffer, counted in
                records (not bytes).  Defaults to 1024 * 1024 records, the
                value previously hard-coded here.
        """
        if shuffle_buffer_size is None:
            shuffle_buffer_size = 1 * MB
        # No string_input_producer queue is created any more: the Dataset
        # reads the files itself, and the old queue only left an unused
        # queue runner in the graph.
        dataset = tf.data.FixedLengthRecordDataset(
            filenames, Cifar10Record.record_bytes, buffer_size=1 * MB)
        # Shuffle the raw records before decoding so the shuffle buffer
        # holds compact byte strings rather than decoded float32 images.
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
        dataset = dataset.map(Cifar10Record._parse_function)
        dataset = dataset.batch(batch_size)
        self.dataset = dataset
        self.iterator = self.dataset.make_initializable_iterator()
        self.next_input = self.iterator.get_next()

    @staticmethod
    def _parse_function(value):
        """Decode one raw record into an image and a one-hot label.

        Args:
            value: Scalar string tensor holding one record of
                ``record_bytes`` bytes.

        Returns:
            A tuple ``(image, label)``: ``image`` is a float32 tensor laid
            out as [height, width, depth]; ``label`` is a float one-hot
            tensor of shape [1, 10] (the singleton axis is not squeezed).
        """
        record = tf.decode_raw(value, tf.uint8)
        # The first bytes represent the label, which we convert from
        # uint8->int32.
        label = tf.cast(
            tf.strided_slice(record, [0], [Cifar10Record.label_bytes]),
            tf.int32)
        label.set_shape([1])
        label = tf.one_hot(label, NUM_CLASSES, on_value=1.0, off_value=0.0)
        # The remaining bytes after the label represent the image, which we
        # reshape from [depth * height * width] to [depth, height, width].
        depth_major = tf.reshape(
            tf.strided_slice(record, [Cifar10Record.label_bytes],
                             [Cifar10Record.record_bytes]),
            [Cifar10Record.depth, Cifar10Record.height, Cifar10Record.width])
        # Convert from [depth, height, width] to [height, width, depth].
        uint8image = tf.transpose(depth_major, [1, 2, 0])
        float32image = tf.cast(uint8image, tf.float32)
        return float32image, label
# Build one Dataset-backed pipeline per split, then fetch and print a single
# training batch.
train_files, test_files = maybe_download_and_extract()
train_data = Cifar10Record(train_files)
test_data = Cifar10Record(test_files)
with tf.Session() as sess:
    # The iterator is initializable, so it must be initialized before use.
    sess.run(fetches=train_data.iterator.initializer)
    print(sess.run(fetches=train_data.next_input))