#Now actually start building the network
# NOTE(review): everything in the signature after `inputs` was missing from this
# file; the parameter names come from the docstring and the defaults are a best
# guess -- confirm both against the callers before relying on them.
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    The ENet model for real-time semantic segmentation!

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents one batch of preprocessed images.
    - num_classes(int): an integer for the number of classes to predict. This determines the number of output channels of the final layer.
    - batch_size(int): the batch size used to explicitly set the static shape of the inputs so that later operations work properly.
    - num_initial_blocks(int): the number of times to repeat the initial block (values below 1 are treated as 1).
    - stage_two_repeat(int): the number of times to repeat stage two in order to make the network deeper (values below 2 are treated as 2).
    - skip_connections(bool): if True, add the corresponding encoder feature maps to the decoder. They are expected to have the exact same shapes.
    - reuse(bool): Whether or not to reuse the variables for evaluation.
    - is_training(bool): forwarded to every initial_block and bottleneck call; per the original notes it switches batch_norm and prelu to their training behaviour.
    - scope(str): a string that represents the scope name for the variables.

    OUTPUTS:
    - logits(Tensor): the output of the final transposed convolution, with num_classes channels.
    - probabilities(Tensor): the softmax of logits.
    '''
    #Set the shape of the inputs first to get the batch_size information
    inputs_shape = inputs.get_shape().as_list()
    inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            net = initial_block(inputs, scope='initial_block_1')
            # range (not xrange) so the loop runs on both Python 2 and Python 3.
            for i in range(2, max(num_initial_blocks, 1) + 1):
                net = initial_block(net, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat the stage two at least twice to get stage 2 and 3:
                for i in range(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                # `i` still holds the index of the last repeated encoder stage
                # (the loop above always runs at least twice), so the decoder
                # scopes continue the numbering from there.
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')

        return logits, probabilities
# ---- Inference configuration ------------------------------------------------
# Every .png in image_dir is segmented, in sorted filename order.
image_dir = './dataset/test/'
images_list = sorted([os.path.join(image_dir, name) for name in os.listdir(image_dir) if name.endswith('.png')])

checkpoint_dir = "log/original/"
listi = os.listdir(checkpoint_dir)
# NOTE(review): this absolute path is machine-specific and does not use
# checkpoint_dir above -- confirm which location is intended.
checkpoint = tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original")

# Architecture switches; these presumably have to match the values used when
# the checkpoint was trained -- confirm.
num_initial_blocks = 1
skip_connections = False
stage_two_repeat = 2

# Reference label colours.
# However, the road_marking class is collapsed into the road class in the dataset provided.
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road_marking = [255,69,0]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]

# Maps a predicted class index to the RGB colour written to the output image.
# Only two labels are mapped here.
label_to_colours = {0: [128,128,128],
                    1: [0, 0, 0]}

#Create the photo directory
photo_dir = checkpoint_dir + "/test_images"
if not os.path.exists(photo_dir):
    os.makedirs(photo_dir)
#Create a function to convert each pixel label to colour.
def grayscale_to_colour(image, palette=None):
    '''
    Convert a map of class labels into an RGB image.

    INPUTS:
    - image(ndarray): the label map. A 2-D array of any size is used as is;
      any other layout is reshaped to 256x256, which is the only size the
      previous implementation accepted.
    - palette(dict): optional mapping from integer label to a 3-element
      colour. Defaults to the module-level label_to_colours.

    OUTPUTS:
    - an array of shape (height, width, 3) with the same dtype as the input,
      where every pixel holds the colour of its label.

    Raises KeyError when a label in the image has no entry in the palette.
    '''
    if palette is None:
        palette = label_to_colours
    print('Converting image...')

    pixels = np.asarray(image)
    if pixels.ndim != 2:
        # Legacy path: flat or (256, 256, 1) inputs were always reshaped like this.
        pixels = pixels.reshape((256, 256))

    # astype truncates towards zero, like the int() call used per pixel before.
    labels = pixels.astype(np.int64)
    coloured = np.empty(labels.shape + (3,), dtype=pixels.dtype)
    # One masked assignment per distinct label instead of one Python-level
    # iteration per pixel.
    for label in np.unique(labels):
        coloured[labels == label] = palette[int(label)]
    return coloured
# Build the inference graph, restore the checkpoint and write one colourised
# segmentation per input image into photo_dir.
with tf.Graph().as_default() as graph:
    inference_batch_size = 10

    # Input pipeline: filename queue -> whole-file reader -> PNG decode -> preprocess -> batch.
    images_tensor = tf.train.string_input_producer(images_list, shuffle=False)
    reader = tf.WholeFileReader()
    key, image_tensor = reader.read(images_tensor)
    image = tf.image.decode_png(image_tensor, channels=3)
    print(image.shape, 'newwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww shapeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee')
    image = preprocess(image)
    images = tf.train.batch([image], batch_size=inference_batch_size, allow_smaller_final_batch=True)

    #Create the model inference
    with slim.arg_scope(ENet_arg_scope()):
        # NOTE(review): every argument after `images` was missing from this
        # file and has been reconstructed -- confirm num_classes and
        # is_training against the way the checkpoint was trained.
        logits, probabilities = ENet(images,
                                     num_classes=len(label_to_colours),
                                     batch_size=inference_batch_size,
                                     is_training=False,
                                     reuse=None,
                                     num_initial_blocks=num_initial_blocks,
                                     stage_two_repeat=stage_two_repeat,
                                     skip_connections=skip_connections)

    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    def restore_fn(sess):
        return saver.restore(sess, checkpoint)

    # Class index with the highest probability for every pixel.
    predictions = tf.argmax(probabilities, -1)
    predictions = tf.cast(predictions, tf.float32)
    print('HERE', predictions.get_shape())

    sv = tf.train.Supervisor(logdir=None, init_fn=restore_fn)

    with sv.managed_session() as sess:
        # Ceiling division so a trailing partial batch is processed too; the
        # old `len(images_list) / 10` dropped it (and is a float on Python 3).
        num_batches = (len(images_list) + inference_batch_size - 1) // inference_batch_size
        for i in range(num_batches):
            segmentations = sess.run(predictions)
            print(segmentations.shape, 'shape')

            for j in range(segmentations.shape[0]):
                image_index = i * inference_batch_size + j
                # The filename queue is created without num_epochs, so the last
                # batch may be topped up with images from the start of the
                # list; anything past the end of the list is skipped.
                if image_index >= len(images_list):
                    break
                converted_image = grayscale_to_colour(segmentations[j])
                print('Saving image %s/%s' % (image_index, len(images_list)))
                imsave(photo_dir + "/image_%s.png" % (image_index), converted_image)
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
# Report the TensorFlow version this script is running against.
print('Tensorflow', tf.__version__)
def initial_block(inp):
    '''
    ENet initial block: halve the spatial resolution along two parallel paths
    and concatenate them on the channel axis.

    inp: 4D feature map / image tensor.

    Returns a tensor with 13 + <input channels> channels (13 from the strided
    convolution, the rest from the max-pooled input).
    '''
    conv = Conv2D(filters=13, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal')(inp)
    # 'same' padding makes the pooled size ceil(n / 2), matching the strided
    # 'same' convolution above. With the default 'valid' padding the two
    # branches disagree for odd heights/widths and the concatenate fails;
    # for even sizes the result is unchanged.
    pool = MaxPool2D(2, padding='same')(inp)
    return concatenate([conv, pool])
def encoder_bottleneck(inp, filters, name, dilation_rate=2, downsample=False, dilated=False, asymmetric=False, drop_rate=0.1):
    '''
    ENet encoder bottleneck: a reduce -> conv -> expand main branch added to a
    shortcut branch, followed by PReLU.

    inp: 4D input tensor.
    filters: number of output channels.
    name: prefix for the names of the convolution layers.
    dilation_rate: dilation of the middle convolution; only used when dilated=True.
    downsample: if True, halve the spatial size. The shortcut is max-pooled and
        zero-padded on the channel axis up to `filters` channels.
    dilated: use a dilated 3x3 middle convolution (takes precedence over asymmetric).
    asymmetric: use a 1x5 followed by a 5x1 middle convolution.
    drop_rate: rate of the SpatialDropout2D applied to the main branch.

    Returns the activated sum of the main and shortcut branches.
    '''
    reduced_filters = filters // 4
    shortcut = inp
    kernel_stride = 1

    if downsample:
        kernel_stride = 2
        # int(shape[-1]) works whether the shape is a TensorShape or a plain
        # tuple; the old `.shape.as_list()` only exists on TensorShape.
        pad_activations = filters - int(inp.shape[-1])
        # 'same' padding keeps the pooled size equal to the strided 'same'
        # reduce convolution below for odd sizes too (even sizes unchanged).
        shortcut = MaxPool2D(2, padding='same')(shortcut)
        # ZeroPadding2D only pads the two spatial axes, so the channel axis is
        # swapped into the width position, padded, and swapped back.
        shortcut = Permute(dims=(1, 3, 2))(shortcut)
        shortcut = ZeroPadding2D(padding=((0, 0), (0, pad_activations)))(shortcut)
        shortcut = Permute(dims=(1, 3, 2))(shortcut)

    #1*1 Reduce (becomes a 2x2, stride-2 convolution when downsampling)
    x = Conv2D(filters=reduced_filters, kernel_size=kernel_stride, strides=kernel_stride, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    # Middle convolution: regular, dilated or asymmetric.
    if not dilated and not asymmetric:
        x = Conv2D(filters=reduced_filters, kernel_size=3, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    elif dilated:
        x = Conv2D(filters=reduced_filters, kernel_size=3, padding='same', dilation_rate=dilation_rate, kernel_initializer='he_normal', name=f'{name}_reduce_dilated')(x)
    elif asymmetric:
        x = Conv2D(filters=reduced_filters, kernel_size=(1,5), padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_asymmetric')(x)
        x = Conv2D(filters=reduced_filters, kernel_size=(5,1), padding='same', kernel_initializer='he_normal', name=name)(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = SpatialDropout2D(rate=drop_rate)(x)

    merged = Add()([x, shortcut])
    merged = PReLU(shared_axes=[1, 2])(merged)
    return merged
def decoder_bottleneck(inp, filters, name, upsample=False):
    '''
    ENet decoder bottleneck: a reduce -> conv -> expand main branch added to a
    shortcut branch, followed by ReLU.

    inp: 4D input tensor.
    filters: number of output channels.
    name: prefix for the names of the convolution layers.
    upsample: if True, double the spatial size. The shortcut goes through a
        1x1 convolution and UpSampling2D, and the main branch uses a stride-2
        transposed convolution instead of the regular 3x3 convolution.

    Returns the activated sum of the main and shortcut branches.
    '''
    reduced_filters = filters // 4
    shortcut = inp

    if upsample:
        shortcut = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_upsample')(shortcut)
        shortcut = UpSampling2D(size=2)(shortcut)

    #1*1 Reduce
    x = Conv2D(filters=reduced_filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    # Exactly one middle convolution. The transposed convolution must only run
    # when upsampling; otherwise the main branch would be twice the size of
    # the shortcut and the Add below would fail.
    if not upsample:
        x = Conv2D(filters=reduced_filters, kernel_size=3, strides=1, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    else:
        x = Conv2DTranspose(filters=reduced_filters, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal', name=f'{name}_transpose')(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)

    merged = Add()([x, shortcut])
    merged = ReLU()(merged)
    return merged
def ENet(H, W, nclasses):
    '''
    Assemble the Keras ENet segmentation model.

    Args:
        H: Height of the image
        W: Width of the image
        nclasses: Total no of classes

    Returns:
        model: the Keras Model (named 'Enet') mapping an (H, W, 3) input to a
        per-pixel softmax over nclasses channels.
    '''
    image = Input(shape=(H, W, 3))
    net = initial_block(image)

    #Bottleneck 1.0
    net = encoder_bottleneck(net, 64, name='enc1', downsample=True, drop_rate=0.001)
    for idx in range(1, 5):
        net = encoder_bottleneck(net, 64, name=f'enc1.{idx}', drop_rate=0.001)

    # Sequence of middle-convolution variants shared by stages 2 and 3.
    repeated_variants = (
        {},
        {'dilation_rate': 2, 'dilated': True},
        {'asymmetric': True},
        {'dilation_rate': 4, 'dilated': True},
        {},
        {'dilation_rate': 8, 'dilated': True},
        {'asymmetric': True},
        {'dilation_rate': 16, 'dilated': True},
    )

    #Bottleneck 2.0
    net = encoder_bottleneck(net, 128, name='enc2.0', downsample=True)
    for idx, variant in enumerate(repeated_variants, start=1):
        net = encoder_bottleneck(net, 128, name=f'enc2.{idx}', **variant)

    #Bottleneck 3.0 (same sequence, no downsampling block, numbered from 0)
    for idx, variant in enumerate(repeated_variants):
        net = encoder_bottleneck(net, 128, name=f'enc3.{idx}', **variant)

    #Bottleneck 4.0 and 5.0: one upsampling block followed by plain blocks
    for stage, width, plain_blocks in (('dec4', 64, 2), ('dec5', 16, 1)):
        net = decoder_bottleneck(net, width, name=f'{stage}.0', upsample=True)
        for idx in range(1, plain_blocks + 1):
            net = decoder_bottleneck(net, width, name=f'{stage}.{idx}')

    # Final stride-2 transposed convolution to nclasses channels, then softmax.
    net = Conv2DTranspose(filters=nclasses, kernel_size=2, strides=2, padding='same', kernel_initializer='he_normal', name='fullconv')(net)
    net = Activation('softmax')(net)

    return Model(inputs=image, outputs=net, name='Enet')