我最近开始学习三元组网络(triplet network),并决定用卷积神经网络来实现它。我使用 CIFAR-10 数据集进行图像分类,但得到的准确率很低。
训练完成后,验证集上的准确率只有约 0.32。
def pairwise_distance(feature, squared=False):
    """Build the matrix of pairwise Euclidean distances between rows.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2

    The squared distances are assembled from the expansion
    ||a||^2 + ||b||^2 - 2 * a.b, clamped at zero, and the diagonal is
    forced to exactly zero.

    Args:
        feature: 2-D Tensor of size [number of data, feature dimension].
        squared: Boolean; when true the squared distances are returned and
            no square root is taken.

    Returns:
        2-D Tensor of size [number of data, number of data].
    """
    feature_t = array_ops.transpose(feature)

    # ||a||^2 as a column vector and ||b||^2 as a row vector; adding them
    # broadcasts to the full [n, n] grid.
    row_sq_norms = math_ops.reduce_sum(
        math_ops.square(feature), axis=[1], keepdims=True)
    col_sq_norms = math_ops.reduce_sum(
        math_ops.square(feature_t), axis=[0], keepdims=True)
    gram = math_ops.matmul(feature, feature_t)
    dist_sq = math_ops.add(row_sq_norms, col_sq_norms) - 2.0 * gram

    # Round-off can push entries slightly below zero; clamp them.
    dist_sq = math_ops.maximum(dist_sq, 0.0)
    # Entries that are exactly zero after clamping.
    is_zero = math_ops.less_equal(dist_sq, 0.0)

    if squared:
        dist = dist_sq
    else:
        # A tiny epsilon is added only at the zero entries before the sqrt
        # (presumably to keep the sqrt well-behaved at 0).
        dist = math_ops.sqrt(dist_sq + math_ops.to_float(is_zero) * 1e-16)

    # Zero the flagged entries again, undoing the conditional epsilon.
    keep = math_ops.to_float(math_ops.logical_not(is_zero))
    dist = math_ops.multiply(dist, keep)

    # Force the diagonal (distance of a row to itself) to zero.
    n = array_ops.shape(feature)[0]
    off_diagonal = array_ops.ones_like(dist) - array_ops.diag(
        array_ops.ones([n]))
    return math_ops.multiply(dist, off_diagonal)
def masked_maximum(data, mask, dim=1):
    """Take the maximum along `dim`, considering only the selected entries.

    The data is shifted by its per-axis minimum so every entry is
    non-negative, multiplied by the mask (unselected entries become 0),
    reduced with max, and shifted back. If nothing is selected along the
    axis the result is therefore the axis minimum.

    Args:
        data: 2-D float `Tensor` of size [n, m].
        mask: 2-D `Tensor` of size [n, m] marking the selected entries. It is
            multiplied with `data`, so it must be numeric (0/1) rather than
            boolean.
        dim: The dimension over which to compute the maximum.

    Returns:
        `Tensor` whose reduced dimension has size 1.
    """
    floor = math_ops.reduce_min(data, dim, keepdims=True)
    shifted = math_ops.multiply(data - floor, mask)
    return math_ops.reduce_max(shifted, dim, keepdims=True) + floor
def masked_minimum(data, mask, dim=1):
    """Take the minimum along `dim`, considering only the selected entries.

    The data is shifted by its per-axis maximum so every entry is
    non-positive, multiplied by the mask (unselected entries become 0),
    reduced with min, and shifted back. If nothing is selected along the
    axis the result is therefore the axis maximum.

    Args:
        data: 2-D float `Tensor` of size [n, m].
        mask: 2-D `Tensor` of size [n, m] marking the selected entries. It is
            multiplied with `data`, so it must be numeric (0/1) rather than
            boolean.
        dim: The dimension over which to compute the minimum.

    Returns:
        `Tensor` whose reduced dimension has size 1.
    """
    ceiling = math_ops.reduce_max(data, dim, keepdims=True)
    shifted = math_ops.multiply(data - ceiling, mask)
    return math_ops.reduce_min(shifted, dim, keepdims=True) + ceiling
def triplet_loss_adapted_from_tf(y_true, y_pred):
    """Semi-hard triplet loss usable as a Keras loss function.

    The body follows `tf.contrib.losses.metric_learning.triplet_semihard_loss`
    with a fixed margin of 1.

    Args:
        y_true: Ignored; it is deleted immediately. It exists only because of
            the (y_true, y_pred) loss signature.
        y_pred: 2-D tensor whose first column holds the class label of each
            sample and whose remaining columns hold its embedding.

    Returns:
        Scalar tensor: the summed hinge loss over positive pairs divided by
        the number of positive pairs.
    """
    del y_true
    margin = 1.

    # Split the packed prediction into its label column and embedding part.
    labels = tf.cast(y_pred[:, :1], dtype='int32')
    embeddings = y_pred[:, 1:]

    # Pairwise squared distances between all embeddings in the batch.
    pdist_matrix = pairwise_distance(embeddings, squared=True)

    # same_class[i, j] is true when samples i and j share a label.
    same_class = math_ops.equal(labels, array_ops.transpose(labels))
    diff_class = math_ops.logical_not(same_class)

    batch_size = array_ops.size(labels)

    # Stack the distance matrix batch_size times and compare every row
    # against one anchor-positive distance: an entry is selected when it
    # belongs to a negative and is farther than that positive.
    pdist_tiled = array_ops.tile(pdist_matrix, [batch_size, 1])
    positive_dists = array_ops.reshape(
        array_ops.transpose(pdist_matrix), [-1, 1])
    farther_negative = math_ops.logical_and(
        array_ops.tile(diff_class, [batch_size, 1]),
        math_ops.greater(pdist_tiled, positive_dists))
    farther_negative_f = math_ops.cast(farther_negative, dtype=dtypes.float32)

    # True where at least one such farther negative exists for the pair.
    any_farther = math_ops.greater(
        math_ops.reduce_sum(farther_negative_f, 1, keepdims=True), 0.0)
    has_outside = array_ops.transpose(
        array_ops.reshape(any_farther, [batch_size, batch_size]))

    diff_class_f = math_ops.cast(diff_class, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.transpose(
        array_ops.reshape(
            masked_minimum(pdist_tiled, farther_negative_f),
            [batch_size, batch_size]))

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, diff_class_f), [1, batch_size])

    # Prefer the "outside" negative where one exists, otherwise fall back
    # to the largest negative distance.
    chosen_negatives = array_ops.where(
        has_outside, negatives_outside, negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - chosen_negatives)

    # All same-class pairs except the diagonal (a sample paired with itself).
    positive_pairs = math_ops.cast(
        same_class, dtype=dtypes.float32) - array_ops.diag(
            array_ops.ones([batch_size]))
    num_positives = math_ops.reduce_sum(positive_pairs)

    hinge_total = math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, positive_pairs), 0.0))
    return math_ops.truediv(
        hinge_total, num_positives, name='triplet_semihard_loss')
def create_base_network(image_input_shape, embedding_size):
    """Build the convolutional network that maps an image to an embedding.

    Three stages of two 3x3 'same' convolutions (32, 64, 128 filters), each
    convolution followed by ELU and batch normalisation, each stage closed by
    2x2 max-pooling and dropout (0.2, 0.3, 0.4). The flattened result is
    projected to `embedding_size` values.

    Args:
        image_input_shape: Shape of one input image, passed as `input_shape`
            to the first convolution.
        embedding_size: Number of units in the final embedding layer.

    Returns:
        The `Sequential` model.
    """
    weight_decay = 1e-4
    model = Sequential()

    first_conv = True
    for filters, drop_rate in ((32, 0.2), (64, 0.3), (128, 0.4)):
        for _ in range(2):
            conv_kwargs = {
                'padding': 'same',
                'kernel_regularizer': regularizers.l2(weight_decay),
            }
            if first_conv:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = image_input_shape
                first_conv = False
            model.add(Conv2D(filters, (3, 3), **conv_kwargs))
            model.add(Activation('elu'))
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(drop_rate))

    model.add(Flatten())
    # The embedding layer is linear on purpose. A softmax here would force
    # every embedding onto the probability simplex (non-negative, summing
    # to 1), which caps pairwise distances and works against a
    # distance/margin based triplet loss.
    model.add(Dense(embedding_size))
    return model
if __name__ == "__main__":
    # Guarded so that importing this script from another file does not start
    # training the network.
    batch_size = 128
    epochs = 100
    train_flag = True  # True: train a new model; False: load a checkpoint
    embedding_size = 64
    input_image_shape = (32, 32, 3)

    # Single source for the checkpoint file name so that the name written
    # during training and the name read when train_flag is False always
    # agree. `{epoch:02d}` is filled in by ModelCheckpoint when saving.
    checkpoint_template = (
        "semiH_trip_MNIST_v13_ep{epoch:02d}_BS%d.hdf5" % batch_size)

    # The data, split between train and test sets, scaled to [0, 1].
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.

    # The whole test split doubles as the validation set.
    x_val = x_test
    y_val = y_test

    if train_flag:
        base_network = create_base_network(input_image_shape, embedding_size)

        input_images = Input(shape=input_image_shape, name='input_image')
        input_labels = Input(shape=(1,), name='input_label')
        embeddings = base_network([input_images])
        # The loss receives labels and embeddings packed into one tensor:
        # column 0 is the label, the rest is the embedding.
        labels_plus_embeddings = concatenate([input_labels, embeddings])

        model = Model(inputs=[input_images, input_labels],
                      outputs=labels_plus_embeddings)

        opt = Adam(lr=0.001)
        model.compile(loss=triplet_loss_adapted_from_tf, optimizer=opt)

        checkpoint = ModelCheckpoint(checkpoint_template, monitor='val_loss',
                                     verbose=1, save_best_only=False,
                                     period=25)
        callbacks_list = [checkpoint]

        # The loss ignores y_true, but fit() still needs targets shaped like
        # the model output, so all-zero placeholders are supplied.
        dummy_gt_train = np.zeros((len(x_train), embedding_size + 1))
        dummy_gt_val = np.zeros((len(x_val), embedding_size + 1))

        H = model.fit(
            x=[x_train, y_train],
            y=dummy_gt_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=([x_val, y_val], dummy_gt_val),
            callbacks=callbacks_list)
    else:
        # Load the checkpoint saved after epoch 25 by a run that used the
        # current batch_size.
        model = load_model(
            checkpoint_template.format(epoch=25),
            custom_objects={
                'triplet_loss_adapted_from_tf': triplet_loss_adapted_from_tf})

    # Test the network: build a fresh embedding network (images in,
    # embeddings out) and copy the trained weights into it layer by layer.
    testing_embeddings = create_base_network(input_image_shape,
                                             embedding_size=embedding_size)
    for layer_target, layer_source in zip(testing_embeddings.layers,
                                          model.layers[2].layers):
        layer_target.set_weights(layer_source.get_weights())

    # Classify the embeddings with an SVM to measure how separable they are.
    x_embeddings = testing_embeddings.predict(x_train)
    y_embeddings = testing_embeddings.predict(x_val)

    svc = SVC()
    # The labels arrive as an (n, 1) column; the classifier wants a 1-D vector.
    svc.fit(x_embeddings, y_train.ravel())
    valid_prediction = svc.predict(y_embeddings)
    print(valid_prediction.shape)
    print("validation accuracy : ",
          accuracy_score(y_val.ravel(), valid_prediction))
如果有人能帮我检查一下我的实现是否正确,我将非常感激。希望能尽快收到大家的回复。
答案 0 :(得分:0)
可以先尝试下面这样一个简单的网络(来自 here):
def create_base_network(image_input_shape, embedding_size):
    """Build a small fully-connected embedding network.

    The image is flattened, passed through two Dense(128, relu) layers each
    followed by Dropout(0.1), and projected by a final Dense layer without
    activation to `embedding_size` values. A diagram of the model is also
    written to 'base_netwoN.png'.

    Args:
        image_input_shape: Shape of one input image.
        embedding_size: Number of units in the output embedding layer.

    Returns:
        The functional `Model` mapping an image to its embedding.
    """
    image_in = Input(shape=image_input_shape)

    hidden = Flatten()(image_in)
    for _ in range(2):
        hidden = Dense(128, activation='relu')(hidden)
        hidden = Dropout(0.1)(hidden)
    embedding = Dense(embedding_size)(hidden)

    net = Model(inputs=image_in, outputs=embedding)
    plot_model(net, to_file='base_netwoN.png',
               show_shapes=True, show_layer_names=True)
    return net