I defined a custom loss function for deep metric learning in Keras, and even though the problem is simple, training performance in Colab with a GPU accelerator is poor. What can I do to speed up training?
Example code: the code is written to run in Colab/Jupyter, but I suppose it would also work as a script (I haven't tried it, but I don't see any obvious reason why it shouldn't).
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train / 255.0).astype('float32')
X_test = (X_test / 255.0).astype('float32')
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
tf.keras.backend.clear_session()
embedding_size = 3
model = keras.Sequential()
model.add(Flatten(input_shape = input_shape))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(embedding_size))
model.summary()
margin = 0.2

def contrastive_loss(y_true, embeddings):
    loss = 0.0
    b = embeddings.shape[0]
    # O(b^2) Python loop over every pair in the batch
    for i in range(0, b):
        yi = y_true[i]
        xi = embeddings[i]
        for j in range(i + 1, b):
            yj = y_true[j]
            xj = embeddings[j]
            # yij = 1 if the labels differ, 0 if they are equal
            yij = tf.minimum(1.0, tf.abs(tf.cast(yi - yj, dtype=tf.float32)))
            distance = tf.norm(xi - xj)
            loss = loss + (1 - yij) * distance**2 + yij * tf.maximum(0.0, margin - distance)**2
    loss = 0.5 * loss
    return loss
model.compile(keras.optimizers.Adam(learning_rate=1e-4), loss=contrastive_loss)
history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)
print(X_test.shape)
emb = model.predict(X_test)
print(emb.shape, "emb")
print(y_test.shape, "y_test")
plt.scatter(emb[:,0], emb[:,1], c=y_test, cmap='tab10')
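Side note: since embedding_size is 3, this scatter only shows the first two embedding coordinates. A 3-D view is possible too; a minimal sketch, assuming matplotlib >= 3.2 (older versions need `from mpl_toolkits.mplot3d import Axes3D` to register the '3d' projection):

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(emb[:, 0], emb[:, 1], emb[:, 2], c=y_test, cmap='tab10', s=2)
plt.show()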
Training is very slow on Colab with a GPU accelerator. Any tips on how to speed it up? Right now I'm at >200 ms per step, i.e. ≈380 s per epoch.
Edit: initially I had written a simplified version of my actual code, but according to some comments, the right way to speed things up may depend on what I actually compute inside the loss function, so I'm now posting the actual code. Sorry, it makes the code even slower...
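As an aside (my own sketch, not part of the original post): one way to confirm that the loss itself is the bottleneck, rather than the GPU or the input pipeline, is to time a single eager call of contrastive_loss on random tensors shaped like one batch:

import timeit

# batch of 32 labels/embeddings, matching the training setup above
yb = tf.constant(np.random.randint(0, 10, size=(32, 1)), dtype=tf.float32)
xb = tf.random.normal((32, 3))
# 32*31/2 = 496 pairs, each built from several tiny TF ops,
# so the time is dominated by Python-level overhead
print(timeit.timeit(lambda: contrastive_loss(yb, xb), number=10) / 10, "s per call")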
EDIT2: I tried to implement the solution from Jeff's answer, but I get an error whose cause I don't understand. Here is the new code:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train / 255.0).astype('float32')
X_test = (X_test / 255.0).astype('float32')
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
tf.keras.backend.clear_session()
embedding_size = 3
model = keras.Sequential()
model.add(Flatten(input_shape = input_shape))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(embedding_size))
model.summary()
margin = 0.2

def contrastive_loss(y_true, embeddings):
    # it's easier to work with a flattened array
    flat_y_true = tf.reshape(y_true, (-1))
    # matrix of absolute differences, clipped to 1 (if yi != yj then yij = 1)
    yijs = abs(np.subtract.outer(flat_y_true, flat_y_true)).clip(max=1)
    # we only need the upper triangular part of the matrix
    yijs = yijs[np.triu_indices_from(yijs)]
    # first compute row differences of the embeddings matrix, then compute
    # the norm of each row with axis=2
    distances = np.linalg.norm(embeddings[:, None] - embeddings[None], axis=2)
    # we only need the upper triangular part, again
    distances = distances[np.triu_indices_from(distances)]
    loss = ((1 - yijs) * (distances**2) + (yijs * ((margin - distances).clip(min=0)**2))).sum() * 0.5
    return loss
model.compile(keras.optimizers.Adam(learning_rate=1e-4), loss=contrastive_loss)
history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)
I get the following error:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-1ae713ccf38c> in <module>()
4
5 # When setting batch size, remember we are *quadratically* expanding it in our loss.
----> 6 history = model.fit(X_train, y_train, batch_size=32, epochs=10, shuffle=True)
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
<ipython-input-4-6c3b8535eb43>:5 contrastive_loss *
flat_y_true = tf.reshape(y_true, (-1))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:195 reshape
result = gen_array_ops.reshape(tensor, shape, name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py:8234 reshape
"Reshape", tensor=tensor, shape=shape, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
compute_device)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:3485 _create_op_internal
op_def=op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1975 __init__
control_input_ops, op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
raise ValueError(str(e))
ValueError: Shape must be rank 1 but is rank 0 for '{{node contrastive_loss/Reshape}} = Reshape[T=DT_UINT8, Tshape=DT_INT32](ExpandDims, contrastive_loss/Reshape/shape)' with input shapes: [32,1], [].
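For reference, the ValueError comes from the shape argument of tf.reshape: in Python, (-1) is just the integer -1, i.e. a rank-0 value, whereas a one-element shape needs (-1,) or [-1]. A minimal sketch of the difference:

t = tf.zeros((32, 1))
# tf.reshape(t, (-1))  # fails: (-1) is a scalar, i.e. a rank-0 shape
flat = tf.reshape(t, (-1,))  # a real 1-tuple flattens the tensor
print(flat.shape)  # (32,)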
Answer 0 (score: 1)
If this isn't what you want, we'll implement it ourselves.
Since the Python for loops in the loss function are likely the problem, we'll focus on eliminating them.
Edit: I guess I need to get more sleep, because I could have sworn I've had numpy functions work inside custom tensorflow loss functions before. Switched all the operations over to tensorflow ops, since apparently that's not the case.
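A minimal illustration of that point (my own sketch, assuming TF 2.x): once Keras traces the loss into a graph, the tensors are symbolic, and numpy functions fail because they try to read concrete values out of them:

@tf.function  # traces into a graph, just like model.fit does with a loss
def np_norm(x):
    return np.linalg.norm(x, axis=1)  # numpy cannot read a symbolic tensor

try:
    np_norm(tf.random.normal((4, 3)))
except Exception as e:
    print(type(e).__name__)  # raised during tracing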
Full edited function:
def contrastive_loss(y_true, embeddings):
    yijs = tf.cast(tf.reshape((y_true[:, None] != y_true), (y_true.get_shape()[0], y_true.get_shape()[0])), tf.dtypes.float32)
    distances = tf.norm(tf.cast(embeddings[:, None] - embeddings, tf.dtypes.float32), axis=2)
    dist_len = distances.get_shape()[0]
    return tf.math.reduce_sum((1 - yijs) * distances**2 + yijs * (tf.maximum(0.2 - distances, tf.zeros((dist_len, dist_len)))**2)) * 0.25
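As a quick sanity check (an editorial sketch, not part of the answer; loop_loss is a hypothetical name for the original looped function from the question, kept around for comparison), both versions should agree on a small batch, since the diagonal terms are zero and each off-diagonal pair is counted twice:

yb = tf.constant(np.random.randint(0, 10, size=(8, 1)), dtype=tf.float32)
xb = tf.random.normal((8, 3))
print(loop_loss(yb, xb).numpy())         # i<j pairs only, 0.5 factor
print(contrastive_loss(yb, xb).numpy())  # full pairwise matrix, 0.25 factor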
Line-by-line walkthrough of the edited version:
def contrastive_loss(y_true, embeddings):
    y_true_len = y_true.get_shape()[0]  # y_true has shape (None, 1)
    yijs = tf.cast(
        tf.reshape(
            (
                # max-clipping the abs diff of every value against all
                # other values to 1 is essentially checking whether the
                # other values are equal to it or not;
                # this is a much faster way of doing that
                y_true[:, None] != y_true
            ),
            # resize to a square matrix of shape (batch_size, batch_size)
            (y_true_len, y_true_len)
        ),
        tf.dtypes.float32
    )
    distances = tf.norm(
        tf.cast(
            # find the difference between each row and all other rows
            embeddings[:, None] - embeddings,
            tf.dtypes.float32
        ),
        # take the norm along axis 2 to get the
        # magnitude of every row-difference vector
        axis=2
    )
    # get the length to set up an equivalent square matrix of zeros later
    dist_len = distances.get_shape()[0]
    # sum all values in the tensor
    loss = tf.math.reduce_sum(
        (1 - yijs) * (distances**2)
        + yijs * (
            # clip all values to be above 0
            tf.maximum(
                0.2 - distances,
                # declare a tensor of the same shape as 0.2 - distances
                # for element-wise comparison so tf.maximum can do its thing
                tf.zeros((dist_len, dist_len))
            )**2
        )
    ) * 0.25  # since we didn't drop the lower triangle, multiply by 0.25 instead of 0.5
    return loss
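That last factor is easy to verify numerically (a sketch, not from the answer): for a symmetric matrix with zero diagonal, summing all entries and scaling by 0.25 matches summing only the upper triangle and scaling by 0.5:

d = tf.constant([[0.0, 1.0, 2.0],
                 [1.0, 0.0, 3.0],
                 [2.0, 3.0, 0.0]])
print(0.25 * tf.reduce_sum(d))                             # 0.25 * 12 = 3.0
print(0.5 * tf.reduce_sum(tf.linalg.band_part(d, 0, -1)))  # 0.5 * 6 = 3.0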