NotImplementedError: TPUStrategy.run(fn, ...) does not support pure eager execution. Please make sure the function passed into `strategy.run`

Asked: 2020-08-23 15:06:06

Tags: tensorflow google-colaboratory tpu

Below is my code, along with the error I get when running it with a TPU on Colab. Any thoughts?

I am trying to train a VAE model on Colab. For more information about the dataset, please see this page; it is a credit card fraud dataset.

Please let me know how to fix this issue.

Thanks for your help.

'''

# WITH TPU

# Imports assumed by this snippet (not shown in the original notebook):
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

input_dim = x_train.shape[1]
latent_dim = 3          # d, dimensionality of the latent code t
intermediate_dim = 256  # size of the hidden layer


def create_model():
  x = Input(shape=(input_dim,))
  t = BatchNormalization()(x)
  t = Dense(intermediate_dim, activation='relu', name='encoder_hidden')(t)
  t = BatchNormalization()(t)

  z_mean = Dense(latent_dim, name='z_mean')(t)
  z_log_var = Dense(latent_dim, name='z_log_var')(t)

  # Reparameterization trick: z = mu + sigma * epsilon with epsilon ~ N(0, 1),
  # so sampling stays differentiable w.r.t. z_mean and z_log_var.
  def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean), mean=0., stddev=1., name="epsilon")
    return z_mean + tf.exp(z_log_var / 2) * epsilon

  z = Lambda(sampling, name='z_sampled')([z_mean, z_log_var])

  t = Dense(intermediate_dim, activation='relu', name='decoder_hidden')(z)

  decoded_mean = Dense(input_dim, activation=None, name='decoded_mean')(t)

  # Closed-form KL divergence between N(z_mean, exp(z_log_var)) and N(0, 1).
  def kl_loss(y_true, y_pred):
    kl_loss_ = - 0.5 * tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
    return kl_loss_

  # Squared-error reconstruction loss.
  def rec_loss(y_true, y_pred):
    rec_loss_ = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1)
    return rec_loss_

  # Total objective: mean of the reconstruction and KL terms.
  def vae_loss(y_true, y_pred):
    rec_loss_ = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1)
    kl_loss_ = - 0.5 * tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
    return K.mean((rec_loss_ + kl_loss_) / 2)

  vae = Model(x, decoded_mean)

  # Return the loss functions as well, so they are in scope at compile time.
  return vae, vae_loss, rec_loss, kl_loss
  

with strategy.scope():
  vae, vae_loss, rec_loss, kl_loss = create_model()
  vae.compile(optimizer=Adam(learning_rate=1e-2), loss=vae_loss, metrics=[rec_loss, kl_loss])
  #vae.compile(optimizer=tf.keras.optimizers.Nadam(), loss=negloglik)
  vae.summary()
  n_epochs = 30
  batch_size = 128

  # Stop training if the loss improves by less than 1e-5 for 10 consecutive epochs.
  early_stopping = EarlyStopping(monitor='loss', patience=10, min_delta=1e-5)
  # Multiply the learning rate by 0.2 if the loss improves by less than 1e-5 for 5 epochs.
  reduce_lr = ReduceLROnPlateau(monitor='loss', patience=5, min_delta=1e-5, factor=0.2)

  callbacks = [early_stopping, reduce_lr]

  #tf.config.experimental_run_functions_eagerly(True)

  # Shuffle before batching; prefetch last. The validation set is not shuffled.
  tf_train = tf.data.Dataset.from_tensor_slices((x_train, x_train)).shuffle(int(10e4)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
  tf_val = tf.data.Dataset.from_tensor_slices((x_val, x_val)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

  # batch_size and shuffling are handled by the tf.data pipeline above,
  # so they are not passed to fit().
  hist = vae.fit(tf_train,
                 validation_data=tf_val,
                 verbose=0,
                 epochs=n_epochs,
                 callbacks=callbacks)

'''
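
(The snippet assumes a `strategy` object created earlier in the notebook. The usual Colab TPU setup, included here only as an assumed sketch and not shown in the original post, is roughly:)

'''

import tensorflow as tf

# Detect and initialize the Colab TPU, then build the distribution strategy.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)  # tf.distribute.experimental.TPUStrategy on older TF 2.x

'''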

***This is the error I get!***

'''

       Model: "functional_5"
    __________________________________________________________________________________________________
    Layer (type)                    Output Shape         Param #     Connected to                     
    ==================================================================================================
    input_3 (InputLayer)            [(None, 30)]         0                                            
    __________________________________________________________________________________________________
    batch_normalization_4 (BatchNor (None, 30)           120         input_3[0][0]                    
    __________________________________________________________________________________________________
    encoder_hidden (Dense)          (None, 256)          7936        batch_normalization_4[0][0]      
    __________________________________________________________________________________________________
    batch_normalization_5 (BatchNor (None, 256)          1024        encoder_hidden[0][0]             
    __________________________________________________________________________________________________
    z_mean (Dense)                  (None, 3)            771         batch_normalization_5[0][0]      
    __________________________________________________________________________________________________
    z_log_var (Dense)               (None, 3)            771         batch_normalization_5[0][0]      
    __________________________________________________________________________________________________
    z_sampled (Lambda)              (None, 3)            0           z_mean[0][0]                     
                                                                     z_log_var[0][0]                  
    __________________________________________________________________________________________________
    decoder_hidden (Dense)          (None, 256)          1024        z_sampled[0][0]                  
    __________________________________________________________________________________________________
    decoded_mean (Dense)            (None, 30)           7710        decoder_hidden[0][0]             
    ==================================================================================================
    Total params: 19,356
    Trainable params: 18,784
    Non-trainable params: 572
    __________________________________________________________________________________________________
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
         59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
    ---> 60                                         inputs, attrs, num_outputs)
         61   except core._NotOkStatusException as e:
    
    TypeError: An op outside of the function building code is being passed
    a "Graph" tensor. It is possible to have Graph tensors
    leak out of the function building context by including a
    tf.init_scope in your function building code.
    For example, the following function will fail:
      @tf.function
      def has_init_scope():
        my_constant = tf.constant(1.)
        with tf.init_scope():
          added = my_constant * 2
    The graph tensor has name: z_log_var/BiasAdd:0
    
    During handling of the above exception, another exception occurred:
    
    _SymbolicException                        Traceback (most recent call last)
    9 frames
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
         72       raise core._SymbolicException(
         73           "Inputs to eager execution function cannot be Keras symbolic "
    ---> 74           "tensors, but found {}".format(keras_symbolic_tensors))
         75     raise e
         76   # pylint: enable=protected-access
    
    _SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'z_log_var/BiasAdd:0' shape=(None, 3) dtype=float32>, <tf.Tensor 'z_mean/BiasAdd:0' shape=(None, 3) dtype=float32>]

'''
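
From the traceback, the `_SymbolicException` appears to come from the loss closures: `vae_loss`, `rec_loss`, and `kl_loss` capture the Keras symbolic tensors `z_mean` and `z_log_var` from the model-building graph, and TensorFlow then tries to feed those graph tensors into an eagerly executed loss function. Uncommenting `tf.config.experimental_run_functions_eagerly(True)` trades this error for the `NotImplementedError` in the title, because `TPUStrategy` does not support pure eager execution. A commonly suggested restructuring, sketched below under those assumptions and not necessarily the only fix, is to attach the whole VAE objective with `model.add_loss` inside `create_model`, so the compiled loss no longer closes over symbolic tensors:

'''

def create_model():
  x = Input(shape=(input_dim,))
  t = BatchNormalization()(x)
  t = Dense(intermediate_dim, activation='relu', name='encoder_hidden')(t)
  t = BatchNormalization()(t)

  z_mean = Dense(latent_dim, name='z_mean')(t)
  z_log_var = Dense(latent_dim, name='z_log_var')(t)

  # Reparameterization trick, as in the original code.
  def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(z_log_var / 2) * epsilon

  z = Lambda(sampling, name='z_sampled')([z_mean, z_log_var])
  t = Dense(intermediate_dim, activation='relu', name='decoder_hidden')(z)
  decoded_mean = Dense(input_dim, activation=None, name='decoded_mean')(t)

  vae = Model(x, decoded_mean)

  # Both loss terms are built from tensors that belong to this model's graph
  # and attached via add_loss, so compile() needs no external loss function.
  rec_loss_ = tf.reduce_sum(tf.square(x - decoded_mean), axis=-1)
  kl_loss_ = -0.5 * tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
  vae.add_loss(tf.reduce_mean((rec_loss_ + kl_loss_) / 2))
  return vae

with strategy.scope():
  vae = create_model()
  vae.compile(optimizer=Adam(learning_rate=1e-2))  # loss comes from add_loss

'''

With the objective attached via `add_loss`, the datasets only need to yield inputs, e.g. `tf.data.Dataset.from_tensor_slices(x_train)` instead of `(x_train, x_train)` pairs.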

0 Answers:

There are no answers yet.