Question

您好，我目前使用tensorflow.org变体自动编码器实现。

https://www.tensorflow.org/tutorials/generative/cvae

我只是试图以一种可接收6张图像的方式更改体系结构。我试图通过myslelf修改代码，只是将Conv2D更改为Conv3D，但这并没有真正起作用。原始图像是6 x 299 x 299 OCT图像的批次。我将其重塑为64 x 64。

class CVAE(tf.keras.Model):
  def __init__(self, latent_dim):
    super(CVAE, self).__init__()
    self.latent_dim = latent_dim

    # defines an approximate posterior distribution q(z|x)
    # outputs mean and log-variance of a factorized Gaussian 
    self.inference_net = tf.keras.Sequential(
      [
          tf.keras.layers.InputLayer(input_shape=(6, 64, 64, 1)), # (28, 28, 1)
          tf.keras.layers.Conv3D(
              filters=32, kernel_size=3, strides=(2, 2, 2), activation='relu'),
          tf.keras.layers.Conv3D(
              filters=64, kernel_size=3, strides=(2, 2, 2), activation='relu'),
          tf.keras.layers.Flatten(),
          # No activation
          tf.keras.layers.Dense(latent_dim + latent_dim),
       ]
    )

    # outputs p(x|z)
    self.generative_net = tf.keras.Sequential(
        [
          tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
          tf.keras.layers.Dense(units=6*16*16*32, activation=tf.nn.relu), # change with img_size (7,7,32)
          tf.keras.layers.Reshape(target_shape=(6, 16, 16, 32)), # change with image size # (7,7,32)
          tf.keras.layers.Conv3DTranspose(
              filters=64,
              kernel_size=3,
              strides=(2, 2, 2),
              padding="SAME",
              activation='relu'),
          tf.keras.layers.Conv3DTranspose(
              filters=32,
              kernel_size=3,
              strides=(2, 2, 2),
              padding="SAME",
              activation='relu'),
          # No activation
          tf.keras.layers.Conv3DTranspose(
              filters=1, kernel_size=3, strides=(1, 1, 1), padding="SAME"),
        ]
    )

InvalidArgumentError: Negative dimension size caused by subtracting 3 from 2 for 'conv3d_5/Conv3D' (op: 'Conv3D') with input shapes: [?,2,13,13,32], [3,3,3,32,64].

def _parser(self, example_proto):
    # define a dict with the data-names and types we
    # expect to find
    features = { 'image_raw': tf.io.FixedLenFeature([], tf.string) }
    # parse the serialized data 
    parsed_features = tf.io.parse_single_example(example_proto, features)
    # decode the raw bytes so it becomes a tensor with type
    ima = tf.io.decode_raw(parsed_features['image_raw'], tf.float64)
    ima = tf.reshape(ima, (6,299,299)) # (6,299,299)

    ## custom; ima is already a tensor
    ima = tf.expand_dims(ima, -1) # (6,299,299,1) 
    ima = tf.image.resize(ima, (64, 64)) 
    #ima = ima[0,:] # only 1st scan
    #ima = tf.squeeze(ima)
    #ima = tf.reshape(ima, (1,784)) #(6, 784)
    ima = tf.cast(ima, 'float32')
    # normalizing images
    ima = ima / 255
    print("Parser Format: {}" .format(ima))

    return ima # (28, 28, 1)

任何帮助都将受到高度赞赏。我是神经网络的新手。预先非常感谢。

用于4D输入的可变自动编码器

0 个答案: