How can I combine a CNN with an RNN or an Embedding?

Asked: 2019-05-17 12:24:14

Tags: python image-processing neural-network deep-learning

I am new to artificial intelligence, and I am trying to combine a convolutional neural network with a recurrent neural network or an embedding.
The model has two inputs: an image and a text. The image is a product photo and the text is its description; the output is a verification of whether the product matches its description.

For this I have collected a dataset of images and descriptions in JSON format, and I even have the Embedding and CNN code, but I do not know how to combine them to solve the problem.
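
For context, this is roughly how I load the image/description pairs. It is only a sketch: the field names ('image_path', 'description', 'match'), the image size and the vocabulary/sequence limits are placeholders for illustration, not the real structure of my dataset.

import json
import numpy as np
from keras.preprocessing.image import load_img, img_to_array
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Hypothetical JSON layout: a list of records with made-up field names.
with open('dataset.json') as f:
    records = json.load(f)

# Images are resized to a fixed size and scaled to [0, 1].
images = np.array([img_to_array(load_img(r['image_path'], target_size=(224, 224)))
                   for r in records]) / 255.0
descriptions = [r['description'] for r in records]
labels = np.array([r['match'] for r in records])  # 1 = description fits the image

# Turn the descriptions into padded integer sequences for the Embedding layer.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(descriptions)
sequences = pad_sequences(tokenizer.texts_to_sequences(descriptions), maxlen=50)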

In the image below you can see the model I am trying to build:

[Image: diagram of the intended model]

For the CNN I am using ResNet v2. This is the error I currently get:

Traceback (most recent call last):
  File "main_latest.py", line 45, in <module>
    max_queue_size=10)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/training.py", line 2177, in fit_generator
    initial_epoch=initial_epoch)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/training_generator.py", line 216, in fit_generator
    callbacks.on_epoch_end(epoch, epoch_logs)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/callbacks.py", line 214, in on_epoch_end
    callback.on_epoch_end(epoch, logs)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/callbacks.py", line 601, in on_epoch_end
    self.model.save(filepath, overwrite=True)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/network.py", line 1363, in save
    save_model(self, filepath, overwrite, include_optimizer)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/keras/engine/saving.py", line 134, in save_model
    default=serialization.get_json_type).encode('utf8')
  File "/usr/lib/python3.5/json/__init__.py", line 237, in dumps
    **kw).encode(obj)
  File "/usr/lib/python3.5/json/encoder.py", line 198, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/usr/lib/python3.5/json/encoder.py", line 256, in iterencode
    return _iterencode(o, 0)
  File "/home/tejal/.local/lib/python3.5/site-packages/tensorflow/python/util/serialization.py", line 64, in get_json_type
    raise TypeError('Not JSON Serializable:', obj)
TypeError: ('Not JSON Serializable:', <tf.Variable 'Variable:0' shape=() dtype=float32>)
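
Looking at the traceback, the failure seems to happen when the ModelCheckpoint callback calls model.save() and a tf.Variable that ended up in the model or optimizer configuration cannot be JSON-serialized. A possible workaround (untested here, and separate from my actual question) would be to checkpoint only the weights, so no configuration has to be serialized:

from keras.callbacks import ModelCheckpoint

# save_weights_only=True makes the callback call save_weights() instead of
# save(), so the JSON config dump that raises the TypeError is skipped.
checkpoint = ModelCheckpoint(filepath='weights_best.h5',
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=True)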

For the Embedding, here is my code (it works fine):

import os

import keras
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras.datasets import cifar10
from keras.layers import (Activation, AveragePooling2D, BatchNormalization,
                          Conv2D, Dense, Flatten, Input)
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
# (Imports assume the standalone Keras package; with tf.keras, import the
# same names from tensorflow.keras instead.)


def lr_schedule(epoch):
    # Learning-rate schedule used by the LearningRateScheduler callback below.
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):

    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x


def resnet_v2(input_shape, depth):

    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (e.g. 56 or 110)')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])

        num_filters_in = num_filters_out

    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    # Instantiate model.
    model = Model(inputs=inputs, outputs=y)
    return model

depth = 3 * 9 + 2
batch_size = 8
epochs = 200
num_classes = 10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
input_shape = x_train.shape[1:]
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
#x_train_mean = np.mean(x_train, axis=0)
#x_train -= x_train_mean
#x_test -= x_train_mean
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'a101.h5'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True)
lr_scheduler = LearningRateScheduler(lr_schedule)

model_image = resnet_v2(input_shape=input_shape, depth=depth)

# resnet_v2 ends in a Flatten (it is meant to be the image feature extractor
# for the combined model), so a small classification head is added here just
# to train and sanity-check it on CIFAR-10 on its own.
outputs = Dense(num_classes, activation='softmax',
                kernel_initializer='he_normal')(model_image.output)
model = Model(inputs=model_image.input, outputs=outputs)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-3),
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True,
          callbacks=[checkpoint, lr_scheduler])
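
One way I imagine combining everything (I have not tried this yet) is to use resnet_v2 as the image feature extractor and concatenate its flattened output with an Embedding + LSTM branch over the description. The sketch below only illustrates the wiring; the 224x224 image size, the sequence length of 50 and the vocabulary of 10000 are assumptions, not values from my dataset:

from keras.layers import Concatenate, Dense, Embedding, Input, LSTM
from keras.models import Model
from keras.optimizers import Adam

# Image branch: the flattened features produced by resnet_v2 above.
image_branch = resnet_v2(input_shape=(224, 224, 3), depth=depth)

# Text branch: the product description as a padded sequence of token ids.
text_input = Input(shape=(50,), name='description_tokens')
t = Embedding(input_dim=10000, output_dim=128)(text_input)
t = LSTM(128)(t)

# Fusion head: does the description match the image?
merged = Concatenate()([image_branch.output, t])
merged = Dense(256, activation='relu')(merged)
match = Dense(1, activation='sigmoid')(merged)

combined = Model(inputs=[image_branch.input, text_input], outputs=match)
combined.compile(loss='binary_crossentropy',
                 optimizer=Adam(lr=1e-3),
                 metrics=['accuracy'])

# Each training sample would be an (image, tokenised description) pair with a
# 0/1 match label, e.g. the arrays from the loading sketch near the top:
# combined.fit([images, sequences], labels, batch_size=8, epochs=10,
#              validation_split=0.1)

I went with a simple Concatenate followed by Dense layers because the output is a single yes/no match, but I am not sure this is the right way to merge the two feature vectors.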

If anyone can suggest how to solve this problem, I would be very grateful.
