My architecture is a standard seq-to-seq encoder-decoder that uses Keras generators, so I don't have to load all of the TD into memory at once. If I only change the compile and fit calls to target self.model, this runs perfectly in the very same virtual environment that produces the error (i.e. with tensorflow-gpu installed); in other words, everything works fine on a single GPU. With multiple GPUs, however, some part of the data gets flattened or recombined incorrectly, leading to a shape mismatch. Here is the error: [screenshot of the error traceback]
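For reference, the working single-GPU path is just the same compile/fit_generator calls made directly on self.model instead of on the multi_gpu_model wrapper. The attribute names below (self.model, self.train_gen) come from my own wrapper class, so treat this as a sketch rather than exact code:

# single-GPU path that works: compile/fit the plain Keras model directly
self.model.compile(optimizer='rmsprop',
                   loss='sparse_categorical_crossentropy',
                   metrics=['sparse_categorical_accuracy'])
self.model.fit_generator(self.train_gen, epochs=1, verbose=1)

# multi-GPU path that fails: wrap the same model with multi_gpu_model first
parallel_model = multi_gpu_model(self.model, gpus=4)
parallel_model.compile(optimizer='rmsprop',
                       loss='sparse_categorical_crossentropy',
                       metrics=['sparse_categorical_accuracy'])
parallel_model.fit_generator(self.train_gen, epochs=1, verbose=1)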
---- EDIT ---- I was able to reproduce this error fairly easily in a single file, using a recent version of keras + tensorflow-gpu + CUDA 9.2, with the following code:
import tensorflow as tf
import numpy as np
from keras.utils import multi_gpu_model, Sequence
from keras.layers import Input, LSTM, Dense, TimeDistributed, Embedding
from keras.models import Model

BATCH_SIZE = 4

class trivial_Sequence(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x = np.zeros((batch_size*4, 64))
        self.y = np.zeros((batch_size*4, 64, 1))
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x)/float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_y = self.y[idx*self.batch_size:(idx+1)*self.batch_size]
        return batch_x, batch_y

def error_train():
    # instantiate components
    td = trivial_Sequence(None, None, BATCH_SIZE)
    input = Input(shape=(None,), dtype='int32')
    emb = Embedding(output_dim=10, input_dim=64, input_length=None)
    encode = LSTM(10, return_sequences=True, return_state=True)
    project_up = Dense(units=20, activation='softmax')

    # build network
    temp = emb(input)
    temp, _, _ = encode(temp)
    output = TimeDistributed(project_up)(temp)
    model = Model(inputs=input, outputs=output)

    parallel_model = multi_gpu_model(model, gpus=4)
    parallel_model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy',
                           metrics=['sparse_categorical_accuracy'])
    parallel_model.fit_generator(td, epochs=1, verbose=1)

# run it
error_train()