I am trying to analyze videos using the video frames and the audio separately, and I came up with a model that looks like this.
Now, I split the training data into two generators: one for video and one for audio.
I had to split the generators further into two halves, and this is where I think things went wrong. Since the error log is huge and the arrays are enormous, I can't post the whole thing here. You can find it here.
Basically, the errors are:
ERROR 1:
ValueError: The two structures don't have the same nested structure.
First structure: type=tuple str=(array([[1],
[1],
[1],
[1]]), array([[1.],
[1.],
[1.],
[1.]]))
Second structure: type=tuple str=(None,)
ERROR 2:
More specifically: The two structures don't have the same number of elements. First structure: type=tuple str=(array([[1],
[1],
[1],
[1]]), array([[1.],
[1.],
[1.],
[1.]])). Second structure: type=tuple str=(None,)
Entire first structure:
(., .)
Entire second structure:
(.,)
ERROR 3:
ValueError: Unable to match target structure and sample_weight_modes structure:
['...', '...']
to
['...']
The VideoFrameGenerator is as follows:
import glob
import os

import cv2
import numpy as np
import tensorflow as tf


class VideoFrameGenerator(tf.keras.utils.Sequence):
    def __init__(
            self,
            rescale=1/255.,
            nb_frames: int = 5,
            classes: list = [],
            batch_size: int = 16,
            use_frame_cache: bool = False,
            target_shape: tuple = (648, 384),
            shuffle: bool = True,
            transformation: tf.keras.preprocessing.image.ImageDataGenerator = None,
            split: float = None,
            nb_channel: int = 3,
            glob_pattern: str = './videos/{classname}/*.avi',
            _validation_data: list = None):

        # should be only RGB or Grayscale
        assert nb_channel in (1, 3)

        # we should have classes
        assert len(classes) != 0

        # shape size should be 2
        assert len(target_shape) == 2

        # split factor should be a proper value
        if split is not None:
            assert 0.0 < split < 1.0

        # be sure that classes are well ordered
        classes.sort()

        self.rescale = rescale
        self.classes = classes
        self.batch_size = batch_size
        self.nbframe = nb_frames
        self.shuffle = shuffle
        self.target_shape = target_shape
        self.nb_channel = nb_channel
        self.transformation = transformation
        self.use_frame_cache = use_frame_cache

        self._random_trans = []
        self.__frame_cache = {}
        self.files = []
        self.validation = []

        if _validation_data is not None:
            # we only need to set files here
            self.files = _validation_data
        else:
            if split is not None and split > 0.0:
                for cls in classes:
                    files = glob.glob(glob_pattern.format(classname=cls))
                    nbval = int(split * len(files))
                    print("class %s, validation count: %d" % (cls, nbval))

                    # generate validation indexes
                    indexes = np.arange(len(files))
                    if shuffle:
                        np.random.shuffle(indexes)
                    val = np.random.permutation(
                        indexes)[:nbval]  # get some sample
                    # remove validation from train
                    indexes = np.array([i for i in indexes if i not in val])

                    # and now, make the file list
                    self.files += [files[i] for i in indexes]
                    self.validation += [files[i] for i in val]
            else:
                for cls in classes:
                    self.files += glob.glob(glob_pattern.format(classname=cls))

        # build indexes
        self.files_count = len(self.files)
        self.indexes = np.arange(self.files_count)
        self.classes_count = len(classes)

        # to initialize transformations and shuffle indices
        self.on_epoch_end()

        print("get %d classes for %d files for %s" % (
            self.classes_count,
            self.files_count,
            'train' if _validation_data is None else 'validation'))

    def get_validation_generator(self):
        """ Return the validation generator if you've provided split factor """
        return self.__class__(
            nb_frames=self.nbframe,
            nb_channel=self.nb_channel,
            target_shape=self.target_shape,
            classes=self.classes,
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            rescale=self.rescale,
            _validation_data=self.validation)

    def on_epoch_end(self):
        """ Called by Keras after each epoch """
        if self.transformation is not None:
            self._random_trans = []
            for i in range(self.files_count):
                self._random_trans.append(
                    self.transformation.get_random_transform(self.target_shape)
                )
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.floor(self.files_count / self.batch_size))

    def __getitem__(self, index):
        classes = self.classes
        shape = self.target_shape
        nbframe = self.nbframe

        labels = []
        images = []

        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        transformation = None

        for i in indexes:
            # prepare a transformation if provided
            if self.transformation is not None:
                transformation = self._random_trans[i]

            # video = random.choice(files)
            video = self.files[i]
            classname = video.split(os.sep)[-2]

            # create a label array and set 1 to the right column
            label = []
            col = classes.index(classname)
            label.append(1 if col == 0 else 0)
            # label = [0,1]
            # label = np.array(label)

            if video not in self.__frame_cache:
                cap = cv2.VideoCapture(video)
                frames = []
                while True:
                    grabbed, frame = cap.read()
                    if not grabbed:
                        # end of video
                        break

                    # resize
                    frame = cv2.resize(frame, shape)

                    # use RGB or Grayscale ?
                    if self.nb_channel == 3:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    else:
                        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)

                    # to np
                    frame = tf.keras.preprocessing.image.img_to_array(
                        frame) * self.rescale

                    # keep frame
                    frames.append(frame)

                # Add 2 frames to drop first and last frame
                jump = len(frames)//(nbframe+2)

                # get only some images
                try:
                    frames = frames[jump::jump][:nbframe]
                except Exception as exception:
                    print(video)
                    raise exception

                # add to frame cache to not read from disk later
                if self.use_frame_cache:
                    self.__frame_cache[video] = frames
            else:
                frames = self.__frame_cache[video]

            # apply transformation
            if transformation is not None:
                frames = [self.transformation.apply_transform(
                    frame, transformation) for frame in frames]

            # print(frames)

            # add the sequence in batch
            images.append(frames)
            labels.append(label)

        return np.array(images), np.array(labels)
The AudioGenerator is:
import joblib
from sklearn import preprocessing


class AudioGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'

    def __init__(self, ids, train_dir):
        'Initialization'
        self.ids = ids
        self.train_dir = train_dir

    def __len__(self):
        'Denotes the number of batches per epoch'
        return 4

    def __getitem__(self, index):
        batch_id = self.ids[index]
        X = np.asarray(np.load(self.train_dir + str(batch_id*100) + '.npy'))
        Y = np.asarray(np.load(self.train_dir + 'Y_' + str(batch_id*100) + '.npy'))

        # fit and persist the one-hot encoder once, then reload it
        if index == 1:
            enc = preprocessing.OneHotEncoder()
            Y = Y.reshape(-1, 1)
            enc.fit(Y)
            joblib.dump(enc, "/content/drive/My Drive/DeepFakeFiles/Encoder")
        enc = joblib.load("/content/drive/My Drive/DeepFakeFiles/Encoder")
        Y = Y.reshape(-1, 1)
        Y = enc.transform(Y).toarray()
        X = X.reshape(X.shape[0], X.shape[1], 1)

        # start/batch are reset on every call, so this always returns the
        # first `size` samples of the loaded file
        start = 0
        batch, size = 4, 4
        X_temp = X[start:batch, :]
        Y_temp = Y[start:batch, :]
        start = batch
        batch += size
        return X_temp, Y_temp
Finally, the data generator that produces the training data is:
import itertools


def create_data_generator(video_generator, audio_generator):
    while True:
        _video_generator, _lvideo_generator = next(iter(video_generator))
        _audio_generator, _laudio_generator = next(iter(audio_generator))
        yield [_video_generator, _audio_generator], [_lvideo_generator, _laudio_generator]
It would be great if someone could help me understand what the "second structure" means, as I'm new to Keras' fit_generator(). Thanks in advance!
Answer 0 (score: 1)
I ran into this error on a completely different type of model. I spent a lot of time trying to understand why it thought sample weights had somehow been set; at least in my case, that part of the message is misleading.
I solved the problem by making sure the generator returned numpy arrays rather than lists.
I didn't dig deeply into it, but it appears that lists and nested lists coming out of a generator confuse TF, which then assumes that some part of the labels is actually a third item describing the sample weights.
I recommend explicitly creating np arrays as the output of your generator.
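To make that concrete, here is a minimal sketch (not the asker's exact code) of how the combined create_data_generator from the question could be rewritten along these lines. It also creates the iterators once, since next(iter(gen)) in the original restarts each Sequence and always returns its first batch:

import numpy as np

def create_data_generator(video_generator, audio_generator):
    # Create the iterators once; next(iter(gen)) inside the loop would
    # restart each Sequence and always return its first batch.
    video_iter = iter(video_generator)
    audio_iter = iter(audio_generator)
    while True:
        try:
            video_x, video_y = next(video_iter)
            audio_x, audio_y = next(audio_iter)
        except StopIteration:
            # One of the Sequences is exhausted; restart both for a new pass.
            video_iter = iter(video_generator)
            audio_iter = iter(audio_generator)
            continue
        # Wrap every element in an explicit numpy array and yield an
        # (inputs, targets) tuple, so Keras doesn't misread the nest as
        # an (inputs, targets, sample_weights) triple.
        yield (np.asarray(video_x), np.asarray(audio_x)), \
              (np.asarray(video_y), np.asarray(audio_y))

The np.asarray calls are the key point: with plain Python lists in the yielded nest, tf.keras may try to match the extra nesting against sample_weight_modes, which is exactly the structure mismatch reported in the errors above.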