I am trying to analyze videos using the video frames and the audio separately, and I came up with a model that looks like this.
Now, I split the training data into two generators: one for video and one for audio.
I had to split the generators further into two halves, and this is where I think things went wrong. Since the error log is huge and the arrays are enormous, I can't post the whole thing here. You can find it here.
Basically, the errors are:
ERROR 1:
ValueError: The two structures don't have the same nested structure.
First structure: type=tuple str=(array([[1],
[1],
[1],
[1]]), array([[1.],
[1.],
[1.],
[1.]]))
Second structure: type=tuple str=(None,)
ERROR 2:
More specifically: The two structures don't have the same number of elements. First structure: type=tuple str=(array([[1],
[1],
[1],
[1]]), array([[1.],
[1.],
[1.],
[1.]])). Second structure: type=tuple str=(None,)
Entire first structure:
(., .)
Entire second structure:
(.,)
ERROR 3:
ValueError: Unable to match target structure and sample_weight_modes structure:
['...', '...']
to
['...']
The VideoFrameGenerator is as follows:
import glob
import os

import cv2
import numpy as np
import tensorflow as tf


class VideoFrameGenerator(tf.keras.utils.Sequence):
    def __init__(
            self,
            rescale=1/255.,
            nb_frames: int = 5,
            classes: list = [],
            batch_size: int = 16,
            use_frame_cache: bool = False,
            target_shape: tuple = (648, 384),
            shuffle: bool = True,
            transformation: tf.keras.preprocessing.image.ImageDataGenerator = None,
            split: float = None,
            nb_channel: int = 3,
            glob_pattern: str = './videos/{classname}/*.avi',
            _validation_data: list = None):

        # should be only RGB or Grayscale
        assert nb_channel in (1, 3)

        # we should have classes
        assert len(classes) != 0

        # shape size should be 2
        assert len(target_shape) == 2

        # split factor should be a proper value
        if split is not None:
            assert 0.0 < split < 1.0

        # be sure that classes are well ordered
        classes.sort()

        self.rescale = rescale
        self.classes = classes
        self.batch_size = batch_size
        self.nbframe = nb_frames
        self.shuffle = shuffle
        self.target_shape = target_shape
        self.nb_channel = nb_channel
        self.transformation = transformation
        self.use_frame_cache = use_frame_cache

        self._random_trans = []
        self.__frame_cache = {}
        self.files = []
        self.validation = []

        if _validation_data is not None:
            # we only need to set files here
            self.files = _validation_data
        else:
            if split is not None and split > 0.0:
                for cls in classes:
                    files = glob.glob(glob_pattern.format(classname=cls))
                    nbval = int(split * len(files))
                    print("class %s, validation count: %d" % (cls, nbval))

                    # generate validation indexes
                    indexes = np.arange(len(files))
                    if shuffle:
                        np.random.shuffle(indexes)
                    val = np.random.permutation(
                        indexes)[:nbval]  # get some sample
                    # remove validation from train
                    indexes = np.array([i for i in indexes if i not in val])

                    # and now, make the file list
                    self.files += [files[i] for i in indexes]
                    self.validation += [files[i] for i in val]
            else:
                for cls in classes:
                    self.files += glob.glob(glob_pattern.format(classname=cls))

        # build indexes
        self.files_count = len(self.files)
        self.indexes = np.arange(self.files_count)
        self.classes_count = len(classes)

        # to initialize transformations and shuffle indices
        self.on_epoch_end()

        print("get %d classes for %d files for %s" % (
            self.classes_count,
            self.files_count,
            'train' if _validation_data is None else 'validation'))

    def get_validation_generator(self):
        """ Return the validation generator if you've provided split factor """
        return self.__class__(
            nb_frames=self.nbframe,
            nb_channel=self.nb_channel,
            target_shape=self.target_shape,
            classes=self.classes,
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            rescale=self.rescale,
            _validation_data=self.validation)

    def on_epoch_end(self):
        """ Called by Keras after each epoch """
        if self.transformation is not None:
            self._random_trans = []
            for i in range(self.files_count):
                self._random_trans.append(
                    self.transformation.get_random_transform(self.target_shape)
                )
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.floor(self.files_count / self.batch_size))

    def __getitem__(self, index):
        classes = self.classes
        shape = self.target_shape
        nbframe = self.nbframe

        labels = []
        images = []

        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        transformation = None

        for i in indexes:
            # prepare a transformation if provided
            if self.transformation is not None:
                transformation = self._random_trans[i]

            # video = random.choice(files)
            video = self.files[i]
            classname = video.split(os.sep)[-2]

            # create a label array and set 1 to the right column
            label = []
            col = classes.index(classname)
            label.append(1 if col == 0 else 0)
            # label = [0,1]
            # label = np.array(label)

            if video not in self.__frame_cache:
                cap = cv2.VideoCapture(video)
                frames = []
                while True:
                    grabbed, frame = cap.read()
                    if not grabbed:
                        # end of video
                        break

                    # resize
                    frame = cv2.resize(frame, shape)

                    # use RGB or Grayscale ?
                    if self.nb_channel == 3:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    else:
                        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)

                    # to np
                    frame = tf.keras.preprocessing.image.img_to_array(
                        frame) * self.rescale

                    # keep frame
                    frames.append(frame)

                # Add 2 frames to drop first and last frame
                jump = len(frames)//(nbframe+2)

                # get only some images
                try:
                    frames = frames[jump::jump][:nbframe]
                except Exception as exception:
                    print(video)
                    raise exception

                # add to frame cache to not read from disk later
                if self.use_frame_cache:
                    self.__frame_cache[video] = frames
            else:
                frames = self.__frame_cache[video]

            # apply transformation
            if transformation is not None:
                frames = [self.transformation.apply_transform(
                    frame, transformation) for frame in frames]

            # print(frames)

            # add the sequence in batch
            images.append(frames)
            labels.append(label)

        return np.array(images), np.array(labels)
The AudioGenerator is:
import joblib
from sklearn import preprocessing


class AudioGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'

    def __init__(self, ids, train_dir):
        'Initialization'
        self.ids = ids
        self.train_dir = train_dir

    def __len__(self):
        'Denotes the number of batches per epoch'
        return 4

    def __getitem__(self, index):
        batch_id = self.ids[index]
        X = np.asarray(np.load(self.train_dir + str(batch_id*100) + '.npy'))
        Y = np.asarray(np.load(self.train_dir + 'Y_' + str(batch_id*100) + '.npy'))

        # fit and persist the one-hot encoder once, then reload it
        if index == 1:
            enc = preprocessing.OneHotEncoder()
            Y = Y.reshape(-1, 1)
            enc.fit(Y)
            joblib.dump(enc, "/content/drive/My Drive/DeepFakeFiles/Encoder")
        enc = joblib.load("/content/drive/My Drive/DeepFakeFiles/Encoder")
        Y = Y.reshape(-1, 1)
        Y = enc.transform(Y).toarray()
        X = X.reshape(X.shape[0], X.shape[1], 1)

        # start/batch are reset on every call, so this always returns the
        # first `size` samples of the loaded file
        start = 0
        batch, size = 4, 4
        X_temp = X[start:batch, :]
        Y_temp = Y[start:batch, :]
        start = batch
        batch += size
        return X_temp, Y_temp
Finally, the data generator that produces the training data is:
import itertools


def create_data_generator(video_generator, audio_generator):
    while True:
        _video_generator, _lvideo_generator = next(iter(video_generator))
        _audio_generator, _laudio_generator = next(iter(audio_generator))
        yield [_video_generator, _audio_generator], [_lvideo_generator, _laudio_generator]
It would be great if someone could help me understand what the "second structure" means, as I'm new to Keras' fit_generator(). Thanks in advance!
Answer 0 (score: 1)
I ran into this error on a completely different type of model. I spent a lot of time trying to understand why it thought sample weights had somehow been set; at least in my case, that part of the message is misleading.
I solved the problem by making sure the generator returned numpy arrays rather than lists.
I didn't dig deeply into it, but it appears that lists and nested lists coming out of a generator confuse TF, which then assumes that some part of the labels is actually a third item describing the sample weights.
I recommend explicitly creating np arrays as the output of your generator.
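To make that concrete, here is a minimal sketch (not the asker's exact code) of how the combined create_data_generator from the question could be rewritten along these lines. It also creates the iterators once, since next(iter(gen)) in the original restarts each Sequence and always returns its first batch:

import numpy as np

def create_data_generator(video_generator, audio_generator):
    # Create the iterators once; next(iter(gen)) inside the loop would
    # restart each Sequence and always return its first batch.
    video_iter = iter(video_generator)
    audio_iter = iter(audio_generator)
    while True:
        try:
            video_x, video_y = next(video_iter)
            audio_x, audio_y = next(audio_iter)
        except StopIteration:
            # One of the Sequences is exhausted; restart both for a new pass.
            video_iter = iter(video_generator)
            audio_iter = iter(audio_generator)
            continue
        # Wrap every element in an explicit numpy array and yield an
        # (inputs, targets) tuple, so Keras doesn't misread the nest as
        # an (inputs, targets, sample_weights) triple.
        yield (np.asarray(video_x), np.asarray(audio_x)), \
              (np.asarray(video_y), np.asarray(audio_y))

The np.asarray calls are the key point: with plain Python lists in the yielded nest, tf.keras may try to match the extra nesting against sample_weight_modes, which is exactly the structure mismatch reported in the errors above.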