此代码用于创建tfrecords,这是张量流标准输入格式,用于保存从视频样本中获取的音频和标签。此文件作为神经网络训练的输入。
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import menpo
import tensorflow as tf
import numpy as np
import os
from io import BytesIO
from pathlib import Path
from moviepy.editor import VideoFileClip
from menpo.visualize import progress_bar_str, print_progress
from moviepy.audio.AudioClip import AudioArrayClip
root_dir = Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/RECOLA') #Where RECOLA is located
portion_to_id = dict(
train = [1], # 25
valid = [70, 71],
test = [80, 81] # 54, 53
) #samples taken
def get_samples(subject_id): #location of arousal and valence files and appropriate video sample
arousal_label_path = root_dir / 'ratings_individual/arousal/{}.csv'.format(subject_id)
valence_label_path = root_dir / 'ratings_individual/valence/{}.csv'.format(subject_id)
clip = VideoFileClip(str(root_dir /"Video_recordings_MP4/{}.mp4".format(subject_id)))
subsampled_audio = clip.audio.set_fps(16000)
audio_frames = []
for i in range(1, 7501): #extract audio sample
try:
time = 0.04 * i
audio = np.array(list(subsampled_audio.subclip(time - 0.04, time).iter_frames()))
audio = audio.mean(1)[:640]
audio_frames.append(audio.astype(np.float32))
except ValueError:
print('Not float')
quit()
try:
arousal = np.loadtxt(str(arousal_label_path), delimiter=',')[:+1][1:]
valence = np.loadtxt(str(valence_label_path), delimiter=',')[:+1][1:]
return audio_frames, np.dstack([arousal, valence])[0].astype(np.float32) #return audio frames
except ValueError:
print('problem')
def get_jpg_string(im):
# Gets the serialized jpg from a menpo `Image`.
fp = BytesIO()
menpo.io.export_image(im, fp, extension='jpg')
fp.seek(0)
return fp.read()
def _int_feauture(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feauture(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def serialize_sample(writer, subject_id):
subject_name = 'P{}'.format(subject_id)
print(subject_name)
print(get_samples)
#repeat for each sample
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
example = tf.train.Example(features=tf.train.Features(feature={
'sample_id': _int_feauture(i),
'subject_id': _int_feauture(subject_id),
'label': _bytes_feauture(label.tobytes()),
'raw_audio': _bytes_feauture(audio.tobytes()),
})) #extract sample_id,subject_id,label and raw_audio
writer.write(example.SerializeToString())
del audio, label
def main(directory):
print('In Main')
for portion in portion_to_id.keys():
print(portion)
for subj_id in print_progress(portion_to_id[portion]):
temp = (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix() #display sample
print(temp)
writer = tf.python_io.TFRecordWriter(
(directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix()) #write to tfrecords
serialize_sample(writer, subj_id)
if __name__ == "__main__":
print("Calling Main")
main(Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/records')) #save tfrecord
此代码引发错误并终止。我已经给出了查找输入视频的所有路径。
错误
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
TypeError: zip() argument after * must be an iterable, not NoneType
为什么我会收到此错误?
答案 0 :(得分:0)
您的测试,火车和有效文件夹中是否有以下视频/音频文件:
train = P1.mp4
valid = P70.mp4 , P71.mp4
test = P80.mp4 , P81.mp4 ??
因为代码:zip(* get_samples(subject_name))似乎无法获取数据:Nonetype!