使用音频样本创建tfrecord,从python中的视频中提取

时间:2018-02-26 13:37:09

标签: python

此代码用于创建tfrecords,这是张量流标准输入格式,用于保存从视频样本中获取的音频和标签。此文件作为神经网络训练的输入。

#!/usr/bin/env python2
# -*- coding: utf-8 -*-   

import menpo
import tensorflow as tf
import numpy as np
import os

from io import BytesIO
from pathlib import Path
from moviepy.editor import VideoFileClip
from menpo.visualize import progress_bar_str, print_progress
from moviepy.audio.AudioClip import AudioArrayClip

root_dir = Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/RECOLA') #Where RECOLA is located
portion_to_id = dict(
train = [1], # 25
valid = [70, 71],
test  = [80, 81] # 54, 53
) #samples taken

def get_samples(subject_id): #location of arousal and valence files and appropriate video sample
  arousal_label_path = root_dir / 'ratings_individual/arousal/{}.csv'.format(subject_id)
 valence_label_path = root_dir / 'ratings_individual/valence/{}.csv'.format(subject_id)

 clip = VideoFileClip(str(root_dir /"Video_recordings_MP4/{}.mp4".format(subject_id)))

  subsampled_audio = clip.audio.set_fps(16000)

  audio_frames = []

  for i in range(1, 7501): #extract audio sample
     try:
       time = 0.04 * i

       audio = np.array(list(subsampled_audio.subclip(time - 0.04, time).iter_frames()))
       audio = audio.mean(1)[:640]

       audio_frames.append(audio.astype(np.float32))
     except ValueError:
       print('Not float') 
       quit() 
      try:   
       arousal = np.loadtxt(str(arousal_label_path), delimiter=',')[:+1][1:]
       valence = np.loadtxt(str(valence_label_path), delimiter=',')[:+1][1:]


      return audio_frames, np.dstack([arousal, valence])[0].astype(np.float32) #return audio frames
     except ValueError:
      print('problem')



def get_jpg_string(im):
# Gets the serialized jpg from a menpo `Image`.
   fp = BytesIO()
   menpo.io.export_image(im, fp, extension='jpg')
   fp.seek(0)
   return fp.read()

def _int_feauture(value):
   return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feauture(value):
   return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def serialize_sample(writer, subject_id):


  subject_name = 'P{}'.format(subject_id)
  print(subject_name)
  print(get_samples)
  #repeat for each sample
  for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):

     example = tf.train.Example(features=tf.train.Features(feature={
                'sample_id': _int_feauture(i),
                'subject_id': _int_feauture(subject_id),
                'label': _bytes_feauture(label.tobytes()),
                'raw_audio': _bytes_feauture(audio.tobytes()),
            })) #extract sample_id,subject_id,label and raw_audio

     writer.write(example.SerializeToString())
     del audio, label



def main(directory):
   print('In Main')
   for portion in portion_to_id.keys():
    print(portion)

    for subj_id in print_progress(portion_to_id[portion]):


     temp = (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
      ).as_posix() #display sample
     print(temp)

     writer = tf.python_io.TFRecordWriter(
      (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
      ).as_posix()) #write to tfrecords

     serialize_sample(writer, subj_id)

if __name__ == "__main__":
  print("Calling Main")
  main(Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/records')) #save tfrecord

此代码引发错误并终止。我已经给出了查找输入视频的所有路径。

  

错误

   for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
   TypeError: zip() argument after * must be an iterable, not NoneType

为什么我会收到此错误?

1 个答案:

答案 0 :(得分:0)

您的测试,火车和有效文件夹中是否有以下视频/音频文件:

train = P1.mp4
valid = P70.mp4 , P71.mp4
test  = P80.mp4 , P81.mp4   ??

因为代码:zip(* get_samples(subject_name))似乎无法获取数据:Nonetype!