在下面的函数中的某个地方,我得到了切片索引超出范围错误。越界发生在索引2的第一个维度上。这是函数。
import librosa
import librosa.display
import tensorflow as tf
from tensorflow_addons.image import sparse_image_warp
import numpy as np
import matplotlib.pyplot as plt
def sparse_warp(mel_spectrogram, time_warping_para=80):
"""Spec augmentation Calculation Function.
'SpecAugment' have 3 steps for audio data augmentation.
first step is time warping using Tensorflow's image_sparse_warp function.
Second step is frequency masking, last step is time masking.
# Arguments:
mel_spectrogram(numpy array): audio file path of you want to warping and masking.
time_warping_para(float): Augmentation parameter, "time warp parameter W".
If none, default = 80 for LibriSpeech.
# Returns
mel_spectrogram(numpy array): warped and masked mel spectrogram.
"""
fbank_size = tf.shape(mel_spectrogram)
n, v = fbank_size[1], fbank_size[2]
# Step 1 : Time warping
# Image warping control point setting.
# Source
pt = tf.random.uniform([], time_warping_para, n-time_warping_para, tf.int32) # radnom point along the time axis
src_ctr_pt_freq = tf.range(v // 2) # control points on freq-axis
src_ctr_pt_time = tf.ones_like(src_ctr_pt_freq) * pt # control points on time-axis
src_ctr_pts = tf.stack((src_ctr_pt_time, src_ctr_pt_freq), -1)
src_ctr_pts = tf.cast(src_ctr_pts, dtype=tf.float32)
# Destination
w = tf.random.uniform([], -time_warping_para, time_warping_para, tf.int32) # distance
dest_ctr_pt_freq = src_ctr_pt_freq
dest_ctr_pt_time = src_ctr_pt_time + w
dest_ctr_pts = tf.stack((dest_ctr_pt_time, dest_ctr_pt_freq), -1)
dest_ctr_pts = tf.cast(dest_ctr_pts, dtype=tf.float32)
# warp
source_control_point_locations = tf.expand_dims(src_ctr_pts, 0) # (1, v//2, 2)
dest_control_point_locations = tf.expand_dims(dest_ctr_pts, 0) # (1, v//2, 2)
warped_image, _ = sparse_image_warp(mel_spectrogram,
source_control_point_locations,
dest_control_point_locations)
return warped_image
和
该功能所需的mel_spectogram可以通过以下方式生成
audio, sampling_rate = librosa.load('audio_file_path')
mel_spectrogram = librosa.feature.melspectrogram(y=audio,sr=sampling_rate,n_mels=256,hop_length=128,fmax=8000)
我收到了我无法理解的错误。
ValueError: slice index 2 of dimension 0 out of bounds. for '{{node strided_slice_3}} = StridedSlice[Index=DT_INT32, T=DT_INT32, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1](Shape_1, strided_slice_3/stack, strided_slice_3/stack_1, strided_slice_3/stack_2)' with input shapes: [2], [1], [1], [1] and with computed input tensors: input[1] = <2>, input[2] = <3>, input[3] = <1>.