我制作了一个自定义生成器函数,以读取存储MFCC值的CSV文件以进行说话人识别,这些文件的格式与将图像文件保存在张量流的图像数据生成器中的格式相同。
在将Model.fit()
与简单的密集层模型结合使用时,它可以带来价值
错误:
MFCC值的形状通常是可变的15561,而15639如何将张量输入到层中?
import re
import tensorflow as tf
tf.__version__2.2.0
def tf_data_generator(file_list, batch_size = 1):
i = 0
while True:
if i*batch_size >= len(file_list):
i = 0
np.random.shuffle(file_list)
else:
file_chunk = file_list[i*batch_size:(i+1)*batch_size]
data = []
labels = []
label_classes = tf.constant(["1034", "1039", "1044", "1048", "1050", "1055", "1061"...])
for file in file_chunk:
temp = pd.read_csv(open(file,'r'), header=None)
data.append(temp.values.reshape(-1, 1))
pattern = tf.constant(eval('file[6:10]'))
for j in range(len(label_classes)):
if re.match(pattern.numpy(), label_classes[j].numpy()):
labels.append(j)
data = np.asarray(data)
data = tf.reshape(data, [-1])
labels = np.asarray(labels)
yield data, labels
i = i + 1
dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [list_path, 1], output_types= (tf.float32,tf.float32))
num = 0
for data, labels in dataset:
print(data.shape, labels.shape,"\n")
print(labels, "<--Labels") # Just to see the lables
print()
num = num + 1
if num > 5: break
(15561,) (1,)
tf.Tensor([0.], shape=(1,), dtype=float32) <--Labels
(15561,) (1,)
tf.Tensor([0.], shape=(1,), dtype=float32) <--Labels
(15561,) (1,)
tf.Tensor([0.], shape=(1,), dtype=float32) <--Labels
(15639,) (1,)
tf.Tensor([1.], shape=(1,), dtype=float32) <--Labels
(15639,) (1,)
tf.Tensor([1.], shape=(1,), dtype=float32) <--Labels
(15561,) (1,)
tf.Tensor([1.], shape=(1,), dtype=float32) <--Labels
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input, Masking
Model = Sequential([Input(shape=(15561,), dtype = np.float32),
Dense( units = 10, activation='relu'),
Dense(units=10, activation = 'softmax')
])
Model.compile(loss = 'sparse_categorical_crossentropy', metrics = ['acc'], optimizer = 'Adam')
history = Model.fit(dataset, epochs = 1, steps_per_epoch=512)
sequence_5层的输入0与该层不兼容:其等级未定义,但该层需要一个已定义的等级。