借助我在网上找到的一些代码,我成功地训练了一个模型(哇!),它能够判断我输入的音频文件中是否有笑声。
这是模型:
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Flatten
# Build the classifier in one go: batch-normalise the (10, 128) input,
# flatten it, and feed a single sigmoid output unit.
# Disabled alternative input declaration: keras.Input((None, 128))
lr_model = Sequential([
    BatchNormalization(input_shape=(10, 128)),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
# The summary table is followed by a literal "None" in the output: that is
# the return value of summary() being printed.
print(lr_model.summary())

# try using different optimizers and different optimizer configs
lr_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

batch_size = 32
CV_frac = 0.1

# Both generators read the same tfrecord; the last two arguments select the
# fraction range handed to each split (0 .. 1-CV_frac for training,
# 1-CV_frac .. 1 for validation).
train_gen = data_generator(
    batch_size, '.../Data/bal_laugh_speech_subset.tfrecord', 0, 1-CV_frac
)
val_gen = data_generator(
    128, '.../Data/bal_laugh_speech_subset.tfrecord', 1-CV_frac, 1
)

rec_len = 18768
# Step counts are derived from rec_len and each generator's batch size.
train_steps = int(rec_len*(1-CV_frac))//batch_size
val_steps = int(rec_len*CV_frac)//128

lr_h = lr_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=val_gen,
    validation_steps=val_steps,
    verbose=1,
)
然后我写了这段代码来输入我的音频文件。它类似于一些检测音频文件中蝙蝠叫声的代码。
# Scan a recording one second at a time and report the seconds in which the
# trained model detects laughter.
# NOTE(review): the indentation of the loop and `if` bodies was lost in this
# paste; the nesting of the lines below has to be restored before this runs.
# Load the recording; the result is unpacked into the sample array and its
# sample rate.
soundarray, sr = librosa.load("video-1609522619.wav")
# Number of whole seconds available in the recording.
maxseconds = int(len(soundarray)/sr)
# NOTE(review): range(maxseconds-1) stops one window short of the last whole
# second — confirm whether that is intended.
for second in range(maxseconds-1):
# One-second window of raw samples (sr values; presumably a 1-D array).
audiosample = np.array(soundarray[second*sr:(second+1)*sr])
print(audiosample)
# NOTE(review): this call is what raises the ValueError. lr_model was built
# with input_shape=(10, 128), so predict() needs a 3-D batch shaped
# (batch, 10, 128); the raw-audio window is received as (None, 1) instead.
# Presumably the audio must first be converted into the same 10x128 features
# that the training tfrecord contains — TODO confirm the feature extractor.
prediction = lr_model.predict(audiosample)
# NOTE(review): the model ends in a single sigmoid unit, so argmax does not
# compare class scores here; a threshold such as `prediction > 0.5` is
# probably what was intended — confirm.
if np.argmax(prediction)==1:
# Play the detected window back in the notebook.
IPython.display.display(IPython.display.Audio(audiosample, rate = sr,autoplay=True))
# The same filename is used for every detection, so earlier clips are
# overwritten.
# NOTE(review): librosa.output was removed in librosa 0.8 — confirm the
# installed version or switch to another writer.
librosa.output.write_wav('first_test.wav', audiosample, sr)
# Pause for two seconds before continuing.
time.sleep(2)
# Debug marker.
print("hello")
print("Detected laugh at " + str(second) + " out of " + str(maxseconds) + " seconds")
print(prediction)
然后我得到这个错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-28-0d974db166cf> in <module>
4 audiosample = np.array(soundarray[second*sr:(second+1)*sr])
5 print(audiosample)
----> 6 prediction = lr_model.predict(audiosample)
7
8 if np.argmax(prediction)==1:
~/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
1627 for step in data_handler.steps():
1628 callbacks.on_predict_batch_begin(step)
-> 1629 tmp_batch_outputs = self.predict_function(iterator)
1630 if data_handler.should_sync:
1631 context.async_wait()
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
860 # In this case we have not created variables on the first call. So we can
861 # run the first trace but we should fail if variables are created.
--> 862 results = self._stateful_fn(*args, **kwds)
863 if self._created_variables:
864 raise ValueError("Creating variables on a non-first call to a function"
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2939 with self._lock:
2940 (graph_function,
-> 2941 filtered_flat_args) = self._maybe_define_function(args, kwargs)
2942 return graph_function._call_flat(
2943 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3355 self.input_signature is None and
3356 call_context_key in self._function_cache.missed):
-> 3357 return self._define_function_with_shape_relaxation(
3358 args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3359
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _define_function_with_shape_relaxation(self, args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3277 expand_composites=True)
3278
-> 3279 graph_function = self._create_graph_function(
3280 args, kwargs, override_flat_arg_shapes=relaxed_arg_shapes)
3281 self._function_cache.arg_relaxed[rank_only_cache_key] = graph_function
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3194 arg_names = base_arg_names + missing_arg_names
3195 graph_function = ConcreteFunction(
-> 3196 func_graph_module.func_graph_from_py_func(
3197 self._name,
3198 self._python_function,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988 _, original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args, **func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635 return out
636
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1478 predict_function *
return step_function(self, iterator)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1468 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1461 run_step **
outputs = model.predict_step(data)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1434 predict_step
return self(x, training=False)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
/home/pete/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:219 assert_input_compatibility
raise ValueError('Input ' + str(input_index) + ' of layer ' +
ValueError: Input 0 of layer sequential is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 1)
我在 Google 上搜索后发现,这通常是 LSTM 模型才会出现的错误,但我的模型并不是 LSTM。无论如何,我的输入与模型预期的输入之间存在某种奇怪的不匹配。
这是我的模型摘要:
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
batch_normalization_2 (Batch (None, 10, 128) 512
_________________________________________________________________
flatten_2 (Flatten) (None, 1280) 0
_________________________________________________________________
dense_2 (Dense) (None, 1) 1281
=================================================================
Total params: 1,793
Trainable params: 1,537
Non-trainable params: 256
_________________________________________________________________
None
我实际上不知道为什么 BatchNormalization 会有 input_shape。这难道不是在告诉模型以某种方式进行批处理吗?它看起来更像是一个设置而不是一个层……这是问题的一部分吗?无论如何,这部分代码大多是从比我更有经验的人那里复制来的,而且最终它能正常工作,所以我不想对它改动太多。