因此,我使用keras创建了一个模型,该模型将声音分类为某些单词。我用来创建模型的数据集只有1秒钟的声音。我想用C#或Python创建一个应用程序,它将使用我创建的模型从麦克风听一些单词(不是从文件输入声音数据,而是从麦克风实时输入)。如果我说一些话,那么应用程序将执行某些操作。
这是我在Python中的尝试,但效果不佳,因为预测始终对错误的单词有100%的把握。
import collections
import contextlib
import functools
import queue
import pyaudio
from ctypes import windll
from keras.models import load_model
import numpy as np
# Load the trained Keras word-classification model (expects 8000-sample,
# i.e. 1-second @ 8 kHz, single-channel input reshaped to (1, 8000, 1)).
# NOTE(review): the ".." Windows-style path assumes the script runs from a
# subdirectory next to model.hdf5 — confirm the working directory.
model = load_model("..\\model.hdf5")
# Output labels, index-aligned with the model's softmax output.
labels = ["roll left", "roll right", "yaw left", "yaw right", "pitch up", "pitch down", "stop"]
def read(data):
    """Classify one buffered stretch of microphone audio and print the result.

    Args:
        data: 1-D numpy array of int16 PCM samples captured at 8 kHz.

    Why the original always reported 100% confidence on the wrong word:
    the model was (presumably) trained on float waveforms normalized to
    [-1, 1] (the usual librosa/tf.audio pipeline), but raw int16 samples
    are up to ~32768x larger, saturating the network — TODO confirm the
    training-time preprocessing and mirror it exactly here.
    """
    # Scale int16 PCM into float32 [-1, 1] to match typical training input.
    samples = data.astype(np.float32) / 32768.0
    # The audio generator joins however many chunks were buffered, so the
    # array may not be exactly 8000 samples; pad or truncate to fit the
    # model's fixed input length instead of crashing in reshape().
    if samples.shape[0] < 8000:
        samples = np.pad(samples, (0, 8000 - samples.shape[0]))
    else:
        samples = samples[:8000]
    probability = model.predict(samples.reshape(1, 8000, 1))
    index = np.argmax(probability[0])
    print(probability)
    print(labels[index], probability[0][index])
    # if word == something
    # do some job
class SpeechRecog:
CHANNELS = 1
RATE = 8000
CHUNK = RATE
SECS_OVERLAP = 1
def __init__(self):
self.audio_interface = pyaudio.PyAudio()
def listen(self):
with self.record_audio() as buff:
overlap_buffer = collections.deque(maxlen=int(self.SECS_OVERLAP * self.RATE / self.CHUNK))
for data in self._audio_data_generator(buff, overlap_buffer):
read(np.frombuffer(data, dtype=np.int16))
@staticmethod
def _audio_data_generator(buff: queue.Queue, overlap_buffer):
if overlap_buffer:
yield b''.join(overlap_buffer)
overlap_buffer.clear()
while True:
# Use a blocking get() to ensure there's at least one chunk of data.
data = [buff.get()]
# Now consume whatever other data's still buffered.
while True:
try:
data.append(buff.get(block=False))
except queue.Empty:
break
# `None` in the buffer signals that we should stop generating. Put the
# data back into the buffer for the next generator.
if None in data:
data.remove(None)
if data:
buff.put(b''.join(data))
break
else:
overlap_buffer.extend(data)
yield b''.join(data)
def fill_buffer(self, buff: queue.Queue, in_data, frame_count, time_info, status_flag):
buff.put(in_data)
return None, pyaudio.paContinue
@contextlib.contextmanager
def record_audio(self):
buff: queue.Queue = queue.Queue()
stream: pyaudio.Stream = self.audio_interface \
.open(format=pyaudio.paInt16,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK,
stream_callback=functools.partial(self.fill_buffer, buff))
yield buff
stream.stop_stream()
stream.close()
self.audio_interface.terminate()
if __name__ == "__main__":
    # Guard the entry point so importing this module doesn't immediately
    # open the microphone and block forever.
    recognizer = SpeechRecog()
    recognizer.listen()
您能给我一些解决此问题的建议或思路吗？