我正在为设备测试系统。我要确保从被测设备播放特定的声音效果。
为验证这一点,我打算使用一台运行Linux的小型计算机。
目标是使用Python打开麦克风并收听波形格式的声音效果。
我发现一个名为pyaudio的库似乎是一个不错的开始。我发现这个例子可以从麦克风读取原始数据: Reading input sound signal using Python
我还找到了检测拍手https://github.com/xSparfuchs/clap-detection/blob/master/clap-detection.py
的示例import pyaudio
import struct
import math
INITIAL_TAP_THRESHOLD = 0.25
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0/32768.0)
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME
def get_rms( block ):
# RMS amplitude is defined as the square root of the
# mean over time of the square of the amplitude.
# so we need to convert this string of bytes into
# a string of 16-bit samples...
# we will get one short out for each
# two chars in the string.
count = len(block)/2
format = "%dh"%(count)
shorts = struct.unpack( format, block )
# iterate over the block.
sum_squares = 0.0
for sample in shorts:
# sample is a signed short in +/- 32768.
# normalize it to 1.0
n = sample * SHORT_NORMALIZE
sum_squares += n*n
return math.sqrt( sum_squares / count )
class TapTester(object):
def __init__(self):
self.pa = pyaudio.PyAudio()
self.stream = self.open_mic_stream()
self.tap_threshold = INITIAL_TAP_THRESHOLD
self.noisycount = MAX_TAP_BLOCKS+1
self.quietcount = 0
self.errorcount = 0
def stop(self):
self.stream.close()
def find_input_device(self):
device_index = None
for i in range( self.pa.get_device_count() ):
devinfo = self.pa.get_device_info_by_index(i)
print( "Device %d: %s"%(i,devinfo["name"]) )
for keyword in ["mic","input"]:
if keyword in devinfo["name"].lower():
print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
device_index = i
return device_index
if device_index == None:
print( "No preferred input found; using default input device." )
return device_index
def open_mic_stream( self ):
device_index = self.find_input_device()
stream = self.pa.open( format = FORMAT,
channels = CHANNELS,
rate = RATE,
input = True,
input_device_index = device_index,
frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
return stream
def tapDetected(self): #DETECTED
print ("tapped")
def listen(self):
try:
block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
except e:
# dammit.
self.errorcount += 1
print( "(%d) Error recording: %s"%(self.errorcount,e) )
self.noisycount = 1
return
amplitude = get_rms( block )
if amplitude > self.tap_threshold:
# noisy block
self.quietcount = 0
self.noisycount += 1
else:
# quiet block.
if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
self.tapDetected()
self.noisycount = 0
self.quietcount += 1
if __name__ == "__main__":
tt = TapTester()
for i in range(1000):
tt.listen()
但是我想要的是一些更复杂的东西,可以实际识别出我作为.wav文件具有的特定声音效果。如果可能的话,我什至需要检测两种或两种不同的声音效果。
所以问题是,这是否足够容易构建自己,在这种情况下,我需要获取有关可以用于识别部分的算法/函数的一些信息。另外,如果有一些可用的库,可能是一个不错的解决方法。