我目前正在使用名为 aubio 的 Python 模块进行实时音高跟踪,然后计算标准差来了解说话者的语调变化。但是,检测结果中既有非常高的频率(例如 5067 Hz),也有非常低的频率(例如 42 Hz)。这让我很困惑,我认为这些结果不正常,因为女性的正常基频(音高)范围大约是 165~255 Hz。有人知道该如何解决这个问题吗?谢谢。
下面是我当前的代码。
import aubio
import numpy as np
import pyaudio
import sys
import time
from statistics import stdev
# Audio capture settings handed to PyAudio when the stream is opened.
SAMPLE_RATE = 44100
CHANNELS = 1
FORMAT = pyaudio.paFloat32

# Analysis settings handed to aubio's pitch detector.
METHOD = "default"
BUFFER_SIZE = 2048
HOP_SIZE = BUFFER_SIZE // 2

# Number of frames requested from the microphone per read; kept equal
# to the hop size.
PERIOD_SIZE_IN_FRAME = HOP_SIZE
def main(args, *, duration_s=5.0, min_pitch_hz=50.0, max_pitch_hz=500.0):
    """Record from the microphone and print pitch statistics.

    Reads audio from the default input device for ``duration_s`` seconds,
    runs aubio's pitch detector on every hop, and prints the list of
    accepted pitch values followed by their standard deviation (formatted
    to two decimals, then unformatted) and their mean.

    Args:
        args: Command-line arguments; accepted for interface
            compatibility and not used.
        duration_s: How long to listen, in seconds.
        min_pitch_hz: Estimates below this frequency are discarded.
        max_pitch_hz: Estimates above this frequency are discarded.

    The default bounds are a deliberately generous envelope for a speaking
    voice; narrow them for a specific speaker. Without such a gate, a
    single spurious estimate (e.g. several kHz on a noisy frame) dominates
    the standard deviation.
    """
    sound_data = []

    audio = pyaudio.PyAudio()
    try:
        # Open the microphone stream.
        mic = audio.open(format=FORMAT, channels=CHANNELS,
                         rate=SAMPLE_RATE, input=True,
                         frames_per_buffer=PERIOD_SIZE_IN_FRAME)
        try:
            # Pitch detector reporting in Hz; frames quieter than -40 dB
            # are treated as silence.
            detector = aubio.pitch(METHOD, BUFFER_SIZE,
                                   HOP_SIZE, SAMPLE_RATE)
            detector.set_unit("Hz")
            detector.set_silence(-40)

            # A monotonic clock keeps the deadline immune to wall-clock
            # adjustments while recording.
            deadline = time.monotonic() + duration_s
            while time.monotonic() < deadline:
                raw = mic.read(PERIOD_SIZE_IN_FRAME)
                # frombuffer is the supported way to view raw bytes as
                # samples; fromstring is deprecated for binary input.
                samples = np.frombuffer(raw, dtype=aubio.float_type)
                pitch = detector(samples)[0]
                # A zero estimate means no pitch was found; anything
                # outside the plausible range is treated as an outlier.
                if pitch and min_pitch_hz <= pitch <= max_pitch_hz:
                    sound_data.append(int(pitch))
        finally:
            # Release the input device even if reading or detection fails.
            mic.stop_stream()
            mic.close()
    finally:
        audio.terminate()

    print(sound_data)
    if not sound_data:
        # Statistics over an empty array would only yield NaN and warnings.
        print("No pitch detected in the accepted frequency range.")
        return

    values = np.asarray(sound_data)
    std = np.std(values)
    heikin = np.average(values)
    print('{0:.2f}'.format(std))
    print(std)
    print(heikin)
# Start the capture only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv)