Question

我正在尝试录制音频，并同时打印所录制信号的振幅。因此我把 stream.read 返回的所有数据都保存了下来。但是当我尝试打印它们时，得到的是一串字节而不是整数。我想知道如何转换这些字节以获得振幅。

这是我的代码：

import pyaudio
import wave

# Capture settings: paInt16 samples, one channel, 44100 frames per second,
# read CHUNK frames at a time.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)

print("* recording")

# Number of CHUNK-sized reads that fit into RECORD_SECONDS (truncated).
num_chunks = int(RATE / CHUNK * RECORD_SECONDS)

# Each read returns raw sample bytes: 2 bytes (16 bits) per channel.
frames = [stream.read(CHUNK) for _ in range(num_chunks)]

print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

# Printing a chunk shows the raw bytes object, not integer sample values.
for data in frames:
    print(data)

这就是我获得的：

       ����#  ����
          
 !$
          

                 ��  ���� ��������������������������
           ������  �� ��                                           
��

   �� ������ ����������������������������
                            ��    
                                     ����

％（）， . ％＃

Answer 1

PyAudio 返回的是以字节串形式二进制编码的音频帧。请参阅以下问题的答案，了解如何以人类可读的形式打印这些帧：

Get an audio sample as float number from pyaudio-stream

Answer 2

您可以参考以下代码获得启发：

#!/usr/bin/python

# open a microphone in pyAudio and listen for taps

import pyaudio
import struct
import math

# Stream configuration.
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100

# Scale factor applied to each signed 16-bit sample before squaring.
SHORT_NORMALIZE = 1.0 / 32768.0

# Starting RMS level above which a block is treated as noisy.
INITIAL_TAP_THRESHOLD = 0.010

# Length of one analysed block, in seconds and in frames.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# The limits below are expressed as a number of consecutive blocks.
# More noisy blocks in a row than this: raise the threshold.
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# More quiet blocks in a row than this: lower the threshold.
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# A noise lasting longer than this many blocks is not a 'tap'.
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME

def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit audio samples.

    RMS amplitude is the square root of the mean of the squared sample
    values. Each sample is scaled by ``SHORT_NORMALIZE`` before squaring.

    Args:
        block: Bytes holding signed 16-bit samples in native byte order,
            two bytes per sample.

    Returns:
        The RMS level as a float, or ``0.0`` when ``block`` is empty.

    Raises:
        struct.error: If ``len(block)`` is not a multiple of two.
    """
    if not block:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    # One short per two bytes. Floor division keeps the count an int on
    # Python 3, where "/" would produce a float.
    count = len(block) // 2
    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short; scale it before squaring.
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)

class TapTester(object):
    """Detect short taps on an audio input by tracking per-block RMS level.

    Each call to ``listen`` reads one block from the input stream and
    classifies it as noisy or quiet against ``tap_threshold``. A run of
    between 1 and ``MAX_TAP_BLOCKS`` noisy blocks followed by a quiet block
    is reported as a tap. The threshold is nudged up or down after long
    runs of noisy or quiet blocks.
    """

    def __init__(self):
        """Open the input stream and reset the block counters."""
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so that the first quiet block is not
        # reported as a tap.
        self.noisycount = MAX_TAP_BLOCKS + 1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release the PyAudio instance."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device that looks like an input.

        A device matches when its lower-cased name contains "mic" or
        "input". Every device is printed while scanning.

        Returns:
            The matching device index, or ``None`` when no name matches.
        """
        for i in range(self.pa.get_device_count()):
            devinfo = self.pa.get_device_info_by_index(i)
            print("Device %d: %s" % (i, devinfo["name"]))

            for keyword in ["mic", "input"]:
                if keyword in devinfo["name"].lower():
                    print("Found an input: device %d - %s" % (i, devinfo["name"]))
                    return i

        print("No preferred input found; using default input device.")
        return None

    def open_mic_stream(self):
        """Open and return an input stream on the device chosen by
        ``find_input_device``."""
        device_index = self.find_input_device()

        return self.pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
        )

    def tapDetected(self):
        """Report a detected tap."""
        print("Tap!")

    def listen(self):
        """Read one block from the stream and update the tap detector.

        A read that raises ``IOError`` is counted and reported, and the
        block is skipped.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            self.errorcount += 1
            print("(%d) Error recording: %s" % (self.errorcount, e))
            self.noisycount = 1
            return

        amplitude = get_rms(block)
        if amplitude > self.tap_threshold:
            # Noisy block.
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # Too many noisy blocks in a row: turn down the sensitivity.
                self.tap_threshold *= 1.1
        else:
            # Quiet block: a short enough preceding noisy run was a tap.
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # Too many quiet blocks in a row: turn up the sensitivity.
                self.tap_threshold *= 0.9

if __name__ == "__main__":
    tt = TapTester()

    try:
        # Process 1000 blocks, then exit.
        for _ in range(1000):
            tt.listen()
    finally:
        # Close the input stream even if listening fails part-way through.
        tt.stop()

来自这篇文章：Detect tap with pyaudio from live mic

您可以轻松地调整它以将RMS放在表格中并绘制表格。

Answer 3

我知道这个问题很老了，我是在寻找其他答案时偶然发现它的，不过在我的项目中我用的是类似下面的做法。

#Lets assume the constants are defined somewhere

import struct
import pyaudio
import numpy as np

# NOTE(review): `self` is used here outside any visible method, so this looks
# like an excerpt from a class body; self.CHUNK is defined elsewhere.
# Open an input-only stream: paInt16 samples, one channel, 44100 Hz.
self.input = pyaudio.PyAudio().open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input=True,
            output=False,
            frames_per_buffer=1024,
)
# Read self.CHUNK frames from the stream as raw bytes.
wf_data = self.input.read(self.CHUNK)
# Decode the bytes as self.CHUNK signed 16-bit integers ('h'). The count
# equals the number of frames only because the stream has one channel.
wf_data = struct.unpack(str(self.CHUNK) + 'h', wf_data)
# Turn the resulting tuple of ints into a NumPy array.
wf_data = np.array(wf_data)

paInt16 和 'h' 对应。您可以在此处找出与您的 pyaudio 格式匹配的字母。 https://docs.python.org/3/library/struct.html

归功于： https://www.youtube.com/channel/UC2W0aQEPNpU6XrkFCYifRFQ

Answer 4

我认为你可以做到这一点

import struct

data = stream.read(CHUNK)
# Iterating over the raw buffer directly yields single bytes, not samples.
# Decode it as signed 16-bit integers ('h', two bytes each) first; this
# assumes the stream was opened with format=pyaudio.paInt16.
samples = struct.unpack("%dh" % (len(data) // 2), data)
for each in samples:
    print(each)

Answer 5

处理音频时，您可能需要信号缓冲区的 RMS（均方根）值。我相信它可以更好地“查看”音频信号的整体功率。

Python 标准库中有一个名为 audioop 的模块，该模块有一个名为 rms 的函数。（注意：audioop 自 Python 3.11 起已被弃用，并在 Python 3.13 中被移除。）

import pyaudio
import time
import audioop

def get_rms():
    """Yield the RMS level of successive chunks read from an input stream.

    This is a generator that never finishes on its own: it keeps reading
    ``CHUNK`` frames and yielding their RMS value until the caller stops
    iterating. The stream and the PyAudio instance are released when the
    generator is closed.

    Yields:
        The value returned by ``audioop.rms`` for each chunk read.
    """
    CHUNK = 8
    WIDTH = 2  # bytes per sample
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    # Bound before the try block so the cleanup below is safe even when
    # opening the stream fails.
    stream = None

    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK)
        # wait a second to allow the stream to be setup
        time.sleep(1)
        while True:
            data = stream.read(CHUNK, exception_on_overflow=False)
            # The width passed to audioop must match the sample width the
            # stream was opened with.
            yield audioop.rms(data, WIDTH)
    finally:
        # Close the stream before terminating the PyAudio instance.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

你可以像这样使用这个功能

# get_rms() is a generator function: this call only creates the generator,
# and its body starts running on the first iteration below.
rms_values = get_rms()
# The generator loops forever, so this prints values until interrupted.
for rms in rms_values:
    print(rms)

PyAudio - 将stream.read转换为int以获得幅度

5 个答案: