I am trying to display the spectrogram of a segment selected on the audio waveform view. I can display the audio waveform inside the tkinter GUI, but not the spectrogram. I do not know how to draw the spectrogram inside the canvas that is already defined in tkinter. I would be grateful if anyone could help. Thank you.
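For reference, this is the basic pattern I believe I should be following to get a Matplotlib image into a tkinter window: draw with imshow on an Axes that belongs to an explicitly created Figure, then hand that Figure to FigureCanvasTkAgg. The sketch below is minimal and self-contained; the synthetic chirp and the helper name make_demo_spectrogram are only placeholders and are not part of my real code.

# Minimal sketch: an imshow spectrogram drawn on the Axes of a Figure that is
# embedded in tkinter through FigureCanvasTkAgg. Placeholder data only.
import numpy as np
import tkinter as tk
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

def make_demo_spectrogram(fs=8000, duration=2.0, n_fft=512, hop=256):
    """Log-magnitude spectrogram of a synthetic chirp, used as placeholder data."""
    t = np.arange(int(fs * duration)) / fs
    signal = np.sin(2 * np.pi * (200.0 + 800.0 * t) * t)   # simple chirp
    window = np.hanning(n_fft)
    frames = []
    for start in range(0, len(signal) - n_fft, hop):
        spectrum = np.fft.rfft(window * signal[start:start + n_fft])
        frames.append(np.abs(spectrum))
    magnitude = np.array(frames).T                          # (freq bins, time frames)
    return 20.0 * np.log10(np.maximum(magnitude, 1e-10))

root = tk.Tk()
root.title("Embedded spectrogram sketch")

fig = Figure(figsize=(8, 3), dpi=100)
ax = fig.add_subplot(111)
# Draw on the Axes of this embedded Figure (not via pyplot), so the image
# ends up inside the tkinter canvas below.
ax.imshow(make_demo_spectrogram(), aspect='auto', origin='lower', cmap='magma')
ax.set_xlabel('frame')
ax.set_ylabel('frequency bin')

canvas = FigureCanvasTkAgg(fig, master=root)
canvas.draw()
canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

root.mainloop()

As far as I understand, the key point is that the image has to be drawn on the Axes of the Figure that is passed to FigureCanvasTkAgg, not on pyplot's current figure.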
Here is my code:
from __future__ import print_function, absolute_import
import numpy
import math
import scipy.fftpack
import scipy.signal
from numpy.lib.stride_tricks import as_strided
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.widgets import SpanSelector
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
class AudioPlayer(object):
    def __init__(self, signal, sampling_rate):
        self.signal = signal
        self.sampling_rate = sampling_rate
        if len(self.signal.shape) == 1:
            self.channels = 1
        else:
            self.channels = self.signal.shape[1]

    @property
    def fs(self):
        return self.sampling_rate

    @property
    def duration_samples(self):
        return self.signal.shape[0]

class EventListVisualizer(object):
    def __init__(self, master, **kwargs):
        self.master = master
        self.master.title("A simple GUI")
        if kwargs.get('audio_signal') is not None and kwargs.get('sampling_rate') is not None:
            audio_signal = kwargs.get('audio_signal') / numpy.max(numpy.abs(kwargs.get('audio_signal')))
            self.audio = AudioPlayer(signal=audio_signal, sampling_rate=kwargs.get('sampling_rate'))
        self.mode = 'spectrogram'
        # self.mode = 'time_domain'
        self.spec_hop_size = kwargs.get('spec_hop_size', 256)
        self.spec_win_size = kwargs.get('spec_win_size', 1024)
        self.spec_fft_size = kwargs.get('spec_fft_size', 1024)
        self.spec_cmap = kwargs.get('spec_cmap', 'magma')
        self.spec_interpolation = kwargs.get('spec_interpolation', 'nearest')
        self.color = kwargs.get('color', '#339933')
        self.D = None
        self.x = None
        self.timedomain_locations = None
        self.begin_time = None
        self.end_time = None
        self.slider_time = None
        self.use_blit = kwargs.get('use_blit', False)
        self.waveform_selector_point_hop = kwargs.get('waveform_selector_point_hop', 1000)
        self.waveform_highlight_point_hop = 100
        self.waveform_highlight_color = self.color
        self.fig_shape = (14, 2)
        self._quit = False
        self.label_colormap = cm.get_cmap(name=kwargs.get('event_roll_cmap', 'rainbow'))

    def generate_GUI(self):
        # self.fig = plt.figure(figsize=self.fig_shape)
        self.fig1 = Figure(figsize=self.fig_shape, dpi=100)
        self.ax1 = self.fig1.add_subplot(111)
        self.ax1.grid(True)

        # Waveform display panel
        # ====================================
        self.timedomain_locations = numpy.arange(0, self.audio.signal.shape[0])
        self.ax1.fill_between(
            self.timedomain_locations[::self.waveform_selector_point_hop],
            self.audio.signal[::self.waveform_selector_point_hop],
            -self.audio.signal[::self.waveform_selector_point_hop],
            color='0.5')
        # frame in which the waveform figure is packed
        self.waveforms_frame = tk.Frame(self.master, relief=tk.RAISED, borderwidth=3)
        self.waveforms_frame.pack(fill=tk.X)
        title_label_1 = tk.Label(self.waveforms_frame, text="Wave Plot", font="Times 12 italic bold")
        title_label_1.pack()
        # canvas that embeds the Matplotlib waveform figure in tkinter
        self.waveform_canvas = FigureCanvasTkAgg(self.fig1, master=self.waveforms_frame)
        self.waveform_canvas.draw()
        self.waveform_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

        # Highlight panel
        # ====================================
        self.fig2 = Figure(figsize=self.fig_shape, dpi=100)
        self.ax2 = self.fig2.add_subplot(111)
        self.ax2.grid(True)
        self.ax2.axhline()  # plot a horizontal line in the middle
        self.x = numpy.arange(0, self.audio.duration_samples)
        self.begin_time = self.x[0] / float(self.audio.fs)
        self.end_time = self.x[-1] / float(self.audio.fs)

        # Spectrogram display panel
        self.D = self.get_spectrogram(audio=self.audio.signal, n_fft=self.spec_fft_size,
                                      win_length=self.spec_win_size, hop_length=self.spec_hop_size)
        self.plot_spectrogram(data=self.D, sampling_rate=self.audio.fs,
                              interpolation=self.spec_interpolation, cmap=self.spec_cmap)
        # frame in which the spectrogram figure is packed
        self.spectrums_frame = tk.Frame(self.master, relief=tk.RAISED, borderwidth=3)
        self.spectrums_frame.pack(fill=tk.X)
        title_label_2 = tk.Label(self.spectrums_frame, text="Spectrogram Plot", font="Times 12 italic bold")
        title_label_2.pack()
        # canvas that embeds the Matplotlib spectrogram figure in tkinter
        self.spectogram_canvas = FigureCanvasTkAgg(self.fig2, master=self.spectrums_frame)
        self.spectogram_canvas.draw()
        self.spectogram_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=tk.TRUE)

        # SpanSelector picks the region of the waveform to display below
        self.slider_time = SpanSelector(ax=self.ax1, onselect=self.on_select, minspan=None, direction='horizontal',
                                        span_stays=True, useblit=self.use_blit, onmove_callback=None,
                                        rectprops=dict(alpha=0.15, facecolor=self.color))

    def on_select(self, x_min, x_max):
        x_min = int(x_min)
        x_max = int(x_max)
        if math.fabs(x_min - x_max) < 10:
            # Reset highlight
            self.begin_time = self.x[0] / float(self.audio.fs)
            self.end_time = self.x[-1] / float(self.audio.fs)
            # Set signal highlight panel
            if self.mode == 'spectrogram':
                self.ax2.set_xlim(0, self.D.shape[1])
            elif self.mode == 'time_domain':
                self.ax2.set_xlim(self.timedomain_locations[0], self.timedomain_locations[-1])
            self.slider_time.stay_rect.set_visible(False)
        else:
            # Set annotation panel
            self.begin_time = float(x_min) / self.audio.fs
            self.end_time = float(x_max) / self.audio.fs
            # Set signal highlight panel
            if self.mode == 'spectrogram':
                spec_min = int(x_min / float(self.spec_hop_size))
                spec_max = int(x_max / float(self.spec_hop_size))
                self.ax2.set_xlim(spec_min, spec_max)
            elif self.mode == 'time_domain':
                index_min, index_max = numpy.searchsorted(self.x, (x_min, x_max))
                index_max = min(len(self.x) - 1, index_max)
                this_x = self.timedomain_locations[index_min:index_max]
                self.ax2.set_xlim(this_x[0], this_x[-1])
            self.slider_time.stay_rect.set_visible(True)
        # self.fig.canvas.draw()
        self.spectogram_canvas.draw_idle()

    @staticmethod
    def get_spectrogram(audio, n_fft=256, win_length=1024, hop_length=1024):
        fft_window = scipy.signal.hann(win_length, sym=False).reshape((-1, 1))
        audio = numpy.pad(array=audio,
                          pad_width=int(n_fft // 2),
                          mode='reflect')
        n_frames = 1 + int((len(audio) - n_fft) / hop_length)
        y_frames = as_strided(x=audio,
                              shape=(n_fft, n_frames),
                              strides=(audio.itemsize, int(hop_length * audio.itemsize)))
        S = numpy.empty((int(1 + n_fft // 2), y_frames.shape[1]), dtype=numpy.complex64, order='F')
        max_memory_block = 2**8 * 2**10
        n_columns = int(max_memory_block / (S.shape[0] * S.itemsize))
        for bl_s in range(0, S.shape[1], n_columns):
            bl_t = min(bl_s + n_columns, S.shape[1])
            # RFFT and conjugate here to match phase from DPWE code
            S[:, bl_s:bl_t] = scipy.fftpack.fft(fft_window * y_frames[:, bl_s:bl_t], axis=0)[:S.shape[0]].conj()
        magnitude = numpy.abs(S) ** 2
        ref = numpy.max(magnitude)
        amin = 1e-10
        top_db = 80.0
        log_spec = 10.0 * numpy.log10(numpy.maximum(amin, magnitude))
        log_spec -= 10.0 * numpy.log10(numpy.maximum(amin, ref))
        log_spec = numpy.maximum(log_spec, log_spec.max() - top_db)
        return log_spec

    @staticmethod
    def plot_spectrogram(data, sampling_rate=44100, n_yticks=5, interpolation='nearest', cmap='magma'):
        axes = plt.imshow(data, aspect='auto', origin='lower', interpolation=interpolation, cmap=plt.get_cmap(cmap))
        # X axis
        plt.xticks([])
        # Y axis
        positions = numpy.linspace(0, data.shape[0] - 1, n_yticks, endpoint=True).astype(int)
        values = numpy.linspace(0, 0.5 * sampling_rate, data.shape[0], endpoint=True).astype(int)
        t_log = (data.shape[0] * (1 - numpy.logspace(-numpy.log2(data.shape[0]), 0, data.shape[0], base=2, endpoint=True))[::-1]).astype(int)
        t_inv = numpy.arange(len(t_log))
        for i in range(len(t_log) - 1):
            t_inv[t_log[i]:t_log[i + 1]] = i
        plt.yticks(positions, values[t_inv[positions]])
        return axes

if __name__ == '__main__':
    root = tk.Tk()

    # Input processing
    import soundfile
    import numpy as np
    import librosa

    def read_audio(audio_path, target_fs=None):
        (audio, fs) = soundfile.read(audio_path)
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)
        if target_fs is not None and fs != target_fs:
            audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
            fs = target_fs
        return audio, fs

    audio, fs = read_audio('sample.wav')

    vis = EventListVisualizer(root, audio_signal=audio, sampling_rate=fs)
    vis.generate_GUI()
    root.mainloop()