# ================================================================================ #
# Authors: Fabio Frazao and Oliver Kirsebom #
# Contact: fsfrazao@dal.ca, oliver.kirsebom@dal.ca #
# Organization: MERIDIAN (https://meridian.cs.dal.ca/) #
# Team: Data Analytics #
# Project: ketos #
# Project goal: The ketos library provides functionalities for handling #
# and processing acoustic data and applying deep neural networks to sound #
# detection and classification tasks. #
# #
# License: GNU GPLv3 #
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <https://www.gnu.org/licenses/>. #
# ================================================================================ #
""" 'audio.gammatone' module within the ketos library.
This module provides utilities to process audio data
using a auditory filterbank based on the gammatone function.
Contents:
GammatoneFilterBank class:
AuralFeatures class
"""
import sys
import os
import copy
import warnings
import numpy as np
from version_parser.version import Version
import scipy
import matplotlib.pyplot as plt
from ketos.audio.waveform import Waveform
from ketos.audio.annotation import AnnotationHandler
from ketos.audio.base_audio import BaseAudio, BaseAudioTime, segment_data
gammatone_filter_coeff = dict()
[docs]
def compute_center_freqs(num_chan, sampl_rate, freq_min):
""" Compute the center frequencies of the filter bank
Args:
num_chan: int
Number of channels in the filter bank
sampl_rate: float
Sampling rate in Hz
freq_min: float
Minimum frequency in Hz
Returns:
freqs: float
Center frequencies in Hz
"""
f0 = 228.8
i = np.linspace(1, num_chan, num_chan)
freqs = np.exp(i / num_chan * (np.log(freq_min + f0) - np.log(sampl_rate / 2. + f0))) * (sampl_rate / 2. + f0) - f0
freqs = np.flip(freqs)
return freqs
[docs]
def filter_signal(signal, sampl_rate, freqs):
""" Pass the signal through the gammatone filters
Args:
signal: numpy.array
Audio signal
sampl_rate: float
Sampling rate in Hz
freqs: numpy.array
Center frequencies of the filter bank
Returns:
x: numpy.array
The filtered signals stacked vertically into 2D array
"""
x = []
for freq in freqs:
(b, a) = get_filter_coeffs(sampl_rate, freq)
x.append(scipy.signal.filtfilt(b, a, signal))
x = np.array(x)
x = np.swapaxes(x, 0, 1)
return x
[docs]
def apply_weight_func(x, freqs):
""" Apply C weighting function.
This weighting function represents the approximate frequency sensitivity
of the human auditory system.
Args:
x: numpy.array
The filtered signals
freqs: numpy.array
Center frequencies of the filter bank
Returns:
x: numpy.array
The C-weighted filtered signals
"""
C = 1.007 * 12200**2 * freqs**2 / (freqs**2 + 20.6**2) / (freqs**2 + 12200**2)
return x * C[np.newaxis,:]
[docs]
def get_filter_coeffs(sampl_rate, freq):
""" Get the gammatone filter coefficients.
Args:
sampl_rate: float
Sampling rate in Hz
freq: float
Center frequency in Hz
Returns:
: tuple
Coefficients of the gammatone filter
"""
key = (sampl_rate, freq)
if key not in gammatone_filter_coeff.keys():
gammatone_filter_coeff[key] = compute_filter_coeffs(sampl_rate, freq)
return gammatone_filter_coeff[key]
[docs]
def compute_filter_coeffs(sampl_rate, freq):
""" Compute the gammatone filter coefficients.
Args:
sampl_rate: float
Sampling rate in Hz
freq: float
Center frequency in Hz
Returns:
(b, a): tuple
Coefficients of the gammatone filter
"""
if Version(scipy.__version__) < Version("1.6.0"):
print('The `compute_filter_coeffs` method in the `gammatone` module requires Scipy>=1.6.0')
print(f'The present environment only has Scipy=={Version(scipy.__version__)}')
print('Note that Scipy>=1.6.0 requires Python>=3.7.0')
exit(1)
b, a = scipy.signal.gammatone(freq=freq, ftype='iir', fs=sampl_rate)
return (b, a)
[docs]
class GammatoneFilterBank(BaseAudioTime):
""" Gammatone filter bank.
The filtered signals are stored in a 2D numpy array, where the first axis
(0) is the time dimension and the second axis (1) is the frequency dimension.
Args:
data: 2d numpy array
Filtered data
rate: float
Sampling rate in Hz
freqs: array-like
Center frequencies of the filter bank in Hz
filename: str or list(str)
Name of the source audio file, if available.
offset: float or array-like
Position in seconds of the left edge of the spectrogram within the source
audio file, if available.
label: int
Spectrogram label. Optional
annot: AnnotationHandler
AnnotationHandler object. Optional
weight_func: bool
Apply C weighting function. Default is True.
Attributes:
data: 2d numpy array
Filtered data
rate: float
Sampling rate in Hz
freqs: array-like
Center frequencies of the filter bank in Hz
filename: str or list(str)
Name of the source audio file, if available.
offset: float or array-like
Position in seconds of the left edge of the spectrogram within the source
audio file, if available.
label: int
Spectrogram label. Optional
annot: AnnotationHandler
AnnotationHandler object. Optional
weight_func: bool
Apply C weighting function.
"""
def __init__(self, data, rate, freqs, filename=None, offset=0, label=None, annot=None, weight_func=True, **kwargs):
self.rate = rate
self.freqs = freqs
self.weight_func = weight_func
super().__init__(data=data, time_res=1./rate, filename=filename, offset=offset, label=label, annot=annot)
[docs]
@classmethod
def empty(cls):
""" Creates an empty GammatoneFilterBank object
"""
return cls(data=np.empty(shape=(0,0), dtype=np.float64), rate=1, freqs=[])
[docs]
@classmethod
def from_wav(cls, path, num_chan=20, freq_min=1, channel=0, rate=None, offset=0, duration=None,
resample_method='scipy', id=None, normalize_wav=False, weight_func=True, **kwargs):
""" Create a Gammatone Filter Bank directly from wav file.
The arguments offset and duration can be used to select a portion of the wav file.
Note that values specified for the arguments offset and duration may be subject
to slight adjustments to ensure that the selected portion corresponds to an integer
number of samples.
Args:
path: str
Path to wav file
num_chan: int
Number of channels in the filter bank
freq_min: float
Minimum frequency of the filter bank in Hz
channel: int
Channel to read from. Only relevant for stereo recordings
rate: float
Desired sampling rate in Hz. If None, the original sampling rate will be used
offset: float
Start time of selection in seconds, relative the start of the wav file.
duration: float
Length of selection in seconds.
resample_method: str
Resampling method. Only relevant if `rate` is specified. Options are
* kaiser_best
* kaiser_fast
* scipy (default)
* polyphase
See https://librosa.github.io/librosa/generated/librosa.core.resample.html
for details on the individual methods.
id: str
Unique identifier (optional). If None, the filename will be used.
normalize_wav: bool
Normalize the waveform to have a mean of zero (mean=0) and a standard
deviation of unity (std=1) before computing the spectrogram. Default is False.
weight_func: bool
Apply C weighting function. Default is True.
Returns:
: GammatoneFilterBank
Gammatone filter bank
Example:
>>> # load gammatone filter bank from wav file
>>> from ketos.audio.gammatone import GammatoneFilterBank
>>> gfb = GammatoneFilterBank.from_wav('ketos/tests/assets/grunt1.wav', num_chan=20, freq_min=10, rate=1000)
>>> # print the center frequencies rounded to 1 decimal
>>> print(np.round(gfb.freqs,1))
[ 10. 23.7 38.2 53.5 69.7 86.8 104.9 124.1 144.3 165.7 188.4 212.3
237.6 264.4 292.7 322.6 354.2 387.7 423.1 460.5]
>>> # display the 4th filter bank signal
>>> fig = gfb.plot(filter_id=3)
>>> fig.savefig("ketos/tests/assets/tmp/gfb3_grunt1.png")
>>> plt.close(fig)
.. image:: ../../../ketos/tests/assets/tmp/gfb3_grunt1.png
"""
# load audio
audio = Waveform.from_wav(path=path, channel=channel, rate=rate, offset=offset, duration=duration,
resample_method=resample_method, id=id, normalize_wav=normalize_wav, **kwargs)
if len(audio.get_data()) == 0:
warnings.warn("Empty GammatoneFilterBank returned", RuntimeWarning)
return cls.empty()
# compute gammatone filter bank
return cls.from_waveform(audio=audio, num_chan=num_chan, freq_min=freq_min, weight_func=weight_func)
[docs]
def get_repres_attrs(self):
""" Get audio representation attributes """
attrs = super().get_repres_attrs()
attrs.update({'rate':self.rate, 'freqs':self.freqs,
'weight_func':self.weight_func, 'type':self.__class__.__name__})
return attrs
[docs]
def plot(self, filter_id, show_annot=False, figsize=(5,4), label_in_title=True, show_envelope=False):
""" Plot the filtered signal with proper axes ranges and labels.
Optionally, also display annotations as boxes superimposed on the signal.
Note: The resulting figure can be shown (fig.show())
or saved (fig.savefig(file_name))
Args:
filter_id: int
Filter to be plotted.
show_annot: bool
Display annotations
figsize: tuple
Figure size
label_in_title: bool
Include label (if available) in figure title
show_envelope: bool
Display envelope on top of signal
Returns:
: matplotlib.figure.Figure
A figure object.
Example:
>>> from ketos.audio.gammatone import GammatoneFilterBank
>>> # load gammatone filter bank
>>> gfb = GammatoneFilterBank.from_wav('ketos/tests/assets/grunt1.wav', num_chan=20, freq_min=10, rate=1000)
>>> # add an annotation
>>> gfb.annotate(start=1.2, end=1.6, freq_min=70, freq_max=600, label=1)
>>> # show the 4th filter bank with annotation box
>>> fig = gfb.plot(filter_id=3, show_annot=True)
>>> fig.savefig("ketos/tests/assets/tmp/gfb3_w_annot_box.png")
>>> plt.close(fig)
.. image:: ../../../ketos/tests/assets/tmp/gfb3_w_annot_box.png
"""
x = self.get_data()[:,filter_id] # select the filtered signal
wf = Waveform(data=x, rate=self.rate, filename=self.get_filename(),
offset=self.get_offset(), label=self.get_label(), annot=self.get_annotations())
return wf.plot(show_annot=show_annot, figsize=figsize, label_in_title=label_in_title,
append_title=f', {self.freqs[filter_id]:.1f} Hz', show_envelope=show_envelope)
[docs]
class AuralFeatures(BaseAudio):
""" Aural features computed with the aural-features package (https://pypi.org/project/aural-features/).
Args:
data: 1d numpy array
Feature values
filename: str
Name of the source audio file, if available.
offset: float
Position in seconds of the left edge of the audio segment within the source
audio file, if available.
label: int
Label. Optional
annot: AnnotationHandler
AnnotationHandler object. Optional
"""
def __init__(self, data, filename=None, offset=0, label=None, annot=None, waveform_transform_log=None, **kwargs):
super().__init__(data=data, filename=filename, offset=offset, label=label, annot=annot)
if waveform_transform_log is None: waveform_transform_log = []
self.waveform_transform_log = waveform_transform_log
[docs]
@classmethod
def from_wav(cls, path, filter_pad_samples=64, global_km_window_seconds=0.25,
local_km_window_seconds=0.008, filter_n=100, filter_min_hz=50,
channel=0, rate=None, offset=0, duration=None,
resample_method='scipy', id=None, normalize_wav=False,
waveform_transforms=None, **kwargs):
""" Compute aural features directly from wav file.
The arguments offset and duration can be used to select a portion of the wav file.
Note that values specified for the arguments offset and duration may be subject
to slight adjustments to ensure that the selected portion corresponds to an integer
number of samples.
Args:
path: str
Path to wav file
filter_min: float
Min filter frequency in Hz
local_km_window: float
Length of local KM window in seconds
channel: int
Channel to read from. Only relevant for stereo recordings
rate: float
Desired sampling rate in Hz. If None, the original sampling rate will be used
offset: float
Start time of selection in seconds, relative the start of the wav file.
duration: float
Length of selection in seconds.
resample_method: str
Resampling method. Only relevant if `rate` is specified. Options are
* kaiser_best
* kaiser_fast
* scipy (default)
* polyphase
See https://librosa.github.io/librosa/generated/librosa.core.resample.html
for details on the individual methods.
id: str
Unique identifier (optional). If None, the filename will be used.
normalize_wav: bool
Normalize the waveform to have a mean of zero (mean=0) and a standard
deviation of unity (std=1) before computing the spectrogram. Default is False.
waveform_transforms: list(dict)
List of dictionaries, where each dictionary specifies the name of
a transformation to be applied to the waveform before generating
the spectrogram. For example,
{"name":"add_gaussian_noise", "sigma":0.5}
Returns:
: AuralFeatures
Aural features
"""
# load audio
audio = Waveform.from_wav(path=path, channel=channel, rate=rate, offset=offset, duration=duration,
resample_method=resample_method, id=id, normalize_wav=normalize_wav,
transforms=waveform_transforms, **kwargs)
if len(audio.get_data()) == 0:
warnings.warn("Empty AuralFeatures returned", RuntimeWarning)
return cls.empty()
# compute gammatone filter bank
return cls.from_waveform(audio=audio, filter_pad_samples=filter_pad_samples, global_km_window_seconds=global_km_window_seconds,
local_km_window_seconds=local_km_window_seconds, filter_n=filter_n, filter_min_hz=filter_min_hz)
[docs]
def get_repres_attrs(self):
""" Get audio representation attributes """
attrs = super().get_repres_attrs()
attrs.update({'type':self.__class__.__name__})
return attrs