Source code for ketos.audio.utils.filter

# ================================================================================ #
#   Authors: Fabio Frazao and Oliver Kirsebom                                      #
#   Contact: fsfrazao@dal.ca, oliver.kirsebom@dal.ca                               #
#   Organization: MERIDIAN (https://meridian.cs.dal.ca/)                           #
#   Team: Data Analytics                                                           #
#   Project: ketos                                                                 #
#   Project goal: The ketos library provides functionalities for handling          #
#   and processing acoustic data and applying deep neural networks to sound        #
#   detection and classification tasks.                                            #
#                                                                                  #
#   License: GNU GPLv3                                                             #
#                                                                                  #
#       This program is free software: you can redistribute it and/or modify       #
#       it under the terms of the GNU General Public License as published by       #
#       the Free Software Foundation, either version 3 of the License, or          #
#       (at your option) any later version.                                        #
#                                                                                  #
#       This program is distributed in the hope that it will be useful,            #
#       but WITHOUT ANY WARRANTY; without even the implied warranty of             #
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
#       GNU General Public License for more details.                               # 
#                                                                                  #
#       You should have received a copy of the GNU General Public License          #
#       along with this program.  If not, see <https://www.gnu.org/licenses/>.     #
# ================================================================================ #

""" 'audio.utils.filter' module within the ketos library

    This module provides utilities for manipulating and filtering waveforms and
    spectrograms.
"""
import numpy as np
import scipy.ndimage as ndimage
import matplotlib.pyplot as plt


[docs]
def plot_image(img, fig, ax, extent=None, xlabel='', ylabel=''):
    """ Render a 2d array as an image on the given axes and attach a colorbar.

        The array is transposed before it is handed to ``imshow`` and the 
        origin is placed in the lower corner of the axes.

        Args:
            img: numpy array
                Pixel values
            fig: matplotlib.figure.Figure
                Figure that will host the colorbar
            ax: matplotlib.axes.Axes
                Axes on which the image is drawn
            extent: tuple(float,float,float,float)
                Extent of axes, optional.
            xlabel: str
                Label for x axis, optional.
            ylabel: str
                Label for y axis, optional.

        Returns:
            None
    """
    imshow_opts = dict(aspect='auto', origin='lower', extent=extent)
    mappable = ax.imshow(img.T, **imshow_opts)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # colorbar tick labels are printed with one decimal
    fig.colorbar(mappable, ax=ax, format='%.1f')



[docs]
def enhance_signal(img, enhancement=1.):
    r""" Enhance the contrast between regions of high and low intensity, while preserving 
        the range of pixel values.

        Multiplies each pixel value by the factor,

        .. math::
            f(x) = ( e^{-(x - m_x - \sigma_x) / w} + 1)^{-1}

        where :math:`x` is the pixel value, :math:`m_x` is the pixel value median of 
        the image, and :math:`w = \sigma_x / \epsilon`, where :math:`\sigma_x`
        is the pixel value standard deviation of the image and :math:`\epsilon` is the 
        enhancement parameter.

        Some observations:
          
         * :math:`f(x)` is a smoothly increasing function from 0 to 1.
         * :math:`f(m_x + \sigma_x)=0.5`, i.e. the transition from "low intensity" to 
           "high intensity" is located one standard deviation above the median.
         * The smaller the width, :math:`w`, the faster the transition from 0 to 1.

        Special cases:

         * If ``enhancement`` is not positive, no enhancement is applied.
         * If all pixels have the same value (:math:`\sigma_x = 0`) there is no 
           contrast to enhance and the width :math:`w` would be zero; the image 
           is returned unscaled instead of producing NaNs.

        Args:
            img : numpy array
                Image to be processed. 
            enhancement: float
                Parameter determining the amount of enhancement.

        Returns:
            img_en: numpy array
                Enhanced image. The input array is not modified.

        Example:
            >>> import numpy as np
            >>> from ketos.audio.utils.filter import enhance_signal, plot_image
            >>> #create an image 
            >>> x = np.linspace(-4,4,100)
            >>> y = np.linspace(-6,6,100)
            >>> x,y = np.meshgrid(x,y,indexing='ij')
            >>> img = np.exp(-(x**2+y**2)/(2*0.5**2)) #symmetrical Gaussian 
            >>> img += 0.2 * np.random.rand(100,100)  #add some noise
            >>> # apply enhancement
            >>> img_enh = enhance_signal(img, enhancement=3.0)
            >>> #draw the original image and its enhanced version
            >>> import matplotlib.pyplot as plt
            >>> fig, (ax1,ax2) = plt.subplots(1,2,figsize=(10,4)) #create canvas to draw on
            >>> plot_image(img,fig,ax1,extent=(-4,4,-6,6))
            >>> plot_image(img_enh,fig,ax2,extent=(-4,4,-6,6))
            >>> fig.savefig("ketos/tests/assets/tmp/image_enhancement1.png")

            .. image:: ../../../ketos/tests/assets/tmp/image_enhancement1.png
    """
    scaling = 1.

    if enhancement > 0:
        med = np.median(img)
        std = np.std(img)
        # A perfectly flat image has zero spread, which would make the 
        # sigmoid width zero and the exponent 0/0; skip the scaling then.
        if std != 0:
            wid = (1. / enhancement) * std
            # sigmoid centred at (median + std) with width wid
            scaling = 1. / (np.exp(-(img - med - std) / wid) + 1.)

    img_en = img * scaling
    return img_en



[docs]
def reduce_tonal_noise(img, method='MEDIAN', **kwargs):
    """ Reduce continuous tonal noise produced by e.g. ships and slowly varying 
        background noise

        Currently, offers the following two methods:

            1. MEDIAN: For every position along the second axis of the array, 
               subtracts the median taken over the first axis 
               (computed with ``np.ma.median(img, axis=0)``).
            
            2. RUNNING_MEAN: Subtracts a running mean that is updated while 
               stepping along the first axis of the array.
            
        The running mean is computed according to the formula given in 
        Baumgartner & Mussoline, JASA 129, 2889 (2011); doi: 10.1121/1.3562166

        Args:
            img: numpy.array
                Spectrogram image
            method: str
                Options are 'MEDIAN' and 'RUNNING_MEAN'
        
        Optional args:
            time_const_len: int
                Time constant in number of samples, used for the computation of the running mean.
                Must be provided if the method 'RUNNING_MEAN' is chosen.

        Returns:
            img_new: numpy array
                Corrected spectrogram image

        Raises:
            AssertionError: if the method 'RUNNING_MEAN' is chosen and 
                `time_const_len` is not provided.
            ValueError: if `method` is not one of the available options.

        Example:
            >>> import numpy as np
            >>> from ketos.audio.utils.filter import reduce_tonal_noise, plot_image
            >>> #create an image 
            >>> x = np.linspace(-4,4,100)
            >>> y = np.linspace(-6,6,100)
            >>> x,y = np.meshgrid(x,y,indexing='ij')
            >>> img = np.exp(-(x**2+y**2)/(2*0.5**2)) #symmetrical Gaussian 
            >>> img += 0.2 * np.random.rand(100,100)  #add some flat noise
            >>> #add tonal noise that exhibits sudden increase in amplitude
            >>> img += 0.2 * (1 + np.heaviside(x,0.5)) * np.exp(-(y + 2.)**2/(2*0.1**2))
            >>> #reduce tonal noise 
            >>> img_m = reduce_tonal_noise(img, method='MEDIAN')
            >>> img_r = reduce_tonal_noise(img, method='RUNNING_MEAN', time_const_len=30)
            >>> #draw the resulting images along with the original one 
            >>> import matplotlib.pyplot as plt
            >>> fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,4)) #create canvas to draw on
            >>> ext = (-4,4,-6,6)
            >>> plot_image(img,fig,ax1,extent=ext)
            >>> plot_image(img_m,fig,ax2,extent=ext)
            >>> plot_image(img_r,fig,ax3,extent=ext)
            >>> fig.savefig("ketos/tests/assets/tmp/image_tonal_noise_red1.png")

            .. image:: ../../../ketos/tests/assets/tmp/image_tonal_noise_red1.png
    """
    if method == 'MEDIAN':
        return img - np.ma.median(img, axis=0)

    if method == 'RUNNING_MEAN':
        if 'time_const_len' not in kwargs:
            # Raised explicitly (rather than via an assert statement) so the 
            # check survives `python -O`; the AssertionError type is kept for 
            # backward compatibility with existing callers.
            raise AssertionError('method RUNNING_MEAN requires time_const_len input argument')
        return reduce_tonal_noise_running_mean(img, kwargs['time_const_len'])

    # Unknown method: fail loudly instead of falling through to an 
    # unassigned return value.
    raise ValueError('Invalid tonal noise reduction method: {0}. '
                     'Available options are: MEDIAN, RUNNING_MEAN'.format(method))



[docs]
def reduce_tonal_noise_running_mean(img, time_const_len):
    """ Subtract an exponentially updated running mean from a spectrogram image 
        in order to suppress continuous tonal noise (e.g. from ships) and slowly 
        varying background noise.

        The running mean follows the formula given in Baumgartner & Mussoline, 
        Journal of the Acoustical Society of America 129, 2889 (2011); 
        doi: 10.1121/1.3562166

        The running mean is initialised to the average of the image over its 
        first axis. The image is then traversed along the first axis; at each 
        step the current running mean is subtracted, after which the running 
        mean is updated with the values just visited.

        Args:
            img: numpy.array
                Spectrogram image
            time_const_len: int
                Time constant in number of samples, used for the computation of the running mean.

        Returns:
            img_new : 2d numpy array
                Corrected spectrogram image
    """
    # weight given to the newest sample when updating the running mean
    eps = 1 - np.exp(np.log(0.15) / time_const_len)
    retain = 1 - eps

    background = np.mean(img, axis=0)
    img_new = np.zeros(img.shape)

    for idx, current in enumerate(img):
        img_new[idx] = current - background
        # the update happens after the subtraction, so each step only 
        # sees the mean accumulated from earlier steps
        background = retain * background + eps * current

    return img_new



[docs]
def filter_isolated_spots(img, struct=np.array([[1,1,1],[1,1,1],[1,1,1]])):
    """ Remove isolated spots from the image.

        A spot is a non-zero pixel that is not connected to any other non-zero 
        pixel, where connectivity is defined by `struct`. Spots are identified 
        by the number of pixels in each connected region, not by the pixel 
        values, so the function works for binary as well as non-binary images.

        Args:
            img : numpy array
                An array like object representing an image. 
                The input is not modified.
            struct : numpy array
                A structuring pattern that defines feature connections.
                Must be symmetric.

        Returns:
            filtered_array : numpy array
                An array containing the input image without the isolated spots.

        Example:
            >>> import numpy as np
            >>> from ketos.audio.utils.filter import filter_isolated_spots
            >>> img = np.array([[0,0,1,1,0,0],
            ...                 [0,0,0,1,0,0],
            ...                 [0,1,0,0,0,0],
            ...                 [0,0,0,0,0,0],
            ...                 [0,0,0,1,0,0]])
            >>> # remove pixels without neighbors
            >>> img_fil = filter_isolated_spots(img)
            >>> print(img_fil)
            [[0 0 1 1 0 0]
             [0 0 0 1 0 0]
             [0 0 0 0 0 0]
             [0 0 0 0 0 0]
             [0 0 0 0 0 0]]
    """
    filtered_array = np.copy(img)
    id_regions, num_ids = ndimage.label(filtered_array, structure=struct)

    # Number of pixels carrying each label (index 0 is the background). 
    # Counting pixels rather than summing their values keeps the result 
    # independent of the pixel intensities.
    region_sizes = np.bincount(id_regions.ravel(), minlength=num_ids + 1)
    is_isolated = (region_sizes == 1)
    is_isolated[0] = False  # never treat the background as a spot

    filtered_array[is_isolated[id_regions]] = 0
    
    return filtered_array



[docs]
def blur_image(img, size=20, sigma=5, gaussian=True):
    """ Smooth the input image using a median or Gaussian blur filter.
        
        Note that the input image is recasted as np.float32 before filtering. 
        The conversion creates a new array when needed; the caller's array is 
        not modified.

        This is essentially a wrapper around the scipy.ndimage.median_filter 
        and scipy.ndimage.gaussian_filter methods. 

        For further details, see https://docs.scipy.org/doc/scipy/reference/ndimage.html

        Args:
            img : numpy array
                Image to be processed. Array-like input (e.g. nested lists) 
                is also accepted.
            size: int
                Only used by the median filter. Describes the shape that is taken from the input array,
                at every element position, to define the input to the filter function.
            sigma: float or array
                Only used by the Gaussian filter. Standard deviation for Gaussian kernel. May be given as a 
                single number, in which case all axes have the same standard deviation, or as an array, allowing 
                for the axes to have different standard deviations.
            gaussian: bool
                Switch between median and Gaussian (default) filter

        Returns:
            blur_img: numpy array
                Blurred image.

        Example:
            >>> import numpy as np
            >>> from ketos.audio.utils.filter import blur_image
            >>> img = np.array([[0,0,0],
            ...                 [0,1,0],
            ...                 [0,0,0]])
            >>> # blur using Gaussian filter with sigma of 0.5
            >>> img_blur = blur_image(img, sigma=0.5)
            >>> img_blur = np.around(img_blur, decimals=2) # only keep up to two decimals
            >>> print(img_blur)
            [[0.01 0.08 0.01]
             [0.08 0.62 0.08]
             [0.01 0.08 0.01]]
    """
    # Explicit conversion instead of an assert-based check: assert statements 
    # are removed under `python -O`, which would silently skip the cast. 
    # No copy is made when the input already is a float32 array.
    img = np.asarray(img, dtype=np.float32)
    
    if gaussian:
        img_blur = ndimage.gaussian_filter(img, sigma=sigma)
    else:
        img_blur = ndimage.median_filter(img, size=size)

    return img_blur



[docs]
def apply_median_filter(img, row_factor=3, col_factor=4):
    """ Binarize an image using row-wise and column-wise median thresholds.

        A pixel is set to 0 if its value is less than or equal to the median 
        of its row times `row_factor`, or less than or equal to the median of 
        its column times `col_factor`. Remaining pixels with a positive value 
        are set to 1. Both medians are computed from the unmodified image.

        Note: Code adapted from Kahl et al. (2017)
            Paper: http://ceur-ws.org/Vol-1866/paper_143.pdf
            Code:  https://github.com/kahst/BirdCLEF2017/blob/master/birdCLEF_spec.py 

        Args:
            img : numpy array
                Array containing the img to be filtered. 
                OBS: The array is modified in place; the returned array is 
                the very same object.
            row_factor: int or float
                Factor by which the row-wise median pixel value is multiplied in order to define the threshold.
            col_factor: int or float
                Factor by which the col-wise median pixel value is multiplied in order to define the threshold.

        Returns:
            filtered_img: numpy array
                The filtered image with 0s and 1s.

        Example:
            >>> import numpy as np
            >>> from ketos.audio.utils.filter import apply_median_filter
            >>> img = np.array([[1,4,5],
            ...                 [3,5,1],
            ...                 [1,0,9]])
            >>> img_fil = apply_median_filter(img, row_factor=1, col_factor=1)
            >>> print(img_fil)
            [[0 0 0]
             [0 1 0]
             [0 0 1]]
    """
    # thresholds broadcast against the image thanks to keepdims
    row_threshold = row_factor * np.median(img, axis=1, keepdims=True)
    col_threshold = col_factor * np.median(img, axis=0, keepdims=True)

    # evaluate both criteria before touching the image
    discard = (img <= row_threshold) | (img <= col_threshold)

    img[discard] = 0
    img[img > 0] = 1

    return img



[docs]
def apply_preemphasis(sig, coeff=0.97):
    """ Apply a pre-emphasis filter to a signal.

        The first sample is kept as it is. From every later sample, the 
        preceding sample multiplied by `coeff` is subtracted.

        Args:
            sig : numpy array
                1-d array containing the signal.
            coeff: float
                The preemphasis coefficient. If set to 0,
                no preemphasis is applied (the output will be the same as the input).

        Returns:
            emphasized_signal : numpy array
                The filtered signal.

        Example:

            >>> import numpy as np
            >>> from ketos.audio.utils.filter import apply_preemphasis
            >>> sig = np.array([1,2,3,4,5])
            >>> sig_new = apply_preemphasis(sig, coeff=0.95)
            >>> print(sig_new)
            [1.   1.05 1.1  1.15 1.2 ]
    """
    first_sample = np.ravel(sig[0])
    # each remaining sample minus the scaled sample that precedes it
    remainder = np.ravel(sig[1:] - coeff * sig[:-1])

    emphasized_signal = np.concatenate((first_sample, remainder))
    return emphasized_signal