Source code for ketos.data_handling.parsing

# ================================================================================ #
#   Authors: Fabio Frazao and Oliver Kirsebom                                      #
#   Contact: fsfrazao@dal.ca, oliver.kirsebom@dal.ca                               #
#   Organization: MERIDIAN (https://meridian.cs.dal.ca/)                           #
#   Team: Data Analytics                                                           #
#   Project: ketos                                                                 #
#   Project goal: The ketos library provides functionalities for handling          #
#   and processing acoustic data and applying deep neural networks to sound        #
#   detection and classification tasks.                                            #
#                                                                                  #
#   License: GNU GPLv3                                                             #
#                                                                                  #
#       This program is free software: you can redistribute it and/or modify       #
#       it under the terms of the GNU General Public License as published by       #
#       the Free Software Foundation, either version 3 of the License, or          #
#       (at your option) any later version.                                        #
#                                                                                  #
#       This program is distributed in the hope that it will be useful,            #
#       but WITHOUT ANY WARRANTY; without even the implied warranty of             #
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
#       GNU General Public License for more details.                               # 
#                                                                                  #
#       You should have received a copy of the GNU General Public License          #
#       along with this program.  If not, see <https://www.gnu.org/licenses/>.     #
# ================================================================================ #

""" Parsing module within the ketos library

    This module provides utilities to parse various string 
    structures.
"""
import os
import sys
import json
from pint import UnitRegistry
import importlib
import inspect

ureg = UnitRegistry()


""" Standard audio-representation parameters recognized by Ketos.
"""
audio_std_params = {'rate':                     {'type':float, 'unit':'Hz'},
                    'window':                   {'type':float, 'unit':'s'},
                    'step':                     {'type':float, 'unit':'s'},
                    'bins_per_oct':             {'type':int,   'unit':None},
                    'freq_min':                 {'type':float, 'unit':'Hz'},
                    'freq_max':                 {'type':float, 'unit':'Hz'},
                    'window_func':              {'type':str,   'unit':None},
                    'resample_method':          {'type':str,   'unit':None},
                    'duration':                 {'type':float, 'unit':'s'},
                    'normalize_wav':            {'type':bool,  'unit':None},
                    'transforms':               {'type':list,  'unit':None},
                    'waveform_transforms':      {'type':list,  'unit':None},
                    'num_chan':                 {'type':int,   'unit':None},
                    'filter_pad_samples':       {'type':int,   'unit':None},
                    'global_km_window_seconds': {'type':float, 'unit':'s'},
                    'local_km_window_seconds':  {'type':float, 'unit':'s'},
                    'filter_n':                 {'type':int,   'unit':None},
                    'filter_min_hz':            {'type':float, 'unit':'Hz'},
                    'decibel':                  {'type':bool,  'unit':None},
                    'input_shape':              {'type':list,  'unit':None}
                    }

def is_encoded(s):
    """ Check that the audio presentation has been encoded.

        More specifically, the method checks that items specified as having a 
        physical unit in `audio_std_params`, have string values.
    
        Args:
            s: dict
                Audio representation  

        Returns:
            : bool
                True, if the audio representation is encoded. False, otherwise.
    """
    s_dict = {'s': s} if 'type' in s.keys() else s
        
    for _,s in s_dict.items():     
        for key, value in s.items():
            if key in audio_std_params.keys() and \
                    audio_std_params[key]['unit'] != None and not isinstance(value, str):
                return False

    return True


[docs]def load_audio_representation(path, name=None, return_unparsed=False): """ Load audio representation from JSON file. By default the function attempts to parse the individual parameter values, e.g., the value "20 kHz" will be returned as 20000 and the value "11 ms" will be returned as 0.011. Use the `return_unparsed` argument to change this behaviour. Args: path: str Path to json file name: str Heading of the relevant section of the json file. If None, the function returns the entire content of the JSON file. return_unparsed: bool Do not parse the parameter values. Default is False. Returns: d: dict Audio representation Example: >>> import json >>> import os >>> from ketos.data_handling.parsing import load_audio_representation >>> # create json file with spectrogram settings >>> json_str = '{"spectrogram": {"type": "MagSpectrogram", "rate": "20 kHz", "window": "0.1 s", "step": "0.025 s", "window_func": "hamming", "freq_min": "30Hz", "freq_max": "3000Hz"}}' >>> path = 'ketos/tests/assets/tmp/config.py' >>> file = open(path, 'w') >>> _ = file.write(json_str) >>> file.close() >>> # load settings back from json file >>> settings = load_audio_representation(path=path, name='spectrogram') >>> print(settings) {'type': <class 'ketos.audio.spectrogram.MagSpectrogram'>, 'rate': 20000.0, 'window': 0.1, 'step': 0.025, 'window_func': 'hamming', 'freq_min': 30, 'freq_max': 3000} >>> # clean up >>> os.remove(path) It is also possible to pass a custom audio representation class to this function. In this case, include a key/value pair indicating the path to the module you are loading the class from. For instance: >>> import json # doctest: +SKIP >>> import os # doctest: +SKIP >>> from ketos.data_handling.parsing import load_audio_representation # doctest: +SKIP >>> # create json file with spectrogram settings >>> json_str = '{"custom_representation": {"type": "Cepstrum", "module": "path/to/my/audio_representation.py", "any": "parameter", "for": "the", "custom": "representation"}}' # doctest: +SKIP >>> path = 'my/custom/config.py' # doctest: +SKIP >>> settings = load_audio_representation(path=path, name='custom_representation') # doctest: +SKIP >>> print(settings) # doctest: +SKIP {'type': <class 'audio_representation.Cepstrum'>, "module": "path/to/my/audio_representation.py", "any": "parameter", "for": "the", "custom": "representation"} """ with open(path, 'r') as fil: data = json.load(fil) if name != None: data = data[name] if not return_unparsed: data = parse_audio_representation(data) return data
[docs]def parse_audio_representation(audio_representations): """ Parse audio representation parameters. Args: audio_representation: dict Unparsed audio representation Returns: audio_representation: dict Parsed audio representation """ from ketos.audio import audio_representation_names_in_recipe # Determines if the input is a nested dictionary. is_nested = isinstance(audio_representations, dict) and isinstance(list(audio_representations.values())[0], dict) if not is_nested: audio_representations = {0: audio_representations} for name,params in audio_representations.items(): # check if audio representation type is a class included in ketos and return the class if params['type'] in audio_representation_names_in_recipe: audio_representations[name]['type'] = audio_representation_names_in_recipe[params['type']] else: try: # If not, try to load a custom module provided by the user # See docs https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly module_name = os.path.basename(params['module']).split('.')[0] spec = importlib.util.spec_from_file_location(module_name, params['module']) module = importlib.util.module_from_spec(spec) sys.modules[module_name] = module spec.loader.exec_module(module) # Now we load the class audio_representation_class = getattr(module, params['type']) audio_representations[name]['type'] = audio_representation_class except KeyError as ex: raise Exception(f'The audio representation "{audio_representations[name]["type"]}" is not included with ketos. However, it is possible to use a custom audio representation. Consult the documentation in "data_handling.parsing.load_audio_representation" for examples.') from None for key,value in params.items(): audio_representations[name][key] = parse_parameter(name=key, value=value) if not is_nested: audio_representations = audio_representations[0] return audio_representations
[docs]def parse_parameter(name, value): """ Parse the parameter value according to the type and unit specified in the `audio_std_params` dictionary. For example, if name='window' and value='22.1 ms', the function returns the float 0.0221. If the parameter is not found in the `audio_std_params` dictionary, the function returns the input value unmodified. Args: name: str Name of the parameter to be parsed value: str Value of the parameter to be parsed Returns: parsed_value: str, int, float, bool, or list Parsed value Example: >>> from ketos.data_handling.parsing import parse_parameter >>> print(parse_parameter(name='step', value='23 ms')) 0.023 """ Q = ureg.Quantity parsed_value = value if name in audio_std_params.keys(): param = audio_std_params[name] typ = param['type'] unit = param['unit'] if unit is not None and Q(value).check(unit): parsed_value = Q(value).m_as(unit) if typ in ['int', int]: parsed_value = int(parsed_value) elif unit in ['float', float]: parsed_value = float(parsed_value) elif typ in ['str', str] and value is not None: parsed_value = str(parsed_value) elif typ in ['bool', bool]: parsed_value = (parsed_value.lower() == "true") elif typ in [list]: # convert specific transform arguments from str to tuple if name == 'transforms': for tr in parsed_value: if tr['name'] == 'adjust_range': s = tr['range'][1:-1] tr['range'] = tuple(map(int, s.split(','))) elif tr['name'] == 'resize' and 'shape' in tr.keys(): v = tr['shape'] assert isinstance(v, (list, str)), "shape argument of resize transform must be "\ f"of type 'list' or 'str' whereas a '{type(v)}' was provided" if isinstance(v, list): tr['shape'] = tuple(v) elif isinstance(v, str): s = v[1:-1] tr['shape'] = tuple(map(int, s.split(','))) return parsed_value
[docs]def encode_audio_representation(s): """ Encode audio representation. Every parameter listed in the `audio_std_params` dictionary with a unit is encoded as a str. Args: s: dict Input audio representation Returns: s: dict Encoded audio representation """ s_dict = {'s': s} if 'type' in s.keys() else s for i,s in s_dict.items(): for key,value in s.items(): s_dict[i][key] = encode_parameter(name=key, value=value) s = s_dict['s'] if 's' in s_dict.keys() else s_dict return s
[docs]def encode_parameter(name, value): """ Encode paramater as a string with an SI unit, according to the unit specified in the `audio_std_params` dictionary. For example, if name='window' and value=4.22, the function returns the str '4.22 s'. If the parameter is not found in the `audio_std_params` dictionary, the function returns the input value unmodified, unless the parameter is a tuple in which case it is converted to a string. Args: name: str Name of the parameter to be encoded value: str Value of the parameter to be encoded Returns: encoded_value: str or type of input value Encoded value Example: >>> from ketos.data_handling.parsing import encode_parameter >>> print(encode_parameter(name='step', value=0.037)) 0.037 s """ encoded_value = value if name in audio_std_params.keys(): param = audio_std_params[name] unit = param['unit'] if unit is not None: encoded_value = f'{value} {unit}' typ = param['type'] if typ == bool: encoded_value = str(value).lower() elif isinstance(value, tuple): encoded_value = ','.join([str(x) for x in value]) encoded_value = '(' + encoded_value + ')' elif name == 'type': if inspect.isclass(value): encoded_value = value.__name__ return encoded_value
[docs]def str2bool(v): """ Convert most common answers to yes/no questions to boolean Args: v : str Answer Returns: res : bool Answer converted to boolean """ res = v.lower() in ("yes", "YES", "Yes", "true", "True", "TRUE", "on", "ON", "t", "T", "1") return res