Source code for muda.deformers.background

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# CREATED:2015-03-03 21:29:49 by Brian McFee <brian.mcfee@nyu.edu>
'''Additive background noise'''

import soundfile as psf
import librosa
import numpy as np
import os
import six

from ..base import BaseTransformer


def sample_clip_indices(filename, n_samples, sr):
    '''Calculate the indices at which to sample a fragment of audio from a file.

    Parameters
    ----------
    filename : str
        Path to the input file

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    Returns
    -------
    start : int
        The sample index from `filename` at which the audio fragment starts
    stop : int
        The sample index from `filename` at which the audio fragment stops (i.e. y = audio[start:stop])

    Raises
    ------
    RuntimeError
        If the source file is shorter than the requested fragment
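
    Examples
    --------
    A minimal usage sketch with a synthetic one-second noise file; the
    path below is illustrative only, not part of this module:

    >>> import numpy as np
    >>> import soundfile as sf
    >>> sf.write('/tmp/noise.wav', 0.1 * np.random.randn(44100), 44100)
    >>> start, stop = sample_clip_indices('/tmp/noise.wav',
    ...                                   n_samples=22050, sr=22050)
    >>> stop - start  # 22050 samples at sr=22050 need 44100 source samples
    44100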
    '''

    with psf.SoundFile(str(filename), mode='r') as soundf:
        # Measure required length of fragment
        n_target = int(np.ceil(n_samples * soundf.samplerate / float(sr)))

        # Raise exception if source is too short
        if len(soundf) < n_target:
            raise RuntimeError('Source {} (length={})'.format(filename, len(soundf)) +
                ' must be at least the length of the input ({})'.format(n_target))

        # Draw a starting point at random in the background waveform
        start = np.random.randint(0, 1 + len(soundf) - n_target)
        stop = start + n_target

        return start, stop


def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    '''Slice a fragment of audio from a file.

    This uses PySoundFile to seek efficiently without
    loading the entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should start

    stop : int
        The sample index of `filename` at which the audio fragment should stop (i.e. y = audio[start:stop])

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Notes
    -----
    This function does not itself raise for short source files; the
    length check (which raises a `RuntimeError`) is performed in
    `sample_clip_indices`, and the fragment returned here is padded or
    trimmed to exactly `n_samples`.
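
    Examples
    --------
    A minimal usage sketch; the file path below is illustrative only:

    >>> import numpy as np
    >>> import soundfile as sf
    >>> sf.write('/tmp/noise.wav', 0.1 * np.random.randn(44100), 44100)
    >>> y = slice_clip('/tmp/noise.wav', start=0, stop=44100,
    ...                n_samples=22050, sr=22050)
    >>> y.shape
    (22050,)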

    '''

    with psf.SoundFile(str(filename), mode='r') as soundf:
        n_target = stop - start

        soundf.seek(start)

        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample from the file's native rate to the target sr
        y = librosa.resample(y, soundf.samplerate, sr)

        # Pad or trim to the target length exactly
        y = librosa.util.fix_length(y, n_samples)

        return y


class BackgroundNoise(BaseTransformer):
    '''Additive background noise deformations.

    From each background noise signal, `n_samples` clips are randomly
    extracted and mixed with the input audio with a random mixing
    coefficient sampled uniformly between `weight_min` and `weight_max`.

    This transformation affects the following attributes:

    - Audio

    Attributes
    ----------
    n_samples : int > 0
        The number of samples to generate with each noise source

    files : str or list of str
        Path to audio file(s) on disk containing background signals

    weight_min : float in (0.0, 1.0)
    weight_max : float in (0.0, 1.0)
        The minimum and maximum weight to combine input signals

        `y_out = (1 - weight) * y + weight * y_noise`
    '''

    def __init__(self, n_samples=1, files=None,
                 weight_min=0.1, weight_max=0.5):

        if n_samples <= 0:
            raise ValueError('n_samples must be strictly positive')

        if not 0 < weight_min < weight_max < 1.0:
            raise ValueError('weights must be in the range (0.0, 1.0)')

        if isinstance(files, six.string_types):
            files = [files]

        for fname in files:
            if not os.path.exists(fname):
                raise RuntimeError('file not found: {}'.format(fname))

        BaseTransformer.__init__(self)

        self.n_samples = n_samples
        self.files = files
        self.weight_min = weight_min
        self.weight_max = weight_max

    def states(self, jam):
        mudabox = jam.sandbox.muda
        for fname in self.files:
            for _ in range(self.n_samples):
                start, stop = sample_clip_indices(fname,
                                                  len(mudabox._audio['y']),
                                                  mudabox._audio['sr'])
                yield dict(filename=fname,
                           weight=np.random.uniform(low=self.weight_min,
                                                    high=self.weight_max,
                                                    size=None),
                           start=start,
                           stop=stop)

    def audio(self, mudabox, state):
        weight = state['weight']
        fname = state['filename']
        start = state['start']
        stop = state['stop']

        noise = slice_clip(fname, start, stop,
                           len(mudabox._audio['y']),
                           mudabox._audio['sr'],
                           mono=mudabox._audio['y'].ndim == 1)

        # Normalize the data
        mudabox._audio['y'] = librosa.util.normalize(mudabox._audio['y'])
        noise = librosa.util.normalize(noise)

        mudabox._audio['y'] = ((1.0 - weight) * mudabox._audio['y'] +
                               weight * noise)
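
# Example usage (illustrative sketch): the JAMS, audio, and noise file
# names below are assumptions for demonstration, not files shipped with muda.
#
#   >>> import muda
#   >>> jam = muda.load_jam_audio('input.jams', 'input.ogg')
#   >>> noise = BackgroundNoise(n_samples=2,
#   ...                         files=['noise1.wav', 'noise2.wav'],
#   ...                         weight_min=0.2, weight_max=0.5)
#   >>> for jam_out in noise.transform(jam):
#   ...     pass  # each jam_out carries one noise-augmented copy of the audio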