#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# CREATED:2015-03-03 21:29:49 by Brian McFee <brian.mcfee@nyu.edu>
'''Additive background noise'''
import soundfile as psf
import librosa
import numpy as np
import os
import six
from ..base import BaseTransformer
def sample_clip_indices(filename, n_samples, sr):
'''Calculate the indices at which to sample a fragment of audio from a file.
Parameters
----------
filename : str
Path to the input file
n_samples : int > 0
The number of samples to load
sr : int > 0
The target sampling rate
Returns
-------
start : int
The sample index from `filename` at which the audio fragment starts
stop : int
The sample index from `filename` at which the audio fragment stops (e.g. y = audio[start:stop])
'''
with psf.SoundFile(str(filename), mode='r') as soundf:
n_target = int(np.ceil(n_samples * soundf.samplerate / float(sr)))
# Draw a random clip
start = np.random.randint(0, len(soundf) - n_target)
stop = start + n_target
return start, stop
def slice_clip(filename, start, stop, n_samples, sr, mono=True):
'''Slice a fragment of audio from a file.
This uses pysoundfile to efficiently seek without
loading the entire stream.
Parameters
----------
filename : str
Path to the input file
start : int
The sample index of `filename` at which the audio fragment should start
stop : int
The sample index of `filename` at which the audio fragment should stop (e.g. y = audio[start:stop])
n_samples : int > 0
The number of samples to load
sr : int > 0
The target sampling rate
mono : bool
Ensure monophonic audio
Returns
-------
y : np.ndarray [shape=(n_samples,)]
A fragment of audio sampled from `filename`
Raises
------
ValueError
If the source file is shorter than the requested length
'''
with psf.SoundFile(str(filename), mode='r') as soundf:
n_target = stop - start
soundf.seek(start)
y = soundf.read(n_target).T
if mono:
y = librosa.to_mono(y)
# Resample to initial sr
y = librosa.resample(y, soundf.samplerate, sr)
# Clip to the target length exactly
y = librosa.util.fix_length(y, n_samples)
return y
[docs]class BackgroundNoise(BaseTransformer):
'''Additive background noise deformations.
From each background noise signal, `n_samples` clips are randomly
extracted and mixed with the input audio with a random mixing coefficient
sampled uniformly between `weight_min` and `weight_max`.
This transformation affects the following attributes:
- Audio
Attributes
----------
n_samples : int > 0
The number of samples to generate with each noise source
files : str or list of str
Path to audio file(s) on disk containing background signals
weight_min : float in (0.0, 1.0)
weight_max : float in (0.0, 1.0)
The minimum and maximum weight to combine input signals
`y_out = (1 - weight) * y + weight * y_noise`
'''
def __init__(self, n_samples=1, files=None, weight_min=0.1, weight_max=0.5):
if n_samples <= 0:
raise ValueError('n_samples must be strictly positive')
if not 0 < weight_min < weight_max < 1.0:
raise ValueError('weights must be in the range (0.0, 1.0)')
if isinstance(files, six.string_types):
files = [files]
for fname in files:
if not os.path.exists(fname):
raise RuntimeError('file not found: {}'.format(fname))
BaseTransformer.__init__(self)
self.n_samples = n_samples
self.files = files
self.weight_min = weight_min
self.weight_max = weight_max
def states(self, jam):
mudabox = jam.sandbox.muda
for fname in self.files:
for _ in range(self.n_samples):
start, stop = sample_clip_indices(fname, len(mudabox._audio['y']), mudabox._audio['sr'])
yield dict(filename=fname,
weight=np.random.uniform(low=self.weight_min,
high=self.weight_max,
size=None),
start=start,
stop=stop)
def audio(self, mudabox, state):
weight = state['weight']
fname = state['filename']
start = state['start']
stop = state['stop']
noise = slice_clip(fname, start, stop, len(mudabox._audio['y']),
mudabox._audio['sr'],
mono=mudabox._audio['y'].ndim == 1)
# Normalize the data
mudabox._audio['y'] = librosa.util.normalize(mudabox._audio['y'])
noise = librosa.util.normalize(noise)
mudabox._audio['y'] = ((1.0 - weight) * mudabox._audio['y'] +
weight * noise)