librosa

1 minute read

Published:

Just stumbled upon this powerful Python package for audio analysis: librosa (see also McFee et al. 2015). Extracting waveforms and decomposing them into harmonic/percussive components can each be done in a one-liner, and it also substantially simplifies more complicated analyses like synchronization and Laplacian segmentation. Here is a quick decomposition of a sample Rammstein song from YouTube:

Merged variable sampling rate:

Components on the waveform Components

import librosa
import librosa.display
import numpy as np
from pydub import AudioSegment
from pafy  import new

import matplotlib.pyplot as plt
from scipy.io.wavfile import write


# download the audio track from youtube as .m4a
fmt  = 'm4a'
url  = 'https://www.youtube.com/watch?v=IxuEtL7gxoM'
v    =  new(url)
a    =  v.audiostreams
exts = [i.extension for i in a]
# stop here with a clear message instead of failing later on a missing file
if fmt not in exts:
    raise RuntimeError(f'no {fmt!r} audio stream for {url}; available: {exts}')
# download() returns the path it actually wrote; the saved name may differ
# from the raw title (e.g. characters that are not valid in filenames), so
# load from the returned path rather than rebuilding it from v.title
audio_path = a[exts.index(fmt)].download()

# waveform and sampling rate
# by default sr = 22050 Hz
#            wf = mono signal
wf, sr = librosa.load(audio_path)

# split the time-domain signal into its harmonic and percussive parts
harmonic, percussive = librosa.effects.hpss(y=wf)

# one panel per component, each drawn over the full signal in gray
time = np.arange(len(wf)) / sr
fig, ax = plt.subplots(nrows=2, figsize=(10,4), sharex=True, sharey=True)
panels = (('harmonic', harmonic), ('percussive', percussive))
for axis, (label, component) in zip(ax, panels):
    axis.plot(time, wf, color='gray')  # full waveform as backdrop
    axis.plot(time, component)
    axis.set_title(label)
# the x axis is shared, so only the bottom panel carries the label
ax[-1].set_xlabel('Time')
plt.tight_layout()
plt.show()


# write two excerpts, both starting a quarter of the way into the signal, with
# different nominal sampling rates, then concatenate them through pydub
istart = len(wf) // 4
srx    = 10000
# file -> (sampling rate written to the header, number of samples);
# each excerpt lasts 5 s at the rate it is written with
clips = {
    'slow.wav': (srx, 5 * srx),
    'fast.wav': (4 * srx, 20 * srx),
}
for path, (rate, nsamples) in clips.items():
    write(path, rate, wf[istart:istart + nsamples])

fast, slow = (AudioSegment.from_wav(path) for path in ("fast.wav", "slow.wav"))
combined = fast + slow
fname = 'librosa_varying_sr.wav'
combined.export(fname, format="wav")


# decomposition of the merged file in the time-frequency domain
wf, sr = librosa.load(fname)

# hpss on the STFT returns the harmonic and percussive spectrograms
harmonic, percussive = librosa.decompose.hpss(librosa.stft(wf))

# express both components in dB relative to the peak magnitude of their sum
total = harmonic + percussive
rp = np.max(np.abs(total))
harmonic_db = librosa.amplitude_to_db(np.abs(harmonic), ref=rp)
percussive_db = librosa.amplitude_to_db(np.abs(percussive), ref=rp)

# use one color scale for both panels; otherwise each panel autoscales on its
# own and a single colorbar cannot describe both of them
vmin = min(harmonic_db.min(), percussive_db.min())
vmax = max(harmonic_db.max(), percussive_db.max())

fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
# keep the mappable of a panel that is actually displayed for the colorbar
img = librosa.display.specshow(harmonic_db, sr=sr, vmin=vmin, vmax=vmax,
                               y_axis='log', x_axis='time', ax=ax[0])
ax[0].set(title='harmonic')
ax[0].label_outer()

librosa.display.specshow(percussive_db, sr=sr, vmin=vmin, vmax=vmax,
                         y_axis='log', x_axis='time', ax=ax[1])
ax[1].set(title='percussive')
fig.colorbar(img, ax=ax)
plt.show()