librosa
Published:
I just stumbled upon a powerful Python package for audio analysis: librosa (see also McFee et al. 2015). Extracting waveforms and decomposing them into harmonic/percussive components can each be done in a one-liner, and it also substantially simplifies more complicated analyses like synchronization and Laplacian segmentation. Here is a quick decomposition of a sample Rammstein song from YouTube:
import librosa
import librosa.display
import numpy as np
from pydub import AudioSegment
from pafy import new
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
# Download the audio track from YouTube as .m4a.
fmt = 'm4a'
url = 'https://www.youtube.com/watch?v=IxuEtL7gxoM'
v = new(url)
a = v.audiostreams
exts = [i.extension for i in a]
if fmt not in exts:
    # Fail here with a clear message instead of letting librosa.load
    # raise an unrelated file-not-found error further down.
    raise RuntimeError(
        f'no {fmt} audio stream available for {url}; found: {exts}')
# download() returns the path it actually wrote. pafy builds the file name
# from a sanitised title (e.g. path separators are replaced), so guessing
# f'{v.title}.{fmt}' is not guaranteed to match the file on disk.
audio_path = a[exts.index(fmt)].download()
# Waveform and sampling rate.
# By default librosa.load resamples to sr = 22050 Hz and returns
# wf as a mono signal.
wf, sr = librosa.load(audio_path)
# Split the time-domain signal into harmonic and percussive waveforms.
harmonic, percussive = librosa.effects.hpss(y=wf)

# Time axis in seconds: sample index divided by the sampling rate.
time = np.arange(len(wf)) / sr

# One row per component; each component is drawn on top of the full
# signal (gray) so the two can be compared on shared axes.
fig, ax = plt.subplots(nrows=2, figsize=(10,4), sharex=True, sharey=True)
for panel, component, label in zip(ax,
                                   (harmonic, percussive),
                                   ('harmonic', 'percussive')):
    panel.plot(time, wf, color='gray')
    panel.plot(time, component)
    panel.set_title(label)
# Only the bottom panel carries the x-axis label.
ax[1].set(xlabel='Time')
plt.tight_layout()
plt.show()
# Cut two excerpts starting a quarter of the way into the track and write
# them to disk with different nominal sampling rates (srx and 4*srx), so the
# same underlying samples play back at different speeds.
srx = 10000
istart = len(wf) // 4
slow_clip = wf[istart: istart + 5*srx]    # at most 5 s when played at srx
fast_clip = wf[istart: istart + 20*srx]   # at most 5 s when played at 4*srx
write('slow.wav', srx, slow_clip)
write('fast.wav', 4*srx, fast_clip)

# Read both files back through pydub and concatenate them, fast part first.
slow = AudioSegment.from_wav("slow.wav")
fast = AudioSegment.from_wav("fast.wav")
combined = fast + slow
fname = 'librosa_varying_sr.wav'
combined.export(fname, format="wav")
# Spectral decomposition of the merged clip.
wf, sr = librosa.load(fname)
# decompose.hpss operates on the STFT matrix (not on the waveform) and
# returns the harmonic and percussive spectrograms.
harmonic, percussive = librosa.decompose.hpss(librosa.stft(wf))
total = harmonic + percussive
# Common dB reference: the largest magnitude of the recombined spectrogram.
rp = np.max(np.abs(total))
# Shared colour limits taken from the recombined spectrogram. Without them
# each panel autoscales its own colours and a single colorbar cannot
# describe both.
total_db = librosa.amplitude_to_db(np.abs(total), ref=rp)
vmin, vmax = total_db.min(), total_db.max()
fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
# Every specshow call targets an explicit axis; a call without `ax=` would
# draw on the current axes and be hidden under the percussive panel.
for axis, spec, title in zip(ax,
                             (harmonic, percussive),
                             ('harmonic', 'percussive')):
    img = librosa.display.specshow(
        librosa.amplitude_to_db(np.abs(spec), ref=rp),
        sr=sr, y_axis='log', x_axis='time',
        vmin=vmin, vmax=vmax, ax=axis)
    axis.set(title=title)
# Hide the x tick labels and label on the upper panel (axes are shared).
ax[0].label_outer()
# Both panels use the same limits and colormap, so either image can drive
# the shared colorbar.
fig.colorbar(img, ax=ax)
plt.show()