-
Notifications
You must be signed in to change notification settings - Fork 2
/
features.py
84 lines (63 loc) · 2.2 KB
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import io
import urllib.request
import numpy
import pandas
import dask.array
import scipy.io.wavfile
import librosa
def read_audio(path):
    """Fetch a WAV file from a URL and decode it.

    Parameters
    ----------
    path : str
        URL passed to ``urllib.request.urlopen``.

    Returns
    -------
    tuple
        ``(samplerate, samples)`` as returned by ``scipy.io.wavfile.read``.

    Raises
    ------
    AssertionError
        If the file's sample rate is not 44100. The offending rate is the
        exception argument.
    """
    # Close the response deterministically instead of relying on garbage
    # collection to release the underlying connection/file handle.
    with urllib.request.urlopen(path) as response:
        data = io.BytesIO(response.read())

    samplerate, samples = scipy.io.wavfile.read(data)

    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped under ``python -O``. The exception type stays AssertionError
    # because existing callers catch exactly that.
    if samplerate != 44100:
        raise AssertionError(samplerate)
    return samplerate, samples
def meansub(s):
    """Subtract the axis-0 mean (plus a 1e-8 offset) from ``s``.

    The offset is added to the mean before subtraction, so the result is
    shifted by -1e-8 relative to a plain mean-centering.
    """
    offset = numpy.mean(s, axis=0) + 1e-8
    return s - offset
def minmaxscale(s):
    """Rescale ``s`` along axis 0 using its minimum and maximum.

    The lower bound is the axis-0 minimum plus 1e-8; that same shifted
    bound is used both for the numerator and for the range in the
    denominator.
    """
    lower = numpy.min(s, axis=0) + 1e-8
    span = numpy.max(s, axis=0) - lower
    return (s - lower) / span
def melspec_maxp(data, sr):
    """Return the axis-1 maximum of a normalized mel spectrogram.

    A 64-band mel spectrogram (500-15000 Hz, ``n_fft=2048``, HTK formula)
    is computed with librosa, passed through ``meansub`` and then
    ``minmaxscale``, and reduced with a maximum over axis 1.

    Parameters
    ----------
    data : audio samples, passed to librosa as ``y``.
    sr : sample rate of ``data``.

    Returns
    -------
    numpy.ndarray
        Presumably one value per mel band (length 64), assuming librosa
        returns the spectrogram as (n_mels, frames).
    """
    spectrogram = librosa.feature.melspectrogram(
        y=data,
        sr=sr,
        n_mels=64,
        fmin=500,
        n_fft=2048,
        fmax=15000,
        htk=True,
    )
    normalized = minmaxscale(meansub(spectrogram))

    # Kept as a list of parts so further feature groups can be appended.
    parts = [numpy.max(normalized, axis=1)]
    return numpy.concatenate(parts)
def extract_melspec_max(path):
    """Load the audio at ``path`` and compute its ``melspec_maxp`` features.

    If ``read_audio`` raises AssertionError (it does so when the sample
    rate is not 44100), a length-64 vector of NaN is returned instead.
    Any other exception propagates to the caller.
    """
    try:
        sr, audio = read_audio(path)
    except AssertionError:
        features = numpy.full(shape=(64,), fill_value=numpy.nan)
    else:
        features = melspec_maxp(audio.astype(float), sr)
    return features
import itertools
def chunk_sequence(iterable, size):
    """Yield successive lists of up to ``size`` items from ``iterable``.

    The final list may be shorter than ``size``. Nothing is yielded for an
    empty iterable.
    """
    source = iter(iterable)
    while True:
        batch = list(itertools.islice(source, size))
        if not batch:
            return
        yield batch
# Return a Dask.array, for distributed lazy computation of features
def extract(wavfiles, location=None,
            feature_extractor=None, feature_length=None, chunk_size=50):
    """Build a lazy Dask array of feature vectors, one row per input file.

    Files are processed in chunks of ``chunk_size`` so that one Dask task
    handles many files instead of creating one task per file.

    Parameters
    ----------
    wavfiles : iterable
        Items passed one at a time to ``feature_extractor``.
    location : unused
        Accepted for interface compatibility; not read by this function.
    feature_extractor : callable, optional
        Maps one item of ``wavfiles`` to a vector of ``feature_length``
        values. Defaults to ``extract_melspec_max`` (and then
        ``feature_length`` is forced to 64).
    feature_length : int, optional
        Length of each feature vector. Required when ``feature_extractor``
        is given.
    chunk_size : int
        Number of files handled per Dask task.

    Returns
    -------
    dask.array.Array
        Float array with ``feature_length`` columns. NOTE: every chunk is
        allocated with exactly ``chunk_size`` rows, so when the number of
        files is not a multiple of ``chunk_size`` the trailing rows of the
        last chunk are all zeros. Callers should slice to the number of
        input files. For empty ``wavfiles`` an empty ``(0, feature_length)``
        array is returned.

    Raises
    ------
    ValueError
        If a custom ``feature_extractor`` is given without
        ``feature_length``.
    """
    if feature_extractor is None:
        feature_extractor = extract_melspec_max
        feature_length = 64
    if feature_length is None:
        # Fail early: otherwise the error only surfaces at compute time,
        # when the chunk buffer cannot be allocated.
        raise ValueError(
            "feature_length is required when feature_extractor is given")

    # Do processing in chunks, to avoid having too many tasks
    chunk_shape = (chunk_size, feature_length)

    def extract_chunk(urls):
        # Rows past len(urls) are left at zero (padding of the last chunk).
        rows = numpy.zeros(shape=chunk_shape)
        for i, url in enumerate(urls):
            rows[i, :] = feature_extractor(url)
        return rows

    # Named so it does not shadow the enclosing ``extract`` function.
    delayed_extract_chunk = dask.delayed(extract_chunk)

    # ``float`` is what the removed ``numpy.float`` alias referred to.
    arrays = [
        dask.array.from_delayed(
            delayed_extract_chunk(urls), dtype=float, shape=chunk_shape)
        for urls in chunk_sequence(wavfiles, chunk_size)
    ]

    if not arrays:
        # dask.array.concatenate rejects an empty sequence.
        return dask.array.zeros(
            (0, feature_length), dtype=float,
            chunks=((0,), (feature_length,)))

    return dask.array.concatenate(arrays, axis=0)