-
Notifications
You must be signed in to change notification settings - Fork 3
/
feats.py
58 lines (45 loc) · 1.94 KB
/
feats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import numpy as np
import scipy.io
import pandas as pd
import librosa
import pickle
import soundfile as sound
from multiprocessing import Pool
# ---------------------------------------------------------------------------
# Configuration for the log-mel feature extraction script below.
#
# NOTE(review): the three path assignments (data_path, csv_file, output_path)
# have only a comment on their right-hand side, so the file is not valid
# Python as written. Replace each placeholder with a real string before
# running.
# ---------------------------------------------------------------------------
# String prefix prepended to every 'filename' entry of the CSV when the audio
# is read (plain concatenation, so include a trailing separator if needed).
data_path = #path
# Tab-separated metadata file; its 'filename' column lists the audio files.
csv_file = data_path + #csvfile
# String prefix for the output pickles; created as a directory if missing.
output_path = #path
# Appended to each output file name (acts as the output extension).
feature_type = 'logmel'
# Sample rate handed to librosa; also used to size the read (duration * sr).
# NOTE(review): files are not resampled, so they are presumably already
# recorded at this rate - confirm.
sr = 44100
# Length of audio read per file; the read stops at duration * sr frames, so
# this is in seconds when sr is in Hz.
duration = 10
# Number of mel bands (passed as n_mels).
num_freq_bin = 128
# FFT window size (passed as n_fft).
num_fft = 2048
# Hop between successive frames: half the FFT window (1024 samples here).
hop_length = int(num_fft / 2)
# Size of the time axis of the feature array: ceil(duration * sr / hop_length),
# which is 431 for the values above.
num_time_bin = int(np.ceil(duration * sr / hop_length))
# Size of the last axis of the feature array; only index 0 is ever filled.
num_channel = 1
# ---------------------------------------------------------------------------
# Feature extraction: for every file listed in the CSV, compute a log-mel
# spectrogram, standardise each mel band over time, and pickle the result as
# {'feat_data': float32 array of shape (num_freq_bin, num_time_bin, num_channel)}.
# ---------------------------------------------------------------------------

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(output_path, exist_ok=True)

data_df = pd.read_csv(csv_file, sep='\t', encoding='ASCII')
wavpath = data_df['filename'].tolist()

for wav_name in wavpath:
    # Read at most duration * sr frames; the file's own sample rate is
    # returned as the second value but is not used.
    stereo, _ = sound.read(data_path + wav_name, stop=duration * sr)

    logmel_data = np.zeros((num_freq_bin, num_time_bin, num_channel), 'float32')
    # The signal must be passed as the keyword `y`: recent librosa releases
    # no longer accept it positionally.
    # NOTE(review): this assignment assumes a mono signal whose mel output has
    # exactly num_time_bin frames; multi-channel or short files are presumably
    # not expected here - confirm against the dataset.
    logmel_data[:, :, 0] = librosa.feature.melspectrogram(
        y=stereo,
        sr=sr,
        n_fft=num_fft,
        hop_length=hop_length,
        n_mels=num_freq_bin,
        fmin=0.0,
        fmax=sr / 2,
        htk=True,
        norm=None,
    )
    # Small offset keeps log() finite for empty mel bins.
    logmel_data = np.log(logmel_data + 1e-8)

    # Standardise every mel band (row) to zero mean / unit variance over time.
    # `band_view` is a view, so the in-place write updates logmel_data.
    band_view = logmel_data[:, :, 0]
    band_mean = band_view.mean(axis=1, keepdims=True)
    band_std = band_view.std(axis=1, keepdims=True)
    # A constant band has zero std and yields 0/0 = NaN; that case is handled
    # explicitly on the next line, so silence only the "invalid" warning.
    with np.errstate(invalid='ignore'):
        band_view[...] = (band_view - band_mean) / band_std
    band_view[np.isnan(band_view)] = 0.

    feature_data = {'feat_data': logmel_data}
    # Output name: CSV filename without its first 5 and last 3 characters,
    # followed by feature_type (presumably this strips an "audio" directory
    # prefix and a "wav" extension - verify against the CSV contents).
    cur_file_name = output_path + wav_name[5:-3] + feature_type
    # Context manager guarantees the handle is flushed and closed per file.
    with open(cur_file_name, 'wb') as out_file:
        pickle.dump(feature_data, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    print(cur_file_name)