-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocessing.py
63 lines (41 loc) · 1.85 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from scipy import signal
from scipy.io import wavfile
import numpy as np
import glob
def read_spect_matrix(audio_file_list):
    """Compute one spectrogram per WAV file.

    Each file in ``audio_file_list`` is read with ``scipy.io.wavfile.read``
    and passed to ``scipy.signal.spectrogram`` with that function's default
    settings. The spectrogram is transposed before being stored, so for a
    single-channel recording each entry is laid out as
    (time segments, frequency bins).

    A progress line is printed before each file is processed.

    Args:
        audio_file_list: Sequence of paths to WAV files.

    Returns:
        A list of transposed spectrogram arrays, in the same order as
        ``audio_file_list``.
    """
    spectrograms = []
    for index, wav_path in enumerate(audio_file_list):
        print("Done", index, "/", len(audio_file_list))
        rate, waveform = wavfile.read(wav_path)
        # signal.spectrogram returns (frequencies, times, Sxx); only Sxx is kept.
        power = signal.spectrogram(waveform, rate)[2]
        spectrograms.append(power.T)
    return spectrograms
def _collect_speaker_files(speaker_ids, wav_root):
    """Return (files, owners): every ``.wav`` under each speaker's folder and the matching speaker ID."""
    files = []
    owners = []
    root = glob.escape(wav_root.rstrip('/'))
    for speaker in speaker_ids:
        # Escape the ID so it is matched as a literal directory name.
        pattern = root + '/' + glob.escape(str(speaker)) + '/**/*.wav'
        # glob yields entries in filesystem-dependent order; sorting makes the
        # result (and any later truncation of it) reproducible across machines.
        matches = sorted(glob.iglob(pattern, recursive=True))
        files.extend(matches)
        owners.extend([speaker] * len(matches))
    return files, owners


def get_file_list(meta_path='meta_data.npy', wav_root='vox1_dev/wav',
                  nationality='USA'):
    """List the WAV files of male and female speakers of one nationality.

    The metadata array is loaded from ``meta_path``. Column 0 is used as the
    speaker ID (and as the speaker's folder name under ``wav_root``), column 2
    is compared against ``'m'`` / ``'f'``, and column 3 is compared against
    ``nationality``.

    Args:
        meta_path: Path of the ``.npy`` metadata file.
        wav_root: Directory that contains one sub-directory per speaker ID.
        nationality: Value that column 3 must equal for a speaker to be kept.

    Returns:
        A tuple ``(male_file_list, female_file_list, male_id, female_id)``.
        The two ``*_file_list`` entries are lists of WAV paths found
        recursively under each selected speaker's folder (sorted within each
        speaker); the two ``*_id`` entries are parallel lists giving the
        speaker ID of each path.
    """
    meta_data = np.load(meta_path)
    is_selected = meta_data[:, 3] == nationality

    male_speakers = meta_data[np.logical_and(meta_data[:, 2] == 'm', is_selected), 0]
    female_speakers = meta_data[np.logical_and(meta_data[:, 2] == 'f', is_selected), 0]

    male_file_list, male_id = _collect_speaker_files(male_speakers, wav_root)
    female_file_list, female_id = _collect_speaker_files(female_speakers, wav_root)
    return male_file_list, female_file_list, male_id, female_id
if __name__ == '__main__':
male_file_list, female_file_list,male_id, female_id = get_file_list()
male_spect = read_spect_matrix(male_file_list[:10000])
female_spect = read_spect_matrix(female_file_list[:10000])
np.save('male_spect.npy', np.array(male_spect))
np.save('female_spect.npy', np.array(female_spect))
np.save('male_id.npy', np.array(male_id))
np.save('female_id.npy', np.array(female_id))
print('done!!!')
print(len(male_spect), len(female_spect))