forked from srviest/SoloLa
-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
executable file
·163 lines (154 loc) · 7.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import librosa as rosa
import numpy as np
import guitar_trans.te_note_tracking as note_tracking
import guitar_trans.parameters as pm
from guitar_trans import models
from guitar_trans.song import *
from guitar_trans.note import *
from guitar_trans.contour import *
from guitar_trans.technique import *
from guitar_trans.evaluation import evaluation_note, evaluation_esn, evaluation_ts
from melody_extraction import extract_melody
from os import path, sep, makedirs
# Number of audio samples cut out around every technique candidate:
# 0.14 * 44100, rounded to the nearest integer.
# NOTE(review): presumably 0.14 s at a 44.1 kHz sample rate -- confirm that
# this agrees with pm.SAMPLING_RATE and with the rate of the loaded audio.
_CLIP_SPAN = 0.14
_CLIP_RATE = 44100
N_BIN = int(round(_CLIP_SPAN * _CLIP_RATE))

# Number of melody-contour frames cut out around every technique candidate,
# taken from the project-wide parameter module.
N_FRAME = pm.MC_LENGTH
def transcribe(audio, melody, asc_model_fp, desc_model_fp, save_dir, audio_fn):
    """Track notes on a melody contour and label their playing techniques.

    The melody is first split into notes by ``note_tracking.tent``.  Every
    segment of every note becomes a *candidate*: a clip of ``N_BIN`` audio
    samples plus ``N_FRAME`` melody frames around the segment's middle frame.
    Candidates are grouped by direction (ascending when ``seg.val >= 0``,
    descending otherwise), classified with the model for that direction, and
    the predicted technique is attached to the owning note.

    Files written into ``save_dir`` (created if missing):
    ``FilteredMelody.txt``, ``TentNotes.txt``, ``NoNextNote.txt``,
    ``CandidateResults.txt`` and ``FinalNotes.txt``.

    Args:
        audio: Sliceable sequence of audio samples.
        melody: Melody contour; passed to ``note_tracking.tent`` and sliced
            by frame index.
        asc_model_fp: File path of the model used for ascending candidates.
        desc_model_fp: File path of the model used for descending candidates.
        save_dir: Output directory for the text files listed above.
        audio_fn: Name used as the prefix of every candidate clip name.

    Returns:
        The list of remaining notes, each converted with
        ``discrete_to_cont(pm.HOP_LENGTH, pm.SAMPLING_RATE)``.

    Raises:
        AssertionError: If a candidate window does not yield exactly
            ``N_BIN`` samples or ``N_FRAME`` melody frames (for example
            when the window runs past either end of the recording).
    """
    if not path.exists(save_dir):
        makedirs(save_dir)
    # A single-argument print() behaves identically on Python 2 and 3.  The
    # text is exactly what the former multi-item print statement emitted.
    print(' Output directory:  \n  {}'.format(save_dir))

    trend, new_melody, notes = note_tracking.tent(melody, debug=save_dir)
    np.savetxt(save_dir + sep + 'FilteredMelody.txt', new_melody.seq, fmt='%.8f')
    np.savetxt(save_dir + sep + 'TentNotes.txt',
               [n.discrete_to_cont(pm.HOP_LENGTH, pm.SAMPLING_RATE).array_repr()
                for n in notes],
               fmt='%.8f')

    # Multiplying a frame index by `rate` gives HOP_LENGTH / SAMPLING_RATE
    # units per frame (presumably seconds -- confirm against pm).
    rate = float(pm.HOP_LENGTH) / float(pm.SAMPLING_RATE)
    # Explicit floor division keeps the window bounds integral on Python 3 too.
    half_window = N_FRAME // 2

    # (technique to test on the note, label stored in the result row).
    # Release is stored negated, as in the original per-technique checks.
    preset_labels = (
        (T_BEND, T_BEND),
        (T_RELEASE, -T_RELEASE),
        (T_SLIDE_IN, T_SLIDE_IN),
        (T_SLIDE_OUT, T_SLIDE_OUT),
        (T_VIBRATO, T_VIBRATO),
    )

    cand_dict = {pm.D_ASCENDING: [], pm.D_DESCENDING: []}
    cand_results = []
    for nt in notes:
        # Techniques already set by the note tracker are reported directly.
        for tech_id, label in preset_labels:
            if nt.tech(tech_id).value > 0:
                cand_results.append([nt.onset * rate, nt.offset * rate, label])

        for seg in nt.segs:
            mid_frame = nt.onset + seg.mid
            start_i = mid_frame - half_window
            end_i = mid_frame + N_FRAME - half_window
            start_bin = start_i * pm.HOP_LENGTH
            sub_audio = audio[start_bin: start_bin + N_BIN]
            sub_mc = melody[start_i: end_i]
            assert len(sub_audio) == N_BIN, \
                'audio clip has {} samples, expected {}'.format(len(sub_audio), N_BIN)
            assert len(sub_mc) == N_FRAME, \
                'melody clip has {} frames, expected {}'.format(len(sub_mc), N_FRAME)
            sub_fn = audio_fn + '_' + str(mid_frame)
            direction = pm.D_ASCENDING if seg.val >= 0 else pm.D_DESCENDING
            cand_dict[direction].append(
                (sub_audio, sub_mc, sub_fn, nt, seg, start_i, end_i))
            # rosa.output.write_wav('trans/audio/clip_'+sub_fn+'.wav', sub_audio, sr=pm.SAMPLING_RATE, norm=False)

    no_next = []
    for direction in cand_dict:
        print('Processing direction {}'.format(direction))
        cand_list = cand_dict[direction]
        if not cand_list:
            continue
        model_fp = asc_model_fp if direction == pm.D_ASCENDING else desc_model_fp
        # The sign depends only on the direction.  It is computed before the
        # candidate loop because the "no next note" branch below needs it;
        # previously it was assigned only at the end of the loop body, so
        # that branch read an unbound or stale value.
        sign = 1 if direction == pm.D_ASCENDING else -1

        pred_list = classification(model_fp, [cand[:3] for cand in cand_list])
        for pred, cand in zip(pred_list, cand_list):
            sub_audio, sub_mc, sub_fn, nt, seg, start_i, end_i = cand
            t_name = pm.inv_tech_dict[direction][np.argmax(pred)]
            t_type = get_tech(t_name, direction)
            origin_t_val = nt.tech(t_type).value
            if t_type in (T_BEND, T_RELEASE):
                t_val = int(round(seg.diff()))
            else:
                t_val = origin_t_val + 1

            # NOTE(review): the nesting below was reconstructed from a copy
            # of this file whose indentation was lost -- confirm against the
            # upstream source, in particular where merge_note() and
            # add_tech() sit.
            if t_type < T_NORMAL:
                ### Merge Notes
                if nt.next_note is None:
                    # A disabled earlier variant re-labelled hammer/pull/slide
                    # candidates as bend/release here instead of skipping.
                    print('No next note. Ignore this candidate.')
                    no_next.append([start_i * rate, end_i * rate, t_type * sign])
                    continue
                elif t_type in [T_BEND, T_RELEASE]:
                    if nt.next_note in notes:
                        notes.remove(nt.next_note)
                    nt.merge_note(nt.next_note)
                elif t_type in [T_HAMMER, T_PULL, T_SLIDE]:
                    tv = nt.next_note.tech(t_type).value
                    nt.next_note.add_tech(Tech(t_type, tv + 2))
            nt.add_tech(Tech(t_type, t_val))
            cand_results.append([start_i * rate, end_i * rate, t_type * sign])

    np.savetxt(save_dir + sep + 'NoNextNote.txt', no_next, fmt='%.8f')
    np.savetxt(save_dir + sep + 'CandidateResults.txt', cand_results, fmt='%.8f')
    # note.merge_notes(notes)
    cont_notes = [nt.discrete_to_cont(pm.HOP_LENGTH, pm.SAMPLING_RATE) for nt in notes]
    np.savetxt(save_dir + sep + 'FinalNotes.txt',
               [n.array_repr() for n in cont_notes], fmt='%.8f')
    return cont_notes
def classification(model_fp, cand_list):
    """Classify technique candidates with the model stored at ``model_fp``.

    Args:
        model_fp: File path handed to ``models.Model.init_from_file``.
        cand_list: Iterable of candidates; the first three items of each
            candidate are passed positionally to ``extract_features``.

    Returns:
        Whatever ``model.run`` returns for the extracted feature list
        (one prediction per candidate, as consumed by the caller).
    """
    classifier = models.Model.init_from_file(model_fp)
    features = []
    for candidate in cand_list:
        # Only the leading three fields are feature inputs; any extra
        # bookkeeping fields on the candidate are ignored.
        leading = candidate[:3]
        features.append(classifier.extract_features(*leading))
    return classifier.run(features)
def get_tech(t_name, direction):
    """Translate a predicted technique name into its technique constant.

    A bend is direction dependent: it maps to ``T_BEND`` for an ascending
    candidate and to ``T_RELEASE`` for a descending one.  All other names
    map to one constant regardless of direction.

    Args:
        t_name: Technique name, compared against the ``pm`` name constants.
        direction: ``pm.D_ASCENDING`` or ``pm.D_DESCENDING``.

    Returns:
        One of ``T_BEND``, ``T_RELEASE``, ``T_HAMMER``, ``T_PULL``,
        ``T_SLIDE`` or ``T_NORMAL``.

    Raises:
        ValueError: If ``t_name`` matches none of the known names (this
            includes a bend whose direction is neither constant).
    """
    if t_name == pm.BEND:
        if direction == pm.D_ASCENDING:
            return T_BEND
        if direction == pm.D_DESCENDING:
            return T_RELEASE
        # Unknown direction: fall through to the remaining comparisons.
    if t_name == pm.HAMM:
        return T_HAMMER
    if t_name == pm.PULL:
        return T_PULL
    if t_name == pm.SLIDE:
        return T_SLIDE
    if t_name == pm.NORMAL:
        return T_NORMAL
    raise ValueError("t_name shouldn't be {}.".format(t_name))
def main(audio_fp, asc_model_fp, desc_model_fp, output_dir, mc_fp=None, eval_note=None, eval_ts=None):
    """Transcribe one audio file and optionally evaluate the result.

    Args:
        audio_fp: File path of the audio to transcribe.
        asc_model_fp: File path of the ascending-candidate model.
        desc_model_fp: File path of the descending-candidate model.
        output_dir: Parent output directory; results go into a
            sub-directory named after the audio file (without extension).
        mc_fp: Optional file path of a melody contour readable by
            ``np.loadtxt``.  When None, the melody is extracted from the
            audio with ``extract_melody``.
        eval_note: Optional answer file; when given, note and expression
            style evaluations are run on the transcription.
        eval_ts: Optional answer file; currently only loaded (see TODO).
    """
    base_name = path.basename(audio_fp)
    audio_fn = path.splitext(base_name)[0]
    save_dir = path.join(output_dir, audio_fn)

    # Use the supplied melody contour if there is one, else extract it.
    if mc_fp is not None:
        mc_midi = np.loadtxt(mc_fp)
    else:
        mc, mc_midi = extract_melody(audio_fp, save_dir)

    # sr=None keeps the file's native sample rate.
    # NOTE(review): nothing here checks it against pm.SAMPLING_RATE -- confirm.
    audio, sr = rosa.load(audio_fp, sr=None, mono=True)
    melody = Contour(0, mc_midi)
    notes = transcribe(audio, melody, asc_model_fp, desc_model_fp, save_dir, audio_fn)

    if eval_note is not None:
        song = Song(name=audio_fn)
        song.load_esn_list(eval_note)
        answers = song.es_note_list
        evaluation_note(answers, notes, save_dir, audio_fn, string='evaluate notes')
        evaluation_esn(song.es_note_list, notes, save_dir, audio_fn, string='evaluate esn')

    if eval_ts is not None:
        ans_list = np.loadtxt(eval_ts)
        # TODO
def parser():
    """Parse the command-line arguments of the transcription script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv`` with the
        attributes ``audio_fp``, ``asc_model_fp``, ``desc_model_fp``,
        ``output_dir``, ``melody_contour`` and ``evaluate``.
    """
    import argparse

    # Kept flush-left: RawDescriptionHelpFormatter prints the text verbatim.
    banner = """
===================================================================
Script for transcribing a song.
===================================================================
"""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner)

    # Required input.
    cli.add_argument(
        'audio_fp', type=str, metavar='audio_fp',
        help='The filepath of the audio to be transcribed.')

    # Model locations.
    cli.add_argument(
        '-a', '--asc_model_fp', type=str, metavar='asc_model_fp',
        default='models/cnn_normmc/ascending.npz',
        help='The name of the ascending model.')
    cli.add_argument(
        '-d', '--desc_model_fp', type=str, metavar='desc_model_fp',
        default='models/cnn_normmc/descending.npz',
        help='The name of the descending model.')

    # Output and optional inputs.
    cli.add_argument(
        '-o', '--output_dir', type=str, metavar='output_dir',
        default='outputs',
        help='The output directory.')
    cli.add_argument(
        '-m', '--melody_contour', type=str, default=None,
        help='The filepath of melody contour.')
    cli.add_argument(
        '-e', '--evaluate', type=str, default=None,
        help='The filepath of answer file.')

    parsed = cli.parse_args()
    return parsed
# Script entry point: parse the command line and run the transcription.
if __name__ == '__main__':
    cli_args = parser()
    main(
        cli_args.audio_fp,
        cli_args.asc_model_fp,
        cli_args.desc_model_fp,
        cli_args.output_dir,
        cli_args.melody_contour,  # becomes main()'s mc_fp
        cli_args.evaluate,        # becomes main()'s eval_note
    )