Skip to content

Commit

Permalink
fix audio file bug: add the ability to read multiple audio file format.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ilyes REBAI committed Feb 3, 2020
1 parent 5c5d45f commit 6f6b4d7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ RUN apt-get update &&\
libtool-bin \
make \
sox \
libsox-fmt-all \
libav-tools \
subversion \
vorbis-tools \
Expand Down
2 changes: 1 addition & 1 deletion document/swagger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ paths:
produces:
- "application/json"
parameters:
- name: "wavFile"
- name: "file"
in: "formData"
description: "Wave File"
required: true
Expand Down
35 changes: 19 additions & 16 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ def run_shell_command(command_line):
return False, ''

def decode(wav_file,wav_name):
b,o=run_shell_command("sox "+wav_file+".wav -t wav -b 16 -r 16000 -c 1 "+wav_file+"_tmp.wav")
if not b:
return False, ''
b,o=run_shell_command("sox "+wav_file+" -t wav -b 16 -r 16000 -c 1 "+wav_file+"_tmp.wav")
if not b or 'error' in o or 'FAIL' in o:
return False, 'Audio file format is not supported!!! Supported formats are wav, mp3, aiff, flac, and ogg.'
b,o=run_shell_command("mv "+wav_file+"_tmp.wav "+wav_file+".wav")
if not b:
return False, ''


wav_file = wav_file+".wav"
decode_conf = TEMP_FILE_PATH1+"/online.conf"
decode_mdl = AM_PATH+"/"+AM_FILE_PATH+"/final.mdl"
decode_graph = LM_PATH+"/HCLG.fst"
Expand All @@ -64,12 +64,9 @@ def decode(wav_file,wav_name):
b,o=run_shell_command("kaldi-nnet2-latgen-faster --do-endpointing=false --online=false --config="+decode_conf+" --min-active="+DECODER_MINACT+" --max-active="+DECODER_MAXACT+" --beam="+DECODER_BEAM+" --lattice-beam="+DECODER_LATBEAM+" --acoustic-scale="+DECODER_ACWT+" --word-symbol-table="+decode_words+" "+decode_mdl+" "+decode_graph+" \"ark:echo "+wav_name+" "+wav_name+"|\" \"scp:echo "+wav_name+" "+wav_file+"|\" ark:"+TEMP_FILE_PATH+"/"+wav_name+".lat")
else:
b=False
o='KaldiFatalError decode param is not recognized'

if not b or 'KaldiFatalError' in o:
print(o)
return False, ''

return False, 'The decoder params of the acoustic model in "decode.cfg" file are not correct!!'
hypothesis = re.findall('\n'+wav_name+'.*',o)
#app.logger.info(hypothesis)
o=re.sub(wav_name,'',hypothesis[0]).strip()
Expand All @@ -83,17 +80,23 @@ def transcribe():
global busy
busy=1
fileid = str(uuid.uuid4())
if 'wavFile' in request.files.keys():
file = request.files['wavFile']
filename = TEMP_FILE_PATH+'/'+fileid+'.wav'
file.save(filename)
b, out = decode(filename,fileid)
if not b:
if 'file' in request.files.keys():
file = request.files['file']
file_ext = file.filename.rsplit('.', 1)[-1].lower()
file_type = file.content_type.rsplit('/', 1)[0]
if file_type == "audio":
filename = TEMP_FILE_PATH+'/'+fileid+'.'+file_ext
file.save(filename)
b, out = decode(filename,fileid)
if not b:
busy=0
return 'Error while file transcription: '+out, 400
else:
busy=0
abort(403)
return 'Error while file transcription: The uploaded file format is not supported!!! Supported formats are wav, mp3, aiff, flac, and ogg.', 400
else:
busy=0
return 'No wave file was uploaded', 404
return 'No audio file was uploaded', 400

# Delete temporary files
for file in os.listdir(TEMP_FILE_PATH):
Expand Down

0 comments on commit 6f6b4d7

Please sign in to comment.