From 1914d5c869cae2a17a56991dee897a903c222627 Mon Sep 17 00:00:00 2001 From: ZDisket <30500847+ZDisket@users.noreply.github.com> Date: Mon, 30 Nov 2020 18:21:23 -0300 Subject: [PATCH] Change how end padding is applied, model JSON now has field to specify it --- Voice.cpp | 2 +- VoxCommon.cpp | 3 ++- VoxCommon.hpp | 2 ++ mainwindow.cpp | 10 ++-------- phoneticdict.cpp | 4 ++-- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/Voice.cpp b/Voice.cpp index f1af53c..4ebcbfa 100644 --- a/Voice.cpp +++ b/Voice.cpp @@ -135,7 +135,7 @@ void Voice::AddPhonemizer(Phonemizer *InPhn) std::vector Voice::Vocalize(const std::string & Prompt, float Speed, int32_t SpeakerID, float Energy, float F0, int32_t EmotionID) { - std::string PhoneticTxt = Processor.ProcessTextPhonetic(Prompt,Phonemes,CurrentDict,(ETTSLanguage::Enum)VoxInfo.Language); + std::string PhoneticTxt = Processor.ProcessTextPhonetic(Prompt + VoxInfo.EndPadding,Phonemes,CurrentDict,(ETTSLanguage::Enum)VoxInfo.Language); TFTensor Mel = MelPredictor.DoInference(PhonemesToID(PhoneticTxt), SpeakerID, Speed, Energy, F0,EmotionID); diff --git a/VoxCommon.cpp b/VoxCommon.cpp index 413c7b9..e58b1a4 100644 --- a/VoxCommon.cpp +++ b/VoxCommon.cpp @@ -66,7 +66,8 @@ VoiceInfo VoxUtil::ReadModelJSON(const std::string &InfoFilename) JS["note"].get(), JS["sarate"].get(), Lang, - LanguageNames[Lang]}; + LanguageNames[Lang], + " " + JS["pad"].get()}; // Add a space for separation since we directly append the value to the prompt if (Inf.Note.size() > MaxNoteSize) Inf.Note = Inf.Note.substr(0,MaxNoteSize); diff --git a/VoxCommon.hpp b/VoxCommon.hpp index 3099b13..e49f09f 100644 --- a/VoxCommon.hpp +++ b/VoxCommon.hpp @@ -79,6 +79,8 @@ struct VoiceInfo{ uint32_t Language; std::string s_Language; + std::string EndPadding; + }; diff --git a/mainwindow.cpp b/mainwindow.cpp index 8d893e1..d70e9ce 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -179,8 +179,6 @@ void MainWindow::on_btnInfer_clicked() on_btnLoad_clicked(); - - // Convert to lowercase here before we add phonemes QString BeforeInput = ui->edtInput->toPlainText(); QString RawInput = BeforeInput; @@ -189,11 +187,6 @@ void MainWindow::on_btnInfer_clicked() - - - - - QStringList InputSplits; QStringList BeforeSplits; @@ -251,7 +244,7 @@ void MainWindow::on_btnInfer_clicked() Dets.Speed = RangeToFloat(ui->sliSpeed->value()); Dets.Energy = RangeToFloat(ui->sliEnergy->value()); Dets.pItem = widItm; - Dets.Prompt = idvInput + " @SIL @END"; + Dets.Prompt = idvInput; Dets.SpeakerID = 0; Dets.EmotionID = -1; Dets.Denoise = ui->chkDenoise->isChecked(); @@ -322,6 +315,7 @@ void MainWindow::PlayBuffer(QBuffer *pBuff,bool ByUser) pBuff->open(QBuffer::ReadWrite); + StdOutput->start(pBuff); CanPlayAudio = false; diff --git a/phoneticdict.cpp b/phoneticdict.cpp index 8a21890..e9d6ebd 100644 --- a/phoneticdict.cpp +++ b/phoneticdict.cpp @@ -37,8 +37,8 @@ bool PhoneticDict::Import(const QString &infn) if (fi.GetFileLength() == 0){ - fi.Close(); - return true; + fi.Close(); + return true; }