Skip to content

Commit

Permalink
Correct indentation, change how lowercase is done to avoid lowercasing phonetic input
Browse files Browse the repository at this point in the history
  • Loading branch information
ZDisket committed Oct 24, 2020
1 parent a01979d commit 2459b8b
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 32 deletions.
56 changes: 28 additions & 28 deletions FastSpeech2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ bool FastSpeech2::Initialize(const std::string & SavedModelFolder)

TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs, int32_t SpeakerID, float Speed, float Energy, float F0, int32_t EmotionID)
{
if (!FastSpeech)
throw std::exception("Tried to do inference on unloaded or invalid model!");

// Convenience reference so that we don't have to constantly derefer pointers.
Model& Mdl = *FastSpeech;

// Define the tensors
Tensor input_ids{ Mdl,"serving_default_input_ids" };
Tensor energy_ratios{ Mdl,"serving_default_energy_ratios" };
Tensor f0_ratios{ Mdl,"serving_default_f0_ratios" };
Tensor speaker_ids{ Mdl,"serving_default_speaker_ids" };
Tensor speed_ratios{ Mdl,"serving_default_speed_ratios" };
if (!FastSpeech)
throw std::exception("Tried to do inference on unloaded or invalid model!");

// Convenience reference so that we don't have to constantly derefer pointers.
Model& Mdl = *FastSpeech;

// Define the tensors
Tensor input_ids{ Mdl,"serving_default_input_ids" };
Tensor energy_ratios{ Mdl,"serving_default_energy_ratios" };
Tensor f0_ratios{ Mdl,"serving_default_f0_ratios" };
Tensor speaker_ids{ Mdl,"serving_default_speaker_ids" };
Tensor speed_ratios{ Mdl,"serving_default_speed_ratios" };
Tensor* emotion_ids = nullptr;

// This is a multi-emotion model
Expand All @@ -51,39 +51,39 @@ TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs, i
}


// This is the shape of the input IDs, our equivalent to tf.expand_dims.
std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };
// This is the shape of the input IDs, our equivalent to tf.expand_dims.
std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };

input_ids.set_data(InputIDs, InputIDShape);
energy_ratios.set_data(std::vector<float>{ Energy });
f0_ratios.set_data(std::vector<float>{F0});
speaker_ids.set_data(std::vector<int32_t>{SpeakerID});
speed_ratios.set_data(std::vector<float>{Speed});
input_ids.set_data(InputIDs, InputIDShape);
energy_ratios.set_data(std::vector<float>{ Energy });
f0_ratios.set_data(std::vector<float>{F0});
speaker_ids.set_data(std::vector<int32_t>{SpeakerID});
speed_ratios.set_data(std::vector<float>{Speed});

// Define output tensor
Tensor output{ Mdl,"StatefulPartitionedCall" };
// Define output tensor
Tensor output{ Mdl,"StatefulPartitionedCall" };


// Vector of input tensors
std::vector<Tensor*> inputs = { &input_ids,&speaker_ids,&speed_ratios,&f0_ratios,&energy_ratios };
// Vector of input tensors
std::vector<Tensor*> inputs = { &input_ids,&speaker_ids,&speed_ratios,&f0_ratios,&energy_ratios };

if (EmotionID != -1)
inputs.push_back(emotion_ids);


// Do inference
FastSpeech->run(inputs, output);
// Do inference
FastSpeech->run(inputs, output);

// Define output and return it
TFTensor<float> Output = VoxUtil::CopyTensor<float>(output);
// Define output and return it
TFTensor<float> Output = VoxUtil::CopyTensor<float>(output);

// We allocated the emotion_ids tensor dynamically, delete it
if (emotion_ids)
delete emotion_ids;

// We could just straight out define it in the return statement, but I like it more this way

return Output;
return Output;
}

FastSpeech2::~FastSpeech2()
Expand Down
2 changes: 1 addition & 1 deletion TensorVox.pro
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,5 @@ RESOURCES += \

win32:RC_ICONS += winicon.ico

VERSION = 0.7.9.2
VERSION = 0.8.2.0
CONFIG += force_debug_info
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ void FramelessWindow::setContent(QWidget *w)
ui->windowContent->setLayout(&contentLayout);
}


void FramelessWindow::ContentDlg(QDialog *indlg)
{
ContDlg = true;
Expand All @@ -104,6 +105,7 @@ void FramelessWindow::SetTitleBarBtns(bool Maximize, bool Minimize, bool Close)

// Updates the `titleText` label in this window's UI so that it displays `text`.
void FramelessWindow::setWindowTitle(const QString &text)
{

ui->titleText->setText(text);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@
</item>
<item>
<widget class="QLabel" name="titleText">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="font">
<font>
<weight>75</weight>
Expand Down
14 changes: 11 additions & 3 deletions mainwindow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void MainWindow::showEvent(QShowEvent *e)


#endif
FwParent->setWindowTitle("TensorVox");

e->accept();
}
Expand Down Expand Up @@ -181,7 +182,7 @@ void MainWindow::on_btnInfer_clicked()


// Convert to lowercase here before we add phonemes
QString BeforeInput = ui->edtInput->toPlainText().toLower();
QString BeforeInput = ui->edtInput->toPlainText();
QString RawInput = BeforeInput;
QString Input = RawInput.replace("\n"," ");
const int MaxShowInputLen = ui->lstUtts->size().width() / 6;
Expand Down Expand Up @@ -413,7 +414,12 @@ QStringList MainWindow::SuperWordSplit(const QString &InStr, int MaxLen)
if (CurrentStr.size() > 0)
CurrentStr.append(" ");

CurrentStr.append(RawWords[Idx]);
QString CuWord = RawWords[Idx];

if (!CuWord.contains("@")) // phonetic input has to be uppercase
CuWord = CuWord.toLower();

CurrentStr.append(CuWord);

if (CurrentStr.length() > MaxLen){
SplitStrs.append(CurrentStr);
Expand Down Expand Up @@ -456,7 +462,9 @@ void MainWindow::ProcessCurlies(QString &ModTxt)
continue;


NewTokens.push_back("@" + Tk);


NewTokens.push_back("@" + Tk.toUpper());

}

Expand Down

0 comments on commit 2459b8b

Please sign in to comment.