Skip to content

Commit

Permalink
Downgraded requirements for datasets and sacremoses, updated Thot
Browse files (browse the repository at this point in the history)
  • Loading branch information
TaperChipmunk32 committed Oct 25, 2024
1 parent 81c52e7 commit 11c60ef
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 41 deletions.
7 changes: 2 additions & 5 deletions machine/translation/thot/simplex_model_weight_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,8 @@ def _generate_translations(
try:
model = load_smt_model(self._word_alignment_model_type, parameters)
decoder = load_smt_decoder(model, parameters)
- if decoder is not None:
- translations = decoder.translate_batch([to_sentence(s) for s in source_corpus])
- return [to_target_tokens(t.target) for t in translations]
- else:
- raise ValueError("Decoder could not be loaded.")
+ translations = decoder.translate_batch([to_sentence(s) for s in source_corpus])
+ return [to_target_tokens(t.target) for t in translations]
finally:
if decoder is not None:
decoder.clear()
Expand Down
4 changes: 0 additions & 4 deletions machine/translation/thot/thot_smt_model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,7 @@ def _train_tune_corpus(
for i in range(len(tune_source_corpus)):
if i > 0:
progress(ProgressStatus.from_step(i, len(tune_source_corpus)))
- if decoder is None or smt_model is None:
- raise RuntimeError("Decoder or SMT model is None")
decoder.train_sentence_pair(to_sentence(tune_source_corpus[i]), to_sentence(tune_target_corpus[i]))
- if smt_model is None:
- raise RuntimeError("SMT model is None")
smt_model.print_translation_model(parameters.translation_model_filename_prefix)
smt_model.print_language_model(parameters.language_model_filename_prefix)
progress(ProgressStatus.from_step(len(tune_source_corpus), len(tune_source_corpus)))
Expand Down
4 changes: 0 additions & 4 deletions machine/translation/thot/thot_word_alignment_model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ def __init__(
if model_type >= ThotWordAlignmentModelType.IBM2:
if parameters.get_hmm_iteration_count(model_type) > 0:
ibm2_or_hmm = ta.HmmAlignmentModel(ibm1)
- if ibm2_or_hmm is None:
- raise ValueError("ibm2_or_hmm should not be None")
if parameters.hmm_p0 is not None:
ibm2_or_hmm.hmm_p0 = parameters.hmm_p0
if parameters.hmm_lexical_smoothing_factor is not None:
Expand All @@ -100,8 +98,6 @@ def __init__(
and parameters.get_ibm3_iteration_count(model_type) > 0
):
ibm3 = ta.Ibm3AlignmentModel(ibm2_or_hmm)
- if ibm3 is None:
- raise ValueError("ibm3 should not be None")
if parameters.ibm3_fertility_smoothing_factor is not None:
ibm3.fertility_smoothing_factor = parameters.ibm3_fertility_smoothing_factor
if parameters.ibm3_count_threshold is not None:
Expand Down
50 changes: 26 additions & 24 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ norecursedirs = "tests/testutils"
[tool.pyright]
typeCheckingMode = "basic"
extraPaths = ["tests"]
+ reportMissingModuleSource = false

[tool.poetry]
name = "sil-machine"
Expand Down Expand Up @@ -61,11 +62,11 @@ charset-normalizer = "^2.1.1"


sentencepiece = "^0.2.0"
- sil-thot = "^3.4.4"
+ sil-thot = "^3.4.5"

- transformers = "^4.38.0, <4.46.0"
- datasets = "^3.0.0"
- sacremoses = "^0.1.0"
+ transformers = ">=4.38.0, <4.46.0"
+ datasets = "^2.4.0"
+ sacremoses = "^0.0.53"

clearml = { extras = ["s3"], version = "^1.13.1" }
botocore = "^1.35.41"
Expand Down

0 comments on commit 11c60ef

Please sign in to comment.