Skip to content

Commit

Permalink
Fail job when logs cannot be copied
Browse files Browse the repository at this point in the history
Signed-off-by: Kelly A <kellyaa@users.noreply.github.com>
  • Loading branch information
kellyaa committed May 9, 2024
1 parent ccd1787 commit d9c0709
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
3 changes: 2 additions & 1 deletion build/launch_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def main():
shutil.copy(train_logs_filepath, original_output_dir)
except Exception as e: # pylint: disable=broad-except
logging.error(traceback.format_exc())
# Continue, don't fail the training because of this
write_termination_log("Exception encountered in capturing training logs")
sys.exit(INTERNAL_ERROR_EXIT_CODE)

return 0

Expand Down
8 changes: 7 additions & 1 deletion tests/build/test_launch_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"prompt_tuning_init_text": "hello",
"tokenizer_name_or_path": MODEL_NAME,
"save_strategy": "epoch",
"output_dir": "tmp"
"output_dir": "tmp",
}


Expand Down Expand Up @@ -138,3 +138,9 @@ def test_config_parsing_error():
main()
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE


def test_cleanup():
    """Unset LAUNCH_TRAINING_SCRIPT so it cannot disrupt other tests.

    The variable is removed from ``os.environ`` whether or not it is
    currently set, and the test then verifies that it is really gone.
    """
    # pop() with a default: a variable that is already absent is not an error.
    os.environ.pop("LAUNCH_TRAINING_SCRIPT", None)
    # Check the actual post-condition instead of a vacuous `assert True`.
    assert "LAUNCH_TRAINING_SCRIPT" not in os.environ

0 comments on commit d9c0709

Please sign in to comment.