From f67cdd2a1e0a52a0553b0b07c47d7973998bba39 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 10:37:34 +0100
Subject: [PATCH 1/7] Convert job class to abstract class using abc SCMSUITE-8681 SO--

---
 core/basejob.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index a35d53b5..2e1335f6 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -5,6 +5,7 @@
 import time
 from os.path import join as opj
 from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
+from abc import ABC, abstractmethod
 
 from scm.plams.core.enums import JobStatus
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
@@ -28,7 +29,7 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
-class Job:
+class Job(ABC):
     """General abstract class for all kind of computational tasks.
 
     Methods common for all kinds of jobs are gathered here. Instances of |Job| should never be created. It should not be subclassed either. If you wish to define a new type of job please subclass either |SingleJob| or |MultiJob|.
@@ -182,13 +183,13 @@ def ok(self, strict: bool = True) -> bool:
         self.results.wait()
         return self.status in [JobStatus.SUCCESSFUL, JobStatus.COPIED]
 
+    @abstractmethod
     def check(self) -> bool:
-        """Check if the execution of this instance was successful. Abstract method meant for internal use."""
-        raise PlamsError("Trying to run an abstract method Job.check()")
+        """Check if the execution of this instance was successful."""
 
+    @abstractmethod
     def hash(self) -> Optional[str]:
-        """Calculate the hash of this instance. Abstract method meant for internal use."""
-        raise PlamsError("Trying to run an abstract method Job.hash()")
+        """Calculate the hash of this instance."""
 
     def prerun(self) -> None:  # noqa F811
         """Actions to take before the actual job execution.
@@ -248,13 +249,13 @@ def _prepare(self, jobmanager: "JobManager") -> bool:
         self._log_status(3)
         return prev is None
 
+    @abstractmethod
     def _get_ready(self) -> None:
-        """Get ready for :meth:`~Job._execute`. This is the last step before :meth:`~Job._execute` is called. Abstract method."""
-        raise PlamsError("Trying to run an abstract method Job._get_ready()")
+        """Get ready for :meth:`~Job._execute`. This is the last step before :meth:`~Job._execute` is called."""
 
+    @abstractmethod
     def _execute(self, jobrunner: "JobRunner") -> None:
-        """Execute the job. Abstract method."""
-        raise PlamsError("Trying to run an abstract method Job._execute()")
+        """Execute the job."""
 
     def _finalize(self) -> None:
         """Gather the results of the job execution and organize them. This method collects steps 9-12 from :ref:`job-life-cycle`. Should not be overridden."""
@@ -333,15 +334,16 @@ def __init__(self, molecule: Optional[Molecule] = None, **kwargs):
         Job.__init__(self, **kwargs)
         self.molecule = molecule.copy() if isinstance(molecule, Molecule) else molecule
 
+    @abstractmethod
     def get_input(self) -> str:
-        """Generate the input file. Abstract method.
+        """Generate the input file.
 
         This method should return a single string with the full content of the input file. It should process information stored in the ``input`` branch of job settings and in the ``molecule`` attribute.
         """
-        raise PlamsError("Trying to run an abstract method SingleJob._get_input()")
 
+    @abstractmethod
     def get_runscript(self) -> str:
-        """Generate the runscript. Abstract method.
+        """Generate the runscript.
 
         This method should return a single string with the runscript contents. It can process information stored in ``runscript`` branch of job settings. In general the full runscript has the following form::
@@ -355,7 +357,6 @@ def get_runscript(self) -> str:
 
         When overridden, this method should pay attention to ``.runscript.stdout_redirect`` key in job's ``settings``.
         """
-        raise PlamsError("Trying to run an abstract method SingleJob._get_runscript()")
 
     def hash_input(self) -> str:
         """Calculate SHA256 hash of the input file."""
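[Review note on PATCH 1/7] With `Job` now deriving from `ABC`, a subclass that forgets one of the marked overrides fails at construction time with a `TypeError`, instead of raising `PlamsError` only when the missing method is first called mid-run. A minimal sketch of the resulting contract (`EchoJob` and `IncompleteJob` are hypothetical; they mirror the `DummySingleJob` helper in the test suite later in this series, and `check` is implemented explicitly here in case the `SingleJob` defaults do not provide it):

    from scm.plams.core.basejob import SingleJob

    class EchoJob(SingleJob):
        """Toy job that echoes its input file."""

        def get_input(self) -> str:
            return "hello\n"  # full contents of the input file

        def get_runscript(self) -> str:
            return f"cat {self._filename('inp')}"  # shell command to execute

        def check(self) -> bool:
            return True  # success criterion is job-specific

    class IncompleteJob(SingleJob):  # forgets to override get_runscript
        def get_input(self) -> str:
            return ""

    EchoJob(name="echo")  # fine
    IncompleteJob()       # TypeError: Can't instantiate abstract class IncompleteJob ...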
From dc09db82f7c4fb3c9c1e84d0e6f6262d6b87af27 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 11:04:30 +0100
Subject: [PATCH 2/7] Add get_errormsg to base job SCMSUITE-10131 SO107

---
 core/basejob.py    | 12 ++++++++++++
 core/formatters.py | 11 ++---------
 core/jobrunner.py  |  7 +------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index 2e1335f6..fd81e2c5 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -83,6 +83,7 @@ def __init__(
         self.default_settings = [config.job]
         self.depend = depend or []
         self._dont_pickle: List[str] = []
+        self._error_msg: Optional[str] = None
         if settings is not None:
             if isinstance(settings, Settings):
                 self.settings = settings.copy()
@@ -187,6 +188,17 @@ def ok(self, strict: bool = True) -> bool:
     def check(self) -> bool:
         """Check if the execution of this instance was successful."""
 
+    def get_errormsg(self) -> Optional[str]:
+        """Tries to get an error message for a failed job. This method returns ``None`` for successful jobs."""
+        if self.check():
+            return None
+
+        return (
+            self._error_msg
+            if self._error_msg
+            else "Could not determine error message. Please check the output manually."
+        )
+
     @abstractmethod
     def hash(self) -> Optional[str]:
         """Calculate the hash of this instance."""
diff --git a/core/formatters.py b/core/formatters.py
index 902ae881..0a7abef6 100644
--- a/core/formatters.py
+++ b/core/formatters.py
@@ -38,15 +38,8 @@ def _format_job(job: Job) -> Dict[str, Any]:
     if job.status not in [JobStatus.REGISTERED, JobStatus.RUNNING]:
         message.update({"job_ok": job.ok()})
-        try:
-            message.update({"job_check": job.check()})
-        except TypeError:
-            pass
-        try:
-            # this one it is not supported by the Job class but on many jobs they have it implemented
-            message.update({"job_get_errormsg": job.get_errormsg()})
-        except (AttributeError, TypeError):
-            pass
+        message.update({"job_check": job.check()})
+        message.update({"job_get_errormsg": job.get_errormsg()})
 
     if job.parent:
         message["job_parent_name"] = job.parent.name
diff --git a/core/jobrunner.py b/core/jobrunner.py
index b915044f..bc06af77 100644
--- a/core/jobrunner.py
+++ b/core/jobrunner.py
@@ -140,12 +140,7 @@ def _run_job(self, job, jobmanager):
         try:
             # Log any error messages to the standard logger
             if not job.ok(False) or not job.check():
-                # get_errormsg is not required by the base job class, but often implemented by convention
-                err_msg = (
-                    job.get_errormsg()
-                    if hasattr(job, "get_errormsg")
-                    else "Could not determine error message. Please check the output manually."
-                )
+                err_msg = job.get_errormsg()
                 err_lines = err_msg.splitlines()
                 max_lines = 30
                 if len(err_lines) > max_lines:
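[Review note on PATCH 2/7] The base-class `get_errormsg` gives every job a uniform error API, which is what allows the `hasattr`/`try` guards in `formatters.py` and `jobrunner.py` to be deleted above. A sketch of the contract, assuming `job` is any finished `Job` instance:

    if job.check():
        assert job.get_errormsg() is None  # successful jobs report no error
    else:
        # Either a message captured during the run (stored in job._error_msg)
        # or the generic fallback:
        # "Could not determine error message. Please check the output manually."
        print(job.get_errormsg())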
From 92763bc9eff2835becac9d1680df78cb1e3f30c6 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 11:57:41 +0100
Subject: [PATCH 3/7] Add _fail_on_exception decorator SCMSUITE-10131 SO107

---
 core/basejob.py            |  28 ++++-
 interfaces/adfsuite/ams.py |  18 ++-
 unit_tests/test_amsjob.py  |   7 ++
 unit_tests/test_basejob.py | 246 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 289 insertions(+), 10 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index fd81e2c5..9a43d51d 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -4,8 +4,9 @@
 import threading
 import time
 from os.path import join as opj
-from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union, Callable, Any
 from abc import ABC, abstractmethod
+import traceback
 
 from scm.plams.core.enums import JobStatus
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
@@ -29,6 +30,26 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
+def _fail_on_exception(func: Callable[["Job", Any], Any]) -> Callable[["Job", Any], Any]:
+    """Decorator to wrap a job method and mark the job as failed on any exception."""
+
+    def wrapper(self: "Job", *args, **kwargs):
+        try:
+            return func(self, *args, **kwargs)
+        except:
+            # Mark job status as failed and the results as complete
+            self.status = JobStatus.FAILED
+            self.results.finished.set()
+            self.results.done.set()
+            # Notify any parent multi-job of the failure
+            if self.parent and self in self.parent:
+                self.parent._notify()
+            # Store the exception message to be accessed from get_errormsg
+            self._error_msg = traceback.format_exc()
+
+    return wrapper
+
+
 class Job(ABC):
     """General abstract class for all kind of computational tasks.
@@ -130,6 +151,7 @@ def run(
         """
         if self.status != JobStatus.CREATED:
            raise JobError("Trying to run previously started job {}".format(self.name))
+        self._error_msg = None
        self.status = JobStatus.STARTED
        self._log_status(1)
@@ -217,6 +239,7 @@ def postrun(self) -> None:
 
     # =======================================================================
 
+    @_fail_on_exception
     def _prepare(self, jobmanager: "JobManager") -> bool:
         """Prepare the job for execution. This method collects steps 1-7 from :ref:`job-life-cycle`. Should not be overridden. Returned value indicates if job execution should continue (|RPM| did not find this job as previously run)."""
@@ -269,6 +292,7 @@ def _get_ready(self) -> None:
     def _execute(self, jobrunner: "JobRunner") -> None:
         """Execute the job."""
 
+    @_fail_on_exception
     def _finalize(self) -> None:
         """Gather the results of the job execution and organize them. This method collects steps 9-12 from :ref:`job-life-cycle`. Should not be overridden."""
         log("Starting {}._finalize()".format(self.name), 7)
@@ -446,6 +470,7 @@ def _get_ready(self) -> None:
 
             os.chmod(runfile, os.stat(runfile).st_mode | stat.S_IEXEC)
 
+    @_fail_on_exception
     def _execute(self, jobrunner) -> None:
         """Execute previously created runscript using *jobrunner*.
@@ -674,6 +699,7 @@ def _notify(self) -> None:
         with self._lock:
             self._active_children -= 1
 
+    @_fail_on_exception
     def _execute(self, jobrunner: "JobRunner") -> None:
         """Run all children from ``children``. Then use :meth:`~MultiJob.new_children` and run all jobs produced by it.
         Repeat this procedure until :meth:`~MultiJob.new_children` returns an empty list. Wait for all started jobs to finish."""
         log("Starting {}._execute()".format(self.name), 7)
diff --git a/interfaces/adfsuite/ams.py b/interfaces/adfsuite/ams.py
index 5f681562..762f3229 100644
--- a/interfaces/adfsuite/ams.py
+++ b/interfaces/adfsuite/ams.py
@@ -2483,10 +2483,14 @@ def get_errormsg(self) -> Optional[str]:
         if self.check():
             return None
         else:
+            # Check if there is an error captured during the job process, or a previously cached error
+            if self._error_msg:
+                return self._error_msg
+
             default_msg = "Could not determine error message. Please check the output manually."
             msg = None
             try:
-                # Something went wrong. The first place to check is the termination status on the ams.rkf.
+                # If not, the first place to check is the termination status on the ams.rkf.
                 # If the AMS driver stopped with a known error (called StopIt in the Fortran code), the error will be in there.
                 # Status can be:
                 # - NORMAL TERMINATION with errors: find the error from the ams log file
@@ -2503,7 +2507,8 @@ def get_errormsg(self) -> Optional[str]:
                 try:
                     log_err_lines = self.results.grep_file("ams.log", "ERROR: ")
                     if log_err_lines:
-                        return log_err_lines[-1].partition("ERROR: ")[2]
+                        self._error_msg = log_err_lines[-1].partition("ERROR: ")[2]
+                        return self._error_msg
                 except FileError:
                     pass
@@ -2517,7 +2522,8 @@ def get_errormsg(self) -> Optional[str]:
                         match=1,
                     )
                     if license_err_lines:
-                        return str.join("\n", license_err_lines)
+                        self._error_msg = str.join("\n", license_err_lines)
+                        return self._error_msg
                 except FileError:
                     pass
@@ -2537,7 +2543,11 @@ def get_errormsg(self) -> Optional[str]:
                             break
             except:
                 pass
-            return msg if msg else default_msg
+
+            # Cache error message if called again
+            self._error_msg = msg if msg else default_msg
+
+            return self._error_msg
 
     def hash_input(self) -> str:
         """Calculate the hash of the input file.
diff --git a/unit_tests/test_amsjob.py b/unit_tests/test_amsjob.py
index a1204a2c..655270fa 100644
--- a/unit_tests/test_amsjob.py
+++ b/unit_tests/test_amsjob.py
@@ -569,6 +569,7 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
 
         # Invalid license
         job = AMSJob()
+        job._error_msg = None
         results = MagicMock(spec=AMSResults)
         results.readrkf.side_effect = FileError()
         results.grep_file.side_effect = FileError()
@@ -595,6 +596,7 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
         )
 
         # Invalid input
+        job._error_msg = None
         results.grep_file.side_effect = None
         results.grep_file.return_value = [
             ' <12:03:49> ERROR: Input error: value "foo" found in line 13 for multiple choice key "Task" is not an allowed choice'
@@ -605,9 +607,14 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
         )
 
         # Error in calculation
+        job._error_msg = None
         results.readrkf.return_value = "NORMAL TERMINATION with errors"
         results.readrkf.side_effect = None
         results.grep_file.return_value = [
             " <13:44:55> ERROR: Geometry optimization failed! (Did not converge.)"
         ]
         assert job.get_errormsg() == "Geometry optimization failed! (Did not converge.)"
+
+        # Error in prerun
+        job._error_msg = "RuntimeError: something went wrong"
+        assert job.get_errormsg() == "RuntimeError: something went wrong"
diff --git a/unit_tests/test_basejob.py b/unit_tests/test_basejob.py
index 978532b6..e9911e55 100644
--- a/unit_tests/test_basejob.py
+++ b/unit_tests/test_basejob.py
@@ -8,8 +8,8 @@ from io import StringIO
 
 from scm.plams.core.settings import Settings
-from scm.plams.core.basejob import SingleJob
-from scm.plams.core.errors import PlamsError, FileError
+from scm.plams.core.basejob import SingleJob, MultiJob
+from scm.plams.core.errors import PlamsError, FileError, ResultsError
 from scm.plams.core.jobrunner import JobRunner
 from scm.plams.core.jobmanager import JobManager
 from scm.plams.core.functions import add_to_instance
@@ -70,10 +70,17 @@ def get_runscript(self) -> str:
         return f"sleep {self.wait} && {self.command} {self._filename('inp')}"
 
     def check(self) -> bool:
-        return self.results.read_file(self._filename("err")) == ""
+        try:
+            return self.results.read_file(self._filename("err")) == ""
+        except ResultsError:
+            return True
 
     def get_errormsg(self) -> str:
-        return self.results.read_file(self._filename("err"))
+        if self._error_msg:
+            return self._error_msg
+
+        msg = self.results.read_file(self._filename("err"))
+        return msg if msg else None
 
 
 class TestSingleJob:
@@ -124,6 +131,8 @@ def test_run_writes_output_file_on_success(self):
 
         # Then job check passes and output file written
         assert job.check()
+        assert job.ok()
+        assert job.status == JobStatus.SUCCESSFUL
         assert results.read_file("$JN.in") == f"Dummy input {job.id}"
         assert results.read_file("$JN.out") == f"Dummy output {job.id}"
         assert results.read_file("$JN.err") == ""
@@ -135,10 +144,39 @@ def test_run_writes_error_file_on_failure(self):
 
         # Then job check fails and error file written
         assert not job.check()
+        assert not job.ok()
+        assert job.status == JobStatus.CRASHED
         assert results.read_file("$JN.in") == f"Dummy input {job.id}"
         assert results.read_file("$JN.out") == ""
         assert results.read_file("$JN.err") != ""
 
+    def test_run_marks_jobs_as_failed_and_stores_exception_on_prerun_or_postrun_error(self):
+        # Given jobs which error in pre- or post-run
+        job1 = DummySingleJob()
+        job2 = DummySingleJob()
+
+        @add_to_instance(job1)
+        def prerun(s):
+            raise RuntimeError("something went wrong")
+
+        @add_to_instance(job2)
+        def postrun(s):
+            raise RuntimeError("something went wrong")
+
+        # When run jobs
+        job1.run()
+        job2.run()
+
+        # Then status is marked as failed
+        assert job1.status == JobStatus.FAILED
+        assert job2.status == JobStatus.FAILED
+        assert not job1.ok()
+        assert not job2.ok()
+        assert "self.prerun()" in job1.get_errormsg()
+        assert "RuntimeError: something went wrong" in job1.get_errormsg()
+        assert "self.postrun()" in job2.get_errormsg()
+        assert "RuntimeError: something went wrong" in job2.get_errormsg()
+
     @pytest.mark.parametrize(
         "mode,expected",
         [
@@ -184,7 +222,7 @@ def test_run_multiple_independent_jobs_in_parallel(self):
         runner = JobRunner(parallel=True, maxjobs=2)
 
         # When set up two jobs with no dependencies
-        job1 = DummySingleJob(wait=0.2)
+        job1 = DummySingleJob(wait=0.5)
         job2 = DummySingleJob(wait=0.01)
         results = [job1.run(runner), job2.run(runner)]
@@ -244,6 +282,27 @@ def prerun(s):
         assert job2.status == JobStatus.SUCCESSFUL
         assert job2.call_log[2].timestamp >= job1.call_log[-1].timestamp
 
+    def test_run_multiple_dependent_jobs_first_job_fails_in_prerun(self):
+        # Given two dependent jobs where the first errors in prerun
+        job1 = DummySingleJob()
+        job2 = DummySingleJob(depend=[job1])
+
+        @add_to_instance(job1)
+        def prerun(s):
+            raise RuntimeError("something went wrong")
+
+        # When run jobs
+        job1.run()
+        job2.run()
+
+        # Then first job is marked as failed, the dependent job as successful
+        assert job1.status == JobStatus.FAILED
+        assert job2.status == JobStatus.SUCCESSFUL
+        assert not job1.ok()
+        assert job2.ok()
+        assert "RuntimeError" in job1.get_errormsg()
+        assert job2.get_errormsg() is None
+
     def test_ok_waits_on_results_and_checks_status(self):
         # Given job and a copy
         job1 = DummySingleJob(wait=0.1)
@@ -344,3 +403,180 @@ def test_job_errors_logged_to_stdout(self, config):
             stdout,
             re.DOTALL,
         )
+
+
+class TestMultiJob:
+    """
+    Test suite for the MultiJob.
+    Not truly independent as it relies upon the job runner/manager and results components.
+    But this suite focuses on testing the methods on the job class itself.
+    """
+
+    def test_run_multiple_independent_single_jobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are independent
+            jobs = [DummySingleJob() for _ in range(3)]
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs])
+
+    def test_run_multiple_independent_single_jobs_one_fails(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are independent, one of which fails
+            jobs = [DummySingleJob(), DummySingleJob(cmd="not_a_cmd"), DummySingleJob()]
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job fails
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [j.status for j in jobs] == [JobStatus.SUCCESSFUL, JobStatus.CRASHED, JobStatus.SUCCESSFUL]
+
+    def test_run_multiple_dependent_single_jobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are dependent
+            jobs = [DummySingleJob() for _ in range(3)]
+
+            @add_to_instance(jobs[1])
+            def prerun(s):
+                jobs[0].results.wait()
+
+            @add_to_instance(jobs[2])
+            def postrun(s):
+                jobs[1].results.wait()
+
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs])
+
+    def test_run_multiple_independent_single_jobs_error_in_prerun_or_postrun(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 independent jobs, where one errors in prerun and another in postrun
+            jobs = [DummySingleJob() for _ in range(3)]
+
+            @add_to_instance(jobs[1])
+            def prerun(s):
+                raise RuntimeError("something went wrong")
+
+            @add_to_instance(jobs[2])
+            def postrun(s):
+                raise RuntimeError("something went wrong")
+
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job failed
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [j.status for j in jobs] == [JobStatus.SUCCESSFUL, JobStatus.FAILED, JobStatus.FAILED]
+
+    def test_run_multiple_independent_multijobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple multi-jobs
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([mj.status == JobStatus.SUCCESSFUL for mj in multi_jobs])
+            assert all([j.status == JobStatus.SUCCESSFUL for js in jobs for j in js])
+
+    def test_run_multiple_dependent_multijobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=5)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple dependent multi-jobs
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            @add_to_instance(jobs[1][0])
+            def prerun(s):
+                jobs[0][0].results.wait()
+
+            @add_to_instance(multi_jobs[1])
+            def postrun(s):
+                multi_jobs[0].results.wait()
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([mj.status == JobStatus.SUCCESSFUL for mj in multi_jobs])
+            assert all([j.status == JobStatus.SUCCESSFUL for js in jobs for j in js])
+
+    def test_run_multiple_dependent_multijobs_one_fails(self, config):
+        runner = JobRunner(parallel=True, maxjobs=5)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple dependent multi-jobs and one which fails
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            jobs[1][1].command = "not_a_cmd"
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then overall multi-job failed
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [mj.status for mj in multi_jobs] == [
+                JobStatus.SUCCESSFUL,
+                JobStatus.FAILED,
+                JobStatus.SUCCESSFUL,
+            ]
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs[0]])
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs[2]])
+            assert [j.status for j in jobs[1]] == [
+                JobStatus.SUCCESSFUL,
+                JobStatus.CRASHED,
+                JobStatus.SUCCESSFUL,
+            ]
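[Review note on PATCH 3/7] End to end, the `_fail_on_exception` wrapper means an exception raised in `prerun`/`postrun` no longer propagates out of a worker and leaves the job (or a parent `MultiJob`) waiting forever; the job is marked failed and the traceback surfaces through `get_errormsg`. A sketch mirroring the new unit tests (`DummySingleJob` is the test helper defined above):

    from scm.plams.core.enums import JobStatus
    from scm.plams.core.functions import add_to_instance

    job = DummySingleJob()

    @add_to_instance(job)
    def prerun(s):
        raise RuntimeError("something went wrong")

    job.run()  # the exception is caught by the decorator, not raised here

    assert job.status == JobStatus.FAILED
    assert not job.ok()
    # get_errormsg() returns the stored traceback, ending in:
    #     RuntimeError: something went wrong
    print(job.get_errormsg())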
From 521ac3535c8155f36511c56b94ca8467c17454f0 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 12:11:21 +0100
Subject: [PATCH 4/7] Update CHANGELOG SCMSUITE-10131 SO107

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 988a13f3..2d45dfac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ This changelog is effective from the 2025 releases.
 * Build using `pyproject.toml`, addition of extras groups to install optional dependencies
 * Logging of job summaries to CSV logfile
 * Logging of AMS job error messages to stdout and logfile on job failure
+* Method `get_errormsg` enforced on the `Job` base class, with a default implementation
 
 ### Changed
 * Functions for optional packages (e.g. RDKit, ASE) are available even when these packages are not installed, but will raise an `MissingOptionalPackageError` when called
@@ -33,12 +34,15 @@
 * Restructuring of examples and conversion of various examples to notebooks
 * Support for `networkx>=3` and `ase>=3.23`
 * Use standard library logger for `log` function
+* Make `Job` class inherit from `ABC` and mark abstract methods
+* Exceptions raised in `prerun` and `postrun` will always be caught and populate error message
 
 ### Fixed
 * `Molecule.properties.charge` is a numeric instead of string type when loading molecule from a file
 * `Molecule.delete_all_bonds` removes the reference molecule from the removed bond instances
 * `SingleJob.load` returns the correctly loaded job
 * `AMSJob.check` handles a `NoneType` status, returning `False`
+* `MultiJob.run` locking resolved when errors raised within `prerun` and `postrun` methods
 
 ### Deprecated
 * `plams` launch script is deprecated in favour of simply running with `amspython`
From a7a376eaa965466280fef7d050baa35337189aa4 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 15:40:54 +0100
Subject: [PATCH 5/7] Fix mypy issues SCMSUITE-10131 SO107

---
 core/basejob.py             | 12 ++++++------
 interfaces/adfsuite/ams.py  |  6 +++---
 mol/molecule.py             |  4 ++--
 unit_tests/test_molecule.py |  9 +++++++++
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index 9a43d51d..e53efc35 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -4,7 +4,7 @@
 import threading
 import time
 from os.path import join as opj
-from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union, Callable, Any
+from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
 from abc import ABC, abstractmethod
 import traceback
@@ -30,7 +30,7 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
-def _fail_on_exception(func: Callable[["Job", Any], Any]) -> Callable[["Job", Any], Any]:
+def _fail_on_exception(func):
     """Decorator to wrap a job method and mark the job as failed on any exception."""
 
     def wrapper(self: "Job", *args, **kwargs):
@@ -39,11 +39,11 @@ def wrapper(self: "Job", *args, **kwargs):
         except:
             # Mark job status as failed and the results as complete
             self.status = JobStatus.FAILED
-            self.results.finished.set()
-            self.results.done.set()
+            self.results.finished.set()  # type: ignore
+            self.results.done.set()  # type: ignore
             # Notify any parent multi-job of the failure
-            if self.parent and self in self.parent:
-                self.parent._notify()
+            if self.parent and self in self.parent:  # type: ignore
+                self.parent._notify()  # type: ignore
             # Store the exception message to be accessed from get_errormsg
             self._error_msg = traceback.format_exc()
 
diff --git a/interfaces/adfsuite/ams.py b/interfaces/adfsuite/ams.py
index 762f3229..e7480651 100644
--- a/interfaces/adfsuite/ams.py
+++ b/interfaces/adfsuite/ams.py
@@ -1802,8 +1802,8 @@ def get_density_along_axis(
 
         start_step, end_step, every, _ = self._get_integer_start_end_every_max(start_fs, end_fs, every_fs, None)
         nEntries = self.readrkf("History", "nEntries")
-        coords = np.array(self.get_history_property("Coords")).reshape(nEntries, -1, 3)
-        coords = coords[start_step:end_step:every]
+        history_coords = np.array(self.get_history_property("Coords")).reshape(nEntries, -1, 3)
+        coords = history_coords[start_step:end_step:every]
         nEntries = len(coords)
 
         axis2index = {"x": 0, "y": 1, "z": 2}
@@ -2507,7 +2507,7 @@ def get_errormsg(self) -> Optional[str]:
                 try:
                     log_err_lines = self.results.grep_file("ams.log", "ERROR: ")
                     if log_err_lines:
-                        self._error_msg = log_err_lines[-1].partition("ERROR: ")[2]
+                        self._error_msg: Optional[str] = log_err_lines[-1].partition("ERROR: ")[2]
                         return self._error_msg
                 except FileError:
                     pass
diff --git a/mol/molecule.py b/mol/molecule.py
index 4f82d1db..10506d76 100644
--- a/mol/molecule.py
+++ b/mol/molecule.py
@@ -1487,7 +1487,7 @@ def get_fragment(self, indices):
 
         return ret
 
-    def get_complete_molecules_within_threshold(self, atom_indices, threshold: float):
+    def get_complete_molecules_within_threshold(self, atom_indices: List[int], threshold: float):
         """
         Returns a new molecule containing complete submolecules for any molecules
         that are closer than ``threshold`` to any of the atoms in ``atom_indices``.
@@ -1509,7 +1509,7 @@
         D = distance_array(solvated_coords, solvated_coords)[zero_based_indices]
         less_equal = np.less_equal(D, threshold)
         within_threshold = np.any(less_equal, axis=0)
-        good_indices = [i for i, value in enumerate(within_threshold) if value]
+        good_indices = [i for i, value in enumerate(within_threshold) if value]  # type: ignore
 
         complete_indices: Set[int] = set()
         for indlist in molecule_indices:
diff --git a/unit_tests/test_molecule.py b/unit_tests/test_molecule.py
index 564f36ce..1159db9c 100644
--- a/unit_tests/test_molecule.py
+++ b/unit_tests/test_molecule.py
@@ -216,6 +216,15 @@ def test_system_and_atomic_charge(self, mol):
         with pytest.raises(MoleculeError):
             assert mol.guess_atomic_charges() == [1, 1, 0, 0]
 
+    def test_get_complete_molecules_within_threshold(self, mol):
+        m0 = mol.get_complete_molecules_within_threshold([2], 0)
+        m1 = mol.get_complete_molecules_within_threshold([2], 1)
+        m2 = mol.get_complete_molecules_within_threshold([2], 2)
+
+        assert m0.get_formula() == "H"
+        assert m1.get_formula() == "HO"
+        assert m2.get_formula() == "H2O"
+
 
 class TestNiO(MoleculeTestBase):
     """
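[Review note on PATCH 5/7] Besides the type hints, the new `test_get_complete_molecules_within_threshold` pins down the selection behaviour of the `Molecule` method. In script form (assuming, as in that test, the water-based `mol` fixture from the suite, where 1-based atom index 2 is a hydrogen, and the threshold is a distance, presumably in angstrom):

    # The three thresholds reproduce the assertions in the unit test above:
    m0 = mol.get_complete_molecules_within_threshold([2], 0)  # -> formula "H"
    m1 = mol.get_complete_molecules_within_threshold([2], 1)  # -> formula "HO"
    m2 = mol.get_complete_molecules_within_threshold([2], 2)  # -> formula "H2O"
    assert m2.get_formula() == "H2O"  # wide enough to pull in the whole molecule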
From 0f6ce1761ad28b55e437e4a3716468f81e5ba702 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Wed, 11 Dec 2024 11:54:29 +0100
Subject: [PATCH 6/7] Update job status type hint SCMSUITE-10131 SO107

---
 core/basejob.py |  6 +++---
 core/enums.py   | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index e53efc35..1127f202 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -8,7 +8,7 @@
 from abc import ABC, abstractmethod
 import traceback
 
-from scm.plams.core.enums import JobStatus
+from scm.plams.core.enums import JobStatus, JobStatusType
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
 from scm.plams.core.functions import config, log
 from scm.plams.core.private import sha256
@@ -124,14 +124,14 @@ def __init__(
 
     # =======================================================================
 
     @property
-    def status(self) -> JobStatus:
+    def status(self) -> JobStatusType:
         """
         Current status of the job
         """
         return self._status
 
     @status.setter
-    def status(self, value: JobStatus) -> None:
+    def status(self, value: JobStatusType) -> None:
         # This setter should really be private i.e. internally should use self._status
         # But for backwards compatibility it is exposed and set by e.g. the JobManager
         self._status = value
diff --git a/core/enums.py b/core/enums.py
index d28970a4..117baa05 100644
--- a/core/enums.py
+++ b/core/enums.py
@@ -1,3 +1,5 @@
+from typing import Literal, Union
+
 # Import StrEnum for Python >=3.11, otherwise use backwards compatible class
 try:
     from enum import StrEnum  # type: ignore
@@ -13,7 +15,7 @@ def __str__(self) -> str:
         return str(self.value)
 
 
-__all__ = ["JobStatus"]
+__all__ = ["JobStatus", "JobStatusType"]
 
 
 class JobStatus(StrEnum):
@@ -32,3 +34,21 @@ class JobStatus(StrEnum):
     COPIED = "copied"
     PREVIEW = "preview"
     DELETED = "deleted"
+
+
+JobStatusType = Union[
+    JobStatus,
+    Literal[
+        "created",
+        "started",
+        "registered",
+        "running",
+        "finished",
+        "crashed",
+        "failed",
+        "successful",
+        "copied",
+        "preview",
+        "deleted",
+    ],
+]

From d90658138e9101e57ffd32fe4d31d34b253114f0 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Wed, 11 Dec 2024 12:05:25 +0100
Subject: [PATCH 7/7] Remove bare exception catching SCMSUITE-10131 SO107

---
 core/basejob.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/basejob.py b/core/basejob.py
index 1127f202..fef4a864 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -36,7 +36,7 @@ def _fail_on_exception(func):
     def wrapper(self: "Job", *args, **kwargs):
         try:
             return func(self, *args, **kwargs)
-        except:
+        except Exception:
             # Mark job status as failed and the results as complete
             self.status = JobStatus.FAILED
             self.results.finished.set()  # type: ignore
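[Review note on PATCH 6/7] The `JobStatusType` alias keeps the `status` property backwards compatible: mypy accepts both enum members and the equivalent plain string literals, and because `JobStatus` is a `StrEnum`, the two compare equal at runtime. A hypothetical helper showing that both spellings type-check:

    from scm.plams.core.enums import JobStatus, JobStatusType

    def is_terminal(status: JobStatusType) -> bool:
        # StrEnum members compare equal to their raw string values
        return status in ("successful", "failed", "crashed", "copied", "deleted")

    assert is_terminal(JobStatus.SUCCESSFUL)  # enum member
    assert is_terminal("failed")              # plain literal, as legacy code sets it

The follow-up in PATCH 7/7 narrows the decorator's bare `except:` to `except Exception:`, so `BaseException` subclasses such as `KeyboardInterrupt` and `SystemExit` propagate instead of silently marking the job as failed.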