From f67cdd2a1e0a52a0553b0b07c47d7973998bba39 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 10:37:34 +0100
Subject: [PATCH 1/7] Convert job class to abstract class using abc SCMSUITE-8681 SO--

---
 core/basejob.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index a35d53b5..2e1335f6 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -5,6 +5,7 @@
 import time
 from os.path import join as opj
 from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
+from abc import ABC, abstractmethod
 
 from scm.plams.core.enums import JobStatus
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
@@ -28,7 +29,7 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
-class Job:
+class Job(ABC):
     """General abstract class for all kind of computational tasks.
 
     Methods common for all kinds of jobs are gathered here. Instances of |Job| should never be created. It should not be subclassed either. If you wish to define a new type of job please subclass either |SingleJob| or |MultiJob|.
@@ -182,13 +183,13 @@ def ok(self, strict: bool = True) -> bool:
         self.results.wait()
         return self.status in [JobStatus.SUCCESSFUL, JobStatus.COPIED]
 
+    @abstractmethod
     def check(self) -> bool:
-        """Check if the execution of this instance was successful. Abstract method meant for internal use."""
-        raise PlamsError("Trying to run an abstract method Job.check()")
+        """Check if the execution of this instance was successful."""
 
+    @abstractmethod
     def hash(self) -> Optional[str]:
-        """Calculate the hash of this instance. Abstract method meant for internal use."""
-        raise PlamsError("Trying to run an abstract method Job.hash()")
+        """Calculate the hash of this instance."""
 
     def prerun(self) -> None:  # noqa F811
         """Actions to take before the actual job execution.
@@ -248,13 +249,13 @@ def _prepare(self, jobmanager: "JobManager") -> bool:
         self._log_status(3)
         return prev is None
 
+    @abstractmethod
     def _get_ready(self) -> None:
-        """Get ready for :meth:`~Job._execute`. This is the last step before :meth:`~Job._execute` is called. Abstract method."""
-        raise PlamsError("Trying to run an abstract method Job._get_ready()")
+        """Get ready for :meth:`~Job._execute`. This is the last step before :meth:`~Job._execute` is called."""
 
+    @abstractmethod
     def _execute(self, jobrunner: "JobRunner") -> None:
-        """Execute the job. Abstract method."""
-        raise PlamsError("Trying to run an abstract method Job._execute()")
+        """Execute the job."""
 
     def _finalize(self) -> None:
         """Gather the results of the job execution and organize them. This method collects steps 9-12 from :ref:`job-life-cycle`. Should not be overridden."""
@@ -333,15 +334,16 @@ def __init__(self, molecule: Optional[Molecule] = None, **kwargs):
         Job.__init__(self, **kwargs)
         self.molecule = molecule.copy() if isinstance(molecule, Molecule) else molecule
 
+    @abstractmethod
     def get_input(self) -> str:
-        """Generate the input file. Abstract method.
+        """Generate the input file.
 
         This method should return a single string with the full content of the input file. It should process information stored in the ``input`` branch of job settings and in the ``molecule`` attribute.
         """
-        raise PlamsError("Trying to run an abstract method SingleJob._get_input()")
 
+    @abstractmethod
     def get_runscript(self) -> str:
-        """Generate the runscript. Abstract method.
+        """Generate the runscript.
 
         This method should return a single string with the runscript contents. It can process information stored in ``runscript`` branch of job settings. In general the full runscript has the following form::
@@ -355,7 +357,6 @@ def get_runscript(self) -> str:
 
         When overridden, this method should pay attention to ``.runscript.stdout_redirect`` key in job's ``settings``.
         """
-        raise PlamsError("Trying to run an abstract method SingleJob._get_runscript()")
 
     def hash_input(self) -> str:
         """Calculate SHA256 hash of the input file."""
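[Review note on PATCH 1/7] With `Job` now deriving from `ABC`, a subclass that forgets one of the marked overrides fails at construction time with a `TypeError`, instead of raising `PlamsError` only when the missing method is first called mid-run. A minimal sketch of the resulting contract (`EchoJob` and `IncompleteJob` are hypothetical; they mirror the `DummySingleJob` helper in the test suite later in this series, and `check` is implemented explicitly here in case the `SingleJob` defaults do not provide it):

    from scm.plams.core.basejob import SingleJob

    class EchoJob(SingleJob):
        """Toy job that echoes its input file."""

        def get_input(self) -> str:
            return "hello\n"  # full contents of the input file

        def get_runscript(self) -> str:
            return f"cat {self._filename('inp')}"  # shell command to execute

        def check(self) -> bool:
            return True  # success criterion is job-specific

    class IncompleteJob(SingleJob):  # forgets to override get_runscript
        def get_input(self) -> str:
            return ""

    EchoJob(name="echo")  # fine
    IncompleteJob()       # TypeError: Can't instantiate abstract class IncompleteJob ...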
From dc09db82f7c4fb3c9c1e84d0e6f6262d6b87af27 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 11:04:30 +0100
Subject: [PATCH 2/7] Add get_errormsg to base job SCMSUITE-10131 SO107

---
 core/basejob.py    | 12 ++++++++++++
 core/formatters.py | 11 ++---------
 core/jobrunner.py  |  7 +------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index 2e1335f6..fd81e2c5 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -83,6 +83,7 @@ def __init__(
         self.default_settings = [config.job]
         self.depend = depend or []
         self._dont_pickle: List[str] = []
+        self._error_msg: Optional[str] = None
         if settings is not None:
             if isinstance(settings, Settings):
                 self.settings = settings.copy()
@@ -187,6 +188,17 @@ def ok(self, strict: bool = True) -> bool:
     def check(self) -> bool:
         """Check if the execution of this instance was successful."""
 
+    def get_errormsg(self) -> Optional[str]:
+        """Tries to get an error message for a failed job. This method returns ``None`` for successful jobs."""
+        if self.check():
+            return None
+
+        return (
+            self._error_msg
+            if self._error_msg
+            else "Could not determine error message. Please check the output manually."
+        )
+
     @abstractmethod
     def hash(self) -> Optional[str]:
         """Calculate the hash of this instance."""
diff --git a/core/formatters.py b/core/formatters.py
index 902ae881..0a7abef6 100644
--- a/core/formatters.py
+++ b/core/formatters.py
@@ -38,15 +38,8 @@ def _format_job(job: Job) -> Dict[str, Any]:
     if job.status not in [JobStatus.REGISTERED, JobStatus.RUNNING]:
         message.update({"job_ok": job.ok()})
-        try:
-            message.update({"job_check": job.check()})
-        except TypeError:
-            pass
-        try:
-            # this one it is not supported by the Job class but on many jobs they have it implemented
-            message.update({"job_get_errormsg": job.get_errormsg()})
-        except (AttributeError, TypeError):
-            pass
+        message.update({"job_check": job.check()})
+        message.update({"job_get_errormsg": job.get_errormsg()})
 
     if job.parent:
         message["job_parent_name"] = job.parent.name
diff --git a/core/jobrunner.py b/core/jobrunner.py
index b915044f..bc06af77 100644
--- a/core/jobrunner.py
+++ b/core/jobrunner.py
@@ -140,12 +140,7 @@ def _run_job(self, job, jobmanager):
         try:
             # Log any error messages to the standard logger
             if not job.ok(False) or not job.check():
-                # get_errormsg is not required by the base job class, but often implemented by convention
-                err_msg = (
-                    job.get_errormsg()
-                    if hasattr(job, "get_errormsg")
-                    else "Could not determine error message. Please check the output manually."
-                )
+                err_msg = job.get_errormsg()
                 err_lines = err_msg.splitlines()
                 max_lines = 30
                 if len(err_lines) > max_lines:
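[Review note on PATCH 2/7] The base-class `get_errormsg` gives every job a uniform error API, which is what allows the `hasattr`/`try` guards in `formatters.py` and `jobrunner.py` to be deleted above. A sketch of the contract, assuming `job` is any finished `Job` instance:

    if job.check():
        assert job.get_errormsg() is None  # successful jobs report no error
    else:
        # Either a message captured during the run (stored in job._error_msg)
        # or the generic fallback:
        # "Could not determine error message. Please check the output manually."
        print(job.get_errormsg())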
From 92763bc9eff2835becac9d1680df78cb1e3f30c6 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 11:57:41 +0100
Subject: [PATCH 3/7] Add _fail_on_exception decorator SCMSUITE-10131 SO107

---
 core/basejob.py            |  28 ++++-
 interfaces/adfsuite/ams.py |  18 ++-
 unit_tests/test_amsjob.py  |   7 ++
 unit_tests/test_basejob.py | 246 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 289 insertions(+), 10 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index fd81e2c5..9a43d51d 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -4,8 +4,9 @@
 import threading
 import time
 from os.path import join as opj
-from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union, Callable, Any
 from abc import ABC, abstractmethod
+import traceback
 
 from scm.plams.core.enums import JobStatus
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
@@ -29,6 +30,26 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
+def _fail_on_exception(func: Callable[["Job", Any], Any]) -> Callable[["Job", Any], Any]:
+    """Decorator to wrap a job method and mark the job as failed on any exception."""
+
+    def wrapper(self: "Job", *args, **kwargs):
+        try:
+            return func(self, *args, **kwargs)
+        except:
+            # Mark job status as failed and the results as complete
+            self.status = JobStatus.FAILED
+            self.results.finished.set()
+            self.results.done.set()
+            # Notify any parent multi-job of the failure
+            if self.parent and self in self.parent:
+                self.parent._notify()
+            # Store the exception message to be accessed from get_errormsg
+            self._error_msg = traceback.format_exc()
+
+    return wrapper
+
+
 class Job(ABC):
     """General abstract class for all kind of computational tasks.
@@ -130,6 +151,7 @@ def run(
         """
         if self.status != JobStatus.CREATED:
            raise JobError("Trying to run previously started job {}".format(self.name))
+        self._error_msg = None
        self.status = JobStatus.STARTED
        self._log_status(1)
@@ -217,6 +239,7 @@ def postrun(self) -> None:
 
     # =======================================================================
 
+    @_fail_on_exception
     def _prepare(self, jobmanager: "JobManager") -> bool:
         """Prepare the job for execution. This method collects steps 1-7 from :ref:`job-life-cycle`. Should not be overridden. Returned value indicates if job execution should continue (|RPM| did not find this job as previously run)."""
@@ -269,6 +292,7 @@ def _get_ready(self) -> None:
     def _execute(self, jobrunner: "JobRunner") -> None:
         """Execute the job."""
 
+    @_fail_on_exception
     def _finalize(self) -> None:
         """Gather the results of the job execution and organize them. This method collects steps 9-12 from :ref:`job-life-cycle`. Should not be overridden."""
         log("Starting {}._finalize()".format(self.name), 7)
@@ -446,6 +470,7 @@ def _get_ready(self) -> None:
 
             os.chmod(runfile, os.stat(runfile).st_mode | stat.S_IEXEC)
 
+    @_fail_on_exception
     def _execute(self, jobrunner) -> None:
         """Execute previously created runscript using *jobrunner*.
@@ -674,6 +699,7 @@ def _notify(self) -> None:
         with self._lock:
             self._active_children -= 1
 
+    @_fail_on_exception
     def _execute(self, jobrunner: "JobRunner") -> None:
         """Run all children from ``children``. Then use :meth:`~MultiJob.new_children` and run all jobs produced by it.
         Repeat this procedure until :meth:`~MultiJob.new_children` returns an empty list. Wait for all started jobs to finish."""
         log("Starting {}._execute()".format(self.name), 7)
diff --git a/interfaces/adfsuite/ams.py b/interfaces/adfsuite/ams.py
index 5f681562..762f3229 100644
--- a/interfaces/adfsuite/ams.py
+++ b/interfaces/adfsuite/ams.py
@@ -2483,10 +2483,14 @@ def get_errormsg(self) -> Optional[str]:
         if self.check():
             return None
         else:
+            # Check if there is an error captured during the job process, or a previously cached error
+            if self._error_msg:
+                return self._error_msg
+
             default_msg = "Could not determine error message. Please check the output manually."
             msg = None
             try:
-                # Something went wrong. The first place to check is the termination status on the ams.rkf.
+                # If not, the first place to check is the termination status on the ams.rkf.
                 # If the AMS driver stopped with a known error (called StopIt in the Fortran code), the error will be in there.
                 # Status can be:
                 # - NORMAL TERMINATION with errors: find the error from the ams log file
@@ -2503,7 +2507,8 @@ def get_errormsg(self) -> Optional[str]:
                 try:
                     log_err_lines = self.results.grep_file("ams.log", "ERROR: ")
                     if log_err_lines:
-                        return log_err_lines[-1].partition("ERROR: ")[2]
+                        self._error_msg = log_err_lines[-1].partition("ERROR: ")[2]
+                        return self._error_msg
                 except FileError:
                     pass
@@ -2517,7 +2522,8 @@ def get_errormsg(self) -> Optional[str]:
                         match=1,
                     )
                     if license_err_lines:
-                        return str.join("\n", license_err_lines)
+                        self._error_msg = str.join("\n", license_err_lines)
+                        return self._error_msg
                 except FileError:
                     pass
@@ -2537,7 +2543,11 @@ def get_errormsg(self) -> Optional[str]:
                             break
             except:
                 pass
-            return msg if msg else default_msg
+
+            # Cache error message if called again
+            self._error_msg = msg if msg else default_msg
+
+            return self._error_msg
 
     def hash_input(self) -> str:
         """Calculate the hash of the input file.
diff --git a/unit_tests/test_amsjob.py b/unit_tests/test_amsjob.py
index a1204a2c..655270fa 100644
--- a/unit_tests/test_amsjob.py
+++ b/unit_tests/test_amsjob.py
@@ -569,6 +569,7 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
 
         # Invalid license
         job = AMSJob()
+        job._error_msg = None
         results = MagicMock(spec=AMSResults)
         results.readrkf.side_effect = FileError()
         results.grep_file.side_effect = FileError()
@@ -595,6 +596,7 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
         )
 
         # Invalid input
+        job._error_msg = None
         results.grep_file.side_effect = None
         results.grep_file.return_value = [
             ' <12:03:49> ERROR: Input error: value "foo" found in line 13 for multiple choice key "Task" is not an allowed choice'
@@ -605,9 +607,14 @@ def test_get_errormsg_populates_correctly_for_different_scenarios(self):
         )
 
         # Error in calculation
+        job._error_msg = None
         results.readrkf.return_value = "NORMAL TERMINATION with errors"
         results.readrkf.side_effect = None
         results.grep_file.return_value = [
             " <13:44:55> ERROR: Geometry optimization failed! (Did not converge.)"
         ]
         assert job.get_errormsg() == "Geometry optimization failed! (Did not converge.)"
+
+        # Error in prerun
+        job._error_msg = "RuntimeError: something went wrong"
+        assert job.get_errormsg() == "RuntimeError: something went wrong"
diff --git a/unit_tests/test_basejob.py b/unit_tests/test_basejob.py
index 978532b6..e9911e55 100644
--- a/unit_tests/test_basejob.py
+++ b/unit_tests/test_basejob.py
@@ -8,8 +8,8 @@ from io import StringIO
 
 from scm.plams.core.settings import Settings
-from scm.plams.core.basejob import SingleJob
-from scm.plams.core.errors import PlamsError, FileError
+from scm.plams.core.basejob import SingleJob, MultiJob
+from scm.plams.core.errors import PlamsError, FileError, ResultsError
 from scm.plams.core.jobrunner import JobRunner
 from scm.plams.core.jobmanager import JobManager
 from scm.plams.core.functions import add_to_instance
@@ -70,10 +70,17 @@ def get_runscript(self) -> str:
         return f"sleep {self.wait} && {self.command} {self._filename('inp')}"
 
     def check(self) -> bool:
-        return self.results.read_file(self._filename("err")) == ""
+        try:
+            return self.results.read_file(self._filename("err")) == ""
+        except ResultsError:
+            return True
 
     def get_errormsg(self) -> str:
-        return self.results.read_file(self._filename("err"))
+        if self._error_msg:
+            return self._error_msg
+
+        msg = self.results.read_file(self._filename("err"))
+        return msg if msg else None
 
 
 class TestSingleJob:
@@ -124,6 +131,8 @@ def test_run_writes_output_file_on_success(self):
 
         # Then job check passes and output file written
         assert job.check()
+        assert job.ok()
+        assert job.status == JobStatus.SUCCESSFUL
         assert results.read_file("$JN.in") == f"Dummy input {job.id}"
         assert results.read_file("$JN.out") == f"Dummy output {job.id}"
         assert results.read_file("$JN.err") == ""
@@ -135,10 +144,39 @@ def test_run_writes_error_file_on_failure(self):
 
         # Then job check fails and error file written
         assert not job.check()
+        assert not job.ok()
+        assert job.status == JobStatus.CRASHED
         assert results.read_file("$JN.in") == f"Dummy input {job.id}"
         assert results.read_file("$JN.out") == ""
         assert results.read_file("$JN.err") != ""
 
+    def test_run_marks_jobs_as_failed_and_stores_exception_on_prerun_or_postrun_error(self):
+        # Given jobs which error in pre- or post-run
+        job1 = DummySingleJob()
+        job2 = DummySingleJob()
+
+        @add_to_instance(job1)
+        def prerun(s):
+            raise RuntimeError("something went wrong")
+
+        @add_to_instance(job2)
+        def postrun(s):
+            raise RuntimeError("something went wrong")
+
+        # When run jobs
+        job1.run()
+        job2.run()
+
+        # Then status is marked as failed
+        assert job1.status == JobStatus.FAILED
+        assert job2.status == JobStatus.FAILED
+        assert not job1.ok()
+        assert not job2.ok()
+        assert "self.prerun()" in job1.get_errormsg()
+        assert "RuntimeError: something went wrong" in job1.get_errormsg()
+        assert "self.postrun()" in job2.get_errormsg()
+        assert "RuntimeError: something went wrong" in job2.get_errormsg()
+
     @pytest.mark.parametrize(
         "mode,expected",
         [
@@ -184,7 +222,7 @@ def test_run_multiple_independent_jobs_in_parallel(self):
         runner = JobRunner(parallel=True, maxjobs=2)
 
         # When set up two jobs with no dependencies
-        job1 = DummySingleJob(wait=0.2)
+        job1 = DummySingleJob(wait=0.5)
         job2 = DummySingleJob(wait=0.01)
         results = [job1.run(runner), job2.run(runner)]
@@ -244,6 +282,27 @@ def prerun(s):
         assert job2.status == JobStatus.SUCCESSFUL
         assert job2.call_log[2].timestamp >= job1.call_log[-1].timestamp
 
+    def test_run_multiple_dependent_jobs_first_job_fails_in_prerun(self):
+        # Given two dependent jobs where the first errors in prerun
+        job1 = DummySingleJob()
+        job2 = DummySingleJob(depend=[job1])
+
+        @add_to_instance(job1)
+        def prerun(s):
+            raise RuntimeError("something went wrong")
+
+        # When run jobs
+        job1.run()
+        job2.run()
+
+        # Then first job is marked as failed, the dependent job as successful
+        assert job1.status == JobStatus.FAILED
+        assert job2.status == JobStatus.SUCCESSFUL
+        assert not job1.ok()
+        assert job2.ok()
+        assert "RuntimeError" in job1.get_errormsg()
+        assert job2.get_errormsg() is None
+
     def test_ok_waits_on_results_and_checks_status(self):
         # Given job and a copy
         job1 = DummySingleJob(wait=0.1)
@@ -344,3 +403,180 @@ def test_job_errors_logged_to_stdout(self, config):
             stdout,
             re.DOTALL,
         )
+
+
+class TestMultiJob:
+    """
+    Test suite for the MultiJob.
+    Not truly independent as it relies upon the job runner/manager and results components.
+    But this suite focuses on testing the methods on the job class itself.
+    """
+
+    def test_run_multiple_independent_single_jobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are independent
+            jobs = [DummySingleJob() for _ in range(3)]
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs])
+
+    def test_run_multiple_independent_single_jobs_one_fails(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are independent, one of which fails
+            jobs = [DummySingleJob(), DummySingleJob(cmd="not_a_cmd"), DummySingleJob()]
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job fails
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [j.status for j in jobs] == [JobStatus.SUCCESSFUL, JobStatus.CRASHED, JobStatus.SUCCESSFUL]
+
+    def test_run_multiple_dependent_single_jobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 jobs which are dependent
+            jobs = [DummySingleJob() for _ in range(3)]
+
+            @add_to_instance(jobs[1])
+            def prerun(s):
+                jobs[0].results.wait()
+
+            @add_to_instance(jobs[2])
+            def postrun(s):
+                jobs[1].results.wait()
+
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs])
+
+    def test_run_multiple_independent_single_jobs_error_in_prerun_or_postrun(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given 3 independent jobs, where one errors in prerun and another in postrun
+            jobs = [DummySingleJob() for _ in range(3)]
+
+            @add_to_instance(jobs[1])
+            def prerun(s):
+                raise RuntimeError("something went wrong")
+
+            @add_to_instance(jobs[2])
+            def postrun(s):
+                raise RuntimeError("something went wrong")
+
+            multi_job = MultiJob(children=jobs)
+
+            # When run multi-job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job failed
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [j.status for j in jobs] == [JobStatus.SUCCESSFUL, JobStatus.FAILED, JobStatus.FAILED]
+
+    def test_run_multiple_independent_multijobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=3)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple multi-jobs
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([mj.status == JobStatus.SUCCESSFUL for mj in multi_jobs])
+            assert all([j.status == JobStatus.SUCCESSFUL for js in jobs for j in js])
+
+    def test_run_multiple_dependent_multijobs_all_succeed(self, config):
+        runner = JobRunner(parallel=True, maxjobs=5)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple dependent multi-jobs
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            @add_to_instance(jobs[1][0])
+            def prerun(s):
+                jobs[0][0].results.wait()
+
+            @add_to_instance(multi_jobs[1])
+            def postrun(s):
+                multi_jobs[0].results.wait()
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then multi-job ran ok
+            assert multi_job.check()
+            assert multi_job.ok()
+            assert multi_job.status == JobStatus.SUCCESSFUL
+            assert all([mj.status == JobStatus.SUCCESSFUL for mj in multi_jobs])
+            assert all([j.status == JobStatus.SUCCESSFUL for js in jobs for j in js])
+
+    def test_run_multiple_dependent_multijobs_one_fails(self, config):
+        runner = JobRunner(parallel=True, maxjobs=5)
+        config.sleepstep = 0.1
+
+        with patch("scm.plams.core.basejob.config", config):
+            # Given multi-job with multiple dependent multi-jobs and one which fails
+            jobs = [[DummySingleJob() for _ in range(3)] for _ in range(3)]
+            jobs[1][1].command = "not_a_cmd"
+            multi_jobs = [MultiJob(children=js) for js in jobs]
+            multi_job = MultiJob(children=multi_jobs)
+
+            # When run top level job
+            multi_job.run(jobrunner=runner).wait()
+
+            # Then overall multi-job failed
+            assert not multi_job.check()
+            assert not multi_job.ok()
+            assert multi_job.status == JobStatus.FAILED
+            assert [mj.status for mj in multi_jobs] == [
+                JobStatus.SUCCESSFUL,
+                JobStatus.FAILED,
+                JobStatus.SUCCESSFUL,
+            ]
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs[0]])
+            assert all([j.status == JobStatus.SUCCESSFUL for j in jobs[2]])
+            assert [j.status for j in jobs[1]] == [
+                JobStatus.SUCCESSFUL,
+                JobStatus.CRASHED,
+                JobStatus.SUCCESSFUL,
+            ]
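[Review note on PATCH 3/7] End to end, the `_fail_on_exception` wrapper means an exception raised in `prerun`/`postrun` no longer propagates out of a worker and leaves the job (or a parent `MultiJob`) waiting forever; the job is marked failed and the traceback surfaces through `get_errormsg`. A sketch mirroring the new unit tests (`DummySingleJob` is the test helper defined above):

    from scm.plams.core.enums import JobStatus
    from scm.plams.core.functions import add_to_instance

    job = DummySingleJob()

    @add_to_instance(job)
    def prerun(s):
        raise RuntimeError("something went wrong")

    job.run()  # the exception is caught by the decorator, not raised here

    assert job.status == JobStatus.FAILED
    assert not job.ok()
    # get_errormsg() returns the stored traceback, ending in:
    #     RuntimeError: something went wrong
    print(job.get_errormsg())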
From 521ac3535c8155f36511c56b94ca8467c17454f0 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 12:11:21 +0100
Subject: [PATCH 4/7] Update CHANGELOG SCMSUITE-10131 SO107

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 988a13f3..2d45dfac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ This changelog is effective from the 2025 releases.
 * Build using `pyproject.toml`, addition of extras groups to install optional dependencies
 * Logging of job summaries to CSV logfile
 * Logging of AMS job error messages to stdout and logfile on job failure
+* Method `get_errormsg` enforced on the `Job` base class, with a default implementation
 
 ### Changed
 * Functions for optional packages (e.g. RDKit, ASE) are available even when these packages are not installed, but will raise an `MissingOptionalPackageError` when called
@@ -33,12 +34,15 @@
 * Restructuring of examples and conversion of various examples to notebooks
 * Support for `networkx>=3` and `ase>=3.23`
 * Use standard library logger for `log` function
+* Make `Job` class inherit from `ABC` and mark abstract methods
+* Exceptions raised in `prerun` and `postrun` will always be caught and populate error message
 
 ### Fixed
 * `Molecule.properties.charge` is a numeric instead of string type when loading molecule from a file
 * `Molecule.delete_all_bonds` removes the reference molecule from the removed bond instances
 * `SingleJob.load` returns the correctly loaded job
 * `AMSJob.check` handles a `NoneType` status, returning `False`
+* `MultiJob.run` locking resolved when errors raised within `prerun` and `postrun` methods
 
 ### Deprecated
 * `plams` launch script is deprecated in favour of simply running with `amspython`
From a7a376eaa965466280fef7d050baa35337189aa4 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Mon, 9 Dec 2024 15:40:54 +0100
Subject: [PATCH 5/7] Fix mypy issues SCMSUITE-10131 SO107

---
 core/basejob.py             | 12 ++++++------
 interfaces/adfsuite/ams.py  |  6 +++---
 mol/molecule.py             |  4 ++--
 unit_tests/test_molecule.py |  9 +++++++++
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index 9a43d51d..e53efc35 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -4,7 +4,7 @@
 import threading
 import time
 from os.path import join as opj
-from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union, Callable, Any
+from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union
 from abc import ABC, abstractmethod
 import traceback
@@ -30,7 +30,7 @@
 __all__ = ["SingleJob", "MultiJob"]
 
 
-def _fail_on_exception(func: Callable[["Job", Any], Any]) -> Callable[["Job", Any], Any]:
+def _fail_on_exception(func):
     """Decorator to wrap a job method and mark the job as failed on any exception."""
 
     def wrapper(self: "Job", *args, **kwargs):
@@ -39,11 +39,11 @@ def wrapper(self: "Job", *args, **kwargs):
         except:
             # Mark job status as failed and the results as complete
             self.status = JobStatus.FAILED
-            self.results.finished.set()
-            self.results.done.set()
+            self.results.finished.set()  # type: ignore
+            self.results.done.set()  # type: ignore
             # Notify any parent multi-job of the failure
-            if self.parent and self in self.parent:
-                self.parent._notify()
+            if self.parent and self in self.parent:  # type: ignore
+                self.parent._notify()  # type: ignore
             # Store the exception message to be accessed from get_errormsg
             self._error_msg = traceback.format_exc()
 
diff --git a/interfaces/adfsuite/ams.py b/interfaces/adfsuite/ams.py
index 762f3229..e7480651 100644
--- a/interfaces/adfsuite/ams.py
+++ b/interfaces/adfsuite/ams.py
@@ -1802,8 +1802,8 @@ def get_density_along_axis(
 
         start_step, end_step, every, _ = self._get_integer_start_end_every_max(start_fs, end_fs, every_fs, None)
         nEntries = self.readrkf("History", "nEntries")
-        coords = np.array(self.get_history_property("Coords")).reshape(nEntries, -1, 3)
-        coords = coords[start_step:end_step:every]
+        history_coords = np.array(self.get_history_property("Coords")).reshape(nEntries, -1, 3)
+        coords = history_coords[start_step:end_step:every]
         nEntries = len(coords)
 
         axis2index = {"x": 0, "y": 1, "z": 2}
@@ -2507,7 +2507,7 @@ def get_errormsg(self) -> Optional[str]:
                 try:
                     log_err_lines = self.results.grep_file("ams.log", "ERROR: ")
                     if log_err_lines:
-                        self._error_msg = log_err_lines[-1].partition("ERROR: ")[2]
+                        self._error_msg: Optional[str] = log_err_lines[-1].partition("ERROR: ")[2]
                         return self._error_msg
                 except FileError:
                     pass
diff --git a/mol/molecule.py b/mol/molecule.py
index 4f82d1db..10506d76 100644
--- a/mol/molecule.py
+++ b/mol/molecule.py
@@ -1487,7 +1487,7 @@ def get_fragment(self, indices):
 
         return ret
 
-    def get_complete_molecules_within_threshold(self, atom_indices, threshold: float):
+    def get_complete_molecules_within_threshold(self, atom_indices: List[int], threshold: float):
         """
         Returns a new molecule containing complete submolecules for any molecules
         that are closer than ``threshold`` to any of the atoms in ``atom_indices``.
@@ -1509,7 +1509,7 @@
         D = distance_array(solvated_coords, solvated_coords)[zero_based_indices]
         less_equal = np.less_equal(D, threshold)
         within_threshold = np.any(less_equal, axis=0)
-        good_indices = [i for i, value in enumerate(within_threshold) if value]
+        good_indices = [i for i, value in enumerate(within_threshold) if value]  # type: ignore
 
         complete_indices: Set[int] = set()
         for indlist in molecule_indices:
diff --git a/unit_tests/test_molecule.py b/unit_tests/test_molecule.py
index 564f36ce..1159db9c 100644
--- a/unit_tests/test_molecule.py
+++ b/unit_tests/test_molecule.py
@@ -216,6 +216,15 @@ def test_system_and_atomic_charge(self, mol):
         with pytest.raises(MoleculeError):
             assert mol.guess_atomic_charges() == [1, 1, 0, 0]
 
+    def test_get_complete_molecules_within_threshold(self, mol):
+        m0 = mol.get_complete_molecules_within_threshold([2], 0)
+        m1 = mol.get_complete_molecules_within_threshold([2], 1)
+        m2 = mol.get_complete_molecules_within_threshold([2], 2)
+
+        assert m0.get_formula() == "H"
+        assert m1.get_formula() == "HO"
+        assert m2.get_formula() == "H2O"
+
 
 class TestNiO(MoleculeTestBase):
     """
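[Review note on PATCH 5/7] Besides the type hints, the new `test_get_complete_molecules_within_threshold` pins down the selection behaviour of the `Molecule` method. In script form (assuming, as in that test, the water-based `mol` fixture from the suite, where 1-based atom index 2 is a hydrogen, and the threshold is a distance, presumably in angstrom):

    # The three thresholds reproduce the assertions in the unit test above:
    m0 = mol.get_complete_molecules_within_threshold([2], 0)  # -> formula "H"
    m1 = mol.get_complete_molecules_within_threshold([2], 1)  # -> formula "HO"
    m2 = mol.get_complete_molecules_within_threshold([2], 2)  # -> formula "H2O"
    assert m2.get_formula() == "H2O"  # wide enough to pull in the whole molecule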
From 0f6ce1761ad28b55e437e4a3716468f81e5ba702 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Wed, 11 Dec 2024 11:54:29 +0100
Subject: [PATCH 6/7] Update job status type hint SCMSUITE-10131 SO107

---
 core/basejob.py |  6 +++---
 core/enums.py   | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/core/basejob.py b/core/basejob.py
index e53efc35..1127f202 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -8,7 +8,7 @@
 from abc import ABC, abstractmethod
 import traceback
 
-from scm.plams.core.enums import JobStatus
+from scm.plams.core.enums import JobStatus, JobStatusType
 from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
 from scm.plams.core.functions import config, log
 from scm.plams.core.private import sha256
@@ -124,14 +124,14 @@ def __init__(
 
     # =======================================================================
 
     @property
-    def status(self) -> JobStatus:
+    def status(self) -> JobStatusType:
         """
         Current status of the job
         """
         return self._status
 
     @status.setter
-    def status(self, value: JobStatus) -> None:
+    def status(self, value: JobStatusType) -> None:
         # This setter should really be private i.e. internally should use self._status
         # But for backwards compatibility it is exposed and set by e.g. the JobManager
         self._status = value
diff --git a/core/enums.py b/core/enums.py
index d28970a4..117baa05 100644
--- a/core/enums.py
+++ b/core/enums.py
@@ -1,3 +1,5 @@
+from typing import Literal, Union
+
 # Import StrEnum for Python >=3.11, otherwise use backwards compatible class
 try:
     from enum import StrEnum  # type: ignore
@@ -13,7 +15,7 @@ def __str__(self) -> str:
         return str(self.value)
 
 
-__all__ = ["JobStatus"]
+__all__ = ["JobStatus", "JobStatusType"]
 
 
 class JobStatus(StrEnum):
@@ -32,3 +34,21 @@ class JobStatus(StrEnum):
     COPIED = "copied"
     PREVIEW = "preview"
     DELETED = "deleted"
+
+
+JobStatusType = Union[
+    JobStatus,
+    Literal[
+        "created",
+        "started",
+        "registered",
+        "running",
+        "finished",
+        "crashed",
+        "failed",
+        "successful",
+        "copied",
+        "preview",
+        "deleted",
+    ],
+]

From d90658138e9101e57ffd32fe4d31d34b253114f0 Mon Sep 17 00:00:00 2001
From: David Ormrod Morley
Date: Wed, 11 Dec 2024 12:05:25 +0100
Subject: [PATCH 7/7] Remove bare exception catching SCMSUITE-10131 SO107

---
 core/basejob.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/basejob.py b/core/basejob.py
index 1127f202..fef4a864 100644
--- a/core/basejob.py
+++ b/core/basejob.py
@@ -36,7 +36,7 @@ def _fail_on_exception(func):
     def wrapper(self: "Job", *args, **kwargs):
         try:
             return func(self, *args, **kwargs)
-        except:
+        except Exception:
             # Mark job status as failed and the results as complete
             self.status = JobStatus.FAILED
             self.results.finished.set()  # type: ignore
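[Review note on PATCH 6/7] The `JobStatusType` alias keeps the `status` property backwards compatible: mypy accepts both enum members and the equivalent plain string literals, and because `JobStatus` is a `StrEnum`, the two compare equal at runtime. A hypothetical helper showing that both spellings type-check:

    from scm.plams.core.enums import JobStatus, JobStatusType

    def is_terminal(status: JobStatusType) -> bool:
        # StrEnum members compare equal to their raw string values
        return status in ("successful", "failed", "crashed", "copied", "deleted")

    assert is_terminal(JobStatus.SUCCESSFUL)  # enum member
    assert is_terminal("failed")              # plain literal, as legacy code sets it

The follow-up in PATCH 7/7 narrows the decorator's bare `except:` to `except Exception:`, so `BaseException` subclasses such as `KeyboardInterrupt` and `SystemExit` propagate instead of silently marking the job as failed.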