SCM-NV · dormrod · Dec 9, 2024 · Dec 5, 2024 · Dec 5, 2024 · Dec 5, 2024
diff --git a/.gitignore b/.gitignore
@@ -42,6 +42,7 @@ htmlcov/
 nosetests.xml
 coverage.xml
 *,cover
+unit_tests/result_images/*
 
 # Translations
 *.mo

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ This changelog is effective from the 2025 releases.
 * GitHub workflows for CI and publishing to PyPI
 * Build using `pyproject.toml`, addition of extras groups to install optional dependencies
 * Logging of job summaries to CSV logfile
+* Logging of AMS job error messages to stdout and logfile on job failure
 
 ### Changed
 * Functions for optional packages (e.g. RDKit, ASE) are available even when these packages are not installed, but will raise an `MissingOptionalPackageError` when called

diff --git a/core/formatters.py b/core/formatters.py
@@ -25,12 +25,12 @@ def _format_job(job: Job) -> Dict[str, Any]:
             "job_base_name": re.sub(r"\.\d+$", "", job.name),
             "job_name": job.name,
             "job_status": job.status,
-            "job_parent_name": "",
-            "job_parent_path": "",
             "job_path": "",
             "job_ok": "",
             "job_check": "",
             "job_get_errormsg": "",
+            "job_parent_name": "",
+            "job_parent_path": "",
         }
 
         if job.status not in [JobStatus.CREATED, JobStatus.STARTED]:

diff --git a/core/functions.py b/core/functions.py
@@ -300,6 +300,7 @@ def finish(otherJM: Optional[Iterable["JobManager"]] = None):
 
 # Register call to _finish on workflow end
 atexit.register(_finish)
+atexit.register(_logger.close)
 
 
 # ===========================================================================

diff --git a/core/jobrunner.py b/core/jobrunner.py
@@ -136,7 +136,25 @@ def _run_job(self, job, jobmanager):
                 job._execute(self)
                 job._finalize()
         finally:
+            # Log job summaries
             try:
+                # Log any error messages to the standard logger
+                if not job.ok(False) or not job.check():
+                    # get_errormsg is not required by the base job class, but often implemented by convention
+                    err_msg = (
+                        job.get_errormsg()
+                        if hasattr(job, "get_errormsg")
+                        else "Could not determine error message. Please check the output manually."
+                    )
+                    err_lines = err_msg.splitlines()
+                    max_lines = 30
+                    if len(err_lines) > max_lines:
+                        err_lines = err_lines[:max_lines]
+                        err_lines.append("... (see output for full error)")
+                    err_msg = str.join("\n", [f"\t{l}" for l in err_lines])
+                    log(f"""Error message for job {job.name} was:\n{err_msg}""", 3)
+
+                # Log job summary to the csv logger
                 jobmanager.job_logger.log(job, level=3)
             except:  # logging should never throw, but best to make sure
                 pass

diff --git a/core/logging.py b/core/logging.py
@@ -303,7 +303,7 @@ def format(self, record: logging.LogRecord) -> str:
         if self.include_level:
             log_record["level"] = 28 - record.levelno
         if self.log_time:
-            log_record["asctime"] = self.formatTime(record, self.datefmt)
+            log_record["logged_at"] = self.formatTime(record, self.datefmt)
 
         if isinstance(record.msg, dict):
             log_record.update(record.msg)

diff --git a/core/results.py b/core/results.py
@@ -80,10 +80,21 @@ def guardian(self, *args, **kwargs):
             raise ResultsError("Using Results associated with deleted job")
 
         elif self.job.status in [JobStatus.CRASHED, JobStatus.FAILED]:
-            if func.__name__ == "wait":  # waiting for crashed of failed job should not trigger any warnings/exceptions
+            # waiting for crashed of failed job should not trigger any warnings/exceptions
+            # and neither should checking the status from this job with 'ok', 'check' or 'get_errormsg'
+            suppress_errors = func.__name__ == "wait"
+            if not suppress_errors:
+                for frame in inspect.getouterframes(inspect.currentframe()):
+                    cal, arg = _caller_name_and_arg(frame[0])
+                    if arg == self.job and cal in ["ok", "check", "get_errormsg"]:
+                        suppress_errors = True
+                        break
+
+            if suppress_errors:
                 cal, arg = _caller_name_and_arg(inspect.currentframe())
                 if isinstance(arg, Results):
                     return func(self, *args, **kwargs)
+
             if config.ignore_failure:
                 log("WARNING: Trying to obtain results of crashed or failed job {}".format(self.job.name), 3)
                 try:

diff --git a/interfaces/adfsuite/ams.py b/interfaces/adfsuite/ams.py
@@ -2483,27 +2483,61 @@ def get_errormsg(self) -> Optional[str]:
         if self.check():
             return None
         else:
-            # Something went wrong. The first place to check is the termination status on the ams.rkf.
-            # If the AMS driver stopped with a known error (called StopIt in the Fortran code), the error will be in there.
+            default_msg = "Could not determine error message. Please check the output manually."
+            msg = None
             try:
-                msg = self.results.readrkf("General", "termination status")
-                if msg == "NORMAL TERMINATION with errors" or msg is None:
-                    # Apparently this wasn't a hard stop in the middle of the job.
-                    # Let's look for the last error in the logfile ...
-                    msg = self.results.grep_file("ams.log", "ERROR: ")[-1].partition("ERROR: ")[2]
-                elif msg == "IN PROGRESS" and "$JN.err" in self.results:
+                # Something went wrong. The first place to check is the termination status on the ams.rkf.
+                # If the AMS driver stopped with a known error (called StopIt in the Fortran code), the error will be in there.
+                # Status can be:
+                # - NORMAL TERMINATION with errors: find the error from the ams log file
+                # - IN PROGRESS: probably means AMS was shut down hard from the outside
+                #                e.g. it got SIGKILL from the scheduler for exceeding some resource limit
+                #                find the last error from the stderr
+                # Note AMS can crash before even creating an rkf, then can just check the output and error files.
+                try:
+                    termination_status = self.results.readrkf("General", "termination status")
+                except FileError:
+                    termination_status = None
+
+                # First look for the last error in the logfile
+                try:
+                    log_err_lines = self.results.grep_file("ams.log", "ERROR: ")
+                    if log_err_lines:
+                        return log_err_lines[-1].partition("ERROR: ")[2]
+                except FileError:
+                    pass
+
+                # Then for a licensing issue, check the output logs directly
+                try:
+                    license_err_lines = self.results.get_output_chunk(
+                        begin="LICENSE INVALID",
+                        end="License file",
+                        inc_begin=True,
+                        inc_end=True,
+                        match=1,
+                    )
+                    if license_err_lines:
+                        return str.join("\n", license_err_lines)
+                except FileError:
+                    pass
+
+                # For any other issue fall back to the error file directly
+                if "$JN.err" in self.results:
                     # If the status is still "IN PROGRESS", that probably means AMS was shut down hard from the outside.
                     # E.g. it got SIGKILL from the scheduler for exceeding some resource limit.
                     # In this case useful information may be found on stderr.
-                    with open(self.results["$JN.err"], "r") as err:
+                    with open(self.results["$JN.err"]) as err:
                         errlines = err.read().splitlines()
                     for el in reversed(errlines):
                         if el != "" and not el.isspace():
-                            msg = "Killed while IN PROGRESS: " + el
+                            msg = (
+                                f"Termination status: {termination_status}. Message: {el} . "
+                                f"Check the files in {self.path} for more details."
+                            )
                             break
             except:
-                msg = "Could not determine error message. Please check the output manually."
-            return msg
+                pass
+            return msg if msg else default_msg
 
     def hash_input(self) -> str:
         """Calculate the hash of the input file.

diff --git a/unit_tests/test_amsjob.py b/unit_tests/test_amsjob.py
@@ -563,3 +563,51 @@ def get_runscript() -> str:
                 assert len(status_lines) == 4
 
                 logger.close()
+
+    def test_get_errormsg_populates_correctly_for_different_scenarios(self):
+        from scm.plams.core.errors import FileError
+
+        # Invalid license
+        job = AMSJob()
+        results = MagicMock(spec=AMSResults)
+        results.readrkf.side_effect = FileError()
+        results.grep_file.side_effect = FileError()
+        results.get_output_chunk.return_value = [
+            "LICENSE INVALID",
+            "---------------",
+            "Your license does not include module AMS version 2024.206 on this machine.",
+            "Module AMS",
+            "Version 2024.206",
+            "Machine: Foo",
+            "License file: ./license.txt",
+        ]
+        job.results = results
+
+        assert (
+            job.get_errormsg()
+            == """LICENSE INVALID
+---------------
+Your license does not include module AMS version 2024.206 on this machine.
+Module AMS
+Version 2024.206
+Machine: Foo
+License file: ./license.txt"""
+        )
+
+        # Invalid input
+        results.grep_file.side_effect = None
+        results.grep_file.return_value = [
+            '<Dec05-2024> <12:03:49>  ERROR: Input error: value "foo" found in line 13 for multiple choice key "Task" is not an allowed choice'
+        ]
+        assert (
+            job.get_errormsg()
+            == 'Input error: value "foo" found in line 13 for multiple choice key "Task" is not an allowed choice'
+        )
+
+        # Error in calculation
+        results.readrkf.return_value = "NORMAL TERMINATION with errors"
+        results.readrkf.side_effect = None
+        results.grep_file.return_value = [
+            "<Dec05-2024> <13:44:55>  ERROR: Geometry optimization failed! (Did not converge.)"
+        ]
+        assert job.get_errormsg() == "Geometry optimization failed! (Did not converge.)"
diff --git a/unit_tests/test_basejob.py b/unit_tests/test_basejob.py
@@ -5,6 +5,7 @@
 from collections import namedtuple
 import shutil
 import re
+from io import StringIO
 
 from scm.plams.core.settings import Settings
 from scm.plams.core.basejob import SingleJob
@@ -71,6 +72,9 @@ def get_runscript(self) -> str:
     def check(self) -> bool:
         return self.results.read_file(self._filename("err")) == ""
 
+    def get_errormsg(self) -> str:
+        return self.results.read_file(self._filename("err"))
+
 
 class TestSingleJob:
     """
@@ -297,7 +301,7 @@ def test_job_summaries_logged(self, config):
         with open(job_manager.job_logger.logfile) as f:
             assert (
                 f.readline()
-                == """asctime,job_base_name,job_name,job_status,job_parent_name,job_parent_path,job_path,job_ok,job_check,job_get_errormsg
+                == """logged_at,job_base_name,job_name,job_status,job_path,job_ok,job_check,job_get_errormsg,job_parent_name,job_parent_path
 """
             )
 
@@ -315,3 +319,28 @@ def test_job_summaries_logged(self, config):
 
         job_manager.job_logger.close()
         shutil.rmtree(job_manager.workdir)
+
+    def test_job_errors_logged_to_stdout(self, config):
+        from scm.plams.core.logging import get_logger
+
+        logger = get_logger(f"plams-{uuid.uuid4()}")
+
+        with patch("scm.plams.core.functions._logger", logger):
+            with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
+                job1 = DummySingleJob(cmd="not_a_cmd")
+                job2 = DummySingleJob(cmd="x\n" * 50)
+
+                job1.run()
+                job2.run()
+
+                stdout = mock_stdout.getvalue()
+                assert re.match(
+                    f".*Error message for job {job1.name} was:.* 3: not_a_cmd: (command ){{0,1}}not found",
+                    stdout,
+                    re.DOTALL,
+                )
+                assert re.match(
+                    f".*Error message for job {job2.name} was:.* 3: x: (command ){{0,1}}not found.* 32: x: (command ){{0,1}}not found.*(see output for full error)",
+                    stdout,
+                    re.DOTALL,
+                )
diff --git a/unit_tests/test_logging.py b/unit_tests/test_logging.py
@@ -521,7 +521,7 @@ def test_configure_adds_fields_to_logging(self):
                     )
                 logger.close()
 
-    def test_logger_gets_field_names_from_dictionary(self):
+    def test_log_gets_field_names_from_dictionary(self):
         with temp_file_path(suffix=".log") as temp_log_file:
             name = str(uuid.uuid4())
             logger = get_logger(name, "csv")
@@ -541,12 +541,12 @@ def test_logger_gets_field_names_from_dictionary(self):
                 )
             logger.close()
 
-    def test_logger_cannot_change_field_names_or_add_fields(self):
+    def test_log_cannot_change_field_names_or_add_fields(self):
         with patch("sys.stderr", new_callable=StringIO) as mock_stderr:
             with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
                 name = str(uuid.uuid4())
                 logger = get_logger(name, "csv")
-                logger.configure(3, 3)
+                logger.configure(3)
 
                 logger.log({"foo": "bar", "fizz": "buzz"}, 3)
                 logger.log({"abc": "bar2", "def": "buzz2"}, 3)
@@ -559,6 +559,52 @@ def test_logger_cannot_change_field_names_or_add_fields(self):
 """
             )
 
+            logger.close()
+
+    def test_log_correctly_escapes_commas_and_multiline_strings(self):
+        with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
+            name = str(uuid.uuid4())
+            logger = get_logger(name, "csv")
+            logger.configure(3)
+
+            logger.log(
+                {
+                    "commas": "a,b,c,'(d,e,f)'",
+                    "multi-line": """See the following:
+    a,b,c
+    d,e,f
+""",
+                },
+                3,
+            )
+            logger.log(
+                {
+                    "commas": 'm,"n,",,,o\'',
+                    "multi-line": """See the following:
+    m,n,o
+
+    p,q,r
+""",
+                },
+                3,
+            )
+
+        assert mock_stdout.getvalue() == (
+            """commas,multi-line
+"a,b,c,\'(d,e,f)\'","See the following:
+    a,b,c
+    d,e,f
+"
+"m,""n,"",,,o\'","See the following:
+    m,n,o
+
+    p,q,r
+"
+"""
+        )
+
+        logger.close()
+
 
 class TestJobCSVFormatter:
 
@@ -592,11 +638,11 @@ def get_errormsg():
             with open(temp_log_file) as tf:
                 assert (
                     tf.read()
-                    == f"""job_base_name,job_name,job_status,job_parent_name,job_parent_path,job_path,job_ok,job_check,job_get_errormsg
+                    == f"""job_base_name,job_name,job_status,job_path,job_ok,job_check,job_get_errormsg,job_parent_name,job_parent_path
 test_job_csv_formatter,test_job_csv_formatter,created,,,,,,
 test_job_csv_formatter,test_job_csv_formatter.002,created,,,,,,
-test_job_csv_formatter,test_job_csv_formatter,successful,,,{path1},True,True,
-test_job_csv_formatter,test_job_csv_formatter.002,crashed,,,{path2},False,False,some error
+test_job_csv_formatter,test_job_csv_formatter,successful,{path1},True,True,,,
+test_job_csv_formatter,test_job_csv_formatter.002,crashed,{path2},False,False,some error,,
 """
                 )