Skip to content

Commit

Permalink
Merge pull request #156 from SCM-NV/GiulioBenedini/csv_logger
Browse files Browse the repository at this point in the history
Giulio benedini/csv logger
  • Loading branch information
dormrod authored Nov 29, 2024
2 parents a0c4341 + 59fe7d8 commit 17a62cd
Show file tree
Hide file tree
Showing 18 changed files with 1,242 additions and 44 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ This changelog is effective from the 2025 releases.
* Script `generate_example.sh` to generate documentation pages from notebook examples
* GitHub workflows for CI and publishing to PyPI
* Build using `pyproject.toml`, addition of extras groups to install optional dependencies
* `LogManager` and `TextLogger` to manage log files and console logging
* Logging of job summaries to CSV logfile

### Changed
* Functions for optional packages (e.g. RDKit, ASE) are available even when these packages are not installed, but will raise an `MissingOptionalPackageError` when called
Expand Down
3 changes: 1 addition & 2 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
SafeRunSettings,
Settings,
)
from scm.plams.core.logging import get_logger, TextLogger
from scm.plams.core.logging import get_logger
from scm.plams.interfaces.adfsuite.ams import AMSJob, AMSResults
from scm.plams.interfaces.adfsuite.amsanalysis import AMSAnalysisJob, AMSAnalysisResults, convert_to_unicode
from scm.plams.interfaces.adfsuite.amsworker import AMSWorker, AMSWorkerError, AMSWorkerPool, AMSWorkerResults
Expand Down Expand Up @@ -165,7 +165,6 @@
"JobManagerSettings",
"ConfigSettings",
"get_logger",
"TextLogger",
"SingleJob",
"MultiJob",
"JobStatus",
Expand Down
6 changes: 3 additions & 3 deletions core/basejob.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
import threading
import time
from os.path import join as opj
from typing import Optional, List, Generator, TYPE_CHECKING, Union, Dict, Iterable
from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Union

from scm.plams.core.enums import JobStatus
from scm.plams.core.errors import FileError, JobError, PlamsError, ResultsError
from scm.plams.core.functions import config, log
from scm.plams.core.private import sha256
from scm.plams.core.results import Results
from scm.plams.core.settings import Settings
from scm.plams.mol.molecule import Molecule
from scm.plams.core.enums import JobStatus

try:
from scm.pisa.block import DriverBlock
Expand All @@ -22,8 +22,8 @@
_has_scm_pisa = False

if TYPE_CHECKING:
from scm.plams.core.jobrunner import JobRunner
from scm.plams.core.jobmanager import JobManager
from scm.plams.core.jobrunner import JobRunner

__all__ = ["SingleJob", "MultiJob"]

Expand Down
55 changes: 55 additions & 0 deletions core/formatters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
import logging
from typing import Dict, Any

from scm.plams.core.logging import CSVFormatter
from scm.plams.core.basejob import Job
from scm.plams.core.enums import JobStatus

__all__ = ["JobCSVFormatter"]


class JobCSVFormatter(CSVFormatter):
    """
    Formatter which creates comma-separated log lines from a ``Job`` log record.
    """

    def format(self, record: logging.LogRecord) -> str:
        # When the record payload is a Job instance, replace it with a flat
        # dict summary so the CSVFormatter base class can serialise it.
        payload = record.msg
        if isinstance(payload, Job):
            record.msg = self._format_job(payload)
        return super().format(record)

    @staticmethod
    def _format_job(job: Job) -> Dict[str, Any]:
        # Populate every column up-front so the CSV header set is stable,
        # regardless of how far the job has progressed.
        summary: Dict[str, Any] = {
            "job_base_name": re.sub(r"\.\d+$", "", job.name),  # strip ".<n>" duplicate suffix
            "job_name": job.name,
            "job_status": job.status,
            "job_parent_name": "",
            "job_parent_path": "",
            "job_path": "",
            "job_ok": "",
            "job_check": "",
            "job_get_errormsg": "",
        }

        # The job path is only assigned once the job has moved past creation/start.
        if job.status not in (JobStatus.CREATED, JobStatus.STARTED):
            summary["job_path"] = job.path

        # Result inspection is only meaningful for jobs past the running stage.
        if job.status not in (JobStatus.REGISTERED, JobStatus.RUNNING):
            summary["job_ok"] = job.ok()
            try:
                summary["job_check"] = job.check()
            except TypeError:
                pass
            try:
                # get_errormsg() is not part of the Job base class, but many
                # job subclasses implement it.
                summary["job_get_errormsg"] = job.get_errormsg()
            except (AttributeError, TypeError):
                pass

        if job.parent:
            summary["job_parent_name"] = job.parent.name
            summary["job_parent_path"] = job.parent.path

        return summary
12 changes: 7 additions & 5 deletions core/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def log(message: str, level: int = 0) -> None:
Logs are printed independently to the text logfile (a file called ``logfile`` in the main working folder) and to the standard output.
If *level* is equal or lower than verbosity (defined by ``config.log.file`` or ``config.log.stdout``) the message is printed.
By convention in PLAMS, level should be between 0-7, with 0 indicating no loggin and 7 indicating the most verbose logging.
By convention in PLAMS, level should be between 0-7, with 0 indicating no logging and 7 indicating the most verbose logging.
Date and/or time can be added based on ``config.log.date`` and ``config.log.time``.
All logging activity is thread safe.
"""
Expand Down Expand Up @@ -260,10 +260,12 @@ def _finish():
# Close all loggers which have files in the directory to be erased
workdir = os.path.abspath(config.default_jobmanager.workdir)
for logger in LogManager._loggers.values():
if logger._file_handler is not None:
logfile = os.path.abspath(logger._file_handler.baseFilename)
if os.path.commonpath([workdir]) == os.path.commonpath([workdir, logfile]):
logger.close()
if (logfile := logger.logfile) is not None:
try:
if os.path.commonpath([workdir]) == os.path.commonpath([workdir, os.path.abspath(logfile)]):
logger.close()
except ValueError:
pass

shutil.rmtree(config.default_jobmanager.workdir)

Expand Down
42 changes: 33 additions & 9 deletions core/jobmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
import shutil
import threading
from os.path import join as opj
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional, List, Dict

from scm.plams.core.basejob import MultiJob
from scm.plams.core.errors import FileError, PlamsError
from scm.plams.core.functions import config, log
from scm.plams.core.enums import JobStatus
from scm.plams.core.errors import FileError, PlamsError
from scm.plams.core.functions import config, get_logger, log
from scm.plams.core.logging import Logger
from scm.plams.core.formatters import JobCSVFormatter

if TYPE_CHECKING:
from scm.plams.core.basejob import Job
from scm.plams.core.settings import Settings

__all__ = ["JobManager"]

Expand All @@ -23,7 +26,8 @@ class JobManager:
* ``foldername`` -- the working folder name.
* ``workdir`` -- the absolute path to the working folder.
* ``logfile`` -- the absolute path to the logfile.
* ``logfile`` -- the absolute path to the text logfile.
* ``job_logger`` -- the logger used to write job summaries.
* ``input`` -- the absolute path to the copy of the input file in the working folder.
* ``settings`` -- a |Settings| instance for this job manager (see below).
* ``jobs`` -- a list of all jobs managed with this instance (in order of |run| calls).
Expand All @@ -42,18 +46,25 @@ class JobManager:
"""

def __init__(self, settings, path=None, folder=None, use_existing_folder=False):
def __init__(
self,
settings: "Settings",
path: Optional[str] = None,
folder: Optional[str] = None,
use_existing_folder: bool = False,
job_logger: Optional[Logger] = None,
):

self.settings = settings
self.jobs = []
self.names = {}
self.hashes = {}
self.jobs: List[Job] = []
self.names: Dict[str, int] = {}
self.hashes: Dict[str, Job] = {}

self._register_lock = threading.RLock()

if path is None:
ams_resultsdir = os.getenv("AMS_RESULTSDIR")
if not ams_resultsdir is None and os.path.isdir(ams_resultsdir):
if ams_resultsdir is not None and os.path.isdir(ams_resultsdir):
self.path = ams_resultsdir
else:
self.path = os.getcwd()
Expand All @@ -78,6 +89,17 @@ def __init__(self, settings, path=None, folder=None, use_existing_folder=False):
if not (use_existing_folder and os.path.exists(self.workdir)):
os.mkdir(self.workdir)

if job_logger is None:
job_logger = get_logger(os.path.basename(self.workdir), fmt="csv")
job_logger.configure(
logfile_level=7,
logfile_path=opj(self.workdir, "job_logfile.csv"),
csv_formatter=JobCSVFormatter,
include_date=True,
include_time=True,
)
self.job_logger = job_logger

def load_job(self, filename):
"""Load previously saved job from *filename*.
Expand Down Expand Up @@ -203,4 +225,6 @@ def _clean(self):
if not os.listdir(fullname):
os.rmdir(fullname)

self.job_logger.close()

log("Job manager cleaned", 7)
5 changes: 5 additions & 0 deletions core/jobrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ def _run_job(self, job, jobmanager):
job._execute(self)
job._finalize()
finally:
try:
jobmanager.job_logger.log(job, level=3)
except: # logging should never throw, but best to make sure
pass

if self.parallel and self._jobthread_limit:
self._jobthread_limit.release()

Expand Down
Loading

0 comments on commit 17a62cd

Please sign in to comment.