From 0d396a10eaa14cfee9be52a240f28ac8708a4445 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 7 Oct 2024 09:28:54 -0700 Subject: [PATCH 1/5] Add note to docstring of `output_reports` Note about replacement of the output directory if it already exists --- src/dandisets_linkml_status_tools/cli/tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dandisets_linkml_status_tools/cli/tools.py b/src/dandisets_linkml_status_tools/cli/tools.py index 08286ea..978cc12 100644 --- a/src/dandisets_linkml_status_tools/cli/tools.py +++ b/src/dandisets_linkml_status_tools/cli/tools.py @@ -177,6 +177,8 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - Output the given list of dandiset validation reports and a summary of the reports , as a `summary.md`, to a given file path + Note: This function will replace the output directory if it already exists. + :param reports: The given list of dandiset validation reports :param output_path: The given file path to output the reports to. Note: In the case of the given output path already points to an existing object, From af7b6142806994e4c7d8912b309e599345a692d7 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 7 Oct 2024 10:22:38 -0700 Subject: [PATCH 2/5] Write DANDI LinkML schema to validation report directory --- src/dandisets_linkml_status_tools/cli/tools.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/tools.py b/src/dandisets_linkml_status_tools/cli/tools.py index 978cc12..b492fbd 100644 --- a/src/dandisets_linkml_status_tools/cli/tools.py +++ b/src/dandisets_linkml_status_tools/cli/tools.py @@ -13,6 +13,7 @@ from linkml.validator import Validator from linkml.validator.plugins import JsonschemaValidationPlugin, ValidationPlugin from linkml.validator.report import ValidationResult +from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.linkml_model import SchemaDefinition from pydantic import TypeAdapter, ValidationError from pydantic2linkml.gen_linkml import translate_defs @@ -174,8 +175,9 @@ def compile_validation_report(dandiset: RemoteDandiset) -> DandisetValidationRep def output_reports(reports: list[DandisetValidationReport], output_path: Path) -> None: """ - Output the given list of dandiset validation reports and a summary of the reports - , as a `summary.md`, to a given file path + Output the given list of dandiset validation reports, a summary of the reports + , as a `summary.md`, and the schema used in the LinkML validations, + as a `dandi_linkml_schema.yml`, to a given file path Note: This function will replace the output directory if it already exists. @@ -188,6 +190,7 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - raises NotADirectoryError: If the given output path points to a non-directory object """ summary_file_name = "summary.md" + dandi_linkml_schema_file_name = "dandi_linkml_schema.yml" summary_headers = [ "dandiset", "version", @@ -208,6 +211,14 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - output_path.mkdir() logger.info("Recreated report output directory: %s", output_path) + # Output the LinkML schema used in the validations + dandi_linkml_schema_yml = yaml_dumper.dumps( + DandisetLinkmlValidator.get_dandi_linkml_schema() + ) + with open(output_path / dandi_linkml_schema_file_name, "w") as f: + f.write(dandi_linkml_schema_yml) + logger.info("Output the DANDI LinkML schema to %s", dandi_linkml_schema_file_name) + with (output_path / summary_file_name).open("w") as summary_f: # === Write the headers of the summary table === header_row = _gen_row(f" {h} " for h in summary_headers) From f7a21b465b32af5012e61f6fa933c7dd9c1e96c3 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 7 Oct 2024 11:59:35 -0700 Subject: [PATCH 3/5] Provide reference to DANDI LinkML schema in `summary.md` --- src/dandisets_linkml_status_tools/cli/tools.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/tools.py b/src/dandisets_linkml_status_tools/cli/tools.py index b492fbd..d60f0d5 100644 --- a/src/dandisets_linkml_status_tools/cli/tools.py +++ b/src/dandisets_linkml_status_tools/cli/tools.py @@ -190,7 +190,7 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - raises NotADirectoryError: If the given output path points to a non-directory object """ summary_file_name = "summary.md" - dandi_linkml_schema_file_name = "dandi_linkml_schema.yml" + dandi_linkml_schema_file_name = "dandi-linkml-schema.yml" summary_headers = [ "dandiset", "version", @@ -215,11 +215,21 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - dandi_linkml_schema_yml = yaml_dumper.dumps( DandisetLinkmlValidator.get_dandi_linkml_schema() ) - with open(output_path / dandi_linkml_schema_file_name, "w") as f: + dandi_linkml_schema_file_path = output_path / dandi_linkml_schema_file_name + with dandi_linkml_schema_file_path.open("w") as f: f.write(dandi_linkml_schema_yml) - logger.info("Output the DANDI LinkML schema to %s", dandi_linkml_schema_file_name) + logger.info("Output the DANDI LinkML schema to %s", dandi_linkml_schema_file_path) with (output_path / summary_file_name).open("w") as summary_f: + # === Provide a reference to the DANDI LinkML schema in the summary === + summary_f.write( + f"[DANDI LinkML schema](./{dandi_linkml_schema_file_name}) " + f"(LinkML schema used in the LinkML validations)\n" + ) + + # Write line break before the start of the summary table + summary_f.write("\n") + # === Write the headers of the summary table === header_row = _gen_row(f" {h} " for h in summary_headers) alignment_row = _gen_row("-" * (len(h) + 2) for h in summary_headers) From 0bec230b9ea3db908c0304eb1f59f0f4469a16bd Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 7 Oct 2024 13:28:40 -0700 Subject: [PATCH 4/5] RF: Consolidate code for writing DANDI LinkML schema to file into a func --- .../cli/tools.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/tools.py b/src/dandisets_linkml_status_tools/cli/tools.py index d60f0d5..354f67b 100644 --- a/src/dandisets_linkml_status_tools/cli/tools.py +++ b/src/dandisets_linkml_status_tools/cli/tools.py @@ -211,14 +211,7 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - output_path.mkdir() logger.info("Recreated report output directory: %s", output_path) - # Output the LinkML schema used in the validations - dandi_linkml_schema_yml = yaml_dumper.dumps( - DandisetLinkmlValidator.get_dandi_linkml_schema() - ) - dandi_linkml_schema_file_path = output_path / dandi_linkml_schema_file_name - with dandi_linkml_schema_file_path.open("w") as f: - f.write(dandi_linkml_schema_yml) - logger.info("Output the DANDI LinkML schema to %s", dandi_linkml_schema_file_path) + output_dandi_linkml_schema(output_path / dandi_linkml_schema_file_name) with (output_path / summary_file_name).open("w") as summary_f: # === Provide a reference to the DANDI LinkML schema in the summary === @@ -304,6 +297,21 @@ def output_reports(reports: list[DandisetValidationReport], output_path: Path) - logger.info("Output of dandiset validation reports completed") +def output_dandi_linkml_schema(output_path: Path) -> None: + """ + Output the DANDI LinkML schema, in YAML, to a file + + :param output_path: The path specifying the location of the file + """ + # Output the LinkML schema used in the validations + dandi_linkml_schema_yml = yaml_dumper.dumps( + DandisetLinkmlValidator.get_dandi_linkml_schema() + ) + with output_path.open("w") as f: + f.write(dandi_linkml_schema_yml) + logger.info("Output the DANDI LinkML schema to %s", output_path) + + def _write_data( data: Any, data_adapter: TypeAdapter, base_file_name: str, output_dir: Path ) -> None: From a27626cb7e6d9d73575683b8de121b1ca8928eb4 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 7 Oct 2024 16:44:49 -0700 Subject: [PATCH 5/5] Use a deepcopy of DANDI LinkML schema to initiate LinkML validator The validator modifies the provided schema --- src/dandisets_linkml_status_tools/cli/tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dandisets_linkml_status_tools/cli/tools.py b/src/dandisets_linkml_status_tools/cli/tools.py index 354f67b..35e3201 100644 --- a/src/dandisets_linkml_status_tools/cli/tools.py +++ b/src/dandisets_linkml_status_tools/cli/tools.py @@ -3,6 +3,7 @@ import re from collections import Counter from collections.abc import Iterable +from copy import deepcopy from functools import partial from pathlib import Path from shutil import rmtree @@ -86,7 +87,9 @@ def __init__(self, validation_plugins: Optional[list[ValidationPlugin]] = None): validation_plugins = [JsonschemaValidationPlugin(closed=True)] self._inner_validator = Validator( - self.get_dandi_linkml_schema(), + # TODO: The deep copying may not be needed if + # https://github.com/linkml/linkml/issues/2359 is resolved + deepcopy(self.get_dandi_linkml_schema()), validation_plugins=validation_plugins, )