From f23b0deedf12665502175c2e5f0df42984b41147 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 12 Feb 2024 15:52:18 +0100 Subject: [PATCH] feat: add libraries input to convert --- AMDirT/cli.py | 44 +++++++++++++++++++++++++++++++++----- AMDirT/convert/__init__.py | 37 +++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/AMDirT/cli.py b/AMDirT/cli.py index f543579..9626cc9 100644 --- a/AMDirT/cli.py +++ b/AMDirT/cli.py @@ -10,6 +10,31 @@ from json import load +class MutuallyExclusiveOption(click.Option): + # Credit goes to Stan Chang for this code snippet + # https://gist.github.com/stanchan/bce1c2d030c76fe9223b5ff6ad0f03db + + def __init__(self, *args, **kwargs): + self.mutually_exclusive = set(kwargs.pop("mutually_exclusive", [])) + help = kwargs.get("help", "") + if self.mutually_exclusive: + ex_str = ", ".join(self.mutually_exclusive) + kwargs["help"] = help + ( + " NOTE: This argument is mutually exclusive with " + " arguments: [" + ex_str + "]." 
+ ) + super(MutuallyExclusiveOption, self).__init__(*args, **kwargs) + + def handle_parse_result(self, ctx, opts, args): + if self.mutually_exclusive.intersection(opts) and self.name in opts: + raise click.UsageError( + "Illegal usage: `{}` is mutually exclusive with " + "arguments `{}`.".format(self.name, ", ".join(self.mutually_exclusive)) + ) + + return super(MutuallyExclusiveOption, self).handle_parse_result(ctx, opts, args) + + def get_table_list(): json_path = get_json_path() with open(json_path, "r") as f: @@ -110,6 +135,20 @@ def viewer(ctx, no_args_is_help=True, **kwargs): type=click.Path(exists=True), help="(Optional) JSON file listing AncientMetagenomeDir tables", ) +@click.option( + "--libraries", + type=click.Path(readable=True, file_okay=True, dir_okay=False, exists=True), + help=("(Optional) Path to libraries table"), + cls=MutuallyExclusiveOption, + mutually_exclusive=["librarymetadata"], +) +@click.option( + "--librarymetadata", + is_flag=True, + help="Generate AncientMetagenomeDir libraries table of all samples in input table", + cls=MutuallyExclusiveOption, + mutually_exclusive=["libraries"], +) @click.option( "-o", "--output", @@ -123,11 +162,6 @@ def viewer(ctx, no_args_is_help=True, **kwargs): is_flag=True, help="Generate BibTeX file of all publications in input table", ) -@click.option( - "--librarymetadata", - is_flag=True, - help="Generate AncientMetagenomeDir libraries table of all samples in input table", -) @click.option( "--curl", is_flag=True, diff --git a/AMDirT/convert/__init__.py b/AMDirT/convert/__init__.py index 572457d..825266a 100644 --- a/AMDirT/convert/__init__.py +++ b/AMDirT/convert/__init__.py @@ -22,6 +22,7 @@ def run_convert( samples, + libraries, table_name, tables=None, output=".", @@ -40,9 +41,10 @@ def run_convert( """Run the AMDirT conversion application to input samplesheet tables for different pipelines Args: - tables (str): Path to JSON file listing tables samples (str): Path to AncientMetagenomeDir filtered 
samples tsv file + libraries (str): Optional path to AncientMetagenomeDir filtered libraries tsv file table_name (str): Name of the table of the table to convert + tables (str): Path to JSON file listing tables output (str): Path to output table. Defaults to "." """ os.makedirs(output, exist_ok=True) @@ -79,14 +81,33 @@ def run_convert( else: logger.info("Input sample dataset is valid") samples = pd.read_csv(samples, sep="\t") - libraries = pd.read_csv(remote_resources["libraries"][table_name], sep="\t") + remote_libraries = pd.read_csv( + remote_resources["libraries"][table_name], sep="\t" + ) - selected_libraries = get_libraries( - samples=samples, - libraries=libraries, - table_name=table_name, - supported_archives=supported_archives, - ) + if not libraries: + selected_libraries = get_libraries( + samples=samples, + libraries=remote_libraries, + table_name=table_name, + supported_archives=supported_archives, + ) + else: + dataset_valid = list() + v = AMDirValidator(schema, libraries) + dataset_valid.append(v.parsing_ok) + if v.parsing_ok: + dataset_valid.append(v.validate_schema()) + dataset_valid.append(v.check_duplicate_rows()) + dataset_valid.append(v.check_columns()) + + dataset_valid = all(dataset_valid) + if dataset_valid is False: + v.to_rich() + raise DatasetValidationError("Input libraries dataset is not valid") + else: + logger.info("Input libraries dataset is valid") + selected_libraries = pd.read_csv(libraries, sep="\t") accession_table = prepare_accession_table( samples=samples,