def apply_preset(kwargs):
    """
    Resolve deprecated ``--mode`` aliases in place, then start applying the selected preset.

    :param kwargs: Command-line options; ``kwargs["mode"]`` is read and may be rewritten,
                   and ``kwargs["paired"]`` is set to "False" for every mode other than "pe"
    :type kwargs: dict
    """
    # Deprecated aliases are rewritten to the preset named in their warning message.
    if kwargs["mode"] == "pacbio":
        logging.warning("Using --mode pacbio is deprecated. Use 'pacbio-sequel2' or 'pacbio-revio' instead. Mode will be set as 'pacbio-revio'")
        kwargs["mode"] = "pacbio-revio"
    # Fix: this previously read `kwargs == ["nanopore"]`, which compares the whole dict to a
    # list and is always False, so the deprecated 'nanopore' mode was never remapped.
    elif kwargs["mode"] == "nanopore":
        logging.warning("Using --mode nanopore is deprecated. Use 'nanopore-r9' or 'nanopore-r10' instead. Mode will be set as 'nanopore-r10'")
        kwargs["mode"] = "nanopore-r10"
    if kwargs["mode"] != "pe":
        kwargs["paired"] = "False"
    p = presets[kwargs["mode"]]
    # NOTE(review): the diff hunk ends here; the remainder of apply_preset is outside this
    # view and is left unchanged.
f"nanopore-r10: --mq {presets['nanopore-r10']['mq']} --paired False --min-support '{presets['nanopore-r10']['min_support']}' --max-cov {presets['nanopore-r10']['max_cov']} --dist-norm {presets['nanopore-r10']['dist_norm']} --trust-ins-len False --thresholds {presets['nanopore-r10']['thresholds']} --symbolic-sv-size {presets['nanopore-r10']['symbolic_sv_size']} --sd {presets['nanopore-r10']['sd']}", - default="pe", type=click.Choice(["pe", "pacbio-sequel2", "pacbio-revio", "nanopore-r9", "nanopore-r10"]), show_default=True) + default="pe", type=click.Choice(["pe", "pacbio-sequel2", "pacbio-revio", "nanopore-r9", "nanopore-r10", "pacbio", "nanopore"]), show_default=True) + @click.option('--pl', help=f"Type of input reads [default: {defaults['pl']}]", type=click.Choice(["pe", "pacbio", "nanopore"]), callback=add_option_set) @click.option('--clip-length', help="Minimum soft-clip length, >= threshold are kept. Set to -1 to ignore [default: {deafults['clip_length']}]", type=int, callback=add_option_set) @@ -267,6 +274,7 @@ def run_pipeline(ctx, **kwargs): # Add arguments to context t0 = time.time() logging.info("[dysgu-run] Version: {}".format(dysgu_version)) + make_wd(kwargs) apply_preset(kwargs) show_params() @@ -364,7 +372,8 @@ def get_reads(ctx, **kwargs): f"pacbio-revio: --mq {presets['pacbio-revio']['mq']} --paired False --min-support '{presets['pacbio-revio']['min_support']}' --max-cov {presets['pacbio-revio']['max_cov']} --dist-norm {presets['pacbio-revio']['dist_norm']} --trust-ins-len True --thresholds {presets['pacbio-revio']['thresholds']} --symbolic-sv-size {presets['pacbio-revio']['symbolic_sv_size']} --sd {presets['pacbio-revio']['sd']}." 
f"nanopore-r9: --mq {presets['nanopore-r9']['mq']} --paired False --min-support '{presets['nanopore-r9']['min_support']}' --max-cov {presets['nanopore-r9']['max_cov']} --dist-norm {presets['nanopore-r9']['dist_norm']} --trust-ins-len False --symbolic-sv-size {presets['nanopore-r9']['symbolic_sv_size']} --sd {presets['nanopore-r9']['sd']} --divergence {presets['nanopore-r9']['divergence']}." f"nanopore-r10: --mq {presets['nanopore-r10']['mq']} --paired False --min-support '{presets['nanopore-r10']['min_support']}' --max-cov {presets['nanopore-r10']['max_cov']} --dist-norm {presets['nanopore-r10']['dist_norm']} --trust-ins-len False --thresholds {presets['nanopore-r10']['thresholds']} --symbolic-sv-size {presets['nanopore-r10']['symbolic_sv_size']} --sd {presets['nanopore-r10']['sd']}", - default="pe", type=click.Choice(["pe", "pacbio-sequel2", "pacbio-revio", "nanopore-r9", "nanopore-r10"]), show_default=True) + default="pe", type=click.Choice(["pe", "pacbio-sequel2", "pacbio-revio", "nanopore-r9", "nanopore-r10", "pacbio", "nanopore"]), show_default=True) + @click.option('--pl', help=f"Type of input reads [default: {defaults['pl']}]", type=click.Choice(["pe", "pacbio", "nanopore"]), callback=add_option_set) @click.option('--clip-length', help="Minimum soft-clip length, >= threshold are kept. 
def dysgu_preset_args(mode):
    """
    Returns the default dysgu arguments with the preset for the chosen mode applied on top

    :param mode: Choose the 'mode', one of pe | pacbio-sequel2 | pacbio-revio | nanopore-r9 | nanopore-r10
    :type mode: str
    :return: A dict of available arguments
    :rtype: dict
    :raises ValueError: If mode is not one of the supported modes
    """
    # Built on every call so each caller gets fresh (unshared) nested 'thresholds' dicts.
    # NOTE(review): boolean-like options are the strings "True"/"False" here, whereas
    # dysgu_default_args uses a bool for 'trust_ins_len' - confirm consumers accept both.
    presets = {"nanopore-r9": {"mq": 1,
                               "min_support": "auto",
                               "dist_norm": 900,
                               "max_cov": 150,
                               "pl": "nanopore",
                               "remap": "False",
                               "clip_length": -1,
                               "trust_ins_len": "False",
                               "sd": 0.6,
                               "symbolic_sv_size": 50000,
                               "divergence": "auto"
                               },
               "nanopore-r10": {"mq": 1,
                                "min_support": "auto",
                                "dist_norm": 600,
                                "max_cov": 150,
                                "pl": "nanopore",
                                "remap": "False",
                                "clip_length": -1,
                                "trust_ins_len": "False",
                                "sd": 0.35,
                                'thresholds': {'DEL': 0.35,
                                               'INS': 0.35,
                                               'INV': 0.35,
                                               'DUP': 0.35,
                                               'TRA': 0.35},
                                "symbolic_sv_size": 50000
                                },
               "pacbio-sequel2": {"mq": 1,
                                  "min_support": "auto",
                                  "dist_norm": 600,
                                  "max_cov": 150,
                                  "pl": "pacbio",
                                  "remap": "False",
                                  "clip_length": -1,
                                  "trust_ins_len": "True",
                                  "sd": 0.45,
                                  "symbolic_sv_size": 50000
                                  },
               "pacbio-revio": {"mq": 1,
                                "min_support": "auto",
                                "dist_norm": 600,
                                "max_cov": 150,
                                "pl": "pacbio",
                                "remap": "False",
                                "clip_length": -1,
                                "trust_ins_len": "True",
                                "sd": 0.4,
                                'thresholds': {'DEL': 0.25,
                                               'INS': 0.25,
                                               'INV': 0.25,
                                               'DUP': 0.25,
                                               'TRA': 0.25},
                                "symbolic_sv_size": 50000
                                },
               }
    # 'pe' has no preset entry: it is simply the unmodified defaults.
    if mode != "pe" and mode not in presets:
        raise ValueError("mode must be either: pe | pacbio-sequel2 | pacbio-revio | nanopore-r9 | nanopore-r10")
    args = dysgu_default_args()
    args.update(presets.get(mode, {}))
    return args