Skip to content

Commit

Permalink
Merge pull request #98 from PNNL-CompBio/cli
Browse files Browse the repository at this point in the history
Enable nested directory output
  • Loading branch information
biodataganache authored Dec 16, 2022
2 parents bb01767 + e9e9fc8 commit 75ee8f0
Show file tree
Hide file tree
Showing 17 changed files with 828 additions and 769 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ jobs:
conda config --show
- shell: bash -l {0}
run: mamba install -y -c conda-forge snakemake==7.0 tabulate==0.8.10
- shell: bash -l {0}
run: pip install -e .

# test clustering step
- name: Snekmer Cluster
Expand All @@ -55,7 +57,7 @@ jobs:
conda info --envs
source activate snekmer
conda config --get channel_priority --json
snakemake -s snekmer/rules/cluster.smk --configfile .test/config.yaml -d .test --cores 1
snekmer cluster --configfile .test/config.yaml -d .test
rm -rf .test/output
# --use-conda --conda-cleanup-pkgs cache --show-failed-logs --conda-frontend mamba
Expand All @@ -65,7 +67,7 @@ jobs:
run: |
export PATH="/usr/share/miniconda/bin:$PATH"
source activate snekmer
snakemake -s snekmer/rules/model.smk --configfile .test/config.yaml -d .test --cores 1
snekmer model --configfile .test/config.yaml -d .test --cores 1
mkdir .test/output/example-model
mv .test/output/model/*.model .test/output/example-model/
mv .test/output/kmerize/*.kmers .test/output/example-model/
Expand All @@ -76,5 +78,5 @@ jobs:
run: |
export PATH="/usr/share/miniconda/bin:$PATH"
source activate snekmer
snakemake -s snekmer/rules/search.smk --configfile .test/config.yaml -d .test --cores 1
snekmer search --configfile .test/config.yaml -d .test --cores 1
rm -rf .test/output
6 changes: 3 additions & 3 deletions .test/test-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ dependencies:
# - snakemake == 7.0
- umap-learn
- hdbscan
- pip
- pip:
- -e git+https://github.com/PNNL-CompBio/Snekmer#egg=snekmer
# - pip
# - pip:
# - -e git+https://github.com/PNNL-CompBio/Snekmer#egg=snekmer
9 changes: 9 additions & 0 deletions docs/source/troubleshooting/common.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,22 @@ Check your Snakemake version and reinstall a lower version if necessary

Error: Directory cannot be locked.
``````````````````````````````````

This error appears when a previous Snekmer run was terminated unexpectedly;
the full error message should provide further instructions.
Run ``snekmer {mode} --unlock`` (note: this command will not execute the
workflow) before rerunning the workflow.

If the error persists, delete the ``.snakemake`` directory and try again.

AttributeError in _load_configfile
``````````````````````````````````
Typically, this error arises when the path to the config.yaml file is not
specified correctly. To resolve it, check that your config.yaml file is
located in the directory from which you are executing Snekmer.
Alternatively, specify the location of the config.yaml file explicitly,
e.g. ``snekmer {mode} --configfile /path/to/config.yaml``.

General Usage Questions
-----------------------

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@
"running SIEVE models on input sequences"
),
long_description=readme,
url="http://github.com/biodataganache/KmerPipeline/",
url="http://github.com/PNNL-CompBio/Snekmer/",
author="@christinehc, @biodataganache",
author_email="christine.chang@pnnl.gov",
license=license,
packages=pkgs,
entry_points={"console_scripts": ["snekmer = snekmer.cli:main"]},
package_data={"": ["rules/*.smk", "templates/*.html"]},
package_data={"": ["rules/*.smk", "scripts/*.py", "templates/*.html"]},
# install_requires=required,
include_package_data=True,
keywords=[],
Expand Down
2 changes: 1 addition & 1 deletion snekmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@

# from . import walk

__version__ = "1.0.2"
__version__ = "1.0.3"
6 changes: 3 additions & 3 deletions snekmer/alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,13 @@ def get_alphabets():
return ALPHABETS


def check_valid(alphabet):
def check_valid(alphabet: Union[str, int]) -> None:
"""Check validity of input alphabet vs. defined list.
Parameters
----------
alphabet : str
Alphabet name.
alphabet : Union[str, int]
Alphabet name or identifier.
Raises
------
Expand Down
35 changes: 24 additions & 11 deletions snekmer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
"""
# imports
import argparse
import os

from multiprocessing import cpu_count
from os.path import join
from pkg_resources import resource_filename
from snakemake import snakemake, parse_config
from snakemake import snakemake, parse_config, get_profile_file
from snekmer import __version__

# define options
Expand Down Expand Up @@ -57,6 +57,8 @@ def get_argument_parser():
)
parser["smk"].add_argument(
"--configfile",
nargs="+",
# default="config.yaml",
metavar="PATH",
help=(
"Specify or overwrite the config file of the workflow (see the docs). "
Expand Down Expand Up @@ -132,9 +134,8 @@ def get_argument_parser():
parser["smk"].add_argument(
"--cores",
"-c",
action="store",
const=cpu_count(),
nargs="?",
default=cpu_count(),
type=int,
metavar="N",
help=(
"Use at most N CPU cores/jobs in parallel. "
Expand Down Expand Up @@ -219,6 +220,7 @@ def get_argument_parser():
parser["clust"] = parser["smk"].add_argument_group("Cluster Execution Arguments")
parser["clust"].add_argument(
"--clust",
nargs="+",
metavar="PATH",
help="Path to cluster execution yaml configuration file.",
)
Expand Down Expand Up @@ -291,11 +293,22 @@ def main():
else:
cluster = None

# fix configfile path
if args.configfile is None:
configfile = ["config.yaml"]
else:
configfile = args.configfile

if (args.directory is not None) and (args.configfile is None):
configfile = [os.path.join(args.directory, c) for c in configfile]
else:
configfile = list(map(os.path.abspath, configfile))

# parse operation mode
if args.mode == "cluster":
snakemake(
resource_filename("snekmer", join("rules", "cluster.smk")),
configfiles=[args.configfile],
resource_filename("snekmer", os.path.join("rules", "cluster.smk")),
configfiles=configfile,
config=config,
cluster_config=args.clust,
cluster=cluster,
Expand All @@ -318,8 +331,8 @@ def main():

elif args.mode == "model":
snakemake(
resource_filename("snekmer", join("rules", "model.smk")),
configfiles=[args.configfile],
resource_filename("snekmer", os.path.join("rules", "model.smk")),
configfiles=configfile,
config=config,
cluster_config=args.clust,
cluster=cluster,
Expand All @@ -342,8 +355,8 @@ def main():

elif args.mode == "search":
snakemake(
resource_filename("snekmer", join("rules", "search.smk")),
configfiles=[args.configfile],
resource_filename("snekmer", os.path.join("rules", "search.smk")),
configfiles=configfile,
config=config,
cluster_config=args.clust,
cluster=cluster,
Expand Down
15 changes: 10 additions & 5 deletions snekmer/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
import json
import pickle
import re
from ast import literal_eval
from os.path import basename, join, splitext
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -49,8 +50,8 @@ def load_npz(
"seqs": "sequence",
"vecs": "sequence_vector",
},
objects: tuple = ("kmerlist",)
) -> pd.DataFrame:
objects: Tuple = ("kmerlist",),
) -> Tuple[List, pd.DataFrame]:
"""Compile .npz results into dataframe.
Parameters
Expand All @@ -65,11 +66,13 @@ def load_npz(
"vecs": "sequence_vector",
}
).
objects : Tuple[str]
Column names for additional objects to return
Returns
-------
pd.DataFrame
Tabulated .npz data.
Tuple[List, pd.DataFrame]
Tuple with list of data objects and tabulated .npz data.
"""
data = np.load(filename)
Expand Down Expand Up @@ -181,6 +184,8 @@ def define_output_dir(alphabet: Union[str, int], k: int, nested: bool = False) -
Name of output directory, given nested directory parameters.
"""
if isinstance(nested, str):
nested = literal_eval(nested)
if not nested:
return "output"
if not isinstance(alphabet, str):
Expand Down
Loading

0 comments on commit 75ee8f0

Please sign in to comment.