Merge pull request #79 from KevinMenden/development

PR for Scaden v1.0.2
KevinMenden · Mar 13, 2021 · 6a7354e · 6a7354e
2 parents 8e204ec + 367f70d
commit 6a7354e
Show file tree

Hide file tree

Showing 12 changed files with 363 additions and 311 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Scaden Changelog
 
+### Version 1.0.2
+
+* General improvement of logging using the 'rich' library for colorized output
+* Added a verification check for the '--train_datasets' parameter to notify the user of
+  unavailable datasets
+
 ### Version 1.0.1
 
 * Made identification of datasets more robust to fix issue [#66](https://github.com/KevinMenden/scaden/issues/66)

diff --git a/README.md b/README.md
@@ -1,7 +1,8 @@
 ![Scaden](docs/img/scaden_logo.png)
 
 
-![Scaden version](https://img.shields.io/badge/Scaden-v1.0.0-cyan)
+![Scaden version](https://img.shields.io/badge/scaden-v1.0.2-cyan)
+
 ![MIT](https://img.shields.io/badge/License-MIT-black)
 ![Install with pip](https://img.shields.io/badge/Install%20with-pip-blue)
 ![Install with Bioconda](https://img.shields.io/badge/Install%20with-conda-green)

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,5 +1,11 @@
 # Scaden Changelog
 
+### Version 1.0.2
+
+* General improvement of logging using the 'rich' library for colorized output
+* Added a verification check for the '--train_datasets' parameter to notify the user of
+  unavailable datasets
+
 ### Version 1.0.1
 
 * Made identification of datasets more robust to fix issue [#66](https://github.com/KevinMenden/scaden/issues/66)
@@ -13,7 +19,6 @@
 
 ### Version 0.9.6
 
-
 + fixed Dockerfile (switched to pip installation)
 + added better error messages to `simulate` command
 + cleaned up dependencies
@@ -51,5 +56,4 @@ Commands:
 
 * `scaden process`: Process a training dataset for training
 * `scaden train`: Train a Scaden model
-* `scaden predict`: Predict cell type compositions of a given sample
-
+* `scaden predict`: Predict cell type compositions of a given sample
diff --git a/scaden/__main__.py b/scaden/__main__.py
@@ -1,12 +1,16 @@
 import click
+import sys
 import scaden
+import rich
+import rich.logging
 import logging
 import os
 from scaden.train import training
 from scaden.predict import prediction
 from scaden.process import processing
 from scaden.simulate import simulation
 from scaden.example import exampleData
+
 """
 
 author: Kevin Menden
@@ -17,7 +21,16 @@
 # Logging
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+logger.addHandler(
+    rich.logging.RichHandler(
+        level=logging.INFO,
+        console=rich.console.Console(file=sys.stderr),
+        show_time=False,
+        markup=True,
+    )
+)
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
 
 
 def main():
@@ -29,11 +42,11 @@ def main():
     |____/ \___\__,_|\__,_|\___|_| |_|
 
     """
-    click.echo(click.style(text, fg='blue'))
+    click.echo(click.style(text, fg="blue"))
     cli()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
 """
 Set up the command line client with different commands to execute
@@ -52,34 +65,36 @@ def cli():
 
 
 @cli.command()
-@click.argument('data_path',
-                type=click.Path(exists=True),
-                required=True,
-                metavar='<training data>')
+@click.argument(
+    "data_path", type=click.Path(exists=True), required=True, metavar="<training data>"
+)
+@click.option(
+    "--train_datasets",
+    default="",
+    help="Comma-separated list of datasets used for training. Uses all by default.",
+)
+@click.option("--model_dir", default="./", help="Path to store the model in")
+@click.option(
+    "--batch_size", default=128, help="Batch size to use for training. Default: 128"
+)
 @click.option(
-    '--train_datasets',
-    default='',
-    help=
-    'Comma-separated list of datasets used for training. Uses all by default.')
-@click.option('--model_dir', default="./", help='Path to store the model in')
-@click.option('--batch_size',
-              default=128,
-              help='Batch size to use for training. Default: 128')
-@click.option('--learning_rate',
-              default=0.0001,
-              help='Learning rate used for training. Default: 0.0001')
-@click.option('--steps', default=5000, help='Number of training steps')
-@click.option('--seed', default=0, help="Set random seed")
-def train(data_path, train_datasets, model_dir, batch_size, learning_rate,
-          steps, seed):
+    "--learning_rate",
+    default=0.0001,
+    help="Learning rate used for training. Default: 0.0001",
+)
+@click.option("--steps", default=5000, help="Number of training steps")
+@click.option("--seed", default=0, help="Set random seed")
+def train(data_path, train_datasets, model_dir, batch_size, learning_rate, steps, seed):
     """ Train a Scaden model """
-    training(data_path=data_path,
-             train_datasets=train_datasets,
-             model_dir=model_dir,
-             batch_size=batch_size,
-             learning_rate=learning_rate,
-             num_steps=steps,
-             seed=seed)
+    training(
+        data_path=data_path,
+        train_datasets=train_datasets,
+        model_dir=model_dir,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+        num_steps=steps,
+        seed=seed,
+    )
 
 
 """
@@ -88,21 +103,20 @@ def train(data_path, train_datasets, model_dir, batch_size, learning_rate,
 
 
 @cli.command()
-@click.argument('data_path',
-                type=click.Path(exists=True),
-                required=True,
-                metavar='<prediction data>')
-@click.option('--model_dir', default="./", help='Path to trained model')
-@click.option('--outname',
-              default="scaden_predictions.txt",
-              help='Name of predictions file.')
-@click.option('--seed', default=0, help="Set random seed")
+@click.argument(
+    "data_path",
+    type=click.Path(exists=True),
+    required=True,
+    metavar="<prediction data>",
+)
+@click.option("--model_dir", default="./", help="Path to trained model")
+@click.option(
+    "--outname", default="scaden_predictions.txt", help="Name of predictions file."
+)
+@click.option("--seed", default=0, help="Set random seed")
 def predict(data_path, model_dir, outname, seed):
     """ Predict cell type composition using a trained Scaden model"""
-    prediction(model_dir=model_dir,
-               data_path=data_path,
-               out_name=outname,
-               seed=seed)
+    prediction(model_dir=model_dir, data_path=data_path, out_name=outname, seed=seed)
 
 
 """
@@ -111,29 +125,37 @@ def predict(data_path, model_dir, outname, seed):
 
 
 @cli.command()
-@click.argument('data_path',
-                type=click.Path(exists=True),
-                required=True,
-                metavar='<training dataset to be processed>')
-@click.argument('prediction_data',
-                type=click.Path(exists=True),
-                required=True,
-                metavar='<data for prediction>')
-@click.option('--processed_path',
-              default="processed.h5ad",
-              help='Path of processed file. Must end with .h5ad')
+@click.argument(
+    "data_path",
+    type=click.Path(exists=True),
+    required=True,
+    metavar="<training dataset to be processed>",
+)
+@click.argument(
+    "prediction_data",
+    type=click.Path(exists=True),
+    required=True,
+    metavar="<data for prediction>",
+)
 @click.option(
-    '--var_cutoff',
+    "--processed_path",
+    default="processed.h5ad",
+    help="Path of processed file. Must end with .h5ad",
+)
+@click.option(
+    "--var_cutoff",
     default=0.1,
-    help=
-    'Filter out genes with a variance less than the specified cutoff. A low cutoff is recommended,'
-    'this should only remove genes that are obviously uninformative.')
+    help="Filter out genes with a variance less than the specified cutoff. A low cutoff is recommended,"
+    "this should only remove genes that are obviously uninformative.",
+)
 def process(data_path, prediction_data, processed_path, var_cutoff):
     """ Process a dataset for training """
-    processing(data_path=prediction_data,
-               training_data=data_path,
-               processed_path=processed_path,
-               var_cutoff=var_cutoff)
+    processing(
+        data_path=prediction_data,
+        training_data=data_path,
+        processed_path=processed_path,
+        var_cutoff=var_cutoff,
+    )
 
 
 """
@@ -142,44 +164,46 @@ def process(data_path, prediction_data, processed_path, var_cutoff):
 
 
 @cli.command()
-@click.option('--out',
-              '-o',
-              default='./',
-              help="Directory to store output files in")
-@click.option('--data', '-d', default='.', help="Path to scRNA-seq dataset(s)")
-@click.option('--cells',
-              '-c',
-              default=100,
-              help="Number of cells per sample [default: 100]")
-@click.option('--n_samples',
-              '-n',
-              default=1000,
-              help="Number of samples to simulate [default: 1000]")
+@click.option("--out", "-o", default="./", help="Directory to store output files in")
+@click.option("--data", "-d", default=".", help="Path to scRNA-seq dataset(s)")
+@click.option(
+    "--cells", "-c", default=100, help="Number of cells per sample [default: 100]"
+)
+@click.option(
+    "--n_samples",
+    "-n",
+    default=1000,
+    help="Number of samples to simulate [default: 1000]",
+)
 @click.option(
-    '--pattern',
+    "--pattern",
     default="*_counts.txt",
-    help="File pattern to recognize your processed scRNA-seq count files")
+    help="File pattern to recognize your processed scRNA-seq count files",
+)
 @click.option(
-    '--unknown',
-    '-u',
+    "--unknown",
+    "-u",
     multiple=True,
-    default=['unknown'],
-    help=
-    "Specifiy cell types to merge into the unknown category. Specify this flag for every cell type you want to merge in unknown. [default: unknown]"
-)
-@click.option('--prefix',
-              '-p',
-              default="data",
-              help="Prefix to append to training .h5ad file [default: data]")
+    default=["unknown"],
+    help="Specifiy cell types to merge into the unknown category. Specify this flag for every cell type you want to merge in unknown. [default: unknown]",
+)
+@click.option(
+    "--prefix",
+    "-p",
+    default="data",
+    help="Prefix to append to training .h5ad file [default: data]",
+)
 def simulate(out, data, cells, n_samples, pattern, unknown, prefix):
     """ Create artificial bulk RNA-seq data from scRNA-seq dataset(s)"""
-    simulation(simulate_dir=out,
-               data_dir=data,
-               sample_size=cells,
-               num_samples=n_samples,
-               pattern=pattern,
-               unknown_celltypes=unknown,
-               out_prefix=prefix)
+    simulation(
+        simulate_dir=out,
+        data_dir=data,
+        sample_size=cells,
+        num_samples=n_samples,
+        pattern=pattern,
+        unknown_celltypes=unknown,
+        out_prefix=prefix,
+    )
 
 
 """
@@ -188,25 +212,12 @@ def simulate(out, data, cells, n_samples, pattern, unknown, prefix):
 
 
 @cli.command()
-@click.option('--out',
-              '-o',
-              default='./',
-              help="Directory to store output files in")
-@click.option('--cells',
-              '-c',
-              default=10,
-              help="Number of cells [default: 10]")
-@click.option('--genes',
-              '-g',
-              default=100,
-              help="Number of genes [default: 100]")
-@click.option('--out',
-              '-o',
-              default="./",
-              help="Output directory [default: ./]")
-@click.option('--samples',
-              '-n',
-              default=10,
-              help="Number of bulk samples [default: 10]")
+@click.option("--out", "-o", default="./", help="Directory to store output files in")
+@click.option("--cells", "-c", default=10, help="Number of cells [default: 10]")
+@click.option("--genes", "-g", default=100, help="Number of genes [default: 100]")
+@click.option("--out", "-o", default="./", help="Output directory [default: ./]")
+@click.option(
+    "--samples", "-n", default=10, help="Number of bulk samples [default: 10]"
+)
 def example(cells, genes, samples, out):
     exampleData(n_cells=cells, n_genes=genes, n_samples=samples, out_dir=out)