Skip to content

Commit

Permalink
Merge pull request #79 from KevinMenden/development
Browse files Browse the repository at this point in the history
PR for Scaden v1.0.2
  • Loading branch information
KevinMenden authored Mar 13, 2021
2 parents 8e204ec + 367f70d commit 6a7354e
Show file tree
Hide file tree
Showing 12 changed files with 363 additions and 311 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Scaden Changelog

### Version 1.0.2

* General improvement of logging using the 'rich' library for colorized output
* Added verification check for '--train_datasets' parameter to notify user of
unavailable datasets

### Version 1.0.1

* Made identification of datasets more robust to fix issue [#66](https://github.com/KevinMenden/scaden/issues/66)
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
![Scaden](docs/img/scaden_logo.png)


![Scaden version](https://img.shields.io/badge/Scaden-v1.0.0-cyan)
![Scaden version](https://img.shields.io/badge/scaden-v1.0.2-cyan)

![MIT](https://img.shields.io/badge/License-MIT-black)
![Install with pip](https://img.shields.io/badge/Install%20with-pip-blue)
![Install with Bioconda](https://img.shields.io/badge/Install%20with-conda-green)
Expand Down
10 changes: 7 additions & 3 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Scaden Changelog

### Version 1.0.2

* General improvement of logging using the 'rich' library for colorized output
* Added verification check for '--train_datasets' parameter to notify user of
unavailable datasets

### Version 1.0.1

* Made identification of datasets more robust to fix issue [#66](https://github.com/KevinMenden/scaden/issues/66)
Expand All @@ -13,7 +19,6 @@

### Version 0.9.6


+ fixed Dockerfile (switched to pip installation)
+ added better error messages to `simulate` command
+ cleaned up dependencies
Expand Down Expand Up @@ -51,5 +56,4 @@ Commands:

* `scaden process`: Process a training dataset for training
* `scaden train`: Train a Scaden model
* `scaden predict`: Predict cell type compositions of a given sample

* `scaden predict`: Predict cell type compositions of a given sample
237 changes: 124 additions & 113 deletions scaden/__main__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import click
import sys
import scaden
import rich
import rich.logging
import logging
import os
from scaden.train import training
from scaden.predict import prediction
from scaden.process import processing
from scaden.simulate import simulation
from scaden.example import exampleData

"""
author: Kevin Menden
Expand All @@ -17,7 +21,16 @@
# Logging
# Root logger at INFO; records are rendered by rich's RichHandler, writing to
# stderr, without timestamps and with rich markup enabled in messages.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

_rich_handler = rich.logging.RichHandler(
    level=logging.INFO,
    console=rich.console.Console(file=sys.stderr),
    show_time=False,
    markup=True,
)
logger.addHandler(_rich_handler)

# NOTE(review): presumably "0" keeps TensorFlow's native logging fully verbose;
# confirm that this is the intended level.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"


def main():
Expand All @@ -29,11 +42,11 @@ def main():
|____/ \___\__,_|\__,_|\___|_| |_|
"""
click.echo(click.style(text, fg='blue'))
click.echo(click.style(text, fg="blue"))
cli()


# Script entry point.
# NOTE(review): `cli` is defined further down this module, after this guard.
# When the module is executed directly, `main()` runs here and its call to
# `cli()` would hit a name that is not bound yet. Consider moving this guard
# to the very end of the file.
if __name__ == "__main__":
    main()
"""
Set up the command line client with different commands to execute
Expand All @@ -52,34 +65,36 @@ def cli():


@cli.command()
@click.argument(
    "data_path", type=click.Path(exists=True), required=True, metavar="<training data>"
)
@click.option(
    "--train_datasets",
    default="",
    help="Comma-separated list of datasets used for training. Uses all by default.",
)
@click.option("--model_dir", default="./", help="Path to store the model in")
@click.option(
    "--batch_size", default=128, help="Batch size to use for training. Default: 128"
)
@click.option(
    "--learning_rate",
    default=0.0001,
    help="Learning rate used for training. Default: 0.0001",
)
@click.option("--steps", default=5000, help="Number of training steps")
@click.option("--seed", default=0, help="Set random seed")
def train(data_path, train_datasets, model_dir, batch_size, learning_rate, steps, seed):
    """ Train a Scaden model """
    # The docstring is what click shows as the command's help text, so
    # implementation notes are kept in comments.
    #
    # Every parsed CLI value is forwarded unchanged to `training`; the only
    # rename is `--steps`, which is passed on as `num_steps`.
    training(
        data_path=data_path,
        train_datasets=train_datasets,
        model_dir=model_dir,
        batch_size=batch_size,
        learning_rate=learning_rate,
        num_steps=steps,
        seed=seed,
    )


"""
Expand All @@ -88,21 +103,20 @@ def train(data_path, train_datasets, model_dir, batch_size, learning_rate,


@cli.command()
@click.argument(
    "data_path",
    type=click.Path(exists=True),
    required=True,
    metavar="<prediction data>",
)
@click.option("--model_dir", default="./", help="Path to trained model")
@click.option(
    "--outname", default="scaden_predictions.txt", help="Name of predictions file."
)
@click.option("--seed", default=0, help="Set random seed")
def predict(data_path, model_dir, outname, seed):
    """ Predict cell type composition using a trained Scaden model"""
    # The docstring is what click shows as the command's help text, so
    # implementation notes are kept in comments.
    #
    # `--outname` is handed to `prediction` under the keyword `out_name`;
    # the other values keep their names.
    prediction(model_dir=model_dir, data_path=data_path, out_name=outname, seed=seed)


"""
Expand All @@ -111,29 +125,37 @@ def predict(data_path, model_dir, outname, seed):


@cli.command()
@click.argument(
    "data_path",
    type=click.Path(exists=True),
    required=True,
    metavar="<training dataset to be processed>",
)
@click.argument(
    "prediction_data",
    type=click.Path(exists=True),
    required=True,
    metavar="<data for prediction>",
)
@click.option(
    "--processed_path",
    default="processed.h5ad",
    help="Path of processed file. Must end with .h5ad",
)
@click.option(
    "--var_cutoff",
    default=0.1,
    # The two literals are concatenated, so the first one must end with a
    # space; otherwise the help text reads "recommended,this".
    help="Filter out genes with a variance less than the specified cutoff. A low cutoff is recommended, "
    "this should only remove genes that are obviously uninformative.",
)
def process(data_path, prediction_data, processed_path, var_cutoff):
    """ Process a dataset for training """
    # The docstring is what click shows as the command's help text, so
    # implementation notes are kept in comments.
    #
    # Mind the keyword mapping: the CLI's first argument (`data_path`, the
    # training dataset) goes to `processing` as `training_data`, while the
    # CLI's `prediction_data` is what `processing` receives as `data_path`.
    processing(
        data_path=prediction_data,
        training_data=data_path,
        processed_path=processed_path,
        var_cutoff=var_cutoff,
    )


"""
Expand All @@ -142,44 +164,46 @@ def process(data_path, prediction_data, processed_path, var_cutoff):


@cli.command()
@click.option("--out", "-o", default="./", help="Directory to store output files in")
@click.option("--data", "-d", default=".", help="Path to scRNA-seq dataset(s)")
@click.option(
    "--cells", "-c", default=100, help="Number of cells per sample [default: 100]"
)
@click.option(
    "--n_samples",
    "-n",
    default=1000,
    help="Number of samples to simulate [default: 1000]",
)
@click.option(
    "--pattern",
    default="*_counts.txt",
    help="File pattern to recognize your processed scRNA-seq count files",
)
@click.option(
    "--unknown",
    "-u",
    multiple=True,
    default=["unknown"],
    help="Specify cell types to merge into the unknown category. Specify this flag for every cell type you want to merge in unknown. [default: unknown]",
)
@click.option(
    "--prefix",
    "-p",
    default="data",
    help="Prefix to append to training .h5ad file [default: data]",
)
def simulate(out, data, cells, n_samples, pattern, unknown, prefix):
    """ Create artificial bulk RNA-seq data from scRNA-seq dataset(s)"""
    # The docstring is what click shows as the command's help text, so
    # implementation notes are kept in comments.
    #
    # CLI option -> `simulation` keyword:
    #   --out -> simulate_dir, --data -> data_dir, --cells -> sample_size,
    #   --n_samples -> num_samples, --unknown -> unknown_celltypes,
    #   --prefix -> out_prefix; --pattern keeps its name.
    # `--unknown` is declared with multiple=True, so it may be repeated.
    simulation(
        simulate_dir=out,
        data_dir=data,
        sample_size=cells,
        num_samples=n_samples,
        pattern=pattern,
        unknown_celltypes=unknown,
        out_prefix=prefix,
    )


"""
Expand All @@ -188,25 +212,12 @@ def simulate(out, data, cells, n_samples, pattern, unknown, prefix):


@cli.command()
@click.option("--cells", "-c", default=10, help="Number of cells [default: 10]")
@click.option("--genes", "-g", default=100, help="Number of genes [default: 100]")
# `--out` / `-o` was previously declared twice on this command; a single
# declaration keeps the same flag names and default.
@click.option("--out", "-o", default="./", help="Output directory [default: ./]")
@click.option(
    "--samples", "-n", default=10, help="Number of bulk samples [default: 10]"
)
def example(cells, genes, samples, out):
    """ Generate example data """
    # The docstring is what click shows as the command's help text; without
    # it the command had no description.
    #
    # CLI option -> `exampleData` keyword:
    #   --cells -> n_cells, --genes -> n_genes, --samples -> n_samples,
    #   --out -> out_dir.
    exampleData(n_cells=cells, n_genes=genes, n_samples=samples, out_dir=out)
Loading

0 comments on commit 6a7354e

Please sign in to comment.