diff --git a/.gitignore b/.gitignore
index a2dcf092c..2a0b463d2 100755
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,10 @@ docs/build
docs/source/dynamo*
docs/source/_autosummary
+docs/generated/
+docs/_build/
+docs/api/reference/*rst
+
# always-ignore directories
/build/
/dist/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..31ac15d16
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "docs/tutorials/notebooks"]
+ path = docs/tutorials/notebooks
+ url = https://github.com/aristoteleo/dynamo-tutorials.git
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 4a85e4e60..0cb602e76 100755
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -9,7 +9,7 @@ build:
# Build documentation in the docs/ directory with Sphinx
sphinx:
- configuration: docs/source/conf.py
+ configuration: docs/conf.py
# python:
# version: 3.7
@@ -23,4 +23,9 @@ python:
extra_requirements:
- docs
# - method: setuptools
- # path: package
\ No newline at end of file
+ # path: package
+
+submodules:
+ include:
+ - "docs/tutorials/notebooks"
+ recursive: true
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 345ead81c..fda61cce8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,15 +1,340 @@
-Released
-==========
-v1.0.0 Release:
+# Release Notes
-Added for new features.
+## Dynamo Ver 1.4.1
-Changed for changes in existing functionality.
+### DEBUG
-Deprecated for soon-to-be removed features.
+- Debug and refactor scPotential ([PR 624](https://github.com/aristoteleo/dynamo-release/pull/624)).
+- Replace deprecated `np.asscalar()` with `np.ndarray.item()` ([PR 643](https://github.com/aristoteleo/dynamo-release/pull/643)).
+- Create chunk option for normalization and gene selection ([PR 598](https://github.com/aristoteleo/dynamo-release/pull/598)).
+- Debug `pd.state_graph()` ([PR 630](https://github.com/aristoteleo/dynamo-release/pull/630)).
+- Debug `pl.jacobian_heatmap()` ([PR 653](https://github.com/aristoteleo/dynamo-release/pull/653)).
+- Debug `pl.nneighbors()` ([PR 644](https://github.com/aristoteleo/dynamo-release/pull/644)).
+- Retry codecov upload ([PR 656](https://github.com/aristoteleo/dynamo-release/pull/656)).
+- Debug vectorfield given layer input ([PR 619](https://github.com/aristoteleo/dynamo-release/pull/619)).
+- Debug simulation module ([PR 658](https://github.com/aristoteleo/dynamo-release/pull/658)).
+- Extra filter after pearson residuals normalization ([PR 665](https://github.com/aristoteleo/dynamo-release/pull/665)).
+- Add missing return value to deprecated functions ([PR 663](https://github.com/aristoteleo/dynamo-release/pull/663)).
+- Debug networks plot ([PR 657](https://github.com/aristoteleo/dynamo-release/pull/657)).
+- Implement `pl.plot_connectivity()` ([PR 652](https://github.com/aristoteleo/dynamo-release/pull/652)).
+- Debug the preprocessing of integer matrix input ([PR 664](https://github.com/aristoteleo/dynamo-release/pull/664)).
+- Missing return value in `pl.lap_min_time()` ([PR 668](https://github.com/aristoteleo/dynamo-release/pull/668)).
+- Update matplotlib `Colorbar.draw_all()` to `Colorbar._draw_all()` ([PR 669](https://github.com/aristoteleo/dynamo-release/pull/669)).
+- Optimize code coverage tests ([PR 605](https://github.com/aristoteleo/dynamo-release/pull/605)).
+- Debug `test_gradop()` ([PR 677](https://github.com/aristoteleo/dynamo-release/pull/677)).
+- Constraint on matplotlib version ([PR 679](https://github.com/aristoteleo/dynamo-release/pull/679)).
+- Upgrade code coverage to v4 ([PR 684](https://github.com/aristoteleo/dynamo-release/pull/684)).
+- Init a branch for updating dependency ([PR 690](https://github.com/aristoteleo/dynamo-release/pull/690)).
+- Replace `louvain` with `leiden` ([PR 692](https://github.com/aristoteleo/dynamo-release/pull/692)).
+- Debug `pl.highest_frac_genes()` ([PR 681](https://github.com/aristoteleo/dynamo-release/pull/681)).
+- Deprecate more sparse matrix `.A` attributes ([PR 695](https://github.com/aristoteleo/dynamo-release/pull/695)).
+- Fix matplotlib version issues and a circular import issue ([PR 686](https://github.com/aristoteleo/dynamo-release/pull/686)).
+- Debug `set_figure_params()` ([PR 698](https://github.com/aristoteleo/dynamo-release/pull/698)).
+- Debug: shape and name mismatch in cell-wise alpha saving ([PR 697](https://github.com/aristoteleo/dynamo-release/pull/697)).
+- Debug: The sizes of the scatter plots are not set correctly ([PR 696](https://github.com/aristoteleo/dynamo-release/pull/696)).
-Removed for now removed features.
+### Others
-Fixed for any bug fixes.
+- Refactor `pd.fate()` with Trajectory class ([PR 645](https://github.com/aristoteleo/dynamo-release/pull/645)).
+- Reorganize estimation module ([PR 662](https://github.com/aristoteleo/dynamo-release/pull/662)).
+- Refactor `pl.scatters()` and `pl.scatters_interactive()` ([PR 654](https://github.com/aristoteleo/dynamo-release/pull/654)).
+- Refactor `vf.VectorField()` function ([PR 620](https://github.com/aristoteleo/dynamo-release/pull/620)).
+- Docstring and type hints for the prediction module ([PR 666](https://github.com/aristoteleo/dynamo-release/pull/666)).
+- Update docstring and type hints for External module ([PR 661](https://github.com/aristoteleo/dynamo-release/pull/661)).
+- Add docstring and type hints for simulation module ([PR 660](https://github.com/aristoteleo/dynamo-release/pull/660)).
+- Docstring and type hints for root folder python files ([PR 667](https://github.com/aristoteleo/dynamo-release/pull/667)).
-Security in case of vulnerabilities.
+## Dynamo Ver 1.4.0
+
+### Feature Changes
+
+- Shiny web application for in silico perturbation and least square action path analyses
+ ([PR 582](https://github.com/aristoteleo/dynamo-release/pull/582)).
+
+- More 3D plots ([PR 597](https://github.com/aristoteleo/dynamo-release/pull/597)):
+
+ - 3D scatters with Plotly and Pyvista `dyn.pl.scatters_interactive()`.
+ - 3D vectors with Plotly and Pyvista `dyn.pl.cell_wise_vectors_3d()`.
+ - 3D topography with Plotly and Pyvista `dyn.pl.topography_3d()`.
+ - 3D animation with Pyvista `dyn.mv.PyvistaAnim()`.
+
+- Saved the velocity parameters in `adata.varm` instead of `adata.var`
+ ([PR 579](https://github.com/aristoteleo/dynamo-release/pull/579)).
+
+- DDRtree based pseudotime and graph learning ([PR 564](https://github.com/aristoteleo/dynamo-release/pull/564)):
+ `dyn.tl.order_cells()`, `dyn.tl.construct_velocity_tree()`.
+
+- Integrated `hnswlib` fast nearest neighbors method ([PR 552](https://github.com/aristoteleo/dynamo-release/pull/552)).
+
+- A helper function to convert the AnnData object from Dynamo to Scvelo, or vice versa
+ ([PR 551](https://github.com/aristoteleo/dynamo-release/pull/551)).
+
+- The tools module has been reorganized ([PR 625](https://github.com/aristoteleo/dynamo-release/pull/625)):
+
+ - Deprecate files `dynamo_fitting.py`, `dynamo_bk.py`, `dynamics_deprecated.py`, `utils_moments_deprecated.py`.
+ - Deprecate legacy functions in `construct_velocity_tree.py`,`pseudotime.py`, `moments.py`, `clustering.py`.
+ - Merge `utils_markers.py` and `markers.py`.
+ - Merge `time_series.py` (learns a direct principal graph by integrating the transition matrix between and DDRTree)
+ and `construct_velocity_tree.py` (Integrate pseudotime ordering with velocity to automatically assign the direction
+ of the learned trajectory.) to `DDRTree_graph.py`.
+ - Reorganize some functions to utils in the following file: `time_series.py`, `multiomics.py`.
+ - Rename: `DDRTree_py.py` to `DDRTree.py`, `psl_py.py` to `psl.py`.
+
+- Deprecate infomap clustering ([PR 555](https://github.com/aristoteleo/dynamo-release/pull/555)).
+
+### DEBUG
+
+- Fixed the bug that the `dyn.pl.kinetic_heatmap()` couldn't be transposed due to wrong initialization
+ ([PR 558](https://github.com/aristoteleo/dynamo-release/pull/558))
+ ([PR 636](https://github.com/aristoteleo/dynamo-release/pull/636)).
+- Fixed the bug that `dyn.pl.cell_wise_vectors()` only output one color
+ ([PR 559](https://github.com/aristoteleo/dynamo-release/pull/559)).
+- Debugged the sampling method in tools modules
+ ([PR 565](https://github.com/aristoteleo/dynamo-release/pull/565)).
+- Fixed the pandas error in `dyn.tl.gene_wise_confidence()`
+ ([PR 567](https://github.com/aristoteleo/dynamo-release/pull/567)).
+- Fixed the bug that `pysal` submodules were not imported explicitly
+ ([PR 568](https://github.com/aristoteleo/dynamo-release/pull/568)).
+- Debugged `dyn.tl.score_cells()` ([PR 569](https://github.com/aristoteleo/dynamo-release/pull/569)).
+- Debugged the ambiguous if statement in `dyn.tl.psl()`
+ ([PR 573](https://github.com/aristoteleo/dynamo-release/pull/573)).
+- Updated all the expired links of sample dataset ([PR 577](https://github.com/aristoteleo/dynamo-release/pull/577)).
+- Fixed the bug that the processed AnnData object couldn't be saved in some cases
+ ([PR 580](https://github.com/aristoteleo/dynamo-release/pull/580)).
+- Debugged `pp/transform.py` ([PR 581](https://github.com/aristoteleo/dynamo-release/pull/581)).
+- Debugged `dyn.tl.cell_velocities()` ([PR 585](https://github.com/aristoteleo/dynamo-release/pull/585)).
+- Debugged `dyn.pl.kinetic_curves()` ([PR 587](https://github.com/aristoteleo/dynamo-release/pull/587)).
+- Fixed the error caused by wrong type hints in `dyn.tl.BaseVectorField.find_fixed_points()`
+ ([PR 597](https://github.com/aristoteleo/dynamo-release/pull/597)).
+- Fixed the error caused by excessive memory usage in tests
+ ([PR 602](https://github.com/aristoteleo/dynamo-release/pull/602)).
+- Fixed the KeyError in `dyn.pp.convert2symbol()` when all genes are found
+ ([PR 603](https://github.com/aristoteleo/dynamo-release/pull/603)).
+- Fixed the issue that `dyn.pp.highest_frac_genes()` didn't support sparse input
+ ([PR 604](https://github.com/aristoteleo/dynamo-release/pull/604)).
+- Debugged `dyn.tl.cell_growth_rate()` ([PR 606](https://github.com/aristoteleo/dynamo-release/pull/606)).
+- Debugged the arclength sampling method in `dyn.pd.fate()`
+ ([PR 592](https://github.com/aristoteleo/dynamo-release/pull/592))
+ ([PR 610](https://github.com/aristoteleo/dynamo-release/pull/610)).
+- Removed unnecessary import of pandas ([PR 614](https://github.com/aristoteleo/dynamo-release/pull/614)).
+- Debugged the `dyn.pl.topography()` when the color is not provided
+ ([PR 617](https://github.com/aristoteleo/dynamo-release/pull/617)).
+- Fixed the error that list object doesn't have to_list() method in `dyn.vf.hessian()`
+ ([PR 623](https://github.com/aristoteleo/dynamo-release/pull/623)).
+- Fixed the ambiguous if statement in the `dyn.tl.MarkovChain.is_normalized()`
+ ([PR 626](https://github.com/aristoteleo/dynamo-release/pull/626)).
+- Debugged the `dyn.pd.classify_clone_cell_type()` ([PR 627](https://github.com/aristoteleo/dynamo-release/pull/627)).
+- Fixed the input of `minimize()` in `dyn.pd.lap_T()`
+ ([PR 628](https://github.com/aristoteleo/dynamo-release/pull/628)).
+- Fixed the bug that average parameter didn't work in `dyn.pd.fate()`
+ ([PR 629](https://github.com/aristoteleo/dynamo-release/pull/629)).
+- Debugged the `dyn.pl.line_integral_conv()` ([PR 639](https://github.com/aristoteleo/dynamo-release/pull/639)).
+
+### Others
+
+- Now available on [conda forge](https://anaconda.org/conda-forge/dynamo-release).
+- Removed `cdlib` dependency ([PR 532](https://github.com/aristoteleo/dynamo-release/pull/532)).
+- Removed `KDEpy` dependency ([PR 533](https://github.com/aristoteleo/dynamo-release/pull/533)).
+- Added code coverage report ([PR 555](https://github.com/aristoteleo/dynamo-release/pull/555)).
+- Optimized the structure of the umap dimension reduction
+ ([PR 556](https://github.com/aristoteleo/dynamo-release/pull/556)).
+- Optimized the structure and supported sparse input in `tools/graph_calculus.py`
+ ([PR 557](https://github.com/aristoteleo/dynamo-release/pull/557)).
+- Updated `networkx` API ([PR 560](https://github.com/aristoteleo/dynamo-release/pull/560)).
+- Replaced `python-igraph` dependency with `igraph` ([PR 563](https://github.com/aristoteleo/dynamo-release/pull/563)).
+- Added docstrings for tools module ([PR 570](https://github.com/aristoteleo/dynamo-release/pull/570)).
+- Removed duplicate size factor calculation ([PR 596](https://github.com/aristoteleo/dynamo-release/pull/596)).
+- Implemented a helper function for saving the plots
+ ([PR 609](https://github.com/aristoteleo/dynamo-release/pull/609))
+ ([PR 635](https://github.com/aristoteleo/dynamo-release/pull/635)).
+- Added docstrings for estimation module ([PR 611](https://github.com/aristoteleo/dynamo-release/pull/611)).
+- Merged `dyn.pd.rank_cells()` and `dyn.pd.rank_cell_groups()`
+ ([PR 613](https://github.com/aristoteleo/dynamo-release/pull/613)).
+- Added the conda badge ([PR 618](https://github.com/aristoteleo/dynamo-release/pull/618)).
+- Handled the duplicate files when downloading sample data
+ ([PR 621](https://github.com/aristoteleo/dynamo-release/pull/621)).
+- Debugged the ROC curve in Shiny app ([PR 637](https://github.com/aristoteleo/dynamo-release/pull/637)).
+
+## Dynamo Ver 1.3.0
+
+### Feature Changes
+
+- The preprocessing module has been refactored:
+
+ - Class *Preprocessor* is recommended for most preprocessing methods and recipes. `pp.recipe_monocle,`
+ `pp.recipe_velocyto` have been deprecated ([PR 497](https://github.com/aristoteleo/dynamo-release/pull/497))
+ ([PR 500](https://github.com/aristoteleo/dynamo-release/pull/500)).
+ Check the tutorials [here](Preprocessor_tutorial.rst) for more instructions.
+ - Normalization has been refactored ([PR 474](https://github.com/aristoteleo/dynamo-release/pull/474))
+ ([PR 475](https://github.com/aristoteleo/dynamo-release/pull/475)): `pp.normalize_cell_expr_by_size_factors`
+ has been deprecated, and new APIs are:
+
+ - `pp.normalize_cell_expr_by_size_factors` -> `pp.calc_sz_factor, pp.normalize`.
+
+ - Gene selection has been refactored ([PR 474](https://github.com/aristoteleo/dynamo-release/pull/474)). Now support
+ genes selected by fano factors. APIs are `pp.select_genes_monocle` and `pp.select_genes_by_seurat_recipe`.
+ - PCA has been refactored ([PR 469](https://github.com/aristoteleo/dynamo-release/pull/469)). `dyn.pp.pca_monocle`
+ has been deprecated. The new API is:
+
+ - `pp.pca_monocle` -> `pp.pca`.
+
+ - sctransform and pearson residuals recipe has been refactored
+ ([PR 510](https://github.com/aristoteleo/dynamo-release/pull/510))
+ ([PR 512](https://github.com/aristoteleo/dynamo-release/pull/512)). Now those advanced methods will only be
+ performed on X layer. Other layers will get normalized by size factors.
+ - Calculation of `ntr` rate and `pp.cell_cycle_scores` has been added to the Preprocessor
+ ([PR 513](https://github.com/aristoteleo/dynamo-release/pull/513)). To enable cell cycle scores, set parameter
+ `cell_cycle_score_enable` to `True` when initializing the `pp.Preprocessor`.
+ - Now the size factors normalization will normalize all layers with its own size factors by default
+ ([PR 521](https://github.com/aristoteleo/dynamo-release/pull/521)). To normalize the labeled data with total size
+ factors, we need to set the `total_szfactor` to `total_Size_Factor` explicitly.
+ - Multiple new features added, including gene selection by fano factors
+ ([PR 474](https://github.com/aristoteleo/dynamo-release/pull/474)), external data integration methods
+ ([PR 473](https://github.com/aristoteleo/dynamo-release/pull/473)) and `pp.regress_out`
+ ([PR 470](https://github.com/aristoteleo/dynamo-release/pull/470))
+ ([PR 483](https://github.com/aristoteleo/dynamo-release/pull/483))
+ ([PR 484](https://github.com/aristoteleo/dynamo-release/pull/484)).
+ - Created more tests for preprocessing module ([PR 485](https://github.com/aristoteleo/dynamo-release/pull/485)).
+ - Replaced `adata.obsm["X"]` with `adata.obsm["X_pca"]`
+ ([PR 514](https://github.com/aristoteleo/dynamo-release/pull/514)).
+ - Removed some console output. They can still be displayed with `DEBUG` logging mode.
+ - Other deprecated APIs include: `pp.calc_sz_factor_legacy, pp.filter_cells_legacy`,
+ `pp.filter_genes_by_outliers_legacy, pp.select_genes_monocle_legacy, pp.select_genes_by_dispersion_general`,
+ `pp.cook_dist, pp.normalize_cell_expr_by_size_factors`. More information can be found on our
+ [preprocessing tutorials](Preprocessor_tutorial.rst).
+
+### DEBUG
+
+- Fixed the bug that the save_show_or_return flags were not working
+ ([PR 414](https://github.com/aristoteleo/dynamo-release/pull/414)).
+- Enabled the leiden algorithm to accept the resolution parameters
+ ([PR 441](https://github.com/aristoteleo/dynamo-release/pull/441)).
+- Fixed the wrong attribute name of anndata object in `utils_dimensionReduction.py`
+ ([PR 458](https://github.com/aristoteleo/dynamo-release/pull/458)).
+- Fixed the dimensionality issue in `moments.py`
+ ([PR 461](https://github.com/aristoteleo/dynamo-release/pull/461)).
+- Fixed part of the bug that h5ad file cannot be saved correctly
+ ([PR 467](https://github.com/aristoteleo/dynamo-release/pull/467)).
+- Fixed the bug that `pca_mean` will be `None` under some circumstances
+ ([PR 482](https://github.com/aristoteleo/dynamo-release/pull/482)).
+- Removed the warning message for nxviz
+ ([PR 489](https://github.com/aristoteleo/dynamo-release/pull/489)).
+- Corrected the norm log-likelihood function
+ ([PR 495](https://github.com/aristoteleo/dynamo-release/pull/495)).
+- Removed deprecated parameters in gseapy functions
+ ([PR 496](https://github.com/aristoteleo/dynamo-release/pull/496)).
+- Fixed the bug that functions raised an error when no fixed points were found in the vector field by sampling
+ ([PR 501](https://github.com/aristoteleo/dynamo-release/pull/501)).
+- Removed unwanted operations in dimension reduction
+ ([PR 502](https://github.com/aristoteleo/dynamo-release/pull/502)).
+
+### Tutorial Updates on Readthedocs
+
+- Documentation, Tutorials, and readthedocs update:
+
+ - Update requirements for readthedocs ([PR 466](https://github.com/aristoteleo/dynamo-release/pull/466)).
+ - Update readme ([PR 479](https://github.com/aristoteleo/dynamo-release/pull/479)).
+ - Fixed documentation error caused by importing Literal
+ ([PR 486](https://github.com/aristoteleo/dynamo-release/pull/486)).
+ - Fixed readthedocs error caused by the new version of urllib3
+ ([PR 488](https://github.com/aristoteleo/dynamo-release/pull/488)).
+
+### Other Changes
+
+- Docstring and type hints update:
+
+ - Updated docstring and type hints for tools module
+ ([PR 419](https://github.com/aristoteleo/dynamo-release/pull/419)).
+ - Updated docstring and type hints for vector field module
+ ([PR 434](https://github.com/aristoteleo/dynamo-release/pull/434)).
+ - Updated the docstring and type hints for simulation and predicting module
+ ([PR 457](https://github.com/aristoteleo/dynamo-release/pull/457)).
+ - Update the docstring and type hints for hzplot
+ ([PR 456](https://github.com/aristoteleo/dynamo-release/pull/456)).
+
+## Dynamo Ver 1.1.0
+
+### Feature Changes
+
+- The following new functions are added, exported or documented in the API / class page:
+
+ - *Preprocessing*: `pp.convert2symbol, pp.filter_cells, pp.filter_gene,`
+ `pp.filter_genes_by_pattern, pp.normalize_cells, pp.scale, pp.log1p, pp.pca`
+ - *Kinetic parameters and RNA/protein velocity*: `tl.recipe_deg_data, tl.recipe_kin_data,`
+ `tl.recipe_mix_kin_deg_data, tl.recipe_one_shot_data, tl.velocity_N`
+ - *Labeling Velocity recipes*: `tl.infomap, tl.leiden, tl.louvain, tl.scc`
+ - *Clustering*: `tl.run_scvelo, tl.run_velocyto, tl.vlm_to_adata`
+ - *Converter and helper*: `vf.graphize_vecfld, vf.vector_field_function`
+ - *Vector field reconstruction*: `vf.FixedPoints, vf.VectorField2D, vf.assign_fixedpoints`
+ - *Beyond RNA velocity*: `vf.jacobian, vf.sensitivity`
+ - *Vector field ranking*: `vf.rank_cells, vf.rank_genes, vf.rank_expression_genes,`
+ `vf.rank_jacobian_genes, vf.rank_s_divergence_genes, vf.rank_sensitivity_genes`
+ - *Vector field clustering and graph*: `vf.cluster_field, vf.streamline_clusters`
+ - *Prediction* `pd.andecestor, pd.get_init_path, pd.least_action, pd.perturbation,`
+ `pd.rank_perturbation_cell_clusters, pd.rank_perturbation_cells, pd.rank_perturbation_genes,`
+ `pd.state_graph, pd.tree_model`
+ - *Preprocessing plot*: `pl.biplot, pl.loading, pl.highest_frac_genes, pl.bubble`
+ - *Space plot*: `pl.space`
+ - *Kinetics plot*: `pl.sensitivity_kinetics`
+ - *Vector field plots*: `pl.cell_wise_vectors_3d, pl.plot_fixed_points_2d`
+ - *differential geometry plots*: `pl.acceleration`
+ - *Regulatory network plots* `pl.arcPlot, pl.circosPlot, pl.circosPlotDeprecated, pl.hivePlot`
+ - *fate plots* `pl.fate`
+ - *heatmap plots* `pl.causality, pl.comb_logic, pl.plot_hill_function, pl.response`
+ - *Predictions plots* `pl.lap_min_time`
+ - *External functionality* `ext.normalize_layers_pearson_residuals,`
+ `ext.select_genes_by_pearson_residuals, ext.sctransform`
+
+- More differential geometry analyses
+
+ - include the `switch` mode in rank_jacobian_genes
+ - added calculation of `sensitivity` matrix and relevant ranking
+
+- most probable path and *in silico* perturbation prediction
+
+ - implemented least action path optimization (can be done in high dimensional space) with analytical Jacobian
+ - include genetic perturbation prediction by either changing the vector field function or simulate genetic perturbation via analytical Jacobian
+
+- preprocessor class implementation
+
+ - extensible modular preprocess steps
+ - support following recipes: monocle (dynamo), seurat (seurat V3 flavor), sctransform (seurat), pearson residuals and pearson residuals for feature selection, combined with monocle recipe (ensure no negative values)
+ - following recipes tested on zebrafish dataset to make implementation results consistent:
+ - monocle, seurat, pearson residuals
+- CDlib integration
+
+ - leiden, louvain, infomap community detection for cell clustering
+ - wrappers in `dyn.tl.*` for computing clusters
+ - wrappers in `dyn.pl.*` for plotting
+
+### Tutorial Updates on Readthedocs
+
+- human HSC hematopoiesis RNA velocity analysis tutorials
+- *in silico* perturbation and least action path (LAP) predictions tutorials on HSC dataset
+- differential geometry analysis on HSC dataset
+
+ - Molecular mechanism of megakaryocytes
+ - Minimal network for basophil lineage commitment
+ - Cell-wise analyses: dominant interactions
+- gallery: Pancreatic endocrinogenesis differential geometry
+
+### Sample Dataset Updates
+
+### CI/CD Updates
+
+- update dynamo testing and pytest structure
+- test building workflow on 3.7, 3.8, 3.9 (3.6 no longer tested on github building CI)
+
+### Performance Improvements
+
+### API Changes
+
+- preprocess
+
+ - `pp.pca` -> `pca.pca_monocle`
+- Native implementation of various graphical calculus using Numpy without using igraph.
+
+### Other Changes
+
+- **general code refactor and bug fixing**
+- **pl.scatters** refactor
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
old mode 100755
new mode 100644
index 69fe55ecf..d4bb2cbb9
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,11 +1,12 @@
# Minimal makefile for Sphinx documentation
#
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-SOURCEDIR = source
-BUILDDIR = build
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@@ -16,4 +17,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/Preprocessor_tutorial_files/output_20_1.png b/docs/_static/Preprocessor_tutorial_files/output_20_1.png
new file mode 100644
index 000000000..dd6196a29
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_20_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_23_1.png b/docs/_static/Preprocessor_tutorial_files/output_23_1.png
new file mode 100644
index 000000000..c1bfe965d
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_23_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_26_1.png b/docs/_static/Preprocessor_tutorial_files/output_26_1.png
new file mode 100644
index 000000000..020c18ba1
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_26_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_29_1.png b/docs/_static/Preprocessor_tutorial_files/output_29_1.png
new file mode 100644
index 000000000..4fd30ce98
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_29_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_38_0.png b/docs/_static/Preprocessor_tutorial_files/output_38_0.png
new file mode 100644
index 000000000..0b59b5934
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_38_0.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_40_1.png b/docs/_static/Preprocessor_tutorial_files/output_40_1.png
new file mode 100644
index 000000000..4dbdf16fb
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_40_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_42_0.png b/docs/_static/Preprocessor_tutorial_files/output_42_0.png
new file mode 100644
index 000000000..f23e9f3c2
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_42_0.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_49_1.png b/docs/_static/Preprocessor_tutorial_files/output_49_1.png
new file mode 100644
index 000000000..871b502d5
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_49_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_54_1.png b/docs/_static/Preprocessor_tutorial_files/output_54_1.png
new file mode 100644
index 000000000..daa408ed3
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_54_1.png differ
diff --git a/docs/_static/Preprocessor_tutorial_files/output_67_1.png b/docs/_static/Preprocessor_tutorial_files/output_67_1.png
new file mode 100644
index 000000000..ec81c25c0
Binary files /dev/null and b/docs/_static/Preprocessor_tutorial_files/output_67_1.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/1_1.png b/docs/_static/Shiny_tutorial_files/lap/1_1.png
new file mode 100644
index 000000000..e3acb6ca4
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/1_1.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/1_2.png b/docs/_static/Shiny_tutorial_files/lap/1_2.png
new file mode 100644
index 000000000..918d40eab
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/1_2.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_1.jpg b/docs/_static/Shiny_tutorial_files/lap/2_1.jpg
new file mode 100644
index 000000000..977f8a16a
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_2.jpg b/docs/_static/Shiny_tutorial_files/lap/2_2.jpg
new file mode 100644
index 000000000..899207d74
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_2.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_3.jpg b/docs/_static/Shiny_tutorial_files/lap/2_3.jpg
new file mode 100644
index 000000000..929cfec90
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_3.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_4.png b/docs/_static/Shiny_tutorial_files/lap/2_4.png
new file mode 100644
index 000000000..89cf83228
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_4.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_5.jpg b/docs/_static/Shiny_tutorial_files/lap/2_5.jpg
new file mode 100644
index 000000000..5a56c31cf
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_5.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/2_6.jpg b/docs/_static/Shiny_tutorial_files/lap/2_6.jpg
new file mode 100644
index 000000000..5314d6287
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/2_6.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/3_1.jpg b/docs/_static/Shiny_tutorial_files/lap/3_1.jpg
new file mode 100644
index 000000000..84a85b077
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/3_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/3_2.png b/docs/_static/Shiny_tutorial_files/lap/3_2.png
new file mode 100644
index 000000000..6c3890070
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/3_2.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/4_1.jpg b/docs/_static/Shiny_tutorial_files/lap/4_1.jpg
new file mode 100644
index 000000000..23f8de23b
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/4_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/4_2.png b/docs/_static/Shiny_tutorial_files/lap/4_2.png
new file mode 100644
index 000000000..4008ab6a2
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/4_2.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/5_1.jpg b/docs/_static/Shiny_tutorial_files/lap/5_1.jpg
new file mode 100644
index 000000000..a53af1cdb
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/5_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/5_2.jpg b/docs/_static/Shiny_tutorial_files/lap/5_2.jpg
new file mode 100644
index 000000000..538822ee5
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/5_2.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/6.png b/docs/_static/Shiny_tutorial_files/lap/6.png
new file mode 100644
index 000000000..58d587940
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/6.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/7_1.png b/docs/_static/Shiny_tutorial_files/lap/7_1.png
new file mode 100644
index 000000000..7f0e470dd
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/7_1.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/7_2.jpg b/docs/_static/Shiny_tutorial_files/lap/7_2.jpg
new file mode 100644
index 000000000..041f24852
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/7_2.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/8_1.jpg b/docs/_static/Shiny_tutorial_files/lap/8_1.jpg
new file mode 100644
index 000000000..b3bf53ab8
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/8_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/8_2.png b/docs/_static/Shiny_tutorial_files/lap/8_2.png
new file mode 100644
index 000000000..726712825
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/8_2.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_1.png b/docs/_static/Shiny_tutorial_files/lap/9_1.png
new file mode 100644
index 000000000..7f5ad84f2
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_1.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_2.jpg b/docs/_static/Shiny_tutorial_files/lap/9_2.jpg
new file mode 100644
index 000000000..28744399f
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_2.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_3.jpg b/docs/_static/Shiny_tutorial_files/lap/9_3.jpg
new file mode 100644
index 000000000..269e7496b
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_3.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_4.jpg b/docs/_static/Shiny_tutorial_files/lap/9_4.jpg
new file mode 100644
index 000000000..d550812f0
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_4.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_5.png b/docs/_static/Shiny_tutorial_files/lap/9_5.png
new file mode 100644
index 000000000..4e6e617db
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_5.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_6.jpg b/docs/_static/Shiny_tutorial_files/lap/9_6.jpg
new file mode 100644
index 000000000..26e0b54f8
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_6.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_7.png b/docs/_static/Shiny_tutorial_files/lap/9_7.png
new file mode 100644
index 000000000..7d8ce95ca
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_7.png differ
diff --git a/docs/_static/Shiny_tutorial_files/lap/9_8.png b/docs/_static/Shiny_tutorial_files/lap/9_8.png
new file mode 100644
index 000000000..78e8420c8
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/lap/9_8.png differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/1.png b/docs/_static/Shiny_tutorial_files/perturbation/1.png
new file mode 100644
index 000000000..2017945e8
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/1.png differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/1_1.jpg b/docs/_static/Shiny_tutorial_files/perturbation/1_1.jpg
new file mode 100644
index 000000000..cdc76fb69
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/1_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/2_1.jpg b/docs/_static/Shiny_tutorial_files/perturbation/2_1.jpg
new file mode 100644
index 000000000..6d7dafa0d
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/2_1.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/2_2.jpg b/docs/_static/Shiny_tutorial_files/perturbation/2_2.jpg
new file mode 100644
index 000000000..4e1b64f9c
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/2_2.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/2_3.jpg b/docs/_static/Shiny_tutorial_files/perturbation/2_3.jpg
new file mode 100644
index 000000000..f9279ee95
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/2_3.jpg differ
diff --git a/docs/_static/Shiny_tutorial_files/perturbation/3_1.jpg b/docs/_static/Shiny_tutorial_files/perturbation/3_1.jpg
new file mode 100644
index 000000000..80251bf7f
Binary files /dev/null and b/docs/_static/Shiny_tutorial_files/perturbation/3_1.jpg differ
diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css
new file mode 100644
index 000000000..d6d520dc6
--- /dev/null
+++ b/docs/_static/css/custom.css
@@ -0,0 +1,37 @@
+/* Custom style overrides for the documentation pages. */
+/* NOTE(review): docs/conf.py in this change lists only "css/override.css" in
+   html_css_files, so this stylesheet does not appear to be loaded - confirm. */
+.caption-text{
+ padding: 0px;
+}
+
+/* Make images inside notebook output areas fill the available width while
+   keeping their aspect ratio. */
+.rst-content .output_area img {
+ max-width: unset;
+ width: 100% !important;
+ height: auto !important;
+}
+
+
+/* sidebar */
+.rst-content .sidebar {
+ /* margin: 0px 0px 0px 12px; */
+ padding-bottom: 0px;
+}
+.rst-content .sidebar p {
+ margin-bottom: 12px;
+}
+/* Smaller text for paragraphs and lists placed in a sidebar. */
+.rst-content .sidebar p,
+.rst-content .sidebar ul,
+.rst-content .sidebar dl {
+ font-size: 13px;
+}
+
+/* Utility class: scroll horizontally instead of overflowing. */
+.scrollit {
+ overflow-x:auto;
+}
+
+/* Sidebar header (and topbar for mobile) */
+.wy-side-nav-search, .wy-nav-top {
+ background: #1b1b1b;
+}
+/* Sidebar */
+.wy-nav-side {
+background: #373737;
+}
\ No newline at end of file
diff --git a/docs/_static/css/override.css b/docs/_static/css/override.css
new file mode 100644
index 000000000..e69de29bb
diff --git a/docs/_static/dynamo-horizontal.svg b/docs/_static/dynamo-horizontal.svg
new file mode 100644
index 000000000..a4f72f3d1
--- /dev/null
+++ b/docs/_static/dynamo-horizontal.svg
@@ -0,0 +1,13 @@
+
+
diff --git a/docs/_static/img/anndata_manager_schematic.svg b/docs/_static/img/anndata_manager_schematic.svg
new file mode 100644
index 000000000..59c29a7fd
--- /dev/null
+++ b/docs/_static/img/anndata_manager_schematic.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/img/setup_anndata_before_after.svg b/docs/_static/img/setup_anndata_before_after.svg
new file mode 100644
index 000000000..5c29af94b
--- /dev/null
+++ b/docs/_static/img/setup_anndata_before_after.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/logo.png b/docs/_static/logo.png
new file mode 100644
index 000000000..44fdbd294
Binary files /dev/null and b/docs/_static/logo.png differ
diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg
new file mode 100644
index 000000000..ac037f1c1
--- /dev/null
+++ b/docs/_static/logo.svg
@@ -0,0 +1,5 @@
+
+
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
new file mode 100644
index 000000000..e4665dfc7
--- /dev/null
+++ b/docs/_templates/autosummary/class.rst
@@ -0,0 +1,61 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. add toctree option to make autodoc generate the pages
+
+.. autoclass:: {{ objname }}
+
+{% block attributes %}
+{% if attributes %}
+Attributes table
+~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in attributes %}
+ ~{{ fullname }}.{{ item }}
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block methods %}
+{% if methods %}
+Methods table
+~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in methods %}
+ {%- if item != '__init__' %}
+ ~{{ fullname }}.{{ item }}
+ {%- endif -%}
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block attributes_documentation %}
+{% if attributes %}
+Attributes
+~~~~~~~~~~~
+
+{% for item in attributes %}
+
+.. autoattribute:: {{ [objname, item] | join(".") }}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block methods_documentation %}
+{% if methods %}
+Methods
+~~~~~~~
+
+{% for item in methods %}
+{%- if item != '__init__' %}
+
+.. automethod:: {{ [objname, item] | join(".") }}
+{%- endif -%}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
diff --git a/docs/_templates/class_no_inherited.rst b/docs/_templates/class_no_inherited.rst
new file mode 100644
index 000000000..837d4e5a5
--- /dev/null
+++ b/docs/_templates/class_no_inherited.rst
@@ -0,0 +1,68 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. add toctree option to make autodoc generate the pages
+
+.. autoclass:: {{ objname }}
+ :show-inheritance:
+
+{% block attributes %}
+{% if attributes %}
+Attributes table
+~~~~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in attributes %}
+ {%- if item not in inherited_members%}
+ ~{{ fullname }}.{{ item }}
+ {%- endif -%}
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+
+{% block methods %}
+{% if methods %}
+Methods table
+~~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in methods %}
+ {%- if item != '__init__' and item not in inherited_members%}
+ ~{{ fullname }}.{{ item }}
+ {%- endif -%}
+
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block attributes_documentation %}
+{% if attributes %}
+Attributes
+~~~~~~~~~~
+
+{% for item in attributes %}
+{%- if item not in inherited_members%}
+
+.. autoattribute:: {{ [objname, item] | join(".") }}
+{%- endif -%}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block methods_documentation %}
+{% if methods %}
+Methods
+~~~~~~~
+
+{% for item in methods %}
+{%- if item != '__init__' and item not in inherited_members%}
+
+.. automethod:: {{ [objname, item] | join(".") }}
+{%- endif -%}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
diff --git a/docs/api/datasets.md b/docs/api/datasets.md
new file mode 100644
index 000000000..4c9857f1a
--- /dev/null
+++ b/docs/api/datasets.md
@@ -0,0 +1,38 @@
+# Datasets
+
+Import dynamo as:
+
+```
+import dynamo as dyn
+```
+
+```{eval-rst}
+.. currentmodule:: dynamo
+
+```
+
+## Built in data
+
+Here we host some published datasets that are useful for benchmarking and testing models.
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ sample_data.scNT_seq_neuron_splicing
+ sample_data.scNT_seq_neuron_labeling
+ sample_data.zebrafish
+ sample_data.DentateGyrus
+ sample_data.Haber
+ sample_data.hgForebrainGlutamatergic
+ sample_data.chromaffin
+ sample_data.BM
+ sample_data.pancreatic_endocrinogenesis
+ sample_data.DentateGyrus_scvelo
+ sample_data.scEU_seq_rpe1
+ sample_data.scEU_seq_organoid
+ sample_data.hematopoiesis
+ sample_data.hematopoiesis_raw
+
+```
diff --git a/docs/api/index.md b/docs/api/index.md
new file mode 100644
index 000000000..a3899ebcc
--- /dev/null
+++ b/docs/api/index.md
@@ -0,0 +1,14 @@
+# API
+
+Import dynamo as:
+
+```
+import dynamo as dyn
+```
+
+```{toctree}
+:maxdepth: 2
+
+user
+datasets
+```
diff --git a/docs/api/user.md b/docs/api/user.md
new file mode 100644
index 000000000..111ec0e49
--- /dev/null
+++ b/docs/api/user.md
@@ -0,0 +1,686 @@
+# User
+
+Import dynamo as:
+
+```
+import dynamo as dyn
+```
+
+```{eval-rst}
+.. currentmodule:: dynamo
+
+```
+
+## Data IO
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ read
+ read_h5ad
+ read_loom
+
+```
+
+## Tools (tl)
+
+_kNN and moments of expressions_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.neighbors
+ tl.mnn
+ tl.moments
+
+```
+
+_Kinetic parameters and RNA/protein velocity_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.dynamics
+
+```
+
+_Labeling Velocity recipes_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.recipe_deg_data
+ tl.recipe_kin_data
+ tl.recipe_mix_kin_deg_data
+ tl.recipe_one_shot_data
+ tl.velocity_N
+
+
+```
+
+_Dimension reduction_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.reduceDimension
+ tl.DDRTree
+ tl.psl
+
+```
+
+_Clustering_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.hdbscan
+ tl.leiden
+ tl.louvain
+ tl.scc
+
+```
+
+_Velocity projection_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.cell_velocities
+ tl.confident_cell_velocities
+
+```
+
+_Velocity metrics_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.cell_wise_confidence
+ tl.gene_wise_confidence
+
+```
+
+_Markov chain_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.generalized_diffusion_map
+ tl.stationary_distribution
+ tl.diffusion
+ tl.expected_return_time
+
+```
+
+_Markers and differential expressions_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.moran_i
+ tl.find_group_markers
+ tl.two_groups_degs
+ tl.top_n_markers
+ tl.glm_degs
+
+```
+
+_Cell proliferation and apoptosis_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.score_cells
+ tl.cell_growth_rate
+
+```
+
+_Converter and helper_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ tl.converter
+ tl.run_scvelo
+ tl.run_velocyto
+ tl.vlm_to_adata
+
+```
+
+## Vector field (vf)
+
+_Vector field reconstruction_
+
+:::{note}
+ The vector field class is used internally by vf.VectorField. See our vector field classes here: [vector field](https://dynamo-release.readthedocs.io/en/latest/Class.html#vector-field)
+:::
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.VectorField
+ vf.SparseVFC
+ vf.BaseVectorField
+ vf.SvcVectorField
+ vf.graphize_vecfld
+ vf.vector_field_function
+
+```
+
+_Vector field topology_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.cluster_field
+ vf.topography
+ vf.FixedPoints
+ vf.assign_fixedpoints
+
+```
+
+_Beyond RNA velocity_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.velocities
+ vf.speed
+ vf.jacobian
+ vf.divergence
+ vf.curl
+ vf.acceleration
+ vf.curvature
+ vf.torsion
+ vf.sensitivity
+
+```
+
+_Beyond velocity vector field_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.cell_accelerations
+ vf.cell_curvatures
+
+```
+
+_Vector field ranking_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.rank_genes
+ vf.rank_expression_genes
+ vf.rank_velocity_genes
+ vf.rank_divergence_genes
+ vf.rank_acceleration_genes
+ vf.rank_curvature_genes
+ vf.rank_jacobian_genes
+ vf.rank_s_divergence_genes
+ vf.rank_sensitivity_genes
+
+```
+
+_Single cell potential: three approaches_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.gen_fixed_points
+ vf.gen_gradient
+ vf.IntGrad
+ vf.DiffusionMatrix
+ vf.action
+ vf.Potential
+ vf.path_integral
+ vf.alignment
+ vf.Wang_action
+ vf.Wang_LAP
+ vf.transition_rate
+ vf.MFPT
+ vf.Ao_pot_map
+ vf.solveQ
+
+```
+
+_Stochastic processes_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+
+ vf.diffusionMatrix
+
+```
+
+_Vector field clustering and graph_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ vf.cluster_field
+ vf.streamline_clusters
+ vf.vfGraph
+
+```
+
+## Prediction (pd)
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pd.andecestor
+ pd.fate
+ pd.fate_bias
+ pd.get_init_path
+ pd.least_action
+ pd.perturbation
+ pd.state_graph
+ pd.KO
+ pd.rank_perturbation_cell_clusters
+ pd.rank_perturbation_cells
+ pd.rank_perturbation_genes
+ pd.tree_model
+
+```
+
+## Plotting (pl)
+
+_Preprocessing_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.basic_stats
+ pl.show_fraction
+ pl.feature_genes
+ pl.biplot
+ pl.loading
+ pl.variance_explained
+ pl.highest_frac_genes
+ pl.exp_by_groups
+ pl.bubble
+
+```
+
+_Cell cycle staging_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.cell_cycle_scores
+
+```
+
+_Scatter base_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.scatters
+
+```
+
+_Space plot_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.space
+
+```
+
+_Phase diagram: conventional scRNA-seq_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.phase_portraits
+
+```
+
+_Kinetic models: labeling based scRNA-seq_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.dynamics
+
+```
+
+_Kinetics_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.kinetic_curves
+ pl.kinetic_heatmap
+ pl.jacobian_kinetics
+ pl.sensitivity_kinetics
+
+```
+
+_Dimension reduction_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.pca
+ pl.tsne
+ pl.umap
+ pl.trimap
+
+```
+
+_Clustering_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.leiden
+ pl.louvain
+ pl.infomap
+ pl.streamline_clusters
+
+```
+
+_Neighbor graph_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.nneighbors
+ pl.state_graph
+
+```
+
+_Vector field plots: velocities and accelerations_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.cell_wise_vectors
+ pl.cell_wise_vectors_3d
+ pl.grid_vectors
+ pl.streamline_plot
+ pl.line_integral_conv
+ pl.plot_energy
+ pl.plot_3d_streamtube
+
+```
+
+_Vector field topology_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.plot_flow_field
+ pl.plot_fixed_points
+ pl.plot_fixed_points_2d
+ pl.plot_nullclines
+ pl.plot_separatrix
+ pl.plot_traj
+ pl.topography
+ pl.response
+
+```
+
+_Beyond RNA velocity_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.speed
+ pl.divergence
+ pl.acceleration
+ pl.curl
+ pl.curvature
+ pl.jacobian
+ pl.jacobian_heatmap
+ pl.sensitivity
+ pl.sensitivity_heatmap
+
+```
+
+_Regulatory network_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.arcPlot
+ pl.circosPlot
+ pl.circosPlotDeprecated
+ pl.hivePlot
+
+```
+
+_Potential landscape_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.show_landscape
+
+```
+
+_Cell fate_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.fate
+ pl.fate_bias
+
+```
+
+_Heatmaps_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.causality
+ pl.comb_logic
+ pl.plot_hill_function
+ pl.response
+
+```
+
+_Predictions_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.lap_min_time
+
+```
+
+_Save figures_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ pl.save_fig
+
+```
+
+## Movie (mv)
+
+:::{note}
+ The animation class is used internally by mv.animate_fates. See our animation classes here: [animation](https://dynamo-release.readthedocs.io/en/latest/Class.html#movie)
+:::
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ mv.animate_fates
+
+```
+
+## Simulation (sim)
+
+_Simple ODE vector field simulation_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ sim.toggle
+ sim.Ying_model
+
+```
+
+_Gillespie simulation_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ sim.Gillespie
+ sim.Simulator
+ sim.state_space_sampler
+ sim.evaluate
+
+```
+
+## External (ext)
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ ext.ddhodge
+ ext.enrichr
+ ext.scribe
+ ext.coexp_measure
+ ext.scifate_glmnet
+
+```
+
+## Utilities
+
+_Package versions_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ get_all_dependencies_version
+
+```
+
+_Clean up adata_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ cleanup
+
+```
+
+_Figures configuration_
+
+```{eval-rst}
+.. autosummary::
+ :toctree: reference/
+ :nosignatures:
+
+ configuration.set_figure_params
+ configuration.set_pub_style
+
+```
+
+[anndata]: https://anndata.readthedocs.io/en/stable/
+[scanpy]: https://scanpy.readthedocs.io/en/stable/index.html
+[utilities]: https://scanpy.readthedocs.io/en/stable/api/index.html#reading
+[ray tune]: https://docs.ray.io/en/latest/tune/index.html
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 000000000..d9e79ba64
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,3 @@
+```{include} ../CHANGELOG.md
+
+```
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..51cb2bdb0
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,250 @@
+import importlib.util
+import inspect
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+from importlib.metadata import metadata
+from datetime import datetime
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from typing import Any
+
+# Directory containing this conf.py. The repository root and docs/extensions
+# are placed at the front of sys.path so they take precedence on import.
+HERE = Path(__file__).parent
+sys.path[:0] = [str(HERE.parent), str(HERE / "extensions")]
+
+# -- Project information -----------------------------------------------------
+
+# Name, author and version are read from the installed "dynamo-release"
+# distribution metadata, so the package must be installed to build the docs.
+info = metadata("dynamo-release")
+project_name = info["Name"]
+author = info["Author"]
+copyright = f"{datetime.now():%Y}, {author}."
+version = info["Version"]
+repository_url = f"https://github.com/aristoteleo/{project_name}"
+
+# The full version, including alpha/beta/rc tags
+release = info["Version"]
+
+bibtex_bibfiles = ["references.bib"]
+templates_path = ["_templates"]
+nitpicky = True # Warn about broken links
+needs_sphinx = "4.0"
+
+# NOTE(review): "github_version" is hard-coded to "main" - confirm this matches
+# the repository's default branch.
+html_context = {
+ "display_github": True, # Integrate GitHub
+ "github_user": "aristoteleo", # Username
+ "github_repo": project_name, # Repo name
+ "github_version": "main", # Version
+ "conf_py_path": "/docs/", # Path in the checkout to the docs root
+}
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+ "myst_nb",
+ "sphinx.ext.autodoc",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.linkcode",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.napoleon",
+ "sphinx_autodoc_typehints", # needs to be after napoleon
+ "sphinx.ext.extlinks",
+ "sphinx.ext.autosummary",
+ "sphinxcontrib.bibtex",
+ # every *.py module found in docs/extensions is loaded as an extension
+ *[p.stem for p in (HERE / "extensions").glob("*.py")],
+ "sphinx_copybutton",
+ "sphinx_design",
+ "sphinxext.opengraph",
+ "hoverxref.extension",
+]
+
+
+# for sharing urls with nice info
+#ogp_site_url = "https://docs.scvi-tools.org/"
+#ogp_image = "https://docs.scvi-tools.org/en/stable/_static/logo.png"
+
+
+# Generate the API documentation when building
+autosummary_generate = True
+autodoc_member_order = "bysource"
+bibtex_reference_style = "author_year"
+napoleon_google_docstring = True # for pytorch lightning
+napoleon_numpy_docstring = True # use numpydoc style
+napoleon_include_init_with_doc = False
+napoleon_use_rtype = True # having a separate entry generally helps readability
+napoleon_use_param = True
+napoleon_custom_sections = [("Params", "Parameters")]
+todo_include_todos = False
+myst_enable_extensions = [
+ "amsmath",
+ "colon_fence",
+ "deflist",
+ "dollarmath",
+ "html_image",
+ "html_admonition",
+]
+myst_url_schemes = ("http", "https", "mailto")
+# Notebooks are rendered from their stored outputs; they are not executed
+# during the build (execution mode is "off").
+nb_output_stderr = "remove"
+nb_execution_mode = "off"
+nb_merge_streams = True
+typehints_defaults = "braces"
+
+# Maps source file extensions to the parser that handles them.
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".ipynb": "myst-nb",
+ ".myst": "myst-nb",
+}
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
+
+# extlinks config
+# Short roles (:issue:, :pr:, :ghuser:) that expand to GitHub URLs.
+extlinks = {
+ "issue": (f"{repository_url}/issues/%s", "#%s"),
+ "pr": (f"{repository_url}/pull/%s", "#%s"),
+ "ghuser": ("https://github.com/%s", "@%s"),
+}
+
+# NOTE(review): several targets below (torch, lightning, pyro, pymde, flax,
+# jax, ...) may not be referenced by this project's docstrings - confirm they
+# are needed, since each inventory is fetched at build time.
+intersphinx_mapping = {
+ "anndata": ("https://anndata.readthedocs.io/en/stable/", None),
+ "ipython": ("https://ipython.readthedocs.io/en/stable/", None),
+ "matplotlib": ("https://matplotlib.org/", None),
+ "numpy": ("https://numpy.org/doc/stable/", None),
+ "pandas": ("https://pandas.pydata.org/docs/", None),
+ "python": ("https://docs.python.org/3", None),
+ "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
+ "sklearn": ("https://scikit-learn.org/stable/", None),
+ "torch": ("https://pytorch.org/docs/master/", None),
+ "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
+ "lightning": ("https://lightning.ai/docs/pytorch/stable/", None),
+ "pyro": ("http://docs.pyro.ai/en/stable/", None),
+ "pymde": ("https://pymde.org/", None),
+ "flax": ("https://flax.readthedocs.io/en/latest/", None),
+ "jax": ("https://jax.readthedocs.io/en/latest/", None),
+ "ml_collections": ("https://ml-collections.readthedocs.io/en/latest/", None),
+ "mudata": ("https://mudata.readthedocs.io/en/latest/", None),
+ "ray": ("https://docs.ray.io/en/latest/", None),
+ "huggingface_hub": ("https://huggingface.co/docs/huggingface_hub/main/en", None),
+ "sparse": ("https://sparse.pydata.org/en/stable/", None),
+}
+
+# -- Options for HTML output -------------------------------------------
+
+# html_show_sourcelink = True
+html_theme = "sphinx_book_theme"
+html_title = project_name
+
+html_logo = "_static/logo.png"
+
+# NOTE(review): "repository_branch" is set to the package version string -
+# confirm that a branch or tag with exactly that name exists in the repository.
+html_theme_options = {
+ "repository_url": repository_url,
+ "use_repository_button": True,
+ "logo_only": True,
+ "show_toc_level": 1,
+ "launch_buttons": {"colab_url": "https://colab.research.google.com"},
+ "path_to_docs": "docs/",
+ "repository_branch": version,
+}
+
+pygments_style = "default"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+# Only css/override.css is listed here; other stylesheets under _static/css
+# are copied but not linked from the pages.
+html_css_files = ["css/override.css"]
+html_show_sphinx = False
+
+def setup(app):
+ """App setup hook."""
+ app.add_config_value(
+ "recommonmark_config",
+ {
+ "auto_toc_tree_section": "Contents",
+ "enable_auto_toc_tree": True,
+ "enable_math": True,
+ "enable_inline_math": False,
+ "enable_eval_rst": True,
+ },
+ True,
+ )
+
+
+# -- Config for linkcode -------------------------------------------
+
+
+def git(*args):
+ """Run git command and return output as string."""
+ return subprocess.check_output(["git", *args]).strip().decode()
+
+
+# https://github.com/DisnakeDev/disnake/blob/7853da70b13fcd2978c39c0b7efa59b34d298186/docs/conf.py#L192
+# Current git reference. Uses branch/tag name if found, otherwise uses commit hash
+git_ref = None
+try:
+ git_ref = git("name-rev", "--name-only", "--no-undefined", "HEAD")
+ # drop a leading "remotes/<name>/" or "tags/" prefix from the resolved name
+ git_ref = re.sub(r"^(remotes/[^/]+|tags)/", "", git_ref)
+except Exception:
+ # best effort: on any failure git_ref keeps its previous value and the
+ # check below decides whether to fall back to the commit hash
+ pass
+
+# (if no name found or relative ref, use commit hash instead)
+if not git_ref or re.search(r"[\^~]", git_ref):
+ try:
+ git_ref = git("rev-parse", "HEAD")
+ except Exception:
+ # last resort when the commit hash cannot be obtained either
+ # NOTE(review): confirm "main" is the repository's default branch
+ git_ref = "main"
+
+# https://github.com/DisnakeDev/disnake/blob/7853da70b13fcd2978c39c0b7efa59b34d298186/docs/conf.py#L192
+# Directory of the importable "dynamo" package; linkcode_resolve expresses
+# source file paths relative to it.
+_dynamo_module_path = os.path.dirname(importlib.util.find_spec("dynamo").origin) # type: ignore
+
+
+def linkcode_resolve(domain, info):
+ """Determine the URL corresponding to Python object."""
+ if domain != "py":
+ return None
+
+ try:
+ obj: Any = sys.modules[info["module"]]
+ for part in info["fullname"].split("."):
+ obj = getattr(obj, part)
+ obj = inspect.unwrap(obj)
+
+ if isinstance(obj, property):
+ obj = inspect.unwrap(obj.fget) # type: ignore
+
+ path = os.path.relpath(inspect.getsourcefile(obj), start=_dynamo_module_path) # type: ignore
+ src, lineno = inspect.getsourcelines(obj)
+ except Exception:
+ return None
+
+ path = f"{path}#L{lineno}-L{lineno + len(src) - 1}"
+ return f"{repository_url}/blob/{git_ref}/scvi/{path}"
+
+
+# -- Config for hoverxref -------------------------------------------
+
+hoverx_default_type = "tooltip"
+hoverxref_domains = ["py"]
+hoverxref_role_types = dict.fromkeys(
+ ["ref", "class", "func", "meth", "attr", "exc", "data", "mod"],
+ "tooltip",
+)
+hoverxref_intersphinx = [
+ "python",
+ "numpy",
+ "scanpy",
+ "anndata",
+ "pytorch_lightning",
+ "scipy",
+ "pandas",
+ "ml_collections",
+ "ray",
+]
+# use proxied API endpoint on rtd to avoid CORS issues
+if os.environ.get("READTHEDOCS"):
+ hoverxref_api_host = "/_"
diff --git a/docs/developer.md b/docs/developer.md
new file mode 100644
index 000000000..66c1f98d3
--- /dev/null
+++ b/docs/developer.md
@@ -0,0 +1,3 @@
+```{include} ../CONTRIBUTING.md
+
+```
diff --git a/docs/extensions/edit_colab_url.py b/docs/extensions/edit_colab_url.py
new file mode 100644
index 000000000..1762e5689
--- /dev/null
+++ b/docs/extensions/edit_colab_url.py
@@ -0,0 +1,39 @@
+from sphinx.application import Sphinx
+
+
+def edit_colab_url(
+ app: Sphinx,
+ pagename: str,
+ templatename: str,
+ context: dict,
+ doctree: str,
+):
+ """Edit the colab url to point to the correct repo.
+
+ This assumes that the tutorials repo makes the same tag releases as the main repo,
+ in addition to only using colab urls (no binder or jupyterhub)
+
+ If this code needs updating, see how the sphinx book theme handles launch buttons.
+ """
+ try:
+ header_buttons = context["header_buttons"]
+ except KeyError:
+ return
+ for button in header_buttons:
+ # get launch buttons
+ if button["label"] == "launch-buttons":
+ # only one items in the launch buttons list as we only use colab
+ # remove "tutorials/notebooks" from url
+ button["buttons"][0]["url"] = button["buttons"][0]["url"].replace(
+ "/docs/tutorials/notebooks", ""
+ )
+ button["buttons"][0]["url"] = button["buttons"][0]["url"].replace(
+ "scvi-tools", "scvi-tutorials"
+ )
+
+
+def setup(app: Sphinx):
+ """Setup the extension."""
+ # Priority is set to 502 to ensure that this runs after the sphinx-book-theme
+ # The launch buttons are added in the sphinx-book-theme with priority 501
+ app.connect("html-page-context", edit_colab_url, priority=502)
diff --git a/docs/extensions/typed_returns.py b/docs/extensions/typed_returns.py
new file mode 100644
index 000000000..47292453a
--- /dev/null
+++ b/docs/extensions/typed_returns.py
@@ -0,0 +1,35 @@
+# code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py
+# with some minor adjustment
+from __future__ import annotations
+
+import re
+
+from sphinx.ext.napoleon import NumpyDocstring
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from sphinx.application import Sphinx
+ from collections.abc import Generator, Iterable
+
+
+def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
+ for line in lines:
+ if m := re.fullmatch(r"(?P\w+)\s+:\s+(?P[\w.]+)", line):
+ yield f'-{m["param"]} (:class:`~{m["type"]}`)'
+ else:
+ yield line
+
+
+def _parse_returns_section(self: NumpyDocstring, section: str) -> list[str]:
+ lines_raw = self._dedent(self._consume_to_next_section())
+ if lines_raw[0] == ":":
+ del lines_raw[0]
+ lines = self._format_block(":returns: ", list(_process_return(lines_raw)))
+ if lines and lines[-1]:
+ lines.append("")
+ return lines
+
+
+def setup(app: Sphinx):
+ """Set app."""
+ NumpyDocstring._parse_returns_section = _parse_returns_section
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 000000000..40a3ab9db
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,2 @@
+# Frequently asked questions
+
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 000000000..70534dee8
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,74 @@
+# Documentation
+
+## Dynamo: Mapping Vector Field of Single Cells
+
+Inclusive model of expression dynamics with metabolic labeling based scRNA-seq / multiomics, vector field reconstruction, potential landscape mapping, differential geometry analyses, and most probable paths / in silico perturbation predictions.
+
+![dynamo-abstract](https://user-images.githubusercontent.com/7456281/152110270-7ee1b0ed-1205-495d-9d65-59c7984d2fa2.png)
+
+If you find dynamo to be useful for your research, please consider citing the [dynamo manuscript](https://www.sciencedirect.com/science/article/pii/S0092867421015774)
+
+::::{grid} 1 2 3 3
+:gutter: 2
+
+:::{grid-item-card} Installation {octicon}`plug;1em;`
+:link: installation
+:link-type: doc
+
+New to _dynamo_? Check out the installation guide.
+:::
+
+:::{grid-item-card} User guide {octicon}`info;1em;`
+:link: user_guide/index
+:link-type: doc
+
+The user guide provides distilled mathematical descriptions of
+the models implemented in dynamo and connects the math
+with the code.
+:::
+
+:::{grid-item-card} API reference {octicon}`book;1em;`
+:link: api/index
+:link-type: doc
+
+The API reference contains a detailed description of
+the dynamo API.
+:::
+
+:::{grid-item-card} Tutorials {octicon}`play;1em;`
+:link: tutorials/index
+:link-type: doc
+
+The tutorials walk you through real-world applications of dynamo.
+:::
+
+:::{grid-item-card} Discussion {octicon}`megaphone;1em;`
+:link: https://github.com/aristoteleo/dynamo-release/discussions
+
+Need help? Reach out on our forum to get your questions answered!
+:::
+
+:::{grid-item-card} GitHub {octicon}`mark-github;1em;`
+:link: https://github.com/aristoteleo/dynamo-release/
+
+Find a bug? Interested in improving dynamo? Check out our GitHub for the latest developments.
+:::
+::::
+
+```{toctree}
+:hidden: true
+:maxdepth: 3
+:titlesonly: true
+
+introduction/index
+installation
+tutorials/index
+faq
+user_guide/index
+api/index
+developer
+changelog.md
+references
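+Discussion <https://github.com/aristoteleo/dynamo-release/discussions>
+GitHub <https://github.com/aristoteleo/dynamo-release>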
+```
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 000000000..7e1ee48c2
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,53 @@
+# Installation
+
+## Quick install
+
+dynamo can be installed via `conda` or `pip`. We recommend installing into a virtual
+environment to avoid conflicts with other packages.
+
+```bash
+conda install -c conda-forge dynamo-release
+```
+
+or
+
+```bash
+pip install dynamo-release
+```
+
+To install the newest version of dynamo, you can git clone our repo and then pip install:
+
+```bash
+git clone https://github.com/aristoteleo/dynamo-release.git
+pip install dynamo-release/ --user
+```
+
+Don't know how to get started with virtual environments or `conda`/`pip`? Check out the
+[prerequisites](#prerequisites) section.
+
+## Prerequisites
+
+### Virtual environment
+
+A virtual environment can be created with either `conda` or `venv`. We recommend using `conda`. We
+currently support Python 3.10 - 3.12.
+
+For `conda`, we recommend using the [Miniforge](https://github.com/conda-forge/miniforge)
+distribution, which is generally faster than the official distribution and comes with conda-forge
+as the default channel (where dynamo is hosted).
+
+```bash
+conda create -n dynamo-env python=3.10 # any python 3.10 to 3.12
+conda activate dynamo-env
+```
+
+For `venv`, we recommend using [uv](https://github.com/astral-sh/uv).
+
+```bash
+pip install -U uv
+uv venv .dynamo-env
+source .dynamo-env/bin/activate # for macOS and Linux
+.dynamo-env\Scripts\activate # for Windows
+```
+
+
diff --git a/docs/introduction/dynamo_paper_figures/fig6_a.png b/docs/introduction/dynamo_paper_figures/fig6_a.png
new file mode 100644
index 000000000..dd7cd9d18
Binary files /dev/null and b/docs/introduction/dynamo_paper_figures/fig6_a.png differ
diff --git a/docs/introduction/dynamo_paper_figures/fig6_b.png b/docs/introduction/dynamo_paper_figures/fig6_b.png
new file mode 100644
index 000000000..324b61324
Binary files /dev/null and b/docs/introduction/dynamo_paper_figures/fig6_b.png differ
diff --git a/docs/introduction/dynamo_paper_figures/fig6_c.png b/docs/introduction/dynamo_paper_figures/fig6_c.png
new file mode 100644
index 000000000..8b6fd7bcf
Binary files /dev/null and b/docs/introduction/dynamo_paper_figures/fig6_c.png differ
diff --git a/docs/introduction/dynamo_paper_figures/fig7_a.png b/docs/introduction/dynamo_paper_figures/fig7_a.png
new file mode 100644
index 000000000..365adc08b
Binary files /dev/null and b/docs/introduction/dynamo_paper_figures/fig7_a.png differ
diff --git a/docs/introduction/index.md b/docs/introduction/index.md
new file mode 100644
index 000000000..31745a6fa
--- /dev/null
+++ b/docs/introduction/index.md
@@ -0,0 +1,16 @@
+# Introduction
+
+Single-cell (sc)RNA-seq, together with RNA velocity and metabolic labeling, reveals cellular states and transitions at unprecedented resolution. Fully exploiting these data, however, requires kinetic models capable of unveiling governing regulatory functions. Here, we introduce an analytical framework dynamo, which infers absolute RNA velocity, reconstructs continuous vector fields that predict cell fates, employs differential geometry to extract underlying regulations, and ultimately predicts optimal reprogramming paths and perturbation outcomes. We highlight dynamo’s power to overcome fundamental limitations of conventional splicing-based RNA velocity analyses to enable accurate velocity estimations on a metabolically labeled human hematopoiesis scRNA-seq dataset. Furthermore, differential geometry analyses reveal mechanisms driving early megakaryocyte appearance and elucidate asymmetrical regulation within the PU.1-GATA1 circuit. Leveraging the least-action-path method, dynamo accurately predicts drivers of numerous hematopoietic transitions. Finally, in silico perturbations predict cell-fate diversions induced by gene perturbations. Dynamo, thus, represents an important step in advancing quantitative and predictive theories of cell-state transitions.
+
+
+```{toctree}
+:maxdepth: 2
+
+index_time
+index_geo
+index_cellfate
+index_silico
+
+```
+
+
diff --git a/docs/introduction/index_cellfate.md b/docs/introduction/index_cellfate.md
new file mode 100644
index 000000000..d33c9af2e
--- /dev/null
+++ b/docs/introduction/index_cellfate.md
@@ -0,0 +1,116 @@
+
+# Optimal cell fate transitions via most probable path
+
+The ability to drive conversion between different cell states has garnered a great deal of attention as a promising avenue for disease modeling. A fundamental challenge in the field of stem cell biology is, thus, to assess the feasibility and identify optimal paths and key transcription factors (TFs) of such inter-conversions. We summarize this grand problem of predicting optimal cell fate conversions (OPCs) in [the figure below](#lap_theory_dynamo_paper_fig6_a).
+
+![The grand problem of predicting OPtimal cell-fate Conversions(OPCs).](dynamo_paper_figures/fig6_a.png){:align="center" width="400"}
+
+The least action path (LAP) principle, first proposed as early as 1744 by [Terrall](#) and famously advocated by Feynman with his reformulation of quantum mechanics via the path integral of the classical Hamilton action [Feynman, 1965](#), has previously been used in predicting the optimal transition path of cell fate transition for simplistic and designed systems [Qiu et al., 2012; Wang et al., 2014; Wang et al., 2017](#). We reason that with the reconstructed continuous and differentiable vector field, we can extend the LAP approach to real datasets in transcriptomic space to computationally explore optimal paths for differentiation and reprogramming (dedifferentiation and transdifferentiation), which then helps us identify key transcription factors whose expression levels vary strongest along these paths.
+
+The hematopoietic scNT-seq dataset we generated in this study is well suited for testing LAP. Among the cell types from our tscRNA-seq data ([developmental tree](#lap_theory_dynamo_paper_fig6_b)), there are five developmental events (from HSC to each of the terminal cell types), one reported dedifferentiation event (from Meg to HSC), and a total of eight reported transdifferentiation events. Considering all-against-all conversions, we are left with 18 unreported transitions between different mature cell types ([transition matrix](#lap_theory_dynamo_paper_fig6_b)).
+
+![Predicting OPCs for hematopoietic cell types.](dynamo_paper_figures/fig6_b.png)
+
+Here we first briefly introduce the intuition of the LAP and what we can do with it. Intuitively, the optimal path between any two cell states (e.g., the fixed point of HSCs and that of megakaryocytes) is searched by varying the continuous path connecting the source state to the target while minimizing its action and updating the associated transition time [LAP](#lap_theory_dynamo_paper_fig6_c). The resultant LAP has the highest transition probability and is associated with a particular transition time. In order to identify the associated key regulators, we focus only on TFs and rank them by the path integral of the mean square displacement (MSD) of gene expression with respect to the initial expression.
+
+![The optimal paths for hematopoietic transitions can be found by identifying the LAPs between the fixed points that correspond to each stable cell type.](dynamo_paper_figures/fig6_c.png){:align="center" width="400"}
+
+Given the vector field function, $\boldsymbol{f}$, optimal pathways of cell fate conversion can be mathematically analyzed by least action paths (LAPs) [Freidlin & Wentzell, 2012; Onsager, 1953; Maier & Stein, 1997](#). The action is defined as:
+
+```math
+S_T(\boldsymbol{x}) = \frac{1}{2} \int_{0}^{T}\mathrm{d}t \left(\boldsymbol{v}(t) - \boldsymbol{f}(\boldsymbol{x}(t))\right)^\mathsf{T} \boldsymbol{D}^{-1} \left(\boldsymbol{v}(t) - \boldsymbol{f}(\boldsymbol{x}(t))\right),
+```
+
+where $\boldsymbol{x}$ is a path and $\boldsymbol{v}$ is $\boldsymbol{x}$'s tangential velocity (the path is parametrized by time $t$, so $\boldsymbol{v}(t) = \dot{\boldsymbol{x}}(t)$). $\boldsymbol{D}$ is the diffusion coefficient matrix accounting for the stochasticity of gene expression, and for simplicity here we assume it to be a constant. $T$ is the time needed for a cell to traverse the path. By this definition, a path that strictly follows a streamline of the vector field, and whose tangential velocity also equals the evaluated velocity of the vector field, has zero action, whereas any deviation increases the action. In other words, developmental processes are (mostly) spontaneous and driven by intrinsic cell states, whereas dedifferentiation requires external forces such as ectopic expression of exogenous TFs or specific chemical inductions.
+
+Computationally, given the starting and end cell states $\boldsymbol{x}_0$ and $\boldsymbol{x}_{n}$, such as HSCs and megakaryocytes, and a specific traversal time $T$, the LAP can be found by discretizing the path as a sequence of points $P=\{\boldsymbol{x}_0, \boldsymbol{x}_1, \dots, \boldsymbol{x}_n\}$, which forms $n$ line segments. For each line segment, the discrete tangential velocity can be calculated as $\boldsymbol{v}_k = (\boldsymbol{x}_k-\boldsymbol{x}_{k-1})/\Delta t$, where $\Delta t = T/n$. The action along the discrete path is defined as [Perez-Carrasco et al., 2016; Tang et al., 2017](#):
+
+```math
+S_T(P) = \frac{1}{2D}\sum_{k=1}^{n} \left(\boldsymbol{v}_k - \boldsymbol{f}(\boldsymbol{y}_k)\right)^2\Delta t,
+```
+
+where $\boldsymbol{y}_k$ are the middle points of the line segments, i.e., $\boldsymbol{y}_k = (\boldsymbol{x}_{k-1} + \boldsymbol{x}_k)/2$. Given a traversal time $T$, the LAP is a path such that:
+
+```math
+P^* = \underset{P}{\operatorname{argmin}}\ S_T(P) = \underset{P}{\operatorname{argmin}}\ \frac{1}{2D}\sum_{k=1}^{n} \left(\boldsymbol{v}_k - \boldsymbol{f}(\boldsymbol{y}_k)\right)^2\Delta t.
+```
+
+To obtain the global LAP, the optimal traversal time $T^*$ is determined as:
+
+```math
+T^* = \underset{T}{\operatorname{argmin}}\ S_T(P).
+```
+
+The algorithm discretizes the path as a sequence of points, $P=\{\boldsymbol{x}_0, \boldsymbol{x}_1, \dots, \boldsymbol{x}_n\}$, which forms $n$ line segments. For each line segment, the discrete tangential velocity can be calculated as $\boldsymbol{v}_k=(\boldsymbol{x}_k - \boldsymbol{x}_{k-1})/\Delta t$, where $\Delta t$ is the time step for the cell to move from $\boldsymbol{x}_{k-1}$ to $\boldsymbol{x}_k$. In addition to the deterministic vector field, we also assume a certain degree of stochasticity in the system:
+
+```math
+\dot{\boldsymbol{x}} = \boldsymbol{f}(\boldsymbol{x}) + \sigma \boldsymbol{\eta}(t),
+```
+
+where $\boldsymbol{\eta}(t)$ is stochastic white noise and $\sigma$ is its magnitude. The action $S$ along the discrete path is defined as (Perez-Carrasco et al., 2016):
+
+```math
+S(P, \Delta t) = \frac{1}{2D}\sum_{k=1}^{n}\left(\boldsymbol{v}_k - \boldsymbol{f}(\boldsymbol{y}_k)\right)^2\Delta t,
+```
+
+where $\boldsymbol{y}_k$ are the middle points of the line segments, i.e., $\boldsymbol{y}_k = (\boldsymbol{x}_{k-1} + \boldsymbol{x}_k)/2$. We have also assumed the diffusion matrix to be a constant $D$, such that $D=\sigma^2/2$. It is intuitive that a path whose tangential velocities $\boldsymbol{v}$ align with the vector field has smaller action than paths that do not. The LAP is a path such that:
+
+```math
+P^* = \underset{P, \Delta t}{\operatorname{argmin}} S(P, \Delta t) = \underset{P, \Delta t}{\operatorname{argmin}}\frac{1}{2D}\sum_{k=1}^{n}\left(\boldsymbol{v}_k - \boldsymbol{f}(\boldsymbol{y}_k)\right)^2\Delta t,
+```
+
+The algorithm for finding the LAP therefore consists of two steps:
+
+- Minimization of the action by varying the time step. The optimal time step given a fixed path is a simple univariate least square minimization, i.e.:
+
+```math
+\Delta t^* = \underset{\Delta t}{\operatorname{argmin}}\frac{1}{2D}\sum_{k=1}^{n}\left(\frac{\boldsymbol{x}_k - \boldsymbol{x}_{k-1}}{\Delta t} - \boldsymbol{f}(\boldsymbol{y}_k)\right)^2\Delta t,
+```
+
+- Minimization of the action by varying the path without moving the starting and end points. The optimal path given a fixed time step is found by:
+
+```math
+P^* = \underset{\{\boldsymbol{x}_1, \boldsymbol{x}_2, \dots, \boldsymbol{x}_{n-1}\}}{\operatorname{argmin}}\frac{1}{2D}\sum_{k=1}^{n}\left(\frac{\boldsymbol{x}_k - \boldsymbol{x}_{k-1}}{\Delta t} - \boldsymbol{f}\left(\frac{\boldsymbol{x}_{k-1} + \boldsymbol{x}_k}{2}\right)\right)^2\Delta t,
+```
+
+For a $d$-dimensional vector field, the number of variables in the above optimization problem is $d\times n$. To mitigate the computational cost, the Jacobian of the action w.r.t. the path (more specifically, the $a$-th component of the $k$-th point) is analytically computed:
+
+```math
+\frac{\partial{S}}{\partial{x_k^a}} = \frac{1}{D}\left(v_k^a - v_{k+1}^a + f^a(\boldsymbol{y}_{k+1}) - f^a(\boldsymbol{y}_k)\right) -\frac{1}{2D}\left(\left(\boldsymbol{v}_{k+1} - \boldsymbol{f}(\boldsymbol{x}_{k+1})\right) \cdot \frac{\partial{f}}{\partial{x^a}}\Big|_{\boldsymbol{x}_{k+1}} + \left(\boldsymbol{v}_k - \boldsymbol{f}(\boldsymbol{x}_k)\right)\cdot\frac{\partial f}{\partial{x^a}}\Big|_{\boldsymbol{x}_k}\right)
+```
+
+Note that the partial derivative of the vector field, $\partial \boldsymbol{f}/\partial x^a$, is the $a$-th column of the Jacobian of the vector field (with $J_{ij} = \partial f_i/\partial x_j$). With the analytical Jacobian, the computation efficiency of the LAP optimization improves tremendously, making the LAP calculation feasible to operate in high-dimensional space, such as the top 30 PCs.
+
+The LAP is found by iterating between the two steps, and empirically we found that the path converges in two or three iterations. By default, the LAP optimization is initialized with the interpolated shortest path on the kNN graph of cells.
+
+Notably, when LAPs are calculated in the PCA space, we can transform them back to the original gene expression space to predict the full transcriptomic kinetics along the optimal path, inspect waves of those kinetics along the path, and do so in absolute time units when the vector field used is based on tscRNA-seq.
+
+For rare transitions with $S_{T^*} \gg 0$ (e.g., dedifferentiation and transdifferentiation), the transition rate (number of transitions per unit time) is proportional to the exponential of actions of all paths. The Freidlin–Wentzell theorem dictates that the LAP with the minimal traversal time (which will be referred to as the optimal path below) contributes the most to this transition rate [Freidlin & Wentzell, 2012; Onsager, 1953; Maier & Stein, 1997; Aurell et al., 2002](#):
+
+```math
+R(A\rightarrow B) \approx C\exp(-S_{T^*}),
+```
+
+where $A$ and $B$ are two cell types, $S_{T^*}$ the action of the optimal path, and $C$ a proportional factor. Furthermore, the transition time, or more specifically the mean first passage time (MFPT), is related to the transition rate:
+
+```math
+\mathrm{MFPT} = \frac{1}{R(A\rightarrow B)}
+```
+
+Therefore, the action of the optimal path predicts both the likelihood and transition time for such rare transitions. Again, most reprogramming experiments take a few weeks or months, depending on the exact initial and terminal cell states [Takahashi & Yamanaka, 2006](#).
+
+For natural transitions between points that are connected by the vector field streamlines (e.g., from a repulsor to an adjacent attractor), the actions of LAPs, within a certain range of $T$, are all zero, because a path following the streamline downstream is a LAP with zero action. The above approximation that the LAP contributes the most to the transition rate no longer applies. Differentiation processes are often close to such natural transitions, and the action of a differentiation LAP cannot tell us any information on the transition rate. However, LAPs are still the most probable paths for cells to take, as they are optimized to follow the streamline of the vector field. The waiting time for the cell to initiate the transition is negligible in this case, so the transition time can be approximated by the traversal time of the LAP.
+
+In addition to the computation of transition time and traversal time (see below), analyzing gene expression variations along LAPs provides essential information on regulatory genes, and their dynamics, during cell fate transitions.
+
+- Transition time: the expected waiting time for a cell to initiate and finish the transition between two states, regardless of the path it takes. This corresponds to the experimentally measured time for one cell type to commit into another.
+
+- Traversal time: the time the cell spends traveling along a specific path. Theoretically, this is the time for a single cell to complete the cell type conversion once the cell has decided on the commitment.
+
+We calculate the mean squared displacement (MSD) for every gene $i$ along the optimal path:
+
+```math
+\mathrm{MSD}_i = \sum_{t=0}^{T} \big(y_i(t) - y_i(0)\big)^2
+```
+
+Genes with large MSD are potentially genes that regulate the corresponding transitions.
\ No newline at end of file
diff --git a/docs/introduction/index_geo.md b/docs/introduction/index_geo.md
new file mode 100644
index 000000000..87464e1f3
--- /dev/null
+++ b/docs/introduction/index_geo.md
@@ -0,0 +1,75 @@
+
+
+
+# Primer on differential geometry
+
+In this work, we introduced **dynamical systems theory** and **differential geometry** analysis to single-cell genomics. A dynamical system describes the time dependence of points in a geometrical space, e.g., planetary motion or cell fate transitions, whereas differential geometry uses the techniques of differential/integral calculus and linear/multilinear algebra to study problems in geometry, e.g., the topology or geometric features along a streamline in vector field of the gene expression space.
+
+A vector field function $\mathbf{f}$, a fundamental topic of dynamical systems theory, takes a spatial coordinate $\mathbf{x}$ (e.g., single-cell expression in gene state space) in a high-dimensional space (each gene corresponds to a dimension) as input and outputs a vector $\mathbf{v}$ (e.g., corresponds to gene expression velocity vector from a single cell) in the same space, i.e. $\mathbf{v} = \mathbf{f}(\mathbf{x})$. In this study, we specifically discuss velocity vector fields that can be used to derive acceleration and curvature vector fields (see **below**). With analytical velocity vector field functions, including the ones that we learned directly from data, we can move beyond velocity to high-order quantities, including the Jacobian, divergence, acceleration, curvature, curl, etc., using theories developed in differential geometry. The discussion of the velocity vector field in this study focuses on transcriptomic space; vector fields, however, can be generally applicable to other spaces, such as morphological, proteomic, or metabolic space.
+
+Because $\mathbf{f}$ is a vector-valued multivariate function, a $d\times d$ matrix encoding its derivatives, called the *Jacobian*, plays a fundamental role in differential geometry analysis of vector fields:
+
+\[
+\mathbf{J} = \begin{bmatrix}
+\dfrac{\partial f_1}{\partial x_1} & \dfrac{\partial f_1}{\partial x_2} & \cdots & \dfrac{\partial f_1}{\partial x_d} \\
+\dfrac{\partial f_2}{\partial x_1} & \dfrac{\partial f_2}{\partial x_2} & \cdots & \dfrac{\partial f_2}{\partial x_d} \\
+\vdots & \vdots & \ddots & \vdots \\
+\dfrac{\partial f_d}{\partial x_1} & \dfrac{\partial f_d}{\partial x_2} & \cdots & \dfrac{\partial f_d}{\partial x_d}
+\end{bmatrix}
+\]
+
+A Jacobian element $\partial f_i/\partial x_j$ reflects how the velocity of $x_i$ is impacted by changes in $x_j$.
+
+## Box Fig. 1. Divergence, curl, acceleration and curvature of vector field.
+
+![Box Fig. 1](https://raw.githubusercontent.com/Xiaojieqiu/jungle/master/Box1.png)
+
+The trace of the Jacobian is divergence:
+\[
+\nabla \cdot \mathbf{f} = \sum_{i=1}^{d}\dfrac{\partial f_i}{\partial x_i} = \mathrm{tr} \mathbf{J}
+\]
+
+Divergence measures the degree of “outgoingness” at any point, summarized in **Box Fig. 1A**.
+
+By definition, an attractor (repulsor) converges (diverges) in any direction. Note that it is possible to have a point where the vectors converge in one direction but diverge in another, a case that is not depicted in the diagram above. This means that although an attractor (repulsor) always has negative (positive) divergence, the opposite does not necessarily hold.
+
+*Curl* is a quantity measuring the degree of rotation at a given point in the vector field. It is well-defined only in two or three dimensions (e.g. two or three reduced principal components or UMAP components):
+
+\[
+\nabla \times \mathbf{f} = \begin{bmatrix}
+\dfrac{\partial f_z}{\partial y} - \dfrac{\partial f_y}{\partial z} \\
+\dfrac{\partial f_x}{\partial z} - \dfrac{\partial f_z}{\partial x} \\
+\dfrac{\partial f_y}{\partial x} - \dfrac{\partial f_x}{\partial y}
+\end{bmatrix}
+\]
+
+The behavior of curl is summarized in **Box Fig. 1B**.
+
+Many differential geometry quantities are defined on *streamlines*, which are curves everywhere tangent to the vector field. The streamlines can be parametrized with time $t$, denoted $\mathbf{x}(t)$, as they are essentially trajectories of cells moving in the vector field. In practice, they are often calculated using numerical integration methods, e.g., the Runge–Kutta algorithm. The *acceleration* is the time derivative of the velocity, as shown in **Box Fig. 1C** (orange shade), and can be defined as:
+
+\[
+\mathbf{a} = \dfrac{\mathrm{d} \mathbf{v}}{\mathrm{d} t} = \dfrac{\mathrm{d}}{\mathrm{d} t}\mathbf{f}\Big(\mathbf{x}(t)\Big) = \sum_{i=1}^{d} \dfrac{\partial \mathbf{f}}{\partial x_i}\dfrac{\partial x_i}{\partial t} = \mathbf{J} \mathbf{v}
+\]
+
+The curvature vector (**Box Fig. 1C**, green shade) of a curve is defined as the derivative of the unit tangent vector $\left(\frac{\mathrm{d}}{\mathrm{d}t}\frac{\mathbf{v}}{|\mathbf{v}|}\right)$, divided by the length of the tangent ($|\mathbf{v}|$):
+
+\[
+\kappa = \dfrac{1}{|\mathbf{v}|}\dfrac{\mathrm{d}}{\mathrm{d} t}\dfrac{\mathbf{v}}{|\mathbf{v}|} = \dfrac{\mathbf{J}\mathbf{v}(\mathbf{v}\cdot \mathbf{v}) - \mathbf{v}(\mathbf{v}\cdot \mathbf{J}\mathbf{v})}{|\mathbf{v}|^4}
+\]
+
+In the context of velocity vector fields and streamlines, the unit tangent vector is the normalized velocity.
+
+By definition, acceleration measures the rate of change of velocity in terms of both its magnitude and direction. Curvature, on the other hand, measures only the change in direction, as the velocity vector is normalized. **Box Fig. 1C** (green shade) illustrates how the acceleration can be decomposed into a tangential and a radial component, and the latter is connected to the curvature:
+
+\[
+\mathbf{a} = \mathbf{a}_t + |\mathbf{v}|^2\mathbf{\kappa}
+\]
+
+Although acceleration and curvature are mathematically defined on streamlines, the actual calculation, as shown above, can be done pointwise using only the velocity and the Jacobian evaluated at the point of interest. Because the acceleration or the curvature can be calculated for any point in the state space, one obtains the acceleration or curvature vector field.
+
+Other relevant differential geometric analyses, including torsion (applicable to three-dimensional vector field), vector Laplacian, etc., can also be computed using vector field functions, although they were not extensively studied in this work.
diff --git a/docs/introduction/index_silico.md b/docs/introduction/index_silico.md
new file mode 100644
index 000000000..995329fb7
--- /dev/null
+++ b/docs/introduction/index_silico.md
@@ -0,0 +1,44 @@
+
+# *In silico* perturbation
+
+\[
+\newcommand{\pdv}[2]{\dfrac{\partial #1}{\partial #2}} \newcommand{\trp}{\mathsf{T}}
+\]
+
+We leverage the analytical Jacobian of the reconstructed vector field function to make *in silico* genetic perturbations (left panel in [this figure](#dynamo_fig7_a)) and predict cell-fate outcomes after the perturbation (right panel in [this figure](#dynamo_fig7_a)).
+
+![*In silico* genetic perturbation of the velocity vector field.](dynamo_paper_figures/fig7_a.png){:align="center"}
+
+Intuitively, to simulate the genetic perturbation effects, we will introduce genetic perturbations to the system (encoded by the perturbation vector) and then let the perturbations propagate in the gene regulatory network (encoded by the Jacobian matrix) to execute downstream responses. Mathematically, for gene \(i\) in any cell, the genetic perturbation effects or changes in its velocity (or more accurately, the vector field) w.r.t. small perturbations in the expression of all genes in the network (encoded by the Jacobian matrix \(\boldsymbol J\)), \(\mathrm dx_1\), \(\mathrm dx_2\),…, \(\mathrm dx_n\), can be calculated with the *exact differential*:
+
+```math
+\mathrm{d} f_i = \pdv{f_i}{x_1}\mathrm{d}x_1 + \pdv{f_i}{x_2}\mathrm{d}x_2 + \ldots + \pdv{f_i}{x_n}\mathrm{d}x_n.
+```
+
+In vectorized form:
+
+```math
+\begin{bmatrix} \mathrm{d}f_1 \\[1.5ex] \mathrm{d}f_2 \\[1.5ex] \dots \\[1.5ex] \mathrm{d}f_n \end{bmatrix} = \begin{bmatrix} \pdv{f_1}{x_1} & \pdv{f_1}{x_2} & \dots & \pdv{f_1}{x_n} \\[2ex] \pdv{f_2}{x_1} & \pdv{f_2}{x_2} & \dots & \pdv{f_2}{x_n} \\[2ex] \dots & \dots & \dots & \dots \\[2ex] \pdv{f_n}{x_1} & \pdv{f_n}{x_2} & \dots & \pdv{f_n}{x_n} \end{bmatrix} \begin{bmatrix} \mathrm{d}x_1 \\[1.5ex] \mathrm{d}x_2 \\[1.5ex] \dots \\[1.5ex] \mathrm{d}x_n \end{bmatrix}.
+```
+
+The matrix on the right hand side is the Jacobian of the vector field. Replacing infinitesimal changes with finite perturbations, the above equation becomes:
+
+```math
+\Delta \boldsymbol{f} = \boldsymbol{J} \Delta \boldsymbol{x}.
+```
+
+In practice, a proportionality constant \(c\) (i.e. setting a perturbation to be 100 or -100) is often added to the perturbation \(\Delta \boldsymbol{x}\) to amplify the response \(\Delta \boldsymbol{f}\). Furthermore, because vector fields are often learned in the PCA space, the perturbations in the \(d\)-dimensional gene space are first transformed to the \(k\)-dimensional PCA space by:
+
+```math
+\Delta \boldsymbol{x} = \boldsymbol{Q}^\trp (\Delta \boldsymbol{y} - \boldsymbol{\mu}).
+```
+
+where \(\boldsymbol{Q}\) is the \(d\)-by-\(k\) PCA loading matrix, and \(\boldsymbol{\mu}\) is the mean of the PCA-transformed data. The response \(\Delta \boldsymbol{f}\) can be transformed back to the \(d\)-dimensional gene space:
+
+```math
+\Delta \boldsymbol{g} = \boldsymbol{Q} \Delta \boldsymbol{f} + \boldsymbol{\mu}.
+```
+
+One can then use \(\Delta \boldsymbol{g}\), a gene-by-cell matrix, to identify the strongest positive or negative responders of the genetic perturbation across cells.
+
+Importantly, because \(\Delta \boldsymbol{f}\) implies how each cell state will be affected after genetic perturbations, we can predict the cell fate trajectory under genetic perturbations by integrating the perturbation effects across cells over gene expression space. To visualize the cell fate trajectory, pairs of \(\boldsymbol{x}\) and \(\Delta \boldsymbol{g}\) are used in the same vein as the gene expression and RNA velocity vector to be further projected onto the UMAP or other low dimensional embeddings using the transition matrix [Bergen et al., 2020; La Manno et al., 2018](#) and then plotted with streamlines.
diff --git a/docs/introduction/index_time.md b/docs/introduction/index_time.md
new file mode 100644
index 000000000..2a287aa7c
--- /dev/null
+++ b/docs/introduction/index_time.md
@@ -0,0 +1,63 @@
+
+
+
+# Time-resolved scRNA-seq
+
+## Seminal RNA velocity limitations
+
+Although the seminal RNA velocity work is exciting, it has the following limitations:
+
+1. It can only predict short-term direction and magnitude of RNA dynamics.
+2. It is mostly a descriptive instead of a predictive tool.
+3. It relies on the `mis-priming` of intron reads for current single-cell platforms and thus the intron measures are biased and inaccurate.
+4. RNA velocity was estimated as \(U - (\gamma / \beta) S\) (\(U\): unspliced RNA, \(S\): spliced RNA, \(\gamma\): degradation rate, \(\beta\): splicing rate; \(\gamma / \beta\) is the slope of the steady state cell fitting). It is thus scaled by the splicing rate and lacks real physical meaning (i.e. molecules / hour).
+
+We reason that metabolic labeling based methods, which measure both the historical (old) and the nascent (new) RNA of cells in a controllable way, provide better measurements for RNA velocity and transcriptomic dynamics. When metabolic labeling is extended to single-cell RNA-seq, labeling based scRNA-seq essentially measures two modalities or timepoints for the same cell.
+
+## How does metabolic labeling work
+
+How can we quantify nascent RNA via metabolic labeling? Overall there are two different methods, the biotin purification or chemical conversion based approach. Both approaches are quite similar in that we first need to apply different labeling strategies to label the cells. For biotin purification, we need to use thiol-specific biotinylation to tag labeled mRNA. Then the streptavidin beads can be used to pull down and separate the pre-existing RNA and newly transcribed RNA. Then we will follow by preparing two separate libraries, old and new RNAs, for sequencing. There are a few very well-known issues regarding this method:
+
+1. It often introduces 20-30% cross-contamination between old and new RNAs.
+2. It also leads to some normalization issues between different libraries.
+
+On the other hand, the chemical conversion based approaches avoid the laborious and error-prone procedure of separating old/new RNA and preparing two different libraries, and have recently emerged as the favored strategy. The key idea of chemical conversion based methods is that, through a chemical reaction, we can artificially introduce T-to-C mutations, which can then be used to distinguish labeled (and thus new) RNA from old RNA. About three different chemistries have been developed: IAA alkylation, or hydrogen bond reconfiguration via TimeLapse-seq or TUC-seq chemistry.
+
+In fact, metabolic labeling has been widely adopted over the past few decades. We can use various nucleotides to label RNA, for example, BrU, EU and Biotin-NTP. For 4sU based labeling, there are about three different strategies, namely, SLAM-seq, TUC-seq, and TimeLapse-seq.
+
+![Metabolic labeling](https://user-images.githubusercontent.com/7456281/93838316-346af300-fc57-11ea-9cf9-79d37d8ff927.png)
+
+## Metabolic labeling based scRNA-seq
+
+Recently a few groups adapted the bulk methods to plate-based scRNA-seq with the SMART-seq2 method, for example, [scSLAM-seq](https://www.nature.com/articles/s41586-019-1369-y) or [NASC-seq](https://www.nature.com/articles/s41467-019-11028-9). [scEU-seq](https://science.sciencemag.org/content/367/6482/1151.full) is based on [CEL-Seq2](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0938-8) and is also plate-based but uses UMIs, in contrast to scSLAM-seq or NASC-seq. The scEU-seq method is based on EU and requires purification, and thus may involve cross-contamination or normalization issues.
+
+[Cao, et al](https://www.nature.com/articles/s41587-020-0480-9#:~:text=Abstract,not%20directly%20capture%20transcriptional%20dynamics.&text=We%20used%20sci%2Dfate%20to,in%20%3E6%2C000%20single%20cultured%20cells) recently developed sci-fate which integrates 4sU labeling and combinatorial indexing based scRNA-seq so it can potentially enable measuring hundreds of thousands of single cells.
+
+The [Wu lab](https://www.wulabupenn.org/) at UPenn developed scNT-seq, the first Drop-seq based metabolic labeling scRNA-seq method.
+
+![scNT-seq](https://user-images.githubusercontent.com/7456281/93839221-4601ca00-fc5a-11ea-8e56-d39ec1725a6b.png)
+
+## Comparison between different labeling based scRNA-seq methods
+
+In [Qiu, Hu, et al.](https://www.nature.com/articles/s41592-020-0935-4), we performed a detailed comparison (Supplementary Table 7) between scNT-seq and other available methods. In particular, with the improved second-strand synthesis based strategy, we are able to obtain a substantially higher number of genes and UMIs per cell with relatively few reads. Thus scNT-seq is arguably one of the best metabolic labeling based scRNA-seq strategies.
+
+In our study, we show that dynamo can be used to leverage scNT-seq datasets for time-resolved RNA-velocity analysis. Those results demonstrate the power of dynamo and scNT-seq in revealing the fine-grained transcriptomic dynamics.
+
+![Comparison](https://user-images.githubusercontent.com/7456281/93838287-1b624200-fc57-11ea-9674-76006ba07950.png)
+
+## Labeling strategies
+
+We can be very creative and smart in designing the metabolic labeling experiments. For example, you can design an experiment where you take different days and perform a kinetic experiment on each day. This can help you obtain the transcription, splicing and degradation rates over time. But this is often time-consuming, so we may just choose a typical day for a single kinetic experiment. In addition, we may also perform a degradation experiment where we label the cells with 4sU for an extended time period to saturate the 4sU labeling in cells. Then we can wash out the 4sU and replace it with excess U, followed by chasing at different time points. This can help us to estimate the splicing and degradation rates (and half-life) of RNA. We can also just design a one-shot labeling experiment to label cells at different time points. Since the splicing and degradation rates of mRNA are often constant, by combining one-shot experiments with degradation experiments we are able to get even more accurate estimates of the transcription rate at each time point. We also want to note that we can combine different labeling strategies, for example, combining pulse and chase in a single experiment or integrating metabolic labeling with drug treatment or genetic perturbations.
+
+![Labeling strategies](https://user-images.githubusercontent.com/7456281/93838322-392fa700-fc57-11ea-9019-e76358160f57.png)
+
+## Dynamo’s comprehensive model framework for analyzing labeling datasets
+
+In order to fully take advantage of the scSLAM-seq data, we recently developed a sophisticated framework, dynamo, that provides an inclusive model of expression dynamics with scSLAM-seq and multiomics, vector field reconstruction and potential landscape mapping. In dynamo, we abstract every step from RNA transcription, splicing, metabolic labeling, translation and RNA or protein degradation. We model the mean and variance of RNA species via a set of moment equations, which we then transform into a matrix format and solve efficiently. In dynamo, we also implemented the traditional RNA velocity method based on the steady state assumptions to support analyzing regular 10x data. Similarly, dynamo supports studying CITE-seq data to estimate protein velocity.
+
+![Dynamo framework](https://user-images.githubusercontent.com/7456281/93838322-392fa700-fc57-11ea-9019-e76358160f57.png)
diff --git a/docs/introduction/lap_tutorial/bas-hsc-kinetic.png b/docs/introduction/lap_tutorial/bas-hsc-kinetic.png
new file mode 100644
index 000000000..1efbae01d
Binary files /dev/null and b/docs/introduction/lap_tutorial/bas-hsc-kinetic.png differ
diff --git a/docs/introduction/lap_tutorial/lap_tutorial.rst b/docs/introduction/lap_tutorial/lap_tutorial.rst
new file mode 100644
index 000000000..648817c09
--- /dev/null
+++ b/docs/introduction/lap_tutorial/lap_tutorial.rst
@@ -0,0 +1,1221 @@
+.. raw:: html
+
+
+
+Most probable path predictions
+==============================
+
+Introduction
+~~~~~~~~~~~~
+
+| The ability to drive conversion between different cell states has
+ garnered a great deal of attention as a promising avenue for disease
+ modeling. A fundamental challenge in the field
+ of stem cell biology is to identify and assess the feasibility of
+ optimal paths and key TFs (transcription factors) of such
+ interconversions (Figure 6A of :cite:p:`QIU2022`). The least action path (LAP) is a principled method that has previously been used in
+ theoretical efforts to predict the most probable path a cell will
+ follow during fate transition. Specifically, the optimal path between any two cell states
+ (e.g. the fixed point of HSCs and that of megakaryocytes) is searched
+ by varying the continuous path connecting the source state to the
+ target while minimizing its action and updating the associated
+ transition time. The resultant least action path has the highest
+ transition probability and is associated with a particular transition
+ time. Once the LAP is identified, we can focus only on TFs and rank them by the path integral of the mean square displacement (MSD) of gene expression with respect to the initial expression to identify key transcription factors of the associated cell fate transitions.
+
+| In this tutorial, we will demonstrate how to
+- perform LAP analyses;
+- visualize transition paths found by the LAP approach on the vector field;
+- plot heatmaps of actions and transition times matrix between all hematopoietic cell types;
+- prioritize transcription factors of each predicted optimal path;
+- perform ROC analyses of the LAP predictions.
+
+Import relevant packages
+
+.. code:: ipython3
+
+ import numpy as np
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot
+ import matplotlib.pyplot as plt
+
+ import sys
+ import os
+
+ import dynamo as dyn
+
+ dyn.dynamo_logger.main_silence()
+
+
+
+.. parsed-literal::
+
+ |-----> setting visualization default mode in dynamo. Your customized matplotlib settings might be overritten.
+
+
+Let us first load the human hematopoiesis scNT-seq dataset produced in this study (:cite:p:`QIU2022`), which has been stored as a sample dataset within dynamo and can be downloaded directly using the function below. In this tutorial we will focus on analyzing this scNT-seq dataset because decades of research in hematopoiesis make it a well-suited system for testing LAP predictions.
+
+.. code:: ipython3
+
+ adata_labeling = dyn.sample_data.hematopoiesis()
+
+Let us take a glance at what is in the ``adata`` object. Preprocessing, normalization, umap dimension reduction, total RNA velocity, as well as the continuous RNA velocity vector field are computed (notebooks on these operations will be released shortly. Please also check other existing notebooks for these operations).
+
+.. code:: ipython3
+
+ adata_labeling
+
+
+.. parsed-literal::
+
+ AnnData object with n_obs × n_vars = 1947 × 1956
+ obs: 'batch', 'time', 'cell_type', 'nGenes', 'nCounts', 'pMito', 'pass_basic_filter', 'new_Size_Factor', 'initial_new_cell_size', 'total_Size_Factor', 'initial_total_cell_size', 'spliced_Size_Factor', 'initial_spliced_cell_size', 'unspliced_Size_Factor', 'initial_unspliced_cell_size', 'Size_Factor', 'initial_cell_size', 'ntr', 'cell_cycle_phase', 'leiden', 'umap_leiden', 'umap_louvain', 'control_point_pca', 'inlier_prob_pca', 'obs_vf_angle_pca', 'pca_ddhodge_div', 'pca_ddhodge_potential', 'umap_ddhodge_div', 'umap_ddhodge_potential', 'curl_umap', 'divergence_umap', 'control_point_umap', 'inlier_prob_umap', 'obs_vf_angle_umap', 'acceleration_pca', 'curvature_pca', 'n_counts', 'mt_frac', 'jacobian_det_pca', 'manual_selection', 'divergence_pca', 'curvature_umap', 'acceleration_umap', 'control_point_umap', 'inlier_prob_umap', 'obs_vf_angle_umap', 'curvature_umap', 'curv_leiden', 'curv_louvain', 'SPI1->GATA1_jacobian', 'jacobian'
+ var: 'gene_name', 'gene_id', 'nCells', 'nCounts', 'pass_basic_filter', 'use_for_pca', 'frac', 'ntr', 'time_3_alpha', 'time_3_beta', 'time_3_gamma', 'time_3_half_life', 'time_3_alpha_b', 'time_3_alpha_r2', 'time_3_gamma_b', 'time_3_gamma_r2', 'time_3_gamma_logLL', 'time_3_delta_b', 'time_3_delta_r2', 'time_3_bs', 'time_3_bf', 'time_3_uu0', 'time_3_ul0', 'time_3_su0', 'time_3_sl0', 'time_3_U0', 'time_3_S0', 'time_3_total0', 'time_3_beta_k', 'time_3_gamma_k', 'time_5_alpha', 'time_5_beta', 'time_5_gamma', 'time_5_half_life', 'time_5_alpha_b', 'time_5_alpha_r2', 'time_5_gamma_b', 'time_5_gamma_r2', 'time_5_gamma_logLL', 'time_5_bs', 'time_5_bf', 'time_5_uu0', 'time_5_ul0', 'time_5_su0', 'time_5_sl0', 'time_5_U0', 'time_5_S0', 'time_5_total0', 'time_5_beta_k', 'time_5_gamma_k', 'use_for_dynamics', 'gamma', 'gamma_r2', 'use_for_transition', 'gamma_k', 'gamma_b'
+ uns: 'PCs', 'VecFld_pca', 'VecFld_umap', 'VecFld_umap', 'X_umap_neighbors', 'cell_phase_genes', 'cell_type_colors', 'dynamics', 'explained_variance_ratio_', 'feature_selection', 'grid_velocity_pca', 'grid_velocity_umap', 'grid_velocity_umap', 'grid_velocity_umap_perturbation', 'grid_velocity_umap_test', 'grid_velocity_umap_perturbation', 'jacobian_pca', 'leiden', 'neighbors', 'pca_mean', 'pp', 'response'
+ obsm: 'X', 'X_pca', 'X_pca_SparseVFC', 'X_umap', 'X_umap_SparseVFC', 'X_umap', 'X_umap_SparseVFC', 'X_umap_perturbation', 'X_umap_test', 'X_umap_perturbation', 'acceleration_pca', 'acceleration_umap', 'cell_cycle_scores', 'curvature_pca', 'curvature_umap', 'curvature_umap', 'j_delta_x_perturbation', 'velocity_pca', 'velocity_pca_SparseVFC', 'velocity_umap', 'velocity_umap_SparseVFC', 'velocity_umap', 'velocity_umap_SparseVFC', 'velocity_umap_perturbation', 'velocity_umap_test', 'velocity_umap_perturbation'
+ layers: 'M_n', 'M_nn', 'M_t', 'M_tn', 'M_tt', 'X_new', 'X_total', 'velocity_alpha_minus_gamma_s'
+ obsp: 'X_umap_connectivities', 'X_umap_distances', 'connectivities', 'cosine_transition_matrix', 'distances', 'fp_transition_rate', 'moments_con', 'pca_ddhodge', 'perturbation_transition_matrix', 'umap_ddhodge'
+
+
+We will first show the streamline plot of this dataset in the UMAP space. From this plot, we can see that we have six major cell types, namely hematopoietic stem cells (HSC), neutrophil (Neu), monocyte (Mon), basophil (Bas), megakaryocyte (Meg) and erythrocytes (Ery). From the streamline plot, we can see that HSC will first become GMP (granulocyte monocyte progenitor)-like or MEP (megakaryocyte and erythrocyte progenitor)-like cells and then bifurcate into Neu and Mon or Ery, Bas and Meg, respectively. Here we will select a few characteristic cells for each specific cell type via ``dyn.tl.select_cell``.
+
+Among the cell types from our tscRNA-seq data, there are five developmental events (from HSC to each of the terminal cell types), one reported dedifferentiation event (from Meg to HSC), and a total of eight reported transdifferentiation events. Considering all-against-all conversions, we are left with 18 unreported transitions between different mature cell types. Thus, this system provides a broad range of known transitions and associated transcription factors to confirm our predictions while also allowing us to make non-trivial predictions for the remaining 18 unreported transitions.
+
+.. code:: ipython3
+
+ dyn.pl.streamline_plot(adata_labeling, basis="umap", color="cell_type")
+
+ HSC_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "HSC")
+ Meg_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "Meg")
+ Ery_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "Ery")
+ Bas_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "Bas")
+ Mon_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "Mon")
+ Neu_cells = dyn.tl.select_cell(adata_labeling, "cell_type", "Neu")
+
+
+.. image:: output_6_0.png
+ :width: 487px
+
+
+| We select the five closest cells of the identified fixed_points that correspond to each of the six cell types to represent the typical cell state of these cells (note that fixed_points often don't correspond to any particular cell).
+
+| Then the nearest cells of these ``fixed_points`` are saved to
+ ``*_cells_indices`` variables, which point to their cell indices in
+ the adata object. Note that we could just take the fixed_points for LAP analyses, but using the actual cells offers us the benefit of taking advantage of the nearest neighbor graph of cells to initialize the search for the LAP (see below).
+
+.. code:: ipython3
+
+ from dynamo.tools.utils import nearest_neighbors
+
+ fixed_points = np.array(
+ [
+ [8.45201833, 9.37697661],
+ [14.00630381, 2.53853712],
+ [17.30550636, 6.81561775],
+ [18.06891717, 11.9840678],
+ [14.13613403, 15.22244713],
+ [9.72644402, 14.83745969],
+ ]
+ )
+
+ HSC_cells_indices = nearest_neighbors(fixed_points[0], adata_labeling.obsm["X_umap"])
+ Meg_cells_indices = nearest_neighbors(fixed_points[1], adata_labeling.obsm["X_umap"])
+ Ery_cells_indices = nearest_neighbors(fixed_points[2], adata_labeling.obsm["X_umap"])
+ Bas_cells_indices = nearest_neighbors(fixed_points[3], adata_labeling.obsm["X_umap"])
+ Mon_cells_indices = nearest_neighbors(fixed_points[4], adata_labeling.obsm["X_umap"])
+ Neu_cells_indices = nearest_neighbors(fixed_points[5], adata_labeling.obsm["X_umap"])
+
+
+.. code:: ipython3
+
+ import matplotlib.pyplot as plt
+
+ # plt.figure(figsize=(4, 4))
+
+ plt.scatter(*adata_labeling.obsm["X_umap"].T)
+ for indices in [
+ HSC_cells_indices,
+ Meg_cells_indices,
+ Ery_cells_indices,
+ Bas_cells_indices,
+ Mon_cells_indices,
+ Neu_cells_indices,
+ ]:
+ plt.scatter(*adata_labeling[indices[0]].obsm["X_umap"].T)
+
+
+.. image:: output_9_1.png
+ :width: 543px
+
+
+We can see, for example, the cell indices ``1587, 1557, 1725, 1091, 1070`` are the nearest cells to the identified HSC attractor.
+
+.. code:: ipython3
+
+ HSC_cells_indices
+
+
+.. parsed-literal::
+
+ array([[1587, 1557, 1725, 1091, 1070]])
+
+
+
+..
+ Development path for Meg, Ery, Bas, Mon and Neu cells
+ -----------------------------------------------------
+
+Now we are ready to perform the LAP analyses. We will start with computing the neighbor graph of cells in the umap space (pca space works too) and use the shortest paths between any two represented cells as the initial guess of the LAP. We will next run the LAP analyses between all pair-wise combinations of cells. We can either perform the LAP analyses on the UMAP space or in the PCA space, using the vector field reconstructed in UMAP or PCA space, respectively. With the vector field learned in the PCA space, we can further project the optimized LAP back to the original gene expression space to reveal the transcriptomic kinetics along the LAP.
+
+Compute neighbor graph based on ``umap``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: ipython3
+
+ dyn.tl.neighbors(adata_labeling, basis="umap", result_prefix="umap")
+
+
+
+.. parsed-literal::
+
+ |-----> Start computing neighbor graph...
+ |-----------> X_data is None, fetching or recomputing...
+ |-----> fetching X data from layer:None, basis:umap
+ |-----> method arg is None, choosing methods automatically...
+ |-----------> method kd_tree selected
+ |-----> umap_connectivities to obsp in AnnData Object.
+ |-----> umap_distances to obsp in AnnData Object.
+ |-----> umap_neighbors to uns in AnnData Object.
+ |-----> umap_neighbors.indices to uns in AnnData Object.
+ |-----> umap_neighbors.params to uns in AnnData Object.
+
+
+
+Run pairwise least action path analyses among six distinct hematopoietic cell types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section will demonstrate how to compute LAPs for all possible cell type transition pairs in our scNT-seq dataset. The corresponding function in
+*dynamo* is ``dyn.pd.least_action``. This function takes an ``adata`` object that has a continuous vector field reconstructed, a start
+cell and a target cell to compute the least action path or most probable path between them. As shown
+above, either UMAP or PCA basis can be used. Here we use the UMAP basis to visualize the LAP and the PCA basis
+for downstream transcription factor prioritization and other analyses.
+
+Note that the following block also demonstrates using the ``GeneTrajectory`` function to reverse project the optimized LAP in PCA space back to the original gene expression space to reveal the transcriptomic kinetics along the LAP. We then calculate the accumulative MSD (mean square displacement) with respect to the initial state of each gene along the LAP in the original gene expression space (with ``calc_msd`` function) and use this score to prioritize the importance of each gene (with ``rank_genes`` function). Genes with top MSD have higher variances with respect to the initial state and will be ranked higher, which may also indicate key roles in making the cell fate conversions.
+
+Please refer to the API documentation of each of these functions for a
+detailed explanation of their input parameters, output, etc. Please also check our primers on the optimal path and the Cell paper for a more in-depth understanding.
+
+.. code:: ipython3
+
+ %%capture
+
+ dyn.dynamo_logger.main_silence()
+ transition_graph = {}
+ cell_type = ["HSC", "Meg", "Ery", "Bas", "Mon", "Neu"]
+ start_cell_indices = [
+ HSC_cells_indices,
+ Meg_cells_indices,
+ Ery_cells_indices,
+ Bas_cells_indices,
+ Mon_cells_indices,
+ Neu_cells_indices,
+ ]
+ end_cell_indices = start_cell_indices
+ for i, start in enumerate(start_cell_indices):
+ for j, end in enumerate(end_cell_indices):
+ if start is not end:
+ min_lap_t = True if i == 0 else False
+ dyn.pd.least_action(
+ adata_labeling,
+ [adata_labeling.obs_names[start[0]][0]],
+ [adata_labeling.obs_names[end[0]][0]],
+ basis="umap",
+ adj_key="X_umap_distances",
+ min_lap_t= min_lap_t,
+ EM_steps=2,
+ )
+ dyn.pl.least_action(adata_labeling, basis="umap")
+ lap = dyn.pd.least_action(
+ adata_labeling,
+ [adata_labeling.obs_names[start[0]][0]],
+ [adata_labeling.obs_names[end[0]][0]],
+ basis="pca",
+ adj_key="cosine_transition_matrix",
+ min_lap_t=min_lap_t,
+ EM_steps=2,
+ )
+ dyn.pl.kinetic_heatmap(
+ adata_labeling,
+ basis="pca",
+ mode="lap",
+ genes=adata_labeling.var_names[adata_labeling.var.use_for_transition],
+ project_back_to_high_dim=True,
+ )
+ # The `GeneTrajectory` class can be used to output trajectories for any set of genes of interest
+ gtraj = dyn.pd.GeneTrajectory(adata_labeling)
+ gtraj.from_pca(lap.X, t=lap.t)
+ gtraj.calc_msd()
+ ranking = dyn.vf.rank_genes(adata_labeling, "traj_msd")
+
+ print(start, "->", end)
+ genes = ranking[:5]["all"].to_list()
+ arr = gtraj.select_gene(genes)
+
+ dyn.pl.multiplot(lambda k: [plt.plot(arr[k, :]), plt.title(genes[k])], np.arange(len(genes)))
+
+ transition_graph[cell_type[i] + "->" + cell_type[j]] = {
+ "lap": lap,
+ "LAP_umap": adata_labeling.uns["LAP_umap"],
+ "LAP_pca": adata_labeling.uns["LAP_pca"],
+ "ranking": ranking,
+ "gtraj": gtraj,
+ }
+
+
+
+.. parsed-literal::
+
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [4.8274s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [87.3331s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [6.1928s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [87.8599s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [6.2292s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [81.9887s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [5.9224s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [82.8575s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [4.7673s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [120.9742s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.3654s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [17.5986s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.3836s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [17.3726s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4288s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [20.0245s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5213s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [17.8978s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4509s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [27.8622s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4895s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [16.6043s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.3779s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [12.5543s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4918s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [21.3984s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6045s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [18.5405s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6157s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [21.0733s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6774s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [28.5954s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4858s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [22.6107s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.3991s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [23.0945s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5327s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [24.0878s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6087s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [24.2374s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5198s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [18.8253s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6873s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [14.9045s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5671s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [10.4933s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4429s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [13.5975s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.3534s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [19.1570s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5459s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [30.7210s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.6741s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [17.5307s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5748s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [22.9212s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.5324s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [18.0897s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [0.4742s]
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [29.8258s]
+
+
+The LAPs between all pairs of cell types are stored in the ``transition_graph`` object. Here we will use the LAP results to visualize the developmental, reprogramming and transdifferentiation least action paths. Interestingly, we show that the LAP is not simply the shortest path between two cell states but instead follows the curved vector field flow.
+
+Visualize developmental LAPs
+----------------------------
+
+.. code:: ipython3
+
+ develope_keys = ["HSC->Meg", "HSC->Ery", "HSC->Bas", "HSC->Mon", "HSC->Neu"]
+ reprogram_keys = ["Meg->HSC", "Ery->HSC", "Bas->HSC", "Mon->HSC", "Neu->HSC"]
+ transdifferentiation = [
+ "Ery->Meg",
+ "Neu->Bas",
+ "Mon->Ery",
+ "Bas->Meg",
+ "Neu->Meg",
+ "Meg->Bas",
+ "Mon->Bas",
+ "Neu->Mon",
+ "Meg->Ery",
+ "Ery->Bas",
+ "Bas->Mon",
+ "Mon->Neu",
+ "Neu->Ery",
+ "Mon->Meg",
+ "Bas->Neu",
+ "Meg->Neu",
+ "Ery->Mon",
+ "Meg->Mon",
+ "Ery->Neu",
+ "Bas->Ery",
+ ]
+
+We define a helper function ``plot_lap`` to visualize different sets of paths. Here we visualize developmental LAPs.
+
+.. code:: ipython3
+
+ from dynamo.plot.utils import map2color
+ def plot_lap(paths):
+ fig, ax = plt.subplots(figsize=(5, 4))
+ ax = dyn.pl.streamline_plot(
+ adata_labeling, basis="umap", save_show_or_return="return", ax=ax, color="cell_type", frontier=True
+ )
+ ax = ax[0]
+ x, y = 0, 1
+
+ # plot paths
+ for path in paths:
+ lap_dict = transition_graph[path]["LAP_umap"]
+ for prediction, action in zip(lap_dict["prediction"], lap_dict["action"]):
+ ax.scatter(*prediction[:, [x, y]].T, c=map2color(action))
+ ax.plot(*prediction[:, [x, y]].T, c="k")
+ plot_lap(develope_keys)
+
+
+
+
+.. image:: output_19_0.png
+ :width: 407px
+
+**Reprogram LAPs**
+
+.. code:: ipython3
+
+ plot_lap(reprogram_keys)
+
+.. image:: reprogram-lap.png
+ :width: 407px
+
+**Transdifferentiation LAPs**
+
+.. code:: ipython3
+
+ plot_lap(transdifferentiation)
+
+.. image:: transdifferentiation-lap.png
+ :width: 407px
+
+
+Next, we will focus on transcription factors (TFs) and rank them based on their MSD along the LAP path to prioritize the importance of each TF. Meanwhile, we will also keep the action (a functional of the LAP) and the least action path time of each of these conversions, in ``action_df`` and ``t_df``, respectively.
+
+.. code:: ipython3
+
+ human_tfs = dyn.sample_data.human_tfs()
+ human_tfs_names = list(human_tfs["Symbol"])
+
+
+.. code:: ipython3
+
+ action_df = pd.DataFrame(index=cell_type, columns=cell_type)
+ t_df = pd.DataFrame(index=cell_type, columns=cell_type)
+ for i, start in enumerate(
+ [
+ HSC_cells_indices,
+ Meg_cells_indices,
+ Ery_cells_indices,
+ Bas_cells_indices,
+ Mon_cells_indices,
+ Neu_cells_indices,
+ ]
+ ):
+ for j, end in enumerate(
+ [
+ HSC_cells_indices,
+ Meg_cells_indices,
+ Ery_cells_indices,
+ Bas_cells_indices,
+ Mon_cells_indices,
+ Neu_cells_indices,
+ ]
+ ):
+ if start is not end:
+ print(cell_type[i] + "->" + cell_type[j], end=",")
+ lap = transition_graph[cell_type[i] + "->" + cell_type[j]]["lap"] # lap
+ gtraj = transition_graph[cell_type[i] + "->" + cell_type[j]]["gtraj"]
+ ranking = transition_graph[cell_type[i] + "->" + cell_type[j]]["ranking"].copy()
+ ranking["TF"] = [i in human_tfs_names for i in list(ranking["all"])]
+ genes = ranking.query("TF == True").head(10)["all"].to_list()
+ arr = gtraj.select_gene(genes)
+ action_df.loc[cell_type[i], cell_type[j]] = lap.action()[-1]
+ t_df.loc[cell_type[i], cell_type[j]] = lap.t[-1]
+
+
+
+.. parsed-literal::
+
+ HSC->Meg,HSC->Ery,HSC->Bas,HSC->Mon,HSC->Neu,Meg->HSC,Meg->Ery,Meg->Bas,Meg->Mon,Meg->Neu,Ery->HSC,Ery->Meg,Ery->Bas,Ery->Mon,Ery->Neu,Bas->HSC,Bas->Meg,Bas->Ery,Bas->Mon,Bas->Neu,Mon->HSC,Mon->Meg,Mon->Ery,Mon->Bas,Mon->Neu,Neu->HSC,Neu->Meg,Neu->Ery,Neu->Bas,Neu->Mon,
+
+We now visualize the LAP time of all developmental LAPs. Interestingly, we show that the LAP time of the HSC to Meg lineage LAP (28 hours) is the shortest among all
+developmental LAPs, consistent with the fact that megakaryocyte is the earliest cell type to appear. The predicted 28 hours is also on the time-scale of what has been reported for the single HSC transplantation experiments. We want to note that because we used the metabolic labeling based scRNA-seq, we obtained absolute RNA velocity and thus we can predict the actual time (in units of hours) of the LAP, a rather remarkable feature of the labeling data.
+
+.. code:: ipython3
+
+ dyn.configuration.set_pub_style(scaler=1.5)
+ develop_time_df = pd.DataFrame({"integration time": t_df.iloc[0, :].T})
+ develop_time_df["lineage"] = ["HSC", "Meg", "Ery", "Bas", "Mon", "Neu"]
+ print(develop_time_df)
+ ig, ax = plt.subplots(figsize=(4, 3))
+ dynamo_color_dict = {
+ "Mon": "#b88c7a",
+ "Meg": "#5b7d80",
+ "MEP-like": "#6c05e8",
+ "Ery": "#5d373b",
+ "Bas": "#d70000",
+ "GMP-like": "#ff4600",
+ "HSC": "#c35dbb",
+ "Neu": "#2f3ea8",
+ }
+
+ sns.barplot(
+ y="lineage",
+ x="integration time",
+ hue="lineage",
+ data=develop_time_df.iloc[1:, :],
+ dodge=False,
+ palette=dynamo_color_dict,
+ ax=ax,
+ )
+ ax.set_ylabel("")
+ plt.tight_layout()
+ plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
+
+.. parsed-literal::
+
+ integration time lineage
+ HSC NaN HSC
+ Meg 28.335868 Meg
+ Ery 46.227644 Ery
+ Bas 45.575254 Bas
+ Mon 41.797433 Mon
+ Neu 76.469544 Neu
+
+
+
+
+.. parsed-literal::
+
+
+
+
+
+
+.. image:: output_22_2.png
+ :width: 449px
+
+Here we are going to visualize the transition matrices of actions and LAP time between all pair-wise cell type conversions with heatmaps. Overall, we find that the developmental LAP time is much larger than that of the dedifferentiation LAP while the action has the opposite pattern.
+
+Heatmap of LAP actions and LAP time matrices of pairwise cell fate conversions
+-------------------------------------------------------------------------------
+
+.. code:: ipython3
+
+ action_df = action_df.fillna(0)
+ f, ax = plt.subplots(figsize=(5, 5))
+ dyn.configuration.set_pub_style(scaler=3)
+ ax = sns.heatmap(action_df, annot=True, ax=ax, fmt=".2g")
+
+
+
+
+.. image:: output_24_0.png
+ :width: 448px
+
+
+.. code:: ipython3
+
+ t_df = t_df.fillna(0)
+ dyn.configuration.set_pub_style(scaler=3)
+ ax = sns.heatmap(t_df, annot=True, fmt=".3g")
+
+
+
+
+.. image:: output_25_0.png
+ :width: 515px
+
+
+Kinetics heatmap of gene expression dynamics along the LAP
+-----------------------------------------------------------
+
+As mentioned above, we are able to obtain the gene-wise kinetics when we reverse project the LAP learned in PCA space back to gene-wise space. In this section we will show how to do so and we will create a kinetics heatmap of the transcriptomic dynamics along the LAP from HSC to basophil lineage. We will rely on mainly two functions, ``dyn.pd.least_action`` and ``dyn.pl.kinetic_heatmap``. ``dyn.pd.least_action`` can be used to compute the optimal paths between any two cell states, as mentioned above, while ``dyn.pl.kinetic_heatmap`` can be used to plot the kinetics
+heatmap.
+
+Here we will identify the LAP from the HSC to basophil lineage, and thus one typical HSC and one typical basophil cell are chosen as the initial and target cell, respectively.
+
+.. code:: ipython3
+
+ init_cells = [adata_labeling.obs_names[HSC_cells_indices[0][0]]]
+ target_cells = [adata_labeling.obs_names[Bas_cells_indices[0][0]]]
+ print("init cells:", init_cells)
+ print("end cells:", target_cells)
+
+
+.. parsed-literal::
+
+ init cells: ['GGGGGGCGGCCT-JL_10']
+ end cells: ['GCAGCGAAGGCA-JL12_0']
+
+
+Now let us find the optimal path from the HSC to the basophil lineage via the ``least_action`` function.
+
+.. code:: ipython3
+
+ dyn.configuration.set_pub_style(scaler=0.6)
+
+ lap = dyn.pd.least_action(
+ adata_labeling,
+ init_cells=init_cells,
+ target_cells=target_cells,
+ basis="pca",
+ adj_key="cosine_transition_matrix",
+ )
+
+
+
+.. parsed-literal::
+
+ |-----> [iterating through 1 pairs] in progress: 100.0000%
+ |-----> [iterating through 1 pairs] finished [9.2680s]
+
+
+Now let us plot the kinetic heatmap of the gene expression kinetics of all transcription factors (restricted only to those that are used for calculating the velocity transition matrix) along the LAP from HSC to basophil lineage.
+
+.. code:: ipython3
+
+
+ is_human_tfs = [gene in human_tfs_names for gene in adata_labeling.var_names[adata_labeling.var.use_for_transition]]
+ human_genes = adata_labeling.var_names[adata_labeling.var.use_for_transition][is_human_tfs]
+ dyn.configuration.set_pub_style(scaler=0.6)
+ sns.set(font_scale=0.8)
+ sns_heatmap = dyn.pl.kinetic_heatmap(
+ adata_labeling,
+ basis="pca",
+ mode="lap",
+ figsize=(10, 5),
+ genes=human_genes,
+ project_back_to_high_dim=True,
+ save_show_or_return="return",
+ color_map="bwr",
+ transpose=True,
+ xticklabels=True,
+ yticklabels=False
+ )
+
+ plt.setp(sns_heatmap.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
+ plt.tight_layout()
+
+
+
+
+.. image:: output_31_0.png
+ :width: 818px
+
+
+Now let us swap ``target_cells`` and ``init_cells``, when passing parameters into ``dyn.pd.least_action``, to draw the kinetic heatmap of the gene expression kinetics of all transcription factors along the LAP from basophil to HSC.
+
+.. code:: ipython3
+
+ from matplotlib import pyplot, transforms
+
+ is_human_tfs = [gene in human_tfs_names for gene in adata_labeling.var_names[adata_labeling.var.use_for_transition]]
+ human_genes = adata_labeling.var_names[adata_labeling.var.use_for_transition][is_human_tfs]
+ lap = dyn.pd.least_action(
+ adata_labeling,
+ init_cells=target_cells,
+ target_cells=init_cells,
+ basis="pca",
+ adj_key="cosine_transition_matrix",
+ )
+ sns.set(font_scale=0.8)
+ sns_heatmap = dyn.pl.kinetic_heatmap(
+ adata_labeling,
+ basis="pca",
+ mode="lap",
+ figsize=(16, 8),
+ genes=human_genes,
+ project_back_to_high_dim=True,
+ save_show_or_return="return",
+ color_map="bwr",
+ transpose=True,
+ xticklabels=True,
+ yticklabels=False,
+ )
+ plt.setp(sns_heatmap.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
+ plt.tight_layout()
+
+.. image:: bas-hsc-kinetic.png
+ :width: 818px
+
+
+Evaluate TF rankings based on LAP analyses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As mentioned above, we can rank TFs based on the mean square displacement (MSD) along the LAP. In this section, we are going to evaluate rankings from LAP analyses by comparing them with known transcription factors that enable successful cell fate conversions, as reported in the literature. More details can be found in the dynamo paper :cite:p:`QIU2022`.
+
+We first prepare TF ranking dataframes that will be used to create ranking statistics in this
+section. We identify the TFs among all genes (``["TF"]`` key) and tag TFs that are known transcription factors for the corresponding cell fate conversion (``["known_TF"]`` key). To the best of our ability, we manually compiled all known factors for all known hematopoietic cell fate transitions (including developmental processes). Please see supplementary table 2 from the dynamo paper :cite:p:`QIU2022` for more details.
+
+This part is specific to our scNT-seq dataset but should be easily changed to meet your needs as well.
+
+.. code:: ipython3
+
+ %%capture
+ HSC_Meg_ranking = transition_graph["HSC->Meg"]["ranking"]
+ HSC_Meg_ranking["TF"] = [i in human_tfs_names for i in list(HSC_Meg_ranking["all"])]
+
+ HSC_Meg_ranking = HSC_Meg_ranking.query("TF == True")
+ HSC_Meg_ranking["known_TF"] = [
+ i in ["GATA1", "GATA2", "ZFPM1", "GFI1B", "FLI1", "NFE2"] for i in list(HSC_Meg_ranking["all"])
+ ]
+
+ HSC_Ery_ranking = transition_graph["HSC->Ery"]["ranking"]
+ HSC_Ery_ranking["TF"] = [i in human_tfs_names for i in list(HSC_Ery_ranking["all"])]
+
+ HSC_Ery_ranking = HSC_Ery_ranking.query("TF == True")
+ HSC_Ery_ranking["known_TF"] = [
+ i in ["GATA1", "ZFPM1", "GFI1B", "KLF1", "SPI1", "GATA2", "LDB1", "TAL1", "ZFPM1"]
+ for i in list(HSC_Ery_ranking["all"])
+ ]
+
+ HSC_Bas_ranking = transition_graph["HSC->Bas"]["ranking"]
+ HSC_Bas_ranking["TF"] = [i in human_tfs_names for i in list(HSC_Bas_ranking["all"])]
+
+ HSC_Bas_ranking = HSC_Bas_ranking.query("TF == True")
+ HSC_Bas_ranking["known_TF"] = [i in ["CEBPA", "GATA2", "GATA1", "RUNX1"] for i in list(HSC_Bas_ranking["all"])]
+ HSC_Bas_ranking
+
+ HSC_Mon_ranking = transition_graph["HSC->Mon"]["ranking"]
+ HSC_Mon_ranking["TF"] = [i in human_tfs_names for i in list(HSC_Mon_ranking["all"])]
+
+ HSC_Mon_ranking = HSC_Mon_ranking.query("TF == True")
+ HSC_Mon_ranking["known_TF"] = [i in ["SPI1", "IRF8", "IRF5", "ZEB2", "KLF4"] for i in list(HSC_Mon_ranking["all"])]
+ HSC_Mon_ranking
+
+ HSC_Neu_ranking = transition_graph["HSC->Neu"]["ranking"]
+ HSC_Neu_ranking["TF"] = [i in human_tfs_names for i in list(HSC_Neu_ranking["all"])]
+
+ HSC_Neu_ranking = HSC_Neu_ranking.query("TF == True")
+ HSC_Neu_ranking["known_TF"] = [i in ["GFI1", "PER3", "GATA1", "ETS3"] for i in list(HSC_Neu_ranking["all"])]
+ HSC_Neu_ranking
+
+ #
+ Meg_HSC_ranking = transition_graph["Meg->HSC"]["ranking"]
+ Meg_HSC_ranking["TF"] = [i in human_tfs_names for i in list(Meg_HSC_ranking["all"])]
+
+ Meg_HSC_ranking = Meg_HSC_ranking.query("TF == True")
+ Meg_HSC_ranking["known_TF"] = [
+ i in ["RUN1T1", "HLF", "LMO2", "PRDM5", "PBX1", "ZFP37", "MYCN", "MEIS1"] for i in list(Meg_HSC_ranking["all"])
+ ]
+
+
+ Ery_Mon_ranking = transition_graph["Ery->Mon"]["ranking"]
+ Ery_Mon_ranking["TF"] = [i in human_tfs_names for i in list(Ery_Mon_ranking["all"])]
+
+ Ery_Mon_ranking = Ery_Mon_ranking.query("TF == True")
+ Ery_Mon_ranking["known_TF"] = [i in ["LSD1", "RUNX1"] for i in list(Ery_Mon_ranking["all"])]
+
+ Ery_Neu_ranking = transition_graph["Ery->Neu"]["ranking"]
+ Ery_Neu_ranking["TF"] = [i in human_tfs_names for i in list(Ery_Neu_ranking["all"])]
+
+ Ery_Neu_ranking = Ery_Neu_ranking.query("TF == True")
+ Ery_Neu_ranking["known_TF"] = [i in ["LSD1", "RUNX1"] for i in list(Ery_Neu_ranking["all"])]
+
+ # http://genesdev.cshlp.org/content/20/21/3010.long
+
+ Mon_Bas_ranking = transition_graph["Mon->Bas"]["ranking"]
+ Mon_Bas_ranking["TF"] = [i in human_tfs_names for i in list(Mon_Bas_ranking["all"])]
+
+ Mon_Bas_ranking = Mon_Bas_ranking.query("TF == True")
+ Mon_Bas_ranking["known_TF"] = [i in ["GATA2", "CEBPA"] for i in list(Mon_Bas_ranking["all"])]
+
+ Neu_Bas_ranking = transition_graph["Neu->Bas"]["ranking"]
+ Neu_Bas_ranking["TF"] = [i in human_tfs_names for i in list(Neu_Bas_ranking["all"])]
+
+ Neu_Bas_ranking = Neu_Bas_ranking.query("TF == True")
+ Neu_Bas_ranking["known_TF"] = [i in ["GATA2", "CEBPA"] for i in list(Neu_Bas_ranking["all"])]
+
+
+ # GATA-1 Converts Lymphoid and Myelomonocytic Progenitors into the Megakaryocyte/Erythrocyte Lineages
+
+ Mon_Meg_ranking = transition_graph["Mon->Meg"]["ranking"]
+ Mon_Meg_ranking["TF"] = [i in human_tfs_names for i in list(Mon_Meg_ranking["all"])]
+
+ Mon_Meg_ranking = Mon_Meg_ranking.query("TF == True")
+ Mon_Meg_ranking["known_TF"] = [i in ["GATA1", "ZFPM1", "GATA2"] for i in list(Mon_Meg_ranking["all"])]
+
+ Mon_Ery_ranking = transition_graph["Mon->Ery"]["ranking"]
+ Mon_Ery_ranking["TF"] = [i in human_tfs_names for i in list(Mon_Ery_ranking["all"])]
+
+ Mon_Ery_ranking = Mon_Ery_ranking.query("TF == True")
+ Mon_Ery_ranking["known_TF"] = [i in ["GATA1", "ZFPM1", "GATA2"] for i in list(Mon_Ery_ranking["all"])]
+
+
+ # Tom's paper
+ Meg_Neu_ranking = transition_graph["Meg->Neu"]["ranking"]
+ Meg_Neu_ranking["TF"] = [i in human_tfs_names for i in list(Meg_Neu_ranking["all"])]
+
+ Meg_Neu_ranking = Meg_Neu_ranking.query("TF == True")
+ Meg_Neu_ranking["known_TF"] = [i in ["CEBPA", "CEBPB", "CEBPE", "SPI1"] for i in list(Meg_Neu_ranking["all"])]
+
+ Ery_Neu_ranking = transition_graph["Ery->Neu"]["ranking"]
+ Ery_Neu_ranking["TF"] = [i in human_tfs_names for i in list(Ery_Neu_ranking["all"])]
+
+ Ery_Neu_ranking = Ery_Neu_ranking.query("TF == True")
+ Ery_Neu_ranking["known_TF"] = [i in ["CEBPA", "CEBPB", "CEBPE", "SPI1"] for i in list(Ery_Neu_ranking["all"])]
+
+
+.. code:: ipython3
+
+ lap_dict = transition_graph[cell_type[0] + "->" + cell_type[3]]["LAP_pca"]
+ lap_dict["t"] *= 3
+ adata_labeling.uns["LAP_pca"] = lap_dict
+
+
+Ranking TF's importance for each LAP
+------------------------------------
+
+Let's re-rank each known TF from each known hematopoietic fate conversion based on their MSD rankings among all TFs. We will use the helper function
+``assign_tf_ranks`` to achieve this purpose. All the known TFs are collected from literature as mentioned above.
+
+.. code:: ipython3
+
+ def assign_tf_ranks(transition_graph: dict, transition: str, tfs: list, tfs_key="TFs", tfs_rank_key="TFs_rank"):
+ ranking = transition_graph[transition]["ranking"]
+ ranking["TF"] = [i in human_tfs_names for i in list(ranking["all"])]
+ true_tf_list = list(ranking.query("TF == True")["all"])
+ all_tfs = list(ranking.query("TF == True")["all"])
+ transition_graph[transition][tfs_key] = tfs
+
+ transition_graph[transition][tfs_rank_key] = [
+ all_tfs.index(key) if key in true_tf_list else -1 for key in transition_graph[transition][tfs_key]
+ ]
+
+
+ assign_tf_ranks(transition_graph, "HSC->Meg", ["GATA1", "GATA2", "ZFPM1", "GFI1B", "FLI1", "NFE2"])
+
+
+.. code:: ipython3
+
+ transition_graph["HSC->Meg"]["TFs"]
+
+
+
+
+
+.. parsed-literal::
+
+ ['GATA1', 'GATA2', 'ZFPM1', 'GFI1B', 'FLI1', 'NFE2']
+
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(
+ transition_graph, "HSC->Ery", ["GATA1", "ZFPM1", "GFI1B", "KLF1", "SPI1", "GATA2", "LDB1", "TAL1", "ZFPM1"]
+ )
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "HSC->Bas", ["STAT5", "GATA2", "CEBPA", "MITF"])
+ assign_tf_ranks(transition_graph, "HSC->Bas", ["CEBPA", "GATA2", "GATA1", "RUNX1"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "HSC->Mon", ["SPI1", "IRF8", "IRF5", "ZEB2", "KLF4"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "HSC->Neu", ["GFI1", "PER3", "GATA1", "ETS3"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "Meg->HSC", ["RUN1T1", "HLF", "LMO2", "PRDM5", "PBX1", "ZFP37", "MYCN", "MEIS1"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "Mon->Meg", ["GATA1", "ZFPM1", "GATA2"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "Mon->Ery", ["GATA1", "ZFPM1", "GATA2"])
+
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "Meg->Neu", ["CEBPA", "CEBPB", "CEBPE", "SPI1"])
+
+
+.. code:: ipython3
+
+ # Tom's paper
+ assign_tf_ranks(
+ transition_graph, "Ery->Neu", ["CEBPA", "CEBPB", "CEBPE", "SPI1"], tfs_rank_key="TFs_rank2", tfs_key="TFs2"
+ )
+
+
+An erythroid to myeloid cell fate conversion is elicited by LSD1
+inactivation
+
+.. code:: ipython3
+
+ assign_tf_ranks(transition_graph, "Ery->Mon", ["LSD1", "RUNX1"])
+ assign_tf_ranks(transition_graph, "Ery->Neu", ["LSD1", "RUNX1"], tfs_rank_key="TFs_rank1", tfs_key="TFs1")
+
+
+.. code:: ipython3
+
+ # http://genesdev.cshlp.org/content/20/21/3010.long
+ assign_tf_ranks(transition_graph, "Mon->Bas", ["GATA2", "CEBPA"])
+
+ assign_tf_ranks(transition_graph, "Neu->Bas", ["GATA2", "CEBPA"])
+
+
+.. code:: ipython3
+
+ transition_graph["Ery->Neu"]["TFs2"], transition_graph["Ery->Neu"]["TFs_rank2"]
+
+
+
+
+
+.. parsed-literal::
+
+ (['CEBPA', 'CEBPB', 'CEBPE', 'SPI1'], [0, -1, -1, 17])
+
+
+Here we will convert the rankings of known TFs to a priority score, simply defined as :math:`1 - \frac{\# rank}{\# TF}`.
+
+.. code:: ipython3
+
+ from functools import reduce
+
+ reprogramming_mat_dict = {
+ "HSC->Meg": {
+ "genes": transition_graph["HSC->Meg"]["TFs"],
+ "rank": transition_graph["HSC->Meg"]["TFs_rank"],
+ "PMID": 18295580,
+ },
+ "HSC->Ery": {
+ "genes": transition_graph["HSC->Ery"]["TFs"],
+ "rank": transition_graph["HSC->Ery"]["TFs_rank"],
+ "PMID": 18295580,
+ },
+ "HSC->Bas": {
+ "genes": transition_graph["HSC->Bas"]["TFs"],
+ "rank": transition_graph["HSC->Bas"]["TFs_rank"],
+ "PMID": 18295580,
+ },
+ "HSC->Mon": {
+ "genes": transition_graph["HSC->Mon"]["TFs"],
+ "rank": transition_graph["HSC->Mon"]["TFs_rank"],
+ "PMID": 18295580,
+ },
+ "HSC->Neu": {
+ "genes": transition_graph["HSC->Neu"]["TFs"],
+ "rank": transition_graph["HSC->Neu"]["TFs_rank"],
+ "PMID": 18295580,
+ },
+ "Meg->HSC": {
+ "genes": transition_graph["Meg->HSC"]["TFs"],
+ "rank": transition_graph["Meg->HSC"]["TFs_rank"],
+ "PMID": 24766805,
+ },
+ "Meg->Neu": {
+ "genes": transition_graph["Meg->Neu"]["TFs"],
+ "rank": transition_graph["Meg->Neu"]["TFs_rank"],
+ "PMID": 31395745,
+ },
+ "Ery->Mon": {
+ "genes": transition_graph["Ery->Mon"]["TFs"],
+ "rank": transition_graph["Ery->Mon"]["TFs_rank"],
+ "PMID": 34324630,
+ },
+ "Ery->Neu1": {
+ "genes": transition_graph["Ery->Neu"]["TFs1"],
+ "rank": transition_graph["Ery->Neu"]["TFs_rank1"],
+ "PMID": 31395745,
+ },
+ "Ery->Neu2": {
+ "genes": transition_graph["Ery->Neu"]["TFs2"],
+ "rank": transition_graph["Ery->Neu"]["TFs_rank2"],
+ "PMID": 34324630,
+ },
+ "Mon->Meg": {
+ "genes": transition_graph["Mon->Meg"]["TFs"],
+ "rank": transition_graph["Mon->Meg"]["TFs_rank"],
+ "PMID": 14499119,
+ },
+ "Mon->Ery": {
+ "genes": transition_graph["Mon->Ery"]["TFs"],
+ "rank": transition_graph["Mon->Ery"]["TFs_rank"],
+ "PMID": 14499119,
+ },
+ "Mon->Bas": {
+ "genes": transition_graph["Mon->Bas"]["TFs"],
+ "rank": transition_graph["Mon->Bas"]["TFs_rank"],
+ "PMID": 17079688,
+ },
+ "Neu->Bas": {
+ "genes": transition_graph["Neu->Bas"]["TFs"],
+ "rank": transition_graph["Neu->Bas"]["TFs_rank"],
+ "PMID": 17079688,
+ },
+ }
+
+
+ reprogramming_mat_df = pd.DataFrame(reprogramming_mat_dict)
+
+ for key in reprogramming_mat_df:
+ assert len(reprogramming_mat_df[key]["genes"]) == len(reprogramming_mat_df[key]["rank"]), str(key)
+
+
+ all_genes = reduce(lambda a, b: a + b, reprogramming_mat_df.loc["genes", :])
+ all_rank = reduce(lambda a, b: a + b, reprogramming_mat_df.loc["rank", :])
+ all_keys = np.repeat(
+ np.array(list(reprogramming_mat_dict.keys())), [len(i) for i in reprogramming_mat_df.loc["genes", :]]
+ )
+
+ reprogramming_mat_df_p = pd.DataFrame({"genes": all_genes, "rank": all_rank, "transition": all_keys})
+ reprogramming_mat_df_p = reprogramming_mat_df_p.query("rank > -1")
+ reprogramming_mat_df_p["type"] = reprogramming_mat_df_p["transition"].map(
+ {
+ "HSC->Meg": "development",
+ "HSC->Ery": "development",
+ "HSC->Bas": "development",
+ "HSC->Mon": "development",
+ "HSC->Neu": "development",
+ "Meg->HSC": "reprogramming",
+ "Meg->Neu": "transdifferentiation",
+ "Ery->Mon": "transdifferentiation",
+ "Ery->Neu1": "transdifferentiation",
+ "Ery->Neu2": "transdifferentiation",
+ "Mon->Meg": "transdifferentiation",
+ "Mon->Ery": "transdifferentiation",
+ "Mon->Bas": "transdifferentiation",
+ "Neu->Bas": "transdifferentiation",
+ }
+ )
+
+ reprogramming_mat_df_p["rank"] /= 133
+ reprogramming_mat_df_p["rank"] = 1 - reprogramming_mat_df_p["rank"]
+
+
+Plotting priority scores of known TFs for specific hematopoietic transdifferentiations
+----------------------------------------------------------------------------------------
+
+The y-axis is the hematopoietic transdifferentiation and the x-axis is the TF priority score for a specific transition.
+
+.. code:: ipython3
+
+ dyn.configuration.set_pub_style()
+ transition_color_dict = {"development": "#2E3192", "reprogramming": "#EC2227", "transdifferentiation": "#B9519E"}
+
+ reprogramming_mat_df_p_subset = reprogramming_mat_df_p.query("type == 'transdifferentiation'")
+ rank = reprogramming_mat_df_p_subset["rank"].values
+ transition = reprogramming_mat_df_p_subset["transition"].values
+ genes = reprogramming_mat_df_p_subset["genes"].values
+
+ fig, ax = plt.subplots(1, 1, figsize=(6, 4))
+ sns.scatterplot(
+ y="transition",
+ x="rank",
+ data=reprogramming_mat_df_p_subset,
+ ec=None,
+ hue="type",
+ alpha=0.8,
+ ax=ax,
+ s=50,
+ palette=transition_color_dict,
+ clip_on=False,
+ )
+
+ for i in range(reprogramming_mat_df_p_subset.shape[0]):
+ annote_text = genes[i] # STK_ID
+ ax.annotate(
+ annote_text, xy=(rank[i], transition[i]), xytext=(0, 3), textcoords="offset points", ha="center", va="bottom"
+ )
+
+ plt.axvline(0.8, linestyle="--", lw=0.5)
+ ax.set_xlim(0.6, 1.01)
+ ax.set_xlabel("")
+ ax.set_xlabel("Score")
+ ax.set_yticklabels(list(reprogramming_mat_dict.keys())[6:], rotation=0)
+ ax.legend().set_visible(False)
+ ax.spines.top.set_position(("outward", 10))
+ ax.spines.bottom.set_position(("outward", 10))
+
+ ax.spines.right.set_visible(False)
+ ax.spines.top.set_visible(False)
+ ax.yaxis.set_ticks_position("left")
+ ax.xaxis.set_ticks_position("bottom")
+ plt.show()
+
+
+.. image:: output_55_1.png
+ :width: 650px
+
+From the above plot, you can appreciate that our prediction works very well. The majority of the known TFs of the known transitions are prioritized as > 0.8, while some of them achieve perfect prioritization (score ~= 1).
+
+
+ROC curve analyses of TF prioritization of the LAP predictions
+----------------------------------------------------------------
+
+Last but not least, let us evaluate our TF ranking via receiver operating characteristic (ROC) curve analyses. The ROC curve of LAP TF prioritization predictions, when using all known genes of all known transitions as the gold standard (see STAR Methods of :cite:p:`QIU2022`), reveals an AUC (area under the curve) of ``0.83``, again indicating that our LAP predictions and TF prioritization work quite well.
+
+These analyses reveal the potential of the LAP approach to predict the optimal paths and TF cocktails of cell-fate transitions with high accuracy, paving the road for à la carte reprogramming between any cell types of interest for applications in regenerative medicine (Graf and Enver, 2009).
+
+.. code:: ipython3
+
+ all_ranks_list = [
+ HSC_Meg_ranking,
+ HSC_Ery_ranking,
+ HSC_Bas_ranking,
+ HSC_Mon_ranking,
+ HSC_Neu_ranking,
+ Meg_HSC_ranking,
+ Ery_Mon_ranking,
+ Ery_Neu_ranking,
+ Mon_Bas_ranking,
+ Neu_Bas_ranking,
+ Mon_Meg_ranking,
+ Mon_Ery_ranking,
+ Meg_Neu_ranking,
+ Ery_Neu_ranking,
+ ]
+
+ all_ranks_df = pd.concat(all_ranks_list)
+
+ all_ranks_df["priority_score"] = (
+ 1 - np.tile(np.arange(HSC_Bas_ranking.shape[0]), len(all_ranks_list)) / HSC_Bas_ranking.shape[0]
+ )
+ # all_ranks_df['priority_score'].hist()
+ TFs = ranking["all"][ranking["TF"]].values
+ valid_TFs = np.unique(reprogramming_mat_df_p["genes"].values)
+
+
+.. code:: ipython3
+
+ from sklearn.metrics import roc_curve, auc
+
+ use_abs = False
+ top_genes = len(TFs)
+
+ cls = all_ranks_df["known_TF"].astype(int)
+ pred = all_ranks_df["priority_score"]
+
+ fpr, tpr, _ = roc_curve(cls, pred)
+ roc_auc = auc(fpr, tpr)
+
+
+ dyn.configuration.set_pub_style_mpltex()
+ plt.figure(figsize=(1.3, 1))
+
+ lw = 0.5
+ plt.figure(figsize=(5, 5))
+ plt.plot(fpr, tpr, color="darkorange", lw=lw, label="ROC curve (area = %0.2f)" % roc_auc)
+ plt.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
+ plt.xlim([0.0, 1.0])
+ plt.ylim([0.0, 1.05])
+ plt.xlabel("False Positive Rate")
+ plt.ylabel("True Positive Rate")
+ # plt.title(cur_guide)
+ plt.legend(loc="lower right")
+ plt.show()
+
+ plt.tight_layout()
+
+
+
+.. image:: output_58_2.png
+ :width: 500px
+
diff --git a/docs/introduction/lap_tutorial/output_19_0.png b/docs/introduction/lap_tutorial/output_19_0.png
new file mode 100644
index 000000000..ab31c52d4
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_19_0.png differ
diff --git a/docs/introduction/lap_tutorial/output_22_2.png b/docs/introduction/lap_tutorial/output_22_2.png
new file mode 100644
index 000000000..7909db6ba
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_22_2.png differ
diff --git a/docs/introduction/lap_tutorial/output_24_0.png b/docs/introduction/lap_tutorial/output_24_0.png
new file mode 100644
index 000000000..d7bdb1116
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_24_0.png differ
diff --git a/docs/introduction/lap_tutorial/output_25_0.png b/docs/introduction/lap_tutorial/output_25_0.png
new file mode 100644
index 000000000..599a31ba9
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_25_0.png differ
diff --git a/docs/introduction/lap_tutorial/output_31_0.png b/docs/introduction/lap_tutorial/output_31_0.png
new file mode 100644
index 000000000..57b2f9659
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_31_0.png differ
diff --git a/docs/introduction/lap_tutorial/output_55_1.png b/docs/introduction/lap_tutorial/output_55_1.png
new file mode 100644
index 000000000..00a224450
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_55_1.png differ
diff --git a/docs/introduction/lap_tutorial/output_58_2.png b/docs/introduction/lap_tutorial/output_58_2.png
new file mode 100644
index 000000000..060bc3167
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_58_2.png differ
diff --git a/docs/introduction/lap_tutorial/output_6_0.png b/docs/introduction/lap_tutorial/output_6_0.png
new file mode 100644
index 000000000..31c4702ad
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_6_0.png differ
diff --git a/docs/introduction/lap_tutorial/output_9_1.png b/docs/introduction/lap_tutorial/output_9_1.png
new file mode 100644
index 000000000..b77bb6cfc
Binary files /dev/null and b/docs/introduction/lap_tutorial/output_9_1.png differ
diff --git a/docs/introduction/lap_tutorial/reprogram-lap.png b/docs/introduction/lap_tutorial/reprogram-lap.png
new file mode 100644
index 000000000..f78aca1a8
Binary files /dev/null and b/docs/introduction/lap_tutorial/reprogram-lap.png differ
diff --git a/docs/introduction/lap_tutorial/transdifferentiation-lap.png b/docs/introduction/lap_tutorial/transdifferentiation-lap.png
new file mode 100644
index 000000000..389cfb7e4
Binary files /dev/null and b/docs/introduction/lap_tutorial/transdifferentiation-lap.png differ
diff --git a/docs/introduction/perturbation_tutorial/output_14_1.png b/docs/introduction/perturbation_tutorial/output_14_1.png
new file mode 100644
index 000000000..56526b5d5
Binary files /dev/null and b/docs/introduction/perturbation_tutorial/output_14_1.png differ
diff --git a/docs/introduction/perturbation_tutorial/output_16_1.png b/docs/introduction/perturbation_tutorial/output_16_1.png
new file mode 100644
index 000000000..ef4a5f413
Binary files /dev/null and b/docs/introduction/perturbation_tutorial/output_16_1.png differ
diff --git a/docs/introduction/perturbation_tutorial/output_18_1.png b/docs/introduction/perturbation_tutorial/output_18_1.png
new file mode 100644
index 000000000..ad335bc98
Binary files /dev/null and b/docs/introduction/perturbation_tutorial/output_18_1.png differ
diff --git a/docs/introduction/perturbation_tutorial/output_20_1.png b/docs/introduction/perturbation_tutorial/output_20_1.png
new file mode 100644
index 000000000..de76ab4a2
Binary files /dev/null and b/docs/introduction/perturbation_tutorial/output_20_1.png differ
diff --git a/docs/introduction/perturbation_tutorial/output_22_1.png b/docs/introduction/perturbation_tutorial/output_22_1.png
new file mode 100644
index 000000000..96fc05e56
Binary files /dev/null and b/docs/introduction/perturbation_tutorial/output_22_1.png differ
diff --git a/docs/introduction/perturbation_tutorial/perturbation_tutorial.rst b/docs/introduction/perturbation_tutorial/perturbation_tutorial.rst
new file mode 100644
index 000000000..0783ce22b
--- /dev/null
+++ b/docs/introduction/perturbation_tutorial/perturbation_tutorial.rst
@@ -0,0 +1,204 @@
+.. raw:: html
+
+
+
+
+*in silico* perturbation
+========================
+
+In the dynamo Cell paper :cite:p:`QIU2022`, we introduced the analytical form of a
+vector field. This permits *in silico* perturbation predictions of expression
+for each gene in each cell and the cell fate diversions after
+genetic perturbations. In particular, we demonstrated the predictive
+power of hematopoietic fate trajectory predictions after genetic
+perturbations.
+
+| In this tutorial, we will cover the following topics:
+- Perturbation functionality and API in dynamo
+- How to perform single or combinatorial perturbation (either repression or activation) in the hematopoietic scNT-seq dataset
+- Visualize gene perturbation effects
+- Reproduce results in dynamo paper Fig.7 :cite:p:`QIU2022`
+
+| - :ref:`You can read more about theory part here`.
+Perturbation method introduction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+Import relevant packages
+
+.. code:: ipython3
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ import sys
+ import os
+
+ import dynamo as dyn
+ dyn.dynamo_logger.main_silence()
+
+
+.. parsed-literal::
+
+ |-----> setting visualization default mode in dynamo. Your customized matplotlib settings might be overritten.
+
+
+.. code:: ipython3
+
+ adata_labeling = dyn.sample_data.hematopoiesis()
+
+
+Let us take a glance at what is in the ``adata`` object. Preprocessing, normalization, umap dimension reduction, total RNA velocity, as well as the continuous RNA velocity vector field are computed (notebooks on these operations will be released shortly. Please also check other existing notebooks for these operations).
+
+.. code:: ipython3
+
+ adata_labeling
+
+
+
+
+.. parsed-literal::
+
+ AnnData object with n_obs × n_vars = 1947 × 1956
+ obs: 'batch', 'time', 'cell_type', 'nGenes', 'nCounts', 'pMito', 'pass_basic_filter', 'new_Size_Factor', 'initial_new_cell_size', 'total_Size_Factor', 'initial_total_cell_size', 'spliced_Size_Factor', 'initial_spliced_cell_size', 'unspliced_Size_Factor', 'initial_unspliced_cell_size', 'Size_Factor', 'initial_cell_size', 'ntr', 'cell_cycle_phase', 'leiden', 'umap_leiden', 'umap_louvain', 'control_point_pca', 'inlier_prob_pca', 'obs_vf_angle_pca', 'pca_ddhodge_div', 'pca_ddhodge_potential', 'umap_ori_ddhodge_div', 'umap_ori_ddhodge_potential', 'curl_umap_ori', 'divergence_umap_ori', 'control_point_umap_ori', 'inlier_prob_umap_ori', 'obs_vf_angle_umap_ori', 'acceleration_pca', 'curvature_pca', 'n_counts', 'mt_frac', 'jacobian_det_pca', 'manual_selection', 'divergence_pca', 'curvature_umap_ori', 'acceleration_umap_ori', 'control_point_umap', 'inlier_prob_umap', 'obs_vf_angle_umap', 'curvature_umap', 'curv_leiden', 'curv_louvain', 'SPI1->GATA1_jacobian', 'jacobian'
+ var: 'gene_name', 'gene_id', 'nCells', 'nCounts', 'pass_basic_filter', 'use_for_pca', 'frac', 'ntr', 'time_3_alpha', 'time_3_beta', 'time_3_gamma', 'time_3_half_life', 'time_3_alpha_b', 'time_3_alpha_r2', 'time_3_gamma_b', 'time_3_gamma_r2', 'time_3_gamma_logLL', 'time_3_delta_b', 'time_3_delta_r2', 'time_3_bs', 'time_3_bf', 'time_3_uu0', 'time_3_ul0', 'time_3_su0', 'time_3_sl0', 'time_3_U0', 'time_3_S0', 'time_3_total0', 'time_3_beta_k', 'time_3_gamma_k', 'time_5_alpha', 'time_5_beta', 'time_5_gamma', 'time_5_half_life', 'time_5_alpha_b', 'time_5_alpha_r2', 'time_5_gamma_b', 'time_5_gamma_r2', 'time_5_gamma_logLL', 'time_5_bs', 'time_5_bf', 'time_5_uu0', 'time_5_ul0', 'time_5_su0', 'time_5_sl0', 'time_5_U0', 'time_5_S0', 'time_5_total0', 'time_5_beta_k', 'time_5_gamma_k', 'use_for_dynamics', 'gamma', 'gamma_r2', 'use_for_transition', 'gamma_k', 'gamma_b'
+ uns: 'PCs', 'VecFld_pca', 'VecFld_umap', 'VecFld_umap_ori', 'X_umap_ori_neighbors', 'cell_phase_genes', 'cell_type_colors', 'dynamics', 'explained_variance_ratio_', 'feature_selection', 'grid_velocity_pca', 'grid_velocity_umap', 'grid_velocity_umap_ori', 'grid_velocity_umap_ori_perturbation', 'grid_velocity_umap_ori_test', 'grid_velocity_umap_perturbation', 'jacobian_pca', 'leiden', 'neighbors', 'pca_mean', 'pp', 'response'
+ obsm: 'X', 'X_pca', 'X_pca_SparseVFC', 'X_umap', 'X_umap_SparseVFC', 'X_umap_ori', 'X_umap_ori_SparseVFC', 'X_umap_ori_perturbation', 'X_umap_ori_test', 'X_umap_perturbation', 'acceleration_pca', 'acceleration_umap_ori', 'cell_cycle_scores', 'curvature_pca', 'curvature_umap', 'curvature_umap_ori', 'j_delta_x_perturbation', 'velocity_pca', 'velocity_pca_SparseVFC', 'velocity_umap', 'velocity_umap_SparseVFC', 'velocity_umap_ori', 'velocity_umap_ori_SparseVFC', 'velocity_umap_ori_perturbation', 'velocity_umap_ori_test', 'velocity_umap_perturbation'
+ layers: 'M_n', 'M_nn', 'M_t', 'M_tn', 'M_tt', 'X_new', 'X_total', 'velocity_alpha_minus_gamma_s'
+ obsp: 'X_umap_ori_connectivities', 'X_umap_ori_distances', 'connectivities', 'cosine_transition_matrix', 'distances', 'fp_transition_rate', 'moments_con', 'pca_ddhodge', 'perturbation_transition_matrix', 'umap_ori_ddhodge'
+
+
+*In silico* perturbation with ``dyn.pd.perturbation``
+----------------------------------------------------
+
+The ``dyn.pd.perturbation`` function from *dynamo* can be used to either upregulate or suppress one or multiple genes in a particular cell or across all cells to perform *in silico* genetic perturbation.
+When integrating the perturbation vectors across cells we can then also predict cell-fate outcomes after the perturbation which can be visualized as the perturbation streamlines.
+
+In the following, we will first delve into the *in silico* perturbations of the canonical PU.1/SPI1-GATA1 network motif that specifies the GMP or MEP lineage during hematopoiesis, respectively.
+
+Mutually exclusive effects after perturbing either GATA1 or SPI1 gene
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As we all know, GATA1 is the master regulator of the MEP lineage while SPI1 (PU.1) is the master regulator of the GMP lineage, and GATA1 and SPI1 form a mutual inhibition and self-activation network motif.
+
+We first suppress the expression of GATA1, which diverts cells from MEP-related lineages to GMP-related lineages.
+
+.. code:: ipython3
+
+ gene = "GATA1"
+ dyn.pd.perturbation(adata_labeling, gene, [-100], emb_basis="umap")
+ dyn.pl.streamline_plot(adata_labeling, color=["cell_type", gene], basis="umap_perturbation")
+
+
+.. parsed-literal::
+
+ |-----> [projecting velocity vector to low dimensional embedding] in progress: 100.0000%
+ |-----> [projecting velocity vector to low dimensional embedding] finished [0.3502s]
+
+
+
+.. image:: output_14_1.png
+ :width: 955px
+
+
+When suppressing the expression of SPI1, we find that cells from GMP-related lineages are diverted to MEP-related lineages.
+
+.. code:: ipython3
+
+ gene = "SPI1"
+ dyn.pd.perturbation(adata_labeling, gene, [-100], emb_basis="umap")
+ dyn.pl.streamline_plot(adata_labeling, color=["cell_type", gene], basis="umap_perturbation")
+
+
+.. parsed-literal::
+
+ |-----> [projecting velocity vector to low dimensional embedding] in progress: 100.0000%
+ |-----> [projecting velocity vector to low dimensional embedding] finished [0.3635s]
+
+
+
+.. image:: output_16_1.png
+ :width: 962px
+
+
+Double suppression of SPI1/GATA1 traps cells in the middle
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Suppression of both SPI1 and GATA1 traps cells in the progenitor states.
+These predictions align well with those reported in (Rekhtman et al., 1999) and reveal a seesaw-effect regulation between SPI1 and GATA1 in driving the GMP and the MEP lineages.
+
+.. code:: ipython3
+
+ selected_genes = [ "SPI1", "GATA1"]
+ # expr_vals = [-100, -100]
+ expr_vals = [-100, -15]
+ dyn.pd.perturbation(adata_labeling, selected_genes, expr_vals, emb_basis="umap")
+ dyn.pl.streamline_plot(adata_labeling, color=["cell_type", gene], basis="umap_perturbation")
+
+
+
+.. parsed-literal::
+
+ |-----> [projecting velocity vector to low dimensional embedding] in progress: 100.0000%
+ |-----> [projecting velocity vector to low dimensional embedding] finished [0.4156s]
+
+
+
+.. image:: output_18_1.png
+ :width: 954px
+
+
+Activate KLF1
+~~~~~~~~~~~~~
+
+Dynamo *in silico* perturbation can correctly predict other cellular transitions, as showcased in :cite:p:`QIU2022`. Here we show that activation of KLF1 leads other cells to convert into erythroid cells, consistent with :cite:p:`Orkin2008-vp`.
+
+.. code:: ipython3
+
+ gene = "KLF1"
+ dyn.pd.perturbation(adata_labeling, gene, [100], emb_basis="umap")
+ dyn.pl.streamline_plot(adata_labeling, color=["cell_type", gene], basis="umap_perturbation")
+
+
+.. parsed-literal::
+
+ |-----> [projecting velocity vector to low dimensional embedding] in progress: 100.0000%
+ |-----> [projecting velocity vector to low dimensional embedding] finished [0.3362s]
+
+
+
+.. image:: output_20_1.png
+
+
+Triple activation of "GATA1", "KLF1", "TAL1"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Triple activation of GATA1, KLF1, and TAL1, known erythrocyte factors,
+and TFs used for reprogramming fibroblasts into erythrocytes, diverts
+most other cells into the Ery lineage :cite:p:`Capellera-Garcia2016-qp`.
+
+.. code:: ipython3
+
+ selected_genes = ["GATA1", "KLF1", "TAL1"]
+ expr_vals = [100, 100, 100]
+ dyn.pd.perturbation(adata_labeling, selected_genes, expr_vals, emb_basis="umap")
+ dyn.pl.streamline_plot(adata_labeling, color=["cell_type", gene], basis="umap_perturbation")
+
+
+
+.. parsed-literal::
+
+ |-----> [projecting velocity vector to low dimensional embedding] in progress: 100.0000%
+ |-----> [projecting velocity vector to low dimensional embedding] finished [0.3842s]
+
+
+
+.. image:: output_22_1.png
+ :width: 954px
+
diff --git a/docs/make.bat b/docs/make.bat
old mode 100755
new mode 100644
index 543c6b13b..32bb24529
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -7,10 +7,8 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-if "%1" == "" goto help
+set SOURCEDIR=.
+set BUILDDIR=_build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
@@ -21,15 +19,17 @@ if errorlevel 9009 (
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
+ echo.https://www.sphinx-doc.org/
exit /b 1
)
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
diff --git a/docs/references.bib b/docs/references.bib
new file mode 100644
index 000000000..05cc78bd3
--- /dev/null
+++ b/docs/references.bib
@@ -0,0 +1,3284 @@
+@Book{terrall,
+ address = {Chicago},
+ author = {Terrall, Mary},
+ publisher = {University of Chicago Press},
+ title = {The Man Who Flattened the Earth: Maupertuis and the Sciences in the Enlightenment},
+ year = 2006
+}
+
+@ARTICLE{Cao2020-lv,
+ title = "Sci-fate characterizes the dynamics of gene expression in single
+ cells",
+ author = "Cao, Junyue and Zhou, Wei and Steemers, Frank and Trapnell, Cole
+ and Shendure, Jay",
+ abstract = "Gene expression programs change over time, differentiation and
+ development, and in response to stimuli. However, nearly all
+ techniques for profiling gene expression in single cells do not
+ directly capture transcriptional dynamics. In the present study,
+ we present a method for combined single-cell combinatorial
+ indexing and messenger RNA labeling (sci-fate), which uses
+ combinatorial cell indexing and 4-thiouridine labeling of newly
+ synthesized mRNA to concurrently profile the whole and newly
+ synthesized transcriptome in each of many single cells. We used
+ sci-fate to study the cortisol response in >6,000 single cultured
+ cells. From these data, we quantified the dynamics of the cell
+ cycle and glucocorticoid receptor activation, and explored their
+ intersection. Finally, we developed software to infer and analyze
+ cell-state transitions. We anticipate that sci-fate will be
+ broadly applicable to quantitatively characterize transcriptional
+ dynamics in diverse systems.",
+ journal = "Nat. Biotechnol.",
+ volume = 38,
+ number = 8,
+ pages = "980--988",
+ month = aug,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Rayon2020-af,
+ title = "Species-specific pace of development is associated with
+ differences in protein stability",
+ author = "Rayon, Teresa and Stamataki, Despina and Perez-Carrasco, Ruben
+ and Garcia-Perez, Lorena and Barrington, Christopher and
+ Melchionda, Manuela and Exelby, Katherine and Lazaro, Jorge and
+ Tybulewicz, Victor L J and Fisher, Elizabeth M C and Briscoe,
+ James",
+ abstract = "Although many molecular mechanisms controlling developmental
+ processes are evolutionarily conserved, the speed at which the
+ embryo develops can vary substantially between species. For
+ example, the same genetic program, comprising sequential changes
+ in transcriptional states, governs the differentiation of motor
+ neurons in mouse and human, but the tempo at which it operates
+ differs between species. Using in vitro directed differentiation
+ of embryonic stem cells to motor neurons, we show that the
+ program runs more than twice as fast in mouse as in human. This
+ is not due to differences in signaling, nor the genomic sequence
+ of genes or their regulatory elements. Instead, there is an
+ approximately two-fold increase in protein stability and cell
+ cycle duration in human cells compared with mouse cells. This can
+ account for the slower pace of human development and suggests
+ that differences in protein turnover play a role in interspecies
+ differences in developmental tempo.",
+ journal = "Science",
+ volume = 369,
+ number = 6510,
+ month = sep,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Ma2013-sp,
+ title = "Regularized vector field learning with sparse approximation for
+ mismatch removal",
+ author = "Ma, Jiayi and Zhao, Ji and Tian, Jinwen and Bai, Xiang and Tu,
+ Zhuowen",
+ abstract = "In vector field learning, regularized kernel methods such as
+ regularized least-squares require the number of basis functions
+ to be equivalent to the training sample size, N. The learning
+ process thus has $O(N^3)$ and $O(N^2)$ in the time and space
+ complexity, respectively. This poses significant burden on the
+ vector learning problem for large datasets. In this paper, we
+ propose a sparse approximation to a robust vector field learning
+ method, sparse vector field consensus (SparseVFC), and derive a
+ statistical learning bound on the speed of the convergence. We
+ apply SparseVFC to the mismatch removal problem. The quantitative
+ results on benchmark datasets demonstrate the significant speed
+ advantage of SparseVFC over the original VFC algorithm (two
+ orders of magnitude faster) without much performance degradation;
+ we also demonstrate the large improvement by SparseVFC over
+ traditional methods like RANSAC. Moreover, the proposed method is
+ general and it can be applied to other applications in vector
+ field learning.",
+ journal = "Pattern Recognit.",
+ volume = 46,
+ number = 12,
+ pages = "3519--3532",
+ month = dec,
+ year = 2013,
+ keywords = "Vector field learning; Sparse approximation; Regularization;
+ Reproducing kernel Hilbert space; Outlier; Mismatch removal"
+}
+
+@ARTICLE{Smedley2015-su,
+ title = "The {BioMart} community portal: an innovative alternative to
+ large, centralized data repositories",
+ author = "Smedley, Damian and Haider, Syed and Durinck, Steffen and
+ Pandini, Luca and Provero, Paolo and Allen, James and Arnaiz,
+ Olivier and Awedh, Mohammad Hamza and Baldock, Richard and
+ Barbiera, Giulia and Bardou, Philippe and Beck, Tim and Blake,
+ Andrew and Bonierbale, Merideth and Brookes, Anthony J and Bucci,
+ Gabriele and Buetti, Iwan and Burge, Sarah and Cabau, C{\'e}dric
+ and Carlson, Joseph W and Chelala, Claude and Chrysostomou,
+ Charalambos and Cittaro, Davide and Collin, Olivier and Cordova,
+ Raul and Cutts, Rosalind J and Dassi, Erik and Di Genova, Alex
+ and Djari, Anis and Esposito, Anthony and Estrella, Heather and
+ Eyras, Eduardo and Fernandez-Banet, Julio and Forbes, Simon and
+ Free, Robert C and Fujisawa, Takatomo and Gadaleta, Emanuela and
+ Garcia-Manteiga, Jose M and Goodstein, David and Gray, Kristian
+ and Guerra-Assun{\c c}{\~a}o, Jos{\'e} Afonso and Haggarty,
+ Bernard and Han, Dong-Jin and Han, Byung Woo and Harris, Todd and
+ Harshbarger, Jayson and Hastings, Robert K and Hayes, Richard D
+ and Hoede, Claire and Hu, Shen and Hu, Zhi-Liang and Hutchins,
+ Lucie and Kan, Zhengyan and Kawaji, Hideya and Keliet, Aminah and
+ Kerhornou, Arnaud and Kim, Sunghoon and Kinsella, Rhoda and
+ Klopp, Christophe and Kong, Lei and Lawson, Daniel and Lazarevic,
+ Dejan and Lee, Ji-Hyun and Letellier, Thomas and Li, Chuan-Yun
+ and Lio, Pietro and Liu, Chu-Jun and Luo, Jie and Maass,
+ Alejandro and Mariette, Jerome and Maurel, Thomas and Merella,
+ Stefania and Mohamed, Azza Mostafa and Moreews, Francois and
+ Nabihoudine, Ibounyamine and Ndegwa, Nelson and Noirot,
+ C{\'e}line and Perez-Llamas, Cristian and Primig, Michael and
+ Quattrone, Alessandro and Quesneville, Hadi and Rambaldi, Davide
+ and Reecy, James and Riba, Michela and Rosanoff, Steven and
+ Saddiq, Amna Ali and Salas, Elisa and Sallou, Olivier and
+ Shepherd, Rebecca and Simon, Reinhard and Sperling, Linda and
+ Spooner, William and Staines, Daniel M and Steinbach, Delphine
+ and Stone, Kevin and Stupka, Elia and Teague, Jon W and Dayem
+ Ullah, Abu Z and Wang, Jun and Ware, Doreen and Wong-Erasmus,
+ Marie and Youens-Clark, Ken and Zadissa, Amonida and Zhang,
+ Shi-Jian and Kasprzyk, Arek",
+ abstract = "The BioMart Community Portal (www.biomart.org) is a
+ community-driven effort to provide a unified interface to
+ biomedical databases that are distributed worldwide. The portal
+ provides access to numerous database projects supported by 30
+ scientific organizations. It includes over 800 different
+ biological datasets spanning genomics, proteomics, model
+ organisms, cancer data, ontology information and more. All
+ resources available through the portal are independently
+ administered and funded by their host organizations. The BioMart
+ data federation technology provides a unified interface to all
+ the available data. The latest version of the portal comes with
+ many new databases that have been created by our ever-growing
+ community. It also comes with better support and extensibility
+ for data analysis and visualization tools. A new addition to our
+ toolbox, the enrichment analysis tool is now accessible through
+ graphical and web service interface. The BioMart community portal
+ averages over one million requests per day. Building on this
+ level of service and the wealth of information that has become
+ available, the BioMart Community Portal has introduced a new,
+ more scalable and cheaper alternative to the large data stores
+ maintained by specialized organizations.",
+ journal = "Nucleic Acids Res.",
+ volume = 43,
+ number = "W1",
+ pages = "W589--98",
+ month = jul,
+ year = 2015,
+ language = "en"
+}
+
+@MISC{Rodriques2020-xc,
+ title = "{RNA} timestamps identify the age of single molecules in {RNA}
+ sequencing",
+ author = "Rodriques, Samuel G and Chen, Linlin M and Liu, Sophia and Zhong,
+ Ellen D and Scherrer, Joseph R and Boyden, Edward S and Chen, Fei",
+ journal = "Nature Biotechnology",
+ year = 2020
+}
+
+@ARTICLE{Saunders2019-ey,
+ title = "Thyroid hormone regulates distinct paths to maturation in pigment cell lineages",
+ author = "Saunders, Lauren M and Mishra, Abhishek K and Aman, Andrew J and Lewis, Victor M and Toomey, Matthew B and Packer, Jonathan S and
+ Qiu, Xiaojie and McFaline-Figueroa, Jose L and Corbo, Joseph C
+ and Trapnell, Cole and Parichy, David M",
+ abstract = "Thyroid hormone (TH) regulates diverse developmental events and can drive disparate cellular outcomes. In zebrafish, TH has
+ opposite effects on neural crest derived pigment cells of the adult stripe pattern, limiting melanophore population expansion, yet increasing yellow/orange xanthophore numbers. To learn how TH
+ elicits seemingly opposite responses in cells having a common
+ embryological origin, we analyzed individual transcriptomes from
+ thousands of neural crest-derived cells, reconstructed
+ developmental trajectories, identified pigment cell-lineage
+ specific responses to TH, and assessed roles for TH receptors. We
+ show that TH promotes maturation of both cell types but in
+ distinct ways. In melanophores, TH drives terminal differentiation, limiting final cell numbers. In xanthophores, TH promotes accumulation of orange carotenoids, making the cells visible. TH receptors act primarily to repress these programs when TH is limiting. Our findings show how a single endocrine factor integrates very different cellular activities during the generation of adult form.",
+ journal = "Elife",
+ volume = 8,
+ month = may,
+ year = 2019,
+ keywords = "developmental biology; genetics; genomics; melanophore; neural
+ crest; pigmentation; post-embryonic development; thyroid hormone;
+ xanthophore; zebrafish",
+ language = "en"
+}
+
+% The entry below contains non-ASCII chars that could not be converted
+% to a LaTeX equivalent.
+@ARTICLE{Zhu2020-lx,
+ title = "Developmental trajectory of prehematopoietic stem cell formation
+ from endothelium",
+ author = "Zhu, Qin and Gao, Peng and Tober, Joanna and Bennett, Laura and
+ Chen, Changya and Uzun, Yasin and Li, Yan and Howell, Elizabeth D
+ and Mumau, Melanie and Yu, Wenbao and He, Bing and Speck, Nancy A
+ and Tan, Kai",
+ abstract = "Hematopoietic stem and progenitor cells (HSPCs) in the bone
+ marrow are derived from a small population of hemogenic
+ endothelial (HE) cells located in the major arteries of the
+ mammalian embryo. HE cells undergo an endothelial to
+ hematopoietic cell transition, giving rise to HSPCs that
+ accumulate in intra-arterial clusters (IAC) before colonizing the
+ fetal liver. To examine the cell and molecular transitions
+ between endothelial (E), HE, and IAC cells, and the heterogeneity
+ of HSPCs within IACs, we profiled ∼40 000 cells from the caudal
+ arteries (dorsal aorta, umbilical, vitelline) of 9.5 days post
+ coitus (dpc) to 11.5 dpc mouse embryos by single-cell RNA
+ sequencing and single-cell assay for transposase-accessible
+ chromatin sequencing. We identified a continuous developmental
+ trajectory from E to HE to IAC cells, with identifiable
+ intermediate stages. The intermediate stage most proximal to HE,
+ which we term pre-HE, is characterized by increased accessibility
+ of chromatin enriched for SOX, FOX, GATA, and SMAD motifs. A
+ developmental bottleneck separates pre-HE from HE, with RUNX1
+ dosage regulating the efficiency of the pre-HE to HE transition.
+ A distal candidate Runx1 enhancer exhibits high chromatin
+ accessibility specifically in pre-HE cells at the bottleneck, but
+ loses accessibility thereafter. Distinct developmental
+ trajectories within IAC cells result in 2 populations of CD45+
+ HSPCs; an initial wave of lymphomyeloid-biased progenitors,
+ followed by precursors of hematopoietic stem cells (pre-HSCs).
+ This multiomics single-cell atlas significantly expands our
+ understanding of pre-HSC ontogeny.",
+ journal = "Blood",
+ volume = 136,
+ number = 7,
+ pages = "845--856",
+ month = aug,
+ year = 2020,
+ language = "en"
+}
+
+@UNPUBLISHED{Maehara2019-tn,
+ title = "Modeling latent flows on single-cell data using the Hodge
+ decomposition",
+ author = "Maehara, Kazumitsu and Ohkawa, Yasuyuki",
+ abstract = "Abstract Single-cell analysis is a powerful technique used to
+ identify a specific cell population of interest during
+ differentiation, aging, or oncogenesis. Individual cells occupy a
+ particular transient state in the cell cycle, circadian rhythm,
+ or during cell death. An appealing concept of pseudo-time
+ trajectory analysis of single-cell RNA sequencing data was
+ proposed in the software Monocle, and several methods of
+ trajectory analysis have since been published to date. These aim
+ to infer the ordering of cells and enable the tracing of gene
+ expression profile trajectories in cell differentiation and
+ reprogramming. However, the methods are restricted in terms of
+ time structure because of the pre-specified structure of
+ trajectories (linear, branched, tree or cyclic) which contrasts
+ with the mixed state of single cells. Here, we propose a technique
+ to extract underlying flows in single-cell data based on the
+ Hodge decomposition (HD). HD is a theorem of vector fields on a
+ manifold which guarantees that any given flow can decompose into
+ three types of orthogonal component: gradient-flow (acyclic),
+ curl-, and harmonic-flow (cyclic). HD is generalized on a
+ simplicial complex (graph) and the discretized HD has only a weak
+ assumption that the graph is directed. Therefore, in principle,
+ HD can extract flows from any mixture of tree and cyclic time
+ flows of observed cells. The decomposed flows provide intuitive
+ interpretations about complex flow because of their linearity and
+ orthogonality. Thus, each extracted flow can be focused on
+ separately with no need to consider crosstalk. We developed
+ ddhodge software, which aims to model the underlying flow
+ structure that implies unobserved time or causal relations in the
+ hodge-podge collection of data points. We demonstrated that the
+ mathematical framework of HD is suitable to reconstruct a sparse
+ graph representation of diffusion process as a candidate model of
+ differentiation while preserving the divergence of the original
+ fully-connected graph. The preserved divergence can be used as an
+ indicator of the source and sink cells in the observed
+ population. A sparse graph representation of the diffusion
+ process transforms data analysis of the non-linear structure
+ embedded in the high-dimensional space of single-cell data into
+ inspection of the visible flow using graph algorithms. Hence,
+ ddhodge is a suitable toolkit to visualize, inspect, and
+ subsequently interpret large data sets including, but not limited
+ to, high-throughput measurements of biological data. The beta
+ version of ddhodge R package is available at:
+ https://github.com/kazumits/ddhodge",
+ journal = "Cold Spring Harbor Laboratory",
+ pages = "592089",
+ month = mar,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Weinreb2020-dz,
+ title = "Lineage tracing on transcriptional landscapes links state to fate
+ during differentiation",
+ author = "Weinreb, Caleb and Rodriguez-Fraticelli, Alejo and Camargo,
+ Fernando D and Klein, Allon M",
+ abstract = "A challenge in biology is to associate molecular differences
+ among progenitor cells with their capacity to generate mature
+ cell types. Here, we used expressed DNA barcodes to clonally
+ trace transcriptomes over time and applied this to study fate
+ determination in hematopoiesis. We identified states of primed
+ fate potential and located them on a continuous transcriptional
+ landscape. We identified two routes of monocyte differentiation
+ that leave an imprint on mature cells. Analysis of sister cells
+ also revealed cells to have intrinsic fate biases not detectable
+ by single-cell RNA sequencing. Finally, we benchmarked
+ computational methods of dynamic inference from single-cell
+ snapshots, showing that fate choice occurs earlier than is
+ detected by state-of-the-art algorithms and that cells progress
+ steadily through pseudotime with precise and consistent dynamics.",
+ journal = "Science",
+ volume = 367,
+ number = 6479,
+ month = feb,
+ year = 2020,
+ language = "en"
+}
+
+% The entry below contains non-ASCII chars that could not be converted
+% to a LaTeX equivalent.
+@ARTICLE{Cusanovich2018-wz,
+ title = "A {Single-Cell} Atlas of In Vivo Mammalian Chromatin
+ Accessibility",
+ author = "Cusanovich, Darren A and Hill, Andrew J and Aghamirzaie, Delasa
+ and Daza, Riza M and Pliner, Hannah A and Berletch, Joel B and
+ Filippova, Galina N and Huang, Xingfan and Christiansen, Lena and
+ DeWitt, William S and Lee, Choli and Regalado, Samuel G and Read,
+ David F and Steemers, Frank J and Disteche, Christine M and
+ Trapnell, Cole and Shendure, Jay",
+ abstract = "We applied a combinatorial indexing assay, sci-ATAC-seq, to
+ profile genome-wide chromatin accessibility in ∼100,000 single
+ cells from 13 adult mouse tissues. We identify 85 distinct
+ patterns of chromatin accessibility, most of which can be
+ assigned to cell types, and ∼400,000 differentially accessible
+ elements. We use these data to link regulatory elements to their
+ target genes, to define the transcription factor grammar
+ specifying each cell type, and to discover in vivo correlates of
+ heterogeneity in accessibility within cell types. We develop a
+ technique for mapping single cell gene expression data to
+ single-cell chromatin accessibility data, facilitating the
+ comparison of atlases. By intersecting mouse chromatin
+ accessibility with human genome-wide association summary
+ statistics, we identify cell-type-specific enrichments of the
+ heritability signal for hundreds of complex traits. These data
+ define the in vivo landscape of the regulatory genome for common
+ mammalian cell types at single-cell resolution.",
+ journal = "Cell",
+ volume = 174,
+ number = 5,
+ pages = "1309--1324.e18",
+ month = aug,
+ year = 2018,
+ keywords = "ATAC-seq; GWAS; chromatin; chromatin accessibility; epigenetics;
+ epigenomics; regulatory; single cell",
+ language = "en"
+}
+
+@UNPUBLISHED{Cho2019-bh,
+ title = "Mathematical modeling with single-cell sequencing data",
+ author = "Cho, Heyrim and Rockne, Russell C",
+ abstract = "Abstract Single-cell sequencing technologies have revolutionized
+ molecular and cellular biology and stimulated the development of
+ computational tools to analyze the data generated from these
+ technology platforms. However, despite the recent explosion of
+ computational analysis tools, relatively few mathematical models
+ have been developed to utilize these data. Here we compare and
+ contrast two approaches for building mathematical models of cell
+ state-transitions with single-cell RNA-sequencing data with
+ hematopoiesis as a model system; by solving partial differential
+ equations on a graph representing discrete cell state
+ relationships, and by solving the equations on a continuous cell
+ state-space. We demonstrate how to calibrate model parameters
+ from single or multiple time-point single-cell sequencing data,
+ and examine the effects of data processing algorithms on the
+ model calibration and predictions. As an application of our
+ approach, we demonstrate how the calibrated models may be used to
+ mathematically perturb normal hematopoiesis to simulate, predict,
+ and study the emergence of novel cell types during the
+ pathogenesis of acute myeloid leukemia. The mathematical modeling
+ framework we present is general and can be applied to study cell
+ state-transitions in any single-cell genome sequencing
+ dataset. Author summary: Here we compare and contrast graph- and
+ continuum-based approaches for constructing mathematical models
+ of cell state-transitions using single-cell RNA-sequencing data.
+ Using two publicly available datasets, we demonstrate how to
+ calibrate mathematical models of hematopoiesis and how to use the
+ models to predict dynamics of acute myeloid leukemia pathogenesis
+ by mathematically perturbing the process of cellular
+ proliferation and differentiation. We apply these modeling
+ approaches to study the effects of perturbing individual or sets
+ of genes in subsets of cells, or by modeling the dynamics of cell
+ state-transitions directly in a reduced dimensional space. We
+ examine the effects of different graph abstraction and trajectory
+ inference algorithms on calibrating the models and the subsequent
+ model predictions. We conclude that both the graph- and
+ continuum-based modeling approaches can be equally well
+ calibrated to data and discuss situations in which one method may
+ be preferable over the other. This work presents a general
+ mathematical modeling framework, applicable to any single-cell
+ sequencing dataset where cell state-transitions are of interest.",
+ journal = "Cold Spring Harbor Laboratory",
+ pages = "710640",
+ month = jul,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Lubeck2014-pa,
+ title = "Single-cell in situ {RNA} profiling by sequential hybridization",
+ author = "Lubeck, Eric and Coskun, Ahmet F and Zhiyentayev, Timur and
+ Ahmad, Mubhij and Cai, Long",
+ journal = "Nat. Methods",
+ volume = 11,
+ number = 4,
+ pages = "360--361",
+ month = apr,
+ year = 2014,
+ language = "en"
+}
+
+@ARTICLE{Karr2012-ns,
+ title = "A whole-cell computational model predicts phenotype from genotype",
+ author = "Karr, Jonathan R and Sanghvi, Jayodita C and Macklin, Derek N and
+ Gutschow, Miriam V and Jacobs, Jared M and Bolival, Jr, Benjamin
+ and Assad-Garcia, Nacyra and Glass, John I and Covert, Markus W",
+ abstract = "Understanding how complex phenotypes arise from individual
+ molecules and their interactions is a primary challenge in
+ biology that computational approaches are poised to tackle. We
+ report a whole-cell computational model of the life cycle of the
+ human pathogen Mycoplasma genitalium that includes all of its
+ molecular components and their interactions. An integrative
+ approach to modeling that combines diverse mathematics enabled
+ the simultaneous inclusion of fundamentally different cellular
+ processes and experimental measurements. Our whole-cell model
+ accounts for all annotated gene functions and was validated
+ against a broad range of data. The model provides insights into
+ many previously unobserved cellular behaviors, including in vivo
+ rates of protein-DNA association and an inverse relationship
+ between the durations of DNA replication initiation and
+ replication. In addition, experimental analysis directed by model
+ predictions identified previously undetected kinetic parameters
+ and biological functions. We conclude that comprehensive
+ whole-cell models can be used to facilitate biological discovery.",
+ journal = "Cell",
+ volume = 150,
+ number = 2,
+ pages = "389--401",
+ month = jul,
+ year = 2012,
+ language = "en"
+}
+
+@UNPUBLISHED{Emanuel2020-aw,
+ title = "Bulk and single-cell gene expression profiling of {SARS-CoV-2}
+ infected human cell lines identifies molecular targets for
+ therapeutic intervention",
+ author = "Wyler, Emanuel and M{\"o}sbauer, Kirstin and Franke, Vedran and
+ Diag, Asija and Gottula, Lina Theresa and Arsie, Roberto and
+ Klironomos, Filippos and Koppstein, David and Ayoub, Salah and
+ Buccitelli, Christopher and Richter, Anja and Legnini, Ivano and
+ Ivanov, Andranik and Mari, Tommaso and Del Giudice, Simone and
+ Papies, Jan Patrick and M{\"u}ller, Marcel Alexander and Niemeyer,
+ Daniela and Selbach, Matthias and Akalin, Altuna and Rajewsky,
+ Nikolaus and Drosten, Christian and Landthaler, Markus",
+ abstract = "The coronavirus disease 2019 (COVID-19) pandemic, caused by the
+ novel severe acute respiratory syndrome coronavirus 2
+ (SARS-CoV-2), is an ongoing global health threat with more than
+ two million infected people since its emergence in late 2019.
+ Detailed knowledge of the molecular biology of the infection is
+ indispensable for understanding of the viral replication, host
+ responses, and disease progression. We provide gene expression
+ profiles of SARS-CoV and SARS-CoV-2 infections in three human
+ cell lines (H1299, Caco-2 and Calu-3 cells), using bulk and
+ single-cell transcriptomics. Small RNA profiling showed strong
+ expression of the immunity and inflammation-associated microRNA
+ miRNA-155 upon infection with both viruses. SARS-CoV-2 elicited
+ approximately two-fold higher stimulation of the interferon
+ response compared to SARS-CoV in the permissive human epithelial
+ cell line Calu-3, and induction of cytokines such as CXCL10 or
+ IL6. Single cell RNA sequencing data showed that canonical
+ interferon stimulated genes such as IFIT2 or OAS2 were broadly
+ induced, whereas interferon beta (IFNB1) and lambda (IFNL1-4)
+ were expressed only in a subset of infected cells. In addition,
+ temporal resolution of transcriptional responses suggested
+ interferon regulatory factors (IRFs) activities precede that of
+ nuclear factor-$\kappa$B (NF-$\kappa$B). Lastly, we identified
+ heat shock protein 90 (HSP90) as a protein relevant for the
+ infection. Inhibition of the HSP90 chaperone activity by
+ Tanespimycin/17-N-allylamino-17-demethoxygeldanamycin (17-AAG)
+ resulted in a reduction of viral replication, and of TNF and IL1B
+ mRNA levels. In summary, our study established in vitro cell
+ culture models to study SARS-CoV-2 infection and identified HSP90
+ protein as potential drug target for therapeutic intervention of
+ SARS-CoV-2 infection. \#\#\# Competing Interest Statement The
+ authors have declared no competing interest.",
+ journal = "Cold Spring Harbor Laboratory",
+ pages = "2020.05.05.079194",
+ month = may,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Webb2020-ox,
+ title = "Sequence homology between human {PARP14} and the {SARS-CoV-2}
+ {ADP} ribose 1'-phosphatase",
+ author = "Webb, Thomas E and Saad, Ramy",
+ journal = "Immunol. Lett.",
+ volume = 224,
+ pages = "38--39",
+ month = aug,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Lim2009-cm,
+ title = "Silencing the {Metallothionein-2A} gene inhibits cell cycle
+ progression from {G1-} to S-phase involving {ATM} and cdc25A
+ signaling in breast cancer cells",
+ author = "Lim, Daina and Jocelyn, Koh Mei-Xin and Yip, George Wai-Cheong
+ and Bay, Boon-Huat",
+ abstract = "Metallothioneins (MTs) are a group of metal-binding proteins
+ involved in cell proliferation, differentiation and apoptosis.
+ The MT-2A isoform is generally the most abundant isoform among
+ the 10 known functional MT genes. In the present study, we
+ observed that down-regulation of the MT-2A gene in MCF-7 cells
+ via siRNA-mediated silencing inhibited cell growth by inducing
+ cell cycle arrest in G1-phase (G1-arrest) and a marginal increase
+ in cells in sub-G1-phase. Scanning electron microscopic
+ examination of the cells with silenced expression of MT-2A
+ (siMT-2A cells) revealed essentially normal cell morphology with
+ presence of scattered apoptotic cells. To elucidate the
+ underlying molecular mechanism, we examined the expression of
+ cell cycle related genes in MT-2A-silenced cells and found a
+ higher expression of the ataxia telangiectasia mutated (ATM) gene
+ concomitant with a lower expression of the cdc25A gene. These
+ data suggest that MT-2A could plausibly modulate cell cycle
+ progression from G1- to S-phase via the ATM/Chk2/cdc25A pathway.",
+ journal = "Cancer Lett.",
+ volume = 276,
+ number = 1,
+ pages = "109--117",
+ month = apr,
+ year = 2009,
+ language = "en"
+}
+
+@ARTICLE{Wang2014-zc,
+ title = "Epigenetic state network approach for describing cell phenotypic
+ transitions",
+ author = "Wang, Ping and Song, Chaoming and Zhang, Hang and Wu, Zhanghan
+ and Tian, Xiao-Jun and Xing, Jianhua",
+ abstract = "Recent breakthroughs of cell phenotype reprogramming impose
+ theoretical challenges on unravelling the complexity of large
+ circuits maintaining cell phenotypes coupled at many different
+ epigenetic and gene regulation levels, and quantitatively
+ describing the phenotypic transition dynamics. A popular picture
+ proposed by Waddington views cell differentiation as a ball
+ sliding down a landscape with valleys corresponding to different
+ cell types separated by ridges. Based on theories of dynamical
+ systems, we establish a novel 'epigenetic state network'
+ framework that captures the global architecture of cell
+ phenotypes, which allows us to translate the metaphorical
+ low-dimensional Waddington epigenetic landscape concept into a
+ simple-yet-predictive rigorous mathematical framework of cell
+ phenotypic transitions. Specifically, we simplify a
+ high-dimensional epigenetic landscape into a collection of
+ discrete states corresponding to stable cell phenotypes connected
+ by optimal transition pathways among them. We then apply the
+ approach to the phenotypic transition processes among fibroblasts
+ (FBs), pluripotent stem cells (PSCs) and cardiomyocytes (CMs).
+ The epigenetic state network for this case predicts three major
+ transition pathways connecting FBs and CMs. One goes by way of
+ PSCs. The other two pathways involve transdifferentiation either
+ indirectly through cardiac progenitor cells or directly from FB
+ to CM. The predicted pathways and multiple intermediate states
+ are supported by existing microarray data and other experiments.
+ Our approach provides a theoretical framework for studying cell
+ phenotypic transitions. Future studies at single-cell levels can
+ directly test the model predictions.",
+ journal = "Interface Focus",
+ volume = 4,
+ number = 3,
+ pages = "20130068",
+ month = jun,
+ year = 2014,
+ keywords = "gene regulatory network; non-equilibrium steady state; nonlinear
+ dynamics",
+ language = "en"
+}
+
+@UNPUBLISHED{Chapman2020-oj,
+ title = "Correlated Gene Modules Uncovered by {Single-Cell}
+ Transcriptomics with High Detectability and Accuracy",
+ author = "Chapman, Alec R and Lee, David F and Cai, Wenting and Ma, Wenping
+ and Li, Xiang and Sun, Wenjie and Sunney Xie, X",
+ abstract = "Abstract Single cell transcriptome sequencing has become
+ extremely useful for cell typing. However, such differential
+ expression data has shed little light on regulatory relationships
+ among genes. Here, by examining pairwise correlations between
+ mRNA levels of any two genes under steady-state conditions, we
+ uncovered correlated gene modules (CGMs), clusters of
+ intercorrelated genes that carry out certain biological functions
+ together. We report a novel single-cell RNA-seq method called
+ MALBAC-DT with higher detectability and accuracy, allowing
+ determination of the covariance matrix of the expressed mRNAs for
+ a homogenous cell population. We observed a prevalence of
+ positive correlations between pairs of genes, with higher
+ correlations corresponding to higher likelihoods of
+ protein-protein interactions. Some CGMs, such as the p53 module
+ in a cancer cell line, are cell type specific, while others, such
+ as the protein synthesis CGM, are shared by different cell types.
+ CGMs distinguished direct targets of p53 and exposed different
+ modes of regulation of these genes in different cell types. Our
+ covariance analyses of steady-state fluctuations provides a
+ powerful way to advance our functional understanding of
+ gene-to-gene interactions.",
+ journal = "Cold Spring Harbor Laboratory",
+ pages = "2019.12.31.892190",
+ month = jan,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Frieda2017-mz,
+ title = "Synthetic recording and in situ readout of lineage information in
+ single cells",
+ author = "Frieda, Kirsten L and Linton, James M and Hormoz, Sahand and
+ Choi, Joonhyuk and Chow, Ke-Huan K and Singer, Zakary S and
+ Budde, Mark W and Elowitz, Michael B and Cai, Long",
+ abstract = "Reconstructing the lineage relationships and dynamic event
+ histories of individual cells within their native spatial context
+ is a long-standing challenge in biology. Many biological
+ processes of interest occur in optically opaque or physically
+ inaccessible contexts, necessitating approaches other than direct
+ imaging. Here we describe a synthetic system that enables cells
+ to record lineage information and event histories in the genome
+ in a format that can be subsequently read out of single cells in
+ situ. This system, termed memory by engineered mutagenesis with
+ optical in situ readout (MEMOIR), is based on a set of barcoded
+ recording elements termed scratchpads. The state of a given
+ scratchpad can be irreversibly altered by CRISPR/Cas9-based
+ targeted mutagenesis, and later read out in single cells through
+ multiplexed single-molecule RNA fluorescence hybridization
+ (smFISH). Using MEMOIR as a proof of principle, we engineered
+ mouse embryonic stem cells to contain multiple scratchpads and
+ other recording components. In these cells, scratchpads were
+ altered in a progressive and stochastic fashion as the cells
+ proliferated. Analysis of the final states of scratchpads in
+ single cells in situ enabled reconstruction of lineage
+ information from cell colonies. Combining analysis of endogenous
+ gene expression with lineage reconstruction in the same cells
+ further allowed inference of the dynamic rates at which embryonic
+ stem cells switch between two gene expression states. Finally,
+ using simulations, we show how parallel MEMOIR systems operating
+ in the same cell could enable recording and readout of dynamic
+ cellular event histories. MEMOIR thus provides a versatile
+ platform for information recording and in situ, single-cell
+ readout across diverse biological systems.",
+ journal = "Nature",
+ volume = 541,
+ number = 7635,
+ pages = "107--111",
+ month = jan,
+ year = 2017,
+ language = "en"
+}
+
+@ARTICLE{Matsuda2020-vf,
+ title = "Species-specific segmentation clock periods are due to
+ differential biochemical reaction speeds",
+ author = "Matsuda, Mitsuhiro and Hayashi, Hanako and Garcia-Ojalvo, Jordi
+ and Yoshioka-Kobayashi, Kumiko and Kageyama, Ryoichiro and
+ Yamanaka, Yoshihiro and Ikeya, Makoto and Toguchida, Junya and
+ Alev, Cantas and Ebisuya, Miki",
+ abstract = "Although mechanisms of embryonic development are similar between
+ mice and humans, the time scale is generally slower in humans. To
+ investigate these interspecies differences in development, we
+ recapitulate murine and human segmentation clocks that display 2-
+ to 3-hour and 5- to 6-hour oscillation periods, respectively. Our
+ interspecies genome-swapping analyses indicate that the period
+ difference is not due to sequence differences in the HES7 locus,
+ the core gene of the segmentation clock. Instead, we demonstrate
+ that multiple biochemical reactions of HES7, including the
+ degradation and expression delays, are slower in human cells than
+ they are in mouse cells. With the measured biochemical
+ parameters, our mathematical model accounts for the two- to
+ threefold period difference between the species. We propose that
+ cell-autonomous differences in biochemical reaction speeds
+ underlie temporal differences in development between species.",
+ journal = "Science",
+ volume = 369,
+ number = 6510,
+ pages = "1450--1455",
+ month = sep,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Kim2020-bo,
+ title = "The Architecture of {SARS-CoV-2} Transcriptome",
+ author = "Kim, Dongwan and Lee, Joo-Yeon and Yang, Jeong-Sun and Kim, Jun
+ Won and Kim, V Narry and Chang, Hyeshik",
+ abstract = "SARS-CoV-2 is a betacoronavirus responsible for the COVID-19
+ pandemic. Although the SARS-CoV-2 genome was reported recently,
+ its transcriptomic architecture is unknown. Utilizing two
+ complementary sequencing techniques, we present a high-resolution
+ map of the SARS-CoV-2 transcriptome and epitranscriptome. DNA
+ nanoball sequencing shows that the transcriptome is highly
+ complex owing to numerous discontinuous transcription events. In
+ addition to the canonical genomic and 9 subgenomic RNAs,
+ SARS-CoV-2 produces transcripts encoding unknown ORFs with
+ fusion, deletion, and/or frameshift. Using nanopore direct RNA
+ sequencing, we further find at least 41 RNA modification sites on
+ viral transcripts, with the most frequent motif, AAGAA. Modified
+ RNAs have shorter poly(A) tails than unmodified RNAs, suggesting
+ a link between the modification and the 3' tail. Functional
+ investigation of the unknown transcripts and RNA modifications
+ discovered in this study will open new directions to our
+ understanding of the life cycle and pathogenicity of SARS-CoV-2.",
+ journal = "Cell",
+ volume = 181,
+ number = 4,
+ pages = "914--921.e10",
+ month = may,
+ year = 2020,
+ keywords = "COVID-19; RNA modification; SARS-CoV-2; coronavirus; direct RNA
+ sequencing; discontinuous transcription; epitranscriptome;
+ nanopore; poly(A) tail; transcriptome",
+ language = "en"
+}
+
+@MISC{Creighton1958-ff,
+ title = "The Strategy of the Genes",
+ author = "Creighton, Harriet and Waddington, C H",
+ journal = "AIBS Bulletin",
+ volume = 8,
+ number = 2,
+ pages = "49",
+ year = 1958
+}
+
+@MISC{Alon2006-ea,
+ title = "An Introduction to Systems Biology",
+ author = "Alon, Uri",
+ year = 2006
+}
+
+@ARTICLE{McKenna2016-ma,
+ title = "Whole-organism lineage tracing by combinatorial and cumulative
+ genome editing",
+ author = "McKenna, Aaron and Findlay, Gregory M and Gagnon, James A and
+ Horwitz, Marshall S and Schier, Alexander F and Shendure, Jay",
+ abstract = "Multicellular systems develop from single cells through distinct
+ lineages. However, current lineage-tracing approaches scale
+ poorly to whole, complex organisms. Here, we use genome editing
+ to progressively introduce and accumulate diverse mutations in a
+ DNA barcode over multiple rounds of cell division. The barcode,
+ an array of clustered regularly interspaced short palindromic
+ repeats (CRISPR)/Cas9 target sites, marks cells and enables the
+ elucidation of lineage relationships via the patterns of
+ mutations shared between cells. In cell culture and zebrafish, we
+ show that rates and patterns of editing are tunable and that
+ thousands of lineage-informative barcode alleles can be
+ generated. By sampling hundreds of thousands of cells from
+ individual zebrafish, we find that most cells in adult organs
+ derive from relatively few embryonic progenitors. In future
+ analyses, genome editing of synthetic target arrays for lineage
+ tracing (GESTALT) can be used to generate large-scale maps of
+ cell lineage in multicellular systems for normal development and
+ disease.",
+ journal = "Science",
+ volume = 353,
+ number = 6298,
+ pages = "aaf7907",
+ month = jul,
+ year = 2016,
+ language = "en"
+}
+
+@ARTICLE{Cusanovich2015-vn,
+ title = "Multiplex single cell profiling of chromatin accessibility by
+ combinatorial cellular indexing",
+ author = "Cusanovich, Darren A and Daza, Riza and Adey, Andrew and Pliner,
+ Hannah A and Christiansen, Lena and Gunderson, Kevin L and
+ Steemers, Frank J and Trapnell, Cole and Shendure, Jay",
+ abstract = "Technical advances have enabled the collection of genome and
+ transcriptome data sets with single-cell resolution. However,
+ single-cell characterization of the epigenome has remained
+ challenging. Furthermore, because cells must be physically
+ separated before biochemical processing, conventional single-cell
+ preparatory methods scale linearly. We applied combinatorial
+ cellular indexing to measure chromatin accessibility in thousands
+ of single cells per assay, circumventing the need for
+ compartmentalization of individual cells. We report chromatin
+ accessibility profiles from more than 15,000 single cells and use
+ these data to cluster cells on the basis of chromatin
+ accessibility landscapes. We identify modules of coordinately
+ regulated chromatin accessibility at the level of single cells
+ both between and within cell types, with a scalable method that
+ may accelerate progress toward a human cell atlas.",
+ journal = "Science",
+ volume = 348,
+ number = 6237,
+ pages = "910--914",
+ month = may,
+ year = 2015,
+ language = "en"
+}
+
+@ARTICLE{Hochgerner2018-wk,
+ title = "Conserved properties of dentate gyrus neurogenesis across
+ postnatal development revealed by single-cell {RNA} sequencing",
+ author = "Hochgerner, Hannah and Zeisel, Amit and L{\"o}nnerberg, Peter and
+ Linnarsson, Sten",
+ abstract = "The dentate gyrus of the hippocampus is a brain region in which
+ neurogenesis persists into adulthood; however, the relationship
+ between developmental and adult dentate gyrus neurogenesis has
+ not been examined in detail. Here we used single-cell RNA
+ sequencing to reveal the molecular dynamics and diversity of
+ dentate gyrus cell types in perinatal, juvenile, and adult mice.
+ We found distinct quiescent and proliferating progenitor cell
+ types, linked by transient intermediate states to neuroblast
+ stages and fully mature granule cells. We observed shifts in the
+ molecular identity of quiescent and proliferating radial glia and
+ granule cells during the postnatal period that were then
+ maintained through adult stages. In contrast, intermediate
+ progenitor cells, neuroblasts, and immature granule cells were
+ nearly indistinguishable at all ages. These findings demonstrate
+ the fundamental similarity of postnatal and adult neurogenesis in
+ the hippocampus and pinpoint the early postnatal transformation
+ of radial glia from embryonic progenitors to adult quiescent stem
+ cells.",
+ journal = "Nat. Neurosci.",
+ volume = 21,
+ number = 2,
+ pages = "290--299",
+ month = feb,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Bastidas-Ponce2019-ma,
+ title = "Comprehensive single cell {mRNA} profiling reveals a detailed
+ roadmap for pancreatic endocrinogenesis",
+ author = "Bastidas-Ponce, Aim{\'e}e and Tritschler, Sophie and Dony,
+ Leander and Scheibner, Katharina and Tarquis-Medina, Marta and
+ Salinno, Ciro and Schirge, Silvia and Burtscher, Ingo and
+ B{\"o}ttcher, Anika and Theis, Fabian J and Lickert, Heiko and
+ Bakhti, Mostafa",
+ abstract = "Deciphering mechanisms of endocrine cell induction, specification
+ and lineage allocation in vivo will provide valuable insights
+ into how the islets of Langerhans are generated. Currently, it is
+ ill defined how endocrine progenitors segregate into different
+ endocrine subtypes during development. Here, we generated a novel
+ neurogenin 3 (Ngn3)-Venus fusion (NVF) reporter mouse line, that
+ closely mirrors the transient endogenous Ngn3 protein expression.
+ To define an in vivo roadmap of endocrinogenesis, we performed
+ single cell RNA sequencing of 36,351 pancreatic epithelial and
+ NVF+ cells during secondary transition. This allowed Ngn3low
+ endocrine progenitors, Ngn3high endocrine precursors, Fev+
+ endocrine lineage and hormone+ endocrine subtypes to be
+ distinguished and time-resolved, and molecular programs during
+ the step-wise lineage restriction steps to be delineated.
+ Strikingly, we identified 58 novel signature genes that show the
+ same transient expression dynamics as Ngn3 in the 7260 profiled
+ Ngn3-expressing cells. The differential expression of these genes
+ in endocrine precursors associated with their cell-fate
+ allocation towards distinct endocrine cell types. Thus, the
+ generation of an accurately regulated NVF reporter allowed us to
+ temporally resolve endocrine lineage development to provide a
+ fine-grained single cell molecular profile of endocrinogenesis in
+ vivo.",
+ journal = "Development",
+ volume = 146,
+ number = 12,
+ month = jun,
+ year = 2019,
+ keywords = "Endocrine cell allocation; Endocrine progenitor-precursor;
+ Endocrinogenesis; Mouse; Neurog3; Single cell RNA sequencing",
+ language = "en"
+}
+
+@ARTICLE{Liu2020-mw,
+ title = "{High-Spatial-Resolution} {Multi-Omics} Sequencing via
+ Deterministic Barcoding in Tissue",
+ author = "Liu, Yang and Yang, Mingyu and Deng, Yanxiang and Su, Graham and
+ Enninful, Archibald and Guo, Cindy C and Tebaldi, Toma and Zhang,
+ Di and Kim, Dongjoo and Bai, Zhiliang and Norris, Eileen and Pan,
+ Alisia and Li, Jiatong and Xiao, Yang and Halene, Stephanie and
+ Fan, Rong",
+ abstract = "We present deterministic barcoding in tissue for spatial omics
+ sequencing (DBiT-seq) for co-mapping of mRNAs and proteins in a
+ formaldehyde-fixed tissue slide via next-generation sequencing
+ (NGS). Parallel microfluidic channels were used to deliver DNA
+ barcodes to the surface of a tissue slide, and crossflow of two
+ sets of barcodes, A1-50 and B1-50, followed by ligation in situ,
+ yielded a 2D mosaic of tissue pixels, each containing a unique
+ full barcode AB. Application to mouse embryos revealed major
+ tissue types in early organogenesis as well as fine features like
+ microvasculature in a brain and pigmented epithelium in an eye
+ field. Gene expression profiles in 10-$\mu$m pixels conformed
+ into the clusters of single-cell transcriptomes, allowing for
+ rapid identification of cell types and spatial distributions.
+ DBiT-seq can be adopted by researchers with no experience in
+ microfluidics and may find applications in a range of fields
+ including developmental biology, cancer biology, neuroscience,
+ and clinical pathology.",
+ journal = "Cell",
+ volume = 183,
+ number = 6,
+ pages = "1665--1681.e18",
+ month = dec,
+ year = 2020,
+ keywords = "high spatial resolution; in situ barcoding; mouse embryo;
+ next-generation sequencing; spatial multi-omics",
+ language = "en"
+}
+
+@ARTICLE{Chen2015-nk,
+ title = "Spatially resolved, highly multiplexed {RNA} profiling in single
+ cells",
+ author = "Chen, Kok Hao and Boettiger, Alistair N and Moffitt, Jeffrey R
+ and Wang, Siyuan and Zhuang, Xiaowei",
+ abstract = "The basis of cellular function is where and when proteins are
+ expressed and in what quantities. Single-molecule fluorescence
+ in situ hybridization (smFISH) experiments quantify the copy
+ number and location of mRNA molecules; however, the numbers of
+ RNA species that can be simultaneously measured by smFISH has
+ been limited. Using combinatorial labeling with error-robust
+ encoding schemes, Chen et al. simultaneously imaged 100 to 1000
+ RNA species in a single cell. Such large-scale detection allows
+ regulatory interactions to be analyzed at the transcriptome
+ scale. Science, this issue p. 10.1126/science.aaa6090
+ \#\#\# INTRODUCTION The copy number and intracellular
+ localization of RNA are important regulators of gene expression.
+ Measurement of these properties at the transcriptome scale in
+ single cells will give answers to many questions related to gene
+ expression and regulation. Single-molecule RNA imaging
+ approaches, such as single-molecule fluorescence in situ
+ hybridization (smFISH), are powerful tools for counting and
+ mapping RNA; however, the number of RNA species that can be
+ simultaneously imaged in individual cells has been limited. This
+ makes it challenging to perform transcriptomic analysis of
+ single cells in a spatially resolved manner. Here, we report
+ multiplexed error-robust FISH (MERFISH), a single-molecule
+ imaging method that allows thousands of RNA species to be imaged
+ in single cells by using combinatorial FISH labeling with
+ encoding schemes capable of detecting and/or correcting errors.
+ \#\#\# RATIONALE We labeled each cellular RNA with a set of
+ encoding probes, which contain targeting sequences that bind the
+ RNA and readout sequences that bind fluorescently labeled
+ readout probes. Each RNA species is encoded with a particular
+ combination of readout sequences. We used successive rounds of
+ hybridization and imaging, each with a different readout probe,
+ to identify the readout sequences bound to each RNA and to
+ decode the RNA. In principle, combinatorial labeling allows the
+ number of detectable RNA species to grow exponentially with the
+ number of imaging rounds, but the detection errors also increase
+ exponentially. To combat such accumulating errors, we exploited
+ error-robust encoding schemes used in digital electronics, such
+ as the extended Hamming code, in the design of our encoding
+ probes but modified these schemes in order to account for the
+ error properties in FISH measurements. We assigned each RNA a
+ binary word in our modified Hamming code and encoded the RNA
+ with a combination of readout sequences according to this binary
+ word. \#\#\# RESULTS We first imaged 140 RNA species in human
+ fibroblast cells using MERFISH with 16 rounds of hybridization
+ and a modified Hamming code capable of both error detection and
+ correction. We obtained ~80\% detection efficiency and observed
+ excellent correlation of RNA copy numbers determined with
+ MERFISH with both bulk RNA sequencing data and conventional
+ smFISH measurements of individual genes. Next, we used an
+ alternative MERFISH encoding scheme, which is capable of
+ detecting but not correcting errors, to image 1001 RNA species
+ in individual cells using only 14 rounds of hybridization. The
+ observed RNA copy numbers again correlate well with bulk
+ sequencing data. However, the detection efficiency is only
+ one-third that of the error-correcting encoding scheme. We
+ performed correlation analysis of the $10^4$ to $10^6$ pairs of
+ measured genes and identified many covarying gene groups that
+ share common regulatory elements. Such grouping allowed us to
+ hypothesize potential functions of ~100 unannotated or partially
+ annotated genes of unknown functions. We further analyzed
+ correlations in the spatial distributions of different RNA
+ species and identified groups of RNAs with different
+ distribution patterns in the cell. \#\#\# DISCUSSION This highly
+ multiplexed imaging approach enables analyses based on the
+ variation and correlation of copy numbers and spatial
+ distributions of a large number of RNA species within single
+ cells. Such analyses should facilitate the delineation of
+ regulatory networks and in situ identification of cell types. We
+ envision that this approach will allow spatially resolved
+ transcriptomes to be determined for single cells.
+ MERFISH for transcriptome imaging. Numerous RNA species can be
+ identified, counted, and localized in a single cell by using
+ MERFISH, a single-molecule imaging approach that uses
+ combinatorial labeling and sequential imaging with encoding
+ schemes capable of detection and/or correction of errors. This
+ highly multiplexed measurement of individual RNAs can be used to
+ compute the gene expression profile and noise, covariation in
+ expression among different genes, and spatial distribution of
+ RNAs within single cells. Knowledge of the expression profile
+ and spatial landscape of the transcriptome in individual cells
+ is essential for understanding the rich repertoire of cellular
+ behaviors. Here, we report multiplexed error-robust fluorescence
+ in situ hybridization (MERFISH), a single-molecule imaging
+ approach that allows the copy numbers and spatial localizations
+ of thousands of RNA species to be determined in single cells.
+ Using error-robust encoding schemes to combat single-molecule
+ labeling and detection errors, we demonstrated the imaging of
+ 100 to 1000 distinct RNA species in hundreds of individual
+ cells. Correlation analysis of the ~$10^4$ to $10^6$ pairs of genes
+ allowed us to constrain gene regulatory networks, predict novel
+ functions for many unannotated genes, and identify distinct
+ spatial distribution patterns of RNAs that correlate with
+ properties of the encoded proteins.",
+ journal = "Science",
+ publisher = "American Association for the Advancement of Science",
+ volume = 348,
+ number = 6233,
+ month = apr,
+ year = 2015,
+ language = "en"
+}
+
+@MISC{Iman1981-uu,
+ title = "An Approach to Sensitivity Analysis of Computer Models: Part
+ {I---Introduction}, Input Variable Selection and Preliminary
+ Variable Assessment",
+ author = "Iman, Ronald L and Helton, Jon C and Campbell, James E",
+ journal = "Journal of Quality Technology",
+ volume = 13,
+ number = 3,
+ pages = "174--183",
+ year = 1981
+}
+
+@ARTICLE{Bizzotto2020-nx,
+ title = "{SARS-CoV-2} Infection Boosts {MX1} Antiviral Effector in
+ {COVID-19} Patients",
+ author = "Bizzotto, Juan and Sanchis, Pablo and Abbate, Mercedes and
+ Lage-Vickers, Sof{\'\i}a and Lavignolle, Rosario and Toro,
+ Ayel{\'e}n and Olszevicki, Santiago and Sabater, Agustina and
+ Cascardo, Florencia and Vazquez, Elba and Cotignola, Javier and
+ Gueron, Geraldine",
+ abstract = "In a published case-control study (GSE152075) from
+ SARS-CoV-2-positive (n = 403) and -negative patients (n = 50), we
+ analyzed the response to infection assessing gene expression of
+ host cell receptors and antiviral proteins. The expression
+ analysis associated with reported risk factors for COVID-19 was
+ also assessed. SARS-CoV-2 cases had higher ACE2, but lower
+ TMPRSS2, BSG/CD147, and CTSB expression compared with negative
+ cases. COVID-19 patients' age negatively affected ACE2
+ expression. MX1 and MX2 were higher in COVID-19 patients. A
+ negative trend for MX1 and MX2 was observed as patients' age
+ increased. Principal-component analysis determined that ACE2,
+ MX1, MX2, and BSG/CD147 expression was able to cluster
+ non-COVID-19 and COVID-19 individuals. Multivariable regression
+ showed that MX1 expression significantly increased for each unit
+ of viral load increment. Altogether, these findings support
+ differences in ACE2, MX1, MX2, and BSG/CD147 expression between
+ COVID-19 and non-COVID-19 patients and point out to MX1 as a
+ critical responder in SARS-CoV-2 infection.",
+ journal = "iScience",
+ volume = 23,
+ number = 10,
+ pages = "101585",
+ month = oct,
+ year = 2020,
+ keywords = "Health Informatics; Virology",
+ language = "en"
+}
+
+@ARTICLE{Qiu2020-kj,
+ title = "Inferring Causal Gene Regulatory Networks from Coupled
+ {Single-Cell} Expression Dynamics Using Scribe",
+ author = "Qiu, Xiaojie and Rahimzamani, Arman and Wang, Li and Ren,
+ Bingcheng and Mao, Qi and Durham, Timothy and McFaline-Figueroa,
+ Jos{\'e} L and Saunders, Lauren and Trapnell, Cole and Kannan,
+ Sreeram",
+ abstract = "Here, we present Scribe
+ (https://github.com/aristoteleo/Scribe-py), a toolkit for
+ detecting and visualizing causal regulatory interactions between
+ genes and explore the potential for single-cell experiments to
+ power network reconstruction. Scribe employs restricted directed
+ information to determine causality by estimating the strength of
+ information transferred from a potential regulator to its
+ downstream target. We apply Scribe and other leading approaches
+ for causal network reconstruction to several types of single-cell
+ measurements and show that there is a dramatic drop in
+ performance for ``pseudotime''-ordered single-cell data compared
+ with true time-series data. We demonstrate that performing causal
+ inference requires temporal coupling between measurements. We
+ show that methods such as ``RNA velocity'' restore some degree of
+ coupling through an analysis of chromaffin cell fate commitment.
+ These analyses highlight a shortcoming in experimental and
+ computational methods for analyzing gene regulation at
+ single-cell resolution and suggest ways of overcoming it.",
+ journal = "Cell Syst",
+ volume = 10,
+ number = 3,
+ pages = "265--274.e11",
+ month = mar,
+ year = 2020,
+ keywords = "RNA velocity; Scribe; causal network inference; coupled dynamics;
+ gene regulatory network inference; pseudotime; real time;
+ single-cell RNA-seq; single-cell trajectories; slam-seq",
+ language = "en"
+}
+
+@ARTICLE{Baker2010-yk,
+ title = "Taking a long, hard look",
+ author = "Baker, Monya",
+ abstract = "Long-term, live-cell imaging helps to settle long-running
+ debates. Monya Baker investigates how the huge investment and
+ time commitment is finally paying off.",
+ journal = "Nature",
+ publisher = "Nature Publishing Group",
+ volume = 466,
+ number = 7310,
+ pages = "1137--1138",
+ month = aug,
+ year = 2010,
+ language = "en"
+}
+
+@BOOK{Alon2019-yd,
+ title = "An Introduction to Systems Biology: Design Principles of
+ Biological Circuits",
+ author = "Alon, Uri",
+ abstract = "Praise for the first edition: ... superb, beautifully written
+ and organized work that takes an engineering approach to systems
+ biology. Alon provides nicely written appendices to explain the
+ basic mathematical and biological concepts clearly and
+ succinctly without interfering with the main text. He starts
+ with a mathematical description of transcriptional activation
+ and then describes some basic transcription-network motifs
+ (patterns) that can be combined to form larger networks. --
+ Nature [This text deserves] serious attention from any
+ quantitative scientist who hopes to learn about modern biology
+ ... It assumes no prior knowledge of or even interest in biology
+ ... One final aspect that must be mentioned is the wonderful set
+ of exercises that accompany each chapter. ... Alon's book should
+ become a standard part of the training of graduate students. --
+ Physics Today Written for students and researchers, the second
+ edition of this best-selling textbook continues to offer a clear
+ presentation of design principles that govern the structure and
+ behavior of biological systems. It highlights simple, recurring
+ circuit elements that make up the regulation of cells and
+ tissues. Rigorously classroom-tested, this edition includes new
+ chapters on exciting advances made in the last decade. Features:
+ Includes seven new chapters The new edition has 189 exercises,
+ the previous edition had 66 Offers new examples relevant to
+ human physiology and disease The book website including course
+ videos can be found here:
+ https://www.weizmann.ac.il/mcb/UriAlon/introduction-systems-biology-design-principles-biological-circuits.",
+ publisher = "CRC Press",
+ month = jul,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Petukhov2018-ck,
+ title = "dropEst: pipeline for accurate estimation of molecular counts in
+ droplet-based single-cell {RNA-seq} experiments",
+ author = "Petukhov, Viktor and Guo, Jimin and Baryawno, Ninib and Severe,
+ Nicolas and Scadden, David T and Samsonova, Maria G and
+ Kharchenko, Peter V",
+ abstract = "Recent single-cell RNA-seq protocols based on droplet
+ microfluidics use massively multiplexed barcoding to enable
+ simultaneous measurements of transcriptomes for thousands of
+ individual cells. The increasing complexity of such data creates
+ challenges for subsequent computational processing and
+ troubleshooting of these experiments, with few software options
+ currently available. Here, we describe a flexible pipeline for
+ processing droplet-based transcriptome data that implements
+ barcode corrections, classification of cell quality, and
+ diagnostic information about the droplet libraries. We introduce
+ advanced methods for correcting composition bias and sequencing
+ errors affecting cellular and molecular barcodes to provide more
+ accurate estimates of molecular counts in individual cells.",
+ journal = "Genome Biol.",
+ volume = 19,
+ number = 1,
+ pages = "78",
+ month = jun,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Macklin2020-vj,
+ title = "Simultaneous cross-evaluation of heterogeneous E. coli datasets
+ via mechanistic simulation",
+ author = "Macklin, Derek N and Ahn-Horst, Travis A and Choi, Heejo and
+ Ruggero, Nicholas A and Carrera, Javier and Mason, John C and
+ Sun, Gwanggyu and Agmon, Eran and DeFelice, Mialy M and Maayan,
+ Inbal and Lane, Keara and Spangler, Ryan K and Gillies, Taryn E
+ and Paull, Morgan L and Akhter, Sajia and Bray, Samuel R and
+ Weaver, Daniel S and Keseler, Ingrid M and Karp, Peter D and
+ Morrison, Jerry H and Covert, Markus W",
+ abstract = "The extensive heterogeneity of biological data poses challenges
+ to analysis and interpretation. Construction of a large-scale
+ mechanistic model of Escherichia coli enabled us to integrate and
+ cross-evaluate a massive, heterogeneous dataset based on
+ measurements reported by various groups over decades. We
+ identified inconsistencies with functional consequences across
+ the data, including that the total output of the ribosomes and
+ RNA polymerases described by data are not sufficient for a cell
+ to reproduce measured doubling times, that measured metabolic
+ parameters are neither fully compatible with each other nor with
+ overall growth, and that essential proteins are absent during the
+ cell cycle---and the cell is robust to this absence. Finally,
+ considering these data as a whole leads to successful predictions
+ of new experimental outcomes, in this case protein half-lives.",
+ journal = "Science",
+ volume = 369,
+ number = 6502,
+ month = jul,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Chan2019-cc,
+ title = "Molecular recording of mammalian embryogenesis",
+ author = "Chan, Michelle M and Smith, Zachary D and Grosswendt, Stefanie
+ and Kretzmer, Helene and Norman, Thomas M and Adamson, Britt and
+ Jost, Marco and Quinn, Jeffrey J and Yang, Dian and Jones,
+ Matthew G and Khodaverdian, Alex and Yosef, Nir and Meissner,
+ Alexander and Weissman, Jonathan S",
+ abstract = "Ontogeny describes the emergence of complex multicellular
+ organisms from single totipotent cells. This field is
+ particularly challenging in mammals, owing to the indeterminate
+ relationship between self-renewal and differentiation, variation
+ in progenitor field sizes, and internal gestation in these
+ animals. Here we present a flexible, high-information,
+ multi-channel molecular recorder with a single-cell readout and
+ apply it as an evolving lineage tracer to assemble mouse
+ cell-fate maps from fertilization through gastrulation. By
+ combining lineage information with single-cell RNA sequencing
+ profiles, we recapitulate canonical developmental relationships
+ between different tissue types and reveal the nearly complete
+ transcriptional convergence of endodermal cells of
+ extra-embryonic and embryonic origins. Finally, we apply our
+ cell-fate maps to estimate the number of embryonic progenitor
+ cells and their degree of asymmetric partitioning during
+ specification. Our approach enables massively parallel,
+ high-resolution recording of lineage and other information in
+ mammalian systems, which will facilitate the construction of a
+ quantitative framework for understanding developmental processes.",
+ journal = "Nature",
+ volume = 570,
+ number = 7759,
+ pages = "77--82",
+ month = jun,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Qiu2012-yt,
+ title = "From understanding the development landscape of the canonical
+ fate-switch pair to constructing a dynamic landscape for two-step
+ neural differentiation",
+ author = "Qiu, Xiaojie and Ding, Shanshan and Shi, Tieliu",
+ abstract = "Recent progress in stem cell biology, notably cell fate
+ conversion, calls for novel theoretical understanding for cell
+ differentiation. The existing qualitative concept of Waddington's
+ ``epigenetic landscape'' has attracted particular attention
+ because it captures subsequent fate decision points, thus
+ manifesting the hierarchical (``tree-like'') nature of cell fate
+ diversification. Here, we generalized a recent work and explored
+ such a developmental landscape for a two-gene fate decision
+ circuit by integrating the underlying probability landscapes with
+ different parameters (corresponding to distinct developmental
+ stages). The change of entropy production rate along the
+ parameter changes indicates which parameter changes can represent
+ a normal developmental process while other parameters' change can
+ not. The transdifferentiation paths over the landscape under
+ certain conditions reveal the possibility of a direct and
+ reversible phenotypic conversion. As the intensity of noise
+ increases, we found that the landscape becomes flatter and the
+ dominant paths more straight, implying the importance of
+ biological noise processing mechanism in development and
+ reprogramming. We further extended the landscape of the one-step
+ fate decision to that for two-step decisions in central nervous
+ system (CNS) differentiation. A minimal network and dynamic model
+ for CNS differentiation was firstly constructed where two
+ three-gene motifs are coupled. We then implemented the SDEs
+ (Stochastic Differentiation Equations) simulation for the
+ validity of the network and model. By integrating the two
+ landscapes for the two switch gene pairs, we constructed the
+ two-step development landscape for CNS differentiation. Our work
+ provides new insights into cellular differentiation and important
+ clues for better reprogramming strategies.",
+ journal = "PLoS One",
+ volume = 7,
+ number = 12,
+ pages = "e49271",
+ month = dec,
+ year = 2012,
+ language = "en"
+}
+
+@ARTICLE{Jurges2018-aj,
+ title = "Dissecting newly transcribed and old {RNA} using {GRAND-SLAM}",
+ author = "J{\"u}rges, Christopher and D{\"o}lken, Lars and Erhard, Florian",
+ abstract = "Summary: Global quantification of total RNA is used to
+ investigate steady state levels of gene expression. However,
+ being able to differentiate pre-existing RNA (that has been
+ synthesized prior to a defined point in time) and newly
+ transcribed RNA can provide invaluable information e.g. to
+ estimate RNA half-lives or identify fast and complex regulatory
+ processes. Recently, new techniques based on metabolic labeling
+ and RNA-seq have emerged that allow to quantify new and old RNA:
+ Nucleoside analogs are incorporated into newly transcribed RNA
+ and are made detectable as point mutations in mapped reads.
+ However, relatively infrequent incorporation events and
+ significant sequencing error rates make the differentiation
+ between old and new RNA a highly challenging task. We developed a
+ statistical approach termed GRAND-SLAM that, for the first time,
+ allows to estimate the proportion of old and new RNA in such an
+ experiment. Uncertainty in the estimates is quantified in a
+ Bayesian framework. Simulation experiments show our approach to
+ be unbiased and highly accurate. Furthermore, we analyze how
+ uncertainty in the proportion translates into uncertainty in
+ estimating RNA half-lives and give guidelines for planning
+ experiments. Finally, we demonstrate that our estimates of RNA
+ half-lives compare favorably to other experimental approaches and
+ that biological processes affecting RNA half-lives can be
+ investigated with greater power than offered by any other method.
+ GRAND-SLAM is freely available for non-commercial use at
+ http://software.erhard-lab.de; R scripts to generate all figures
+ are available at zenodo (doi: 10.5281/zenodo.1162340).",
+ journal = "Bioinformatics",
+ volume = 34,
+ number = 13,
+ pages = "i218--i226",
+ month = jul,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Huang2007-ns,
+ title = "Bifurcation dynamics in lineage-commitment in bipotent progenitor
+ cells",
+ author = "Huang, Sui and Guo, Yan-Ping and May, Gillian and Enver, Tariq",
+ abstract = "Lineage specification of multipotent progenitor cells is governed
+ by a balance of lineage-affiliated transcription factors, such as
+ GATA1 and PU.1, which regulate the choice between erythroid and
+ myelomonocytic fates. But how ratios of lineage-determining
+ transcription factors stabilize progenitor cells and resolve
+ their indeterminacy to commit them to discrete, mutually
+ exclusive fates remains unexplained. We used a simple model and
+ experimental measurements to analyze the dynamics of a binary
+ fate decision governed by a gene-circuit containing
+ auto-stimulation and cross-inhibition, as embodied by the
+ GATA1-PU.1 paradigm. This circuit generates stable attractors
+ corresponding to erythroid and myelomonocytic fates, as well as
+ an uncommitted metastable state characterized by coexpression of
+ both regulators, explaining the phenomenon of ``multilineage
+ priming''. GATA1 and PU.1 mRNA and transcriptome dynamics of
+ differentiating progenitor cells confirm that commitment occurs
+ in two stages, as suggested by the model: first, the progenitor
+ state is destabilized in an almost symmetrical bifurcation event,
+ resulting in a poised state at the boundary between the two
+ lineage-specific attractors; second, the cell is driven to the
+ respective, now accessible attractors. This minimal model
+ captures fundamental features of binary cell fate decisions,
+ uniting the concepts of stochastic (selective) and deterministic
+ (instructive) regulation, and hence, may apply to a wider range
+ of binary fate decision points.",
+ journal = "Dev. Biol.",
+ volume = 305,
+ number = 2,
+ pages = "695--713",
+ month = may,
+ year = 2007,
+ language = "en"
+}
+
+@ARTICLE{Trapnell2014-kk,
+ title = "The dynamics and regulators of cell fate decisions are revealed
+ by pseudotemporal ordering of single cells",
+ author = "Trapnell, Cole and Cacchiarelli, Davide and Grimsby, Jonna and
+ Pokharel, Prapti and Li, Shuqiang and Morse, Michael and Lennon,
+ Niall J and Livak, Kenneth J and Mikkelsen, Tarjei S and Rinn,
+ John L",
+ abstract = "Defining the transcriptional dynamics of a temporal process such
+ as cell differentiation is challenging owing to the high
+ variability in gene expression between individual cells.
+ Time-series gene expression analyses of bulk cells have
+ difficulty distinguishing early and late phases of a
+ transcriptional cascade or identifying rare subpopulations of
+ cells, and single-cell proteomic methods rely on a priori
+ knowledge of key distinguishing markers. Here we describe
+ Monocle, an unsupervised algorithm that increases the temporal
+ resolution of transcriptome dynamics using single-cell RNA-Seq
+ data collected at multiple time points. Applied to the
+ differentiation of primary human myoblasts, Monocle revealed
+ switch-like changes in expression of key regulatory factors,
+ sequential waves of gene regulation, and expression of regulators
+ that were not known to act in differentiation. We validated some
+ of these predicted regulators in a loss-of-function screen.
+ Monocle can in principle be used to recover single-cell gene
+ expression kinetics from a wide array of cellular processes,
+ including differentiation, proliferation and oncogenic
+ transformation.",
+ journal = "Nat. Biotechnol.",
+ volume = 32,
+ number = 4,
+ pages = "381--386",
+ month = apr,
+ year = 2014,
+ language = "en"
+}
+
+@ARTICLE{Saelens2019-ts,
+ title = "A comparison of single-cell trajectory inference methods",
+ author = "Saelens, Wouter and Cannoodt, Robrecht and Todorov, Helena and
+ Saeys, Yvan",
+ abstract = "Trajectory inference approaches analyze genome-wide omics data
+ from thousands of single cells and computationally infer the
+ order of these cells along developmental trajectories. Although
+ more than 70 trajectory inference tools have already been
+ developed, it is challenging to compare their performance because
+ the input they require and output models they produce vary
+ substantially. Here, we benchmark 45 of these methods on 110 real
+ and 229 synthetic datasets for cellular ordering, topology,
+ scalability and usability. Our results highlight the
+ complementarity of existing tools, and that the choice of method
+ should depend mostly on the dataset dimensions and trajectory
+ topology. Based on these results, we develop a set of guidelines
+ to help users select the best method for their dataset. Our
+ freely available data and evaluation pipeline
+ (https://benchmark.dynverse.org) will aid in the development of
+ improved tools designed to analyze increasingly large and complex
+ single-cell datasets.",
+ journal = "Nat. Biotechnol.",
+ volume = 37,
+ number = 5,
+ pages = "547--554",
+ month = may,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Battich2020-gj,
+ title = "Sequencing metabolically labeled transcripts in single cells
+ reveals {mRNA} turnover strategies",
+ author = "Battich, Nico and Beumer, Joep and de Barbanson, Buys and
+ Krenning, Lenno and Baron, Chlo{\'e} S and Tanenbaum, Marvin E
+ and Clevers, Hans and van Oudenaarden, Alexander",
+ abstract = "The regulation of messenger RNA levels in mammalian cells can be
+ achieved by the modulation of synthesis and degradation rates.
+ Metabolic RNA-labeling experiments in bulk have quantified these
+ rates using relatively homogeneous cell populations. However, to
+ determine these rates during complex dynamical processes, for
+ instance during cellular differentiation, single-cell resolution
+ is required. Therefore, we developed a method that simultaneously
+ quantifies metabolically labeled and preexisting unlabeled
+ transcripts in thousands of individual cells. We determined
+ synthesis and degradation rates during the cell cycle and during
+ differentiation of intestinal stem cells, revealing major
+ regulatory strategies. These strategies have distinct
+ consequences for controlling the dynamic range and precision of
+ gene expression. These findings advance our understanding of how
+ individual cells in heterogeneous populations shape their gene
+ expression dynamics.",
+ journal = "Science",
+ volume = 367,
+ number = 6482,
+ pages = "1151--1156",
+ month = mar,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Furlan2017-aj,
+ title = "Multipotent peripheral glial cells generate neuroendocrine cells
+ of the adrenal medulla",
+ author = "Furlan, Alessandro and Dyachuk, Vyacheslav and Kastriti, Maria
+ Eleni and Calvo-Enrique, Laura and Abdo, Hind and Hadjab, Saida
+ and Chontorotzea, Tatiana and Akkuratova, Natalia and Usoskin,
+ Dmitry and Kamenev, Dmitry and Petersen, Julian and Sunadome,
+ Kazunori and Memic, Fatima and Marklund, Ulrika and Fried, Kaj
+ and Topilko, Piotr and Lallemend, Francois and Kharchenko, Peter
+ V and Ernfors, Patrik and Adameyko, Igor",
+ abstract = "Adrenaline is a fundamental circulating hormone for bodily
+ responses to internal and external stressors. Chromaffin cells of
+ the adrenal medulla (AM) represent the main neuroendocrine
+ adrenergic component and are believed to differentiate from
+ neural crest cells. We demonstrate that large numbers of
+ chromaffin cells arise from peripheral glial stem cells, termed
+ Schwann cell precursors (SCPs). SCPs migrate along the visceral
+ motor nerve to the vicinity of the forming adrenal gland, where
+ they detach from the nerve and form postsynaptic neuroendocrine
+ chromaffin cells. An intricate molecular logic drives two
+ sequential phases of gene expression, one unique for a distinct
+ transient cellular state and another for cell type specification.
+ Subsequently, these programs down-regulate SCP-gene and
+ up-regulate chromaffin cell-gene networks. The AM forms through
+ limited cell expansion and requires the recruitment of numerous
+ SCPs. Thus, peripheral nerves serve as a stem cell niche for
+ neuroendocrine system development.",
+ journal = "Science",
+ volume = 357,
+ number = 6346,
+ month = jul,
+ year = 2017,
+ language = "en"
+}
+
+@ARTICLE{Cao2020-ik,
+ title = "Tracking development at the cellular level",
+ author = "Cao, Junyue",
+ journal = "Science",
+ volume = 370,
+ number = 6519,
+ pages = "924--925",
+ month = nov,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Hu2020-an,
+ title = "{ZipSeq}: barcoding for real-time mapping of single cell
+ transcriptomes",
+ author = "Hu, Kenneth H and Eichorst, John P and McGinnis, Chris S and
+ Patterson, David M and Chow, Eric D and Kersten, Kelly and
+ Jameson, Stephen C and Gartner, Zev J and Rao, Arjun A and
+ Krummel, Matthew F",
+ abstract = "Spatial transcriptomics seeks to integrate single cell
+ transcriptomic data within the three-dimensional space of
+ multicellular biology. Current methods to correlate a cell's
+ position with its transcriptome in living tissues have various
+ limitations. We developed an approach, called 'ZipSeq', that uses
+ patterned illumination and photocaged oligonucleotides to
+ serially print barcodes ('zipcodes') onto live cells in intact
+ tissues, in real time and with an on-the-fly selection of
+ patterns. Using ZipSeq, we mapped gene expression in three
+ settings: in vitro wound healing, live lymph node sections and a
+ live tumor microenvironment. In all cases, we discovered new gene
+ expression patterns associated with histological structures. In
+ the tumor microenvironment, this demonstrated a trajectory of
+ myeloid and T cell differentiation from the periphery inward. A
+ combinatorial variation of ZipSeq efficiently scales in the
+ number of regions defined, providing a pathway for complete
+ mapping of live tissues, subsequent to real-time imaging or
+ perturbation.",
+ journal = "Nat. Methods",
+ volume = 17,
+ number = 8,
+ pages = "833--843",
+ month = aug,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Ma2020-kv,
+ title = "Chromatin Potential Identified by Shared {Single-Cell} Profiling
+ of {RNA} and Chromatin",
+ author = "Ma, Sai and Zhang, Bing and LaFave, Lindsay M and Earl, Andrew S
+ and Chiang, Zachary and Hu, Yan and Ding, Jiarui and Brack,
+ Alison and Kartha, Vinay K and Tay, Tristan and Law, Travis and
+ Lareau, Caleb and Hsu, Ya-Chieh and Regev, Aviv and Buenrostro,
+ Jason D",
+ abstract = "Cell differentiation and function are regulated across multiple
+ layers of gene regulation, including modulation of gene
+ expression by changes in chromatin accessibility. However,
+ differentiation is an asynchronous process precluding a temporal
+ understanding of regulatory events leading to cell fate
+ commitment. Here we developed simultaneous high-throughput ATAC
+ and RNA expression with sequencing (SHARE-seq), a highly scalable
+ approach for measurement of chromatin accessibility and gene
+ expression in the same single cell, applicable to different
+ tissues. Using 34,774 joint profiles from mouse skin, we develop
+ a computational strategy to identify cis-regulatory interactions
+ and define domains of regulatory chromatin (DORCs) that
+ significantly overlap with super-enhancers. During lineage
+ commitment, chromatin accessibility at DORCs precedes gene
+ expression, suggesting that changes in chromatin accessibility
+ may prime cells for lineage commitment. We computationally infer
+ chromatin potential as a quantitative measure of chromatin
+ lineage-priming and use it to predict cell fate outcomes.
+ SHARE-seq is an extensible platform to study regulatory circuitry
+ across diverse cells in tissues.",
+ journal = "Cell",
+ volume = 183,
+ number = 4,
+ pages = "1103--1116.e20",
+ month = nov,
+ year = 2020,
+ keywords = "epigenomics; gene regulation; single cell; skin; stem cell",
+ language = "en"
+}
+
+@ARTICLE{Rodriques2019-hk,
+ title = "Slide-seq: A scalable technology for measuring genome-wide
+ expression at high spatial resolution",
+ author = "Rodriques, Samuel G and Stickels, Robert R and Goeva,
+ Aleksandrina and Martin, Carly A and Murray, Evan and Vanderburg,
+ Charles R and Welch, Joshua and Chen, Linlin M and Chen, Fei and
+ Macosko, Evan Z",
+ abstract = "Spatial positions of cells in tissues strongly influence
+ function, yet a high-throughput, genome-wide readout of gene
+ expression with cellular resolution is lacking. We developed
+ Slide-seq, a method for transferring RNA from tissue sections
+ onto a surface covered in DNA-barcoded beads with known
+ positions, allowing the locations of the RNA to be inferred by
+ sequencing. Using Slide-seq, we localized cell types identified
+ by single-cell RNA sequencing datasets within the cerebellum and
+ hippocampus, characterized spatial gene expression patterns in
+ the Purkinje layer of mouse cerebellum, and defined the temporal
+ evolution of cell type-specific responses in a mouse model of
+ traumatic brain injury. These studies highlight how Slide-seq
+ provides a scalable method for obtaining spatially resolved gene
+ expression data at resolutions comparable to the sizes of
+ individual cells.",
+ journal = "Science",
+ volume = 363,
+ number = 6434,
+ pages = "1463--1467",
+ month = mar,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Erhard2019-oc,
+ title = "{scSLAM-seq} reveals core features of transcription dynamics in
+ single cells",
+ author = "Erhard, Florian and Baptista, Marisa A P and Krammer, Tobias and
+ Hennig, Thomas and Lange, Marius and Arampatzi, Panagiota and
+ J{\"u}rges, Christopher S and Theis, Fabian J and Saliba,
+ Antoine-Emmanuel and D{\"o}lken, Lars",
+ abstract = "Single-cell RNA sequencing (scRNA-seq) has highlighted the
+ important role of intercellular heterogeneity in phenotype
+ variability in both health and disease. However, current
+ scRNA-seq approaches provide only a snapshot of gene expression
+ and convey little information on the true temporal dynamics and
+ stochastic nature of transcription. A further key limitation of
+ scRNA-seq analysis is that the RNA profile of each individual
+ cell can be analysed only once. Here we introduce single-cell,
+ thiol-(SH)-linked alkylation of RNA for metabolic labelling
+ sequencing (scSLAM-seq), which integrates metabolic RNA
+ labelling, biochemical nucleoside conversion and scRNA-seq to
+ record transcriptional activity directly by differentiating
+ between new and old RNA for thousands of genes per single cell.
+ We use scSLAM-seq to study the onset of infection with lytic
+ cytomegalovirus in single mouse fibroblasts. The cell-cycle state
+ and dose of infection deduced from old RNA enable dose-response
+ analysis based on new RNA. scSLAM-seq thereby both visualizes and
+ explains differences in transcriptional activity at the
+ single-cell level. Furthermore, it depicts 'on-off' switches and
+ transcriptional burst kinetics in host gene expression with
+ extensive gene-specific differences that correlate with
+ promoter-intrinsic features (TBP-TATA-box interactions and DNA
+ methylation). Thus, gene-specific, and not cell-specific,
+ features explain the heterogeneity in transcriptomes between
+ individual cells and the transcriptional response to
+ perturbations.",
+ journal = "Nature",
+ volume = 571,
+ number = 7765,
+ pages = "419--423",
+ month = jul,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Klein2015-nw,
+ title = "Droplet barcoding for single-cell transcriptomics applied to
+ embryonic stem cells",
+ author = "Klein, Allon M and Mazutis, Linas and Akartuna, Ilke and
+ Tallapragada, Naren and Veres, Adrian and Li, Victor and Peshkin,
+ Leonid and Weitz, David A and Kirschner, Marc W",
+ abstract = "It has long been the dream of biologists to map gene expression
+ at the single-cell level. With such data one might track
+ heterogeneous cell sub-populations, and infer regulatory
+ relationships between genes and pathways. Recently, RNA
+ sequencing has achieved single-cell resolution. What is limiting
+ is an effective way to routinely isolate and process large
+ numbers of individual cells for quantitative in-depth sequencing.
+ We have developed a high-throughput droplet-microfluidic approach
+ for barcoding the RNA from thousands of individual cells for
+ subsequent analysis by next-generation sequencing. The method
+ shows a surprisingly low noise profile and is readily adaptable
+ to other sequencing-based assays. We analyzed mouse embryonic
+ stem cells, revealing in detail the population structure and the
+ heterogeneous onset of differentiation after leukemia inhibitory
+ factor (LIF) withdrawal. The reproducibility of these
+ high-throughput single-cell data allowed us to deconstruct cell
+ populations and infer gene expression relationships. VIDEO
+ ABSTRACT.",
+ journal = "Cell",
+ volume = 161,
+ number = 5,
+ pages = "1187--1201",
+ month = may,
+ year = 2015,
+ language = "en"
+}
+
+@ARTICLE{Weinreb2018-fo,
+ title = "Fundamental limits on dynamic inference from single-cell
+ snapshots",
+ author = "Weinreb, Caleb and Wolock, Samuel and Tusi, Betsabeh K and
+ Socolovsky, Merav and Klein, Allon M",
+ abstract = "Single-cell expression profiling reveals the molecular states of
+ individual cells with unprecedented detail. Because these methods
+ destroy cells in the process of analysis, they cannot measure how
+ gene expression changes over time. However, some information on
+ dynamics is present in the data: the continuum of molecular
+ states in the population can reflect the trajectory of a typical
+ cell. Many methods for extracting single-cell dynamics from
+ population data have been proposed. However, all such attempts
+ face a common limitation: for any measured distribution of cell
+ states, there are multiple dynamics that could give rise to it,
+ and by extension, multiple possibilities for underlying
+ mechanisms of gene regulation. Here, we describe the aspects of
+ gene expression dynamics that cannot be inferred from a static
+ snapshot alone and identify assumptions necessary to constrain a
+ unique solution for cell dynamics from static snapshots. We
+ translate these constraints into a practical algorithmic
+ approach, population balance analysis (PBA), which makes use of a
+ method from spectral graph theory to solve a class of
+ high-dimensional differential equations. We use simulations to
+ show the strengths and limitations of PBA, and then apply it to
+ single-cell profiles of hematopoietic progenitor cells (HPCs).
+ Cell state predictions from this analysis agree with HPC fate
+ assays reported in several papers over the past two decades. By
+ highlighting the fundamental limits on dynamic inference faced by
+ any method, our framework provides a rigorous basis for dynamic
+ interpretation of a gene expression continuum and clarifies best
+ experimental designs for trajectory reconstruction from static
+ snapshot measurements.",
+ journal = "Proc. Natl. Acad. Sci. U. S. A.",
+ volume = 115,
+ number = 10,
+ pages = "E2467--E2476",
+ month = mar,
+ year = 2018,
+ keywords = "dynamic inference; hematopoiesis; pseudotime; single cell;
+ spectral graph theory",
+ language = "en"
+}
+
+@Article{Li2020-my,
+author = {Li, Tiejun and Shi, Jifan and Wu, Yichong and Zhou, Peijie},
+title = {On the Mathematics of {RNA} Velocity {I}: Theoretical Analysis},
+journal = {CSIAM Transactions on Applied Mathematics},
+year = {2021},
+volume = {2},
+number = {1},
+pages = {1--55},
+abstract = {
+ The RNA velocity provides a new avenue to study the stemness and lineage of cells in the development in scRNA-seq data analysis. Some promising extensions of it are proposed and the community is experiencing a fast developing period.
+ However, in this stage, it is of prime importance to revisit the whole process of RNA
+ velocity analysis from the mathematical point of view, which will help to understand
+ the rationale and drawbacks of different proposals. The current paper is devoted to
+ this purpose. We present a thorough mathematical study on the RNA velocity model
+ from dynamics to downstream data analysis. We derived the analytical solution of
+ the RNA velocity model from both deterministic and stochastic point of view. We
+ presented the parameter inference framework based on the maximum likelihood estimate. We also derived the continuum limit of different downstream analysis methods,
+ which provides insights on the construction of transition probability matrix, root and
+ ending-cells identification, and the development routes finding. The overall analysis
+ aims at providing a mathematical basis for more advanced design and development
+ of RNA velocity type methods in the future.
+},
+issn = {2708-0579},
+doi = {10.4208/csiam-am.SO-2020-0001},
+url = {http://global-sci.org/intro/article_detail/csiam-am/18653.html}
+}
+
+@ARTICLE{Petratou2021-hj,
+ title = "The {MITF} paralog tfec is required in neural crest development
+ for fate specification of the iridophore lineage from a
+ multipotent pigment cell progenitor",
+ author = "Petratou, Kleio and Spencer, Samantha A and Kelsh, Robert N and
+ Lister, James A",
+ abstract = "Understanding how fate specification of distinct cell-types from
+ multipotent progenitors occurs is a fundamental question in
+ embryology. Neural crest stem cells (NCSCs) generate
+ extraordinarily diverse derivatives, including multiple neural,
+ skeletogenic and pigment cell fates. Key transcription factors
+ and extracellular signals specifying NCSC lineages remain to be
+ identified, and we have only a little idea of how and when they
+ function together to control fate. Zebrafish have three neural
+ crest-derived pigment cell types, black melanocytes,
+ light-reflecting iridophores and yellow xanthophores, which offer
+ a powerful model for studying the molecular and cellular
+ mechanisms of fate segregation. Mitfa has been identified as the
+ master regulator of melanocyte fate. Here, we show that an
+ Mitf-related transcription factor, Tfec, functions as master
+ regulator of the iridophore fate. Surprisingly, our phenotypic
+ analysis of tfec mutants demonstrates that Tfec also functions in
+ the initial specification of all three pigment cell-types,
+ although the melanocyte and xanthophore lineages recover later.
+ We show that Mitfa represses tfec expression, revealing a likely
+ mechanism contributing to the decision between melanocyte and
+ iridophore fate. Our data are consistent with the long-standing
+ proposal of a tripotent progenitor restricted to pigment cell
+ fates. Moreover, we investigate activation, maintenance and
+ function of tfec in multipotent NCSCs, demonstrating for the
+ first time its role in the gene regulatory network forming and
+ maintaining early neural crest cells. In summary, we build on our
+ previous work to characterise the gene regulatory network
+ governing iridophore development, establishing Tfec as the master
+ regulator driving iridophore specification from multipotent
+ progenitors, while shedding light on possible cellular mechanisms
+ of progressive fate restriction.",
+ journal = "PLoS One",
+ volume = 16,
+ number = 1,
+ pages = "e0244794",
+ month = jan,
+ year = 2021,
+ language = "en"
+}
+
+@ARTICLE{Kester2018-dv,
+ title = "{Single-Cell} Transcriptomics Meets Lineage Tracing",
+ author = "Kester, Lennart and van Oudenaarden, Alexander",
+ abstract = "Reconstructing lineage relationships between cells within a
+ tissue or organism is a long-standing aim in biology.
+ Traditionally, lineage tracing has been achieved through the
+ (genetic) labeling of a cell followed by the tracking of its
+ offspring. Currently, lineage trajectories can also be predicted
+ using single-cell transcriptomics. Although single-cell
+ transcriptomics provides detailed phenotypic information, the
+ predicted lineage trajectories do not necessarily reflect genetic
+ relationships. Recently, techniques have been developed that
+ unite these strategies. In this Review, we discuss
+ transcriptome-based lineage trajectory prediction algorithms,
+ single-cell genetic lineage tracing, and the promising
+ combination of these techniques for stem cell and cancer
+ research.",
+ journal = "Cell Stem Cell",
+ volume = 23,
+ number = 2,
+ pages = "166--179",
+ month = aug,
+ year = 2018,
+ keywords = "lineage trajectory reconstruction; single-cell lineage tracing;
+ single-cell mRNA sequencing",
+ language = "en"
+}
+
+@BOOK{Waddington1957-ct,
+ title = "The Strategy of the Genes, a Discussion of Some Aspects of
+ Theoretical Biology, by {C.H}. Waddington, ... With an Appendix
+ [Some Physico-chemical Aspects of Biological Organisation] by H.
+ Kacser",
+ author = "Waddington, Conrad Hall",
+ publisher = "G. Allen and Unwin",
+ year = 1957,
+ language = "en"
+}
+
+@ARTICLE{Cao2020-ng,
+ title = "A human cell atlas of fetal gene expression",
+ author = "Cao, Junyue and O'Day, Diana R and Pliner, Hannah A and Kingsley,
+ Paul D and Deng, Mei and Daza, Riza M and Zager, Michael A and
+ Aldinger, Kimberly A and Blecher-Gonen, Ronnie and Zhang, Fan and
+ Spielmann, Malte and Palis, James and Doherty, Dan and Steemers,
+ Frank J and Glass, Ian A and Trapnell, Cole and Shendure, Jay",
+ abstract = "The gene expression program underlying the specification of human
+ cell types is of fundamental interest. We generated human cell
+ atlases of gene expression and chromatin accessibility in fetal
+ tissues. For gene expression, we applied three-level
+ combinatorial indexing to >110 samples representing 15 organs,
+ ultimately profiling ~4 million single cells. We leveraged the
+ literature and other atlases to identify and annotate hundreds of
+ cell types and subtypes, both within and across tissues. Our
+ analyses focused on organ-specific specializations of broadly
+ distributed cell types (such as blood, endothelial, and
+ epithelial), sites of fetal erythropoiesis (which notably
+ included the adrenal gland), and integration with mouse
+ developmental atlases (such as conserved specification of blood
+ cells). These data represent a rich resource for the exploration
+ of in vivo human gene expression in diverse tissues and cell
+ types.",
+ journal = "Science",
+ volume = 370,
+ number = 6518,
+ month = nov,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Moffitt2018-fd,
+ title = "Molecular, spatial, and functional single-cell profiling of the
+ hypothalamic preoptic region",
+ author = "Moffitt, Jeffrey R and Bambah-Mukku, Dhananjay and Eichhorn,
+ Stephen W and Vaughn, Eric and Shekhar, Karthik and Perez, Julio
+ D and Rubinstein, Nimrod D and Hao, Junjie and Regev, Aviv and
+ Dulac, Catherine and Zhuang, Xiaowei",
+ abstract = "The hypothalamus controls essential social behaviors and
+ homeostatic functions. However, the cellular architecture of
+ hypothalamic nuclei---including the molecular identity, spatial
+ organization, and function of distinct cell types---is poorly
+ understood. Here, we developed an imaging-based in situ cell-type
+ identification and mapping method and combined it with
+ single-cell RNA-sequencing to create a molecularly annotated and
+ spatially resolved cell atlas of the mouse hypothalamic preoptic
+ region. We profiled ~1 million cells, identified ~70 neuronal
+ populations characterized by distinct neuromodulatory signatures
+ and spatial organizations, and defined specific neuronal
+ populations activated during social behaviors in male and female
+ mice, providing a high-resolution framework for mechanistic
+ investigation of behavior circuits. The approach described opens
+ a new avenue for the construction of cell atlases in diverse
+ tissues and organisms.",
+ journal = "Science",
+ volume = 362,
+ number = 6416,
+ month = nov,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Sundqvist2018-xi,
+ title = "{JUNB} governs a feed-forward network of {TGF$\beta$} signaling
+ that aggravates breast cancer invasion",
+ author = "Sundqvist, Anders and Morikawa, Masato and Ren, Jiang and
+ Vasilaki, Eleftheria and Kawasaki, Natsumi and Kobayashi, Mai and
+ Koinuma, Daizo and Aburatani, Hiroyuki and Miyazono, Kohei and
+ Heldin, Carl-Henrik and van Dam, Hans and Ten Dijke, Peter",
+ abstract = "It is well established that transforming growth factor-$\beta$
+ (TGF$\beta$) switches its function from being a tumor suppressor
+ to a tumor promoter during the course of tumorigenesis, which
+ involves both cell-intrinsic and environment-mediated mechanisms.
+ We are interested in breast cancer cells, in which SMAD mutations
+ are rare and interactions between SMAD and other transcription
+ factors define pro-oncogenic events. Here, we have performed
+ chromatin immunoprecipitation (ChIP)-sequencing analyses which
+ indicate that the genome-wide landscape of SMAD2/3 binding is
+ altered after prolonged TGF$\beta$ stimulation. De novo motif
+ analyses of the SMAD2/3 binding regions predict enrichment of
+ binding motifs for activator protein (AP)1 in addition to SMAD
+ motifs. TGF$\beta$-induced expression of the AP1 component JUNB
+ was required for expression of many late invasion-mediating
+ genes, creating a feed-forward regulatory network. Moreover, we
+ found that several components in the WNT pathway were enriched
+ among the late TGF$\beta$-target genes, including the
+ invasion-inducing WNT7 proteins. Consistently, overexpression of
+ WNT7A or WNT7B enhanced and potentiated TGF$\beta$-induced breast
+ cancer cell invasion, while inhibition of the WNT pathway reduced
+ this process. Our study thereby helps to explain how accumulation
+ of pro-oncogenic stimuli switches and stabilizes
+ TGF$\beta$-induced cellular phenotypes of epithelial cells.",
+ journal = "Nucleic Acids Res.",
+ volume = 46,
+ number = 3,
+ pages = "1180--1195",
+ month = feb,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Huang2012-ft,
+ title = "The molecular and mathematical basis of Waddington's epigenetic
+ landscape: a framework for post-Darwinian biology?",
+ author = "Huang, Sui",
+ abstract = "The Neo-Darwinian concept of natural selection is plausible when
+ one assumes a straightforward causation of phenotype by
+ genotype. However, such simple 1:1 mapping must now give place
+ to the modern concepts of gene regulatory networks and gene
+ expression noise. Both can, in the absence of genetic mutations,
+ jointly generate a diversity of inheritable randomly occupied
+ phenotypic states that could also serve as a substrate for
+ natural selection. This form of epigenetic dynamics challenges
+ Neo-Darwinism. It needs to incorporate the non-linear,
+ stochastic dynamics of gene networks. A first step is to
+ consider the mathematical correspondence between gene regulatory
+ networks and Waddington's metaphoric 'epigenetic landscape',
+ which actually represents the quasi-potential function of global
+ network dynamics. It explains the coexistence of multiple stable
+ phenotypes within one genotype. The landscape's topography with
+ its attractors is shaped by evolution through mutational
+ re-wiring of regulatory interactions - offering a link between
+ genetic mutation and sudden, broad evolutionary changes.",
+ journal = "Bioessays",
+ publisher = "Wiley",
+ volume = 34,
+ number = 2,
+ pages = "149--157",
+ month = feb,
+ year = 2012,
+ language = "en"
+}
+
+@ARTICLE{Golding2005-ia,
+ title = "Real-time kinetics of gene activity in individual bacteria",
+ author = "Golding, Ido and Paulsson, Johan and Zawilski, Scott M and Cox,
+ Edward C",
+ abstract = "Protein levels have been shown to vary substantially between
+ individual cells in clonal populations. In prokaryotes, the
+ contribution to such fluctuations from the inherent randomness of
+ gene expression has largely been attributed to having just a few
+ transcripts of the corresponding mRNAs. By contrast, eukaryotic
+ studies tend to emphasize chromatin remodeling and burst-like
+ transcription. Here, we study single-cell transcription in
+ Escherichia coli by measuring mRNA levels in individual living
+ cells. The results directly demonstrate transcriptional bursting,
+ similar to that indirectly inferred for eukaryotes. We also
+ measure mRNA partitioning at cell division and correlate mRNA and
+ protein levels in single cells. Partitioning is approximately
+ binomial, and mRNA-protein correlations are weaker earlier in the
+ cell cycle, where cell division has recently randomized the
+ relative concentrations. Our methods further extend protein-based
+ approaches by counting the integer-valued number of transcript
+ with single-molecule resolution. This greatly facilitates kinetic
+ interpretations in terms of the integer-valued random processes
+ that produce the fluctuations.",
+ journal = "Cell",
+ volume = 123,
+ number = 6,
+ pages = "1025--1036",
+ month = dec,
+ year = 2005,
+ language = "en"
+}
+
+@ARTICLE{Bradley2012-la,
+ title = "Regulation of embryonic stem cell pluripotency by heat shock
+ protein 90",
+ author = "Bradley, Eric and Bieberich, Erhard and Mivechi, Nahid F and
+ Tangpisuthipongsa, Dantera and Wang, Guanghu",
+ abstract = "Deciphering the molecular basis of stem cell pluripotency is
+ fundamental to the understanding of stem cell biology, early
+ embryonic development, and to the clinical application of
+ regenerative medicine. We report here that the molecular
+ chaperone heat shock protein 90 (Hsp90) is essential for mouse
+ embryonic stem cell (ESC) pluripotency through regulating
+ multiple pluripotency factors, including Oct4, Nanog, and signal
+ transducer and activator of transcription 3. Inhibition of Hsp90
+ by either 17-N-Allylamino-17-demethoxygeldanamycin or miRNA led
+ to ESC differentiation. Overexpression of Hsp90$\beta$ partially
+ rescued the phenotype; in particular, the levels of Oct4 and
+ Nanog were restored. Notably, Hsp90 associated with Oct4 and
+ Nanog in the same cellular complex and protected them from
+ degradation by the ubiquitin proteasome pathway, suggesting that
+ Oct4 and Nanog are potential novel Hsp90 client proteins. In
+ addition, Hsp90 inhibition reduced the mRNA level of Oct4, but
+ not that of Nanog, indicating that Hsp90 participates in Oct4
+ mRNA processing or maturation. Hsp90 inhibition also increased
+ expression of some protein markers for mesodermal lineages,
+ implying that Hsp90 suppresses mesodermal differentiation from
+ ESCs. These findings support a new role for Hsp90 in maintaining
+ ESC pluripotency by sustaining the level of multiple pluripotency
+ factors, particularly Oct4 and Nanog.",
+ journal = "Stem Cells",
+ volume = 30,
+ number = 8,
+ pages = "1624--1633",
+ month = aug,
+ year = 2012,
+ language = "en"
+}
+
+% The entry below contains non-ASCII chars that could not be converted
+% to a LaTeX equivalent.
+@ARTICLE{Adamson2016-qj,
+ title = "A Multiplexed {Single-Cell} {CRISPR} Screening Platform Enables
+ Systematic Dissection of the Unfolded Protein Response",
+ author = "Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min
+ Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline
+ E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and
+ Pak, Ryan A and Gray, Andrew N and Gross, Carol A and Dixit,
+ Atray and Parnas, Oren and Regev, Aviv and Weissman, Jonathan S",
+ abstract = "Functional genomics efforts face tradeoffs between number of
+ perturbations examined and complexity of phenotypes measured. We
+ bridge this gap with Perturb-seq, which combines droplet-based
+ single-cell RNA-seq with a strategy for barcoding CRISPR-mediated
+ perturbations, allowing many perturbations to be profiled in
+ pooled format. We applied Perturb-seq to dissect the mammalian
+ unfolded protein response (UPR) using single and combinatorial
+ CRISPR perturbations. Two genome-scale CRISPR interference
+ (CRISPRi) screens identified genes whose repression perturbs ER
+ homeostasis. Subjecting ∼100 hits to Perturb-seq enabled
+ high-precision functional clustering of genes. Single-cell
+ analyses decoupled the three UPR branches, revealed bifurcated
+ UPR branch activation among cells subject to the same
+ perturbation, and uncovered differential activation of the
+ branches across hits, including an isolated feedback loop between
+ the translocon and IRE1$\alpha$. These studies provide insight
+ into how the three sensors of ER homeostasis monitor distinct
+ types of stress and highlight the ability of Perturb-seq to
+ dissect complex cellular responses.",
+ journal = "Cell",
+ volume = 167,
+ number = 7,
+ pages = "1867--1882.e21",
+ month = dec,
+ year = 2016,
+ keywords = "CRISPRi; CRISPR; Single-cell RNA-seq; cell-to-cell heterogeneity;
+ genome-scale screening; single-cell genomics; unfolded protein
+ response",
+ language = "en"
+}
+
+@ARTICLE{Dixit2016-br,
+ title = "{Perturb-Seq}: Dissecting Molecular Circuits with Scalable
+ {Single-Cell} {RNA} Profiling of Pooled Genetic Screens",
+ author = "Dixit, Atray and Parnas, Oren and Li, Biyu and Chen, Jenny and
+ Fulco, Charles P and Jerby-Arnon, Livnat and Marjanovic, Nemanja
+ D and Dionne, Danielle and Burks, Tyler and Raychowdhury, Raktima
+ and Adamson, Britt and Norman, Thomas M and Lander, Eric S and
+ Weissman, Jonathan S and Friedman, Nir and Regev, Aviv",
+ abstract = "Genetic screens help infer gene function in mammalian cells, but
+ it has remained difficult to assay complex phenotypes---such as
+ transcriptional profiles---at scale. Here, we develop Perturb-seq,
+ combining single-cell RNA sequencing (RNA-seq) and clustered
+ regularly interspaced short palindromic repeats (CRISPR)-based
+ perturbations to perform many such assays in a pool. We
+ demonstrate Perturb-seq by analyzing 200,000 cells in immune
+ cells and cell lines, focusing on transcription factors
+ regulating the response of dendritic cells to lipopolysaccharide
+ (LPS). Perturb-seq accurately identifies individual gene targets,
+ gene signatures, and cell states affected by individual
+ perturbations and their genetic interactions. We posit new
+ functions for regulators of differentiation, the anti-viral
+ response, and mitochondrial function during immune activation. By
+ decomposing many high content measurements into the effects of
+ perturbations, their interactions, and diverse cell metadata,
+ Perturb-seq dramatically increases the scope of pooled genomic
+ assays.",
+ journal = "Cell",
+ volume = 167,
+ number = 7,
+ pages = "1853--1866.e17",
+ month = dec,
+ year = 2016,
+ keywords = "CRISPR; epistasis; genetic interactions; pooled screen;
+ single-cell RNA-seq",
+ language = "en"
+}
+
+@ARTICLE{Wang2020-zb,
+ title = "Live-cell imaging and analysis reveal cell phenotypic transition
+ dynamics inherently missing in snapshot data",
+ author = "Wang, Weikang and Douglas, Diana and Zhang, Jingyu and Kumari,
+ Sangeeta and Enuameh, Metewo Selase and Dai, Yan and Wallace,
+ Callen T and Watkins, Simon C and Shu, Weiguo and Xing, Jianhua",
+ abstract = "Recent advances in single-cell techniques catalyze an emerging
+ field of studying how cells convert from one phenotype to
+ another, in a step-by-step process. Two grand technical
+ challenges, however, impede further development of the field.
+ Fixed cell-based approaches can provide snapshots of
+ high-dimensional expression profiles but have fundamental limits
+ on revealing temporal information, and fluorescence-based
+ live-cell imaging approaches provide temporal information but are
+ technically challenging for multiplex long-term imaging. We first
+ developed a live-cell imaging platform that tracks cellular
+ status change through combining endogenous fluorescent labeling
+ that minimizes perturbation to cell physiology and/or live-cell
+ imaging of high-dimensional cell morphological and texture
+ features. With our platform and an A549 VIM-RFP
+ epithelial-to-mesenchymal transition (EMT) reporter cell line,
+ live-cell trajectories reveal parallel paths of EMT missing from
+ snapshot data due to cell-cell dynamic heterogeneity. Our results
+ emphasize the necessity of extracting dynamical information of
+ phenotypic transitions from multiplex live-cell imaging.",
+ journal = "Sci Adv",
+ volume = 6,
+ number = 36,
+ month = sep,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Qiu2020-uf,
+ title = "Massively parallel and time-resolved {RNA} sequencing in single
+ cells with {scNT-seq}",
+ author = "Qiu, Qi and Hu, Peng and Qiu, Xiaojie and Govek, Kiya W and
+ C{\'a}mara, Pablo G and Wu, Hao",
+ abstract = "Single-cell RNA sequencing offers snapshots of whole
+ transcriptomes but obscures the temporal RNA dynamics. Here we
+ present single-cell metabolically labeled new RNA tagging
+ sequencing (scNT-seq), a method for massively parallel analysis
+ of newly transcribed and pre-existing mRNAs from the same cell.
+ This droplet microfluidics-based method enables high-throughput
+ chemical conversion on barcoded beads, efficiently marking newly
+ transcribed mRNAs with T-to-C substitutions. Using scNT-seq, we
+ jointly profiled new and old transcriptomes in ~55,000 single
+ cells. These data revealed time-resolved transcription factor
+ activities and cell-state trajectories at the single-cell level
+ in response to neuronal activation. We further determined rates
+ of RNA biogenesis and decay to uncover RNA regulatory strategies
+ during stepwise conversion between pluripotent and rare
+ totipotent two-cell embryo (2C)-like stem cell states. Finally,
+ integrating scNT-seq with genetic perturbation identifies DNA
+ methylcytosine dioxygenase as an epigenetic barrier into the
+ 2C-like cell state. Time-resolved single-cell transcriptomic
+ analysis thus opens new lines of inquiry regarding
+ cell-type-specific RNA regulatory mechanisms.",
+ journal = "Nat. Methods",
+ volume = 17,
+ number = 10,
+ pages = "991--1001",
+ month = oct,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Bergen2020-kx,
+ title = "Generalizing {RNA} velocity to transient cell states through
+ dynamical modeling",
+ author = "Bergen, Volker and Lange, Marius and Peidli, Stefan and Wolf, F
+ Alexander and Theis, Fabian J",
+ abstract = "RNA velocity has opened up new ways of studying cellular
+ differentiation in single-cell RNA-sequencing data. It describes
+ the rate of gene expression change for an individual gene at a
+ given time point based on the ratio of its spliced and unspliced
+ messenger RNA (mRNA). However, errors in velocity estimates arise
+ if the central assumptions of a common splicing rate and the
+ observation of the full splicing dynamics with steady-state mRNA
+ levels are violated. Here we present scVelo, a method that
+ overcomes these limitations by solving the full transcriptional
+ dynamics of splicing kinetics using a likelihood-based dynamical
+ model. This generalizes RNA velocity to systems with transient
+ cell states, which are common in development and in response to
+ perturbations. We apply scVelo to disentangling subpopulation
+ kinetics in neurogenesis and pancreatic endocrinogenesis. We
+ infer gene-specific rates of transcription, splicing and
+ degradation, recover each cell's position in the underlying
+ differentiation processes and detect putative driver genes.
+ scVelo will facilitate the study of lineage decisions and gene
+ regulation.",
+ journal = "Nat. Biotechnol.",
+ volume = 38,
+ number = 12,
+ pages = "1408--1414",
+ month = dec,
+ year = 2020,
+ language = "en"
+}
+
+@BOOK{Seydel1988-ub,
+ title = "From equilibrium to chaos: practical bifurcation and stability
+ analysis",
+ author = "Seydel, R{\"u}diger",
+ publisher = "North-Holland",
+ year = 1988
+}
+
+@ARTICLE{Alemany2018-se,
+ title = "Whole-organism clone tracing using single-cell sequencing",
+ author = "Alemany, Anna and Florescu, Maria and Baron, Chlo{\'e} S and
+ Peterson-Maduro, Josi and van Oudenaarden, Alexander",
+ abstract = "Embryonic development is a crucial period in the life of a
+ multicellular organism, during which limited sets of embryonic
+ progenitors produce all cells in the adult body. Determining
+ which fate these progenitors acquire in adult tissues requires
+ the simultaneous measurement of clonal history and cell identity
+ at single-cell resolution, which has been a major challenge.
+ Clonal history has traditionally been investigated by
+ microscopically tracking cells during development, monitoring the
+ heritable expression of genetically encoded fluorescent proteins
+ and, more recently, using next-generation sequencing technologies
+ that exploit somatic mutations, microsatellite instability,
+ transposon tagging, viral barcoding, CRISPR-Cas9 genome editing
+ and Cre-loxP recombination. Single-cell transcriptomics provides
+ a powerful platform for unbiased cell-type classification. Here
+ we present ScarTrace, a single-cell sequencing strategy that
+ enables the simultaneous quantification of clonal history and
+ cell type for thousands of cells obtained from different organs
+ of the adult zebrafish. Using ScarTrace, we show that a small set
+ of multipotent embryonic progenitors generate all haematopoietic
+ cells in the kidney marrow, and that many progenitors produce
+ specific cell types in the eyes and brain. In addition, we study
+ when embryonic progenitors commit to the left or right eye.
+ ScarTrace reveals that epidermal and mesenchymal cells in the
+ caudal fin arise from the same progenitors, and that
+ osteoblast-restricted precursors can produce mesenchymal cells
+ during regeneration. Furthermore, we identify resident immune
+ cells in the fin with a distinct clonal origin from other blood
+ cell types. We envision that similar approaches will have major
+ applications in other experimental systems, in which the matching
+ of embryonic clonal origin to adult cell type will ultimately
+ allow reconstruction of how the adult body is built from a single
+ cell.",
+ journal = "Nature",
+ volume = 556,
+ number = 7699,
+ pages = "108--112",
+ month = apr,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Buenrostro2015-oz,
+ title = "Single-cell chromatin accessibility reveals principles of
+ regulatory variation",
+ author = "Buenrostro, Jason D and Wu, Beijing and Litzenburger, Ulrike M
+ and Ruff, Dave and Gonzales, Michael L and Snyder, Michael P and
+ Chang, Howard Y and Greenleaf, William J",
+ abstract = "Cell-to-cell variation is a universal feature of life that
+ affects a wide range of biological phenomena, from developmental
+ plasticity to tumour heterogeneity. Although recent advances have
+ improved our ability to document cellular phenotypic variation,
+ the fundamental mechanisms that generate variability from
+ identical DNA sequences remain elusive. Here we reveal the
+ landscape and principles of mammalian DNA regulatory variation by
+ developing a robust method for mapping the accessible genome of
+ individual cells by assay for transposase-accessible chromatin
+ using sequencing (ATAC-seq) integrated into a programmable
+ microfluidics platform. Single-cell ATAC-seq (scATAC-seq) maps
+ from hundreds of single cells in aggregate closely resemble
+ accessibility profiles from tens of millions of cells and provide
+ insights into cell-to-cell variation. Accessibility variance is
+ systematically associated with specific trans-factors and
+ cis-elements, and we discover combinations of trans-factors
+ associated with either induction or suppression of cell-to-cell
+ variability. We further identify sets of trans-factors associated
+ with cell-type-specific accessibility variance across eight cell
+ types. Targeted perturbations of cell cycle or transcription
+ factor signalling evoke stimulus-specific changes in this
+ observed variability. The pattern of accessibility variation in
+ cis across the genome recapitulates chromosome compartments de
+ novo, linking single-cell accessibility variation to
+ three-dimensional genome organization. Single-cell analysis of
+ DNA accessibility provides new insight into cellular variation of
+ the 'regulome'.",
+ journal = "Nature",
+ volume = 523,
+ number = 7561,
+ pages = "486--490",
+ month = jul,
+ year = 2015,
+ language = "en"
+}
+
+@ARTICLE{Grun2014-nb,
+ title = "Validation of noise models for single-cell transcriptomics",
+ author = "Gr{\"u}n, Dominic and Kester, Lennart and van Oudenaarden,
+ Alexander",
+ abstract = "Single-cell transcriptomics has recently emerged as a powerful
+ technology to explore gene expression heterogeneity among single
+ cells. Here we identify two major sources of technical
+ variability: sampling noise and global cell-to-cell variation in
+ sequencing efficiency. We propose noise models to correct for
+ this, which we validate using single-molecule FISH. We
+ demonstrate that gene expression variability in mouse embryonic
+ stem cells depends on the culture condition.",
+ journal = "Nat. Methods",
+ volume = 11,
+ number = 6,
+ pages = "637--640",
+ month = jun,
+ year = 2014,
+ language = "en"
+}
+
+@ARTICLE{Cahan2014-qm,
+ title = "{CellNet}: network biology applied to stem cell engineering",
+ author = "Cahan, Patrick and Li, Hu and Morris, Samantha A and Lummertz da
+ Rocha, Edroaldo and Daley, George Q and Collins, James J",
+ abstract = "Somatic cell reprogramming, directed differentiation of
+ pluripotent stem cells, and direct conversions between
+ differentiated cell lineages represent powerful approaches to
+ engineer cells for research and regenerative medicine. We have
+ developed CellNet, a network biology platform that more
+ accurately assesses the fidelity of cellular engineering than
+ existing methodologies and generates hypotheses for improving
+ cell derivations. Analyzing expression data from 56 published
+ reports, we found that cells derived via directed differentiation
+ more closely resemble their in vivo counterparts than products of
+ direct conversion, as reflected by the establishment of target
+ cell-type gene regulatory networks (GRNs). Furthermore, we
+ discovered that directly converted cells fail to adequately
+ silence expression programs of the starting population and that
+ the establishment of unintended GRNs is common to virtually every
+ cellular engineering paradigm. CellNet provides a platform for
+ quantifying how closely engineered cell populations resemble
+ their target cell type and a rational strategy to guide enhanced
+ cellular engineering.",
+ journal = "Cell",
+ volume = 158,
+ number = 4,
+ pages = "903--915",
+ month = aug,
+ year = 2014,
+ language = "en"
+}
+
+@ARTICLE{Sheth2018-mz,
+ title = "{DNA-based} memory devices for recording cellular events",
+ author = "Sheth, Ravi U and Wang, Harris H",
+ abstract = "Measuring biological data across time and space is critical for
+ understanding complex biological processes and for various
+ biosurveillance applications. However, such data are often
+ inaccessible or difficult to directly obtain. Less invasive, more
+ robust and higher-throughput biological recording tools are
+ needed to profile cells and their environments. DNA-based
+ cellular recording is an emerging and powerful framework for
+ tracking intracellular and extracellular biological events over
+ time across living cells and populations. Here, we review and
+ assess DNA recorders that utilize CRISPR nucleases, integrases
+ and base-editing strategies, as well as recombinase and
+ polymerase-based methods. Quantitative characterization,
+ modelling and evaluation of these DNA-recording modalities can
+ guide their design and implementation for specific application
+ areas.",
+ journal = "Nat. Rev. Genet.",
+ volume = 19,
+ number = 11,
+ pages = "718--732",
+ month = nov,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Fisher2007-gg,
+ title = "Requirement for {ErbB2/ErbB} signaling in developing cartilage
+ and bone",
+ author = "Fisher, Melanie C and Clinton, Gail M and Maihle, Nita J and
+ Dealy, Caroline N",
+ abstract = "During endochondral ossification, the skeletal elements of
+ vertebrate limbs form and elongate via coordinated control of
+ chondrocyte and osteoblast differentiation and proliferation. The
+ role of signaling by the ErbB family of receptor tyrosine
+ kinases, which consists of ErbB1 (epidermal growth factor
+ receptor or EGFR), ErbB2, ErbB3 and ErbB4, has been little
+ studied during cartilage and bone development. Signaling by the
+ ErbB network generates a diverse array of cellular responses via
+ formation of ErbB dimers activated by distinct ligands that
+ produce distinct signal outputs. Herstatin is a soluble ErbB2
+ receptor that acts in a dominant negative fashion to inhibit ErbB
+ signaling by binding to endogenous ErbB receptors, preventing
+ functional dimer formation. Here, we examine the effects of
+ Herstatin on limb skeletal element development in transgenic
+ mice, achieved via Prx1 promoter-driven expression in limb
+ cartilage and bone. The limb skeletal elements of Prx1-Herstatin
+ embryos are shortened, and chondrocyte maturation and osteoblast
+ differentiation are delayed. In addition, proliferation by
+ chondrocytes and periosteal cells of Prx1-Herstatin limb skeletal
+ elements is markedly reduced. Our study identifies requirements
+ for ErbB signaling in the maintenance of chondrocyte and
+ osteoblast proliferation involved in the timely progression of
+ chondrocyte maturation and periosteal osteoblast differentiation.",
+ journal = "Dev. Growth Differ.",
+ volume = 49,
+ number = 6,
+ pages = "503--513",
+ month = aug,
+ year = 2007,
+ language = "en"
+}
+
+@ARTICLE{Hendriks2019-ap,
+ title = "{NASC-seq} monitors {RNA} synthesis in single cells",
+ author = "Hendriks, Gert-Jan and Jung, Lisa A and Larsson, Anton J M and
+ Lidschreiber, Michael and Andersson Forsman, Oscar and
+ Lidschreiber, Katja and Cramer, Patrick and Sandberg, Rickard",
+ abstract = "Sequencing of newly synthesised RNA can monitor transcriptional
+ dynamics with great sensitivity and high temporal resolution, but
+ is currently restricted to populations of cells. Here, we develop
+ new transcriptome alkylation-dependent single-cell RNA sequencing
+ (NASC-seq), to monitor newly synthesised and pre-existing RNA
+ simultaneously in single cells. We validate the method on
+ pre-labelled RNA, and by demonstrating that more newly
+ synthesised RNA was detected for genes with known high mRNA
+ turnover. Monitoring RNA synthesis during Jurkat T-cell
+ activation with NASC-seq reveals both rapidly up- and
+ down-regulated genes, and that induced genes are almost
+ exclusively detected as newly transcribed. Moreover, the newly
+ synthesised and pre-existing transcriptomes after T-cell
+ activation are distinct, confirming that NASC-seq simultaneously
+ measures gene expression corresponding to two time points in
+ single cells. Altogether, NASC-seq enables precise temporal
+ monitoring of RNA synthesis at single-cell resolution during
+ homoeostasis, perturbation responses and cellular
+ differentiation.",
+ journal = "Nat. Commun.",
+ volume = 10,
+ number = 1,
+ pages = "3138",
+ month = jul,
+ year = 2019,
+ language = "en"
+}
+
+@ARTICLE{Macosko2015-mn,
+ title = "Highly Parallel Genome-wide Expression Profiling of Individual
+ Cells Using Nanoliter Droplets",
+ author = "Macosko, Evan Z and Basu, Anindita and Satija, Rahul and Nemesh,
+ James and Shekhar, Karthik and Goldman, Melissa and Tirosh, Itay
+ and Bialas, Allison R and Kamitaki, Nolan and Martersteck, Emily
+ M and Trombetta, John J and Weitz, David A and Sanes, Joshua R
+ and Shalek, Alex K and Regev, Aviv and McCarroll, Steven A",
+ abstract = "Cells, the basic units of biological structure and function, vary
+ broadly in type and state. Single-cell genomics can characterize
+ cell identity and function, but limitations of ease and scale
+ have prevented its broad application. Here we describe Drop-seq,
+ a strategy for quickly profiling thousands of individual cells by
+ separating them into nanoliter-sized aqueous droplets,
+ associating a different barcode with each cell's RNAs, and
+ sequencing them all together. Drop-seq analyzes mRNA transcripts
+ from thousands of individual cells simultaneously while
+ remembering transcripts' cell of origin. We analyzed
+ transcriptomes from 44,808 mouse retinal cells and identified 39
+ transcriptionally distinct cell populations, creating a molecular
+ atlas of gene expression for known retinal cell classes and novel
+ candidate cell subtypes. Drop-seq will accelerate biological
+ discovery by enabling routine transcriptional profiling at
+ single-cell resolution. VIDEO ABSTRACT.",
+ journal = "Cell",
+ volume = 161,
+ number = 5,
+ pages = "1202--1214",
+ month = may,
+ year = 2015,
+ language = "en"
+}
+
+@ARTICLE{Ao2009-li,
+ title = "Global view of bionetwork dynamics: adaptive landscape",
+ author = "Ao, Ping",
+ abstract = "Based on recent work, I will give a nontechnical brief review of
+ a powerful quantitative concept in biology, adaptive landscape,
+ initially proposed by S. Wright over 70 years ago, reintroduced
+ by one of the founders of molecular biology and by others in
+ different biological contexts, but apparently forgotten by modern
+ biologists for many years. Nevertheless, this concept finds an
+ increasingly important role in the development of systems biology
+ and bionetwork dynamics modeling, from phage lambda genetic
+ switch to endogenous network for cancer genesis and progression.
+ It is an ideal quantification to describe the robustness and
+ stability of bionetworks. Here, I will first introduce five
+ landmark proposals in biology on this concept, to demonstrate an
+ important common thread in theoretical biology. Then I will
+ discuss a few recent results, focusing on the studies showing
+ theoretical consistency of adaptive landscape. From the
+ perspective of a working scientist and of what is needed
+ logically for a dynamical theory when confronting empirical data,
+ the adaptive landscape is useful both metaphorically and
+ quantitatively, and has captured an essential aspect of
+ biological dynamical processes. Though at the theoretical level
+ the adaptive landscape must exist and it can be used across
+ hierarchical boundaries in biology, many associated issues are
+ indeed vague in their initial formulations and their quantitative
+ realizations are not easy, and are good research topics for
+ quantitative biologists. I will discuss three types of open
+ problems associated with the adaptive landscape in a broader
+ perspective.",
+ journal = "J. Genet. Genomics",
+ volume = 36,
+ number = 2,
+ pages = "63--73",
+ month = feb,
+ year = 2009,
+ language = "en"
+}
+
+@ARTICLE{Arda2013-pa,
+ title = "Gene regulatory networks governing pancreas development",
+ author = "Arda, H Efsun and Benitez, Cecil M and Kim, Seung K",
+ abstract = "Elucidation of cellular and gene regulatory networks (GRNs)
+ governing organ development will accelerate progress toward
+ tissue replacement. Here, we have compiled reference GRNs
+ underlying pancreas development from data mining that integrates
+ multiple approaches, including mutant analysis, lineage tracing,
+ cell purification, gene expression and enhancer analysis, and
+ biochemical studies of gene regulation. Using established
+ computational tools, we integrated and represented these networks
+ in frameworks that should enhance understanding of the surging
+ output of genomic-scale genetic and epigenetic studies of
+ pancreas development and diseases such as diabetes and pancreatic
+ cancer. We envision similar approaches would be useful for
+ understanding the development of other organs.",
+ journal = "Dev. Cell",
+ volume = 25,
+ number = 1,
+ pages = "5--13",
+ month = apr,
+ year = 2013,
+ language = "en"
+}
+
+@ARTICLE{La_Manno2018-vp,
+ title = "{RNA} velocity of single cells",
+ author = "La Manno, Gioele and Soldatov, Ruslan and Zeisel, Amit and Braun,
+ Emelie and Hochgerner, Hannah and Petukhov, Viktor and
+ Lidschreiber, Katja and Kastriti, Maria E and L{\"o}nnerberg,
+ Peter and Furlan, Alessandro and Fan, Jean and Borm, Lars E and
+ Liu, Zehua and van Bruggen, David and Guo, Jimin and He, Xiaoling
+ and Barker, Roger and Sundstr{\"o}m, Erik and Castelo-Branco,
+ Gon{\c c}alo and Cramer, Patrick and Adameyko, Igor and
+ Linnarsson, Sten and Kharchenko, Peter V",
+ abstract = "RNA abundance is a powerful indicator of the state of individual
+ cells. Single-cell RNA sequencing can reveal RNA abundance with
+ high quantitative accuracy, sensitivity and throughput. However,
+ this approach captures only a static snapshot at a point in time,
+ posing a challenge for the analysis of time-resolved phenomena
+ such as embryogenesis or tissue regeneration. Here we show that
+ RNA velocity---the time derivative of the gene expression state---can
+ be directly estimated by distinguishing between unspliced and
+ spliced mRNAs in common single-cell RNA sequencing protocols. RNA
+ velocity is a high-dimensional vector that predicts the future
+ state of individual cells on a timescale of hours. We validate
+ its accuracy in the neural crest lineage, demonstrate its use on
+ multiple published datasets and technical platforms, reveal the
+ branching lineage tree of the developing mouse hippocampus, and
+ examine the kinetics of transcription in human embryonic brain.
+ We expect RNA velocity to greatly aid the analysis of
+ developmental lineages and cellular dynamics, particularly in
+ humans.",
+ journal = "Nature",
+ volume = 560,
+ number = 7719,
+ pages = "494--498",
+ month = aug,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Clevers2017-tl,
+ title = "Lgr5 Stem Cell-based organoids in human disease",
+ author = "Clevers, Hans",
+ abstract = "The intestinal epithelium is the most rapidly self-renewing
+ tissue in adult mammals. We originally defined Lgr5 as a Wnt
+ target gene, transcribed in colon cancer cells. Two knock-in
+ alleles revealed exclusive expression of Lgr5 in cycling,
+ columnar cells at the crypt base. Using lineage tracing
+ experiments in adult mice, we found that these Lgr5+ve crypt
+ base columnar cells (CBC) generated all epithelial lineages
+ throughout life, implying that they represent the stem cell of
+ the small intestine and colon. Lgr5 was subsequently found to
+ represent an exquisitely specific and almost ``generic'' marker
+ for stem cells, including in hair follicles, kidney, liver,
+ mammary gland, inner ear tongue and stomach epithelium. Single
+ sorted Lgr5+ve stem cells can initiate ever-expanding
+ crypt-villus organoids, or so called ``mini-guts'' in 3D culture.
+ The technology is based on the observation that Lgr5 is the
+ receptor for a potent stem cell growth factor, R-spondin.
+ Similar 3D cultures systems have been developed for the Lgr5+ve
+ stem cells of human stomach, liver, pancreas, prostate and
+ kidney. Using CRISPR/Cas9 technology, genes can be efficiently
+ modified in organoids of various origins.",
+ journal = "The FASEB Journal",
+ publisher = "John Wiley \& Sons, Ltd",
+ volume = 31,
+ number = "S1",
+ pages = "85.1--85.1",
+ month = apr,
+ year = 2017
+}
+
+@ARTICLE{Elowitz2000-tc,
+ title = "A synthetic oscillatory network of transcriptional regulators",
+ author = "Elowitz, M B and Leibler, S",
+ abstract = "Networks of interacting biomolecules carry out many essential
+ functions in living cells, but the 'design principles' underlying
+ the functioning of such intracellular networks remain poorly
+ understood, despite intensive efforts including quantitative
+ analysis of relatively simple systems. Here we present a
+ complementary approach to this problem: the design and
+ construction of a synthetic network to implement a particular
+ function. We used three transcriptional repressor systems that
+ are not part of any natural biological clock to build an
+ oscillating network, termed the repressilator, in Escherichia
+ coli. The network periodically induces the synthesis of green
+ fluorescent protein as a readout of its state in individual
+ cells. The resulting oscillations, with typical periods of hours,
+ are slower than the cell-division cycle, so the state of the
+ oscillator has to be transmitted from generation to generation.
+ This artificial clock displays noisy behaviour, possibly because
+ of stochastic fluctuations of its components. Such 'rational
+ network design' may lead both to the engineering of new cellular
+ behaviours and to an improved understanding of naturally
+ occurring networks.",
+ journal = "Nature",
+ volume = 403,
+ number = 6767,
+ pages = "335--338",
+ month = jan,
+ year = 2000,
+ language = "en"
+}
+
+@BOOK{Brauer2015-po,
+ title = "Dynamical Systems for Biological Modeling: An Introduction",
+ author = "Brauer, Fred and Kribs, Christopher",
+ abstract = "Dynamical Systems for Biological Modeling: An Introduction
+ prepares both biology and mathematics students with the
+ understanding and techniques necessary to undertake basic
+ modeling of biological systems. It achieves this through the
+ development and analysis of dynamical systems. The approach
+ emphasizes qualitative ideas rather than explicit computa",
+ publisher = "CRC Press",
+ month = dec,
+ year = 2015,
+ language = "en"
+}
+
+@ARTICLE{Kiefer2018-oy,
+ title = "Expanding the Nucleoside Recoding Toolkit: Revealing {RNA}
+ Population Dynamics with 6-Thioguanosine",
+ author = "Kiefer, Lea and Schofield, Jeremy A and Simon, Matthew D",
+ abstract = "RNA-sequencing (RNA-seq) measures RNA abundance in a biological
+ sample but does not provide temporal information about the
+ sequenced RNAs. Metabolic labeling can be used to distinguish
+ newly made RNAs from pre-existing RNAs. Mutations induced from
+ chemical recoding of the hydrogen bonding pattern of the
+ metabolic label can reveal which RNAs are new in the context of a
+ sequencing experiment. These nucleotide recoding strategies have
+ been developed for a single uridine analogue, 4-thiouridine
+ (s4U), limiting the scope of these experiments. Here we report
+ the first use of nucleoside recoding with a guanosine analogue,
+ 6-thioguanosine (s6G). Using TimeLapse sequencing
+ (TimeLapse-seq), s6G can be recoded under RNA-friendly oxidative
+ nucleophilic-aromatic substitution conditions to produce adenine
+ analogues (substituted 2-aminoadenosines). We demonstrate the
+ first use of s6G recoding experiments to reveal
+ transcriptome-wide RNA population dynamics.",
+ journal = "J. Am. Chem. Soc.",
+ volume = 140,
+ number = 44,
+ pages = "14567--14570",
+ month = nov,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Kimmerling2016-mk,
+ title = "A microfluidic platform enabling single-cell {RNA-seq} of
+ multigenerational lineages",
+ author = "Kimmerling, Robert J and Lee Szeto, Gregory and Li, Jennifer W
+ and Genshaft, Alex S and Kazer, Samuel W and Payer, Kristofor R
+ and de Riba Borrajo, Jacob and Blainey, Paul C and Irvine,
+ Darrell J and Shalek, Alex K and Manalis, Scott R",
+ abstract = "We introduce a microfluidic platform that enables off-chip
+ single-cell RNA-seq after multi-generational lineage tracking
+ under controlled culture conditions. We use this platform to
+ generate whole-transcriptome profiles of primary, activated
+ murine CD8+ T-cell and lymphocytic leukemia cell line lineages.
+ Here we report that both cell types have greater intra- than
+ inter-lineage transcriptional similarity. For CD8+ T-cells, genes
+ with functional annotation relating to lymphocyte differentiation
+ and function---including Granzyme B---are enriched among the genes
+ that demonstrate greater intra-lineage expression level
+ similarity. Analysis of gene expression covariance with matched
+ measurements of time since division reveals cell type-specific
+ transcriptional signatures that correspond with cell cycle
+ progression. We believe that the ability to directly measure the
+ effects of lineage and cell cycle-dependent transcriptional
+ profiles of single cells will be broadly useful to fields where
+ heterogeneous populations of cells display distinct clonal
+ trajectories, including immunology, cancer, and developmental
+ biology.",
+ journal = "Nat. Commun.",
+ volume = 7,
+ pages = "10220",
+ month = jan,
+ year = 2016,
+ language = "en"
+}
+
+@ARTICLE{Tabula_Muris_Consortium2020-cf,
+ title = "A single-cell transcriptomic atlas characterizes ageing tissues
+ in the mouse",
+ author = "{Tabula Muris Consortium}",
+ abstract = "Ageing is characterized by a progressive loss of physiological
+ integrity, leading to impaired function and increased
+ vulnerability to death. Despite rapid advances over recent
+ years, many of the molecular and cellular processes that underlie
+ the progressive loss of healthy physiology are poorly
+ understood. To gain a better insight into these processes, here
+ we generate a single-cell transcriptomic atlas across the
+ lifespan of Mus musculus that includes data from 23 tissues and
+ organs. We found cell-specific changes occurring across multiple
+ cell types and organs, as well as age-related changes in the
+ cellular composition of different organs. Using single-cell
+ transcriptomic data, we assessed cell-type-specific
+ manifestations of different hallmarks of ageing---such as
+ senescence, genomic instability and changes in the immune
+ system. This transcriptomic atlas---which we denote Tabula Muris
+ Senis, or 'Mouse Ageing Cell Atlas'---provides molecular
+ information about how the most important hallmarks of ageing are
+ reflected in a broad range of tissues and cell types.",
+ journal = "Nature",
+ volume = 583,
+ number = 7817,
+ pages = "590--595",
+ month = jul,
+ year = 2020,
+ language = "en"
+}
+
+@ARTICLE{Westendorp2012-wi,
+ title = "{E2F7} represses a network of oscillating cell cycle genes to
+ control S-phase progression",
+ author = "Westendorp, Bart and Mokry, Michal and Groot Koerkamp, Marian J A
+ and Holstege, Frank C P and Cuppen, Edwin and de Bruin, Alain",
+ abstract = "E2F transcription factors are known to be important for timely
+ activation of G(1)/S and G(2)/M genes required for cell cycle
+ progression, but transcriptional mechanisms for deactivation of
+ cell cycle-regulated genes are unknown. Here, we show that E2F7
+ is highly expressed during mid to late S-phase, occupies
+ promoters of G(1)/S-regulated genes and represses their
+ transcription. ChIP-seq analysis revealed that E2F7 binds
+ preferentially to genomic sites containing the TTCCCGCC motif,
+ which closely resembles the E2F consensus site. We identified 89
+ target genes that carry E2F7 binding sites close to the
+ transcriptional start site and that are directly repressed by
+ short-term induction of E2F7. Most of these target genes are
+ known to be activated by E2Fs and are involved in DNA
+ replication, metabolism and DNA repair. Importantly, induction of
+ E2F7 during G(0)-G(1)/S resulted in S-phase arrest and DNA
+ damage, whereas expression of E2F7 during G(2)/M failed to
+ disturb cell cycle progression. These findings provide strong
+ evidence that E2F7 directly controls the downswing of oscillating
+ G(1)/S genes during S-phase progression.",
+ journal = "Nucleic Acids Res.",
+ volume = 40,
+ number = 8,
+ pages = "3511--3523",
+ month = apr,
+ year = 2012,
+ language = "en"
+}
+
+
+@ARTICLE{Zhou2017-du,
+ author = {Jiayi Ma and Ji Zhao and Hanqi Guo and Junjun Jiang and Huabing Zhou and Yuan Gao},
+ title = {Locality Preserving Matching},
+ journal = {Proceedings of the Twenty-Sixth International Joint Conference on
+ Artificial Intelligence, {IJCAI-17}},
+ pages = {4492--4498},
+ year = {2017},
+}
+
+
+@ARTICLE{Gordon2020-pt,
+ title = "A {SARS-CoV-2} protein interaction map reveals targets for drug repurposing",
+ author = "Gordon, David E and Jang, Gwendolyn M and Bouhaddou, Mehdi and Xu, Jiewei and Obernier, Kirsten and White, Kris M and O'Meara, Matthew J and Rezelj, Veronica V and Guo, Jeffrey Z and Swaney, Danielle L and Tummino, Tia A and H{\"u}ttenhain, Ruth and Kaake, Robyn M and Richards, Alicia L and Tutuncuoglu, Beril and
+ Foussard, Helene and Batra, Jyoti and Haas, Kelsey and Modak,
+ Maya and Kim, Minkyu and Haas, Paige and Polacco, Benjamin J and
+ Braberg, Hannes and Fabius, Jacqueline M and Eckhardt, Manon and
+ Soucheray, Margaret and Bennett, Melanie J and Cakir, Merve and
+ McGregor, Michael J and Li, Qiongyu and Meyer, Bjoern and Roesch,
+ Ferdinand and Vallet, Thomas and Mac Kain, Alice and Miorin, Lisa
+ and Moreno, Elena and Naing, Zun Zar Chi and Zhou, Yuan and Peng,
+ Shiming and Shi, Ying and Zhang, Ziyang and Shen, Wenqi and
+ Kirby, Ilsa T and Melnyk, James E and Chorba, John S and Lou,
+ Kevin and Dai, Shizhong A and Barrio-Hernandez, Inigo and Memon,
+ Danish and Hernandez-Armenta, Claudia and Lyu, Jiankun and Mathy,
+ Christopher J P and Perica, Tina and Pilla, Kala Bharath and
+ Ganesan, Sai J and Saltzberg, Daniel J and Rakesh, Ramachandran
+ and Liu, Xi and Rosenthal, Sara B and Calviello, Lorenzo and
+ Venkataramanan, Srivats and Liboy-Lugo, Jose and Lin, Yizhu and
+ Huang, Xi-Ping and Liu, Yongfeng and Wankowicz, Stephanie A and
+ Bohn, Markus and Safari, Maliheh and Ugur, Fatima S and Koh,
+ Cassandra and Savar, Nastaran Sadat and Tran, Quang Dinh and
+ Shengjuler, Djoshkun and Fletcher, Sabrina J and O'Neal, Michael
+ C and Cai, Yiming and Chang, Jason C J and Broadhurst, David J
+ and Klippsten, Saker and Sharp, Phillip P and Wenzell, Nicole A
+ and Kuzuoglu-Ozturk, Duygu and Wang, Hao-Yuan and Trenker,
+ Raphael and Young, Janet M and Cavero, Devin A and Hiatt, Joseph
+ and Roth, Theodore L and Rathore, Ujjwal and Subramanian, Advait
+ and Noack, Julia and Hubert, Mathieu and Stroud, Robert M and
+ Frankel, Alan D and Rosenberg, Oren S and Verba, Kliment A and
+ Agard, David A and Ott, Melanie and Emerman, Michael and Jura,
+ Natalia and von Zastrow, Mark and Verdin, Eric and Ashworth, Alan
+ and Schwartz, Olivier and d'Enfert, Christophe and Mukherjee,
+ Shaeri and Jacobson, Matt and Malik, Harmit S and Fujimori,
+ Danica G and Ideker, Trey and Craik, Charles S and Floor, Stephen
+ N and Fraser, James S and Gross, John D and Sali, Andrej and
+ Roth, Bryan L and Ruggero, Davide and Taunton, Jack and Kortemme,
+ Tanja and Beltrao, Pedro and Vignuzzi, Marco and
+ Garc{\'\i}a-Sastre, Adolfo and Shokat, Kevan M and Shoichet,
+ Brian K and Krogan, Nevan J",
+ abstract = "A newly described coronavirus named severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), which is the causative agent of coronavirus disease 2019 (COVID-19), has infected over 2.3 million people, led to the death of more than 160,000 individuals and caused worldwide social and economic disruption1,2. There are no antiviral drugs with proven clinical efficacy for the treatment of COVID-19, nor are there any vaccines that prevent infection with SARS-CoV-2, and efforts to develop drugs and vaccines are hampered by the limited knowledge of the molecular details of how SARS-CoV-2 infects cells. Here we cloned, tagged and expressed 26 of the 29 SARS-CoV-2 proteins in human cells and identified the human proteins that physically associated with each of the SARS-CoV-2 proteins using affinity-purification mass spectrometry, identifying 332 high-confidence protein-protein interactions between SARS-CoV-2 and human proteins. Among these, we identify 66 druggable human proteins or host factors targeted by 69 compounds (of which, 29 drugs are approved by the US Food and Drug Administration, 12 are in clinical trials and 28 are preclinical compounds). We screened a subset of these in multiple viral assays and found two sets of pharmacological agents that displayed antiviral activity: inhibitors of mRNA translation and predicted regulators of the sigma-1 and sigma-2 receptors. Further studies of these host-factor-targeting agents, including their combination with drugs that directly target viral enzymes, could lead to a therapeutic regimen to treat COVID-19.",
+ journal = "Nature",
+ volume = 583,
+ number = 7816,
+ pages = "459--468",
+ month = jul,
+ year = 2020,
+ language = "en"
+}
+
+@UNPUBLISHED{Hein2021-mj,
+ title = "Functional single-cell genomics of human cytomegalovirus
+ infection",
+ author = "Hein, Marco Y and Weissman, Jonathan S",
+ abstract = "The complex life cycle of herpesviruses is orchestrated by the
+ interplay of host factors and hundreds of viral genes.
+ Understanding how they work together and how perturbations of
+ viral and host factors impact infection represents both a
+ fundamental problem in virology and the basis for designing
+ antiviral interventions. Here, we use CRISPR screening to
+ comprehensively define the functional contribution of each viral
+ and host factor to human cytomegalovirus (HCMV) infection in
+ primary cells. We then record the transcriptomes of tens of
+ thousands of single cells, and monitor how genetic perturbation
+ of critical host and viral factors alters the timing, course, and
+ progression of infection. We find that normally, the large
+ majority of cells follow a stereotypical transcriptional
+ trajectory. Perturbing critical host factors does not change this
+ trajectory per se, but can either stall, delay or accelerate
+ progression along the trajectory, allowing us to pinpoint
+ systematically the stage of infection at which each host factor
+ acts. Conversely, perturbation of viral factors can create
+ distinct, abortive trajectories. Our results reveal a dichotomy
+ between the roles of host and viral factors and more generally
+ provide a road map for functional dissection of host-pathogen
+ interactions. \#\#\# Competing Interest Statement The authors
+ have declared no competing interest.",
+ journal = "Cold Spring Harbor Laboratory",
+ pages = "775080",
+ month = jan,
+ year = 2021,
+ language = "en"
+}
+
+@ARTICLE{Stoeckius2017-vw,
+ title = "Simultaneous epitope and transcriptome measurement in single cells",
+ author = "Stoeckius, Marlon and Hafemeister, Christoph and Stephenson, William and Houck-Loomis, Brian and Chattopadhyay, Pratip K and Swerdlow, Harold and Satija, Rahul and Smibert, Peter",
+ abstract = "High-throughput single-cell RNA sequencing has transformed our
+ understanding of complex cell populations, but it does not
+ provide phenotypic information such as cell-surface protein
+ levels. Here, we describe cellular indexing of transcriptomes and
+ epitopes by sequencing (CITE-seq), a method in which
+ oligonucleotide-labeled antibodies are used to integrate cellular
+ protein and transcriptome measurements into an efficient,
+ single-cell readout. CITE-seq is compatible with existing
+ single-cell sequencing approaches and scales readily with
+ throughput increases.",
+ journal = "Nat. Methods",
+ volume = 14,
+ number = 9,
+ pages = "865--868",
+ month = sep,
+ year = 2017,
+ language = "en"
+}
+
+@ARTICLE{Kim2000-lb,
+ title = "Multiconfiguration molecular mechanics algorithm for potential
+ energy surfaces of chemical reactions",
+ author = "Kim, Yongho and Corchado, Jos{\'e} C and Vill{\`a}, Jordi and
+ Xing, Jianhua and Truhlar, Donald G",
+ abstract = "We present an efficient algorithm for generating semiglobal
+ potential energy surfaces of reactive systems. The method takes
+ as input molecular mechanics force fields for reactants and
+ products and a quadratic expansion of the potential energy
+ surface around a small number of geometries whose locations are
+ determined by an iterative process. These Hessian expansions
+ might come, for example, from ab initio electronic structure
+ calculations, density functional theory, or semiempirical
+ molecular orbital theory. A 2$\times$2 electronic diabatic Hamiltonian
+ matrix is constructed from these data such that, by
+ construction, the lowest eigenvalue of this matrix provides a
+ semiglobal approximation to the lowest electronically adiabatic
+ potential energy surface. The theory is illustrated and tested
+ by applications to rate constant calculations for three
+ gas-phase test reactions, namely, the isomerization of
+ 1,3-cis-pentadiene, OH+CH4$\rightarrow$H2O+CH3, and CH2Cl+CH3F$\rightarrow$CH3Cl+CH2F.",
+ journal = "J. Chem. Phys.",
+ publisher = "American Institute of Physics",
+ volume = 112,
+ number = 6,
+ pages = "2718--2735",
+ month = feb,
+ year = 2000
+}
+
+@ARTICLE{Gorin2020-yh,
+ title = "Protein velocity and acceleration from single-cell multiomics
+ experiments",
+ author = "Gorin, Gennady and Svensson, Valentine and Pachter, Lior",
+ abstract = "The simultaneous quantification of protein and RNA makes possible
+ the inference of past, present, and future cell states from
+ single experimental snapshots. To enable such temporal analysis
+ from multimodal single-cell experiments, we introduce an
+ extension of the RNA velocity method that leverages estimates of
+ unprocessed transcript and protein abundances to extrapolate cell
+ states. We apply the model to six datasets and demonstrate
+ consistency among cell landscapes and phase portraits. The
+ analysis software is available as the protaccel Python package.",
+ journal = "Genome Biol.",
+ volume = 21,
+ number = 1,
+ pages = "39",
+ month = feb,
+ year = 2020,
+ keywords = "Bioinformatics; Computational biology; Multiomics; Protein
+ acceleration; Protein velocity; RNA velocity; Transcriptomics",
+ language = "en"
+}
+
+@ARTICLE{Stoeckius2018-cb,
+ title = "Cell Hashing with barcoded antibodies enables multiplexing and
+ doublet detection for single cell genomics",
+ author = "Stoeckius, Marlon and Zheng, Shiwei and Houck-Loomis, Brian and
+ Hao, Stephanie and Yeung, Bertrand Z and Mauck, 3rd, William M
+ and Smibert, Peter and Satija, Rahul",
+ abstract = "Despite rapid developments in single cell sequencing,
+ sample-specific batch effects, detection of cell multiplets, and
+ experimental costs remain outstanding challenges. Here, we
+ introduce Cell Hashing, where oligo-tagged antibodies against
+ ubiquitously expressed surface proteins uniquely label cells from
+ distinct samples, which can be subsequently pooled. By sequencing
+ these tags alongside the cellular transcriptome, we can assign
+ each cell to its original sample, robustly identify cross-sample
+ multiplets, and ``super-load'' commercial droplet-based systems
+ for significant cost reduction. We validate our approach using a
+ complementary genetic approach and demonstrate how hashing can
+ generalize the benefits of single cell multiplexing to diverse
+ samples and experimental designs.",
+ journal = "Genome Biol.",
+ volume = 19,
+ number = 1,
+ pages = "224",
+ month = dec,
+ year = 2018,
+ language = "en"
+}
+
+@ARTICLE{Love2014-na,
+ title = "Moderated estimation of fold change and dispersion for {RNA-seq}
+ data with {DESeq2}",
+ author = "Love, Michael I and Huber, Wolfgang and Anders, Simon",
+ abstract = "In comparative high-throughput sequencing assays, a fundamental
+ task is the analysis of count data, such as read counts per gene
+ in RNA-seq, for evidence of systematic changes across
+ experimental conditions. Small replicate numbers, discreteness,
+ large dynamic range and the presence of outliers require a
+ suitable statistical approach. We present DESeq2, a method for
+ differential analysis of count data, using shrinkage estimation
+ for dispersions and fold changes to improve stability and
+ interpretability of estimates. This enables a more quantitative
+ analysis focused on the strength rather than the mere presence of
+ differential expression. The DESeq2 package is available at
+ http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
+ webcite.",
+ journal = "Genome Biol.",
+ volume = 15,
+ number = 12,
+ pages = "550",
+ year = 2014,
+ language = "en"
+}
+
+@BOOK{Marsden2012-zj,
+ title = "Vector Calculus",
+ author = "Marsden, Jerrold E and Tromba, Anthony",
+ abstract = "",
+ publisher = "W. H. Freeman and Company",
+ year = 2012,
+ language = "en"
+}
+
+@ARTICLE{Perez-Carrasco2016,
+ doi = {10.1371/journal.pcbi.1005154},
+ author = {Perez-Carrasco, Ruben and Guerrero, Pilar and Briscoe, James and Page, Karen M.},
+ journal = {PLOS Computational Biology},
+ publisher = {Public Library of Science},
+ title = {Intrinsic Noise Profoundly Alters the Dynamics and Steady State of Morphogen-Controlled Bistable Genetic Switches},
+ year = {2016},
+ month = {10},
+ volume = {12},
+ url = {https://doi.org/10.1371/journal.pcbi.1005154},
+ pages = {1--23},
+ abstract = {During tissue development, patterns of gene expression determine the spatial arrangement of cell types. In many cases, gradients of secreted signalling molecules—morphogens—guide this process by controlling downstream transcriptional networks. A mechanism commonly used in these networks to convert the continuous information provided by the gradient into discrete transitions between adjacent cell types is the genetic toggle switch, composed of cross-repressing transcriptional determinants. Previous analyses have emphasised the steady state output of these mechanisms. Here, we explore the dynamics of the toggle switch and use exact numerical simulations of the kinetic reactions, the corresponding Chemical Langevin Equation, and Minimum Action Path theory to establish a framework for studying the effect of gene expression noise on patterning time and boundary position. This provides insight into the time scale, gene expression trajectories and directionality of stochastic switching events between cell states. Taking gene expression noise into account predicts that the final boundary position of a morphogen-induced toggle switch, although robust to changes in the details of the noise, is distinct from that of the deterministic system. Moreover, the dramatic increase in patterning time close to the boundary predicted from the deterministic case is substantially reduced. The resulting stochastic switching introduces differences in patterning time along the morphogen gradient that result in a patterning wave propagating away from the morphogen source with a velocity determined by the intrinsic noise. The wave sharpens and slows as it advances and may never reach steady state in a biologically relevant time. This could explain experimentally observed dynamics of pattern formation. 
Together the analysis reveals the importance of dynamical transients for understanding morphogen-driven transcriptional networks and indicates that gene expression noise can qualitatively alter developmental patterning.},
+ number = {10},
+}
+
+@Article{Tang2017,
+ author={Tang, Ying
+ and Yuan, Ruoshi
+ and Wang, Gaowei
+ and Zhu, Xiaomei
+ and Ao, Ping},
+ title={Potential landscape of high dimensional nonlinear stochastic dynamics with large noise},
+ journal={Scientific Reports},
+ year={2017},
+ month={Nov},
+ day={17},
+ volume={7},
+ number={1},
+ pages={15762},
+ abstract={Quantifying stochastic processes is essential to understand many natural phenomena, particularly in biology, including the cell-fate decision in developmental processes as well as the genesis and progression of cancers. While various attempts have been made to construct potential landscape in high dimensional systems and to estimate transition rates, they are practically limited to the cases where either noise is small or detailed balance condition holds. A general and practical approach to investigate real-world nonequilibrium systems, which are typically high-dimensional and subject to large multiplicative noise and the breakdown of detailed balance, remains elusive. Here, we formulate a computational framework that can directly compute the relative probabilities between locally stable states of such systems based on a least action method, without the necessity of simulating the steady-state distribution. The method can be applied to systems with arbitrary noise intensities through A-type stochastic integration, which preserves the dynamical structure of the deterministic counterpart dynamics. We demonstrate our approach in a numerically accurate manner through solvable examples. We further apply the method to investigate the role of noise on tumor heterogeneity in a 38-dimensional network model for prostate cancer, and provide a new strategy on controlling cell populations by manipulating noise strength.},
+ issn={2045-2322},
+ doi={10.1038/s41598-017-15889-2},
+ url={https://doi.org/10.1038/s41598-017-15889-2}
+}
+
+@book{freidlin2012random,
+ added-at = {2014-11-14T03:12:52.000+0100},
+ author = {Freidlin, Mark I. and Wentzell, Alexander D.},
+ biburl = {https://www.bibsonomy.org/bibtex/26df179288a704c367421a6f9801fe949/peter.ralph},
+ interhash = {0926f80cc9f0d77d5bce51922d3ff9ea},
+ intrahash = {6df179288a704c367421a6f9801fe949},
+ isbn = {9783642258473},
+ keywords = {Freidlin-Wentzell_theory dynamical_systems large_deviations stochastic_perturbation},
+ publisher = {Springer},
+ series = {Grundlehren der mathematischen Wissenschaften},
+ timestamp = {2014-11-14T03:15:07.000+0100},
+ title = {Random Perturbations of Dynamical Systems},
+ url = {http://books.google.de/books?id=p8LFMILAiMEC},
+ year = 2012
+}
+
+@article{onsager1953,
+ title = {Fluctuations and Irreversible Processes},
+ author = {Onsager, Lars and Machlup, Stefan},
+ journal = {Phys. Rev.},
+ volume = {91},
+ issue = {6},
+ pages = {1505--1512},
+ numpages = {0},
+ year = {1953},
+ month = {Sep},
+ publisher = {American Physical Society},
+ doi = {10.1103/PhysRev.91.1505},
+ url = {https://link.aps.org/doi/10.1103/PhysRev.91.1505}
+}
+
+
+@article{Maier1997,
+ ISSN = {00361399},
+ URL = {http://www.jstor.org/stable/2951902},
+ abstract = {Consider a two-dimensional continuous-time dynamical system, with an attracting fixed point S. If the deterministic dynamics are perturbed by white noise (random perturbations) of strength ε, the system state will eventually leave the domain of attraction Ω of S. We analyze the case when, as ε → 0, the exit location on the boundary ∂Ω is increasingly concentrated near a saddle point H of the deterministic dynamics. We show using formal methods that the asymptotic form of the exit location distribution on ∂Ω is generically non-Gaussian and asymmetric, and classify the possible limiting distributions. A key role is played by a parameter μ, equal to the ratio $|\lambda_s(H)|/\lambda_u(H)$ of the stable and unstable eigenvalues of the linearized deterministic flow at H. If $\mu < 1$, then the exit location distribution is generically asymptotic as ε → 0 to a Weibull distribution with shape parameter 2/μ, on the $O(\epsilon^{\mu/2})$ lengthscale near H. If $\mu > 1$, it is generically asymptotic to a distribution on the $O(\epsilon^{1/2})$ lengthscale, whose moments we compute. Our treatment employs both matched asymptotic expansions and stochastic analysis. As a byproduct of our treatment, we clarify the limitations of the traditional Eyring formula for the weak-noise exit time asymptotics.},
+ author = {Robert S. Maier and Daniel L. Stein},
+ journal = {SIAM Journal on Applied Mathematics},
+ number = {3},
+ pages = {752--790},
+ publisher = {Society for Industrial and Applied Mathematics},
+ title = {Limiting Exit Location Distributions in the Stochastic Exit Problem},
+ volume = {57},
+ year = {1997}
+}
+
+@article{Aurell2002,
+ title = {Epigenetics as a First Exit Problem},
+ author = {Aurell, Erik and Sneppen, Kim},
+ journal = {Phys. Rev. Lett.},
+ volume = {88},
+ issue = {4},
+ pages = {048101},
+ numpages = {4},
+ year = {2002},
+ month = {Jan},
+ publisher = {American Physical Society},
+ doi = {10.1103/PhysRevLett.88.048101},
+ url = {https://link.aps.org/doi/10.1103/PhysRevLett.88.048101}
+}
+
+@incollection{VANKAMPEN2007193,
+ title = {Chapter VIII - THE FOKKER–PLANCK EQUATION},
+ editor = {N.G. Van Kampen},
+ booktitle = {Stochastic Processes in Physics and Chemistry (Third Edition)},
+ publisher = {Elsevier},
+ edition = {Third Edition},
+ address = {Amsterdam},
+ pages = {193--218},
+ year = {2007},
+ series = {North-Holland Personal Library},
+ issn = {09255818},
+ doi = {10.1016/B978-044452965-7/50011-8},
+ url = {https://www.sciencedirect.com/science/article/pii/B9780444529657500118},
+ author = {N.G. Van Kampen}
+}
+
+@article{Merkl2013,
+ author = {Merkl, Claudia and Saalfrank, Anja and Riesen, Nathalie and Kühn, Ralf and Pertek, Anna and Eser, Stefan and Hardt, Markus and Kind, Alexander and Saur, Dieter and Wurst, Wolfgang and Iglesias, Antonio and Schnieke, Angelika},
+ year = {2013},
+ month = {01},
+ pages = {e55170},
+ title = {Efficient Generation of Rat Induced Pluripotent Stem Cells Using a Non-Viral Inducible Vector},
+ volume = {8},
+ journal = {PLoS ONE},
+ doi = {10.1371/journal.pone.0055170}
+}
+
+@book{fey65,
+ added-at = {2008-06-25T19:30:29.000+0200},
+ address = {New York},
+ annote = {polarons and Path Integrals},
+ author = {Feynman, Richard P. and Hibbs, Albert R.},
+ biburl = {https://www.bibsonomy.org/bibtex/20ce4ceae88fcbe31e3dafdb3aa8f8a66/jgl},
+ citeulike-article-id = {2484177},
+ comment = {polarons and Path Integrals},
+ interhash = {23ae1411bb88c2017745e1b468ac27c3},
+ intrahash = {0ce4ceae88fcbe31e3dafdb3aa8f8a66},
+ keywords = {high-tc, htsct, theory},
+ posted-at = {2008-03-07 13:36:20},
+ priority = {2},
+ publisher = {McGraw-Hill},
+ timestamp = {2008-06-25T19:31:36.000+0200},
+ title = {Quantum Mechanics and Path Integrals},
+ year = 1965
+}
+@article{takahashi2006induction,
+ title={Induction of pluripotent stem cells from mouse embryonic and adult fibroblast cultures by defined factors},
+ author={Takahashi, Kazutoshi and Yamanaka, Shinya},
+ journal={Cell},
+ volume={126},
+ number={4},
+ pages={663--676},
+ year={2006},
+ publisher={Elsevier}
+}
+@ARTICLE{2020SciPy-NMeth,
+ author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and Haberland, Matt and Reddy, Tyler and Cournapeau, David and
+ Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
+ Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
+ Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and Kern, Robert and Larson, Eric and Carey, C J and Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
+ Harris, Charles R. and Archibald, Anne M. and
+ Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
+ {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
+ title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
+ Computing in Python}},
+ journal = {Nature Methods},
+ year = {2020},
+ volume = {17},
+ pages = {261--272},
+ adsurl = {https://rdcu.be/b08Wh},
+ doi = {10.1038/s41592-019-0686-2},
+}
+
+@article{larsson2019genomic,
+ title={Genomic encoding of transcriptional burst kinetics},
+ author={Larsson, Anton JM and Johnsson, Per and Hagemann-Jensen, Michael and Hartmanis, Leonard and Faridani, Omid R and Reinius, Bj{\"o}rn and Segerstolpe, {\AA}sa and Rivera, Chloe M and Ren, Bing and Sandberg, Rickard},
+ journal={Nature},
+ volume={565},
+ number={7738},
+ pages={251--254},
+ year={2019},
+ publisher={Nature Publishing Group}
+}
+
+@article{grun2014validation,
+ title={Validation of noise models for single-cell transcriptomics},
+ author={Gr{\"u}n, Dominic and Kester, Lennart and Van Oudenaarden, Alexander},
+ journal={Nature Methods},
+ volume={11},
+ number={6},
+ pages={637--640},
+ year={2014},
+ publisher={Nature Publishing Group}
+}
+
+@article{scikit-learn,
+ title={Scikit-learn: Machine Learning in {P}ython},
+ author={Pedregosa, Fabian and Varoquaux, Gaël and Gramfort, Alexandre and Michel, Vincent
+ and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter
+ and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and
+ Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, Édouard},
+ journal={Journal of Machine Learning Research},
+ volume={12},
+ pages={2825--2830},
+ year={2011}
+}
+
+@article {Wang8257,
+ author = {Wang, Jin and Zhang, Kun and Xu, Li and Wang, Erkang},
+ title = {Quantifying the Waddington landscape and biological paths for development and differentiation},
+ volume = {108},
+ number = {20},
+ pages = {8257--8262},
+ year = {2011},
+ doi = {10.1073/pnas.1017017108},
+ publisher = {National Academy of Sciences},
+ abstract = {We developed a theoretical framework to prove the existence and quantify the Waddington landscape as well as chreode-biological paths for development and differentiation. The cells can have states with the higher probability ones giving the different cell types. Different cell types correspond to different basins of attractions of the probability landscape. We study how the cells develop from undifferentiated cells to differentiated cells from landscape perspectives. We quantified the Waddington landscape through construction of underlying probability landscape for cell development. We show the developmental process proceeds as moving from undifferentiated to the differentiated basins of attractions. The barrier height of the basins of attractions correlates with the escape time that determines the stability of cell types. We show that the developmental process can be quantitatively described and uncovered by the biological paths on the quantified Waddington landscape from undifferentiated to the differentiated cells. We found the dynamics of the developmental process is controlled by a combination of the gradient and curl force on the landscape. The biological paths often do not follow the steepest descent path on the landscape. The landscape framework also quantifies the possibility of reverse differentiation process such as cell reprogramming from differentiated cells back to the original stem cell. We show that the biological path of reverse differentiation is irreversible and different from the one for differentiation process. We found that the developmental process described by the underlying landscape and the associated biological paths is relatively stable and robust against the influences of environmental perturbations.},
+ issn = {0027-8424},
+ URL = {https://www.pnas.org/content/108/20/8257},
+ eprint = {https://www.pnas.org/content/108/20/8257.full.pdf},
+ journal = {Proceedings of the National Academy of Sciences}
+}
\ No newline at end of file
diff --git a/docs/references.md b/docs/references.md
new file mode 100644
index 000000000..00ad6a6ea
--- /dev/null
+++ b/docs/references.md
@@ -0,0 +1,5 @@
+# References
+
+```{bibliography}
+:cited:
+```
diff --git a/docs/requirements.txt b/docs/requirements.txt
old mode 100755
new mode 100644
index f69d56a93..7af07f223
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -34,6 +34,17 @@ docutils
mock
pandocfilters
readthedocs-sphinx-ext
-sphinxcontrib-bibtex>=2.3
sphinx-gallery
typing-extensions
+
+docutils>=0.8,!=0.18.*,!=0.19.*
+ipython
+sphinx-book-theme>=1.0.1
+sphinx_copybutton
+sphinx-design
+sphinxext-opengraph
+sphinx-hoverxref
+sphinxcontrib-bibtex>=1.0.0
+myst-parser
+myst-nb
+sphinx-autodoc-typehints
\ No newline at end of file
diff --git a/docs/source/_ext/pdfembed.py b/docs/source/_ext/pdfembed.py
deleted file mode 100644
index 4b0c12da6..000000000
--- a/docs/source/_ext/pdfembed.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# We would like to acknowledge the contribution of SuperKogito for their valuable code of sphinxcontrib-pdfembed.
-# The original code can be found at https://github.com/SuperKogito/sphinxcontrib-pdfembed/blob/master/sphinxcontrib/pdfembed.py.
-
-from docutils import nodes
-
-
-def pdfembed_html(pdfembed_specs):
- """
- Build the iframe code for the pdf file,
- """
- html_base_code = """
-
- """
- return html_base_code % (
- pdfembed_specs["src"],
- pdfembed_specs["height"],
- pdfembed_specs["width"],
- pdfembed_specs["align"],
- )
-
-
-def pdfembed_role(typ, rawtext, text, lineno, inliner, options={}, content=[]):
- """
- Get iframe specifications and generate the associate HTML code for the pdf iframe.
- """
- # parse and init variables
- text = text.replace(" ", "")
- pdfembed_specs = {}
- # read specs
- for component in text.split(","):
- pdfembed_specs[component.split(":")[0]] = component.split(":")[1]
- # build node from pdf iframe html code
- node = nodes.raw("", pdfembed_html(pdfembed_specs), format="html")
- return [node], []
-
-
-def setup(app):
- """
- Set up the app with the extension function
- """
- app.add_role("pdfembed", pdfembed_role)
diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst
deleted file mode 100644
index b29757c52..000000000
--- a/docs/source/_templates/custom-class-template.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
- :members:
- :show-inheritance:
- :inherited-members:
-
- {% block methods %}
- .. automethod:: __init__
-
- {% if methods %}
- .. rubric:: {{ _('Methods') }}
-
- .. autosummary::
- {% for item in methods %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: {{ _('Attributes') }}
-
- .. autosummary::
- {% for item in attributes %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst
deleted file mode 100644
index a23004f93..000000000
--- a/docs/source/_templates/custom-module-template.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. automodule:: {{ fullname }}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: Module Attributes
-
- .. autosummary::
- :toctree:
- {% for item in attributes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block functions %}
- {% if functions %}
- .. rubric:: {{ _('Functions') }}
-
- .. autosummary::
- :toctree:
- {% for item in functions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block classes %}
- {% if classes %}
- .. rubric:: {{ _('Classes') }}
-
- .. autosummary::
- :toctree:
- :template: custom-class-template.rst
- {% for item in classes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block exceptions %}
- {% if exceptions %}
- .. rubric:: {{ _('Exceptions') }}
-
- .. autosummary::
- :toctree:
- {% for item in exceptions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
-{% block modules %}
-{% if modules %}
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
- :template: custom-class-template.rst
- :recursive:
-{% for item in modules %}
- {{ item }}
-{%- endfor %}
-{% endif %}
-{% endblock %}
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100755
index 4af1d4583..000000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# http://www.sphinx-doc.org/en/master/config
-
-# -- Path setup --------------------------------------------------------------
-
-import os
-import sys
-from pathlib import Path
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-from urllib.request import urlretrieve
-
-module_path = os.path.join(os.path.dirname(__file__), "../..")
-sys.path.insert(0, os.path.abspath(module_path))
-sys.path.insert(0, os.path.abspath("../"))
-sys.path.insert(0, os.path.abspath("../../"))
-sys.path.append(os.path.abspath("./_ext"))
-
-import dynamo
-from docs.source.utils import _download_docs_dirs
-
-# HERE = Path(__file__).parent
-# sys.path[:0] = [str(HERE.parent)]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
-source_suffix = [".rst"]
-bibtex_bibfiles = ["./notebooks/lap.bib", "./notebooks/dynamo_ref.bib"]
-bibtex_reference_style = "author_year"
-
-master_doc = "index"
-
-
-# the following are borrowed from scvelo
-# -- Retrieve notebooks ------------------------------------------------
-# notebooks_url = "https://github.com/aristoteleo/dynamo-tutorials/raw/master/"
-# notebooks = [
-# "Introduction.ipynb",
-# "Primer.ipynb",
-# "Differential_geometry.ipynb",
-# "zebrafish.ipynb",
-# # "dentategyrus_subset_scvelo.ipynb",
-# # "pancreatic_endocrinogenesis.ipynb",
-# "scNT_seq_readthedocs.ipynb",
-# "scEU_seq_rpe1_analysis_kinetic.ipynb",
-# "scEU_seq_organoid_analysis_kinetic.ipynb",
-# ]
-# for nb in notebooks:
-# try:
-# urlretrieve(notebooks_url + nb, nb)
-# except:
-# pass
-
-github_org = "aristoteleo"
-github_code_repo = "dynamo-release"
-github_ref = "master"
-github_nb_repo = "dynamo_readthedocs"
-_download_docs_dirs(repo_url=f"https://github.com/{github_org}/{github_nb_repo}")
-
-# Add notebooks prolog to Google Colab and nbviewer
-nbsphinx_prolog = r"""
-{% set docname = 'github/aristoteleo/dynamo-tutorials/blob/master/' + env.doc2path(env.docname, base=None) %}
-.. raw:: html
-
-