diff --git a/cell2tcr.ipynb b/cell2tcr.ipynb index fa203b1..b20b5d0 100644 --- a/cell2tcr.ipynb +++ b/cell2tcr.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "aquatic-variable", + "id": "robust-garbage", "metadata": {}, "source": [ "# Cell2TCR\n", @@ -12,7 +12,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "basic-cabin", + "id": "collect-insulin", "metadata": {}, "outputs": [ { @@ -34,7 +34,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "foreign-petersburg", + "id": "municipal-jacksonville", "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ }, { "cell_type": "markdown", - "id": "natural-complement", + "id": "ready-cargo", "metadata": {}, "source": [ "## Download data\n", @@ -57,7 +57,7 @@ }, { "cell_type": "markdown", - "id": "british-leader", + "id": "sensitive-instrumentation", "metadata": {}, "source": [ "## Load and format data" @@ -65,8 +65,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "animated-triangle", + "execution_count": 3, + "id": "ceramic-minnesota", "metadata": {}, "outputs": [ { @@ -80,7 +80,7 @@ " layers: 'raw'" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -93,11 +93,11 @@ { "cell_type": "code", "execution_count": 3, - "id": "executed-place", + "id": "vietnamese-timing", "metadata": {}, "outputs": [], "source": [ - "### in case of memory issues during QC / Celltypist prediction : save this T cell adata to file and load in new session ###\n", + "### in case of memory issues during QC / Celltypist prediction : save the T cell adata to file and load in new session ###\n", "\n", "# subset to T cells\n", "# adata = adata[adata.obs.TCR_chain_composition=='double_alpha_beta']\n", @@ -113,7 +113,7 @@ }, { "cell_type": "markdown", - "id": "inner-identity", + "id": "cultural-tradition", "metadata": {}, "source": [ "## QC" @@ -122,11 +122,10 @@ { "cell_type": "code", "execution_count": 4, - "id": "sophisticated-orchestra", + "id": "coordinated-drawing", "metadata": {}, "outputs": [], "source": [ - "# QC, normalisation and log1p\n", "adata.X = adata.layers['raw']\n", "sc.pp.filter_cells(adata, min_genes=200)\n", "adata.var['mt'] = adata.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'\n", @@ -138,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "conventional-salvation", + "id": "verbal-hepatitis", "metadata": {}, "source": [ "## Celltypist prediction" @@ -147,7 +146,48 @@ { "cell_type": "code", "execution_count": 5, - "id": "protective-traffic", + "id": "dutch-negative", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "📜 Retrieving model list from server https://celltypist.cog.sanger.ac.uk/models/models.json\n", + "📚 Total models in list: 19\n", + "📂 Storing models in /home/jovyan/.celltypist/data/models\n", + "💾 Downloading model [1/19]: Immune_All_Low.pkl\n", + "💾 Downloading model [2/19]: Immune_All_High.pkl\n", + "💾 Downloading model [3/19]: Adult_Mouse_Gut.pkl\n", + "💾 Downloading model [4/19]: Autopsy_COVID19_Lung.pkl\n", + "💾 Downloading model [5/19]: COVID19_HumanChallenge_Blood.pkl\n", + "💾 Downloading model [6/19]: COVID19_Immune_Landscape.pkl\n", + "💾 Downloading model [7/19]: Cells_Fetal_Lung.pkl\n", + "💾 Downloading model [8/19]: Cells_Intestinal_Tract.pkl\n", + "💾 Downloading model [9/19]: Cells_Lung_Airway.pkl\n", + "💾 Downloading model [10/19]: Developing_Human_Brain.pkl\n", + "💾 Downloading model [11/19]: Developing_Human_Thymus.pkl\n", + "💾 Downloading model [12/19]: Developing_Mouse_Brain.pkl\n", + "💾 Downloading model [13/19]: Healthy_COVID19_PBMC.pkl\n", + "💾 Downloading model [14/19]: Human_IPF_Lung.pkl\n", + "💾 Downloading model [15/19]: Human_Lung_Atlas.pkl\n", + "💾 Downloading model [16/19]: Human_PF_Lung.pkl\n", + "💾 Downloading model [17/19]: Lethal_COVID19_Lung.pkl\n", + "💾 Downloading model [18/19]: Nuclei_Lung_Airway.pkl\n", + "💾 Downloading model [19/19]: Pan_Fetal_Human.pkl\n" + ] + } + ], + "source": [ + "# download newest Celltypist models - only required once\n", + "from celltypist import models\n", + "models.download_models(force_update = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "massive-emerald", "metadata": {}, "outputs": [ { @@ -173,14 +213,13 @@ " layers: 'raw'" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# TODO - upload celltypist model\n", - "predictions = celltypist.annotate(adata, model='/nfs/team205/ld21/cell2tcr/data/sars_cov_2_challenge_pbmc.pkl')\n", + "predictions = celltypist.annotate(adata, model='COVID19_HumanChallenge_Blood.pkl')\n", "adata = predictions.to_adata()\n", "adata = adata[adata.obs.predicted_labels.str.contains('T ')]\n", "adata" @@ -188,8 +227,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "accompanied-basin", + "execution_count": 7, + "id": "billion-massage", "metadata": {}, "outputs": [ { @@ -256,7 +295,7 @@ "Sepsis;HP:0100806 0.167136 0.082163" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, @@ -284,7 +323,7 @@ }, { "cell_type": "markdown", - "id": "becoming-twelve", + "id": "banner-store", "metadata": {}, "source": [ "## Subset to T cells of interest" @@ -292,8 +331,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "secure-liquid", + "execution_count": 8, + "id": "textile-fruit", "metadata": {}, "outputs": [], "source": [ @@ -303,8 +342,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "special-thirty", + "execution_count": 9, + "id": "stupid-anaheim", "metadata": {}, "outputs": [], "source": [ @@ -322,7 +361,7 @@ }, { "cell_type": "markdown", - "id": "romance-utilization", + "id": "pediatric-spanking", "metadata": {}, "source": [ "## Infer TCR motifs" @@ -331,7 +370,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "handed-employment", + "id": "welcome-trinity", "metadata": {}, "outputs": [ { @@ -348,19 +387,22 @@ } ], "source": [ - "# If your VDJ genes don't end in *01, *02, ... : Set 'add_suffix=True'\n", - "cell2tcr.motifs(df, threshold=35, add_suffix=False) # change chunk size in case of memory allocation errors" + "cell2tcr.motifs(df, add_suffix=False) " ] }, { "cell_type": "code", "execution_count": 10, - "id": "fatal-hayes", + "id": "coastal-location", "metadata": {}, "outputs": [], "source": [ "### Troubleshooting errors with VDJ gene formats in 'cell2tcr.motifs'\n", "\n", + "# If your VDJ genes don't end in *01, *02, ... : Set 'add_suffix=True'\n", + "\n", + "# Reduce chunk_size in case of memory errors\n", + "\n", "# if specific VDJ genes are missing : manually remove those cells, like this:\n", "df = df[~df.IR_VDJ_1_v_call.isin(['TRBV24-1*02'])]\n", "df = df[~df.IR_VJ_1_v_call.isin(['TRAV36/DV7*05','TRAV1-2*03','TRAV6*07','TRAV8-2*03','TRAV29/DV5*04','TRAV30*05',])]\n", @@ -373,7 +415,7 @@ }, { "cell_type": "markdown", - "id": "black-identifier", + "id": "frozen-constitution", "metadata": {}, "source": [ "## Draw 10 most shared motifs" @@ -382,7 +424,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "accepting-spain", + "id": "important-thickness", "metadata": {}, "outputs": [], "source": [ @@ -393,7 +435,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "sweet-removal", + "id": "municipal-sarah", "metadata": {}, "outputs": [ { @@ -504,8 +546,8 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "destroyed-surname", + "execution_count": 14, + "id": "national-kitchen", "metadata": {}, "outputs": [ { @@ -920,7 +962,7 @@ "[2050 rows x 127 columns]" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" }