Skip to content

Commit

Permalink
Update Celltypist model
Browse files Browse the repository at this point in the history
  • Loading branch information
lisadratva committed Apr 4, 2023
1 parent 51d98ee commit 9e42877
Showing 1 changed file with 79 additions and 37 deletions.
116 changes: 79 additions & 37 deletions cell2tcr.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "aquatic-variable",
"id": "robust-garbage",
"metadata": {},
"source": [
"# Cell2TCR\n",
Expand All @@ -12,7 +12,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "basic-cabin",
"id": "collect-insulin",
"metadata": {},
"outputs": [
{
Expand All @@ -34,7 +34,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "foreign-petersburg",
"id": "municipal-jacksonville",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,7 +44,7 @@
},
{
"cell_type": "markdown",
"id": "natural-complement",
"id": "ready-cargo",
"metadata": {},
"source": [
"## Download data\n",
Expand All @@ -57,16 +57,16 @@
},
{
"cell_type": "markdown",
"id": "british-leader",
"id": "sensitive-instrumentation",
"metadata": {},
"source": [
"## Load and format data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "animated-triangle",
"execution_count": 3,
"id": "ceramic-minnesota",
"metadata": {},
"outputs": [
{
Expand All @@ -80,7 +80,7 @@
" layers: 'raw'"
]
},
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -93,11 +93,11 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "executed-place",
"id": "vietnamese-timing",
"metadata": {},
"outputs": [],
"source": [
"### in case of memory issues during QC / Celltypist prediction : save this T cell adata to file and load in new session ###\n",
"### In case of memory issues during QC / CellTypist prediction: save the T cell adata to file and load it in a new session ###\n",
"\n",
"# subset to T cells\n",
"# adata = adata[adata.obs.TCR_chain_composition=='double_alpha_beta']\n",
Expand All @@ -113,7 +113,7 @@
},
{
"cell_type": "markdown",
"id": "inner-identity",
"id": "cultural-tradition",
"metadata": {},
"source": [
"## QC"
Expand All @@ -122,11 +122,10 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "sophisticated-orchestra",
"id": "coordinated-drawing",
"metadata": {},
"outputs": [],
"source": [
"# QC, normalisation and log1p\n",
"adata.X = adata.layers['raw']\n",
"sc.pp.filter_cells(adata, min_genes=200)\n",
"adata.var['mt'] = adata.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'\n",
Expand All @@ -138,7 +137,7 @@
},
{
"cell_type": "markdown",
"id": "conventional-salvation",
"id": "verbal-hepatitis",
"metadata": {},
"source": [
"## Celltypist prediction"
Expand All @@ -147,7 +146,48 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "protective-traffic",
"id": "dutch-negative",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"📜 Retrieving model list from server https://celltypist.cog.sanger.ac.uk/models/models.json\n",
"📚 Total models in list: 19\n",
"📂 Storing models in /home/jovyan/.celltypist/data/models\n",
"💾 Downloading model [1/19]: Immune_All_Low.pkl\n",
"💾 Downloading model [2/19]: Immune_All_High.pkl\n",
"💾 Downloading model [3/19]: Adult_Mouse_Gut.pkl\n",
"💾 Downloading model [4/19]: Autopsy_COVID19_Lung.pkl\n",
"💾 Downloading model [5/19]: COVID19_HumanChallenge_Blood.pkl\n",
"💾 Downloading model [6/19]: COVID19_Immune_Landscape.pkl\n",
"💾 Downloading model [7/19]: Cells_Fetal_Lung.pkl\n",
"💾 Downloading model [8/19]: Cells_Intestinal_Tract.pkl\n",
"💾 Downloading model [9/19]: Cells_Lung_Airway.pkl\n",
"💾 Downloading model [10/19]: Developing_Human_Brain.pkl\n",
"💾 Downloading model [11/19]: Developing_Human_Thymus.pkl\n",
"💾 Downloading model [12/19]: Developing_Mouse_Brain.pkl\n",
"💾 Downloading model [13/19]: Healthy_COVID19_PBMC.pkl\n",
"💾 Downloading model [14/19]: Human_IPF_Lung.pkl\n",
"💾 Downloading model [15/19]: Human_Lung_Atlas.pkl\n",
"💾 Downloading model [16/19]: Human_PF_Lung.pkl\n",
"💾 Downloading model [17/19]: Lethal_COVID19_Lung.pkl\n",
"💾 Downloading model [18/19]: Nuclei_Lung_Airway.pkl\n",
"💾 Downloading model [19/19]: Pan_Fetal_Human.pkl\n"
]
}
],
"source": [
"# Refresh the CellTypist model index and download only the model used for annotation below.\n",
"# Needs network access; only required once, since the model is stored in the local CellTypist models folder.\n",
"from celltypist import models\n",
"models.download_models(force_update=True, model='COVID19_HumanChallenge_Blood.pkl')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "massive-emerald",
"metadata": {},
"outputs": [
{
Expand All @@ -173,23 +213,22 @@
" layers: 'raw'"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO - upload celltypist model\n",
"predictions = celltypist.annotate(adata, model='/nfs/team205/ld21/cell2tcr/data/sars_cov_2_challenge_pbmc.pkl')\n",
"predictions = celltypist.annotate(adata, model='COVID19_HumanChallenge_Blood.pkl')\n",
"adata = predictions.to_adata()\n",
"adata = adata[adata.obs.predicted_labels.str.contains('T ')]\n",
"adata"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "accompanied-basin",
"execution_count": 7,
"id": "billion-massage",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -256,7 +295,7 @@
"Sepsis;HP:0100806 0.167136 0.082163"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -284,16 +323,16 @@
},
{
"cell_type": "markdown",
"id": "becoming-twelve",
"id": "banner-store",
"metadata": {},
"source": [
"## Subset to T cells of interest"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "secure-liquid",
"execution_count": 8,
"id": "textile-fruit",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -303,8 +342,8 @@
},
{
"cell_type": "code",
"execution_count": 8,
"id": "special-thirty",
"execution_count": 9,
"id": "stupid-anaheim",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -322,7 +361,7 @@
},
{
"cell_type": "markdown",
"id": "romance-utilization",
"id": "pediatric-spanking",
"metadata": {},
"source": [
"## Infer TCR motifs"
Expand All @@ -331,7 +370,7 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "handed-employment",
"id": "welcome-trinity",
"metadata": {},
"outputs": [
{
Expand All @@ -348,19 +387,22 @@
}
],
"source": [
"# If your VDJ genes don't end in *01, *02, ... : Set 'add_suffix=True'\n",
"cell2tcr.motifs(df, threshold=35, add_suffix=False) # change chunk size in case of memory allocation errors"
"cell2tcr.motifs(df, add_suffix=False) "
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fatal-hayes",
"id": "coastal-location",
"metadata": {},
"outputs": [],
"source": [
"### Troubleshooting 'cell2tcr.motifs'\n",
"\n",
"# If your VDJ genes don't end in *01, *02, ...: set 'add_suffix=True'\n",
"\n",
"# In case of memory errors: reduce chunk_size\n",
"\n",
"# If specific VDJ genes are missing: manually remove those cells, like this:\n",
"df = df[~df.IR_VDJ_1_v_call.isin(['TRBV24-1*02'])]\n",
"df = df[~df.IR_VJ_1_v_call.isin(['TRAV36/DV7*05','TRAV1-2*03','TRAV6*07','TRAV8-2*03','TRAV29/DV5*04','TRAV30*05',])]\n",
Expand All @@ -373,7 +415,7 @@
},
{
"cell_type": "markdown",
"id": "black-identifier",
"id": "frozen-constitution",
"metadata": {},
"source": [
"## Draw 10 most shared motifs"
Expand All @@ -382,7 +424,7 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "accepting-spain",
"id": "important-thickness",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -393,7 +435,7 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "sweet-removal",
"id": "municipal-sarah",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -504,8 +546,8 @@
},
{
"cell_type": "code",
"execution_count": 15,
"id": "destroyed-surname",
"execution_count": 14,
"id": "national-kitchen",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -920,7 +962,7 @@
"[2050 rows x 127 columns]"
]
},
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 9e42877

Please sign in to comment.