Skip to content

Commit

Permalink
Export escape scores distance map based on 80th percentile
Browse files Browse the repository at this point in the history
  • Loading branch information
huddlej committed Feb 16, 2024
1 parent 4c21516 commit 13a4211
Showing 1 changed file with 76 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,20 @@
"nonnegative_ha1_escape_scores.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0fa61fc-5dce-4320-a61c-586d18c76bda",
"metadata": {},
"outputs": [],
"source": [
"upper_80th_quantile_escape_score_by_site_and_amino_acid = nonnegative_ha1_escape_scores.groupby([\n",
" \"site\",\n",
" \"wildtype\",\n",
" \"mutant\",\n",
"])[\"escape_mean\"].quantile(0.8).to_dict()"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -470,7 +484,7 @@
"id": "00c62c81-aa8c-41ad-b092-fb525eb1c2ed",
"metadata": {},
"source": [
"Export a per-site-and-amino-acid distance map for the average scores calculated above. When used with augur distance, this map will calculate a weighted Hamming distance between each sample and the MRCA of the tree at any site with a mutation specifically from the wild type allele to the experimentally measured allele. The weights of the Hamming distance are the average escape score values for each site and amino acid mutation."
"Export per-site-and-amino-acid distance maps for the average and 80th percentile escape scores calculated above. When used with augur distance, each map will calculate a weighted Hamming distance between each sample and the MRCA of the tree at any site with a mutation specifically from the wild type allele to the experimentally measured allele. The weights of the Hamming distance are the average or 80th percentile escape score values, respectively, for each site and amino acid mutation."
]
},
{
Expand All @@ -481,7 +495,7 @@
"outputs": [],
"source": [
"distance_map = {\n",
" \"name\": \"Welsh et al. escape scores per site and amino acid\",\n",
" \"name\": \"Average Welsh et al. escape scores per site and amino acid\",\n",
" \"default\": 0,\n",
" \"map\": {\n",
" \"HA1\": {}\n",
Expand Down Expand Up @@ -558,6 +572,65 @@
"id": "10bb367b-ad31-43ca-afff-7226f59349a6",
"metadata": {},
"outputs": [],
"source": [
"distance_map = {\n",
" \"name\": \"Upper 80th quantile of Welsh et al. escape scores per site and amino acid\",\n",
" \"default\": 0,\n",
" \"map\": {\n",
" \"HA1\": {}\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76e774d0-732c-4169-b626-2c6c5b308841",
"metadata": {},
"outputs": [],
"source": [
"for (site, wildtype, mutant), escape_score in upper_80th_quantile_escape_score_by_site_and_amino_acid.items():\n",
" if str(site) not in distance_map[\"map\"][\"HA1\"]:\n",
" distance_map[\"map\"][\"HA1\"][str(site)] = []\n",
"\n",
" distance_map[\"map\"][\"HA1\"][str(site)].append({\n",
" \"from\": wildtype,\n",
" \"to\": mutant,\n",
" \"weight\": round(escape_score, 6),\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e64a284-d092-489e-9ab5-14801d2c6d76",
"metadata": {},
"outputs": [],
"source": [
"distance_map[\"map\"][\"HA1\"][\"140\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9c74bcd-5455-470b-8d81-6c3ade6db68b",
"metadata": {},
"outputs": [],
"source": [
"with open(\"welsh_upper_80th_quantile_escape_by_site_and_amino_acid.json\", \"w\") as oh:\n",
" json.dump(\n",
" distance_map,\n",
" oh,\n",
" indent=2,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e8284f1-d212-4d17-92cd-7aa69351b29b",
"metadata": {},
"outputs": [],
"source": []
}
],
Expand All @@ -577,7 +650,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 13a4211

Please sign in to comment.