Skip to content

Commit

Permalink
Change file paths
Browse files Browse the repository at this point in the history
  • Loading branch information
YojanaGadiya committed May 24, 2021
1 parent a2b6490 commit 6650579
Showing 1 changed file with 29 additions and 147 deletions.
176 changes: 29 additions & 147 deletions notebooks/5.0-transcriptomics_specific_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "documentary-privacy",
"metadata": {},
"outputs": [],
Expand All @@ -46,12 +46,12 @@
"\n",
"from utils import (get_paths, filter_dataset, \n",
" get_transcriptomic_paths, create_graph_from_df,\n",
" get_path_count, DATA_DIR)"
" get_path_count, DATA_DIR, KG_DATA_PATH)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "stuck-delicious",
"metadata": {},
"outputs": [],
Expand All @@ -71,20 +71,20 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "widespread-insured",
"metadata": {},
"outputs": [],
"source": [
"openbiolink_path = os.path.join(DATA_DIR, 'kg', 'normalized', 'openbiolink_kg_normalized.tsv')\n",
"custom_path = os.path.join(DATA_DIR, 'kg', 'normalized', 'custom_kg_normalized.tsv')\n",
"\n",
"# Load DF\n",
"openbiolink_df = pd.read_csv(openbiolink_path, sep='\\t')\n",
"openbiolink_df.rename(columns={'relation': 'polarity'}, inplace=True)\n",
"openbiolink_df = pd.read_csv(\n",
" os.path.join(KG_DATA_PATH, 'openbiolink_filtered_kg.tsv'),\n",
" sep='\\t'\n",
")\n",
"\n",
"custom_df = pd.read_csv(custom_path, sep='\\t')\n",
"custom_df.rename(columns={'relation': 'polarity'}, inplace=True)"
"custom_df = pd.read_csv(\n",
" os.path.join(KG_DATA_PATH, 'custom_filtered_kg.tsv'), \n",
" sep='\\t'\n",
")"
]
},
{
Expand All @@ -97,21 +97,21 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "verified-electric",
"execution_count": null,
"id": "accomplished-daughter",
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(DATA_DIR, 'creeds', 'normalized', 'harmonized_expression.json')) as file:\n",
"with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file:\n",
" creed_dict = json.load(file)\n",
" \n",
"with open(os.path.join(DATA_DIR, 'geo', 'normalized', 'harmonized_expression.json')) as file2:\n",
"with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file2:\n",
" geo_dict = json.load(file2)\n",
" \n",
"with open(os.path.join(DATA_DIR, 'l1000', 'normalized', 'harmonized_expression.json')) as file3:\n",
" l1000_dict = json.load(file3)\n",
"with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file3:\n",
" lc1000_dict = json.load(file3)\n",
" \n",
"with open(os.path.join(DATA_DIR, 'open_targets', 'normalized', 'harmonized_expression.json')) as file4:\n",
"with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file4:\n",
" open_target_dict = json.load(file4)"
]
},
Expand All @@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "conditional-object",
"metadata": {},
"outputs": [],
Expand All @@ -149,10 +149,10 @@
"open_target_dict = {'openbio': target_openbio, 'custom': target_custom}\n",
"\n",
"# L1000\n",
"l1000_openbio = filter_dataset(dataset=l1000_dict, graph_df=openbiolink_df)\n",
"l1000_custom = filter_dataset(dataset=l1000_dict, graph_df=custom_df)\n",
"lc1000_openbio = filter_dataset(dataset=lc1000_dict, graph_df=openbiolink_df)\n",
"lc1000_custom = filter_dataset(dataset=lc1000_dict, graph_df=custom_df)\n",
"\n",
"l1000_dict = {'openbio': l1000_openbio, 'custom': l1000_custom}\n"
"lc1000_dict = {'openbio': lc1000_openbio, 'custom': lc1000_custom}\n"
]
},
{
Expand All @@ -165,7 +165,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "settled-sustainability",
"metadata": {},
"outputs": [],
Expand All @@ -174,33 +174,6 @@
" clinical_pair_dict = json.load(file).keys()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "aggressive-italy",
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(DATA_DIR, 'gold-standard', 'filtered-indications.json')) as file:\n",
" indication_pair_dict = json.load(file).keys()"
]
},
{
"cell_type": "markdown",
"id": "touched-grocery",
"metadata": {},
"source": [
"# Analysis path for "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "integral-prompt",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "environmental-gross",
Expand All @@ -211,7 +184,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "atmospheric-compiler",
"metadata": {},
"outputs": [],
Expand All @@ -220,109 +193,20 @@
" 'creed' : creed_dict,\n",
" 'target': open_target_dict,\n",
" 'geo': geo_dict,\n",
" 'l1000': l1000_dict,\n",
" 'lc1000': lc1000_dict,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "marked-tower",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 385178.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"creed_target\n",
"### creed-target ###\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 4512/4512 [06:53<00:00, 10.90it/s]\n",
"Calculating concordance: 100%|██████████| 5/5 [00:07<00:00, 1.57s/it]\n",
"Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 376019.08it/s]\n",
"100%|██████████| 1925/1925 [08:20<00:00, 3.84it/s]\n",
"Calculating concordance: 100%|██████████| 5/5 [00:24<00:00, 4.81s/it]\n",
"Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 424284.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"creed_geo\n",
"### creed-geo ###\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 1728/1728 [02:50<00:00, 10.12it/s]\n",
"Calculating concordance: 100%|██████████| 5/5 [00:13<00:00, 2.72s/it]\n",
"Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 392322.96it/s]\n",
"100%|██████████| 935/935 [05:23<00:00, 2.89it/s]\n",
"Calculating concordance: 100%|██████████| 5/5 [00:49<00:00, 9.87s/it]\n",
"Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 416777.68it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"l1000_target\n",
"### l1000-target ###\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 37788/37788 [30:02<00:00, 20.96it/s] \n",
"Calculating concordance: 100%|██████████| 5/5 [00:22<00:00, 4.51s/it]\n",
"Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 358597.00it/s]\n",
"100%|██████████| 10220/10220 [49:03<00:00, 3.47it/s] \n",
"Calculating concordance: 100%|██████████| 5/5 [03:01<00:00, 36.26s/it]\n",
"Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 345137.39it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"l1000_geo\n",
"### l1000-geo ###\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 14472/14472 [13:36<00:00, 17.73it/s] \n",
"Calculating concordance: 100%|██████████| 5/5 [00:56<00:00, 11.31s/it]\n",
"Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 398025.71it/s]\n",
"100%|██████████| 4964/4964 [28:39<00:00, 2.89it/s] \n",
"Calculating concordance: 100%|██████████| 5/5 [05:00<00:00, 60.17s/it]\n"
]
}
],
"outputs": [],
"source": [
"for c, d in product(['creed', 'l1000'], ['target', 'geo']):\n",
"for c, d in product(['creed', 'lc1000'], ['target', 'geo']):\n",
" c_set = MAP[c]\n",
" d_set = MAP[d]\n",
" graph_name = c + '_' + d\n",
Expand All @@ -336,7 +220,6 @@
" 'number_of_paths',\n",
" 'number_of_concordant_paths',\n",
" 'in_clinical_trial',\n",
" 'in_drug_indication',\n",
" 'number_of_concordant_activatory_paths',\n",
" 'number_of_concordant_inhibitory_paths',\n",
" 'subgraph_size',\n",
Expand Down Expand Up @@ -425,7 +308,6 @@
" 'number_of_paths': results['number_of_paths'],\n",
" 'number_of_concordant_paths': concordant_num,\n",
" 'in_clinical_trial': results['in_clinical_trial'],\n",
" 'in_drug_indication': results['in_drug_indication'],\n",
" 'number_of_concordant_activatory_paths': activated_paths,\n",
" 'number_of_concordant_inhibitory_paths': inhibited_paths,\n",
" 'subgraph_size': results['subgraph_size'],\n",
Expand Down

0 comments on commit 6650579

Please sign in to comment.