Change file paths

enveda · May 24, 2021 · 6650579 · 6650579
1 parent a2b6490
commit 6650579
Showing 1 changed file with 29 additions and 147 deletions.
diff --git a/notebooks/5.0-transcriptomics_specific_analysis.ipynb b/notebooks/5.0-transcriptomics_specific_analysis.ipynb
@@ -31,7 +31,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "documentary-privacy",
    "metadata": {},
    "outputs": [],
@@ -46,12 +46,12 @@
     "\n",
     "from utils import (get_paths, filter_dataset, \n",
     "                   get_transcriptomic_paths, create_graph_from_df,\n",
-    "                   get_path_count, DATA_DIR)"
+    "                   get_path_count, DATA_DIR, KG_DATA_PATH)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "stuck-delicious",
    "metadata": {},
    "outputs": [],
@@ -71,20 +71,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "widespread-insured",
    "metadata": {},
    "outputs": [],
    "source": [
-    "openbiolink_path = os.path.join(DATA_DIR, 'kg', 'normalized', 'openbiolink_kg_normalized.tsv')\n",
-    "custom_path = os.path.join(DATA_DIR, 'kg', 'normalized', 'custom_kg_normalized.tsv')\n",
-    "\n",
-    "# Load DF\n",
-    "openbiolink_df = pd.read_csv(openbiolink_path, sep='\\t')\n",
-    "openbiolink_df.rename(columns={'relation': 'polarity'}, inplace=True)\n",
+    "openbiolink_df = pd.read_csv(\n",
+    "    os.path.join(KG_DATA_PATH, 'openbiolink_filtered_kg.tsv'),\n",
+    "    sep='\\t'\n",
+    ")\n",
     "\n",
-    "custom_df = pd.read_csv(custom_path, sep='\\t')\n",
-    "custom_df.rename(columns={'relation': 'polarity'}, inplace=True)"
+    "custom_df = pd.read_csv(\n",
+    "    os.path.join(KG_DATA_PATH, 'custom_filtered_kg.tsv'), \n",
+    "    sep='\\t'\n",
+    ")"
    ]
   },
   {
@@ -97,21 +97,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "verified-electric",
+   "execution_count": null,
+   "id": "accomplished-daughter",
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open(os.path.join(DATA_DIR, 'creeds', 'normalized', 'harmonized_expression.json')) as file:\n",
+    "with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file:\n",
     "    creed_dict = json.load(file)\n",
     "    \n",
-    "with open(os.path.join(DATA_DIR, 'geo', 'normalized', 'harmonized_expression.json')) as file2:\n",
+    "with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file2:\n",
     "    geo_dict = json.load(file2)\n",
     "    \n",
-    "with open(os.path.join(DATA_DIR, 'l1000', 'normalized', 'harmonized_expression.json')) as file3:\n",
-    "    l1000_dict = json.load(file3)\n",
+    "with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file3:\n",
+    "    lc1000_dict = json.load(file3)\n",
     "    \n",
-    "with open(os.path.join(DATA_DIR, 'open_targets', 'normalized', 'harmonized_expression.json')) as file4:\n",
+    "with open(os.path.join(DATA_DIR, 'transcriptomics', 'harmonized_expression.json')) as file4:\n",
     "    open_target_dict = json.load(file4)"
    ]
   },
@@ -125,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "conditional-object",
    "metadata": {},
    "outputs": [],
@@ -149,10 +149,10 @@
     "open_target_dict = {'openbio': target_openbio, 'custom': target_custom}\n",
     "\n",
     "# L1000\n",
-    "l1000_openbio = filter_dataset(dataset=l1000_dict, graph_df=openbiolink_df)\n",
-    "l1000_custom = filter_dataset(dataset=l1000_dict, graph_df=custom_df)\n",
+    "lc1000_openbio = filter_dataset(dataset=lc1000_dict, graph_df=openbiolink_df)\n",
+    "lc1000_custom = filter_dataset(dataset=lc1000_dict, graph_df=custom_df)\n",
     "\n",
-    "l1000_dict = {'openbio': l1000_openbio, 'custom': l1000_custom}\n"
+    "lc1000_dict = {'openbio': lc1000_openbio, 'custom': lc1000_custom}\n"
    ]
   },
   {
@@ -165,7 +165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "settled-sustainability",
    "metadata": {},
    "outputs": [],
@@ -174,33 +174,6 @@
     "    clinical_pair_dict = json.load(file).keys()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "aggressive-italy",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(os.path.join(DATA_DIR, 'gold-standard', 'filtered-indications.json')) as file:\n",
-    "    indication_pair_dict = json.load(file).keys()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "touched-grocery",
-   "metadata": {},
-   "source": [
-    "# Analysis path for "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "integral-prompt",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "id": "environmental-gross",
@@ -211,7 +184,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "atmospheric-compiler",
    "metadata": {},
    "outputs": [],
@@ -220,109 +193,20 @@
     "    'creed' : creed_dict,\n",
     "    'target': open_target_dict,\n",
     "    'geo': geo_dict,\n",
-    "    'l1000': l1000_dict,\n",
+    "    'lc1000': lc1000_dict,\n",
     "}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "marked-tower",
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 385178.96it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "creed_target\n",
-      "### creed-target ###\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "100%|██████████| 4512/4512 [06:53<00:00, 10.90it/s]\n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:07<00:00,  1.57s/it]\n",
-      "Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 376019.08it/s]\n",
-      "100%|██████████| 1925/1925 [08:20<00:00,  3.84it/s]\n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:24<00:00,  4.81s/it]\n",
-      "Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 424284.52it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "creed_geo\n",
-      "### creed-geo ###\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "100%|██████████| 1728/1728 [02:50<00:00, 10.12it/s]\n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:13<00:00,  2.72s/it]\n",
-      "Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 392322.96it/s]\n",
-      "100%|██████████| 935/935 [05:23<00:00,  2.89it/s]\n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:49<00:00,  9.87s/it]\n",
-      "Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 416777.68it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "l1000_target\n",
-      "### l1000-target ###\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 37788/37788 [30:02<00:00, 20.96it/s]  \n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:22<00:00,  4.51s/it]\n",
-      "Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 358597.00it/s]\n",
-      "100%|██████████| 10220/10220 [49:03<00:00,  3.47it/s] \n",
-      "Calculating concordance: 100%|██████████| 5/5 [03:01<00:00, 36.26s/it]\n",
-      "Loading graph: 100%|██████████| 48878/48878 [00:00<00:00, 345137.39it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "l1000_geo\n",
-      "### l1000-geo ###\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 14472/14472 [13:36<00:00, 17.73it/s] \n",
-      "Calculating concordance: 100%|██████████| 5/5 [00:56<00:00, 11.31s/it]\n",
-      "Loading graph: 100%|██████████| 52182/52182 [00:00<00:00, 398025.71it/s]\n",
-      "100%|██████████| 4964/4964 [28:39<00:00,  2.89it/s]  \n",
-      "Calculating concordance: 100%|██████████| 5/5 [05:00<00:00, 60.17s/it]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "for c, d in product(['creed', 'l1000'], ['target', 'geo']):\n",
+    "for c, d in product(['creed', 'lc1000'], ['target', 'geo']):\n",
     "    c_set = MAP[c]\n",
     "    d_set = MAP[d]\n",
     "    graph_name = c + '_' + d\n",
@@ -336,7 +220,6 @@
     "        'number_of_paths',\n",
     "        'number_of_concordant_paths',\n",
     "        'in_clinical_trial',\n",
-    "        'in_drug_indication',\n",
     "        'number_of_concordant_activatory_paths',\n",
     "        'number_of_concordant_inhibitory_paths',\n",
     "        'subgraph_size',\n",
@@ -425,7 +308,6 @@
     "                                'number_of_paths': results['number_of_paths'],\n",
     "                                'number_of_concordant_paths': concordant_num,\n",
     "                                'in_clinical_trial': results['in_clinical_trial'],\n",
-    "                                'in_drug_indication': results['in_drug_indication'],\n",
     "                                'number_of_concordant_activatory_paths': activated_paths,\n",
     "                                'number_of_concordant_inhibitory_paths': inhibited_paths,\n",
     "                                'subgraph_size': results['subgraph_size'],\n",