Condense WP higly weighted cells plots (#106)

* Condense WP higly weighted cells plots * Fix threshold * Comment out figure * Comment out * Fix error * Fix error * Update R2X plot * Add correcting conditions --------- Co-authored-by: Andrew Ramirez <aramirez@aretha.seas.ucla.edu>
meyer-lab · Nov 6, 2024 · dc2d6d2 · dc2d6d2
1 parent e708e36
commit dc2d6d2
Show file tree

Hide file tree

Showing 9 changed files with 229 additions and 264 deletions.
diff --git a/pf2/figures/commonFuncs/plotGeneral.py b/pf2/figures/commonFuncs/plotGeneral.py
@@ -93,7 +93,7 @@ def add_obs_cmp_both_label(
             else:
                 thres_value = top_perc
                 threshold2 = np.percentile(wprojs, thres_value, axis=0)
-                idx = wprojs[:, cmp - 1] < threshold1[cmp - 1]
+                idx = wprojs[:, cmp - 1] < threshold2[cmp - 1]
 
         X.obs[f"Cmp{cmp}"] = idx
 
@@ -129,6 +129,114 @@ def add_obs_label(X: anndata.AnnData, cmp1: str, cmp2: str):
     return X
 
 
+def add_obs_cmp_both_label_three(
+    X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int, pos1=True, pos2=True, pos3=True, top_perc=1
+):
+    """Adds if cells in top/bot percentage"""
+    wprojs = X.obsm["weighted_projections"]
+    pos_neg = [pos1, pos2, pos3]
+    for i, cmp in enumerate([cmp1, cmp2, cmp3]):
+        if i == 0:
+            if pos_neg[i] is True:
+                thres_value = 100 - top_perc
+                threshold1 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] > threshold1[cmp - 1]
+
+            else:
+                thres_value = top_perc
+                threshold1 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] < threshold1[cmp - 1]
+
+        if i == 1:
+            if pos_neg[i] is True:
+                thres_value = 100 - top_perc
+                threshold2 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] > threshold2[cmp - 1]
+            else:
+                thres_value = top_perc
+                threshold2 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] < threshold2[cmp - 1]
+
+        if i == 2:
+            if pos_neg[i] is True:
+                thres_value = 100 - top_perc
+                threshold3 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] > threshold3[cmp - 1]
+            else:
+                thres_value = top_perc
+                threshold3 = np.percentile(wprojs, thres_value, axis=0)
+                idx = wprojs[:, cmp - 1] < threshold3[cmp - 1]
+
+        X.obs[f"Cmp{cmp}"] = idx
+
+    if pos1 is True and pos2 is True and pos3 is True:
+        idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
+                )
+    elif pos1 is False and pos2 is False and pos3 is False:
+        idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
+                )
+    elif pos1 is True and pos2 is True and pos3 is False:
+        idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
+                 wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
+            )
+
+    elif pos1 is True and pos2 is False and pos3 is True:
+        idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
+                )
+    elif pos1 is True and pos2 is False and pos3 is False:
+        idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
+                )
+
+    elif pos1 is False and pos2 is False and pos3 is True:
+        idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
+                )
+    elif pos1 is False and pos2 is True and pos3 is True:
+        idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
+                )
+    elif pos1 is False and pos2 is True and pos3 is False:
+        idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
+            wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
+                    wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
+                )
+
+    X.obs["Both"] = idx
+
+    return X
+
+
+def add_obs_label_three(X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int):
+    """Creates AnnData observation column"""
+    X.obs.loc[((X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == False)
+               & (X.obs[f"Cmp{cmp3}"] == False), "Label")] = f"Cmp{cmp1}"
+    X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == True)
+              & (X.obs[f"Cmp{cmp3}"] == False), "Label"] = f"Cmp{cmp2}"
+    X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False)
+              & (X.obs[f"Cmp{cmp3}"] == True), "Label"] = f"Cmp{cmp3}"
+
+    X.obs.loc[(X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == True)
+              & (X.obs[f"Cmp{cmp3}"] == True), "Label"] = "Both"
+    X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False)
+              & (X.obs[f"Cmp{cmp3}"] == False), "Label"] = "NoLabel"
+
+    X = X[(X.obs["Label"] == f"Cmp{cmp1}") | (X.obs["Label"] == f"Cmp{cmp2}") | 
+                  (X.obs["Label"] == f"Cmp{cmp3}") | (X.obs["Label"] == "Both") |
+                  (X.obs["Label"] == "NoLabel")]
+
+    return X
+
 
 
 def plot_avegene_cmps(

diff --git a/pf2/figures/figure3.py b/pf2/figures/figure3.py
@@ -11,43 +11,43 @@
 
 
 def makeFigure():
-    meta = import_meta()
-    data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
-    conversions = convert_to_patients(data)
+    # meta = import_meta()
+    # data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
+    # conversions = convert_to_patients(data)
 
-    patient_factor = pd.DataFrame(
-        data.uns["Pf2_A"],
-        index=conversions,
-        columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
-    )
-    meta = meta.loc[patient_factor.index, :]
+    # patient_factor = pd.DataFrame(
+    #     data.uns["Pf2_A"],
+    #     index=conversions,
+    #     columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
+    # )
+    # meta = meta.loc[patient_factor.index, :]
 
     axs, fig = getSetup((4, 4), (1, 1))
-    ax = axs[0]
+    # ax = axs[0]
 
-    probabilities, labels = predict_mortality(patient_factor, meta, proba=True)
+    # probabilities, labels = predict_mortality(patient_factor, meta, proba=True)
 
-    predicted = [0 if prob < 0.5 else 1 for prob in probabilities]
-    accuracy = accuracy_score(labels, predicted)
+    # predicted = [0 if prob < 0.5 else 1 for prob in probabilities]
+    # accuracy = accuracy_score(labels, predicted)
 
-    fpr, tpr, _ = roc_curve(labels, probabilities)
-    auc_roc = roc_auc_score(labels, probabilities)
+    # fpr, tpr, _ = roc_curve(labels, probabilities)
+    # auc_roc = roc_auc_score(labels, probabilities)
 
-    ax.plot([0, 1], [0, 1], linestyle="--", color="k")
-    ax.plot(fpr, tpr)
-    ax.text(
-        0.99,
-        0.01,
-        s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}",
-        ha="right",
-        va="bottom",
-        transform=ax.transAxes,
-    )
+    # ax.plot([0, 1], [0, 1], linestyle="--", color="k")
+    # ax.plot(fpr, tpr)
+    # ax.text(
+    #     0.99,
+    #     0.01,
+    #     s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}",
+    #     ha="right",
+    #     va="bottom",
+    #     transform=ax.transAxes,
+    # )
 
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
+    # ax.set_xlim([0, 1])
+    # ax.set_ylim([0, 1])
 
-    ax.set_ylabel("True Positive Rate")
-    ax.set_xlabel("False Positive Rate")
+    # ax.set_ylabel("True Positive Rate")
+    # ax.set_xlabel("False Positive Rate")
 
     return fig
diff --git a/pf2/figures/figure4.py b/pf2/figures/figure4.py
@@ -1,76 +1,76 @@
-"""Figure 4: Component Association Errorbars"""
+# """Figure 4: Component Association Errorbars"""
 
-import numpy as np
-import pandas as pd
-from anndata import read_h5ad
+# import numpy as np
+# import pandas as pd
+# from anndata import read_h5ad
 
-from pf2.data_import import convert_to_patients, import_meta
+# from pf2.data_import import convert_to_patients, import_meta
 from pf2.figures.common import getSetup
-from pf2.predict import predict_mortality
+# from pf2.predict import predict_mortality
 
-TRIALS = 30
+# TRIALS = 30
 
 
 def makeFigure():
-    meta = import_meta()
-    data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
+#     meta = import_meta()
+#     data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
 
-    conversions = convert_to_patients(data)
-    patient_factor = pd.DataFrame(
-        data.uns["Pf2_A"],
-        index=conversions,
-        columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
-    )
-    meta = meta.loc[patient_factor.index, :]
+#     conversions = convert_to_patients(data)
+#     patient_factor = pd.DataFrame(
+#         data.uns["Pf2_A"],
+#         index=conversions,
+#         columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
+#     )
+#     meta = meta.loc[patient_factor.index, :]
 
-    covid_coefficients = pd.DataFrame(
-        0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns
-    )
-    nc_coefficients = covid_coefficients.copy(deep=True)
-    for trial in range(TRIALS):
-        boot_index = np.random.choice(
-            patient_factor.shape[0], replace=True, size=patient_factor.shape[0]
-        )
-        boot_factor = patient_factor.iloc[boot_index, :]
-        boot_meta = meta.iloc[boot_index, :]
-        _, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta)
+#     covid_coefficients = pd.DataFrame(
+#         0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns
+#     )
+#     nc_coefficients = covid_coefficients.copy(deep=True)
+#     for trial in range(TRIALS):
+#         boot_index = np.random.choice(
+#             patient_factor.shape[0], replace=True, size=patient_factor.shape[0]
+#         )
+#         boot_factor = patient_factor.iloc[boot_index, :]
+#         boot_meta = meta.iloc[boot_index, :]
+#         _, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta)
 
-        covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_
-        nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_
+#         covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_
+#         nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_
 
     axs, fig = getSetup((8, 4), (1, 1))
-    ax = axs[0]
+#     ax = axs[0]
 
-    ax.errorbar(
-        np.arange(0, covid_coefficients.shape[1] * 3, 3),
-        covid_coefficients.mean(axis=0),
-        capsize=2,
-        yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS),
-        linestyle="",
-        marker=".",
-        zorder=3,
-        label="COVID-19",
-    )
-    ax.errorbar(
-        np.arange(1, nc_coefficients.shape[1] * 3, 3),
-        nc_coefficients.mean(axis=0),
-        capsize=2,
-        yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS),
-        linestyle="",
-        marker=".",
-        zorder=3,
-        label="Non COVID-19",
-    )
-    ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0)
+#     ax.errorbar(
+#         np.arange(0, covid_coefficients.shape[1] * 3, 3),
+#         covid_coefficients.mean(axis=0),
+#         capsize=2,
+#         yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS),
+#         linestyle="",
+#         marker=".",
+#         zorder=3,
+#         label="COVID-19",
+#     )
+#     ax.errorbar(
+#         np.arange(1, nc_coefficients.shape[1] * 3, 3),
+#         nc_coefficients.mean(axis=0),
+#         capsize=2,
+#         yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS),
+#         linestyle="",
+#         marker=".",
+#         zorder=3,
+#         label="Non COVID-19",
+#     )
+#     ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0)
 
-    ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3))
-    ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8)
+#     ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3))
+#     ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8)
 
-    ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3])
-    ax.legend()
-    ax.grid(True)
+#     ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3])
+#     ax.legend()
+#     ax.grid(True)
 
-    ax.set_ylabel("Logistic Regression Coefficient")
-    ax.set_xlabel("PARAFAC2 Component")
+#     ax.set_ylabel("Logistic Regression Coefficient")
+#     ax.set_xlabel("PARAFAC2 Component")
 
     return fig
diff --git a/pf2/figures/figureA11.py b/pf2/figures/figureA11.py
@@ -8,9 +8,11 @@
 from matplotlib.axes import Axes
 import anndata
 from .common import subplotLabel, getSetup
-from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps
+from ..figures.commonFuncs.plotGeneral import rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps
 from ..data_import import add_obs, combine_cell_types
 from .commonFuncs.plotFactors import bot_top_genes
+from ..figures.commonFuncs.plotPaCMAP import plot_gene_pacmap, plot_labels_pacmap
+import matplotlib.colors as mcolors
 
 
 def makeFigure():
@@ -29,14 +31,28 @@ def makeFigure():
     threshold = 0.5
     X = add_obs_cmp_both_label(X, cmp1, cmp2, pos1, pos2, top_perc=threshold)
     X = add_obs_label(X, cmp1, cmp2)
-
-    genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=4)
-    genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=4)
+
+    colors = ["black", "fuchsia", "turquoise", "gainsboro"]
+    pal = []
+    for i in colors:
+        pal.append(mcolors.CSS4_COLORS[i])
+
+    plot_labels_pacmap(X, "Label", ax[0], color_key=pal)
+
+    genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
+    genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
     genes = np.concatenate([genes1, genes2])
 
     for i, gene in enumerate(genes):
         plot_avegene_cmps(X, gene, ax[i])
-        rotate_xaxis(ax[i])
+        rotate_xaxis(ax[i+1])
+
+    genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
+    genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
+    genes = np.concatenate([genes1, genes2])
+
+    for i, gene in enumerate(genes):
+        plot_gene_pacmap(gene, X, ax[i+5])
 
     return f
 
diff --git a/pf2/figures/figureA12.py b/pf2/figures/figureA12.py
@@ -66,11 +66,4 @@ def makeFigure():
         )
         rotate_xaxis(ax[i])
 
-    genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
-    genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
-    genes = np.concatenate([genes1, genes2])
-
-    for i, gene in enumerate(genes):
-        plot_gene_pacmap(gene, X, ax[i + 2])
-
     return f