Skip to content

Commit

Permalink
Condense WP higly weighted cells plots (#106)
Browse files Browse the repository at this point in the history
* Condense WP higly weighted cells plots

* Fix threshold

* Comment out figure

* Comment out

* Fix error

* Fix error

* Update R2X plot

* Add correcting conditions

---------

Co-authored-by: Andrew Ramirez <aramirez@aretha.seas.ucla.edu>
  • Loading branch information
andrewram4287 and Andrew Ramirez authored Nov 6, 2024
1 parent e708e36 commit dc2d6d2
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 264 deletions.
110 changes: 109 additions & 1 deletion pf2/figures/commonFuncs/plotGeneral.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def add_obs_cmp_both_label(
else:
thres_value = top_perc
threshold2 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] < threshold1[cmp - 1]
idx = wprojs[:, cmp - 1] < threshold2[cmp - 1]

X.obs[f"Cmp{cmp}"] = idx

Expand Down Expand Up @@ -129,6 +129,114 @@ def add_obs_label(X: anndata.AnnData, cmp1: str, cmp2: str):
return X


def add_obs_cmp_both_label_three(
X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int, pos1=True, pos2=True, pos3=True, top_perc=1
):
"""Adds if cells in top/bot percentage"""
wprojs = X.obsm["weighted_projections"]
pos_neg = [pos1, pos2, pos3]
for i, cmp in enumerate([cmp1, cmp2, cmp3]):
if i == 0:
if pos_neg[i] is True:
thres_value = 100 - top_perc
threshold1 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] > threshold1[cmp - 1]

else:
thres_value = top_perc
threshold1 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] < threshold1[cmp - 1]

if i == 1:
if pos_neg[i] is True:
thres_value = 100 - top_perc
threshold2 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] > threshold2[cmp - 1]
else:
thres_value = top_perc
threshold2 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] < threshold2[cmp - 1]

if i == 2:
if pos_neg[i] is True:
thres_value = 100 - top_perc
threshold3 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] > threshold3[cmp - 1]
else:
thres_value = top_perc
threshold3 = np.percentile(wprojs, thres_value, axis=0)
idx = wprojs[:, cmp - 1] < threshold3[cmp - 1]

X.obs[f"Cmp{cmp}"] = idx

if pos1 is True and pos2 is True and pos3 is True:
idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
)
elif pos1 is False and pos2 is False and pos3 is False:
idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
)
elif pos1 is True and pos2 is True and pos3 is False:
idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
)

elif pos1 is True and pos2 is False and pos3 is True:
idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
)
elif pos1 is True and pos2 is False and pos3 is False:
idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
)

elif pos1 is False and pos2 is False and pos3 is True:
idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
)
elif pos1 is False and pos2 is True and pos3 is True:
idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1]
)
elif pos1 is False and pos2 is True and pos3 is False:
idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & (
wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & (
wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1]
)

X.obs["Both"] = idx

return X


def add_obs_label_three(X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int):
"""Creates AnnData observation column"""
X.obs.loc[((X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == False)
& (X.obs[f"Cmp{cmp3}"] == False), "Label")] = f"Cmp{cmp1}"
X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == True)
& (X.obs[f"Cmp{cmp3}"] == False), "Label"] = f"Cmp{cmp2}"
X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False)
& (X.obs[f"Cmp{cmp3}"] == True), "Label"] = f"Cmp{cmp3}"

X.obs.loc[(X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == True)
& (X.obs[f"Cmp{cmp3}"] == True), "Label"] = "Both"
X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False)
& (X.obs[f"Cmp{cmp3}"] == False), "Label"] = "NoLabel"

X = X[(X.obs["Label"] == f"Cmp{cmp1}") | (X.obs["Label"] == f"Cmp{cmp2}") |
(X.obs["Label"] == f"Cmp{cmp3}") | (X.obs["Label"] == "Both") |
(X.obs["Label"] == "NoLabel")]

return X



def plot_avegene_cmps(
Expand Down
58 changes: 29 additions & 29 deletions pf2/figures/figure3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,43 +11,43 @@


def makeFigure():
meta = import_meta()
data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
conversions = convert_to_patients(data)
# meta = import_meta()
# data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
# conversions = convert_to_patients(data)

patient_factor = pd.DataFrame(
data.uns["Pf2_A"],
index=conversions,
columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
)
meta = meta.loc[patient_factor.index, :]
# patient_factor = pd.DataFrame(
# data.uns["Pf2_A"],
# index=conversions,
# columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
# )
# meta = meta.loc[patient_factor.index, :]

axs, fig = getSetup((4, 4), (1, 1))
ax = axs[0]
# ax = axs[0]

probabilities, labels = predict_mortality(patient_factor, meta, proba=True)
# probabilities, labels = predict_mortality(patient_factor, meta, proba=True)

predicted = [0 if prob < 0.5 else 1 for prob in probabilities]
accuracy = accuracy_score(labels, predicted)
# predicted = [0 if prob < 0.5 else 1 for prob in probabilities]
# accuracy = accuracy_score(labels, predicted)

fpr, tpr, _ = roc_curve(labels, probabilities)
auc_roc = roc_auc_score(labels, probabilities)
# fpr, tpr, _ = roc_curve(labels, probabilities)
# auc_roc = roc_auc_score(labels, probabilities)

ax.plot([0, 1], [0, 1], linestyle="--", color="k")
ax.plot(fpr, tpr)
ax.text(
0.99,
0.01,
s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}",
ha="right",
va="bottom",
transform=ax.transAxes,
)
# ax.plot([0, 1], [0, 1], linestyle="--", color="k")
# ax.plot(fpr, tpr)
# ax.text(
# 0.99,
# 0.01,
# s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}",
# ha="right",
# va="bottom",
# transform=ax.transAxes,
# )

ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
# ax.set_xlim([0, 1])
# ax.set_ylim([0, 1])

ax.set_ylabel("True Positive Rate")
ax.set_xlabel("False Positive Rate")
# ax.set_ylabel("True Positive Rate")
# ax.set_xlabel("False Positive Rate")

return fig
116 changes: 58 additions & 58 deletions pf2/figures/figure4.py
Original file line number Diff line number Diff line change
@@ -1,76 +1,76 @@
"""Figure 4: Component Association Errorbars"""
# """Figure 4: Component Association Errorbars"""

import numpy as np
import pandas as pd
from anndata import read_h5ad
# import numpy as np
# import pandas as pd
# from anndata import read_h5ad

from pf2.data_import import convert_to_patients, import_meta
# from pf2.data_import import convert_to_patients, import_meta
from pf2.figures.common import getSetup
from pf2.predict import predict_mortality
# from pf2.predict import predict_mortality

TRIALS = 30
# TRIALS = 30


def makeFigure():
meta = import_meta()
data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")
# meta = import_meta()
# data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r")

conversions = convert_to_patients(data)
patient_factor = pd.DataFrame(
data.uns["Pf2_A"],
index=conversions,
columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
)
meta = meta.loc[patient_factor.index, :]
# conversions = convert_to_patients(data)
# patient_factor = pd.DataFrame(
# data.uns["Pf2_A"],
# index=conversions,
# columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1,
# )
# meta = meta.loc[patient_factor.index, :]

covid_coefficients = pd.DataFrame(
0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns
)
nc_coefficients = covid_coefficients.copy(deep=True)
for trial in range(TRIALS):
boot_index = np.random.choice(
patient_factor.shape[0], replace=True, size=patient_factor.shape[0]
)
boot_factor = patient_factor.iloc[boot_index, :]
boot_meta = meta.iloc[boot_index, :]
_, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta)
# covid_coefficients = pd.DataFrame(
# 0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns
# )
# nc_coefficients = covid_coefficients.copy(deep=True)
# for trial in range(TRIALS):
# boot_index = np.random.choice(
# patient_factor.shape[0], replace=True, size=patient_factor.shape[0]
# )
# boot_factor = patient_factor.iloc[boot_index, :]
# boot_meta = meta.iloc[boot_index, :]
# _, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta)

covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_
nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_
# covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_
# nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_

axs, fig = getSetup((8, 4), (1, 1))
ax = axs[0]
# ax = axs[0]

ax.errorbar(
np.arange(0, covid_coefficients.shape[1] * 3, 3),
covid_coefficients.mean(axis=0),
capsize=2,
yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS),
linestyle="",
marker=".",
zorder=3,
label="COVID-19",
)
ax.errorbar(
np.arange(1, nc_coefficients.shape[1] * 3, 3),
nc_coefficients.mean(axis=0),
capsize=2,
yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS),
linestyle="",
marker=".",
zorder=3,
label="Non COVID-19",
)
ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0)
# ax.errorbar(
# np.arange(0, covid_coefficients.shape[1] * 3, 3),
# covid_coefficients.mean(axis=0),
# capsize=2,
# yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS),
# linestyle="",
# marker=".",
# zorder=3,
# label="COVID-19",
# )
# ax.errorbar(
# np.arange(1, nc_coefficients.shape[1] * 3, 3),
# nc_coefficients.mean(axis=0),
# capsize=2,
# yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS),
# linestyle="",
# marker=".",
# zorder=3,
# label="Non COVID-19",
# )
# ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0)

ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3))
ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8)
# ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3))
# ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8)

ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3])
ax.legend()
ax.grid(True)
# ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3])
# ax.legend()
# ax.grid(True)

ax.set_ylabel("Logistic Regression Coefficient")
ax.set_xlabel("PARAFAC2 Component")
# ax.set_ylabel("Logistic Regression Coefficient")
# ax.set_xlabel("PARAFAC2 Component")

return fig
26 changes: 21 additions & 5 deletions pf2/figures/figureA11.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from matplotlib.axes import Axes
import anndata
from .common import subplotLabel, getSetup
from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps
from ..figures.commonFuncs.plotGeneral import rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps
from ..data_import import add_obs, combine_cell_types
from .commonFuncs.plotFactors import bot_top_genes
from ..figures.commonFuncs.plotPaCMAP import plot_gene_pacmap, plot_labels_pacmap
import matplotlib.colors as mcolors


def makeFigure():
Expand All @@ -29,14 +31,28 @@ def makeFigure():
threshold = 0.5
X = add_obs_cmp_both_label(X, cmp1, cmp2, pos1, pos2, top_perc=threshold)
X = add_obs_label(X, cmp1, cmp2)

genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=4)
genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=4)

colors = ["black", "fuchsia", "turquoise", "gainsboro"]
pal = []
for i in colors:
pal.append(mcolors.CSS4_COLORS[i])

plot_labels_pacmap(X, "Label", ax[0], color_key=pal)

genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
genes = np.concatenate([genes1, genes2])

for i, gene in enumerate(genes):
plot_avegene_cmps(X, gene, ax[i])
rotate_xaxis(ax[i])
rotate_xaxis(ax[i+1])

genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
genes = np.concatenate([genes1, genes2])

for i, gene in enumerate(genes):
plot_gene_pacmap(gene, X, ax[i+5])

return f

7 changes: 0 additions & 7 deletions pf2/figures/figureA12.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,4 @@ def makeFigure():
)
rotate_xaxis(ax[i])

genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1)
genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1)
genes = np.concatenate([genes1, genes2])

for i, gene in enumerate(genes):
plot_gene_pacmap(gene, X, ax[i + 2])

return f
Loading

0 comments on commit dc2d6d2

Please sign in to comment.