def add_obs_cmp_both_label_three(
    X: "anndata.AnnData",
    cmp1: int,
    cmp2: int,
    cmp3: int,
    pos1=True,
    pos2=True,
    pos3=True,
    top_perc=1,
):
    """Flag cells in the top/bottom percentile of three components.

    For each component a boolean column ``Cmp{n}`` is written to ``X.obs``
    marking cells whose weighted projection lies strictly beyond the
    percentile threshold. A ``Both`` column marks cells that reach the
    threshold (inclusive) for all three components at once.

    Args:
        X: Object exposing ``obsm["weighted_projections"]`` (cells x
            components array) and an ``obs`` table; modified in place.
        cmp1, cmp2, cmp3: 1-based component numbers.
        pos1, pos2, pos3: Truthy selects the top ``top_perc`` percent of the
            matching component, falsy selects the bottom ``top_perc`` percent.
        top_perc: Percentile width (0-100) of the selected tail.

    Returns:
        The same ``X`` with the ``Cmp{n}`` and ``Both`` columns added.
    """
    wprojs = X.obsm["weighted_projections"]
    in_all = np.ones(wprojs.shape[0], dtype=bool)

    # Directions are tested by truthiness, not identity: NumPy booleans such
    # as ``np.True_`` are not the ``True`` singleton and must not be treated
    # as "negative".
    for cmp, positive in zip((cmp1, cmp2, cmp3), (pos1, pos2, pos3)):
        column = wprojs[:, cmp - 1]
        if positive:
            threshold = np.percentile(column, 100 - top_perc)
            X.obs[f"Cmp{cmp}"] = column > threshold
            in_all &= column >= threshold
        else:
            threshold = np.percentile(column, top_perc)
            X.obs[f"Cmp{cmp}"] = column < threshold
            in_all &= column <= threshold

    X.obs["Both"] = in_all

    return X


def add_obs_label_three(X: "anndata.AnnData", cmp1: int, cmp2: int, cmp3: int):
    """Create the ``Label`` observation column from three ``Cmp{n}`` flags.

    Labelling rules:
      * flagged for exactly one component -> ``"Cmp{n}"`` of that component
      * flagged for all three components  -> ``"Both"``
      * flagged for none                  -> ``"NoLabel"``
      * flagged for exactly two           -> no label; these cells are
        removed from the returned object.

    The ``Label`` column is rebuilt from scratch on every call so values left
    behind by an earlier labelling pass cannot leak into the result.

    Args:
        X: Object with boolean ``Cmp{n}`` columns in ``X.obs`` (as written by
            ``add_obs_cmp_both_label_three``) that supports boolean-mask
            indexing over observations.
        cmp1, cmp2, cmp3: 1-based component numbers.

    Returns:
        ``X`` subset to the cells that received a label.
    """
    cmps = (cmp1, cmp2, cmp3)
    flags = np.column_stack(
        [np.asarray(X.obs[f"Cmp{cmp}"], dtype=bool) for cmp in cmps]
    )
    n_hits = flags.sum(axis=1)

    labels = np.full(flags.shape[0], np.nan, dtype=object)
    for col, cmp in enumerate(cmps):
        labels[(n_hits == 1) & flags[:, col]] = f"Cmp{cmp}"
    labels[n_hits == 3] = "Both"
    labels[n_hits == 0] = "NoLabel"

    X.obs["Label"] = labels

    # Cells flagged for exactly two components stay unlabelled (NaN) and are
    # dropped here.
    return X[X.obs["Label"].notna()]
probabilities) + # fpr, tpr, _ = roc_curve(labels, probabilities) + # auc_roc = roc_auc_score(labels, probabilities) - ax.plot([0, 1], [0, 1], linestyle="--", color="k") - ax.plot(fpr, tpr) - ax.text( - 0.99, - 0.01, - s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}", - ha="right", - va="bottom", - transform=ax.transAxes, - ) + # ax.plot([0, 1], [0, 1], linestyle="--", color="k") + # ax.plot(fpr, tpr) + # ax.text( + # 0.99, + # 0.01, + # s=f"AUC ROC: {round(auc_roc, 2)}\nAccuracy: {round(accuracy, 2)}", + # ha="right", + # va="bottom", + # transform=ax.transAxes, + # ) - ax.set_xlim([0, 1]) - ax.set_ylim([0, 1]) + # ax.set_xlim([0, 1]) + # ax.set_ylim([0, 1]) - ax.set_ylabel("True Positive Rate") - ax.set_xlabel("False Positive Rate") + # ax.set_ylabel("True Positive Rate") + # ax.set_xlabel("False Positive Rate") return fig diff --git a/pf2/figures/figure4.py b/pf2/figures/figure4.py index 0d530a1..d265687 100644 --- a/pf2/figures/figure4.py +++ b/pf2/figures/figure4.py @@ -1,76 +1,76 @@ -"""Figure 4: Component Association Errorbars""" +# """Figure 4: Component Association Errorbars""" -import numpy as np -import pandas as pd -from anndata import read_h5ad +# import numpy as np +# import pandas as pd +# from anndata import read_h5ad -from pf2.data_import import convert_to_patients, import_meta +# from pf2.data_import import convert_to_patients, import_meta from pf2.figures.common import getSetup -from pf2.predict import predict_mortality +# from pf2.predict import predict_mortality -TRIALS = 30 +# TRIALS = 30 def makeFigure(): - meta = import_meta() - data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r") +# meta = import_meta() +# data = read_h5ad("/opt/northwest_bal/full_fitted.h5ad", backed="r") - conversions = convert_to_patients(data) - patient_factor = pd.DataFrame( - data.uns["Pf2_A"], - index=conversions, - columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1, - ) - meta = meta.loc[patient_factor.index, :] +# conversions = 
convert_to_patients(data) +# patient_factor = pd.DataFrame( +# data.uns["Pf2_A"], +# index=conversions, +# columns=np.arange(data.uns["Pf2_A"].shape[1]) + 1, +# ) +# meta = meta.loc[patient_factor.index, :] - covid_coefficients = pd.DataFrame( - 0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns - ) - nc_coefficients = covid_coefficients.copy(deep=True) - for trial in range(TRIALS): - boot_index = np.random.choice( - patient_factor.shape[0], replace=True, size=patient_factor.shape[0] - ) - boot_factor = patient_factor.iloc[boot_index, :] - boot_meta = meta.iloc[boot_index, :] - _, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta) +# covid_coefficients = pd.DataFrame( +# 0, dtype=float, index=np.arange(TRIALS) + 1, columns=patient_factor.columns +# ) +# nc_coefficients = covid_coefficients.copy(deep=True) +# for trial in range(TRIALS): +# boot_index = np.random.choice( +# patient_factor.shape[0], replace=True, size=patient_factor.shape[0] +# ) +# boot_factor = patient_factor.iloc[boot_index, :] +# boot_meta = meta.iloc[boot_index, :] +# _, _, (covid_plsr, nc_plsr) = predict_mortality(boot_factor, boot_meta) - covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_ - nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_ +# covid_coefficients.loc[trial + 1, covid_plsr.coef_.index] = covid_plsr.coef_ +# nc_coefficients.loc[trial + 1, nc_plsr.coef_.index] = nc_plsr.coef_ axs, fig = getSetup((8, 4), (1, 1)) - ax = axs[0] +# ax = axs[0] - ax.errorbar( - np.arange(0, covid_coefficients.shape[1] * 3, 3), - covid_coefficients.mean(axis=0), - capsize=2, - yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS), - linestyle="", - marker=".", - zorder=3, - label="COVID-19", - ) - ax.errorbar( - np.arange(1, nc_coefficients.shape[1] * 3, 3), - nc_coefficients.mean(axis=0), - capsize=2, - yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS), - linestyle="", - marker=".", - zorder=3, - 
label="Non COVID-19", - ) - ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0) +# ax.errorbar( +# np.arange(0, covid_coefficients.shape[1] * 3, 3), +# covid_coefficients.mean(axis=0), +# capsize=2, +# yerr=1.96 * covid_coefficients.std(axis=0) / np.sqrt(TRIALS), +# linestyle="", +# marker=".", +# zorder=3, +# label="COVID-19", +# ) +# ax.errorbar( +# np.arange(1, nc_coefficients.shape[1] * 3, 3), +# nc_coefficients.mean(axis=0), +# capsize=2, +# yerr=1.96 * nc_coefficients.std(axis=0) / np.sqrt(TRIALS), +# linestyle="", +# marker=".", +# zorder=3, +# label="Non COVID-19", +# ) +# ax.plot([-1, 200], [0, 0], linestyle="--", color="k", zorder=0) - ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3)) - ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8) +# ax.set_xticks(np.arange(0.5, data.uns["Pf2_A"].shape[1] * 3, 3)) +# ax.set_xticklabels(np.arange(data.uns["Pf2_A"].shape[1]) + 1, fontsize=8) - ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3]) - ax.legend() - ax.grid(True) +# ax.set_xlim([-1, data.uns["Pf2_A"].shape[1] * 3]) +# ax.legend() +# ax.grid(True) - ax.set_ylabel("Logistic Regression Coefficient") - ax.set_xlabel("PARAFAC2 Component") +# ax.set_ylabel("Logistic Regression Coefficient") +# ax.set_xlabel("PARAFAC2 Component") return fig diff --git a/pf2/figures/figureA11.py b/pf2/figures/figureA11.py index 6bec013..a8b9e3a 100644 --- a/pf2/figures/figureA11.py +++ b/pf2/figures/figureA11.py @@ -8,9 +8,11 @@ from matplotlib.axes import Axes import anndata from .common import subplotLabel, getSetup -from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps +from ..figures.commonFuncs.plotGeneral import rotate_xaxis, add_obs_cmp_both_label, add_obs_label, plot_avegene_cmps from ..data_import import add_obs, combine_cell_types from .commonFuncs.plotFactors import bot_top_genes +from ..figures.commonFuncs.plotPaCMAP import 
plot_gene_pacmap, plot_labels_pacmap +import matplotlib.colors as mcolors def makeFigure(): @@ -29,14 +31,28 @@ def makeFigure(): threshold = 0.5 X = add_obs_cmp_both_label(X, cmp1, cmp2, pos1, pos2, top_perc=threshold) X = add_obs_label(X, cmp1, cmp2) - - genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=4) - genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=4) + + colors = ["black", "fuchsia", "turquoise", "gainsboro"] + pal = [] + for i in colors: + pal.append(mcolors.CSS4_COLORS[i]) + + plot_labels_pacmap(X, "Label", ax[0], color_key=pal) + + genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1) + genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1) genes = np.concatenate([genes1, genes2]) for i, gene in enumerate(genes): plot_avegene_cmps(X, gene, ax[i]) - rotate_xaxis(ax[i]) + rotate_xaxis(ax[i+1]) + + genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1) + genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1) + genes = np.concatenate([genes1, genes2]) + + for i, gene in enumerate(genes): + plot_gene_pacmap(gene, X, ax[i+5]) return f diff --git a/pf2/figures/figureA12.py b/pf2/figures/figureA12.py index de27958..f17a648 100644 --- a/pf2/figures/figureA12.py +++ b/pf2/figures/figureA12.py @@ -66,11 +66,4 @@ def makeFigure(): ) rotate_xaxis(ax[i]) - genes1 = bot_top_genes(X, cmp=cmp1, geneAmount=1) - genes2 = bot_top_genes(X, cmp=cmp2, geneAmount=1) - genes = np.concatenate([genes1, genes2]) - - for i, gene in enumerate(genes): - plot_gene_pacmap(gene, X, ax[i + 2]) - return f diff --git a/pf2/figures/figureA13.py b/pf2/figures/figureA13.py index 98af36a..c50d850 100644 --- a/pf2/figures/figureA13.py +++ b/pf2/figures/figureA13.py @@ -13,10 +13,6 @@ def makeFigure(): subplotLabel(ax) - plot_toppfun(cmp=9, ax=ax[0]) - plot_toppfun(cmp=15, ax=ax[1]) - # plot_toppfun(cmp=27, ax=ax[1]) - plot_toppfun(cmp=46, ax=ax[2]) return f diff --git a/pf2/figures/figureA14.py b/pf2/figures/figureA14.py deleted file mode 100644 index 120ea05..0000000 --- a/pf2/figures/figureA14.py +++ /dev/null @@ 
-1,39 +0,0 @@ -""" -Figure A14: -""" -from .commonFuncs.plotPaCMAP import plot_labels_pacmap -from ..data_import import combine_cell_types, add_obs -import anndata -from .common import subplotLabel, getSetup -from ..figures.commonFuncs.plotGeneral import add_obs_cmp_both_label, add_obs_label -import seaborn as sns -import matplotlib.colors as mcolors - -def makeFigure(): - """Get a list of the axis objects and create a figure.""" - ax, f = getSetup((6, 6), (2, 2)) - - subplotLabel(ax) - - X = anndata.read_h5ad("/opt/northwest_bal/full_fitted.h5ad") - add_obs(X, "binary_outcome") - add_obs(X, "patient_category") - combine_cell_types(X) - - cmp1 = 27; cmp2 = 46 - pos1 = True; pos2 = True - threshold = 0.5 - X = add_obs_cmp_both_label(X, cmp1, cmp2, pos1, pos2, top_perc=threshold) - X = add_obs_label(X, cmp1, cmp2) - - colors = ["black", "fuchsia", "turquoise", "gainsboro"] - pal = [] - for i in colors: - pal.append(mcolors.CSS4_COLORS[i]) - - plot_labels_pacmap(X, "Label", ax[0], color_key=pal) - - - return f - - diff --git a/pf2/figures/figureA15.py b/pf2/figures/figureA15.py index 5963927..1193bcc 100644 --- a/pf2/figures/figureA15.py +++ b/pf2/figures/figureA15.py @@ -5,13 +5,10 @@ from ..data_import import combine_cell_types, add_obs import anndata from .common import subplotLabel, getSetup -import seaborn as sns import matplotlib.colors as mcolors import numpy as np from .commonFuncs.plotFactors import bot_top_genes -from ..figures.commonFuncs.plotGeneral import bal_combine_bo_covid, rotate_xaxis, plot_avegene_cmps -import pandas as pd - +from ..figures.commonFuncs.plotGeneral import rotate_xaxis, plot_avegene_cmps, add_obs_cmp_both_label_three, add_obs_label_three from ..figures.commonFuncs.plotPaCMAP import plot_gene_pacmap @@ -50,115 +47,6 @@ def makeFigure(): for i, gene in enumerate(genes): plot_gene_pacmap(gene, X, ax[i+7]) - - return f -def add_obs_cmp_both_label_three( - X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int, pos1=True, pos2=True, 
pos3=True, top_perc=1 -): - """Adds if cells in top/bot percentage""" - wprojs = X.obsm["weighted_projections"] - pos_neg = [pos1, pos2, pos3] - for i, cmp in enumerate([cmp1, cmp2, cmp3]): - if i == 0: - if pos_neg[i] is True: - thres_value = 100 - top_perc - threshold1 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] > threshold1[cmp - 1] - - else: - thres_value = top_perc - threshold1 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] < threshold1[cmp - 1] - - if i == 1: - if pos_neg[i] is True: - thres_value = 100 - top_perc - threshold2 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] > threshold2[cmp - 1] - else: - thres_value = top_perc - threshold2 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] < threshold2[cmp - 1] - - if i == 2: - if pos_neg[i] is True: - thres_value = 100 - top_perc - threshold3 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] > threshold3[cmp - 1] - else: - thres_value = top_perc - threshold3 = np.percentile(wprojs, thres_value, axis=0) - idx = wprojs[:, cmp - 1] < threshold3[cmp - 1] - - X.obs[f"Cmp{cmp}"] = idx - - if pos1 is True and pos2 is True and pos3 is True: - idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1] - ) - elif pos1 is False and pos2 is False and pos3 is False: - idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1] - ) - elif pos1 is True and pos2 is True and pos3 is False: - idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1] - ) - - elif pos1 is True and pos2 is False and pos3 is True: - idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & ( - wprojs[:, 
cmp3 - 1] >= threshold3[cmp3 - 1] - ) - elif pos1 is True and pos2 is False and pos3 is False: - idx = (wprojs[:, cmp1 - 1] >= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1] - ) - - elif pos1 is False and pos2 is False and pos3 is True: - idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] <= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1] - ) - elif pos1 is False and pos2 is True and pos3 is True: - idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] >= threshold3[cmp3 - 1] - ) - elif pos1 is False and pos2 is True and pos3 is False: - idx = (wprojs[:, cmp1 - 1] <= threshold1[cmp1 - 1]) & ( - wprojs[:, cmp2 - 1] >= threshold2[cmp2 - 1]) & ( - wprojs[:, cmp3 - 1] <= threshold3[cmp3 - 1] - ) - - X.obs["Both"] = idx - - return X - - -def add_obs_label_three(X: anndata.AnnData, cmp1: int, cmp2: int, cmp3: int): - """Creates AnnData observation column""" - X.obs.loc[((X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == False) - & (X.obs[f"Cmp{cmp3}"] == False), "Label")] = f"Cmp{cmp1}" - X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == True) - & (X.obs[f"Cmp{cmp3}"] == False), "Label"] = f"Cmp{cmp2}" - X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False) - & (X.obs[f"Cmp{cmp3}"] == True), "Label"] = f"Cmp{cmp3}" - - X.obs.loc[(X.obs[f"Cmp{cmp1}"] == True) & (X.obs[f"Cmp{cmp2}"] == True) - & (X.obs[f"Cmp{cmp3}"] == True), "Label"] = "Both" - X.obs.loc[(X.obs[f"Cmp{cmp1}"] == False) & (X.obs[f"Cmp{cmp2}"] == False) - & (X.obs[f"Cmp{cmp3}"] == False), "Label"] = "NoLabel" - - X = X[(X.obs["Label"] == f"Cmp{cmp1}") | (X.obs["Label"] == f"Cmp{cmp2}") | - (X.obs["Label"] == f"Cmp{cmp3}") | (X.obs["Label"] == "Both") | - (X.obs["Label"] == "NoLabel")] - - return X diff --git a/pf2/figures/figureA17.py b/pf2/figures/figureA17.py index 
cc2d1eb..066fc17 100644 --- a/pf2/figures/figureA17.py +++ b/pf2/figures/figureA17.py @@ -4,6 +4,7 @@ import pandas as pd from ..figures.common import getSetup, subplotLabel +from ..tensor import correct_conditions import numpy as np from pf2.data_import import convert_to_patients, import_data, import_meta from pf2.figures.common import getSetup @@ -15,23 +16,25 @@ def makeFigure(): ax, f = getSetup((6, 6), (2, 2)) subplotLabel(ax) - meta = import_meta() - data = import_data() - conversions = convert_to_patients(data) - + X = import_data() + meta = import_meta(drop_duplicates=False) + conversions = convert_to_patients(X, sample=True) + meta.set_index("sample_id", inplace=True) # ranks = np.arange(5, 65, 5) - ranks = np.arange(2, 4) + ranks = np.arange(2, 3) r2xs = pd.Series(0, dtype=float, index=ranks) accuracies = pd.Series(0, dtype=float, index=ranks) for rank in ranks: - fac, r2x = pf2(data, rank, do_embedding=False) + fac, r2x = pf2(X, rank, do_embedding=False) + fac.uns["Pf2_A"] = correct_conditions(X) patient_factor = pd.DataFrame( fac.uns["Pf2_A"], index=conversions, columns=np.arange(fac.uns["Pf2_A"].shape[1]) + 1, ) - if meta.shape[0] != patient_factor.shape[0]: - meta = meta.loc[patient_factor.index, :] + shared_indices = patient_factor.index.intersection(meta.index) + patient_factor = patient_factor.loc[shared_indices, :] + meta = meta.loc[shared_indices, :] acc, _, _ = predict_mortality(patient_factor, meta) r2xs.loc[rank] = r2x