Skip to content

Commit

Permalink
pregnancy control cohort type2, deleted unbalanced/not reasonable fe…
Browse files Browse the repository at this point in the history
…atures, 1:10, spline n knots 5, not using obc comorbidities, age stratified sampling, also stratified by index date stratum
  • Loading branch information
calvin-zcx committed Mar 17, 2023
1 parent 7c9bc9f commit bba8809
Show file tree
Hide file tree
Showing 4 changed files with 282 additions and 14 deletions.
176 changes: 168 additions & 8 deletions iptw/plot_hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,10 +504,21 @@ def plot_forest_for_dx_organ_V4(database='V15_COVID19', star=True, pasc_dx=False
# p.colors(pointcolor='r')
# '#F65453', '#82A2D3'
# c = ['#870001', '#F65453', '#fcb2ab', '#003396', '#5494DA','#86CEFA']
c = '#F65453'
p.colors(pointshape="s", errorbarcolor=c, pointcolor=c) # , linecolor='black'), # , linecolor='#fcb2ab')
ax = p.plot(figsize=(8.6, .38 * len(labs)), t_adjuster=0.0108, max_value=3.5, min_value=0.9, size=5, decimal=2,
text_right=text_right)

if database == 'V15_COVID19':
c = '#F65453'
p.colors(pointshape="s", errorbarcolor=c, pointcolor=c) # , linecolor='black'), # , linecolor='#fcb2ab')
ax = p.plot_with_incidence(figsize=(8.6, .38 * len(labs)), t_adjuster=0.0108, max_value=3.5, min_value=0.9,
size=5, decimal=2,
text_right=text_right)
if database == 'oneflorida':
c = '#A986B5'
p.colors(pointshape="s", errorbarcolor=c, pointcolor=c) # , linecolor='black'), # , linecolor='#fcb2ab')
ax = p.plot_with_incidence(figsize=(8.6, .38 * len(labs)), t_adjuster=0.042, max_value=3.5, min_value=0.9,
size=5, decimal=2,
text_right=text_right)


# plt.title(drug_name, loc="right", x=.7, y=1.045) #"Random Effect Model(Risk Ratio)"
# plt.title('pasc', loc="center", x=0, y=0)
# plt.suptitle("Missing Data Imputation Method", x=-0.1, y=0.98)
Expand Down Expand Up @@ -882,12 +893,12 @@ def plot_forest_for_med_organ_V3(database='V15_COVID19', pvalue=0.05 / 459, ):
if database == 'oneflorida':
c = '#A986B5' # '#A986B5'
p.colors(pointshape="s", errorbarcolor=c, pointcolor=c) # , linecolor='#fcb2ab')
ax = p.plot(figsize=(9, 0.43 * len(labs)), t_adjuster=0.026, max_value=3.5, min_value=0.9, size=5,
ax = p.plot_with_incidence(figsize=(9, 0.43 * len(labs)), t_adjuster=0.026, max_value=3.5, min_value=0.9, size=5,
decimal=2) # * 27 / 45 *
else:
c = '#5494DA' # '#A986B5'
p.colors(pointshape="s", errorbarcolor=c, pointcolor=c) # , linecolor='#fcb2ab')
ax = p.plot(figsize=(9, 0.43 * 0.5 * len(labs)), t_adjuster=0.01, max_value=3.5, min_value=0.9, size=5,
ax = p.plot_with_incidence(figsize=(9, 0.43 * 0.5 * len(labs)), t_adjuster=0.01, max_value=3.5, min_value=0.9, size=5,
decimal=2) #
# plt.title(drug_name, loc="right", x=.7, y=1.045) #"Random Effect Model(Risk Ratio)"
# plt.title('pasc', loc="center", x=0, y=0)
Expand Down Expand Up @@ -921,6 +932,150 @@ def plot_forest_for_med_organ_V3(database='V15_COVID19', pvalue=0.05 / 459, ):
plt.close()


def plot_forest_for_med_organ_V4(database='V15_COVID19', pvalue=0.05 / 459, ):
    """Draw a forest plot of adjusted hazard ratios for selected medication outcomes.

    Reads the 'med' sheet of the site-specific results workbook, keeps the rows
    flagged ``selected == 1``, groups them by 'Organ Domain' in a fixed display
    order, and plots one row per outcome with two extra text columns
    (labelled "CIF per 1000 in Pos" and "CIF per 1000 in Neg"). The figure is
    saved as PNG and PDF under the site's ``figure/organ/med/`` directory and
    then shown.

    Args:
        database: Either 'oneflorida' or 'V15_COVID19'; selects the input
            workbook, the colour/size preset and the output directory.
        pvalue: Only used to build the output file name.

    Raises:
        ValueError: If ``database`` is not one of the two supported names
            (previously this surfaced later as an UnboundLocalError on ``df``).
    """
    # Resolve everything that depends on the site up front so that an unknown
    # name fails immediately with a clear message.
    if database == 'oneflorida':
        infile = r'../data/oneflorida/output/character/outcome/MED-all-new-trim/causal_effects_specific_med_oneflorida-MultiPval-DXMEDALL.xlsx'
        other_site_col = 'selected insight'
    elif database == 'V15_COVID19':
        infile = r'../data/V15_COVID19/output/character/outcome/MED-all-new-trim/causal_effects_specific_med_insight-MultiPval-DXMEDALL.xlsx'
        other_site_col = 'selected oneflorida'
    else:
        raise ValueError(
            "database must be 'oneflorida' or 'V15_COVID19', got {!r}".format(database))

    df = pd.read_excel(infile, sheet_name='med')

    # Largest hazard ratio first; only rows pre-flagged in the workbook are plotted.
    df_select = df.sort_values(by='hr-w', ascending=False)
    df_select = df_select.loc[df_select['selected'] == 1, :]
    print('df_select.shape:', df_select.shape)

    # Printed for inspection only; the display order below is fixed by hand.
    print(df_select['Organ Domain'].unique())
    organ_list = [
        'Diseases of the Nervous System',
        # 'Diseases of the Eye and Adnexa',
        'Diseases of the Skin and Subcutaneous Tissue',
        'Diseases of the Respiratory System',
        'Diseases of the Circulatory System',
        'Diseases of the Blood and Blood Forming Organs and Certain Disorders Involving the Immune Mechanism',
        'Endocrine, Nutritional and Metabolic Diseases',
        'Diseases of the Digestive System',
        'Diseases of the Genitourinary System',
        'Diseases of the Musculoskeletal System and Connective Tissue',
        # 'Certain Infectious and Parasitic Diseases',
        'General'
    ]
    organ_n = np.zeros(len(organ_list))
    labs = []
    measure = []
    lower = []
    upper = []
    pval = []

    nabsv = []
    ncumv = []

    for i, organ in enumerate(organ_list):
        print(i + 1, 'organ', organ)

        # Filter once per organ instead of re-parsing every row for every organ.
        organ_rows = df_select.loc[df_select['Organ Domain'] == organ, :]
        organ_n[i] = len(organ_rows)

        for _, row in organ_rows.iterrows():
            name = row['PASC Name Simple'].strip('*')

            # Double-dagger marks outcomes that are also selected at the other site.
            if (row['selected'] == 1) and (row[other_site_col] == 1):
                name += r'$^{‡}$'

            # Break long labels after the third word so they fit the y-axis.
            words = name.split()
            if len(words) >= 4:
                name = ' '.join(words[:3]) + '\n' + ' '.join(words[3:])

            ci = stringlist_2_list(row['hr-w-CI'])

            labs.append(name)
            measure.append(row['hr-w'])
            lower.append(ci[0])
            upper.append(ci[1])
            pval.append(row['hr-w-p'])

            # Last element of each CIF series, scaled to "per 1000".
            # The plot's `nabs` slot is reused to carry the cif_0_w value.
            nabsv.append(stringlist_2_list(row['cif_0_w'])[-1] * 1000)
            ncumv.append(stringlist_2_list(row['cif_1_w'])[-1] * 1000)

    forest = EffectMeasurePlot(label=labs, effect_measure=measure, lcl=lower, ucl=upper,
                               nabs=nabsv, ncumIncidence=ncumv)
    forest.labels(scale='log')
    forest.labels(effectmeasure='aHR', add_label1='CIF per\n1000\nin Pos', add_label2='CIF per\n1000\nin Neg')

    # Site-specific colour, figure height per row and text offset.
    if database == 'oneflorida':
        c = '#787276'
        forest.colors(pointshape="s", errorbarcolor=c, pointcolor=c)
        ax = forest.plot_with_incidence(figsize=(9, 0.43 * len(labs)), t_adjuster=0.026, max_value=3.5,
                                        min_value=0.9, size=5, decimal=2)
    else:
        c = '#5494DA'
        forest.colors(pointshape="s", errorbarcolor=c, pointcolor=c)
        ax = forest.plot_with_incidence(figsize=(9, 0.43 * 0.5 * len(labs)), t_adjuster=0.01, max_value=3.5,
                                        min_value=0.9, size=5, decimal=2)

    # Dashed separators between consecutive organ groups (none after the last).
    organ_n_cumsum = np.cumsum(organ_n)
    for i in range(len(organ_n) - 1):
        ax.axhline(y=organ_n_cumsum[i] - .5, xmin=0.09, color=forest.linec, zorder=1, linestyle='--')

    ax.set_yticklabels(labs, fontsize=14)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(True)
    ax.spines['left'].set_visible(False)
    plt.tight_layout()

    if database == 'oneflorida':
        output_dir = r'../data/oneflorida/output/character/outcome/figure/organ/med/'
    else:
        output_dir = r'../data/V15_COVID19/output/character/outcome/figure/organ/med/'

    check_and_mkdir(output_dir)
    out_stem = output_dir + 'new-trim-med_hr_{}-p{:.3f}-hrGe1-nGe100-V4-2CIF'.format('all', pvalue)
    plt.savefig(out_stem + '.png', bbox_inches='tight', dpi=600)
    plt.savefig(out_stem + '.pdf', bbox_inches='tight', transparent=True)
    plt.show()
    print()
    plt.close()


def plot_forest_for_med_organ_compare2data(add_name=False, severity="all", star=False, select_criteria='',
pvalue=0.05 / 596, add_pasc=False):
if severity == 'all':
Expand Down Expand Up @@ -2870,10 +3025,15 @@ def add_drug_previous_label():
# plot_forest_for_med_organ(pvalue=0.05 / 459, star=True, datasite='oneflorida')
# plot_forest_for_med_organ_V2(pvalue=0.05/459)

# plot_forest_for_dx_organ_V4(star=False, pasc_dx=False, text_right=False)
# plot_forest_for_med_organ_V3(database='V15_COVID19')
# 2023-3-13 revision, plot two sites comparison
# plot_forest_for_dx_organ_V4(database='V15_COVID19', star=False, pasc_dx=False, text_right=False)
# plot_forest_for_dx_organ_V4(database='oneflorida', star=False, pasc_dx=False, text_right=False)

plot_forest_for_med_organ_V4(database='oneflorida')
zz
# plot_forest_for_med_organ_V3(database='V15_COVID19')
# plot_forest_for_med_organ_V3(database='oneflorida')
##
plot_forest_for_med_organ_compare2data(add_name=False, severity="all", star=False, select_criteria='',
pvalue=0.05 / 596, add_pasc=False)
zz
Expand Down
24 changes: 18 additions & 6 deletions iptw/test_multiple_comparision.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,19 @@ def add_test_to_paper_2023_2_23():
# r'../data/recover/output/results/DX-all-downsample0.50/causal_effects_specific_dx-all-downsample0.5.xlsx',
# sheet_name='dx')

df = pd.read_excel(
r'../data/recover/output/results/DX-pospreg-posnonpreg/causal_effects_specific-pospreg-posnonpreg.xlsx',
sheet_name='dx')
# df = pd.read_excel(
# r'../data/recover/output/results/DX-pospreg-posnonpreg/causal_effects_specific-pospreg-posnonpreg.xlsx',
# sheet_name='dx')

# infile = r'../data/recover/output/results/DX-all-neg1.0/causal_effects_specific-all-neg1.xlsx'
# infile = r'../data/recover/output/results/DX-deltaAndBefore-neg1.0/causal_effects_specific-deltaAndBefore-neg1.xlsx'
# infile = r'../data/recover/output/results/DX-omicron-neg1.0/causal_effects_specific-omicron-neg1.xlsx'
# infile = r'../data/recover/output/results/DX-pospreg-posnonpreg/causal_effects_specific-pospreg-posnonpreg.xlsx'
infile = r'../data/recover/output/results/DX-preg-pos-neg/causal_effects_specific-preg-pos-neg.xlsx'

outfile = infile.replace('.xlsx', '_aux_correctPvalue.xlsx')

df = pd.read_excel(infile, sheet_name='dx')

df_select = df.loc[df['hr-w-p'].notna(), :]
p_all = df_select['hr-w-p'] # pd.concat([df_select['hr-w-p'], df_med_select['hr-w-p']])
Expand Down Expand Up @@ -243,7 +253,9 @@ def add_test_to_paper_2023_2_23():
# r'../data/recover/output/results/DX-inpatienticu/causal_effects_specific-inpatienticu_aux_correctPvalue.xlsx',
# sheet_name='dx')

dfm_dx.to_excel(
r'../data/recover/output/results/DX-pospreg-posnonpreg/causal_effects_specific-pospreg-posnonpreg_aux_correctPvalue.xlsx',
sheet_name='dx')
# dfm_dx.to_excel(
# r'../data/recover/output/results/DX-pospreg-posnonpreg/causal_effects_specific-pospreg-posnonpreg_aux_correctPvalue.xlsx',
# sheet_name='dx')

dfm_dx.to_excel(outfile, sheet_name='dx')
print('Done')
47 changes: 47 additions & 0 deletions specific/any_pasc_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import sys

# for linux env.
sys.path.insert(0, '..')
import os
import pickle
import numpy as np
from collections import defaultdict, OrderedDict
import pandas as pd
import requests
import functools
from misc import utils
import re
from tqdm import tqdm
import statsmodels.stats.multitest as smsmlt
import multipy.fdr as fdr

print = functools.partial(print, flush=True)
import time

if __name__ == '__main__':
    # Summarise, per outcome row, in how many analyses the outcome shows an
    # elevated hazard ratio (hr-w > 1) that also passes a multiple-testing flag,
    # then write the augmented table next to the input workbook.
    infile = r'../data/recover/output/results/DX-all-neg1.0/causal_effects_specific-all-neg1_compare_with_others.xlsx'
    outfile = infile.replace('.xlsx', '_aux_sum_selection.xlsx')
    df = pd.read_excel(infile, sheet_name='dx')

    # Column-name suffixes of the analyses being compared ('' is the unsuffixed one).
    apx_vec = ['', ' deltaAndBefore', ' omicron', ' inpatienticu']

    # (source flag column, derived risk column) pairs, in output-column order.
    flag_columns = (('bool_bonf', 'risk_bf'), ('bool_by', 'risk_by'))
    for suffix in apx_vec:
        hr_above_one = df['hr-w' + suffix] > 1
        for src, dst in flag_columns:
            df[dst + suffix] = (hr_above_one & (df[src + suffix] == 1)).astype('int')

    # Number of analyses in which each row was flagged.
    for prefix in ('risk_bf', 'risk_by'):
        df[prefix + '_sum'] = df[[prefix + suffix for suffix in apx_vec]].sum(axis=1)

    # Add the pre-existing selection indicators; missing values count as 0.
    narrow = df['selected_narrow_25'].fillna(0)
    broad = df['selected_broad44'].fillna(0)
    for total in ('risk_bf_sum', 'risk_by_sum'):
        df[total + '+narrow'] = df[total] + narrow
        df[total + '+broad'] = df[total] + broad

    df.to_excel(outfile, sheet_name='dx')

    print('Done')
49 changes: 49 additions & 0 deletions specific/plot_pregnancy_cohort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
import shutil
import zipfile

import pickle
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import re

import numpy as np
import csv
from collections import Counter, defaultdict
import pandas as pd
from misc.utils import check_and_mkdir, stringlist_2_str, stringlist_2_list
from scipy import stats
import re
import itertools
import functools
import random
import seaborn as sns
import time
from tqdm import tqdm
from misc import utils

if __name__ == '__main__':
    # Exploratory plots for the pregnancy cohort file: distribution of days from
    # pregnancy start to index date, and of days from index date to delivery.
    start_time = time.time()

    df = pd.read_csv('preg_pos_neg.csv', dtype={'patid': str, 'site': str, 'zip': str},
                     parse_dates=['index date', 'flag_delivery_date', 'flag_pregnancy_start_date',
                                  'flag_pregnancy_end_date'])
    print('all df.shape:', df.shape)

    # NOTE(review): the original log label said "covid positive" while the filter
    # keeps rows with covid == 0. The label now states the filter actually applied;
    # confirm which subgroup is intended (the x-axis label below still mentions
    # an infection date).
    df = df.loc[df['covid'] == 0, :]
    print('covid == 0 df.shape:', df.shape)

    # Whole days between pregnancy start and the index date.
    days_since_preg = (df['index date'] - df['flag_pregnancy_start_date']).dt.days
    sns.displot(days_since_preg)
    plt.show()

    # Whole days between the index date and delivery.
    days_between_deliv = (df['flag_delivery_date'] - df['index date']).dt.days
    # displot is figure-level and always creates its own figure, so a preceding
    # plt.subplots(figsize=(8, 6)) only produced an extra empty window. Request
    # the 8x6 inch size through height/aspect instead.
    sns.displot(days_between_deliv, kde=True, height=6, aspect=8 / 6)
    plt.xlabel('Delivery date - Infection date (Days)', fontsize=10)
    plt.tight_layout()
    plt.show()

    print('Done! Total Time used:', time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

0 comments on commit bba8809

Please sign in to comment.