-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pregnancy control cohort type2, deleted unbalanced/not reasonable fe…
…atures, 1:10, spline n knots 5, not using obc comorbidities, age stratified sampling, also stratified by index date stratum
- Loading branch information
1 parent
7c9bc9f
commit bba8809
Showing
4 changed files
with
282 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import sys | ||
|
||
# for linux env. | ||
sys.path.insert(0, '..') | ||
import os | ||
import pickle | ||
import numpy as np | ||
from collections import defaultdict, OrderedDict | ||
import pandas as pd | ||
import requests | ||
import functools | ||
from misc import utils | ||
import re | ||
from tqdm import tqdm | ||
import statsmodels.stats.multitest as smsmlt | ||
import multipy.fdr as fdr | ||
|
||
print = functools.partial(print, flush=True) | ||
import time | ||
|
||
if __name__ == '__main__':
    # Read the 'dx' sheet of the causal-effect results, flag rows whose
    # weighted hazard ratio exceeds 1 and that pass the Bonferroni / BY
    # indicator columns in each analysis variant, count those flags across
    # variants, add the narrow/broad selection columns, and write the
    # augmented table to a sibling Excel file.
    infile = r'../data/recover/output/results/DX-all-neg1.0/causal_effects_specific-all-neg1_compare_with_others.xlsx'
    outfile = infile.replace('.xlsx', '_aux_sum_selection.xlsx')
    df = pd.read_excel(infile, sheet_name='dx')

    # Column-name suffixes of the analysis variants ('' is the main analysis).
    apx_vec = ['', ' deltaAndBefore', ' omicron', ' inpatienticu']

    for suffix in apx_vec:
        hazard_ratio = df['hr-w' + suffix]
        bonf_flag = df['bool_bonf' + suffix]
        by_flag = df['bool_by' + suffix]

        elevated = hazard_ratio > 1
        # 1 when the hazard ratio is above 1 AND the correction flag is set.
        df['risk_bf' + suffix] = (elevated & (bonf_flag == 1)).astype('int')
        df['risk_by' + suffix] = (elevated & (by_flag == 1)).astype('int')

    # Number of variants in which each row was flagged, per correction method.
    for method in ('bf', 'by'):
        flag_columns = [f'risk_{method}{suffix}' for suffix in apx_vec]
        df[f'risk_{method}_sum'] = df[flag_columns].sum(axis=1)

    # Add the selection columns on top of the counts; missing entries count as 0.
    selections = (('narrow', 'selected_narrow_25'), ('broad', 'selected_broad44'))
    for method in ('bf', 'by'):
        for tag, selection_col in selections:
            df[f'risk_{method}_sum+{tag}'] = (
                df[f'risk_{method}_sum'] + df[selection_col].fillna(0)
            )

    df.to_excel(outfile, sheet_name='dx')

    print('Done')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import os | ||
import shutil | ||
import zipfile | ||
|
||
import pickle | ||
from sklearn.manifold import TSNE | ||
import matplotlib.pyplot as plt | ||
import matplotlib.ticker as tck | ||
import re | ||
|
||
import numpy as np | ||
import csv | ||
from collections import Counter, defaultdict | ||
import pandas as pd | ||
from misc.utils import check_and_mkdir, stringlist_2_str, stringlist_2_list | ||
from scipy import stats | ||
import re | ||
import itertools | ||
import functools | ||
import random | ||
import seaborn as sns | ||
import time | ||
from tqdm import tqdm | ||
from misc import utils | ||
|
||
if __name__ == '__main__':
    # Plot, for the rows of preg_pos_neg.csv with covid == 0, the distribution
    # of (index date - pregnancy start date) and of
    # (delivery date - index date), both in days.
    start_time = time.time()

    df = pd.read_csv('preg_pos_neg.csv', dtype={'patid': str, 'site': str, 'zip': str},
                     parse_dates=['index date', 'flag_delivery_date', 'flag_pregnancy_start_date',
                                  'flag_pregnancy_end_date'])
    print('all df.shape:', df.shape)
    # NOTE(review): the filter keeps rows with covid == 0, while the log label
    # below says "covid positive" and the x-label further down says
    # "Infection date". Confirm which cohort is intended; the filter and the
    # strings are left as they were.
    df = df.loc[df['covid'] == 0, :]
    print('covid positive df.shape:', df.shape)

    # Whole days between pregnancy start and the index date.
    days_since_preg = (df['index date'] - df['flag_pregnancy_start_date']).dt.days
    sns.displot(days_since_preg)
    plt.show()

    # Whole days between the index date and delivery.
    days_between_deliv = (df['flag_delivery_date'] - df['index date']).dt.days
    # displot is figure-level and always creates its own figure, so a figure
    # pre-created with plt.subplots() would stay blank and be shown as an
    # extra empty window. Size the plot through height/aspect instead
    # (6 in tall, 8 in wide, matching the previously requested 8x6 figsize).
    grid = sns.displot(days_between_deliv, kde=True, height=6, aspect=8 / 6)
    grid.set_xlabels('Delivery date - Infection date (Days)', fontsize=10)
    plt.tight_layout()
    plt.show()

    print('Done! Total Time used:', time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))