Skip to content

Commit

Permalink
Simulator dependency update (#71)
Browse files Browse the repository at this point in the history
Update pandas>2 and networkx>3 for simulation module and some unit tests #71
Add unit tests. 

GitHub Actions: continue on error when the Code Climate step fails,
to avoid failing the entire workflow (even if the tests pass)
just because it had no access to the CC token needed to upload the coverage report.


---------------------------------------------------------


* Allow networkx >3 dependency

---------

Signed-off-by: Diane Vincent <diane.vincent78@gmail.com>

* Allow pandas>2 dependency

* Add tests

Different link types:
- test_affine_linking
- test_poly_linking
- test_exp_linking
- test_log_linking

Effect modifier: check that it behaves correctly
with marginal structural model
- test_effect_modifier

* Dummy commit to engage CodeClimate?

It seems pull requests from forked branches do not initiate CodeClimate
properly, causing the entire PR to look like it failed.
The problem is that the forked PR doesn't have access to the
upstream's secret, so CodeClimate doesn't have its token:
https://github.com/BiomedSciAI/causallib/actions/runs/10010566187/job/27714442507?pr=71#step:8:17

Before I contemplate whether to make that not-really-secret secret
a hardcoded token instead,
I want to test whether making a dummy commit by a permitted account
could make it run properly.

Signed-off-by: Ehud-Karavani <ehud.karavani@ibm.com>

* Don't fail entire pipeline for failed coverage report upload

Forked PRs have no access to secrets,
so uploading a coverage report to Code Climate can fail
as no token will be provided.

To avoid that failing the entire workflow,
try to make that step optional and see what happens on
Github Actions.

Signed-off-by: Ehud-Karavani <ehud.karavani@ibm.com>

---------

Signed-off-by: Diane Vincent <diane.vincent78@gmail.com>
Signed-off-by: Ehud-Karavani <ehud.karavani@ibm.com>
Co-authored-by: Diane Vincent <diane.vincent78@gmail.com>
Co-authored-by: ehudkr <ehudkaravani@gmail.com>
Co-authored-by: Ehud Karavani <15989012+ehudkr@users.noreply.github.com>
Co-authored-by: Ehud-Karavani <ehud.karavani@ibm.com>
  • Loading branch information
5 people authored Jul 31, 2024
1 parent bdf033c commit 52ba5ea
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,8 @@ jobs:
- name: Publish to CodeClimate
uses: paambaati/codeclimate-action@v5.0.0
env:
CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }}
CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }}
# Forked PRs have no access to secrets, so uploading a coverage report to Code Climate fails.
# To avoid that failing the entire workflow, continue on error:
continue-on-error: true

6 changes: 3 additions & 3 deletions causallib/simulation/CausalSimulator3.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm

# check that the effect modifier is independent of the treatment and affects only the outcome:
for i in self.effmod_indices:
successors = self.graph_topology.successors(i)
successors = list(self.graph_topology.successors(i))
if len(successors) == 0 or self.outcome_indices.intersection(successors).size < 1:
raise ValueError("Effect modifier variable {name} must affect an outcome variable".format(name=i))
ancestors = nx.ancestors(self.graph_topology, i)
Expand Down Expand Up @@ -441,7 +441,7 @@ def generate_data(self, X_given=None, num_samples=None, random_seed=None):

# generate latent continuous covariates - every variable is guaranteed to have a population variance of 1.0
# X_latent = pd.DataFrame(index=patients_index, columns=self.var_types.index)
X = pd.DataFrame(index=patients_index, columns=self.var_types.index)
X = pd.DataFrame(index=patients_index, columns=self.var_types.index, dtype=float)
if X_given is not None: # if a dataset is given, integrate it to the current dataset being build.
X.loc[:, X_given.columns] = X_given
for col in X_given.columns:
Expand Down Expand Up @@ -1342,7 +1342,7 @@ def _poly_linking(X_parents, beta=None):
beta = pd.DataFrame(data=np.random.normal(loc=0.0, scale=4.0, size=(degree, X_parents.columns.size)),
columns=X_parents.columns, index=np.arange(degree))

result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns)
result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns, dtype=float)
degrees = beta.index.to_series()
# Apply a polynomial to every parent variable
for var_name, col in X_parents.items():
Expand Down
96 changes: 96 additions & 0 deletions causallib/tests/test_causal_simulator3.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,82 @@ def test_linear_linking(self):
msg="discovered rank of matrix is {emp} instead of {des}."
"so the linear linking does not work properly".format(emp=rank, des=2))

def test_affine_linking(self):
    """Generate data with affine links and verify the data matrix has rank 3.

    Three variables are simulated (covariate, binary treatment, outcome) and
    the rank of the generated matrix is measured through its singular values.
    """
    n_vars = 3
    adjacency = np.zeros((n_vars, n_vars), dtype=bool)
    # Link the outcome (index 2) with the covariate (index 0) and the treatment (index 1).
    adjacency[2, 0] = True
    adjacency[2, 1] = True

    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="affine",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = np.sum(spectrum > tolerance)
    failure_text = ("discovered rank of matrix is {emp} instead of {des}."
                    "so the affine linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_text)

def test_poly_linking(self):
    """Generate data with polynomial links and verify the data matrix has rank 3.

    Three variables are simulated (covariate, binary treatment, outcome) and
    the rank of the generated matrix is measured through its singular values.
    """
    n_vars = 3
    adjacency = np.zeros((n_vars, n_vars), dtype=bool)
    # Link the outcome (index 2) with the covariate (index 0) and the treatment (index 1).
    adjacency[2, 0] = True
    adjacency[2, 1] = True

    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="poly",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = np.sum(spectrum > tolerance)
    failure_text = ("discovered rank of matrix is {emp} instead of {des}."
                    "so the poly linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_text)

def test_exp_linking(self):
    """Generate data with exponential links and verify the data matrix has rank 3.

    Three variables are simulated (covariate, binary treatment, outcome) and
    the rank of the generated matrix is measured through its singular values.
    """
    n_vars = 3
    adjacency = np.zeros((n_vars, n_vars), dtype=bool)
    # Link the outcome (index 2) with the covariate (index 0) and the treatment (index 1).
    adjacency[2, 0] = True
    adjacency[2, 1] = True

    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="exp",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = np.sum(spectrum > tolerance)
    failure_text = ("discovered rank of matrix is {emp} instead of {des}."
                    "so the exp linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_text)

def test_log_linking(self):
    """Generate data with logarithmic links and verify the data matrix has rank 3.

    Three variables are simulated (covariate, binary treatment, outcome) and
    the rank of the generated matrix is measured through its singular values.
    """
    n_vars = 3
    adjacency = np.zeros((n_vars, n_vars), dtype=bool)
    # Link the outcome (index 2) with the covariate (index 0) and the treatment (index 1).
    adjacency[2, 0] = True
    adjacency[2, 1] = True

    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="log",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = np.sum(spectrum > tolerance)
    failure_text = ("discovered rank of matrix is {emp} instead of {des}."
                    "so the log linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_text)

def test_treatment_logistic(self):
topology = np.zeros((6, 6), dtype=bool)
topology[2, 0] = topology[3, 0] = topology[2, 1] = topology[3, 1] = topology[4, 2] = topology[5, 3] = True
Expand Down Expand Up @@ -533,6 +609,26 @@ def test_censoring(self):
# TODO: test different link types
# TODO: test marginal structural model (both in continuous, dichotomous and probability settings)

def test_effect_modifier(self):
    """Check the outcome's linking coefficients under a marginal structural model.

    Four variables are simulated: an effect modifier (index 0), a binary
    treatment (index 1), a continuous outcome (index 2) and a plain covariate
    (index 3). The outcome is linked to the other three. The test expects the
    effect modifier's coefficient to differ between the two potential outcomes,
    and the covariate's coefficient to be identical across them.
    """
    topology = np.zeros((4, 4), dtype=bool)
    topology[2, 0] = topology[2, 1] = topology[2, 3] = True
    var_types = ["effect_modifier", "treatment", "outcome", "covariate"]
    prob_cat = [None, [0.5, 0.5], None, None]
    link_types = ["linear", "linear", "marginal_structural_model", "linear"]
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types=link_types, treatment_importances=None,
              outcome_types="continuous", snr=1, effect_sizes=None)
    # NOTE(review): presumably the linking coefficients are populated while data
    # is generated, so keep this call before reading `sim.linking_coefs` - confirm.
    sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Coefficients of the outcome variable (index 2); rows are addressed by
    # variable index and columns by 0/1 (assumed to be the treatment values).
    outcome_coefs = sim.linking_coefs[2]
    modifier_0, modifier_1 = outcome_coefs.loc[0, 0], outcome_coefs.loc[0, 1]
    covariate_0, covariate_1 = outcome_coefs.loc[3, 0], outcome_coefs.loc[3, 1]

    self.assertNotEqual(modifier_0, modifier_1,
                        msg="coefficients for potential outcomes are the same: {beta_1} = {beta_0}. "
                            "so the effect modifier does not behave properly".format(beta_0=modifier_0,
                                                                                     beta_1=modifier_1))
    # Report the covariate's own coefficients on failure (the original message
    # mistakenly printed the effect modifier's values here).
    self.assertEqual(covariate_0, covariate_1,
                     msg="coefficients for potential outcomes are not the same: {beta_1} != {beta_0}. "
                         "so the covariate does not behave properly".format(beta_0=covariate_0,
                                                                            beta_1=covariate_1))


if __name__ == "__main__":
unittest.main()

0 comments on commit 52ba5ea

Please sign in to comment.