diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 95e8eb3..c41b1cd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,5 +43,8 @@ jobs: - name: Publish to CodeClimate uses: paambaati/codeclimate-action@v5.0.0 env: - CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }} + CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }} + # Forked PRs have no access to secrets, so uploading a coverage report to Code Climate fails. + # To avoid that failing the entire workflow, continue on error: + continue-on-error: true diff --git a/causallib/simulation/CausalSimulator3.py b/causallib/simulation/CausalSimulator3.py index a172c24..765eedb 100644 --- a/causallib/simulation/CausalSimulator3.py +++ b/causallib/simulation/CausalSimulator3.py @@ -216,7 +216,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm # check that effect modifier is independent on treatment and affects only the outcome: for i in self.effmod_indices: - successors = self.graph_topology.successors(i) + successors = list(self.graph_topology.successors(i)) if len(successors) == 0 or self.outcome_indices.intersection(successors).size < 1: raise ValueError("Effect modifier variable {name} must affect an outcome variable".format(name=i)) ancestors = nx.ancestors(self.graph_topology, i) @@ -441,7 +441,7 @@ def generate_data(self, X_given=None, num_samples=None, random_seed=None): # generate latent continuous covariates - every variable is guaranteed to have a population variance of 1.0 # X_latent = pd.DataFrame(index=patients_index, columns=self.var_types.index) - X = pd.DataFrame(index=patients_index, columns=self.var_types.index) + X = pd.DataFrame(index=patients_index, columns=self.var_types.index, dtype=float) if X_given is not None: # if a dataset is given, integrate it to the current dataset being build. 
X.loc[:, X_given.columns] = X_given for col in X_given.columns: @@ -1342,7 +1342,7 @@ def _poly_linking(X_parents, beta=None): beta = pd.DataFrame(data=np.random.normal(loc=0.0, scale=4.0, size=(degree, X_parents.columns.size)), columns=X_parents.columns, index=np.arange(degree)) - result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns) + result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns, dtype=float) degrees = beta.index.to_series() # Apply a polynomial to every parent variable for var_name, col in X_parents.items(): diff --git a/causallib/tests/test_causal_simulator3.py b/causallib/tests/test_causal_simulator3.py index c02ec97..3a91e23 100644 --- a/causallib/tests/test_causal_simulator3.py +++ b/causallib/tests/test_causal_simulator3.py @@ -357,6 +357,82 @@ def test_linear_linking(self): msg="discovered rank of matrix is {emp} instead of {des}." "so the linear linking does not work properly".format(emp=rank, des=2)) + def test_affine_linking(self): + topology = np.zeros((3, 3), dtype=bool) + topology[2, 0] = topology[2, 1] = True + var_types = ["covariate", "treatment", "outcome"] + snr = 1 + prob_cat = [None, [0.5, 0.5], None] + treatment_importance = None + sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat, + link_types="affine", treatment_importances=treatment_importance, + outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes) + X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) + + singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False) + eps = 1e-10 + rank = np.sum(singular_values > eps) + self.assertEqual(rank, 3, + msg="discovered rank of matrix is {emp} instead of {des}." 
+ "so the affine linking does not work properly".format(emp=rank, des=3)) + + def test_poly_linking(self): + topology = np.zeros((3, 3), dtype=bool) + topology[2, 0] = topology[2, 1] = True + var_types = ["covariate", "treatment", "outcome"] + snr = 1 + prob_cat = [None, [0.5, 0.5], None] + treatment_importance = None + sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat, + link_types="poly", treatment_importances=treatment_importance, + outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes) + X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) + + singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False) + eps = 1e-10 + rank = np.sum(singular_values > eps) + self.assertEqual(rank, 3, + msg="discovered rank of matrix is {emp} instead of {des}." + "so the poly linking does not work properly".format(emp=rank, des=3)) + + def test_exp_linking(self): + topology = np.zeros((3, 3), dtype=bool) + topology[2, 0] = topology[2, 1] = True + var_types = ["covariate", "treatment", "outcome"] + snr = 1 + prob_cat = [None, [0.5, 0.5], None] + treatment_importance = None + sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat, + link_types="exp", treatment_importances=treatment_importance, + outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes) + X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) + + singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False) + eps = 1e-10 + rank = np.sum(singular_values > eps) + self.assertEqual(rank, 3, + msg="discovered rank of matrix is {emp} instead of {des}." 
+ "so the exp linking does not work properly".format(emp=rank, des=3)) + + def test_log_linking(self): + topology = np.zeros((3, 3), dtype=bool) + topology[2, 0] = topology[2, 1] = True + var_types = ["covariate", "treatment", "outcome"] + snr = 1 + prob_cat = [None, [0.5, 0.5], None] + treatment_importance = None + sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat, + link_types="log", treatment_importances=treatment_importance, + outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes) + X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) + + singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False) + eps = 1e-10 + rank = np.sum(singular_values > eps) + self.assertEqual(rank, 3, + msg="discovered rank of matrix is {emp} instead of {des}." + "so the log linking does not work properly".format(emp=rank, des=3)) + def test_treatment_logistic(self): topology = np.zeros((6, 6), dtype=bool) topology[2, 0] = topology[3, 0] = topology[2, 1] = topology[3, 1] = topology[4, 2] = topology[5, 3] = True @@ -533,6 +609,26 @@ def test_censoring(self): # TODO: test different link types # TODO: test marginal structural model (both in continuous, dichotomous and probability settings) + def test_effect_modifier(self): + topology = np.zeros((4, 4), dtype=bool) + topology[2, 0] = topology[2, 1] = topology[2, 3] = True + var_types = ["effect_modifier", "treatment", "outcome", "covariate"] + snr = 1 + prob_cat = [None, [0.5, 0.5], None, None] + treatment_importance = None + sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat, + link_types=["linear","linear","marginal_structural_model","linear"], treatment_importances=treatment_importance, + outcome_types="continuous", snr=snr, effect_sizes=None) + X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) + + beta = sim.linking_coefs + self.assertNotEqual(beta[2].loc[0,0], beta[2].loc[0,1], + msg="coefficients for potential outcomes 
are the same: {beta_1} = {beta_0}." + "so the effect modifier does not behave properly".format(beta_0=beta[2].loc[0,0], beta_1=beta[2].loc[0,1])) + self.assertEqual(beta[2].loc[3,0], beta[2].loc[3,1], + msg="coefficients for potential outcomes are not the same: {beta_1} != {beta_0}." + "so the covariate does not behave properly".format(beta_0=beta[2].loc[3,0], beta_1=beta[2].loc[3,1])) + if __name__ == "__main__": unittest.main()