Merge pull request #127 from haraoka-screen/interpretability

Add functions for the interpretability of graphs
cdt15 · Feb 15, 2024 · eb27323 · eb27323
2 parents f7d0201 + 0d36b44
commit eb27323
Show file tree

Hide file tree

Showing 34 changed files with 5,085 additions and 3,964 deletions.
diff --git a/docs/image/bootstrap_hist.png b/docs/image/bootstrap_hist.png
diff --git a/docs/image/bottom_up_parce_hist.png b/docs/image/bottom_up_parce_hist.png
diff --git a/docs/image/longitudinal_hist.png b/docs/image/longitudinal_hist.png
diff --git a/docs/image/longitudinal_scatter1.png b/docs/image/longitudinal_scatter1.png
diff --git a/docs/image/longitudinal_scatter2.png b/docs/image/longitudinal_scatter2.png
diff --git a/docs/image/multiple_dataset_hist.png b/docs/image/multiple_dataset_hist.png
diff --git a/docs/image/rcd_hist.png b/docs/image/rcd_hist.png
diff --git a/docs/image/var_hist.png b/docs/image/var_hist.png
diff --git a/docs/image/varma_hist.png b/docs/image/varma_hist.png
diff --git a/docs/tutorial/bootstrap.rst b/docs/tutorial/bootstrap.rst
diff --git a/docs/tutorial/bottom_up_parce.rst b/docs/tutorial/bottom_up_parce.rst
diff --git a/docs/tutorial/longitudinal.rst b/docs/tutorial/longitudinal.rst
diff --git a/docs/tutorial/multiple_dataset.rst b/docs/tutorial/multiple_dataset.rst
diff --git a/docs/tutorial/rcd.rst b/docs/tutorial/rcd.rst
diff --git a/docs/tutorial/var.rst b/docs/tutorial/var.rst
diff --git a/docs/tutorial/varma.rst b/docs/tutorial/varma.rst
diff --git a/examples/Bootstrap.ipynb b/examples/Bootstrap.ipynb
diff --git a/examples/BottomUpParceLiNGAM.ipynb b/examples/BottomUpParceLiNGAM.ipynb
diff --git a/examples/LongitudinalLiNGAM.ipynb b/examples/LongitudinalLiNGAM.ipynb
diff --git a/examples/MultiGroupDirectLiNGAM.ipynb b/examples/MultiGroupDirectLiNGAM.ipynb
diff --git a/examples/RCD.ipynb b/examples/RCD.ipynb
diff --git a/examples/TotalEffect.ipynb b/examples/TotalEffect.ipynb
diff --git a/examples/VARLiNGAM.ipynb b/examples/VARLiNGAM.ipynb
diff --git a/examples/VARMALiNGAM.ipynb b/examples/VARMALiNGAM.ipynb
diff --git a/lingam/bootstrap.py b/lingam/bootstrap.py
@@ -8,7 +8,7 @@
 import numpy as np
 from sklearn.utils import check_array, resample
 
-from .utils import find_all_paths
+from .utils import find_all_paths, calculate_total_effect
 
 
 class BootstrapMixin:
@@ -52,8 +52,8 @@ def bootstrap(self, X, n_sampling):
             # Calculate total effects
             for c, from_ in enumerate(self._causal_order):
                 for to in self._causal_order[c + 1 :]:
-                    total_effects[i, to, from_] = self.estimate_total_effect(
-                        resampled_X, from_, to
+                    total_effects[i, to, from_] = calculate_total_effect(
+                        self._adjacency_matrix, from_, to
                     )
 
             resampled_indices.append(resampled_index)

diff --git a/lingam/bottom_up_parce_lingam.py b/lingam/bottom_up_parce_lingam.py
@@ -13,7 +13,7 @@
 
 from .bootstrap import BootstrapResult
 from .hsic import hsic_test_gamma
-from .utils import predict_adaptive_lasso, f_correlation
+from .utils import predict_adaptive_lasso, f_correlation, calculate_total_effect
 
 
 class BottomUpParceLiNGAM:
@@ -453,6 +453,56 @@ def estimate_total_effect(self, X, from_index, to_index):
 
         return coefs[0]
 
+    def estimate_total_effect2(self, from_index, to_index):
+        """Estimate total effect using causal model.
+
+        The effect is computed from the fitted adjacency matrix
+        (``self._adjacency_matrix``) by ``calculate_total_effect``;
+        no data is passed to this method.
+
+        Parameters
+        ----------
+        from_index :
+            Index of source variable to estimate total effect.
+        to_index :
+            Index of destination variable to estimate total effect.
+
+        Returns
+        -------
+        total_effect : float
+            Estimated total effect. ``np.nan`` is returned when the row of
+            ``from_index`` in the adjacency matrix contains NaN.
+        """
+        # Check from/to causal order
+        # An entry of self._causal_order is either a single index or an
+        # iterable of indices; both forms are handled below.
+        # NOTE(review): from_order / to_order are only assigned inside these
+        # loops, so they stay unbound if the index is not found in
+        # self._causal_order -- confirm callers always pass known indices.
+        for i, order in enumerate(self._causal_order):
+            if hasattr(order, "__iter__") and from_index in order:
+                from_order = i
+                break
+            elif not hasattr(order, "__iter__") and int(from_index) == int(order):
+                from_order = i
+                break
+
+        for i, order in enumerate(self._causal_order):
+            if hasattr(order, "__iter__") and to_index in order:
+                to_order = i
+                break
+            elif not hasattr(order, "__iter__") and int(to_index) == int(order):
+                to_order = i
+                break
+
+        # Only a warning is issued here; the effect is still computed below.
+        if from_order > to_order:
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the causal order of the destination variable (to_index={to_index}) "
+                f"is earlier than the source variable (from_index={from_index})."
+            )
+
+        # Check confounders
+        # A NaN in the source variable's row is treated as a confounder
+        # marker: warn and give up with NaN instead of computing an effect.
+        if True in np.isnan(self._adjacency_matrix[from_index]):
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the source variable (from_index={from_index}) is influenced by confounders."
+            )
+            return np.nan
+
+        effect = calculate_total_effect(self._adjacency_matrix, from_index, to_index)
+        return effect
+
     def get_error_independence_p_values(self, X):
         """Calculate the p-value matrix of independence between error variables.
 
@@ -555,10 +605,10 @@ def bootstrap(self, X, n_sampling):
                         for from_item in from_:
                             total_effects[
                                 i, to, from_item
-                            ] = self.estimate_total_effect(resampled_X, from_item, to)
+                            ] = self.estimate_total_effect2(from_item, to)
                     else:
-                        total_effects[i, to, from_] = self.estimate_total_effect(
-                            resampled_X, from_, to
+                        total_effects[i, to, from_] = self.estimate_total_effect2(
+                            from_, to
                         )
 
         return BootstrapResult(adjacency_matrices, total_effects)
diff --git a/lingam/longitudinal_lingam.py b/lingam/longitudinal_lingam.py
@@ -12,7 +12,7 @@
 
 from .direct_lingam import DirectLiNGAM
 from .hsic import hsic_test_gamma
-from .utils import predict_adaptive_lasso, find_all_paths
+from .utils import predict_adaptive_lasso, find_all_paths, calculate_total_effect
 
 
 class LongitudinalLiNGAM:
@@ -151,13 +151,13 @@ def bootstrap(self, X_list, n_sampling, start_from_t=1):
                     for to in self._causal_orders[from_t][c + 1 :]:
                         total_effects[
                             i, to_t * self._p + to, from_t * self._p + from_
-                        ] = self.estimate_total_effect(resampled_X_t, from_t, from_, to_t, to)
+                        ] = self.estimate_total_effect2(from_t, from_, to_t, to)
 
                     for to_t in range(from_t + 1, self._T):
                         for to in self._causal_orders[to_t]:
                             total_effects[
                                 i, to_t * self._p + to, from_t * self._p + from_
-                            ] = self.estimate_total_effect(resampled_X_t, from_t, from_, to_t, to)
+                            ] = self.estimate_total_effect2(from_t, from_, to_t, to)
 
         return LongitudinalBootstrapResult(self._T, adjacency_matrices, total_effects)
 
@@ -166,7 +166,7 @@ def estimate_total_effect(self, X_t, from_t, from_index, to_t, to_index):
 
         Parameters
         ----------
-        X_t : array-like, shape (n_samples, n_features)
+        X_t : array-like, shape (timepoint, n_samples, n_features)
             Original data, where n_samples is the number of samples
             and n_features is the number of features.
         from _t :
@@ -220,6 +220,51 @@ def estimate_total_effect(self, X_t, from_t, from_index, to_t, to_index):
 
         return coefs[0]
 
+    def estimate_total_effect2(self, from_t, from_index, to_t, to_index):
+        """Estimate total effect using causal model.
+
+        The effect is computed by ``calculate_total_effect`` on a matrix
+        assembled from ``self._adjacency_matrices[to_t]``; no data is passed
+        to this method.
+
+        Parameters
+        ----------
+        from_t :
+            The timepoint of source variable.
+        from_index :
+            Index of source variable to estimate total effect.
+        to_t :
+            The timepoint of destination variable.
+        to_index :
+            Index of destination variable to estimate total effect.
+
+        Returns
+        -------
+        total_effect : float
+            Estimated total effect.
+        """
+        # Check from/to causal order
+        # Within one timepoint the positions in that timepoint's causal order
+        # are compared; across timepoints only the timepoints are compared.
+        # Both checks only warn -- the effect is still computed below.
+        if to_t == from_t:
+            from_order = self._causal_orders[to_t].index(from_index)
+            to_order = self._causal_orders[from_t].index(to_index)
+            if from_order > to_order:
+                warnings.warn(
+                    f"The estimated causal effect may be incorrect because "
+                    f"the causal order of the destination variable (to_t={to_t}, to_index={to_index}) "
+                    f"is earlier than the source variable (from_t={from_t}, from_index={from_index})."
+                )
+        elif to_t < from_t:
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the causal order of the destination variable (to_t={to_t}) "
+                f"is earlier than the source variable (from_t={from_t})."
+            )
+
+        # Join the matrices stored for to_t side by side (along axis 1), then
+        # append zero rows at the bottom so the result is square.
+        am = np.concatenate([*self._adjacency_matrices[to_t]], axis=1)
+        am = np.pad(am, [(0, am.shape[1] - am.shape[0]), (0, 0)])
+
+        # Shift the source index by self._p per timepoint of distance.
+        # NOTE(review): presumably this selects the column block of the
+        # matching lag in the joined matrix -- confirm against the layout of
+        # self._adjacency_matrices.
+        from_index = from_index + self._p * (to_t - from_t)
+
+        effect = calculate_total_effect(am, from_index, to_index)
+
+        return effect
+
     def get_error_independence_p_values(self):
         """Calculate the p-value matrix of independence between error variables.
 

diff --git a/lingam/multi_group_direct_lingam.py b/lingam/multi_group_direct_lingam.py
@@ -12,7 +12,7 @@
 from .bootstrap import BootstrapResult
 from .direct_lingam import DirectLiNGAM
 from .hsic import hsic_test_gamma
-from .utils import predict_adaptive_lasso
+from .utils import predict_adaptive_lasso, calculate_total_effect
 
 
 class MultiGroupDirectLiNGAM(DirectLiNGAM):
@@ -146,7 +146,7 @@ def bootstrap(self, X_list, n_sampling):
             # Calculate total effects
             for c, from_ in enumerate(self._causal_order):
                 for to in self._causal_order[c + 1 :]:
-                    effects = self.estimate_total_effect(resampled_X_list, from_, to)
+                    effects = self.estimate_total_effect2(from_, to)
                     for i, effect in enumerate(effects):
                         total_effects_list[i, n, to, from_] = effect
 
@@ -203,6 +203,38 @@ def estimate_total_effect(self, X_list, from_index, to_index):
 
         return effects
 
+    def estimate_total_effect2(self, from_index, to_index):
+        """Estimate total effect using causal model.
+
+        The effects are computed by ``calculate_total_effect`` on each matrix
+        in ``self._adjacency_matrices``; no data is passed to this method.
+
+        Parameters
+        ----------
+        from_index :
+            Index of source variable to estimate total effect.
+        to_index :
+            Index of destination variable to estimate total effect.
+
+        Returns
+        -------
+        total_effects : list
+            Estimated total effects, one entry per matrix in
+            ``self._adjacency_matrices``, in the same order.
+        """
+        # Check from/to causal order
+        # Only a warning is issued; the effects are still computed below.
+        from_order = self._causal_order.index(from_index)
+        to_order = self._causal_order.index(to_index)
+        if from_order > to_order:
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the causal order of the destination variable (to_index={to_index}) "
+                f"is earlier than the source variable (from_index={from_index})."
+            )
+
+        # One total effect per adjacency matrix.
+        effects = []
+        for am in self._adjacency_matrices:
+            effect = calculate_total_effect(am, from_index, to_index)
+            effects.append(effect)
+
+        return effects
+
     def get_error_independence_p_values(self, X_list):
         """Calculate the p-value matrix of independence between error variables.
 

diff --git a/lingam/multi_group_rcd.py b/lingam/multi_group_rcd.py
@@ -15,7 +15,7 @@
 
 from .bootstrap import BootstrapResult
 from .hsic import get_gram_matrix, get_kernel_width, hsic_test_gamma, hsic_teststat
-from .utils import predict_adaptive_lasso, f_correlation
+from .utils import predict_adaptive_lasso, f_correlation, calculate_total_effect
 
 
 class MultiGroupRCD:
@@ -178,6 +178,45 @@ def estimate_total_effect(self, X_list, from_index, to_index):
 
         return effects
 
+    def estimate_total_effect2(self, from_index, to_index):
+        """Estimate total effect using causal model.
+
+        The effects are computed by ``calculate_total_effect`` on each matrix
+        in ``self._adjacency_matrices``; no data is passed to this method.
+
+        Parameters
+        ----------
+        from_index :
+            Index of source variable to estimate total effect.
+        to_index :
+            Index of destination variable to estimate total effect.
+
+        Returns
+        -------
+        total_effects : list of float
+            Estimated total effects, one entry per matrix in
+            ``self._adjacency_matrices``, in the same order. The entry is
+            ``np.nan`` for a matrix whose row of ``from_index`` contains NaN.
+        """
+        # Check from/to ancestors
+        # Only a warning is issued; the effects are still computed below.
+        if to_index in self._ancestors_list[from_index]:
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the causal order of the destination variable (to_index={to_index}) "
+                f"is earlier than the source variable (from_index={from_index})."
+            )
+
+        effects = []
+        for am in self._adjacency_matrices:
+            # Check confounders
+            # A NaN in the source variable's row is treated as a confounder
+            # marker. Record NaN for this matrix and keep going, so the
+            # result always has one entry per matrix and later matrices are
+            # not silently dropped.
+            if np.isnan(am[from_index]).any():
+                warnings.warn(
+                    f"The estimated causal effect may be incorrect because "
+                    f"the source variable (from_index={from_index}) is influenced by confounders."
+                )
+                effects.append(np.nan)
+                continue
+
+            effects.append(calculate_total_effect(am, from_index, to_index))
+
+        return effects
+
+
     def get_error_independence_p_values(self, X_list):
         """Calculate the p-value matrix of independence between error variables.
 
@@ -258,7 +297,7 @@ def bootstrap(self, X_list, n_sampling):
             # Calculate total effects
             for to, ancestors in enumerate(self._ancestors_list):
                 for from_ in ancestors:
-                    effects = self.estimate_total_effect(resampled_X_list, from_, to)
+                    effects = self.estimate_total_effect2(from_, to)
                     for i, effect in enumerate(effects):
                         total_effects_list[i, n, to, from_] = effect
 

diff --git a/lingam/rcd.py b/lingam/rcd.py
@@ -15,7 +15,7 @@
 
 from .bootstrap import BootstrapResult
 from .hsic import get_gram_matrix, get_kernel_width, hsic_test_gamma, hsic_teststat
-from .utils import predict_adaptive_lasso, f_correlation
+from .utils import predict_adaptive_lasso, f_correlation, calculate_total_effect
 
 
 class RCD:
@@ -435,6 +435,28 @@ def estimate_total_effect(self, X, from_index, to_index):
 
         return coefs[0]
 
+    def estimate_total_effect2(self, from_index, to_index):
+        """Estimate total effect using causal model.
+
+        The effect is computed from the fitted adjacency matrix
+        (``self._adjacency_matrix``) by ``calculate_total_effect``;
+        no data is passed to this method.
+
+        Parameters
+        ----------
+        from_index :
+            Index of source variable to estimate total effect.
+        to_index :
+            Index of destination variable to estimate total effect.
+
+        Returns
+        -------
+        total_effect : float
+            Estimated total effect. ``np.nan`` is returned when the row of
+            ``from_index`` in the adjacency matrix contains NaN.
+        """
+        # Check from/to ancestors
+        # Only a warning is issued; the effect is still computed below.
+        if to_index in self._ancestors_list[from_index]:
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the causal order of the destination variable (to_index={to_index}) "
+                f"is earlier than the source variable (from_index={from_index})."
+            )
+
+        # Check confounders
+        # A NaN in the source variable's row is treated as a confounder
+        # marker: warn and give up with NaN instead of computing an effect.
+        if True in np.isnan(self._adjacency_matrix[from_index]):
+            warnings.warn(
+                f"The estimated causal effect may be incorrect because "
+                f"the source variable (from_index={from_index}) is influenced by confounders."
+            )
+            return np.nan
+
+        effect = calculate_total_effect(self._adjacency_matrix, from_index, to_index)
+
+        return effect
+
+
     def get_error_independence_p_values(self, X):
         """Calculate the p-value matrix of independence between error variables.
 
@@ -532,8 +554,8 @@ def bootstrap(self, X, n_sampling):
             # Calculate total effects
             for to, ancestors in enumerate(self._ancestors_list):
                 for from_ in ancestors:
-                    total_effects[i, to, from_] = self.estimate_total_effect(
-                        resampled_X, from_, to
+                    total_effects[i, to, from_] = self.estimate_total_effect2(
+                        from_, to
                     )
 
         return BootstrapResult(adjacency_matrices, total_effects)