
MCD Code release #1

Merged
merged 6 commits into from
Jun 22, 2024
52 changes: 52 additions & 0 deletions .github/workflows/cloc.yml
@@ -0,0 +1,52 @@
name: Count Lines of Code

# Controls when the action will run. Triggers the workflow on pull request events
on: [pull_request]

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:

  # This workflow contains a single job called "cloc"
  cloc:

    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:

      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - name: Checkout repo
        uses: actions/checkout@v3

      # Installs cloc and generates a CSV line-count report for src
      - name: Install and run cloc
        run: |
          sudo apt-get install cloc
          cloc src --csv --quiet --report-file=cloc_report.csv

      - name: Read CSV
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: ./cloc_report.csv

      - name: Create MD
        uses: petems/csv-to-md-table-action@master
        id: csv-table-output
        with:
          csvinput: ${{ steps.csv.outputs.content }}

      - name: Write file
        uses: "DamianReeves/write-file-action@master"
        with:
          path: ./cloc_report.md
          write-mode: overwrite
          contents: |
            ${{ steps.csv-table-output.outputs.markdown-table }}

      - name: PR comment with file
        uses: thollander/actions-comment-pull-request@v2
        with:
          filePath: ./cloc_report.md
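
For reference, cloc's --csv report is a small table that the read-file and csv-to-md-table steps convert into the PR comment. An illustrative report (values invented, not from this repository) looks like:

files,language,blank,comment,code
38,Python,820,410,3120
4,YAML,12,6,88
42,SUM,832,416,3208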
23 changes: 23 additions & 0 deletions .github/workflows/pylint.yml
@@ -0,0 +1,23 @@
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint
      - name: Analysing the code with pylint
        run: |
          pylint --disable=E402,E731,F541,W291,E122,E127,F401,E266,E241,C901,E741,W293,F811,W503,E203,F403,F405,B007,E0401,W0221 --max-line-length=150 $(git ls-files '*.py')
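
The same check can be reproduced locally by running the workflow's final command from the repository root (assuming pylint is available in the active Python environment):

pip install pylint
pylint --disable=E402,E731,F541,W291,E122,E127,F401,E266,E241,C901,E741,W293,F811,W503,E203,F403,F405,B007,E0401,W0221 --max-line-length=150 $(git ls-files '*.py')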
2 changes: 1 addition & 1 deletion configs/synthetic/mcd.yaml
@@ -1,6 +1,6 @@
# hyperparameters
watch_gradients: false
num_epochs: 1
num_epochs: 10000
model: mcd
monitor_checkpoint_based_on: likelihood

32 changes: 9 additions & 23 deletions src/baselines/BaselineTrainer.py
@@ -1,13 +1,9 @@

from typing import Any
import lightning.pytorch as pl
import torch.nn as nn
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np

from src.dataset.BaselineTSDataset import BaselineTSDataset
import numpy as np
from src.utils.metrics_utils import mape_loss

class BaselineTrainer(pl.LightningModule):
@@ -21,7 +17,6 @@ def __init__(self,
                 lag: int,
                 num_workers: int = 16,
                 aggregated_graph: bool = False):

        super().__init__()

        self.num_workers = num_workers
@@ -35,29 +30,20 @@ def __init__(self,
        assert adj_matrices.shape[0] == self.total_samples

        self.full_dataset = BaselineTSDataset(
            X = self.full_dataset_np,
            adj_matrix = self.adj_matrices_np,
            lag=lag,
            X = self.full_dataset_np,
            adj_matrix = self.adj_matrices_np,
            lag=lag,
            aggregated_graph=self.aggregated_graph,
            return_graph_indices=True
        )

        self.batch_size = 1

    def forward(self):
        raise NotImplementedError
    def compute_mse(self, x_current, x_pred):
        return F.mse_loss(x_current, x_pred)

    def compute_mse(self, X_current, X_pred):
        return F.mse_loss(X_current, X_pred)

    def compute_mape(self, X_current, X_pred):
        return mape_loss(X_current, X_pred)

    def training_step(self, batch, batch_idx):
        raise NotImplementedError
    def compute_mape(self, x_current, x_pred):
        return mape_loss(x_current, x_pred)

    def get_full_dataloader(self) -> DataLoader:
        return DataLoader(self.full_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        raise NotImplementedError
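
With the boilerplate removed, the base class reduces each baseline to a single predict_step override. A minimal sketch of a new baseline subclass, assuming BaselineTrainer stores the constructor's lag argument as self.lag; the class name and its trivial all-zeros predictor are hypothetical, for illustration only:

from typing import Any
import numpy as np
import torch
from src.baselines.BaselineTrainer import BaselineTrainer

class ZeroGraphTrainer(BaselineTrainer):  # hypothetical subclass, not part of this PR
    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        X, adj_matrix, _ = batch
        batch_size, _, num_nodes, _ = X.shape
        # Predict an empty temporal graph of shape [batch, lag+1, num_nodes, num_nodes],
        # mirroring the (graphs, scores, ground truth) return convention of the other trainers
        graphs = np.zeros((batch_size, self.lag + 1, num_nodes, num_nodes))
        return torch.Tensor(graphs), torch.Tensor(graphs), torch.Tensor(adj_matrix)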
20 changes: 7 additions & 13 deletions src/baselines/DYNOTEARSTrainer.py
@@ -1,14 +1,15 @@
from typing import Any
from src.baselines.BaselineTrainer import BaselineTrainer
import numpy as np
import torch

from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np, to_time_aggregated_scores_np, zero_out_diag_np, zero_out_diag_torch
# import tigramite for pcmci
from src.modules.dynotears.dynotears import from_pandas_dynamic
import networkx as nx
import pandas as pd

from src.baselines.BaselineTrainer import BaselineTrainer
from src.modules.dynotears.dynotears import from_pandas_dynamic
from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np, to_time_aggregated_scores_np, zero_out_diag_np

class DYNOTEARSTrainer(BaselineTrainer):

    def __init__(self,
@@ -43,7 +44,6 @@ def __init__(self,
                 num_workers=num_workers,
                 aggregated_graph=aggregated_graph)


        self.max_iter = max_iter
        self.lambda_w = lambda_w
        self.lambda_a = lambda_a
@@ -58,7 +58,7 @@ def __init__(self,
    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        X, adj_matrix, graph_index = batch

        batch_size, timesteps, num_nodes, data_dim = X.shape
        batch_size, timesteps, num_nodes, _ = X.shape
        assert num_nodes == self.num_nodes
        X = X.view(batch_size, timesteps, -1)

@@ -68,10 +68,8 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A

        graphs = np.zeros((batch_size, self.lag+1, num_nodes, num_nodes))
        scores = np.zeros((batch_size, self.lag+1, num_nodes, num_nodes))

        if self.group_by_graph:
            n_unique_matrices = np.max(graph_index)+1
            unique_matrices = np.unique(adj_matrix, axis=0)
        else:
            graph_index = np.zeros((batch_size))
            n_unique_matrices = 1
@@ -81,7 +79,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
            n_samples = np.sum(graph_index == i)
            for x in X[graph_index == i]:
                X_list.append(pd.DataFrame(x))

            learner = from_pandas_dynamic(
                X_list,
                p=self.lag,
@@ -102,16 +99,13 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
            # scores = np.hstack(temporal_adj_list)
            temporal_adj = [(score != 0).astype(int) for _ in range(n_samples)]
            score = [np.abs(score) for _ in range(n_samples)]

            graphs[i == graph_index] = np.array(temporal_adj)
            scores[i == graph_index] = np.array(score)


        if self.aggregated_graph:
            graphs = to_time_aggregated_graph_np(graphs)
            scores = to_time_aggregated_scores_np(scores)
        if self.ignore_self_connections:
            graphs = zero_out_diag_np(graphs)
            scores = zero_out_diag_np(scores)
        return torch.Tensor(graphs), torch.abs(torch.Tensor(scores)), torch.Tensor(adj_matrix)
        return torch.Tensor(graphs), torch.abs(torch.Tensor(scores)), torch.Tensor(adj_matrix)
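
As a usage reference, here is a minimal standalone sketch of the vendored DYNOTEARS entry point called above. It assumes from_pandas_dynamic follows the causalnex-style signature (a list of per-sample DataFrames plus the lag order p) and that the returned learner exposes networkx-style weighted edges; both are assumptions about the vendored module, not guarantees:

import numpy as np
import pandas as pd
from src.modules.dynotears.dynotears import from_pandas_dynamic

rng = np.random.default_rng(0)
X_list = [pd.DataFrame(rng.normal(size=(100, 5)))]  # one sample: 100 timesteps, 5 nodes
learner = from_pandas_dynamic(X_list, p=2)          # p: autoregressive lag order (assumed kwarg)
print(learner.edges(data=True))                     # weighted edges over lagged variables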

30 changes: 10 additions & 20 deletions src/baselines/PCMCITrainer.py
@@ -1,16 +1,17 @@
from typing import Any
from src.baselines.BaselineTrainer import BaselineTrainer
from copy import deepcopy

import numpy as np
from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np, zero_out_diag_np, zero_out_diag_torch
# import tigramite for pcmci
import tigramite
from tigramite import data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr import ParCorr
from tigramite.independence_tests.cmiknn import CMIknn
from copy import deepcopy
import torch
from src.utils.causality_utils import *

from src.utils.causality_utils import convert_temporal_to_static_adjacency_matrix, cpdag2dags
from src.baselines.BaselineTrainer import BaselineTrainer
from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np, zero_out_diag_np

"""
Large parts adapted from https://github.com/microsoft/causica
@@ -33,7 +34,6 @@ def __init__(self,
                 group_by_graph: bool = False,
                 ignore_self_connections: bool = False
                 ):

        self.group_by_graph = group_by_graph
        self.ignore_self_connections = ignore_self_connections
        if self.group_by_graph:
@@ -97,8 +97,7 @@ def _process_adj_matrix(self, adj_matrix: np.ndarray) -> np.ndarray:
    def _run_pcmci(self, pcmci, tau_max, pc_alpha):
        if self.pcmci_plus:
            return pcmci.run_pcmciplus(tau_max=tau_max, pc_alpha=pc_alpha)
        else:
            return pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha)
        return pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha)

    def _process_cpdag(self, adj_matrix: np.ndarray):
        """
@@ -109,7 +108,6 @@ def _process_cpdag(self, adj_matrix: np.ndarray):
        Returns:
            adj_matrix: np.ndarray with shape [num_possible_dags, lag+1, num_nodes, num_nodes]
        """

        lag_plus, num_nodes = adj_matrix.shape[0], adj_matrix.shape[1]
        static_temporal_graph = convert_temporal_to_static_adjacency_matrix(
            adj_matrix, conversion_type="auto_regressive"
@@ -130,22 +128,17 @@

    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        X, adj_matrix, graph_index = batch

        batch_size, timesteps, num_nodes, data_dim = X.shape
        batch_size, timesteps, num_nodes, _ = X.shape
        assert num_nodes == self.num_nodes

        X = X.view(batch_size, timesteps, -1)
        X, adj_matrix, graph_index = X.numpy(), adj_matrix.numpy(), graph_index.numpy()

        graphs = []  # np.zeros((batch_size, self.lag+1, num_nodes, num_nodes))
        new_adj_matrix = []
        if self.group_by_graph:
            n_unique_matrices = np.max(graph_index)+1
        else:
            graph_index = np.zeros((batch_size))
            n_unique_matrices = 1

        unique_matrices = np.unique(adj_matrix, axis=0)
        for i in range(n_unique_matrices):
            print(f"{i}/{n_unique_matrices}")
            n_samples = np.sum(graph_index == i)
@@ -156,13 +149,9 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
                verbosity=0)

            results = self._run_pcmci(pcmci, self.lag, self.pc_alpha)

            graph = self._process_adj_matrix(results["graph"])

            graph = self._process_cpdag(graph)

            num_possible_dags = graph.shape[0]

            new_adj_matrix.append(np.repeat(adj_matrix[graph_index==i][0][np.newaxis, ...], n_samples*num_possible_dags, axis=0))
            graphs.append(np.repeat(graph, n_samples, axis=0))

@@ -173,4 +162,5 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
        if self.ignore_self_connections:
            graphs = zero_out_diag_np(graphs)

        return torch.Tensor(graphs), torch.Tensor(graphs), torch.Tensor(new_adj_matrix)
        return torch.Tensor(graphs), torch.Tensor(graphs), torch.Tensor(new_adj_matrix)
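
For orientation, the tigramite flow that _run_pcmci wraps looks like this on synthetic data (the tau_max and pc_alpha values are illustrative; the imports match the ones at the top of this file):

import numpy as np
from tigramite import data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr import ParCorr

data = np.random.randn(500, 3)  # (timesteps, num_nodes)
pcmci = PCMCI(dataframe=pp.DataFrame(data), cond_ind_test=ParCorr(), verbosity=0)
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)
# results["graph"] encodes a link type per node pair and lag: shape (3, 3, tau_max + 1)
print(results["graph"].shape)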

15 changes: 5 additions & 10 deletions src/baselines/VARLiNGAMTrainer.py
@@ -1,11 +1,12 @@
from typing import Any
from src.baselines.BaselineTrainer import BaselineTrainer
import numpy as np

import lingam
from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np
import torch

from src.utils.data_utils.data_format_utils import to_time_aggregated_graph_np
from src.baselines.BaselineTrainer import BaselineTrainer

class VARLiNGAMTrainer(BaselineTrainer):

    def __init__(self,
@@ -17,7 +18,6 @@ def __init__(self,
                 num_workers: int = 16,
                 aggregated_graph: bool = False
                 ):

        super().__init__(full_dataset=full_dataset,
                         adj_matrices=adj_matrices,
                         data_dim=data_dim,
@@ -27,28 +27,23 @@ def __init__(self,
                         aggregated_graph=aggregated_graph)

    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        X, adj_matrix, graph_index = batch
        X, adj_matrix, _ = batch

        batch, timesteps, num_nodes, data_dim = X.shape
        batch, timesteps, num_nodes, _ = X.shape
        X = X.view(batch, timesteps, -1)

        assert num_nodes == self.num_nodes
        assert batch == 1, "VARLiNGAM needs batch size 1"

        model_pruned = lingam.VARLiNGAM(lags=self.lag, prune=True)
        model_pruned.fit(X[0])

        graph = np.transpose(np.abs(model_pruned.adjacency_matrices_) > 0, axes=[0, 2, 1])

        if graph.shape[0] != (self.lag+1):
        while graph.shape[0] != (self.lag+1):
            graph = np.concatenate((graph, np.zeros((1, num_nodes, num_nodes))), axis=0)

        graphs = [graph]

        if self.aggregated_graph:
            graphs = to_time_aggregated_graph_np(graphs)

        print(graphs)
        print(adj_matrix)
        return torch.Tensor(graphs), torch.Tensor(graphs), torch.Tensor(adj_matrix)
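
Finally, a minimal standalone sketch of the lingam call wrapped above; the shape comment reflects the zero-padding logic in predict_step rather than a guaranteed return shape:

import numpy as np
import lingam

X = np.random.randn(300, 4)  # (timesteps, num_nodes)
model = lingam.VARLiNGAM(lags=2, prune=True)
model.fit(X)
# Instantaneous (B0) and lagged coefficient matrices, stacked; predict_step above
# zero-pads when fewer than lags + 1 matrices come back.
print(model.adjacency_matrices_.shape)  # typically (lags + 1, 4, 4)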