From 0951b5c64982cd2570fb41a541f514598378ece2 Mon Sep 17 00:00:00 2001
From: Georgii Skorokhod <64529579+gskorokhod@users.noreply.github.com>
Date: Sat, 18 Nov 2023 15:59:06 +0000
Subject: [PATCH] Essence feature usage stats (#79)

---
 .github/workflows/essence-feature-stats.yml   |   9 +-
 tools/essence-feature-usage-stats/README.md   |  24 +--
 tools/essence-feature-usage-stats/main.py     |  37 +++-
 .../stats/essence_file.py                     |  75 +++++++-
 .../stats/essence_stats.py                    |  84 ++++++---
 .../utils/colour.py                           |   7 +
 .../utils/conjure.py                          |  28 ++-
 .../utils/files.py                            |  51 ++++-
 .../utils/git_utils.py                        |   3 +-
 .../essence-feature-usage-stats/utils/misc.py |  17 ++
 tools/essence-feature-usage-stats/web/csv.py  |  45 +++++
 .../web/static/script.js                      | 174 ------------------
 .../web/static/styles.css                     |  26 +--
 .../web/templates/base.html                   |   7 +-
 .../web/templates/index.html                  |  27 ++-
 .../web/templates/keyword_list.html           |  31 ----
 .../web/templates/table.html                  |  34 ++--
 17 files changed, 352 insertions(+), 327 deletions(-)
 create mode 100644 tools/essence-feature-usage-stats/web/csv.py
 delete mode 100644 tools/essence-feature-usage-stats/web/static/script.js
 delete mode 100644 tools/essence-feature-usage-stats/web/templates/keyword_list.html

diff --git a/.github/workflows/essence-feature-stats.yml b/.github/workflows/essence-feature-stats.yml
index 02aa2aa9a2..821d031a49 100644
--- a/.github/workflows/essence-feature-stats.yml
+++ b/.github/workflows/essence-feature-stats.yml
@@ -10,11 +10,15 @@ on:
       - tools/essence-feature-usage-stats/**
 
 env:
-  ESSENCE_DIR: "./EssenceCatalog"
+  ESSENCE_DIR: "./EssenceRepos"
   CONJURE_DIR: "./conjure"
-  ESSENCE_EXAMPLES_REPO: "https://github.com/conjure-cp/EssenceCatalog.git"
   CONJURE_REPO: "https://github.com/conjure-cp/conjure"
+  EXCLUDE_PATHS_REGEX: ".*autogen.*"
   OUTPUT_PATH: "./web/static/index.html"
+  MAX_N_FILES: 5000
+  ESSENCE_FILE_REPOS: >
+                  https://github.com/conjure-cp/EssenceCatalog::master,
+                  https://github.com/conjure-cp/conjure::main
   KEYWORD_BLOCKLIST: >
                   mInfo,finds,givens,enumGivens,enumLettings,lettings,
                   unnameds,strategyQ,Auto,Interactive,strategyA,trailCompact,
@@ -61,6 +65,7 @@ jobs:
         working-directory: ./tools/essence-feature-usage-stats
         
       - name: Deploy to GitHub Pages
+        if: github.event_name == 'push' # Run this step only on push events
         uses: JamesIves/github-pages-deploy-action@v4.4.3
         with:
           branch: gh-pages
diff --git a/tools/essence-feature-usage-stats/README.md b/tools/essence-feature-usage-stats/README.md
index 0429df73bb..e9d3c04cc9 100644
--- a/tools/essence-feature-usage-stats/README.md
+++ b/tools/essence-feature-usage-stats/README.md
@@ -30,24 +30,18 @@ It is deployed using GitHub Pages: see https://conjure-cp.github.io/conjure-oxid
 - Table row headers show paths to Essence example files and the size of the files (in lines of code)
 - Table cells show how often a given keyword is used in a given file
 - Cells are colour coded. Red means a keyword is NOT used in this file, orange means it's used less than average, green means it's used more
-- Columns are sortable. Click on table header cells to sort rows by how often this keyword is used in each file.
-- Sorting by the first column (the one with file names) will sort by file size
-- The section above the table is a list of Essence keywords, sorted by their total usage accross files
-- The "Show" checkboxes in this section show/hide table columns
-- The "Any", "Require", "Exclude" radio button allows you to filter the table by specific keywords:
-  - "Require" means that only files that have one or more usage of this keyword will be shown
-  - "Exclude" means that only files that don't use this keyword will be shown
-  - "Any" means that files will be shown regardless of whether they use this feature
-  - These can be combined to search for exactly the right files to test specific Essence features
+- Columns are sortable. Click on table header cells to sort rows by how often this keyword is used in each file. (See the top of the HTML page for detailed usage instructions.)
 
 ## Configuration
 
-- ESSENCE_DIR - local directory to store essence files
-- CONJURE_DIR - local directory to store conjure binaries
-- ESSENCE_EXAMPLES_REPO - repo to download Essence examples from
-- CONJURE_REPO - repo to download the latest release of conjure from
-- OUTPUT_PATH - path to save the generated HTML page. Be careful with changing this (see above).
-- KEYWORD_BLOCKLIST - comma-separated list of Essence keywords to ignore
+- `ESSENCE_DIR` - Local directory to store essence files
+- `CONJURE_DIR` - Local directory to store conjure binaries
+- `CONJURE_REPO` - GitHub repo to download conjure releases from
+- `ESSENCE_FILE_REPOS` - Comma-separated list of repos to download Essence examples from. Format: `"<repo_url>::<repo_branch>,<repo2_url>::<repo2_branch>,..."`
+- `OUTPUT_PATH` - Path to save the generated HTML page. Be careful with changing this (see above).
+- `KEYWORD_BLOCKLIST` - Comma-separated list of Essence keywords to ignore
+- `EXCLUDE_PATHS_REGEX` - Regex matched against the start of each Essence file path; matching files are excluded, e.g. `".*autogen.*"`
+- `MAX_N_FILES` - Max number of Essence files to process
 
 ---
 
diff --git a/tools/essence-feature-usage-stats/main.py b/tools/essence-feature-usage-stats/main.py
index 36438eb9cd..6805c69620 100644
--- a/tools/essence-feature-usage-stats/main.py
+++ b/tools/essence-feature-usage-stats/main.py
@@ -6,16 +6,30 @@
 from jinja2 import Environment, FileSystemLoader, select_autoescape
 
 from stats.essence_stats import EssenceStats
+from utils.misc import parse_essence_repos
+from web.csv import write_csv
 
 ENV_PATH = Path("./.env").resolve()
 load_dotenv(dotenv_path=ENV_PATH)
 
-KEYWORD_BLOCKLIST = [x.strip() for x in os.getenv("KEYWORD_BLOCKLIST").split(",")]
+KEYWORD_BLOCKLIST = [
+    x.strip().replace('"', "") for x in os.getenv("KEYWORD_BLOCKLIST").split(",")
+]
+
 ESSENCE_DIR = Path(os.getenv("ESSENCE_DIR"))
 CONJURE_DIR = Path(os.getenv("CONJURE_DIR"))
 OUTPUT_PATH = Path(os.getenv("OUTPUT_PATH"))
 CONJURE_REPO = os.getenv("CONJURE_REPO")
-ESSENCE_EXAMPLES_REPO = os.getenv("ESSENCE_EXAMPLES_REPO")
+MAX_N_FILES = int(os.getenv("MAX_N_FILES", "500"))
+MAX_N_KEYWORDS = int(os.getenv("MAX_N_KEYWORDS", "200"))
+CONJURE_VERSION = os.getenv("CONJURE_VERSION", "latest")
+
+EXCLUDE_REGEX = os.getenv("EXCLUDE_PATHS_REGEX")
+if EXCLUDE_REGEX is not None:
+    EXCLUDE_REGEX = EXCLUDE_REGEX.strip().replace('"', "")
+    EXCLUDE_REGEX = rf"{EXCLUDE_REGEX}"
+
+ESSENCE_FILE_REPOS = parse_essence_repos(os.getenv("ESSENCE_FILE_REPOS"))
 
 jinja_env = Environment(
     loader=FileSystemLoader(Path("web/templates")),
@@ -24,20 +38,25 @@
 
 if __name__ == "__main__":
     stats = EssenceStats(
-        CONJURE_DIR,
-        CONJURE_REPO,
-        ESSENCE_DIR,
-        ESSENCE_EXAMPLES_REPO,
-        "master",
-        KEYWORD_BLOCKLIST,
+        conjure_dir=CONJURE_DIR,
+        conjure_repo_url=CONJURE_REPO,
+        essence_dir=ESSENCE_DIR,
+        essence_repo_urls=ESSENCE_FILE_REPOS,
+        conjure_version=CONJURE_VERSION,
+        blocklist=KEYWORD_BLOCKLIST,
+        exclude_regex=EXCLUDE_REGEX,
+        max_n_files=MAX_N_FILES,
     )
 
+    write_csv(stats, "web/static/data.csv")
+
     timestamp = datetime.datetime.now().strftime("%d.%m.%Y - %H:%M")
     template = jinja_env.get_template("index.html")
     html = template.render(
         data={
             "essence_stats": stats,
-            "n_keywords": 200,
+            "n_keywords": MAX_N_KEYWORDS,
+            "n_files": MAX_N_FILES,
             "css_path": "styles.css",
             "script_path": "script.js",
             "timestamp": timestamp,
diff --git a/tools/essence-feature-usage-stats/stats/essence_file.py b/tools/essence-feature-usage-stats/stats/essence_file.py
index aca80aed05..ebc8e159d6 100644
--- a/tools/essence-feature-usage-stats/stats/essence_file.py
+++ b/tools/essence-feature-usage-stats/stats/essence_file.py
@@ -1,5 +1,10 @@
 import os
+import re
 from pathlib import Path
+from typing import Iterable, Optional
+
+from git import Repo
+from tqdm import tqdm
 
 from utils.conjure import get_essence_file_ast
 from utils.files import count_lines, trim_path
@@ -35,12 +40,13 @@ def __init__(self, dir_path):  # noqa: D107
         super().__init__(f"The provided path '{dir_path}' is not a valid directory")
 
 
-def find_essence_files(dir_path: str | Path):
+def find_essence_files(dir_path: str | Path, exclude_regex: str | None = None):
     """
     Find all essence files in a given directory and return a list of full paths to them.
 
     :param dir_path: path to directory
     :return: a generator of paths to essence files.
+    :param exclude_regex: regular expression to exclude certain paths.
     """
     dir_path = Path(dir_path)
 
@@ -48,18 +54,26 @@ def find_essence_files(dir_path: str | Path):
     if not dir_path.is_dir():
         raise EssenceInvalidDirectoryError
 
+    if exclude_regex is None:
+        exclude_regex = r"^$"  # If not excluding anything, set exclude regex to just match an empty string
+    pattern = re.compile(exclude_regex)
+
     # Walk through the directory and its subdirectories
     for root, _, files in os.walk(dir_path):
         for file in files:
             fpath = Path(root) / file
-            if fpath.is_file() and fpath.suffix == ".essence":
+            if (
+                fpath.is_file()
+                and fpath.suffix == ".essence"
+                and not pattern.match(str(fpath))
+            ):
                 yield fpath
 
 
 class EssenceFile:
     """EssenceFile stores keyword counts and number of lines for a given file "fpath"."""
 
-    def __init__(self, fpath: str | Path, conjure_bin_path, blocklist=None):
+    def __init__(self, fpath: str | Path, conjure_bin_path, repo=None, blocklist=None):
         """Construct an EssenceFile object from a given file path."""
         fpath = Path(fpath).resolve()
 
@@ -73,9 +87,21 @@ def __init__(self, fpath: str | Path, conjure_bin_path, blocklist=None):
             )
             self._keyword_counts = flat_keys_count(self._ast, blocklist)
             self._n_lines = count_lines(fpath)
+            self._repo = repo
         except Exception as e:
             raise EssenceFileNotParsableError(fpath, str(e)) from e
 
+    @property
+    def repo(self) -> Repo | None:
+        """Get the git repo that this file belongs to."""
+        return self._repo
+
+    def get_repo_name(self, depth=0) -> str | None:
+        """Get the repo name, trimmed to a given depth."""
+        if isinstance(self.repo, Repo):
+            return trim_path(self.repo.working_dir, depth)
+        return None
+
     @property
     def path(self) -> Path:
         """Get path to this file."""
@@ -125,7 +151,7 @@ def __hash__(self):
 
     def __eq__(self, other):
         """EssenceFile objects are considered equal if their paths are the same."""
-        return self._fpath == other._fpath
+        return self.path == other.path
 
     def __str__(self):  # noqa: D105
         return f"EssenceFile({self._fpath}): {self.n_lines} lines"
@@ -145,10 +171,14 @@ def as_json(self, path_depth=0) -> dict:
         }
 
     @staticmethod
-    def get_essence_files_from_dir(
+    def get_essence_files_from_dir(  # noqa: PLR0913
         dir_path: str | Path,
         conjure_bin_path: str | Path,
-        blocklist=None,
+        repo: Optional[Repo] = None,
+        blocklist: Optional[Iterable[str]] = None,
+        verbose: bool = False,
+        exclude_regex: Optional[str] = None,
+        max_n_files: Optional[int] = None,
     ):
         """
         Get Essence files contained in a given directory.
@@ -156,10 +186,35 @@ def get_essence_files_from_dir(
         :param dir_path: path to directory with essence files
         :param conjure_bin_path: a path to conjure binary
         :param blocklist: a list of Essence keywords to ignore
+        :param verbose: Whether to print error messages
+        :param exclude_regex: Exclude file paths that match this regular expression
+        :param max_n_files: Maximum number of files to process
+        :param repo: a Git repo that this directory belongs to (optional)
         """
-        for fpath in find_essence_files(dir_path):
+        if verbose:
+            print(f"Processing Essence files in {dir_path}...")
+        counter = 0
+
+        for fpath in tqdm(find_essence_files(dir_path, exclude_regex=exclude_regex)):
             try:
-                file = EssenceFile(fpath, conjure_bin_path, blocklist=blocklist)
+                if max_n_files is not None and counter >= max_n_files:
+                    if verbose:
+                        print(
+                            f"Max number of files ({max_n_files}) reached, terminating...",
+                        )
+                    break
+
+                file = EssenceFile(
+                    fpath,
+                    conjure_bin_path,
+                    blocklist=blocklist,
+                    repo=repo,
+                )
+                counter += 1
                 yield file
-            except Exception as e:  # noqa: PERF203
-                print(f'Could not process file "{fpath}", throws exception: {e}')
+            except Exception as e:
+                if verbose:
+                    print(f'Could not process file "{fpath}", throws exception: {e}')
+
+        if verbose:
+            print(f"{counter} Essence files processed!")
diff --git a/tools/essence-feature-usage-stats/stats/essence_stats.py b/tools/essence-feature-usage-stats/stats/essence_stats.py
index cd132ad0ba..d9d2d5c32b 100644
--- a/tools/essence-feature-usage-stats/stats/essence_stats.py
+++ b/tools/essence-feature-usage-stats/stats/essence_stats.py
@@ -1,10 +1,13 @@
 from pathlib import Path
-from typing import Optional
+from typing import Iterable, Optional, Tuple
+
+from git import Repo
 
 from stats.essence_file import EssenceFile
 from stats.essence_keyword import EssenceKeyword
 from utils.conjure import download_conjure
-from utils.git_utils import clone_or_pull
+from utils.files import trim_path
+from utils.git_utils import clone_or_pull, parse_repo_url
 
 KeywordName: type = str
 FilePath: type = str
@@ -21,33 +24,44 @@ def __init__(  # noqa: PLR0913
         self,
         conjure_dir: Path,
         conjure_repo_url: str,
-        essence_repo_dir: Path,
-        essence_repo_url: str,
-        essence_branch="master",
+        essence_dir: Path,
+        essence_repo_urls: Iterable[Tuple[str, str]],
+        conjure_version: str = "latest",
         blocklist: Optional[list[KeywordName]] = None,
+        exclude_regex: Optional[str] = None,
+        max_n_files: Optional[int] = None,
     ):
         """
         Create a new EssenceStats object.
 
         :param conjure_dir: Path to a directory containing conjure binary
         :param conjure_repo_url: GitHub URL to download conjure release from
-        :param essence_repo_dir: Local repo with Essence example files
-        :param essence_repo_url: GitHub repo with Essence example files
-        :param essence_branch: Branch to download essence files from (master by default)
+        :param essence_dir: Local directory that Essence example repos are cloned into (as <user>/<repo>)
+        :param essence_repo_urls: List of tuples - git repo urls and branches
+        :param conjure_version: Version of conjure to install (latest by default)
         :param blocklist: Essence keywords to ignore
         """
         if blocklist is None:
             blocklist = []
 
-        self._essence_repo = clone_or_pull(
-            essence_repo_dir,
-            essence_repo_url,
-            essence_branch,
-        )
+        self._max_n_files = max_n_files
+        self._exclude_regex = exclude_regex
+        self._essence_dir = essence_dir
+        self._essence_repos = []
+        for url, branch in essence_repo_urls:
+            repo_user, repo_name = parse_repo_url(url)
+            repo_path = self._essence_dir / repo_user / repo_name
+            repo = clone_or_pull(
+                repo_path,
+                url,
+                branch,
+            )
+            self._essence_repos.append(repo)
 
         self._conjure_bin = download_conjure(
             conjure_dir,
             repository_url=conjure_repo_url,
+            version=conjure_version,
         )
 
         self._blocklist = blocklist
@@ -60,20 +74,40 @@ def __init__(  # noqa: PLR0913
     @property
     def essence_dir(self) -> Path:
         """Get path to essence examples dir."""
-        return Path(self._essence_repo.working_dir)
+        return Path(self._essence_dir)
+
+    @property
+    def essence_repos(self) -> [Repo]:
+        """Get a list of Repo objects - repositories with Essence files."""
+        return self._essence_repos
+
+    def get_essence_repo_names(self, depth=2):
+        """Get Essence repos and paths to the repos, trimmed to a given depth."""
+        return [trim_path(x.working_dir, depth) for x in self._essence_repos]
 
     def _update_stats(self):
-        for file in EssenceFile.get_essence_files_from_dir(
-            self.essence_dir,
-            self._conjure_bin,
-            blocklist=self._blocklist,
-        ):
-            self._essence_files[file.get_str_path()] = file
-
-            for keyword in file.keywords:
-                if keyword not in self._essence_keywords:
-                    self._essence_keywords[keyword] = EssenceKeyword(keyword)
-                self._essence_keywords[keyword].add_file(file)
+        """Loop over all associated Essence files and update the essence files stats."""
+        for repo in self._essence_repos:
+            repo_dir = repo.working_dir
+
+            files = list(
+                EssenceFile.get_essence_files_from_dir(
+                    repo_dir,
+                    self._conjure_bin,
+                    repo=repo,
+                    blocklist=self._blocklist,
+                    exclude_regex=self._exclude_regex,
+                    max_n_files=self._max_n_files,
+                ),
+            )
+
+            for file in files:
+                self._essence_files[file.get_str_path()] = file
+
+                for keyword in file.keywords:
+                    if keyword not in self._essence_keywords:
+                        self._essence_keywords[keyword] = EssenceKeyword(keyword)
+                    self._essence_keywords[keyword].add_file(file)
 
     def get_essence_files(
         self,
diff --git a/tools/essence-feature-usage-stats/utils/colour.py b/tools/essence-feature-usage-stats/utils/colour.py
index cc91f6ee7b..c6c35c64a3 100644
--- a/tools/essence-feature-usage-stats/utils/colour.py
+++ b/tools/essence-feature-usage-stats/utils/colour.py
@@ -91,6 +91,13 @@ def as_hex(self) -> str:
         """Get the colour value as a hex string."""
         return Colour.rgb_to_hex(self.as_rgb())
 
+    def get_rgb_css_string(self, a=1.0) -> str:
+        """Get CSS colour string as RGB(A): e.g. rgba(255,255,255,0.5)."""
+        if a < 1.0:  # noqa: PLR2004
+            rgba = (*self.as_rgb(), a)
+            return "rgba" + str(rgba)
+        return "rgb" + str(self.as_rgb())
+
     def __str__(self) -> str:  # noqa: D105
         return self.as_hex()
 
diff --git a/tools/essence-feature-usage-stats/utils/conjure.py b/tools/essence-feature-usage-stats/utils/conjure.py
index f69e5fbce1..a033d6618a 100644
--- a/tools/essence-feature-usage-stats/utils/conjure.py
+++ b/tools/essence-feature-usage-stats/utils/conjure.py
@@ -5,7 +5,7 @@
 
 import requests
 
-from utils.files import download_and_extract, make_executable_recursive
+from utils.files import download_and_extract, find_file, make_executable_recursive
 from utils.git_utils import parse_repo_url
 
 HTTP_OK = 200
@@ -68,7 +68,7 @@ def get_release_id_by_version(repository_url: str, version: str) -> str | None:
         release_data = response.json()
         for release in release_data:
             if version in (release["name"], release["tag_name"]):
-                return release[id]
+                return release["id"]
 
     return None
 
@@ -83,8 +83,8 @@ def get_release_url(repository_url: str, version: str) -> str:
     return f"https://api.github.com/repos/{user}/{repo}/releases/{version}"
 
 
-def get_conjure_zip_file_url(assets, version):
-    """Get github relese asset for a release of conjure."""
+def get_conjure_zip_file_url(assets, version) -> str | None:
+    """Get GitHub release asset for a release of conjure."""
     for asset in assets:
         if asset["name"] == f"conjure-{version}-linux.zip":
             return asset["browser_download_url"]
@@ -95,7 +95,7 @@ def download_conjure(
     output_dir: Path | PathLike[str] | str,
     version="latest",
     repository_url="https://github.com/conjure-cp/conjure",
-):
+) -> Path | None:
     """
     Download conjure from GitHub and install the binary to a local directory.
 
@@ -109,7 +109,7 @@ def download_conjure(
         output_dir.mkdir(parents=True)
 
     print(
-        f"Downloading Conjure release {version} from {repository_url} to {output_dir}",
+        f"Getting conjure binary for Linux, version {version}",
     )
 
     api_url = get_release_url(repository_url, version)
@@ -123,10 +123,20 @@ def download_conjure(
         assets = release_data["assets"]
         asset_file_url = get_conjure_zip_file_url(assets, version)
 
-        download_and_extract(asset_file_url, output_dir)
-        make_executable_recursive(output_dir)
+        output_dir = output_dir / version
+
+        if not output_dir.exists():
+            print(
+                f"Downloading Conjure release {version} from {repository_url} to {output_dir}...",
+            )
+            output_dir.mkdir()
+            download_and_extract(asset_file_url, output_dir)
+            make_executable_recursive(output_dir)
+        else:
+            print(f"Conjure release {version} already exists at {output_dir}!")
+
+        conjure_path = find_file(output_dir, "conjure")
 
-        conjure_path = output_dir / f"conjure-{version}-linux" / "conjure"
         print(f"Conjure binary installed to {conjure_path.resolve()}")
         return conjure_path
     return None
diff --git a/tools/essence-feature-usage-stats/utils/files.py b/tools/essence-feature-usage-stats/utils/files.py
index 9cb3a40e26..16b614da55 100644
--- a/tools/essence-feature-usage-stats/utils/files.py
+++ b/tools/essence-feature-usage-stats/utils/files.py
@@ -54,21 +54,52 @@ def download_file(download_url: str, file_path: Path | str):
                 file.write(chunk)
 
 
-def make_executable_recursive(directory_path):
+def make_executable_recursive(path: Path):
     """Recursively make files in a directory executable."""
-    for item in directory_path.iterdir():
-        if item.is_file():
-            item.chmod(item.stat().st_mode | 0o111)  # Add execute permission for files
-        elif item.is_dir():
+    if path.is_file():
+        path.chmod(path.stat().st_mode | 0o111)
+    else:
+        for item in path.iterdir():
             make_executable_recursive(item)  # Recursively process subdirectories
 
 
-def download_and_extract(download_url: str, dir_path: Path | str):
+def download_and_extract(download_url: str, dir_path: Path | str) -> Path | None:
     """Download and extract a file from a URL to a local directory."""
-    temp_path = dir_path / "temp.zip"
-    download_file(download_url, temp_path)
+    file_path = None
+    zip_path = dir_path / "temp.zip"
+    download_file(download_url, zip_path)
 
-    with zipfile.ZipFile(temp_path, "r") as zip_ref:
+    with zipfile.ZipFile(zip_path, "r") as zip_ref:
+        conjure_names = list(
+            filter(lambda x: x.startswith("conjure"), zip_ref.namelist()),
+        )
+
+        if not conjure_names:
+            raise ValueError("No conjure files found in release!")  # noqa: TRY003
+
+        conjure_root = conjure_names[0]
+        for name in conjure_names:
+            if all(x.startswith(name) for x in conjure_names):
+                conjure_root = name
+
+        file_path = Path(zip_ref.extract(conjure_root, dir_path))
         zip_ref.extractall(dir_path)
 
-    temp_path.unlink()
+    zip_path.unlink()
+    return file_path
+
+
+def find_file(directory_path: Path, target_file_name: str) -> Path | None:
+    """Recursively search directory for a given file."""
+    directory_path = Path(directory_path)
+
+    if directory_path.is_file() and directory_path.name == target_file_name:
+        return directory_path
+
+    if directory_path.is_dir():
+        for file in directory_path.iterdir():
+            result = find_file(file, target_file_name)
+            if result is not None:
+                return result
+
+    return None  # File not found in the directory or its subdirectories
diff --git a/tools/essence-feature-usage-stats/utils/git_utils.py b/tools/essence-feature-usage-stats/utils/git_utils.py
index 09905b19b5..4c439f3d1f 100644
--- a/tools/essence-feature-usage-stats/utils/git_utils.py
+++ b/tools/essence-feature-usage-stats/utils/git_utils.py
@@ -3,7 +3,7 @@
 from typing import Tuple
 from urllib.parse import urlsplit
 
-from git import InvalidGitRepositoryError, RemoteProgress, Repo, NoSuchPathError
+from git import InvalidGitRepositoryError, NoSuchPathError, RemoteProgress, Repo
 from tqdm import tqdm
 
 
@@ -91,4 +91,5 @@ def parse_repo_url(repo_url: str) -> Tuple[str, str]:
         return user, repo
 
     elements = repo_url.split("/")
+
     return tuple(elements[:2])
diff --git a/tools/essence-feature-usage-stats/utils/misc.py b/tools/essence-feature-usage-stats/utils/misc.py
index cdc8018262..17731b1f39 100644
--- a/tools/essence-feature-usage-stats/utils/misc.py
+++ b/tools/essence-feature-usage-stats/utils/misc.py
@@ -41,3 +41,20 @@ def recurse_and_add_keys(
             recurse_and_add_keys(entry)
 
     return ans
+
+
+def parse_essence_repos(repolist, default_branch="master"):
+    """Parse config file notation (<repo url>::<branch>) for essence repos."""
+    ans = []
+
+    repos = repolist.split(",")
+    for repo in repos:
+        repo_name = repo.strip().replace('"', "")
+        branch = default_branch
+
+        if "::" in repo_name:
+            repo_name, branch = repo_name.split("::")
+
+        ans.append((repo_name, branch))
+
+    return ans
diff --git a/tools/essence-feature-usage-stats/web/csv.py b/tools/essence-feature-usage-stats/web/csv.py
new file mode 100644
index 0000000000..c1fc4a20fa
--- /dev/null
+++ b/tools/essence-feature-usage-stats/web/csv.py
@@ -0,0 +1,45 @@
+from os import PathLike
+from pathlib import Path
+
+from stats.essence_stats import EssenceStats
+
+
+def make_table_data(stats: EssenceStats, path_depth: int = 4):
+    """Convert EssenceStats to lines of a table."""
+    keywords = stats.get_essence_keywords(sort_mode="most-used")
+    files = stats.get_essence_files(sort_mode="most-lines", reverse=False)
+
+    # CSV File headings
+    yield ["EssenceFile", "LOC", "Repo", *[keyword.name for keyword in keywords]]
+
+    for file in files:
+        yield [
+            file.get_str_path(path_depth),
+            file.n_lines,
+            file.get_repo_name(depth=2),
+            *[file.get_uses(keyword.name) for keyword in keywords],
+        ]
+
+
+def make_csv_lines(stats: EssenceStats, delimiter: str = ",", path_depth: int = 4):
+    """Utility function to convert EssenceStats to CSV file lines."""  # noqa: D401
+    for line in make_table_data(stats, path_depth=path_depth):
+        yield delimiter.join([str(x) for x in line]) + "\n"
+
+
+def write_csv(
+    stats: EssenceStats,
+    fpath: Path | PathLike[str] | str,
+    delimiter: str = ",",
+    path_depth: int = 4,
+):
+    """Write essence stats to csv file."""
+    fpath = Path(fpath)
+
+    if fpath.exists() and not fpath.is_file():
+        raise ValueError("Must be a valid file!")  # noqa: TRY003
+
+    with fpath.open("w") as file:
+        file.writelines(
+            make_csv_lines(stats, delimiter=delimiter, path_depth=path_depth),
+        )
diff --git a/tools/essence-feature-usage-stats/web/static/script.js b/tools/essence-feature-usage-stats/web/static/script.js
deleted file mode 100644
index d78cee9834..0000000000
--- a/tools/essence-feature-usage-stats/web/static/script.js
+++ /dev/null
@@ -1,174 +0,0 @@
-let currentSortHeader = null;
-let keywordRules = {};
-
-function IntValueComparator(header) {
-    const index = header.cellIndex;
-    const mult = (header.dataset.order === "desc") ? -1 : 1;
-
-    return (a, b) => {
-        const aInt = parseInt(a.cells[index].textContent);
-        const bInt = parseInt(b.cells[index].textContent);
-        let ans = 0;
-
-        if (aInt > bInt) ans = 1;
-        if (aInt < bInt) ans = -1;
-
-        return ans * mult;
-    }
-}
-
-function FileLengthComparator(header) {
-    const index = header.cellIndex;
-    const mult = (header.dataset.order === "desc") ? -1 : 1;
-
-    return (a, b) => {
-        const aSize = parseInt(a.cells[index].getAttribute("n_lines"));
-        const bSize = parseInt(b.cells[index].getAttribute("n_lines"));
-        let ans = 0;
-
-        if (aSize > bSize) ans = 1;
-        if (aSize < bSize) ans = -1;
-
-        return ans * mult;
-    }
-}
-
-function toggleOrder(header) {
-    if (currentSortHeader !== null) {
-        if (currentSortHeader !== header)
-            currentSortHeader.className = "sort-none"
-    }
-    currentSortHeader = header;
-
-    if (currentSortHeader.dataset.order === "desc") {
-        currentSortHeader.dataset.order = "asc";
-        currentSortHeader.className = "sort-asc";
-    } else {
-        currentSortHeader.dataset.order = "desc";
-        currentSortHeader.className = "sort-desc";
-    }
-}
-
-function sortRows(table, header, comparator=IntValueComparator) {
-    const rows = Array.from(table.querySelectorAll("tbody tr"));
-    rows.sort(comparator(header));
-    rows.forEach(row => table.querySelector("tbody").appendChild(row));
-}
-
-function toggleCollapsibleList() {
-    let listItems = document.querySelectorAll('#essence-keywords li');
-    let showMoreButton = document.getElementById('show-more-button');
-    let collapsibleList = document.getElementById('collapsible-list');
-
-    if (showMoreButton.textContent === 'Show All') {
-        for (let i = 0; i < listItems.length; i++) {
-            listItems[i].style.display = 'list-item';
-        }
-        showMoreButton.textContent = 'Show Less';
-    } else {
-        for (let i = 5; i < listItems.length; i++) {
-            listItems[i].style.display = 'none';
-        }
-        showMoreButton.textContent = 'Show All';
-    }
-}
-
-function make_sortable_headers(table) {
-    const headers = table.querySelectorAll("th");
-    headers.forEach(header => {
-        header.addEventListener("click", (e) => {
-            toggleOrder(header);
-            if (header.id === "first-table-cell") {
-                sortRows(table, header, FileLengthComparator);
-            }
-            else {
-                sortRows(table, header);
-            }
-        });
-    });
-}
-
-function findColumnIndex(columnHeaders, columnName) {
-    let columnIndex = -1;
-    for (let i = 0; i < columnHeaders.length; i++) {
-        const header = columnHeaders[i];
-        if (header.getAttribute("data-column") === columnName) {
-            columnIndex = i;
-            break;
-        }
-    }
-    return columnIndex;
-}
-
-function make_hideable_columns(table) {
-    const checkboxes = document.querySelectorAll(".column-checkbox");
-    const rows = table.querySelectorAll("tbody tr");
-
-    checkboxes.forEach((checkbox) => {
-        checkbox.addEventListener("change", function(e) {
-            const columnName = e.target.getAttribute("data-column");
-            const columnHeaders = Array.from(table.querySelector("thead").querySelectorAll("th"));
-
-            let columnIndex = findColumnIndex(columnHeaders, columnName);
-
-            if (columnIndex !== -1) {
-                columnHeaders[columnIndex].style.display = e.target.checked ? "table-cell" : "none";
-                rows.forEach(function(row) {
-                    const cells = row.querySelectorAll("td");
-                    cells[columnIndex].style.display = e.target.checked ? "table-cell" : "none";
-                });
-            }
-        });
-    });
-}
-
-
-function make_file_controls(table) {
-    const radio_controls = document.querySelectorAll(".radio-controls");
-    radio_controls.forEach((group) => {
-        const radio_buttons = Array.from(group.getElementsByTagName("input"));
-        radio_buttons.forEach((button) => {
-            button.addEventListener("change", (e) => {
-                const columnName = e.target.parentElement.getAttribute("data-column");
-                keywordRules[columnName] = e.target.value;
-                updateRowVisibility(table);
-            })
-        })
-    })
-}
-
-
-function updateRowVisibility(table) {
-    const columnHeaders = Array.from(table.querySelector("thead").querySelectorAll("th"));
-    const rows = table.querySelectorAll("tbody tr");
-
-     rows.forEach((row) => {
-         row.hidden = false;
-     });
-
-    for (let columnName of Object.keys(keywordRules)) {
-        const option = keywordRules[columnName];
-        const columnIndex = findColumnIndex(columnHeaders, columnName);
-
-        rows.forEach((row) => {
-            const cells = row.querySelectorAll("td");
-            const usages = parseInt(cells[columnIndex].textContent);
-
-            if (option === "exclude") {
-                row.hidden = (usages > 0) || row.hidden;
-            } else if (option === "require") {
-                row.hidden = (usages === 0) || row.hidden;
-            }
-        });
-    }
-}
-
-
-document.addEventListener("DOMContentLoaded", function () {
-    console.log("DOM Loaded!");
-
-    const table = document.getElementById("sortable-table");
-    make_sortable_headers(table);
-    make_hideable_columns(table);
-    make_file_controls(table);
-});
diff --git a/tools/essence-feature-usage-stats/web/static/styles.css b/tools/essence-feature-usage-stats/web/static/styles.css
index d4093514a1..277364dac3 100644
--- a/tools/essence-feature-usage-stats/web/static/styles.css
+++ b/tools/essence-feature-usage-stats/web/static/styles.css
@@ -6,22 +6,6 @@ th {
     writing-mode: vertical-lr;
 }
 
-th.sort-asc::after {
-    content: " ▲";
-}
-
-th.sort-desc::after {
-    content: " ▼";
-}
-
-th::after {
-    margin-left: 4px;
-}
-
-#first-table-cell {
-    writing-mode: lr;
-}
-
 #essence-keywords li:not(:nth-child(-n+5)) {
     display: none;
 }
@@ -44,7 +28,13 @@ li form {
     border: none;
 }
 
-.keyword-text {
+.keyword-text, .repo-name {
     text-align: start;
     justify-self: start;
-}
\ No newline at end of file
+}
+
+table.dataTable tbody th, table.dataTable tbody td, td, th {
+    padding: 2px !important; /* Override the default styling of DataTables */
+    margin: 0 !important; /* Override the default styling of DataTables */
+    text-align: center;
+}
diff --git a/tools/essence-feature-usage-stats/web/templates/base.html b/tools/essence-feature-usage-stats/web/templates/base.html
index 6570cdb0fd..d2ba5503d4 100644
--- a/tools/essence-feature-usage-stats/web/templates/base.html
+++ b/tools/essence-feature-usage-stats/web/templates/base.html
@@ -4,9 +4,12 @@
     <meta charset="UTF-8">
     <title>Essence feature usage stats</title>
     <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Atkinson+Hyperlegible">
-    {% block stylesheets %} {% endblock %}
+    <link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css">
+    <link rel="stylesheet" href="styles.css">
 </head>
 <body>
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
+
     <h1> Essence feature usage stats </h1>
 
     <nav id="navbar">
@@ -14,9 +17,11 @@ <h1> Essence feature usage stats </h1>
             {% block nav %}{% endblock %}
         </ul>
     </nav>
+
     <main id="content">
         {% block content %}{% endblock %}
     </main>
+
     <div id="footer">
         {% block footer %}{% endblock %}
     </div>
diff --git a/tools/essence-feature-usage-stats/web/templates/index.html b/tools/essence-feature-usage-stats/web/templates/index.html
index 7c4afb6ba2..af765bab63 100644
--- a/tools/essence-feature-usage-stats/web/templates/index.html
+++ b/tools/essence-feature-usage-stats/web/templates/index.html
@@ -1,7 +1,27 @@
 {% extends "base.html" %}
 
 {% block content %}
-    {% include "keyword_list.html" %}
+    <section>
+        <h2>How to use this tool</h2>
+        <ul>
+            <li><b>Please allow up to a minute for the table to load.</b>
+                The script generates a static HTML file which is quite big, so it may take a while
+                for your browser to render all of it.</li>
+            <li>Click on table headers to sort</li>
+            <li>Shift+Click multiple headers to sort by multiple values</li>
+            <li>Use the search box in the top right corner of the table to search its contents</li>
+            <li>Use the setting in the top left corner to set the number of rows to display</li>
+            <li>Use buttons at the bottom of the table to navigate between pages</li>
+            <li>See the environment variables in the GitHub Actions workflow file for configuration</li>
+        </ul>
+    </section>
+    <section>
+        <h2>Data</h2>
+        <p>Please use the link below to download the data:</p>
+        <ul>
+            <li><a href="data.csv">CSV Table</a> (Updated: {{ data["timestamp"] }})</li>
+        </ul>
+    </section>
     <section>
         <h2>Essence feature table</h2>
         {% if "table" in data.keys() %}
@@ -18,10 +38,5 @@ <h2>Essence feature table</h2>
     Generated on: {{ data["timestamp"] }}
 {% endblock footer %}
 
-{% block stylesheets %}
-    <link rel="stylesheet" type="text/css" href="{{ data["css_path"] }}">
-{% endblock stylesheets %}
-
 {% block scripts %}
-    <script src="{{ data["script_path"] }}"></script>
 {% endblock scripts %}
diff --git a/tools/essence-feature-usage-stats/web/templates/keyword_list.html b/tools/essence-feature-usage-stats/web/templates/keyword_list.html
deleted file mode 100644
index 24f2369754..0000000000
--- a/tools/essence-feature-usage-stats/web/templates/keyword_list.html
+++ /dev/null
@@ -1,31 +0,0 @@
-<section>
-    <div id="collapsible-list">
-        {% set stats = data['essence_stats'] %}
-        {% set keywords = stats.get_essence_keywords(sort_mode='most-used')[:data.get('n_keywords', -1)] %}
-
-        <h2>Essence keywords:</h2>
-        <ol id="essence-keywords">
-            {% for keyword in keywords %}
-                <li>
-                    <form>
-                        <span class="keyword-text">
-                            {{ keyword.name }} ({{ keyword.total_usages }} usages in {{ keyword.num_files_using_keyword }} files)
-                        </span>
-                        <span class="keyword-show-control">
-                            Show: <input type="checkbox" class="column-checkbox" data-column="{{ keyword.name }}" checked>
-                        </span>
-                        <fieldset class="radio-controls" data-column="{{ keyword.name }}">
-                            <input type="radio" id="any" name="require" value="any" checked>
-                            <label for="any">Any</label><br>
-                            <input type="radio" id="require" name="require" value="require">
-                            <label for="require">Require</label><br>
-                            <input type="radio" id="exclude" name="require" value="exclude">
-                            <label for="exclude">Exclude</label><br>
-                        </fieldset>
-                    </form>
-                </li>
-            {% endfor %}
-        </ol>
-    </div>
-    <button id="show-more-button" onclick="toggleCollapsibleList()">Show All</button>
-</section>
diff --git a/tools/essence-feature-usage-stats/web/templates/table.html b/tools/essence-feature-usage-stats/web/templates/table.html
index 5a8e1ac521..1133a7a328 100644
--- a/tools/essence-feature-usage-stats/web/templates/table.html
+++ b/tools/essence-feature-usage-stats/web/templates/table.html
@@ -1,33 +1,35 @@
-<table border="1" id="sortable-table">
+<table border="1" id="data-table">
     {% set stats = data['essence_stats'] %}
     {% set keywords = stats.get_essence_keywords(sort_mode='most-used') %}
     {% set files = stats.get_essence_files(sort_mode='most-lines', reverse=False) %}
     <thead>
     <tr>
-        <th id="first-table-cell">Essence File</th>
+        <th>Essence File</th>
+        <th>File Size (LOC)</th>
+        <th>Essence File Repo</th>
         {% for essence_keyword in keywords %}
-            <th class="sort-none" data-column="{{ essence_keyword.name }}">{{ essence_keyword.name }}</th>
+            <th>{{ essence_keyword.name }}</th>
         {% endfor %}
     </tr>
     </thead>
     <tbody>
     {% for file in files %}
-        <tr hide_filters="0">
-            <td n_lines="{{ file.n_lines }}">{{ file.get_str_path(depth=2) }} <i>({{ file.n_lines }} LoC)</i></td>
+        <tr>
+            <td>{{ file.get_str_path(depth=3) }}</td>
+            <td>{{ file.n_lines }}</td>
+            <td>{{ file.get_repo_name(depth=2) }}</td>
             {% for essence_keyword in keywords %}
                 {% set n_uses = file.get_uses(essence_keyword.name) %}
-                {% set colour = essence_keyword.get_colour(n_uses).as_hex() %}
-                <td bgcolor="{{ colour }}"
-                    min_uses="{{ essence_keyword.min_usages }}"
-                    max_uses="{{ essence_keyword.max_usages }}"
-                    avg_uses="{{ essence_keyword.avg_usages }}"
-                    num_files_using_feature="{{ essence_keyword.num_files_using_feature }}"
-                    total_uses="{{ essence_keyword.total_usages }}"
-                    file_path="{{ file.path }}"
-                >
-                    {{ n_uses }}</td>
+                {% set colour = essence_keyword.get_colour(n_uses).get_rgb_css_string(0.5) %}
+                <td style="background-color: {{ colour }};">{{ n_uses }}</td>
             {% endfor %}
         </tr>
     {% endfor %}
     </tbody>
-</table>
\ No newline at end of file
+</table>
+<script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
+<script>
+    $(document).ready(function() {
+        $('#data-table').DataTable();
+    });
+</script>