diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b310993a9..7196f6729 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -20,3 +20,10 @@ updates: assignees: - "ozgurakgun" + - package-ecosystem: "pip" + directory: "/tools/essence-feature-usage-stats/" + schedule: + interval: "weekly" + assignees: + - "ozgurakgun" + diff --git a/.github/workflows/essence-feature-stats.yml b/.github/workflows/essence-feature-stats.yml new file mode 100644 index 000000000..4f5541ca5 --- /dev/null +++ b/.github/workflows/essence-feature-stats.yml @@ -0,0 +1,66 @@ + name: "tools/essence-feature-stats - Deploy to Github Pages" + + on: + push: + branches: + - main + + env: + ESSENCE_DIR: "./EssenceCatalog" + CONJURE_DIR: "./conjure" + ESSENCE_EXAMPLES_REPO: "https://github.com/conjure-cp/EssenceCatalog.git" + CONJURE_REPO: "https://github.com/conjure-cp/conjure" + OUTPUT_PATH: "./web/static/index.html" + KEYWORD_BLOCKLIST: > + mInfo,finds,givens,enumGivens,enumLettings,lettings, + unnameds,strategyQ,Auto,Interactive,strategyA,trailCompact, + nameGenState,nbExtraGivens,representations,representationsTree, + originalDomains,trailGeneralised,trailVerbose,trailRewrites, + mLanguage,language,version,mStatements,Name,Declaration,Op + + jobs: + build: + name: "tools/essence-feature-stats: Build the tool and clone EssenceCatalog repo" + + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.11"] + + permissions: + contents: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + run: pip install -r requirements.txt + working-directory: ./tools/essence-feature-usage-stats + + - name: Run main.py to generate the table + run: python main.py + working-directory: ./tools/essence-feature-usage-stats + + - name: Fix file permissions + run: chmod -v -R +rwx 
./web/static/ + working-directory: ./tools/essence-feature-usage-stats + + - name: Add the .nojekyll file + run: touch ./web/static/.nojekyll + working-directory: ./tools/essence-feature-usage-stats + + - name: Deploy to GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4.4.3 + with: + branch: gh-pages + folder: ./tools/essence-feature-usage-stats/web/static + target-folder: tools/essence-feature-usage-stats + commit-message: "Actions: Deploy the essence features usage table 🚀" + diff --git a/.gitignore b/.gitignore index 4da2522f2..b15d45ac8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,13 @@ +# rust/c++ target -solvers/**/vendor/build coverage +solvers/**/vendor/build + +# python +.env +venv +__pycache__ +.ruff_cache + +# IDE +.idea diff --git a/tools/essence-feature-usage-stats/README.md b/tools/essence-feature-usage-stats/README.md new file mode 100644 index 000000000..0429df73b --- /dev/null +++ b/tools/essence-feature-usage-stats/README.md @@ -0,0 +1,59 @@ +# essence-feature-usage-stats + +## About + +This is an internal tool for the [conjure-oxide](https://github.com/conjure-cp/conjure-oxide) project. + +It does the following: +- Given a directory containing Essence files, go through it and count how often every Essence language feature is used +- Display this data as a simple web page with a table + +The purpose of this is to make it easier to find Essence examples to test specific Essence language features, which should be useful over the course of rewriting the conjure tool stack in Rust + + +## Building + +The static HTML page is generated by running the main.py script. + +The page is placed in the ./web/static directory by default. +This can be configured using the OUTPUT_PATH environment variable, but paths to the script and stylesheet +inside the index.html template will need to be updated. +The GitHub action also assumes that path and won't work if it's changed. 
+ +It is deployed using GitHub Pages: see https://conjure-cp.github.io/conjure-oxide/tools/essence-feature-usage-stats + + +## Usage + +- Essence example files are taken from the [EssenceCatalog](https://github.com/conjure-cp/EssenceCatalog) repo. This can be changed using an environment variable - see below. +- Table headers show Essence keywords +- Table row headers show paths to Essence example files and the size of the files (in lines of code) +- Table cells show how often a given keyword is used in a given file +- Cells are colour coded. Red means a keyword is NOT used in this file, orange means it's used less than average, green means it's used more +- Columns are sortable. Click on table header cells to sort rows by how often this keyword is used in each file. +- Sorting by the first column (the one with file names) will sort by file size +- The section above the table is a list of Essence keywords, sorted by their total usage across files +- The "Show" checkboxes in this section show/hide table columns +- The "Any", "Require", "Exclude" radio buttons allow you to filter the table by specific keywords: + - "Require" means that only files that have one or more usage of this keyword will be shown + - "Exclude" means that only files that don't use this keyword will be shown + - "Any" means that files will be shown regardless of whether they use this feature + - These can be combined to search for exactly the right files to test specific Essence features + +## Configuration + +- ESSENCE_DIR - local directory to store essence files +- CONJURE_DIR - local directory to store conjure binaries +- ESSENCE_EXAMPLES_REPO - repo to download Essence examples from +- CONJURE_REPO - repo to download the latest release of conjure from +- OUTPUT_PATH - path to save the generated HTML page. Be careful with changing this (see above). 
+- KEYWORD_BLOCKLIST - comma-separated list of Essence keywords to ignore + +--- + +## Authors + +- Georgii Skorokhod and Hannah Zheng, 2023 +- University of St Andrews +- Developed as part of a Vertically Integrated Project by Ozgur Akgun et al +- (See [main repo](https://github.com/conjure-cp)) diff --git a/tools/essence-feature-usage-stats/main.py b/tools/essence-feature-usage-stats/main.py new file mode 100644 index 000000000..36438eb9c --- /dev/null +++ b/tools/essence-feature-usage-stats/main.py @@ -0,0 +1,51 @@ +import datetime +import os +from pathlib import Path + +from dotenv import load_dotenv +from jinja2 import Environment, FileSystemLoader, select_autoescape + +from stats.essence_stats import EssenceStats + +ENV_PATH = Path("./.env").resolve() +load_dotenv(dotenv_path=ENV_PATH) + +KEYWORD_BLOCKLIST = [x.strip() for x in os.getenv("KEYWORD_BLOCKLIST").split(",")] +ESSENCE_DIR = Path(os.getenv("ESSENCE_DIR")) +CONJURE_DIR = Path(os.getenv("CONJURE_DIR")) +OUTPUT_PATH = Path(os.getenv("OUTPUT_PATH")) +CONJURE_REPO = os.getenv("CONJURE_REPO") +ESSENCE_EXAMPLES_REPO = os.getenv("ESSENCE_EXAMPLES_REPO") + +jinja_env = Environment( + loader=FileSystemLoader(Path("web/templates")), + autoescape=select_autoescape(), +) + +if __name__ == "__main__": + stats = EssenceStats( + CONJURE_DIR, + CONJURE_REPO, + ESSENCE_DIR, + ESSENCE_EXAMPLES_REPO, + "master", + KEYWORD_BLOCKLIST, + ) + + timestamp = datetime.datetime.now().strftime("%d.%m.%Y - %H:%M") + template = jinja_env.get_template("index.html") + html = template.render( + data={ + "essence_stats": stats, + "n_keywords": 200, + "css_path": "styles.css", + "script_path": "script.js", + "timestamp": timestamp, + }, + ) + + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + with OUTPUT_PATH.open("w") as f: + f.write(html) + f.close() + print(f"Table created: {OUTPUT_PATH.resolve()}") diff --git a/tools/essence-feature-usage-stats/pyproject.toml b/tools/essence-feature-usage-stats/pyproject.toml new file mode 
100644 index 000000000..f7a68eea9 --- /dev/null +++ b/tools/essence-feature-usage-stats/pyproject.toml @@ -0,0 +1,23 @@ +[tool.ruff] +select = ["E", "F", "B", "I", "N", "UP", + "A", "COM", "C4", "ISC001", "ISC002", + "ICN", "G", "INP", "PIE", "Q", "RSE", + "RET", "SIM", "ARG", "D", + "FIX", "PL", "TRY", "FLY", "PERF", + "RUF", "ERA", "PTH", "SLF"] + +# 2. Avoid enforcing line-length violations (`E501`) and module docstrings (D100) +# Use line breaks at the first line of doc string (D213), so ignore D212 +# Don't use blank lines before class docstring, so ignore D203 +ignore = ["E501", "D100", "D212", "D203"] + +# 3. Unfixable rules +# ERA: Don't autoremove all commented code, I may actually need it +unfixable = ["ERA"] + +exclude = ["EssenceCatalog"] + +# 4. Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`. +[tool.ruff.per-file-ignores] +"web/colour.py" = ["PLR2004"] +"__init__.py" = ["D"] diff --git a/tools/essence-feature-usage-stats/requirements.txt b/tools/essence-feature-usage-stats/requirements.txt new file mode 100644 index 000000000..b27cecc63 --- /dev/null +++ b/tools/essence-feature-usage-stats/requirements.txt @@ -0,0 +1,19 @@ +black==23.10.1 +certifi==2023.7.22 +charset-normalizer==3.3.2 +click==8.1.7 +gitdb==4.0.11 +GitPython==3.1.40 +idna==3.4 +Jinja2==3.1.2 +MarkupSafe==2.1.3 +mypy-extensions==1.0.0 +packaging==23.2 +pathspec==0.11.2 +platformdirs==3.11.0 +python-dotenv==1.0.0 +requests==2.31.0 +ruff==0.1.3 +smmap==5.0.1 +tqdm==4.66.1 +urllib3==2.0.7 diff --git a/tools/essence-feature-usage-stats/stats/__init__.py b/tools/essence-feature-usage-stats/stats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/essence-feature-usage-stats/stats/essence_file.py b/tools/essence-feature-usage-stats/stats/essence_file.py new file mode 100644 index 000000000..aca80aed0 --- /dev/null +++ b/tools/essence-feature-usage-stats/stats/essence_file.py @@ -0,0 +1,165 @@ +import os +from pathlib import 
Path + +from utils.conjure import get_essence_file_ast +from utils.files import count_lines, trim_path +from utils.misc import flat_keys_count + + +class EssenceFileError(ValueError): + """Parent class for all errors related to parsing Essence files.""" + + +class EssenceFileInvalidPathError(EssenceFileError): + """Thrown when a path to an Essence file is invalid.""" + + def __init__(self, fpath): # noqa: D107 + super().__init__(f"Not a valid Essence file: {fpath}") + + +class EssenceFileNotParsableError(EssenceFileError): + """Thrown when an Essence file cannot be parsed.""" + + def __init__(self, fpath, msg=None): # noqa: D107 + message = f"Essence file could not be parsed: {fpath}" + if msg: + message += f", reason: {msg}" + + super().__init__(message) + + +class EssenceInvalidDirectoryError(ValueError): + """Thrown when a given directory with Essence files is not a valid directory.""" + + def __init__(self, dir_path): # noqa: D107 + super().__init__(f"The provided path '{dir_path}' is not a valid directory") + + +def find_essence_files(dir_path: str | Path): + """ + Find all essence files in a given directory and return a list of full paths to them. + + :param dir_path: path to directory + :return: a generator of paths to essence files. 
+ """ + dir_path = Path(dir_path) + + # Ensure the directory path is valid + if not dir_path.is_dir(): + raise EssenceInvalidDirectoryError + + # Walk through the directory and its subdirectories + for root, _, files in os.walk(dir_path): + for file in files: + fpath = Path(root) / file + if fpath.is_file() and fpath.suffix == ".essence": + yield fpath + + +class EssenceFile: + """EssenceFile stores keyword counts and number of lines for a given file "fpath".""" + + def __init__(self, fpath: str | Path, conjure_bin_path, blocklist=None): + """Construct an EssenceFile object from a given file path.""" + fpath = Path(fpath).resolve() + + if not (fpath.is_file() and fpath.suffix == ".essence"): + raise EssenceFileInvalidPathError(fpath) + try: + self._fpath = Path.resolve(fpath) + self._ast = get_essence_file_ast( + self._fpath, + conjure_bin_path=conjure_bin_path, + ) + self._keyword_counts = flat_keys_count(self._ast, blocklist) + self._n_lines = count_lines(fpath) + except Exception as e: + raise EssenceFileNotParsableError(fpath, str(e)) from e + + @property + def path(self) -> Path: + """Get path to this file.""" + return self._fpath + + @property + def ast(self) -> dict: + """Get the AST of this file, as provided by the `conjure pretty` tool.""" + return self._ast + + @property + def keyword_counts(self) -> dict[str, int]: + """Get a dictionary of Essence keywords and how often they appear in this file.""" + return self._keyword_counts + + @property + def keywords(self) -> set[str]: + """Get a set of Essence keywords used in the file.""" + return set(self._keyword_counts.keys()) + + @property + def n_lines(self) -> int: + """Get number of lines in the file.""" + return self._n_lines + + def get_str_path(self, depth=0) -> str: + """ + Get a formatted path to this essence file (and optionally trim it). + + :param depth: (optional) trim path, leaving a suffix of this size + :return: formatted path to file. 
+ """ + return trim_path(self._fpath, depth) + + def get_uses(self, keyword: str) -> int: + """ + Get the number of times a given keyword is used in the file. + + :param keyword: (str) the Essence keyword to count + :return: how many times this keyword is used in the file. + """ + return self._keyword_counts.get(keyword, 0) + + def __hash__(self): + """Compute a hash of this EssenceFile object. The hash of the file's path is used.""" + return hash(self._fpath) + + def __eq__(self, other): + """EssenceFile objects are considered equal if their paths are the same.""" + return self._fpath == other._fpath + + def __str__(self): # noqa: D105 + return f"EssenceFile({self._fpath}): {self.n_lines} lines" + + def as_json(self, path_depth=0) -> dict: + """ + Get file stats in json format. + + :param path_depth: (optional) trim path, leaving a suffix of this size + :return: (dict) file stats, including its path, number of lines, keywords and AST. + """ + return { + "path": self.get_str_path(path_depth), + "ast": self._ast, + "keyword_counts": self._keyword_counts, + "n_lines": self.n_lines, + } + + @staticmethod + def get_essence_files_from_dir( + dir_path: str | Path, + conjure_bin_path: str | Path, + blocklist=None, + ): + """ + Get Essence files contained in a given directory. 
+ + :param dir_path: path to directory with essence files + :param conjure_bin_path: a path to conjure binary + :param blocklist: a list of Essence keywords to ignore + """ + for fpath in find_essence_files(dir_path): + try: + file = EssenceFile(fpath, conjure_bin_path, blocklist=blocklist) + yield file + except Exception as e: # noqa: PERF203 + print(f'Could not process file "{fpath}", throws exception: {e}') diff --git a/tools/essence-feature-usage-stats/stats/essence_keyword.py b/tools/essence-feature-usage-stats/stats/essence_keyword.py new file mode 100644 index 000000000..9cd1d3878 --- /dev/null +++ b/tools/essence-feature-usage-stats/stats/essence_keyword.py @@ -0,0 +1,112 @@ +from typing import Dict + +from stats.essence_file import EssenceFile +from utils.colour import * # noqa: F403 + + +class EssenceKeyword: + """EssenceKeyword stores, for a particular keyword "name", the file uses of that keyword, and aggregate statistics.""" + + def __init__(self, name: str, files=None): + """ + Create a new EssenceKeyword object. 
+ + :param name: The Essence keyword + :param files: Collection of files that use it (more can be added after creation) + """ + if files is None: + files = [] + + self.name = name + self.total_usages = 0 + self.min_usages = None + self.max_usages = None + + self._file_usages = {} + for file in files: + self.add_file(file) + + @property + def file_usages(self) -> Dict[EssenceFile, int]: + """Get a dictionary of EssenceFile objects and usages of this keyword in these files.""" + return self._file_usages + + def add_file(self, file: EssenceFile): + """Add a file that uses this EssenceKeyword to the stats.""" + if file not in self.file_usages and file.get_uses(self.name) > 0: + usages = file.get_uses(self.name) + self.file_usages[file] = usages + self.total_usages += usages + + if self.max_usages is None: + self.max_usages = usages + else: + self.max_usages = max(self.max_usages, usages) + + if self.min_usages is None: + self.min_usages = usages + else: + self.min_usages = min(self.min_usages, usages) + + @property + def files(self): + """Get all files that use this keyword.""" + return set(self.file_usages.keys()) + + @property + def num_files_using_keyword(self) -> int: + """Get number of files that use this Essence keyword.""" + return len(self.files) + + @property + def avg_usages(self) -> float: + """Get the average number of usages of this keyword per file.""" + return float(self.total_usages) / float( + self.num_files_using_keyword, + ) + + def get_file_paths(self, depth=0) -> list: + """ + Get paths to files that use this essence keyword, trimmed to a given depth. + + :param depth: trim file paths, leaving a part of the path of this length (from the end). 
+ """ + return [x.get_str_path(depth) for x in self.files] + + def get_usages_in_file(self, file) -> int: + """Get how often this Essence keyword is used in the given file.""" + return file.get_uses(self.name) + + def as_json(self, path_depth=0) -> dict: + """Get data for this Essence keyword as a JSON.""" + return { + "name": self.name, + "used_in_files": self.get_file_paths(path_depth), + "max_usages_in_file": self.max_usages, + "min_usages_in_file": self.min_usages, + "avg_usages_per_file": self.avg_usages, + "total_usages": self.total_usages, + } + + def get_colour(self, n_uses: int) -> Colour: # noqa: F405 + """Get colour to use for this keyword's corresponding table cell.""" + avg = int(self.avg_usages) + + if n_uses == 0: + return RED # noqa: F405 + if n_uses < avg: + return get_linear_gradient_value( # noqa: F405 + n_uses, + self.min_usages, + avg, + HOT_ORANGE, # noqa: F405 + YELLOW, # noqa: F405 + ) + + return get_linear_gradient_value( # noqa: F405 + n_uses, + avg, + self.max_usages, + YELLOW, # noqa: F405 + GREEN, # noqa: F405 + ) diff --git a/tools/essence-feature-usage-stats/stats/essence_stats.py b/tools/essence-feature-usage-stats/stats/essence_stats.py new file mode 100644 index 000000000..cd132ad0b --- /dev/null +++ b/tools/essence-feature-usage-stats/stats/essence_stats.py @@ -0,0 +1,127 @@ +from pathlib import Path +from typing import Optional + +from stats.essence_file import EssenceFile +from stats.essence_keyword import EssenceKeyword +from utils.conjure import download_conjure +from utils.git_utils import clone_or_pull + +KeywordName: type = str +FilePath: type = str + +MOST_USED = "most-used" +AVG_USES = "avg-uses" +MOST_LINES = "most-lines" + + +class EssenceStats: + """Class that stores stats for a given directory with.""" + + def __init__( # noqa: PLR0913 + self, + conjure_dir: Path, + conjure_repo_url: str, + essence_repo_dir: Path, + essence_repo_url: str, + essence_branch="master", + blocklist: Optional[list[KeywordName]] = None, + ): 
+ """ + Create a new EssenceStats object. + + :param conjure_dir: Path to a directory containing conjure binary + :param conjure_repo_url: GitHub URL to download conjure release from + :param essence_repo_dir: Local repo with Essence example files + :param essence_repo_url: GitHub repo with Essence example files + :param essence_branch: Branch to download essence files from (master by default) + :param blocklist: Essence keywords to ignore + """ + if blocklist is None: + blocklist = [] + + self._essence_repo = clone_or_pull( + essence_repo_dir, + essence_repo_url, + essence_branch, + ) + + self._conjure_bin = download_conjure( + conjure_dir, + repository_url=conjure_repo_url, + ) + + self._blocklist = blocklist + + self._essence_keywords: dict[KeywordName, EssenceKeyword] = {} + self._essence_files: dict[FilePath, EssenceFile] = {} + + self._update_stats() + + @property + def essence_dir(self) -> Path: + """Get path to essence examples dir.""" + return Path(self._essence_repo.working_dir) + + def _update_stats(self): + for file in EssenceFile.get_essence_files_from_dir( + self.essence_dir, + self._conjure_bin, + blocklist=self._blocklist, + ): + self._essence_files[file.get_str_path()] = file + + for keyword in file.keywords: + if keyword not in self._essence_keywords: + self._essence_keywords[keyword] = EssenceKeyword(keyword) + self._essence_keywords[keyword].add_file(file) + + def get_essence_files( + self, + sort_mode: Optional[str] = None, + reverse: bool = True, + ) -> list[EssenceFile]: + """Get a list of all essence example files.""" + ans = list(self._essence_files.values()) + + match sort_mode: + case "most-lines": + ans.sort(key=lambda x: x.n_lines, reverse=reverse) + case _: + pass + + return ans + + def get_essence_keywords( + self, + sort_mode: Optional[str] = None, + reverse: bool = True, + ) -> list[EssenceKeyword]: + """Get all essence keywords used across all files.""" + ans = list(self._essence_keywords.values()) + + match sort_mode: + case 
"most-used": + ans.sort(key=lambda x: x.total_usages, reverse=reverse) + case "avg-uses": + ans.sort(key=lambda x: x.avg_usages, reverse=reverse) + case _: + pass + + return ans + + def get_stats_for_file(self, fpath: str) -> Optional[EssenceFile]: + """Get stats for a specific file.""" + return self._essence_files.get(fpath, None) + + def get_stats_for_keyword(self, keyword: str) -> Optional[EssenceKeyword]: + """Get stats for a specific keyword.""" + return self._essence_keywords.get(keyword, None) + + def as_json(self, path_depth=0) -> dict: + """Get the essence stats as a JSON dictionary.""" + return { + "essence_files": [x.as_json(path_depth) for x in self.get_essence_files()], + "essence_keywords": [ + x.as_json(path_depth) for x in self.get_essence_keywords() + ], + } diff --git a/tools/essence-feature-usage-stats/utils/__init__.py b/tools/essence-feature-usage-stats/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/essence-feature-usage-stats/utils/colour.py b/tools/essence-feature-usage-stats/utils/colour.py new file mode 100644 index 000000000..cc91f6ee7 --- /dev/null +++ b/tools/essence-feature-usage-stats/utils/colour.py @@ -0,0 +1,113 @@ +from collections.abc import Iterable + +from utils.maths import clamp, map_range + + +class ColourConstructorError(ValueError): + """Thrown when incorrect arguments are passed to a Colour constructor.""" + + default_message = """ + Supported inputs: + - Colour(r: int, g: int, b: int) + - Colour((r, g, b): tuple) + - Colour(hex: str) + + Values of r,g,b must be in range[0, 255] + """ + + def __init__(self, item): # noqa: D107 + self.message = f"Not a valid colour: {item}\n" + self.default_message + super(self.message) + + +class Colour: + """Represents an RGB colour value.""" + + def __init__(self, *args, **kwargs): + """ + Create a Colour object. 
+ + Valid formats: + - Colour(r: int, g: int, b: int) + - Colour((r, g, b): tuple) + - Colour(hex: str) + - Colour(r=r: int, g=g: int, b=b: int) + """ + # If we have 3 arguments, interpret them as rgb values + if len(args) >= 3: # noqa: PLR2004 + self.r, self.g, self.b = (clamp(int(x), 0, 255) for x in args[:3]) + elif "hex" in kwargs: + self.r, self.g, self.b = Colour.hex_to_rgb(kwargs["hex"]) + elif "r" in kwargs and "g" in kwargs and "b" in kwargs: + self.r, self.g, self.b = ( + clamp(int(kwargs["r"]), 0, 255), + clamp(int(kwargs["g"]), 0, 255), + clamp(int(kwargs["b"]), 0, 255), + ) + elif isinstance(args[0], str): + self.r, self.g, self.b = Colour.hex_to_rgb(args[0]) + elif isinstance(args[0], Iterable): + self.r, self.g, self.b = (clamp(int(x), 0, 255) for x in args[0][:3]) + else: + raise ColourConstructorError(args) + + @staticmethod + def hex_to_rgb(hex_string): + """Convert a HEX colour string to an RGB tuple.""" + # Remove any leading '#' if present + hex_string = hex_string.lstrip("#") + + # Check if the hex string is a valid length (it's always 6 characters long) + if len(hex_string) != 6: # noqa: PLR2004 + raise ValueError("Invalid hex string length") # noqa: TRY003 + + # Convert the hex string to RGB values + r = int(hex_string[0:2], 16) + g = int(hex_string[2:4], 16) + b = int(hex_string[4:6], 16) + + return r, g, b + + @staticmethod + def rgb_to_hex(rgb_tuple): + """Convert an RGB tuple to a HEX string.""" + # Ensure that the RGB values are in the valid range (0-255) + r, g, b = rgb_tuple + if ( + not (0 <= r <= 255) # noqa: PLR2004 + or not (0 <= g <= 255) # noqa: PLR2004 + or not (0 <= b <= 255) # noqa: PLR2004 + ): + raise ValueError("RGB values must be in the range 0-255") # noqa: TRY003 + + # Convert the RGB values to a hex string + return f"#{r:02X}{g:02X}{b:02X}" + + def as_rgb(self) -> tuple[int, int, int]: + """Get the colour value as an RGB tuple.""" + return self.r, self.g, self.b + + def as_hex(self) -> str: + """Get the colour value as 
a hex string.""" + return Colour.rgb_to_hex(self.as_rgb()) + + def __str__(self) -> str: # noqa: D105 + return self.as_hex() + + def __repr__(self) -> str: # noqa: D105 + return f"Colour({self.as_hex()})" + + +GREEN = Colour(0, 255, 0) +RED = Colour(255, 0, 0) +BLUE = Colour(0, 0, 255) +YELLOW = Colour(255, 255, 0) +HOT_ORANGE = Colour(255, 100, 0) + + +def get_linear_gradient_value(x, x_min, x_max, c_min: Colour, c_max: Colour) -> Colour: + """Given an integer value x, minumum and maximum values of x, and two colours, generate x's corresponding gradient value.""" + r = int(map_range(x, x_min, x_max, float(c_min.r), float(c_max.r))) + g = int(map_range(x, x_min, x_max, float(c_min.g), float(c_max.g))) + b = int(map_range(x, x_min, x_max, float(c_min.b), float(c_max.b))) + return Colour(r, g, b) diff --git a/tools/essence-feature-usage-stats/utils/conjure.py b/tools/essence-feature-usage-stats/utils/conjure.py new file mode 100644 index 000000000..f69e5fbce --- /dev/null +++ b/tools/essence-feature-usage-stats/utils/conjure.py @@ -0,0 +1,137 @@ +import json +import subprocess +from os import PathLike +from pathlib import Path + +import requests + +from utils.files import download_and_extract, make_executable_recursive +from utils.git_utils import parse_repo_url + +HTTP_OK = 200 + + +def get_essence_file_ast( + fpath: Path | PathLike[str] | str, + conjure_bin_path: Path | PathLike[str] | str, +) -> dict: + """ + Run the `conjure pretty` command line tool and get the parsed AST as a dict. + + :param conjure_bin_path: path to conjure binary + :param fpath: path to an essence file + :return: the Abstract Syntax Tree in json format (as a dict). + """ + result = subprocess.run( + [str(conjure_bin_path), "pretty", "--output-format=astjson", str(fpath)], + capture_output=True, + text=True, + check=True, + ) + return json.loads(result.stdout) + + +def get_version(conjure_bin_path: Path | PathLike[str] | str) -> tuple[str, str]: + """ + Get version from conjure binary. 
+ + :param conjure_bin_path: path to conjure binary + :return: tuple of (version, commit) - conjure version and git repo version (as given by conjure --version) + """ + result = subprocess.run( + [str(conjure_bin_path), "--version"], + capture_output=True, + text=True, + check=True, + ) + + version, commit = None, None + lines = result.stdout.split("\n") + for line in lines: + if "Release version" in line: + version = "v" + line.removeprefix("Release version ") + if "Repository version" in line: + commit, *ts_parts = line.removeprefix("Repository version ").split() + + return version, commit + + +def get_release_id_by_version(repository_url: str, version: str) -> str | None: + """Get release id for a specific release version of a repo from the GitHub API.""" + user, repo = parse_repo_url(repository_url) + api_url = f"https://api.github.com/repos/{user}/{repo}/releases" + response = requests.get(api_url) + + if response.status_code != HTTP_OK: + print(f"Failed to get the latest release information from {api_url}") + else: + release_data = response.json() + for release in release_data: + if version in (release["name"], release["tag_name"]): + return release[id] + + return None + + +def get_release_url(repository_url: str, version: str) -> str: + """Build the GitHub API url for a specific release version of a repo.""" + user, repo = parse_repo_url(repository_url) + + if version != "latest": + version = get_release_id_by_version(repository_url, version) + + return f"https://api.github.com/repos/{user}/{repo}/releases/{version}" + + +def get_conjure_zip_file_url(assets, version): + """Get github relese asset for a release of conjure.""" + for asset in assets: + if asset["name"] == f"conjure-{version}-linux.zip": + return asset["browser_download_url"] + return None + + +def download_conjure( + output_dir: Path | PathLike[str] | str, + version="latest", + repository_url="https://github.com/conjure-cp/conjure", +): + """ + Download conjure from GitHub and install the 
binary to a local directory. + + :param output_dir: local directory to download the conjure binary to + :param version: Conjure release version ("latest" or "vX.Y.Z") + :param repository_url: the GitHub repository URL + """ + output_dir = Path(output_dir) + if not output_dir.is_dir(): + print(f"Creating directory: {output_dir.resolve()}") + output_dir.mkdir(parents=True) + + print( + f"Downloading Conjure release {version} from {repository_url} to {output_dir}", + ) + + api_url = get_release_url(repository_url, version) + response = requests.get(api_url) + + if response.status_code != HTTP_OK: + print(f"Failed to get the latest release information from {api_url}") + else: + release_data = response.json() + version = release_data["tag_name"] + assets = release_data["assets"] + asset_file_url = get_conjure_zip_file_url(assets, version) + + download_and_extract(asset_file_url, output_dir) + make_executable_recursive(output_dir) + + conjure_path = output_dir / f"conjure-{version}-linux" / "conjure" + print(f"Conjure binary installed to {conjure_path.resolve()}") + return conjure_path + return None + + +if __name__ == "__main__": + path = download_conjure("../conjure") + print(get_version(path)) diff --git a/tools/essence-feature-usage-stats/utils/files.py b/tools/essence-feature-usage-stats/utils/files.py new file mode 100644 index 000000000..9cb3a40e2 --- /dev/null +++ b/tools/essence-feature-usage-stats/utils/files.py @@ -0,0 +1,74 @@ +import os +import zipfile +from pathlib import Path + +import requests + + +def count_lines(fpath: str | Path) -> int: + """ + Count the number of lines in a file. + + :param fpath: path to the file + :return: int, the number of lines. + """ + fpath = Path(fpath) + with fpath.open("r") as f: + return sum(1 for _ in f) + + +def trim_path(input_path: os.PathLike | Path | str, num_elements=0) -> str: + """ + Normalize path and get last N elements from the end of the path (returns whole path if num_elements is 0). 
+ + :param input_path: the path + :param num_elements: last N elements to return + :return: whole path or a part of it (str). + """ + input_path = os.path.normpath(str(input_path)) + + if num_elements == 0: + return input_path + + path_elements = input_path.split(os.path.sep) + num_elements = min( + num_elements, + len(path_elements), + ) # Ensure num_elements is not greater than the length of the path + return os.path.sep.join( + path_elements[-num_elements:], + ) # Join the last num_elements elements to form the trimmed path + + +def download_file(download_url: str, file_path: Path | str): + """Download a file from a URL to a local file.""" + file_path = Path(file_path) + + print(f"Downloading from {download_url} to {file_path.resolve()}...") + file_path.touch(exist_ok=True) + + response = requests.get(download_url, stream=True) + with file_path.open("wb") as file: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + file.write(chunk) + + +def make_executable_recursive(directory_path): + """Recursively make files in a directory executable.""" + for item in directory_path.iterdir(): + if item.is_file(): + item.chmod(item.stat().st_mode | 0o111) # Add execute permission for files + elif item.is_dir(): + make_executable_recursive(item) # Recursively process subdirectories + + +def download_and_extract(download_url: str, dir_path: Path | str): + """Download and extract a file from a URL to a local directory.""" + temp_path = dir_path / "temp.zip" + download_file(download_url, temp_path) + + with zipfile.ZipFile(temp_path, "r") as zip_ref: + zip_ref.extractall(dir_path) + + temp_path.unlink() diff --git a/tools/essence-feature-usage-stats/utils/git_utils.py b/tools/essence-feature-usage-stats/utils/git_utils.py new file mode 100644 index 000000000..09905b19b --- /dev/null +++ b/tools/essence-feature-usage-stats/utils/git_utils.py @@ -0,0 +1,94 @@ +import shutil +from pathlib import Path +from typing import Tuple +from urllib.parse import urlsplit + 
+from git import InvalidGitRepositoryError, RemoteProgress, Repo, NoSuchPathError
+from tqdm import tqdm
+
+
+class InvalidGitRemoteUrlError(ValueError):
+    """Raised when a git remote url is invalid."""
+
+    def __init__(self, repo_url):  # noqa: D107
+        super().__init__(f"Not a valid git repository url: {repo_url}")
+
+
+class CloneProgress(RemoteProgress):
+    """Progress bar for cloning a repo, shown as a percentage via tqdm."""
+
+    def __init__(self):  # noqa: D107
+        super().__init__()
+        self.pbar = tqdm(desc="Cloning repo: ", unit="%", ncols=100)
+
+    def update(  # noqa: D102
+        self,
+        op_code,  # noqa: ARG002
+        cur_count,
+        max_count=None,
+        message="",  # noqa: ARG002
+    ):
+        self.pbar.total = 100
+        # max_count defaults to None (and could be 0); dividing by it would raise.
+        # Without a known maximum, keep the previous percentage and just redraw.
+        if max_count:
+            self.pbar.n = int((cur_count / max_count) * 100)
+        self.pbar.refresh()
+
+
+def is_git_repo(path: Path | str) -> bool:
+    """
+    Check whether a given directory is a git repository.
+
+    :param path: directory to inspect
+    :return: False if the path does not exist or is not a repository, True otherwise.
+    """
+    try:
+        _ = Repo(path).git_dir
+    except (InvalidGitRepositoryError, NoSuchPathError):
+        return False
+    else:
+        return True
+
+
+def clone_or_pull(
+    directory_path: Path | str,
+    remote_url: str,
+    branch="master",
+    remote_name="origin",
+) -> Repo:
+    """
+    Clone a given GitHub repository to a given local directory, or pull latest changes if local repo exists.
+
+    :param directory_path: local directory to use
+    :param remote_url: remote repo url to pull from
+    :param remote_name: name of the remote (origin by default)
+    :param branch: branch of the remote repo to clone (master by default)
+    :return: the Repo object for the local checkout
+    """
+    directory_path = Path(directory_path)
+    directory_path.mkdir(exist_ok=True, parents=True)
+
+    if directory_path.is_dir() and is_git_repo(directory_path):
+        repo = Repo(directory_path)
+        # pull() is called without arguments, so `branch` only matters for a fresh clone.
+        repo.remote(remote_name).pull()
+    else:
+        # The directory exists but is not a repository: it is deleted (with all of
+        # its contents) so the clone starts from a clean location.
+        shutil.rmtree(directory_path)
+        repo = Repo.clone_from(
+            remote_url,
+            directory_path,
+            branch=branch,
+            progress=CloneProgress(),
+        )
+
+    return repo
+
+
+def parse_repo_url(repo_url: str) -> Tuple[str, str]:
+    """
+    Get the GitHub user and repo from a repo URL.
+
+    Accepts a full http(s) URL or a bare "user/repo" path.
+
+    :param repo_url: the GitHub repo URL
+    :return: (user, repo), with any trailing ".git" removed from repo
+    :raises InvalidGitRemoteUrlError: if the URL does not contain both a user and a repo
+    """
+    if repo_url.startswith("http"):
+        path = urlsplit(repo_url).path
+    else:
+        path = repo_url
+
+    # Ignore empty segments caused by leading, trailing or doubled slashes.
+    components = [part for part in path.split("/") if part]
+    if len(components) < 2:
+        # InvalidGitRemoteUrlError subclasses ValueError, which is what the
+        # failed tuple unpacking used to raise here.
+        raise InvalidGitRemoteUrlError(repo_url)
+
+    user, repo = components[:2]
+    # Clone URLs usually end in ".git", which is not part of the repository name.
+    return user, repo.removesuffix(".git")
diff --git a/tools/essence-feature-usage-stats/utils/maths.py b/tools/essence-feature-usage-stats/utils/maths.py
new file mode 100644
index 000000000..8e9e8fc86
--- /dev/null
+++ b/tools/essence-feature-usage-stats/utils/maths.py
@@ -0,0 +1,25 @@
+def map_range(x, in_min, in_max, out_min, out_max):
+    """
+    Map x between in_min, in_max to range between out_min and out_max.
+
+    The mapping is linear and is not clamped: an x outside the input range
+    produces a value outside the output range.
+
+    :param x: value to map
+    :param in_min: min value of x
+    :param in_max: max value of x
+    :param out_min: min value of output range
+    :param out_max: max value of output range
+    :return: mapped value
+    """
+    if in_min == in_max:
+        # Degenerate input range: avoid dividing by zero and return the middle of the output range.
+        return out_min + (out_max - out_min) / 2
+    return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min
+
+
+def clamp(x, minn, maxn):
+    """
+    Clamp x between minn and maxn.
+
+    :param x: the value to clamp
+    :param minn: the minimum value
+    :param maxn: the maximum value
+    :return: minn if x is below it, maxn if x is above it, otherwise x
+    """
+    return min(max(x, minn), maxn)
diff --git a/tools/essence-feature-usage-stats/utils/misc.py b/tools/essence-feature-usage-stats/utils/misc.py
new file mode 100644
index 000000000..cdc801826
--- /dev/null
+++ b/tools/essence-feature-usage-stats/utils/misc.py
@@ -0,0 +1,43 @@
+from typing import Any
+
+
+def flat_keys_count(
+    data: dict[Any, dict | list] | list,
+    blocklist=None,
+) -> dict[Any, int]:
+    """
+    Recurse over a dict or list (potentially with nested dicts / lists) and count all dictionary keys.
+
+    :param data: a dictionary or list containing dictionaries / lists
+    :param blocklist: collection of keys to ignore
+    :return: dict in the format of <key>:<№ of key's occurrences in data>.
+    """
+    ans = {}
+
+    def add_key(key, count=1):
+        # Blocklisted keys are dropped here; this also filters counts merged in from nested calls.
+        if (blocklist is None) or (key not in blocklist):
+            ans[key] = ans.get(key, 0) + count
+
+    def recurse_and_add_keys(item):
+        # Only containers can hold further keys; any other value is ignored.
+        if isinstance(item, (list, dict)):
+            for key, count in flat_keys_count(item).items():
+                add_key(key, count)
+
+    if isinstance(data, dict):
+        # Count the dict's own keys, then descend into each value.
+        for key, value in data.items():
+            add_key(key)
+            recurse_and_add_keys(value)
+    elif isinstance(data, list):
+        # A list has no keys of its own; only its elements can contribute.
+        for entry in data:
+            recurse_and_add_keys(entry)
+
+    return ans
diff --git a/tools/essence-feature-usage-stats/web/__init__.py b/tools/essence-feature-usage-stats/web/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tools/essence-feature-usage-stats/web/static/script.js b/tools/essence-feature-usage-stats/web/static/script.js
new file mode 100644
index 000000000..d78cee983
--- /dev/null
+++ b/tools/essence-feature-usage-stats/web/static/script.js
@@ -0,0 +1,174 @@
+// Header that the table is currently sorted by (null until the first click).
+let currentSortHeader = null;
+// Maps a column name to the "Show" option selected for it in the keyword list.
+let keywordRules = {};
+
+// Build a row comparator that orders rows by the integer text of the cell under `header`.
+// The direction is read from header.dataset.order at the time the comparator is created.
+function IntValueComparator(header) {
+    const index = header.cellIndex;
+    const mult = (header.dataset.order === "desc") ? -1 : 1;
+
+    return (a, b) => {
+        // Always pass the radix so the text is read as a decimal number.
+        const aInt = parseInt(a.cells[index].textContent, 10);
+        const bInt = parseInt(b.cells[index].textContent, 10);
+        let ans = 0;
+
+        // If either value is NaN both comparisons are false and the rows compare as equal.
+        if (aInt > bInt) ans = 1;
+        if (aInt < bInt) ans = -1;
+
+        return ans * mult;
+    };
+}
+
+// Build a row comparator that orders rows by the "n_lines" attribute of the cell under `header`.
+function FileLengthComparator(header) {
+    const index = header.cellIndex;
+    const mult = (header.dataset.order === "desc") ?
-1 : 1;
+
+    return (a, b) => {
+        // Always pass the radix so the attribute is read as a decimal number.
+        const aSize = parseInt(a.cells[index].getAttribute("n_lines"), 10);
+        const bSize = parseInt(b.cells[index].getAttribute("n_lines"), 10);
+        let ans = 0;
+
+        // If either value is NaN both comparisons are false and the rows compare as equal.
+        if (aSize > bSize) ans = 1;
+        if (aSize < bSize) ans = -1;
+
+        return ans * mult;
+    };
+}
+
+// Make `header` the active sort column and flip its direction.
+// A header with no recorded order (or "asc") becomes "desc"; "desc" becomes "asc".
+function toggleOrder(header) {
+    // Clear the sort arrow on the previously active header when the column changes.
+    if (currentSortHeader !== null && currentSortHeader !== header) {
+        currentSortHeader.className = "sort-none";
+    }
+    currentSortHeader = header;
+
+    if (currentSortHeader.dataset.order === "desc") {
+        currentSortHeader.dataset.order = "asc";
+        currentSortHeader.className = "sort-asc";
+    } else {
+        currentSortHeader.dataset.order = "desc";
+        currentSortHeader.className = "sort-desc";
+    }
+}
+
+// Re-order the table's body rows using the comparator built for `header`.
+function sortRows(table, header, comparator=IntValueComparator) {
+    const rows = Array.from(table.querySelectorAll("tbody tr"));
+    // Look the body up once instead of once per row.
+    const body = table.querySelector("tbody");
+    rows.sort(comparator(header));
+    // appendChild moves an existing node, so appending in sorted order re-orders the rows.
+    rows.forEach(row => body.appendChild(row));
+}
+
+// Switch the keyword list between showing every entry and only the first five.
+// The current state is taken from the label of the #show-more-button element.
+function toggleCollapsibleList() {
+    const listItems = document.querySelectorAll('#essence-keywords li');
+    const showMoreButton = document.getElementById('show-more-button');
+
+    if (showMoreButton.textContent === 'Show All') {
+        for (let i = 0; i < listItems.length; i++) {
+            listItems[i].style.display = 'list-item';
+        }
+        showMoreButton.textContent = 'Show Less';
+    } else {
+        // Entries 0-4 stay visible; everything after them is hidden again.
+        for (let i = 5; i < listItems.length; i++) {
+            listItems[i].style.display = 'none';
+        }
+        showMoreButton.textContent = 'Show All';
+    }
+}
+
+// Sort the table by a column whenever one of its header cells is clicked.
+function make_sortable_headers(table) {
+    const headers = table.querySelectorAll("th");
+    headers.forEach(header => {
+        header.addEventListener("click", () => {
+            toggleOrder(header);
+            // The first column is sorted by file length rather than by its text.
+            if (header.id === "first-table-cell") {
+                sortRows(table, header, FileLengthComparator);
+            }
+            else {
+                sortRows(table, header);
+            }
+        });
+    });
+}
+
+// Return the position of the header whose data-column attribute equals columnName, or -1.
+function findColumnIndex(columnHeaders, columnName) {
+    let columnIndex = -1;
+    for (let i = 0; i < columnHeaders.length; i++) {
+        const header = columnHeaders[i];
+        if (header.getAttribute("data-column") === columnName) {
+            columnIndex = i;
+            break;
+        }
+    }
+    return columnIndex;
+}
+
+// Show or hide a table column when its ".column-checkbox" checkbox changes.
+function make_hideable_columns(table) {
+    const checkboxes = document.querySelectorAll(".column-checkbox");
+    const rows = table.querySelectorAll("tbody tr");
+
+    checkboxes.forEach((checkbox) => {
+        checkbox.addEventListener("change", function(e) {
+            const columnName = e.target.getAttribute("data-column");
+            const columnHeaders = Array.from(table.querySelector("thead").querySelectorAll("th"));
+            const display = e.target.checked ? "table-cell" : "none";
+
+            const columnIndex = findColumnIndex(columnHeaders, columnName);
+
+            // Checkboxes that do not correspond to any header are ignored.
+            if (columnIndex !== -1) {
+                columnHeaders[columnIndex].style.display = display;
+                rows.forEach(function(row) {
+                    const cells = row.querySelectorAll("td");
+                    cells[columnIndex].style.display = display;
+                });
+            }
+        });
+    });
+}
+
+
+// Record the option picked in each ".radio-controls" group and re-filter the table rows.
+function make_file_controls(table) {
+    const radio_controls = document.querySelectorAll(".radio-controls");
+    radio_controls.forEach((group) => {
+        const radio_buttons = Array.from(group.getElementsByTagName("input"));
+        radio_buttons.forEach((button) => {
+            button.addEventListener("change", (e) => {
+                // The column name is stored on the element that directly contains the input.
+                const columnName = e.target.parentElement.getAttribute("data-column");
+                keywordRules[columnName] = e.target.value;
+                updateRowVisibility(table);
+            });
+        });
+    });
+}
+
+
+// Re-apply every rule in keywordRules: a row stays visible only if it passes all of them.
+function updateRowVisibility(table) {
+    const columnHeaders = Array.from(table.querySelector("thead").querySelectorAll("th"));
+    const rows = table.querySelectorAll("tbody tr");
+
+    // Start from "everything visible"; the rules below can only hide rows.
+    rows.forEach((row) => {
+        row.hidden = false;
+    });
+
+    for (const columnName of Object.keys(keywordRules)) {
+        const option = keywordRules[columnName];
+        const columnIndex = findColumnIndex(columnHeaders, columnName);
+
+        // A rule whose column has no header cannot be evaluated; indexing cells with -1
+        // would yield undefined and throw on .textContent.
+        if (columnIndex === -1) continue;
+
+        rows.forEach((row) => {
+            const cells = row.querySelectorAll("td");
+            const usages = parseInt(cells[columnIndex].textContent, 10);
+
+            if (option === "exclude") {
+                row.hidden = (usages > 0) || row.hidden;
+            } else if (option === "require") {
+                row.hidden = (usages === 0) || row.hidden;
+            }
+        });
+    }
+}
+
+
+// Wire up sorting, column hiding and row filtering once the page has been parsed.
+document.addEventListener("DOMContentLoaded", function () {
+    console.log("DOM Loaded!");
+
+    const table = document.getElementById("sortable-table");
+    // Without the table every setup function below would throw on a null reference.
+    if (table === null) {
+        console.warn("No element with id 'sortable-table' found; table controls are disabled.");
+        return;
+    }
+    make_sortable_headers(table);
+    make_hideable_columns(table);
+    make_file_controls(table);
+});
diff --git a/tools/essence-feature-usage-stats/web/static/styles.css b/tools/essence-feature-usage-stats/web/static/styles.css
new file mode 100644
index 000000000..d4093514a
--- /dev/null
+++ b/tools/essence-feature-usage-stats/web/static/styles.css
@@ -0,0 +1,50 @@
+html {
+    font-family: "Atkinson Hyperlegible", sans-serif;
+}
+
+th {
+    writing-mode: vertical-lr;
+}
+
+th.sort-asc::after {
+    content: " ▲";
+}
+
+th.sort-desc::after {
+    content: " ▼";
+}
+
+th::after {
+    margin-left: 4px;
+}
+
+#first-table-cell {
+    writing-mode: horizontal-tb; /* "lr" is a deprecated SVG-only value; this is the CSS equivalent */
+}
+
+#essence-keywords li:not(:nth-child(-n+5)) {
+    display: none;
+}
+
+li {
+    width: 50%;
+}
+
+li form {
+    display: grid;
+    grid-template-columns: 2fr 1fr 2fr;
+    gap: 1rem;
+    justify-items: center;
+    align-items: center;
+}
+
+.radio-controls {
+    display: flex;
+    gap: 0.5rem;
+    border: none;
+}
+
+.keyword-text {
+    text-align: start;
+    justify-self: start;
+}
\ No newline at end of file
diff --git a/tools/essence-feature-usage-stats/web/templates/__init__.py b/tools/essence-feature-usage-stats/web/templates/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tools/essence-feature-usage-stats/web/templates/base.html b/tools/essence-feature-usage-stats/web/templates/base.html
new file mode 100644
index 000000000..6570cdb0f
--- /dev/null
+++ b/tools/essence-feature-usage-stats/web/templates/base.html
@@ -0,0 +1,26 @@
+
+
+
+
+    Essence feature usage stats
+
+    {% block stylesheets %} {% endblock %}
+
+
+

Essence feature usage stats

+ + +
+ {% block content %}{% endblock %} +
+ + + {% block scripts %}{% endblock %} + + diff --git a/tools/essence-feature-usage-stats/web/templates/index.html b/tools/essence-feature-usage-stats/web/templates/index.html new file mode 100644 index 000000000..7c4afb6ba --- /dev/null +++ b/tools/essence-feature-usage-stats/web/templates/index.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} + +{% block content %} + {% include "keyword_list.html" %} +
+

Essence feature table

+ {% if "table" in data.keys() %} + {{ data["table"] }} + {% else %} + {# Include the table.html template here #} + {% include "table.html" %} + {% endif %} +
+{% endblock content %} + +{% block footer %} +
+ Generated on: {{ data["timestamp"] }} +{% endblock footer %} + +{% block stylesheets %} + +{% endblock stylesheets %} + +{% block scripts %} + +{% endblock scripts %} diff --git a/tools/essence-feature-usage-stats/web/templates/keyword_list.html b/tools/essence-feature-usage-stats/web/templates/keyword_list.html new file mode 100644 index 000000000..24f236975 --- /dev/null +++ b/tools/essence-feature-usage-stats/web/templates/keyword_list.html @@ -0,0 +1,31 @@ +
+
+ {% set stats = data['essence_stats'] %} + {% set keywords = stats.get_essence_keywords(sort_mode='most-used')[:data.get('n_keywords', -1)] %} + +

Essence keywords:

+
    + {% for keyword in keywords %} +
  1. +
    + + {{ keyword.name }} ({{ keyword.total_usages }} usages in {{ keyword.num_files_using_keyword }} files) + + + Show: + +
    + +
    + +
    + +
    +
    +
    +
  2. + {% endfor %} +
+
+ +
diff --git a/tools/essence-feature-usage-stats/web/templates/table.html b/tools/essence-feature-usage-stats/web/templates/table.html new file mode 100644 index 000000000..5a8e1ac52 --- /dev/null +++ b/tools/essence-feature-usage-stats/web/templates/table.html @@ -0,0 +1,33 @@ + + {% set stats = data['essence_stats'] %} + {% set keywords = stats.get_essence_keywords(sort_mode='most-used') %} + {% set files = stats.get_essence_files(sort_mode='most-lines', reverse=False) %} + + + + {% for essence_keyword in keywords %} + + {% endfor %} + + + + {% for file in files %} + + + {% for essence_keyword in keywords %} + {% set n_uses = file.get_uses(essence_keyword.name) %} + {% set colour = essence_keyword.get_colour(n_uses).as_hex() %} + + {% endfor %} + + {% endfor %} + +
Essence File{{ essence_keyword.name }}
{{ file.get_str_path(depth=2) }} ({{ file.n_lines }} LoC) + {{ n_uses }}
\ No newline at end of file