Skip to content

Commit

Permalink
Essence feature usage stats (#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
gskorokhod authored Nov 18, 2023
1 parent f552826 commit 0951b5c
Show file tree
Hide file tree
Showing 17 changed files with 352 additions and 327 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/essence-feature-stats.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ on:
- tools/essence-feature-usage-stats/**

env:
ESSENCE_DIR: "./EssenceCatalog"
ESSENCE_DIR: "./EssenceRepos"
CONJURE_DIR: "./conjure"
ESSENCE_EXAMPLES_REPO: "https://github.com/conjure-cp/EssenceCatalog.git"
CONJURE_REPO: "https://github.com/conjure-cp/conjure"
EXCLUDE_PATHS_REGEX: ".*autogen.*"
OUTPUT_PATH: "./web/static/index.html"
MAX_N_FILES: 5000
ESSENCE_FILE_REPOS: >
https://github.com/conjure-cp/EssenceCatalog::master,
https://github.com/conjure-cp/conjure::main
KEYWORD_BLOCKLIST: >
mInfo,finds,givens,enumGivens,enumLettings,lettings,
unnameds,strategyQ,Auto,Interactive,strategyA,trailCompact,
Expand Down Expand Up @@ -61,6 +65,7 @@ jobs:
working-directory: ./tools/essence-feature-usage-stats

- name: Deploy to GitHub Pages
if: github.event_name == 'push' # Run this step only on push events
uses: JamesIves/github-pages-deploy-action@v4.4.3
with:
branch: gh-pages
Expand Down
24 changes: 9 additions & 15 deletions tools/essence-feature-usage-stats/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,18 @@ It is deployed using GitHub Pages: see https://conjure-cp.github.io/conjure-oxid
- Table row headers show paths to Essence example files and the size of the files (in lines of code)
- Table cells show how often a given keyword is used in a given file
- Cells are colour coded. Red means a keyword is NOT used in this file, orange means it's used less than average, green means it's used more
- Columns are sortable. Click on table header cells to sort rows by how often this keyword is used in each file.
- Sorting by the first column (the one with file names) will sort by file size
- The section above the table is a list of Essence keywords, sorted by their total usage across files
- The "Show" checkboxes in this section show/hide table columns
- The "Any", "Require", "Exclude" radio button allows you to filter the table by specific keywords:
- "Require" means that only files that have one or more usage of this keyword will be shown
- "Exclude" means that only files that don't use this keyword will be shown
- "Any" means that files will be shown regardless of whether they use this feature
- These can be combined to search for exactly the right files to test specific Essence features
- Columns are sortable. Click on table header cells to sort rows by how often this keyword is used in each file. (See the top of the HTML page for detailed usage instructions.)

## Configuration

- ESSENCE_DIR - local directory to store essence files
- CONJURE_DIR - local directory to store conjure binaries
- ESSENCE_EXAMPLES_REPO - repo to download Essence examples from
- CONJURE_REPO - repo to download the latest release of conjure from
- OUTPUT_PATH - path to save the generated HTML page. Be careful with changing this (see above).
- KEYWORD_BLOCKLIST - comma-separated list of Essence keywords to ignore
- `ESSENCE_DIR` - Local directory to store essence files
- `CONJURE_DIR` - Local directory to store conjure binaries
- `CONJURE_REPO` - GitHub repo to download conjure releases from
- `ESSENCE_FILE_REPOS` - Comma-separated list of repos to download Essence examples from. Format: `"<repo_url>::<repo_branch>,<repo2_url>::<repo2_branch>,..."`
- `OUTPUT_PATH` - Path to save the generated HTML page. Be careful with changing this (see above).
- `KEYWORD_BLOCKLIST` - Comma-separated list of Essence keywords to ignore
- `EXCLUDE_PATHS_REGEX` - Regex to exclude Essence files (matched from the start of each file path), e.g. `".*autogen.*"`
- `MAX_N_FILES` - Max number of Essence files to process

---

Expand Down
37 changes: 28 additions & 9 deletions tools/essence-feature-usage-stats/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,30 @@
from jinja2 import Environment, FileSystemLoader, select_autoescape

from stats.essence_stats import EssenceStats
from utils.misc import parse_essence_repos
from web.csv import write_csv

ENV_PATH = Path("./.env").resolve()
load_dotenv(dotenv_path=ENV_PATH)

KEYWORD_BLOCKLIST = [x.strip() for x in os.getenv("KEYWORD_BLOCKLIST").split(",")]
KEYWORD_BLOCKLIST = [
x.strip().replace('"', "") for x in os.getenv("KEYWORD_BLOCKLIST").split(",")
]

ESSENCE_DIR = Path(os.getenv("ESSENCE_DIR"))
CONJURE_DIR = Path(os.getenv("CONJURE_DIR"))
OUTPUT_PATH = Path(os.getenv("OUTPUT_PATH"))
CONJURE_REPO = os.getenv("CONJURE_REPO")
ESSENCE_EXAMPLES_REPO = os.getenv("ESSENCE_EXAMPLES_REPO")
MAX_N_FILES = int(os.getenv("MAX_N_FILES", "500"))
MAX_N_KEYWORDS = int(os.getenv("MAX_N_KEYWORDS", "200"))
CONJURE_VERSION = os.getenv("CONJURE_VERSION", "latest")

EXCLUDE_REGEX = os.getenv("EXCLUDE_PATHS_REGEX")
if EXCLUDE_REGEX is not None:
EXCLUDE_REGEX = EXCLUDE_REGEX.strip().replace('"', "")
EXCLUDE_REGEX = rf"{EXCLUDE_REGEX}"

ESSENCE_FILE_REPOS = parse_essence_repos(os.getenv("ESSENCE_FILE_REPOS"))

jinja_env = Environment(
loader=FileSystemLoader(Path("web/templates")),
Expand All @@ -24,20 +38,25 @@

if __name__ == "__main__":
stats = EssenceStats(
CONJURE_DIR,
CONJURE_REPO,
ESSENCE_DIR,
ESSENCE_EXAMPLES_REPO,
"master",
KEYWORD_BLOCKLIST,
conjure_dir=CONJURE_DIR,
conjure_repo_url=CONJURE_REPO,
essence_dir=ESSENCE_DIR,
essence_repo_urls=ESSENCE_FILE_REPOS,
conjure_version=CONJURE_VERSION,
blocklist=KEYWORD_BLOCKLIST,
exclude_regex=EXCLUDE_REGEX,
max_n_files=MAX_N_FILES,
)

write_csv(stats, "web/static/data.csv")

timestamp = datetime.datetime.now().strftime("%d.%m.%Y - %H:%M")
template = jinja_env.get_template("index.html")
html = template.render(
data={
"essence_stats": stats,
"n_keywords": 200,
"n_keywords": MAX_N_KEYWORDS,
"n_files": MAX_N_FILES,
"css_path": "styles.css",
"script_path": "script.js",
"timestamp": timestamp,
Expand Down
75 changes: 65 additions & 10 deletions tools/essence-feature-usage-stats/stats/essence_file.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import os
import re
from pathlib import Path
from typing import Iterable, Optional

from git import Repo
from tqdm import tqdm

from utils.conjure import get_essence_file_ast
from utils.files import count_lines, trim_path
Expand Down Expand Up @@ -35,31 +40,40 @@ def __init__(self, dir_path): # noqa: D107
super().__init__(f"The provided path '{dir_path}' is not a valid directory")


def find_essence_files(dir_path: str | Path):
def find_essence_files(dir_path: str | Path, exclude_regex: str | None = None):
"""
Find all essence files in a given directory and return a list of full paths to them.
:param dir_path: path to directory
:return: a generator of paths to essence files.
:param exclude_regex: regular expression to exclude certain paths.
"""
dir_path = Path(dir_path)

# Ensure the directory path is valid
if not dir_path.is_dir():
raise EssenceInvalidDirectoryError

if exclude_regex is None:
exclude_regex = r"^$" # If not excluding anything, set exclude regex to just match an empty string
pattern = re.compile(exclude_regex)

# Walk through the directory and its subdirectories
for root, _, files in os.walk(dir_path):
for file in files:
fpath = Path(root) / file
if fpath.is_file() and fpath.suffix == ".essence":
if (
fpath.is_file()
and fpath.suffix == ".essence"
and not pattern.match(str(fpath))
):
yield fpath


class EssenceFile:
"""EssenceFile stores keyword counts and number of lines for a given file "fpath"."""

def __init__(self, fpath: str | Path, conjure_bin_path, blocklist=None):
def __init__(self, fpath: str | Path, conjure_bin_path, repo=None, blocklist=None):
"""Construct an EssenceFile object from a given file path."""
fpath = Path(fpath).resolve()

Expand All @@ -73,9 +87,21 @@ def __init__(self, fpath: str | Path, conjure_bin_path, blocklist=None):
)
self._keyword_counts = flat_keys_count(self._ast, blocklist)
self._n_lines = count_lines(fpath)
self._repo = repo
except Exception as e:
raise EssenceFileNotParsableError(fpath, str(e)) from e

@property
def repo(self) -> Repo | None:
    """Return the git repo given for this file at construction, or None if none was given."""
    return self._repo

def get_repo_name(self, depth=0) -> str | None:
    """
    Get the name of the repo this file belongs to.

    :param depth: depth passed to ``trim_path`` when trimming the repo's
        working directory
    :return: the trimmed working directory of the repo, or None when this
        file has no associated ``Repo`` object
    """
    repo = self.repo
    if not isinstance(repo, Repo):
        return None
    return trim_path(repo.working_dir, depth)

@property
def path(self) -> Path:
"""Get path to this file."""
Expand Down Expand Up @@ -125,7 +151,7 @@ def __hash__(self):

def __eq__(self, other):
    """
    EssenceFile objects are considered equal if their paths are the same.

    Returns ``NotImplemented`` for objects that are not ``EssenceFile``
    instances, so Python can try the reflected comparison and otherwise
    treat the objects as unequal instead of raising ``AttributeError``.
    """
    if not isinstance(other, EssenceFile):
        return NotImplemented
    return self.path == other.path

def __str__(self): # noqa: D105
return f"EssenceFile({self._fpath}): {self.n_lines} lines"
Expand All @@ -145,21 +171,50 @@ def as_json(self, path_depth=0) -> dict:
}

@staticmethod
def get_essence_files_from_dir(  # noqa: PLR0913
    dir_path: str | Path,
    conjure_bin_path: str | Path,
    repo: Optional[Repo] = None,
    blocklist: Optional[Iterable[str]] = None,
    verbose: bool = False,
    exclude_regex: Optional[str] = None,
    max_n_files: Optional[int] = None,
):
    """
    Get Essence files contained in a given directory.

    This is a generator: it yields one EssenceFile per file that could be
    constructed. Files that raise during construction are skipped and do not
    count towards ``max_n_files``.

    :param dir_path: path to directory with essence files
    :param conjure_bin_path: a path to conjure binary
    :param repo: a Git repo that this directory belongs to (optional)
    :param blocklist: a list of Essence keywords to ignore
    :param verbose: Whether to print progress and error messages
    :param exclude_regex: Exclude file paths that match this regular expression
    :param max_n_files: Maximum number of files to process (None for no limit)
    """
    if verbose:
        print(f"Processing Essence files in {dir_path}...")
    counter = 0

    for fpath in tqdm(find_essence_files(dir_path, exclude_regex=exclude_regex)):
        # The limit check cannot fail, so it stays outside the try block.
        if max_n_files is not None and counter >= max_n_files:
            if verbose:
                print(
                    f"Max number of files ({max_n_files}) reached, terminating...",
                )
            break

        # Only construction is guarded: one unparsable file must not stop
        # the scan of the remaining files.
        try:
            file = EssenceFile(
                fpath,
                conjure_bin_path,
                blocklist=blocklist,
                repo=repo,
            )
        except Exception as e:  # noqa: PERF203
            if verbose:
                print(f'Could not process file "{fpath}", throws exception: {e}')
            continue

        counter += 1
        yield file

    if verbose:
        print(f"{counter} Essence files processed!")
84 changes: 59 additions & 25 deletions tools/essence-feature-usage-stats/stats/essence_stats.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from pathlib import Path
from typing import Optional
from typing import Iterable, Optional, Tuple

from git import Repo

from stats.essence_file import EssenceFile
from stats.essence_keyword import EssenceKeyword
from utils.conjure import download_conjure
from utils.git_utils import clone_or_pull
from utils.files import trim_path
from utils.git_utils import clone_or_pull, parse_repo_url

KeywordName: type = str
FilePath: type = str
Expand All @@ -21,33 +24,44 @@ def __init__( # noqa: PLR0913
self,
conjure_dir: Path,
conjure_repo_url: str,
essence_repo_dir: Path,
essence_repo_url: str,
essence_branch="master",
essence_dir: Path,
essence_repo_urls: Iterable[Tuple[str, str]],
conjure_version: str = "latest",
blocklist: Optional[list[KeywordName]] = None,
exclude_regex: Optional[str] = None,
max_n_files: Optional[int] = None,
):
"""
Create a new EssenceStats object.
:param conjure_dir: Path to a directory containing conjure binary
:param conjure_repo_url: GitHub URL to download conjure release from
:param essence_repo_dir: Local repo with Essence example files
:param essence_repo_url: GitHub repo with Essence example files
:param essence_branch: Branch to download essence files from (master by default)
:param essence_dir: Local repo with Essence example files
:param essence_repo_urls: List of tuples - git repo urls and branches
:param conjure_version: Version of conjure to install (latest by default)
:param blocklist: Essence keywords to ignore
"""
if blocklist is None:
blocklist = []

self._essence_repo = clone_or_pull(
essence_repo_dir,
essence_repo_url,
essence_branch,
)
self._max_n_files = max_n_files
self._exclude_regex = exclude_regex
self._essence_dir = essence_dir
self._essence_repos = []
for url, branch in essence_repo_urls:
repo_user, repo_name = parse_repo_url(url)
repo_path = self._essence_dir / repo_user / repo_name
repo = clone_or_pull(
repo_path,
url,
branch,
)
self._essence_repos.append(repo)

self._conjure_bin = download_conjure(
conjure_dir,
repository_url=conjure_repo_url,
version=conjure_version,
)

self._blocklist = blocklist
Expand All @@ -60,20 +74,40 @@ def __init__( # noqa: PLR0913
@property
def essence_dir(self) -> Path:
    """Get the local root directory under which the Essence repos are cloned."""
    # With several repos there is no single repo working dir any more, so
    # return the configured root directory.
    return Path(self._essence_dir)

@property
def essence_repos(self) -> list[Repo]:
    """Get a list of Repo objects - repositories with Essence files."""
    return self._essence_repos

def get_essence_repo_names(self, depth=2):
    """
    Get the working directory of every Essence repo, trimmed to a given depth.

    :param depth: depth passed to ``trim_path`` for each repo's working directory
    :return: a list with one trimmed path per repo, in the order the repos are stored
    """
    names = []
    for repo in self._essence_repos:
        names.append(trim_path(repo.working_dir, depth))
    return names

def _update_stats(self):
    """
    Loop over all associated Essence files and update the essence files stats.

    Every repo in ``self._essence_repos`` is scanned in order. Each file is
    stored in ``self._essence_files`` under its string path and registered
    with the ``EssenceKeyword`` entry of every keyword it uses.

    ``self._max_n_files`` is treated as a budget for the whole run rather
    than per repository, so the total number of processed files never
    exceeds it. ``None`` means no limit.
    """
    remaining = self._max_n_files

    for repo in self._essence_repos:
        # Stop once the overall budget is used up.
        if remaining is not None and remaining <= 0:
            break

        n_from_repo = 0
        for file in EssenceFile.get_essence_files_from_dir(
            repo.working_dir,
            self._conjure_bin,
            repo=repo,
            blocklist=self._blocklist,
            exclude_regex=self._exclude_regex,
            max_n_files=remaining,
        ):
            n_from_repo += 1
            self._essence_files[file.get_str_path()] = file

            for keyword in file.keywords:
                if keyword not in self._essence_keywords:
                    self._essence_keywords[keyword] = EssenceKeyword(keyword)
                self._essence_keywords[keyword].add_file(file)

        if remaining is not None:
            remaining -= n_from_repo

def get_essence_files(
self,
Expand Down
Loading

0 comments on commit 0951b5c

Please sign in to comment.