Skip to content

Commit

Permalink
Merge pull request #25 from watermarkhu/development
Browse files Browse the repository at this point in the history
Add find/findall + mypy fixes
  • Loading branch information
watermarkhu authored Feb 19, 2024
2 parents acbaf61 + 042d255 commit 2575a83
Show file tree
Hide file tree
Showing 21 changed files with 143 additions and 88 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# VSCode settings
.vscode/

# Ruff cache
.ruff_cache/
6 changes: 1 addition & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.2.2
hooks:
# Run the linter.
- id: ruff
args: [--fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.8.0' # Use the sha / tag you want to point at
hooks:
- id: mypy
14 changes: 14 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"python.testing.pytestArgs": [
"test"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"files.exclude": {
"**/__pycache__": true,
"**/.mypy_cache": true,
"**/.pytest_cache": true,
"**/.ruff_cache": true,
"**/.tox": true,
},
}
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
##################################### poetry #####################################
[tool.poetry]
name = "textmate-grammar-python"
version = "0.1.2"
version = "0.2.0"
description = "An interpreter for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format."
authors = ["Mark Shui Hu <watermarkhu@gmail.com>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/watermarkhu/textmate-grammar-python"
keywords = ["textmate", "tokenization"]
packages = [{include = "textmate_grammar"}]
packages = [{include = "textmate_grammar", from = "src"}]

[tool.poetry.dependencies]
python = "^3.11"
Expand All @@ -30,8 +30,8 @@ types-pyyaml = "^6.0.12.12"
##################################### ruff #####################################
ruff = "^0.2.1"
[tool.ruff]
include = ["pyproject.toml", "textmate_grammar/**/*.py"]
exclude = ["textmate_grammar/grammars/"]
include = ["pyproject.toml", "src/textmate_grammar/**/*.py"]
exclude = ["src/textmate_grammar/grammars/"]
line-length = 100
indent-width = 4

Expand Down
File renamed without changes.
101 changes: 79 additions & 22 deletions textmate_grammar/elements.py → src/textmate_grammar/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import defaultdict
from itertools import groupby
from pprint import pprint
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Generator

from .handler import POS, ContentHandler, Match, Pattern
from .logger import LOGGER
Expand All @@ -16,13 +16,7 @@
TOKEN_DICT = dict[POS, list[str]]


class Element:
def _token_by_index(self, *args, **kwargs):
# Stub for Mypy
return


class Capture(Element):
class Capture:
"""A captured matching group.
After matching, any pattern can have a number of capture groups for which subsequent parsers can be defined.
Expand Down Expand Up @@ -62,7 +56,7 @@ def __eq__(self, other: object) -> bool:
def __repr__(self) -> str:
return f"@capture<{self.key}>"

def dispatch(self) -> list[Element]:
def dispatch(self) -> list[Capture | ContentElement]:
"""Dispatches the remaining parse of the capture group."""
elements = []
for group_id, parser in self.parsers.items():
Expand Down Expand Up @@ -107,20 +101,20 @@ def dispatch(self) -> list[Element]:


def dispatch_list(
pending_elements: list[Element], parent: ContentElement | None = None
) -> list[Element]:
pending_elements: list[Capture | ContentElement], parent: ContentElement | None = None
) -> list[ContentElement]:
"""Dispatches all captured parsers in the list."""
elements = []
for item in pending_elements:
if isinstance(item, Capture):
captured_elements = dispatch_list(item.dispatch())
captured_elements: list[ContentElement] = dispatch_list(item.dispatch())
elements.extend(captured_elements)
elif item != parent:
elements.append(item)
return elements


class ContentElement(Element):
class ContentElement:
"""The base grammar element object."""

def __init__(
Expand All @@ -129,7 +123,7 @@ def __init__(
grammar: dict,
content: str,
characters: dict[POS, str],
children: list[Element] | None = None,
children: list[Capture | ContentElement] | None = None,
) -> None:
if children is None:
children = []
Expand All @@ -138,11 +132,15 @@ def __init__(
self.content = content
self.characters = characters
self._children_pending = children
self._children_dispached: list[Element] = []
self._children_dispached: list[ContentElement] = []
self._dispatched_children: bool = False

@property
def children(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.children

@property
def children(self) -> list[ContentElement]:
"Children elements"
if self._children_pending:
if not self._dispatched_children:
Expand Down Expand Up @@ -172,6 +170,61 @@ def to_dict(self, verbosity: int = -1, all_content: bool = False, **kwargs) -> d
)
return out_dict

def find(
self,
tokens: str | list[str],
stop_tokens: str | list[str] = "",
verbosity: int = -1,
stack: list[str] | None = None,
attribute: str = "_subelements",
) -> Generator[tuple[ContentElement, list[str]], None, None]:
"""Find the next subelement that matches the input token(s).

Returns a generator that walks through the element tree, yielding
``(element, stack)`` tuples, where ``stack`` is a snapshot of the
ancestor tokens (including this element's own token) leading to the
match.

:param tokens: token(s) to match; the single value ``"*"`` matches any token.
:param stop_tokens: token(s) that abort the search when encountered among
    the children; ``["*"]`` stops on any token not in ``tokens``.
:param verbosity: recursion-depth budget; a negative value (default -1)
    never reaches zero and so effectively means unlimited depth.
:param stack: ancestor-token stack accumulated by recursive calls;
    callers normally leave this as ``None``.
:param attribute: name of the attribute holding the child elements to
    search (defaults to ``_subelements``).
:raises ValueError: if ``tokens`` and ``stop_tokens`` overlap.
"""
# Normalize both token arguments to lists; an empty stop_tokens string
# means "no stop condition".
if isinstance(tokens, str):
tokens = [tokens]
if isinstance(stop_tokens, str):
stop_tokens = [stop_tokens] if stop_tokens else []
if not set(tokens).isdisjoint(set(stop_tokens)):
raise ValueError("Input tokens and stop_tokens must be disjoint")

if stack is None:
stack = []
# NOTE(review): `+=` mutates a caller-supplied stack list in place;
# internal recursive calls always pass a copy, so this is only a hazard
# for external callers — confirm intended.
stack += [self.token]

# NOTE(review): verbosity is decremented here AND passed as
# `verbosity - 1` in the recursive call below, reducing depth by two per
# level — confirm the intended depth semantics.
if verbosity:
verbosity -= 1
children: list[ContentElement] = getattr(self, attribute, self._subelements)
for child in children:
# Stop the entire search (not just skip) once a stop token is seen.
if stop_tokens and (
child.token in stop_tokens
or (stop_tokens == ["*"] and child.token not in tokens)
):
return None

if child.token in tokens or tokens == ["*"]:
# Yield a copy of the stack so later mutation cannot affect
# previously yielded results.
yield child, [e for e in stack]
if verbosity:
nested_generator = child.find(
tokens, verbosity=verbosity - 1, stack=[e for e in stack]
)
yield from nested_generator
return None

def findall(
    self,
    tokens: str | list[str],
    stop_tokens: str | list[str] = "",
    verbosity: int = -1,
    attribute: str = "_subelements",
) -> list[tuple[ContentElement, list[str]]]:
    """Collect every subelement matching the input token(s).

    Exhausts the generator produced by ``find`` — called with identical
    arguments — and returns its results as a list of
    ``(element, ancestor-token-stack)`` tuples.
    """
    matches: list[tuple[ContentElement, list[str]]] = []
    for found in self.find(
        tokens,
        stop_tokens=stop_tokens,
        verbosity=verbosity,
        attribute=attribute,
    ):
        matches.append(found)
    return matches

def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]:
"""Converts the object to a flattened array of tokens per index."""
token_dict = self._token_by_index(defaultdict(list))
Expand Down Expand Up @@ -238,8 +291,8 @@ class ContentBlockElement(ContentElement):

def __init__(
self,
begin: list[Element] | None = None,
end: list[Element] | None = None,
begin: list[Capture | ContentElement] | None = None,
end: list[Capture | ContentElement] | None = None,
**kwargs,
) -> None:
if end is None:
Expand All @@ -249,13 +302,17 @@ def __init__(
super().__init__(**kwargs)
self._begin_pending = begin
self._end_pending = end
self._begin_dispached: list[Element] = []
self._end_dispached: list[Element] = []
self._begin_dispached: list[ContentElement] = []
self._end_dispached: list[ContentElement] = []
self._dispatched_begin: bool = False
self._dispatched_end: bool = False

@property
def begin(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.begin + self.children + self.end

@property
def begin(self) -> list[ContentElement]:
"Begin elements"
if self._begin_pending:
if not self._dispatched_begin:
Expand All @@ -266,7 +323,7 @@ def begin(self) -> list[Element]:
return []

@property
def end(self) -> list[Element]:
def end(self) -> list[ContentElement]:
"End elements"
if self._end_pending:
if not self._dispatched_end:
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
from pathlib import Path
import shutil
import yaml
from pathlib import Path

import yaml

tmLanguageFile = (
Path(__file__).parents[3]
/ "syntaxes"
/ "markdown"
/ "markdown.tmLanguage.base.yaml"
Path(__file__).parents[3] / "syntaxes" / "markdown" / "markdown.tmLanguage.base.yaml"
)
tmLanguageYAML = Path(__file__).parent / "grammar.yaml"


if tmLanguageFile.exists():
shutil.copyfile(tmLanguageFile, tmLanguageYAML)

with open(tmLanguageYAML, "r") as file:
with open(tmLanguageYAML) as file:
try:
GRAMMAR = yaml.load(file.read(), Loader=yaml.CLoader)
except ImportError:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
import plistlib
import yaml
from pathlib import Path

import yaml

tmLanguageFile = (
Path(__file__).parents[3]
Expand All @@ -20,7 +20,7 @@
with open(tmLanguageYAML, "w") as f:
f.write(yaml.dump(GRAMMAR, indent=2))
else:
with open(tmLanguageYAML, "r") as file:
with open(tmLanguageYAML) as file:
try:
GRAMMAR = yaml.load(file.read(), Loader=yaml.CLoader)
except ImportError:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

from .elements import Element
from .elements import Capture, ContentElement
from .exceptions import IncompatibleFileType
from .handler import POS, ContentHandler
from .logger import LOGGER
Expand Down Expand Up @@ -80,7 +80,7 @@ def _initialize_repository(self):

super()._initialize_repository()

def parse_file(self, filePath: str | Path, **kwargs) -> Element | None:
def parse_file(self, filePath: str | Path, **kwargs) -> Capture | ContentElement | None:
"""Parses an entire file with the current grammar"""
if type(filePath) != Path:
filePath = Path(filePath)
Expand All @@ -89,6 +89,8 @@ def parse_file(self, filePath: str | Path, **kwargs) -> Element | None:
raise IncompatibleFileType(extensions=self.file_types)

handler = ContentHandler.from_path(filePath)
if handler.source == "":
return None

# Configure logger
LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
Expand All @@ -102,15 +104,15 @@ def parse_string(self, input: str, **kwargs):
LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
return self._parse_language(handler, **kwargs)

def _parse_language(self, handler: ContentHandler, **kwargs) -> Element | None:
def _parse_language(self, handler: ContentHandler, **kwargs) -> Capture | ContentElement | None:
"""Parses the current stream with the language scope."""

parsed, elements, _ = self.parse(handler, (0, 0), **kwargs)
return elements[0] if parsed else None

def _parse(
self, handler: ContentHandler, starting: POS, **kwargs
) -> tuple[bool, list[Element], tuple[int, int]]:
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int]]:
kwargs.pop("find_one", None)
return super()._parse(handler, starting, find_one=False, **kwargs)

Expand Down
File renamed without changes.
Loading

0 comments on commit 2575a83

Please sign in to comment.