Skip to content

Commit

Permalink
Merge branch 'parser-module'
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxcode123 committed Nov 5, 2024
2 parents bfc52c7 + 361da74 commit 3915c38
Show file tree
Hide file tree
Showing 18 changed files with 663 additions and 103 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
clean:
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__ src/syntactes/parser/__pycache__
rm -rf dist src/syntactes.egg-info

test:
Expand Down
84 changes: 78 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
Python parser generator

## Quick start

### Creating a parsing table
```py
from syntactes import Grammar, Rule, SLRGenerator, Token

Expand Down Expand Up @@ -54,16 +56,86 @@ SLR PARSING TABLE
-------------------------------------------------
| | $ | + | E | S | T | x |
-------------------------------------------------
| 1 | -- | -- | s3 | -- | s4 | s2 |
| 1 | -- | -- | s4 | -- | s2 | s3 |
-------------------------------------------------
| 2 | r4 | r4 | -- | -- | -- | -- |
| 2 | r2 | s5 | -- | -- | -- | -- |
-------------------------------------------------
| 3 | a | -- | -- | -- | -- | -- |
------------------------------------------------
| 4 | r2 | s5 | -- | -- | -- | -- |
| 3 | r4 | r4 | -- | -- | -- | -- |
-------------------------------------------------
| 5 | -- | -- | s6 | -- | s4 | s2 |
| 4 | a | -- | -- | -- | -- | -- |
------------------------------------------------
| 5 | -- | -- | s6 | -- | s2 | s3 |
-------------------------------------------------
| 6 | r1 | -- | -- | -- | -- | -- |
-------------------------------------------------
```

### Parsing

```py
from syntactes import Grammar, Rule, Token
from syntactes.parser import ParserError, SLRParser, execute_on

EOF = Token.eof()
S = Token("S", is_terminal=False)
E = Token("E", False)
T = Token("T", False)
x = Token("x", True, 1) # value of token is 1
PLUS = Token("+", True)

tokens = {EOF, S, E, T, x, PLUS}

# 0. S -> E $
# 1. E -> T + E
# 2. E -> T
# 3. T -> x
rule_1 = Rule(0, S, E, EOF)
rule_2 = Rule(1, E, T, PLUS, E)
rule_3 = Rule(2, E, T)
rule_4 = Rule(3, T, x)

rules = (rule_1, rule_2, rule_3, rule_4)

grammar = Grammar(rule_1, rules, tokens)

parser = SLRParser.from_grammar(grammar)


@execute_on(rule_4)
def push_value(x_token):
    # Add an argument for every token on the right-hand side of the rule.
print(
f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
)


@execute_on(rule_2)
def add(left, plus, right):
print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")


print("Parsing stream: x + x + x $\n")
parser.parse([x, PLUS, x, PLUS, x, EOF])

print("\nParsing stream: x + $\n")
try:
parser.parse([x, PLUS, EOF])
except ParserError as e:
print("ParserError:", e)
```

Running the above example produces this output:
```
Parsing stream: x + x + x $
received token x with value: 1, reducing by rule: T -> x
received token x with value: 1, reducing by rule: T -> x
received token x with value: 1, reducing by rule: T -> x
received tokens E, +, T, reducing by rule: E -> T + E
received tokens E, +, T, reducing by rule: E -> T + E
Parsing stream: x + $
received token x with value: 1, reducing by rule: T -> x
ParserError: Received token: $; expected one of: ['x', 'T', 'E']
```
49 changes: 49 additions & 0 deletions examples/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Example: build an SLR parser for a small expression grammar and attach
callback functions that run when specific rules are reduced.
"""
from syntactes import Grammar, Rule, Token
from syntactes.parser import ParserError, SLRParser, execute_on

EOF = Token.eof()
S = Token("S", is_terminal=False)
E = Token("E", False)
T = Token("T", False)
x = Token("x", True, 1)  # value of token is 1
PLUS = Token("+", True)

tokens = {EOF, S, E, T, x, PLUS}

# The grammar:
# 0. S -> E $
# 1. E -> T + E
# 2. E -> T
# 3. T -> x
rule_1 = Rule(0, S, E, EOF)
rule_2 = Rule(1, E, T, PLUS, E)
rule_3 = Rule(2, E, T)
# Rule number fixed from 4 to 3 so it matches the grammar listing above.
rule_4 = Rule(3, T, x)

rules = (rule_1, rule_2, rule_3, rule_4)

grammar = Grammar(rule_1, rules, tokens)

parser = SLRParser.from_grammar(grammar)


@execute_on(rule_4)
def push_value(x_token):
    # Add an argument for every token on the right-hand side of the rule.
    print(
        f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
    )


@execute_on(rule_2)
def add(left, plus, right):
    print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")


print("Parsing stream: x + x + x $\n")
parser.parse([x, PLUS, x, PLUS, x, EOF])

# A syntactically invalid stream raises ParserError; catch and report it.
print("\nParsing stream: x + $\n")
try:
    parser.parse([x, PLUS, EOF])
except ParserError as e:
    print("ParserError:", e)
File renamed without changes.
7 changes: 4 additions & 3 deletions src/syntactes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .generator import LR0Generator, SLRGenerator
from .grammar import Grammar
from .rule import Rule
from .token import Token
from .rule import Rule
from .grammar import Grammar
from .generator import LR0Generator, SLRGenerator
from .table import LR0ParsingTable, SLRParsingTable
21 changes: 21 additions & 0 deletions src/syntactes/_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,27 @@ def __init__(self, actionable: Actionable, action_type: ActionType) -> None:
self.actionable = actionable
self.action_type = action_type

@staticmethod
def shift(state: Actionable) -> "Action":
    """
    Create a SHIFT action targeting the given state.

    Used by the table generators to record that the parser should move
    to `state` after consuming the current token.
    """
    return Action(state, ActionType.SHIFT)

@staticmethod
def reduce(rule: Actionable) -> "Action":
    """
    Create a REDUCE action for the given rule.

    Used by the table generators to record a reduction by `rule`.
    """
    return Action(rule, ActionType.REDUCE)

@staticmethod
def accept() -> "Action":
    """
    Create an ACCEPT action.

    Carries no actionable (None); emitted for the EOF token of a state
    that can accept.
    """
    return Action(None, ActionType.ACCEPT)

def __repr__(self) -> str:
    # The f-string interpolates str(self), so __repr__ reuses __str__'s text.
    return f"<Action: {self}>"

Expand Down
19 changes: 5 additions & 14 deletions src/syntactes/_state.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
from typing import Iterable, Protocol
from typing import Iterable

from syntactes._item import LR0Item


class State(Protocol):
"""
State of parser automaton. A state is a set of items.
"""

items: set

def __repr__(self) -> str: ...
def __str__(self) -> str: ...
def __hash__(self) -> int: ...
def __eq__(self) -> bool: ...


class LR0State:
"""
State of LR0 parser. A LR0 state is a set of LR0 items.
Expand All @@ -24,6 +11,7 @@ class LR0State:
def __init__(self) -> None:
    """Create an empty, unnumbered, non-final state."""
    self.number = None  # assigned later via set_number when states are enumerated
    self.items = set()  # the LR0Item members of this state
    self.is_final = False  # set via set_final when an item's next token is EOF

@staticmethod
def from_items(items: Iterable[LR0Item]) -> "LR0State":
Expand All @@ -44,6 +32,9 @@ def add_item(self, item: LR0Item) -> None:
def set_number(self, number: int) -> None:
    """Assign this state's identifying number in the parser automaton."""
    self.number = number

def set_final(self) -> None:
    """Mark this state as final (it contains an item whose next token is EOF)."""
    self.is_final = True

def __repr__(self) -> str:
    # `number` is None until set_number has been called.
    return f"<LR0State: {self.number}>"

Expand Down
24 changes: 11 additions & 13 deletions src/syntactes/generator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from syntactes import Grammar, Token
from syntactes._action import Action, ActionType
from syntactes._item import LR0Item
from syntactes._state import LR0State
from syntactes.grammar import Grammar
from syntactes.table import Entry, LR0ParsingTable, SLRParsingTable
from syntactes.token import Token


class LR0Generator:
Expand Down Expand Up @@ -191,7 +190,11 @@ def _extend_states_and_shift_entries(
EOF = Token.eof()
for state in states:
for item in state.items:
if item.dot_is_last() or item.after_dot == EOF:
if item.dot_is_last():
continue

if item.after_dot == EOF:
state.set_final()
continue

new_items = self.goto(state.items, item.after_dot)
Expand All @@ -204,8 +207,7 @@ def _extend_states_and_shift_entries(
number = _states.setdefault(new, len(_states) + 1)
new.set_number(number)

action = Action(new, ActionType.SHIFT)
_entries.add(Entry(state, item.after_dot, action))
_entries.add(Entry(state, item.after_dot, Action.shift(new)))

return _states, _entries

Expand All @@ -218,16 +220,14 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
for state in states:
for item in state.items:
if item.after_dot == Token.eof():
action = Action(item.rule, ActionType.ACCEPT)
entries.add(Entry(state, Token.eof(), action))
entries.add(Entry(state, Token.eof(), Action.accept()))

if not item.dot_is_last():
continue

action = Action(item.rule, ActionType.REDUCE)
for token in self.grammar.tokens:
if token.is_terminal:
entries.add(Entry(state, token, action))
entries.add(Entry(state, token, Action.reduce(item.rule)))

return entries

Expand Down Expand Up @@ -255,14 +255,12 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
for state in states:
for item in state.items:
if item.after_dot == Token.eof():
action = Action(item.rule, ActionType.ACCEPT)
entries.add(Entry(state, Token.eof(), action))
entries.add(Entry(state, Token.eof(), Action.accept()))

if not item.dot_is_last():
continue

action = Action(item.rule, ActionType.REDUCE)
for token in self._follow(item.rule.lhs):
entries.add(Entry(state, token, action))
entries.add(Entry(state, token, Action.reduce(item.rule)))

return entries
3 changes: 1 addition & 2 deletions src/syntactes/grammar.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Iterable

from syntactes.rule import Rule
from syntactes.token import Token
from syntactes import Rule, Token


class Grammar:
Expand Down
3 changes: 3 additions & 0 deletions src/syntactes/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .exception import NotAcceptedError, ParserError, UnexpectedTokenError
from .execute import ExecutablesRegistry, execute_on
from .parser import LR0Parser, SLRParser
21 changes: 21 additions & 0 deletions src/syntactes/parser/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class ParserError(Exception):
    """Base class for all errors raised by the parser."""


class UnexpectedTokenError(ParserError):
    """
    A token was received that does not map to an action. The stream of tokens
    is syntactically invalid.
    """

    def __init__(self, received_token, expected_tokens):
        # Keep the offending token and the valid alternatives accessible so
        # callers can build richer diagnostics than the message alone.
        self.received_token = received_token
        self.expected_tokens = expected_tokens
        msg = f"Received token: {received_token}; expected one of: {[str(e) for e in expected_tokens]}"
        super().__init__(msg)


class NotAcceptedError(ParserError):
    """
    The parser did not receive an accept action. The stream of tokens is
    syntactically invalid.
    """
55 changes: 55 additions & 0 deletions src/syntactes/parser/execute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import functools
from collections.abc import Callable
from typing import TypeAlias

from syntactes import Rule

# An executable is called for its side effects when a rule is recognized.
# `Callable[..., None]` (not `Callable[[...], None]`) is the correct spelling
# for "any signature, returns None".
Executable: TypeAlias = Callable[..., None]


def execute_on(rule: Rule):
    """
    Decorate a function to be executed upon recognition of `rule` by the parser.

    The decorated function is registered and returned unchanged; the previous
    `functools.wraps` pass-through wrapper added indirection without changing
    behavior (the registry already held the original function).
    """

    def executable_decorator(executable_fn: Executable) -> Executable:
        ExecutablesRegistry.register(rule, executable_fn)
        return executable_fn

    return executable_decorator


class ExecutablesRegistry:
    """
    Registry of executable functions, i.e. functions that get called when a grammar
    rule is recognized by the parser.

    Backed by a single class-level mapping shared by all users of the registry.
    """

    _registry: dict[Rule, Executable] = {}

    @classmethod
    def register(cls, rule: Rule, executable_fn: Executable) -> None:
        """
        Register a function to be executed upon recognition of the given rule.
        A later registration for the same rule replaces the earlier one.
        """
        cls._registry[rule] = executable_fn

    @classmethod
    def get(cls, rule: Rule) -> Executable:
        """
        Get the executable registered for the given rule.
        If no executable is registered returns a function that does nothing.
        """
        if rule in cls._registry:
            return cls._registry[rule]
        return lambda *args, **kwargs: None

    @classmethod
    def clear(cls) -> None:
        """Clear all registered rules."""
        cls._registry.clear()
Loading

0 comments on commit 3915c38

Please sign in to comment.