Skip to content

Commit

Permalink
Merge branch 'parser-module'
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxcode123 committed Nov 5, 2024
2 parents bfc52c7 + 361da74 commit 3915c38
Show file tree
Hide file tree
Showing 18 changed files with 663 additions and 103 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
clean:
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__ src/syntactes/parser/__pycache__
rm -rf dist src/syntactes.egg-info

test:
Expand Down
84 changes: 78 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
Python parser generator

## Quick start

### Creating a parsing table
```py
from syntactes import Grammar, Rule, SLRGenerator, Token

Expand Down Expand Up @@ -54,16 +56,86 @@ SLR PARSING TABLE
-------------------------------------------------
| | $ | + | E | S | T | x |
-------------------------------------------------
| 1 | -- | -- | s3 | -- | s4 | s2 |
| 1 | -- | -- | s4 | -- | s2 | s3 |
-------------------------------------------------
| 2 | r4 | r4 | -- | -- | -- | -- |
| 2 | r2 | s5 | -- | -- | -- | -- |
-------------------------------------------------
| 3 | a | -- | -- | -- | -- | -- |
------------------------------------------------
| 4 | r2 | s5 | -- | -- | -- | -- |
| 3 | r4 | r4 | -- | -- | -- | -- |
-------------------------------------------------
| 5 | -- | -- | s6 | -- | s4 | s2 |
| 4 | a | -- | -- | -- | -- | -- |
------------------------------------------------
| 5 | -- | -- | s6 | -- | s2 | s3 |
-------------------------------------------------
| 6 | r1 | -- | -- | -- | -- | -- |
-------------------------------------------------
```

### Parsing

```py
from syntactes import Grammar, Rule, Token
from syntactes.parser import ParserError, SLRParser, execute_on

EOF = Token.eof()
S = Token("S", is_terminal=False)
E = Token("E", False)
T = Token("T", False)
x = Token("x", True, 1) # value of token is 1
PLUS = Token("+", True)

tokens = {EOF, S, E, T, x, PLUS}

# 0. S -> E $
# 1. E -> T + E
# 2. E -> T
# 3. T -> x
rule_1 = Rule(0, S, E, EOF)
rule_2 = Rule(1, E, T, PLUS, E)
rule_3 = Rule(2, E, T)
rule_4 = Rule(3, T, x)

rules = (rule_1, rule_2, rule_3, rule_4)

grammar = Grammar(rule_1, rules, tokens)

parser = SLRParser.from_grammar(grammar)


@execute_on(rule_4)
def push_value(x_token):
    # Add an argument for every token on the right-hand side of the rule.
print(
f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
)


@execute_on(rule_2)
def add(left, plus, right):
print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")


print("Parsing stream: x + x + x $\n")
parser.parse([x, PLUS, x, PLUS, x, EOF])

print("\nParsing stream: x + $\n")
try:
parser.parse([x, PLUS, EOF])
except ParserError as e:
print("ParserError:", e)
```

Running the above example produces this output:
```
Parsing stream: x + x + x $
received token x with value: 1, reducing by rule: T -> x
received token x with value: 1, reducing by rule: T -> x
received token x with value: 1, reducing by rule: T -> x
received tokens E, +, T, reducing by rule: E -> T + E
received tokens E, +, T, reducing by rule: E -> T + E
Parsing stream: x + $
received token x with value: 1, reducing by rule: T -> x
ParserError: Received token: $; expected one of: ['x', 'T', 'E']
```
49 changes: 49 additions & 0 deletions examples/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Example: build an SLR parser for a small expression grammar and attach
callback functions that run when specific rules are reduced.
"""
from syntactes import Grammar, Rule, Token
from syntactes.parser import ParserError, SLRParser, execute_on

EOF = Token.eof()
S = Token("S", is_terminal=False)
E = Token("E", False)
T = Token("T", False)
x = Token("x", True, 1)  # value of token is 1
PLUS = Token("+", True)

tokens = {EOF, S, E, T, x, PLUS}

# The grammar:
# 0. S -> E $
# 1. E -> T + E
# 2. E -> T
# 3. T -> x
rule_1 = Rule(0, S, E, EOF)
rule_2 = Rule(1, E, T, PLUS, E)
rule_3 = Rule(2, E, T)
# Rule number fixed from 4 to 3 so it matches the grammar listing above.
rule_4 = Rule(3, T, x)

rules = (rule_1, rule_2, rule_3, rule_4)

grammar = Grammar(rule_1, rules, tokens)

parser = SLRParser.from_grammar(grammar)


@execute_on(rule_4)
def push_value(x_token):
    # Add an argument for every token on the right-hand side of the rule.
    print(
        f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
    )


@execute_on(rule_2)
def add(left, plus, right):
    print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")


print("Parsing stream: x + x + x $\n")
parser.parse([x, PLUS, x, PLUS, x, EOF])

# A syntactically invalid stream raises ParserError; catch and report it.
print("\nParsing stream: x + $\n")
try:
    parser.parse([x, PLUS, EOF])
except ParserError as e:
    print("ParserError:", e)
File renamed without changes.
7 changes: 4 additions & 3 deletions src/syntactes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .generator import LR0Generator, SLRGenerator
from .grammar import Grammar
from .rule import Rule
from .token import Token
from .rule import Rule
from .grammar import Grammar
from .generator import LR0Generator, SLRGenerator
from .table import LR0ParsingTable, SLRParsingTable
21 changes: 21 additions & 0 deletions src/syntactes/_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,27 @@ def __init__(self, actionable: Actionable, action_type: ActionType) -> None:
self.actionable = actionable
self.action_type = action_type

@staticmethod
def shift(state: Actionable) -> "Action":
    """
    Create a SHIFT action targeting the given state.

    Used by the table generators to record that the parser should move
    to `state` after consuming the current token.
    """
    return Action(state, ActionType.SHIFT)

@staticmethod
def reduce(rule: Actionable) -> "Action":
    """
    Create a REDUCE action for the given rule.

    Used by the table generators to record a reduction by `rule`.
    """
    return Action(rule, ActionType.REDUCE)

@staticmethod
def accept() -> "Action":
    """
    Create an ACCEPT action.

    Carries no actionable (None); emitted for the EOF token of a state
    that can accept.
    """
    return Action(None, ActionType.ACCEPT)

def __repr__(self) -> str:
    # The f-string interpolates str(self), so __repr__ reuses __str__'s text.
    return f"<Action: {self}>"

Expand Down
19 changes: 5 additions & 14 deletions src/syntactes/_state.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
from typing import Iterable, Protocol
from typing import Iterable

from syntactes._item import LR0Item


class State(Protocol):
"""
State of parser automaton. A state is a set of items.
"""

items: set

def __repr__(self) -> str: ...
def __str__(self) -> str: ...
def __hash__(self) -> int: ...
def __eq__(self) -> bool: ...


class LR0State:
"""
State of LR0 parser. A LR0 state is a set of LR0 items.
Expand All @@ -24,6 +11,7 @@ class LR0State:
def __init__(self) -> None:
    """Create an empty, unnumbered, non-final state."""
    self.number = None  # assigned later via set_number when states are enumerated
    self.items = set()  # the LR0Item members of this state
    self.is_final = False  # set via set_final when an item's next token is EOF

@staticmethod
def from_items(items: Iterable[LR0Item]) -> "LR0State":
Expand All @@ -44,6 +32,9 @@ def add_item(self, item: LR0Item) -> None:
def set_number(self, number: int) -> None:
    """Assign this state's identifying number in the parser automaton."""
    self.number = number

def set_final(self) -> None:
    """Mark this state as final (it contains an item whose next token is EOF)."""
    self.is_final = True

def __repr__(self) -> str:
    # `number` is None until set_number has been called.
    return f"<LR0State: {self.number}>"

Expand Down
24 changes: 11 additions & 13 deletions src/syntactes/generator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from syntactes import Grammar, Token
from syntactes._action import Action, ActionType
from syntactes._item import LR0Item
from syntactes._state import LR0State
from syntactes.grammar import Grammar
from syntactes.table import Entry, LR0ParsingTable, SLRParsingTable
from syntactes.token import Token


class LR0Generator:
Expand Down Expand Up @@ -191,7 +190,11 @@ def _extend_states_and_shift_entries(
EOF = Token.eof()
for state in states:
for item in state.items:
if item.dot_is_last() or item.after_dot == EOF:
if item.dot_is_last():
continue

if item.after_dot == EOF:
state.set_final()
continue

new_items = self.goto(state.items, item.after_dot)
Expand All @@ -204,8 +207,7 @@ def _extend_states_and_shift_entries(
number = _states.setdefault(new, len(_states) + 1)
new.set_number(number)

action = Action(new, ActionType.SHIFT)
_entries.add(Entry(state, item.after_dot, action))
_entries.add(Entry(state, item.after_dot, Action.shift(new)))

return _states, _entries

Expand All @@ -218,16 +220,14 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
for state in states:
for item in state.items:
if item.after_dot == Token.eof():
action = Action(item.rule, ActionType.ACCEPT)
entries.add(Entry(state, Token.eof(), action))
entries.add(Entry(state, Token.eof(), Action.accept()))

if not item.dot_is_last():
continue

action = Action(item.rule, ActionType.REDUCE)
for token in self.grammar.tokens:
if token.is_terminal:
entries.add(Entry(state, token, action))
entries.add(Entry(state, token, Action.reduce(item.rule)))

return entries

Expand Down Expand Up @@ -255,14 +255,12 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
for state in states:
for item in state.items:
if item.after_dot == Token.eof():
action = Action(item.rule, ActionType.ACCEPT)
entries.add(Entry(state, Token.eof(), action))
entries.add(Entry(state, Token.eof(), Action.accept()))

if not item.dot_is_last():
continue

action = Action(item.rule, ActionType.REDUCE)
for token in self._follow(item.rule.lhs):
entries.add(Entry(state, token, action))
entries.add(Entry(state, token, Action.reduce(item.rule)))

return entries
3 changes: 1 addition & 2 deletions src/syntactes/grammar.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Iterable

from syntactes.rule import Rule
from syntactes.token import Token
from syntactes import Rule, Token


class Grammar:
Expand Down
3 changes: 3 additions & 0 deletions src/syntactes/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .exception import NotAcceptedError, ParserError, UnexpectedTokenError
from .execute import ExecutablesRegistry, execute_on
from .parser import LR0Parser, SLRParser
21 changes: 21 additions & 0 deletions src/syntactes/parser/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class ParserError(Exception):
    """Base class for all errors raised by the parser."""


class UnexpectedTokenError(ParserError):
    """
    A token was received that does not map to an action. The stream of tokens
    is syntactically invalid.
    """

    def __init__(self, received_token, expected_tokens):
        # Keep the offending token and the valid alternatives accessible so
        # callers can build richer diagnostics than the message alone.
        self.received_token = received_token
        self.expected_tokens = expected_tokens
        msg = f"Received token: {received_token}; expected one of: {[str(e) for e in expected_tokens]}"
        super().__init__(msg)


class NotAcceptedError(ParserError):
    """
    The parser did not receive an accept action. The stream of tokens is
    syntactically invalid.
    """
55 changes: 55 additions & 0 deletions src/syntactes/parser/execute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import functools
from collections.abc import Callable
from typing import TypeAlias

from syntactes import Rule

# An executable is called for its side effects when a rule is recognized.
# `Callable[..., None]` (not `Callable[[...], None]`) is the correct spelling
# for "any signature, returns None".
Executable: TypeAlias = Callable[..., None]


def execute_on(rule: Rule):
    """
    Decorate a function to be executed upon recognition of `rule` by the parser.

    The decorated function is registered and returned unchanged; the previous
    `functools.wraps` pass-through wrapper added indirection without changing
    behavior (the registry already held the original function).
    """

    def executable_decorator(executable_fn: Executable) -> Executable:
        ExecutablesRegistry.register(rule, executable_fn)
        return executable_fn

    return executable_decorator


class ExecutablesRegistry:
    """
    Registry of executable functions, i.e. functions that get called when a grammar
    rule is recognized by the parser.

    Backed by a single class-level mapping shared by all users of the registry.
    """

    _registry: dict[Rule, Executable] = {}

    @classmethod
    def register(cls, rule: Rule, executable_fn: Executable) -> None:
        """
        Register a function to be executed upon recognition of the given rule.
        A later registration for the same rule replaces the earlier one.
        """
        cls._registry[rule] = executable_fn

    @classmethod
    def get(cls, rule: Rule) -> Executable:
        """
        Get the executable registered for the given rule.
        If no executable is registered returns a function that does nothing.
        """
        if rule in cls._registry:
            return cls._registry[rule]
        return lambda *args, **kwargs: None

    @classmethod
    def clear(cls) -> None:
        """Clear all registered rules."""
        cls._registry.clear()
Loading

0 comments on commit 3915c38

Please sign in to comment.