From 4ec878c19e233650af56bb04ab8c8a160bff1192 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Sat, 2 Nov 2024 10:36:03 +0200 Subject: [PATCH 01/23] Changes imports in tests.data --- src/syntactes/tests/data.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/syntactes/tests/data.py b/src/syntactes/tests/data.py index a63f60c..bee833a 100644 --- a/src/syntactes/tests/data.py +++ b/src/syntactes/tests/data.py @@ -1,6 +1,4 @@ -from syntactes.grammar import Grammar -from syntactes.rule import Rule -from syntactes.token import Token +from syntactes import Grammar, Rule, Token EOF = Token.eof() S = Token("S", False) From ae1454d1dabbe7f06d83f5883a9bc8a192719d8e Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Sat, 2 Nov 2024 10:42:11 +0200 Subject: [PATCH 02/23] Changes imports in syntactes modules --- src/syntactes/__init__.py | 6 +++--- src/syntactes/generator.py | 3 +-- src/syntactes/grammar.py | 3 +-- src/syntactes/table.py | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/syntactes/__init__.py b/src/syntactes/__init__.py index 17abed5..1b1c3fd 100644 --- a/src/syntactes/__init__.py +++ b/src/syntactes/__init__.py @@ -1,4 +1,4 @@ -from .generator import LR0Generator, SLRGenerator -from .grammar import Grammar -from .rule import Rule from .token import Token +from .rule import Rule +from .grammar import Grammar +from .generator import LR0Generator, SLRGenerator diff --git a/src/syntactes/generator.py b/src/syntactes/generator.py index 94ba409..50a315c 100644 --- a/src/syntactes/generator.py +++ b/src/syntactes/generator.py @@ -1,9 +1,8 @@ +from syntactes import Grammar, Token from syntactes._action import Action, ActionType from syntactes._item import LR0Item from syntactes._state import LR0State -from syntactes.grammar import Grammar from syntactes.table import Entry, LR0ParsingTable, SLRParsingTable -from syntactes.token import Token class LR0Generator: diff --git a/src/syntactes/grammar.py 
b/src/syntactes/grammar.py index 2894533..17e0844 100644 --- a/src/syntactes/grammar.py +++ b/src/syntactes/grammar.py @@ -1,7 +1,6 @@ from typing import Iterable -from syntactes.rule import Rule -from syntactes.token import Token +from syntactes import Rule, Token class Grammar: diff --git a/src/syntactes/table.py b/src/syntactes/table.py index 54c4605..6b1124c 100644 --- a/src/syntactes/table.py +++ b/src/syntactes/table.py @@ -1,9 +1,8 @@ from typing import Iterable, Optional, TypeAlias +from syntactes import Grammar, Token from syntactes._action import Action from syntactes._state import LR0State -from syntactes.grammar import Grammar -from syntactes.token import Token Row: TypeAlias = dict[Token, list[Action]] From a82e16698a286b7ae328db88ebcc14b811732fdc Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Sat, 2 Nov 2024 11:09:23 +0200 Subject: [PATCH 03/23] Creates parser module --- src/syntactes/parser/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/syntactes/parser/__init__.py diff --git a/src/syntactes/parser/__init__.py b/src/syntactes/parser/__init__.py new file mode 100644 index 0000000..e69de29 From f9a492a7d3fdb960599cac6524f37ac6fbee4242 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Sat, 2 Nov 2024 11:10:38 +0200 Subject: [PATCH 04/23] Defines parser.execute module --- src/syntactes/parser/__init__.py | 1 + src/syntactes/parser/execute.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 src/syntactes/parser/execute.py diff --git a/src/syntactes/parser/__init__.py b/src/syntactes/parser/__init__.py index e69de29..ed9677b 100644 --- a/src/syntactes/parser/__init__.py +++ b/src/syntactes/parser/__init__.py @@ -0,0 +1 @@ +from .execute import execute_on diff --git a/src/syntactes/parser/execute.py b/src/syntactes/parser/execute.py new file mode 100644 index 0000000..3a8db39 --- /dev/null +++ b/src/syntactes/parser/execute.py @@ -0,0 +1,55 @@ +import 
functools +from collections.abc import Callable +from typing import TypeAlias + +from syntactes import Rule + +Executable: TypeAlias = Callable[[...], None] + + +def execute_on(rule: Rule): + """ + Decorate a function to be executed upon recognition of `rule` by the parser. + """ + + def executable_decorator(executable_fn: Executable) -> Executable: + ExecutablesRegistry.register(rule, executable_fn) + + @functools.wraps(executable_fn) + def wrapped_executable_fn(*args, **kwargs) -> None: + return executable_fn(*args, **kwargs) + + return wrapped_executable_fn + + return executable_decorator + + +class ExecutablesRegistry: + """ + Registry of executable functions, i.e. functions that get called when a grammar + rule is recognized by the parser. + """ + + _registry: dict[Rule, Executable] = {} + + @classmethod + def register(cls, rule: Rule, executable_fn: Executable) -> None: + """ + Register a function to be executed upon recognition of the given rule. + """ + cls._registry[rule] = executable_fn + + @classmethod + def get(cls, rule: Rule) -> Executable: + """ + Get the executable registered for the given rule. + If no executable is registered returns a function that does nothing. + """ + return cls._registry.get(rule, lambda *_, **__: None) + + @classmethod + def clear(cls) -> None: + """ + Clear all registered rules. 
+ """ + cls._registry.clear() From d6f266c5c51f8598b10ceeebd246305512304c82 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 13:32:59 +0200 Subject: [PATCH 05/23] Adds parsing tables to root dir import --- src/syntactes/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/syntactes/__init__.py b/src/syntactes/__init__.py index 1b1c3fd..2fa3aa6 100644 --- a/src/syntactes/__init__.py +++ b/src/syntactes/__init__.py @@ -2,3 +2,4 @@ from .rule import Rule from .grammar import Grammar from .generator import LR0Generator, SLRGenerator +from .table import LR0ParsingTable, SLRParsingTable From 392fb2799e54826e715221675e4f0d8ca9b17758 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 13:33:39 +0200 Subject: [PATCH 06/23] Adds initial state property to parsing table --- src/syntactes/table.py | 14 +++++++++++--- src/syntactes/tests/test_generator.py | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/syntactes/table.py b/src/syntactes/table.py index 6b1124c..c1bbcba 100644 --- a/src/syntactes/table.py +++ b/src/syntactes/table.py @@ -46,6 +46,7 @@ class LR0ParsingTable: def __init__(self, grammar: Grammar) -> None: self.rows: dict[LR0State, Row] = dict() self._grammar = grammar + self._initial_state = None def get_actions(self, state: LR0State, token: Token) -> Optional[list[Action]]: """ @@ -65,9 +66,12 @@ def add_entry(self, entry: Entry) -> None: """ Adds an entry to the parsing table. 
""" - self.rows.setdefault(entry.from_state, {}).setdefault( - entry.token, list() - ).append(entry.action) + if entry.from_state.number == 1: + self._initial_state = entry.from_state + + row = self.rows.setdefault(entry.from_state, {}) + actions = row.setdefault(entry.token, list()) + actions.append(entry.action) @staticmethod def from_entries( @@ -86,6 +90,10 @@ def pretty_str(self) -> str: """ return self._rules_pretty_str() + "\n\n" + self._table_pretty_str() + @property + def initial_state(self) -> LR0State: + return self._initial_state + def _rules_pretty_str(self) -> str: rules = [str(i) + ". " + str(r) for i, r in enumerate(self._grammar.rules)] rules_str = "\n".join(rules) diff --git a/src/syntactes/tests/test_generator.py b/src/syntactes/tests/test_generator.py index 5c94e6b..e4ae610 100644 --- a/src/syntactes/tests/test_generator.py +++ b/src/syntactes/tests/test_generator.py @@ -262,6 +262,13 @@ def test_state_6_token_plus(self): def test_state_6_token_x(self): self.assert_state_actions(reduce(rule_2)) +class TestLR0GeneratorGenerateInitialState(TestLR0Generator): + def subject(self): + return self.generator().generate().initial_state + + def test_initial_state(self): + self.assertResult(state_1()) + class TestSLRGenerator(TestCase): def setUp(self): @@ -370,3 +377,11 @@ def test_state_5_token_eof(self): @args(state_6(), EOF) def test_state_6_token_eof(self): self.assert_state_actions(reduce(rule_2)) + + +class TestSLRGeneratorGenerateInitialState(TestSLRGenerator): + def subject(self): + return self.generator().generate().initial_state + + def test_initial_state(self): + self.assertResult(state_1()) From e924d0037edf86dca9cb4cdc728fee7c150d5a16 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 13:49:44 +0200 Subject: [PATCH 07/23] Defines states in test data --- src/syntactes/tests/data.py | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/syntactes/tests/data.py 
b/src/syntactes/tests/data.py index bee833a..d43466a 100644 --- a/src/syntactes/tests/data.py +++ b/src/syntactes/tests/data.py @@ -1,4 +1,6 @@ from syntactes import Grammar, Rule, Token +from syntactes._item import LR0Item +from syntactes._state import LR0State EOF = Token.eof() S = Token("S", False) @@ -22,3 +24,52 @@ rules = (rule_1, rule_2, rule_3, rule_4) grammar = Grammar(rule_1, rules, tokens) + + +def state_1(): + item_1 = LR0Item(grammar.starting_rule, 0) # S -> . E $ + item_2 = LR0Item(rule_2, 0) # E -> . T + E + item_3 = LR0Item(rule_3, 0) # E -> . T + item_4 = LR0Item(rule_4, 0) # T -> . x + state = LR0State.from_items({item_1, item_2, item_3, item_4}) + state.set_number(1) + return state + + +def state_2(): + item_1 = LR0Item(grammar.starting_rule, 1) # S -> E . $ + state = LR0State.from_items({item_1}) + state.set_number(2) + return state + + +def state_3(): + item_1 = LR0Item(rule_2, 1) # E -> T . + E + item_2 = LR0Item(rule_3, 1) # E -> T . + state = LR0State.from_items({item_1, item_2}) + state.set_number(3) + return state + + +def state_4(): + item_1 = LR0Item(rule_2, 2) # E -> T + . E + item_2 = LR0Item(rule_2, 0) # E -> . T + E + item_3 = LR0Item(rule_3, 0) # E -> . T + item_4 = LR0Item(rule_4, 0) # T -> . x + state = LR0State.from_items({item_1, item_2, item_3, item_4}) + state.set_number(4) + return state + + +def state_5(): + item_1 = LR0Item(rule_4, 1) # T -> x . + state = LR0State.from_items({item_1}) + state.set_number(5) + return state + + +def state_6(): + item_1 = LR0Item(rule_2, 3) # E -> T + E . 
+ state = LR0State.from_items({item_1}) + state.set_number(6) + return state From b02076a39172a41deef20d356bfe7c5e204534b6 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 13:50:05 +0200 Subject: [PATCH 08/23] Uses defined states in generator tests --- src/syntactes/tests/test_generator.py | 69 +++++++-------------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/src/syntactes/tests/test_generator.py b/src/syntactes/tests/test_generator.py index e4ae610..385d4b0 100644 --- a/src/syntactes/tests/test_generator.py +++ b/src/syntactes/tests/test_generator.py @@ -2,58 +2,24 @@ from syntactes._action import Action, ActionType from syntactes._item import LR0Item -from syntactes._state import LR0State from syntactes.generator import LR0Generator, SLRGenerator -from syntactes.tests.data import EOF, PLUS, E, T, grammar, rule_2, rule_3, rule_4, x - - -def state_1(): - item_1 = LR0Item(grammar.starting_rule, 0) # S -> . E $ - item_2 = LR0Item(rule_2, 0) # E -> . T + E - item_3 = LR0Item(rule_3, 0) # E -> . T - item_4 = LR0Item(rule_4, 0) # T -> . x - state = LR0State.from_items({item_1, item_2, item_3, item_4}) - state.set_number(1) - return state - - -def state_2(): - item_1 = LR0Item(grammar.starting_rule, 1) # S -> E . $ - state = LR0State.from_items({item_1}) - state.set_number(2) - return state - - -def state_3(): - item_1 = LR0Item(rule_2, 1) # E -> T . + E - item_2 = LR0Item(rule_3, 1) # E -> T . - state = LR0State.from_items({item_1, item_2}) - state.set_number(3) - return state - - -def state_4(): - item_1 = LR0Item(rule_2, 2) # E -> T + . E - item_2 = LR0Item(rule_2, 0) # E -> . T + E - item_3 = LR0Item(rule_3, 0) # E -> . T - item_4 = LR0Item(rule_4, 0) # T -> . x - state = LR0State.from_items({item_1, item_2, item_3, item_4}) - state.set_number(4) - return state - - -def state_5(): - item_1 = LR0Item(rule_4, 1) # T -> x . 
- state = LR0State.from_items({item_1}) - state.set_number(5) - return state - - -def state_6(): - item_1 = LR0Item(rule_2, 3) # E -> T + E . - state = LR0State.from_items({item_1}) - state.set_number(6) - return state +from syntactes.tests.data import ( + EOF, + PLUS, + E, + T, + grammar, + rule_2, + rule_3, + rule_4, + state_1, + state_2, + state_3, + state_4, + state_5, + state_6, + x, +) def shift(state): @@ -262,6 +228,7 @@ def test_state_6_token_plus(self): def test_state_6_token_x(self): self.assert_state_actions(reduce(rule_2)) + class TestLR0GeneratorGenerateInitialState(TestLR0Generator): def subject(self): return self.generator().generate().initial_state From 1c94ce551bb4ef15ba47839825ff62b79d712bf0 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 14:24:32 +0200 Subject: [PATCH 09/23] Defines constructor methods for actions --- src/syntactes/_action.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/syntactes/_action.py b/src/syntactes/_action.py index 8aaebe4..2cd92e6 100644 --- a/src/syntactes/_action.py +++ b/src/syntactes/_action.py @@ -43,6 +43,27 @@ def __init__(self, actionable: Actionable, action_type: ActionType) -> None: self.actionable = actionable self.action_type = action_type + @staticmethod + def shift(state: Actionable) -> "Action": + """ + Create a shift action to the given state. + """ + return Action(state, ActionType.SHIFT) + + @staticmethod + def reduce(rule: Actionable) -> "Action": + """ + Create a reduce action of the given rule. + """ + return Action(rule, ActionType.REDUCE) + + @staticmethod + def accept() -> "Action": + """ + Create an accept action. 
+ """ + return Action(None, ActionType.ACCEPT) + def __repr__(self) -> str: return f"" From 4ae1f809f71eae792417a5d8592232125039ae4f Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 14:25:02 +0200 Subject: [PATCH 10/23] Uses action constructors in generator --- src/syntactes/generator.py | 15 +++++---------- src/syntactes/tests/test_generator.py | 8 ++++---- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/syntactes/generator.py b/src/syntactes/generator.py index 50a315c..3ef6334 100644 --- a/src/syntactes/generator.py +++ b/src/syntactes/generator.py @@ -203,8 +203,7 @@ def _extend_states_and_shift_entries( number = _states.setdefault(new, len(_states) + 1) new.set_number(number) - action = Action(new, ActionType.SHIFT) - _entries.add(Entry(state, item.after_dot, action)) + _entries.add(Entry(state, item.after_dot, Action.shift(new))) return _states, _entries @@ -217,16 +216,14 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]: for state in states: for item in state.items: if item.after_dot == Token.eof(): - action = Action(item.rule, ActionType.ACCEPT) - entries.add(Entry(state, Token.eof(), action)) + entries.add(Entry(state, Token.eof(), Action.accept())) if not item.dot_is_last(): continue - action = Action(item.rule, ActionType.REDUCE) for token in self.grammar.tokens: if token.is_terminal: - entries.add(Entry(state, token, action)) + entries.add(Entry(state, token, Action.reduce(item.rule))) return entries @@ -254,14 +251,12 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]: for state in states: for item in state.items: if item.after_dot == Token.eof(): - action = Action(item.rule, ActionType.ACCEPT) - entries.add(Entry(state, Token.eof(), action)) + entries.add(Entry(state, Token.eof(), Action.accept())) if not item.dot_is_last(): continue - action = Action(item.rule, ActionType.REDUCE) for token in self._follow(item.rule.lhs): - entries.add(Entry(state, token, action)) 
+ entries.add(Entry(state, token, Action.reduce(item.rule))) return entries diff --git a/src/syntactes/tests/test_generator.py b/src/syntactes/tests/test_generator.py index 385d4b0..f149268 100644 --- a/src/syntactes/tests/test_generator.py +++ b/src/syntactes/tests/test_generator.py @@ -30,8 +30,8 @@ def reduce(rule): return Action(rule, ActionType.REDUCE) -def accept(rule): - return Action(rule, ActionType.ACCEPT) +def accept(): + return Action(None, ActionType.ACCEPT) class TestLR0Generator(TestCase): @@ -178,7 +178,7 @@ def test_state_1_token_x(self): @args(state_2(), EOF) def test_state_2_token_eof(self): - self.assert_state_actions(accept(grammar.starting_rule)) + self.assert_state_actions(accept()) @args(state_3(), PLUS) def test_state_3_token_plus(self): @@ -311,7 +311,7 @@ def test_state_1_token_x(self): @args(state_2(), EOF) def test_state_2_token_eof(self): - self.assert_state_actions(accept(grammar.starting_rule)) + self.assert_state_actions(accept()) @args(state_3(), PLUS) def test_state_3_token_plus(self): From 189e1871e8e265d8eca871310d8f5a32c7bd42a4 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 14:25:36 +0200 Subject: [PATCH 11/23] Creates examples dir --- example.py => examples/parsing_table.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example.py => examples/parsing_table.py (100%) diff --git a/example.py b/examples/parsing_table.py similarity index 100% rename from example.py rename to examples/parsing_table.py From 4b5eb6238917bc9c1d6c1ff788cc477790b6d3c8 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 17:10:47 +0200 Subject: [PATCH 12/23] Removes State protocol --- src/syntactes/_state.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/syntactes/_state.py b/src/syntactes/_state.py index a6d324b..14cf933 100644 --- a/src/syntactes/_state.py +++ b/src/syntactes/_state.py @@ -1,21 +1,8 @@ -from typing import Iterable, Protocol +from typing 
import Iterable from syntactes._item import LR0Item -class State(Protocol): - """ - State of parser automaton. A state is a set of items. - """ - - items: set - - def __repr__(self) -> str: ... - def __str__(self) -> str: ... - def __hash__(self) -> int: ... - def __eq__(self) -> bool: ... - - class LR0State: """ State of LR0 parser. A LR0 state is a set of LR0 items. From 6a9913c350cd433d4f3b7071aee54718cc993e5b Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:00:32 +0200 Subject: [PATCH 13/23] Adds is_final attr to LR0State --- src/syntactes/_state.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/syntactes/_state.py b/src/syntactes/_state.py index 14cf933..4f53e42 100644 --- a/src/syntactes/_state.py +++ b/src/syntactes/_state.py @@ -11,6 +11,7 @@ class LR0State: def __init__(self) -> None: self.number = None self.items = set() + self.is_final = False @staticmethod def from_items(items: Iterable[LR0Item]) -> "LR0State": @@ -31,6 +32,9 @@ def add_item(self, item: LR0Item) -> None: def set_number(self, number: int) -> None: self.number = number + def set_final(self) -> None: + self.is_final = True + def __repr__(self) -> str: return f"" From d4f8c00c8ef0958860c71d3e180043eb25e34b95 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:01:04 +0200 Subject: [PATCH 14/23] Sets final state in LR0Generator --- src/syntactes/generator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/syntactes/generator.py b/src/syntactes/generator.py index 3ef6334..cb7238a 100644 --- a/src/syntactes/generator.py +++ b/src/syntactes/generator.py @@ -190,7 +190,11 @@ def _extend_states_and_shift_entries( EOF = Token.eof() for state in states: for item in state.items: - if item.dot_is_last() or item.after_dot == EOF: + if item.dot_is_last(): + continue + + if item.after_dot == EOF: + state.set_final() continue new_items = self.goto(state.items, item.after_dot) From 
784b0fb49f9000c8575c9a951cb731bcab3f3428 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:01:51 +0200 Subject: [PATCH 15/23] Defines test parsing table --- src/syntactes/tests/data.py | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/syntactes/tests/data.py b/src/syntactes/tests/data.py index d43466a..3ec6944 100644 --- a/src/syntactes/tests/data.py +++ b/src/syntactes/tests/data.py @@ -1,6 +1,8 @@ from syntactes import Grammar, Rule, Token +from syntactes._action import Action from syntactes._item import LR0Item from syntactes._state import LR0State +from syntactes.table import Entry, LR0ParsingTable, SLRParsingTable EOF = Token.eof() S = Token("S", False) @@ -40,6 +42,7 @@ def state_2(): item_1 = LR0Item(grammar.starting_rule, 1) # S -> E . $ state = LR0State.from_items({item_1}) state.set_number(2) + state.set_final() return state @@ -73,3 +76,42 @@ def state_6(): state = LR0State.from_items({item_1}) state.set_number(6) return state + + +def lr0_parsing_table(): + table = LR0ParsingTable(grammar) + table.add_entry(Entry(state_1(), E, Action.shift(state_2()))) + table.add_entry(Entry(state_1(), T, Action.shift(state_3()))) + table.add_entry(Entry(state_1(), x, Action.shift(state_5()))) + table.add_entry(Entry(state_2(), EOF, Action.accept())) + table.add_entry(Entry(state_3(), x, Action.reduce(rule_3))) + table.add_entry(Entry(state_3(), PLUS, Action.shift(state_4()))) + table.add_entry(Entry(state_3(), PLUS, Action.reduce(rule_3))) + table.add_entry(Entry(state_3(), EOF, Action.reduce(rule_3))) + table.add_entry(Entry(state_4(), x, Action.shift(state_5()))) + table.add_entry(Entry(state_4(), E, Action.shift(state_6()))) + table.add_entry(Entry(state_4(), T, Action.shift(state_3()))) + table.add_entry(Entry(state_5(), x, Action.reduce(rule_4))) + table.add_entry(Entry(state_5(), PLUS, Action.reduce(rule_4))) + table.add_entry(Entry(state_5(), EOF, Action.reduce(rule_4))) + 
table.add_entry(Entry(state_6(), x, Action.reduce(rule_2))) + table.add_entry(Entry(state_6(), PLUS, Action.reduce(rule_2))) + table.add_entry(Entry(state_6(), EOF, Action.reduce(rule_2))) + return table + + +def slr_parsing_table(): + table = SLRParsingTable(grammar) + table.add_entry(Entry(state_1(), x, Action.shift(state_5()))) + table.add_entry(Entry(state_1(), E, Action.shift(state_2()))) + table.add_entry(Entry(state_1(), T, Action.shift(state_3()))) + table.add_entry(Entry(state_2(), EOF, Action.accept())) + table.add_entry(Entry(state_3(), PLUS, Action.shift(state_4()))) + table.add_entry(Entry(state_3(), EOF, Action.reduce(rule_3))) + table.add_entry(Entry(state_4(), x, Action.shift(state_5()))) + table.add_entry(Entry(state_4(), E, Action.shift(state_6()))) + table.add_entry(Entry(state_4(), T, Action.shift(state_3()))) + table.add_entry(Entry(state_5(), PLUS, Action.reduce(rule_4))) + table.add_entry(Entry(state_5(), EOF, Action.reduce(rule_4))) + table.add_entry(Entry(state_6(), EOF, Action.reduce(rule_2))) + return table From c2b63f53f54ac2f4be981ad943b5da385fde26c8 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:02:29 +0200 Subject: [PATCH 16/23] Defines parser errors --- src/syntactes/parser/exception.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/syntactes/parser/exception.py diff --git a/src/syntactes/parser/exception.py b/src/syntactes/parser/exception.py new file mode 100644 index 0000000..1392652 --- /dev/null +++ b/src/syntactes/parser/exception.py @@ -0,0 +1,21 @@ +class ParserError(Exception): ... + + +class UnexpectedTokenError(ParserError): + """ + A token was received that does not map to an action. The stream of tokens + is syntactically invalid. 
+ """ + + def __init__(self, received_token, expected_tokens): + self.received_token = received_token + self.expected_tokens = expected_tokens + msg = f"Received token: {received_token}; expected one of: {[str(e) for e in expected_tokens]}" + super().__init__(msg) + + +class NotAcceptedError(ParserError): + """ + The parser did not receive an accept action. The stream of tokens is + syntactically invalid. + """ From 7512bcb9c8d738d6bab92b769e8c72a97c5fbfb1 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:02:59 +0200 Subject: [PATCH 17/23] Defines parser module --- src/syntactes/parser/parser.py | 103 ++++++++++++++++++++ src/syntactes/tests/test_parser.py | 146 +++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 src/syntactes/parser/parser.py create mode 100644 src/syntactes/tests/test_parser.py diff --git a/src/syntactes/parser/parser.py b/src/syntactes/parser/parser.py new file mode 100644 index 0000000..adebd2e --- /dev/null +++ b/src/syntactes/parser/parser.py @@ -0,0 +1,103 @@ +from collections import deque +from typing import Iterable + +from syntactes import Token +from syntactes._action import Action, ActionType +from syntactes._state import LR0State +from syntactes.parser import ( + ExecutablesRegistry, + NotAcceptedError, + ParserError, + UnexpectedTokenError, +) +from syntactes.table import LR0ParsingTable, SLRParsingTable + + +class LR0Parser: + """ + Parses streams of tokens based on the configured parsing table. + """ + + def __init__(self, table: LR0ParsingTable) -> None: + self._table = table + self._token_stack: deque[Token] = deque() + self._state_stack: deque[LR0State] = deque() + self._token_stream: deque[Token] = deque() + + def parse(self, stream: Iterable[Token]) -> None: + """ + Parses the given stream of tokens. Expects the EOF token as the last one. + + Raises `syntactes.parser.UnexpectedTokenError` if an unexpected token is + received. 
+ + Raises `syntactes.parser.NotAcceptedError` if the stream of tokens has been + parsed and the parser did not receive an accept action. + """ + self._set_state(self._table.initial_state) + self._token_stream.extend(stream) + + while len(self._token_stream) > 0: + token = self._token_stream.popleft() + self._apply_action(token, self._get_action(token)) + + if token != Token.eof(): + self._raise(NotAcceptedError("Expected EOF token. ")) + + if not self._get_state().is_final: + actions = self._table.get(self._get_state()) + expected_tokens = [] if actions is None else list(actions.keys()) + self._raise(UnexpectedTokenError(Token.eof(), expected_tokens)) + + def _apply_action(self, token: Token, action: Action) -> None: + if action.action_type == ActionType.SHIFT: + self._token_stack.append(token) + self._set_state(action.actionable) + elif action.action_type == ActionType.REDUCE: + rule = action.actionable + args = [self._token_stack.pop() for _ in reversed(rule.rhs)] + self._token_stack.append(rule.lhs) + + {self._state_stack.pop() for _ in rule.rhs} + + executable = ExecutablesRegistry.get(rule) + executable(*args) + + self._token_stream.appendleft(token) # reduce actions do not consume token + + shift = self._get_action(rule.lhs) + self._set_state(shift.actionable) + + def _get_action(self, token: Token) -> Action: + actions = self._table.get_actions(self._get_state(), token) + if actions is None: + actions = self._table.get(self._get_state()) + expected_tokens = [] if actions is None else list(actions.keys()) + self._raise(UnexpectedTokenError(token, expected_tokens)) + + action = self._resolve_conflict(actions) + return action + + def _resolve_conflict(self, actions: list[Action]) -> Action: + return actions[0] + + def _set_state(self, state: LR0State) -> None: + self._state_stack.append(state) + + def _get_state(self) -> LR0State: + return self._state_stack[-1] + + def _cleanup(self) -> None: + self._token_stack.clear() + self._state_stack.clear() + 
self._token_stream.clear() + + def _raise(self, error: ParserError) -> None: + self._cleanup() + raise error from None + + +class SLRParser(LR0Parser): + """ + Parses streams of tokens based on the configured parsing table. + """ diff --git a/src/syntactes/tests/test_parser.py b/src/syntactes/tests/test_parser.py new file mode 100644 index 0000000..1c7e527 --- /dev/null +++ b/src/syntactes/tests/test_parser.py @@ -0,0 +1,146 @@ +from unittest_extensions import TestCase, args + +from syntactes import Token +from syntactes.parser import ( + ExecutablesRegistry, + LR0Parser, + ParserError, + SLRParser, + execute_on, +) +from syntactes.tests.data import ( + EOF, + PLUS, + lr0_parsing_table, + rule_2, + rule_4, + slr_parsing_table, + x, +) + +x1 = Token("x", True, 1) +x2 = Token("x", True, 2) + + +class TestLR0Parser(TestCase): + def parser(self): + return self._parser + + def setUp(self): + self._parser = LR0Parser(lr0_parsing_table()) + + def assert_parser_error(self): + self.assertResultRaises(ParserError) + + +class TestLR0ParserParse(TestLR0Parser): + def subject(self, *stream): + return self.parser().parse(stream) + + @args(x, EOF) + def test_simple_x(self): + self.result() + + @args(x, PLUS, x, EOF) + def test_x_plus_x(self): + self.result() + + @args(x) + def test_no_eof_raises(self): + self.assert_parser_error() + + @args(x, x) + def test_x_x_raises(self): + self.assert_parser_error() + + @args(x, PLUS) + def test_x_plus_raises(self): + self.assert_parser_error() + + @args(x, PLUS, EOF) + def test_x_plus_eof_raises(self): + self.assert_parser_error() + + @args(EOF) + def test_eof_raises(self): + self.assert_parser_error() + + +class TestLR0ParserParseExecutables(TestLR0Parser): + def subject(self, *stream): + self.parser().parse(stream) + return self.sum + + def add(self, _right, _plus, _left): + self.sum += 1 + + def setUp(self): + self.sum = 0 + self.add = execute_on(rule_2)(self.add) + super().setUp() + + @args(x, PLUS, x, EOF) + def test_x_plus_x(self): + 
self.assertResult(1) + + @args(x, PLUS, x, PLUS, x, EOF) + def test_x_plus_x_plus_x(self): + self.assertResult(2) + + +class TestLR0ParserParseExecutablesTokenValues(TestLR0Parser): + def subject(self, *stream): + self.parser().parse(stream) + return self.sum + + def push(self, x): + self.stack.append(x.value) + + def add(self, x1, _plus, x2): + self.sum = self.stack.pop() + self.stack.pop() + + def setUp(self): + self.sum = 0 + self.stack = list() + execute_on(rule_4)(self.push) + execute_on(rule_2)(self.add) + super().setUp() + + def tearDown(self): + ExecutablesRegistry.clear() + + @args(x1, PLUS, x1, EOF) + def test_x1_plus_x1(self): + self.assertResult(2) + + @args(x1, PLUS, x2, EOF) + def test_x1_plus_x2(self): + self.assertResult(3) + + @args(x2, PLUS, x2, EOF) + def test_x2_plus_x2(self): + self.assertResult(4) + + +class TestSLRParser(TestCase): + def parser(self): + return self._parser + + def setUp(self): + self._parser = SLRParser(slr_parsing_table()) + + def assert_parser_error(self): + self.assertResultRaises(ParserError) + + +class TestSLRParserParse(TestSLRParser): + def subject(self, *stream): + return self.parser().parse(stream) + + @args(x, x, EOF) + def test_x_x_eof_raises(self): + self.assert_parser_error() + + @args(x, PLUS, x, EOF) + def test_x_plus_x(self): + self.result() From 6f9318f906a7074edfc56c1366872393cc77df59 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Mon, 4 Nov 2024 18:03:37 +0200 Subject: [PATCH 18/23] Imports objects to parser root dir --- src/syntactes/parser/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/syntactes/parser/__init__.py b/src/syntactes/parser/__init__.py index ed9677b..f23d1ab 100644 --- a/src/syntactes/parser/__init__.py +++ b/src/syntactes/parser/__init__.py @@ -1 +1,3 @@ -from .execute import execute_on +from .exception import NotAcceptedError, ParserError, UnexpectedTokenError +from .execute import ExecutablesRegistry, execute_on +from .parser import 
LR0Parser, SLRParser From bc5f0d7b20614ca5abcf5444baeff5e4d7234ab2 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Tue, 5 Nov 2024 08:17:03 +0200 Subject: [PATCH 19/23] Adds value attr to token --- src/syntactes/token.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/syntactes/token.py b/src/syntactes/token.py index fbbb0da..39f2bd6 100644 --- a/src/syntactes/token.py +++ b/src/syntactes/token.py @@ -3,9 +3,10 @@ class Token: A token of the grammar. Can be a terminal or non-terminal symbol. """ - def __init__(self, symbol: str, is_terminal: bool) -> None: + def __init__(self, symbol: str, is_terminal: bool, value=None) -> None: self.symbol = symbol self.is_terminal = is_terminal + self.value = value @staticmethod def null() -> "Token": From 47a147cc93cea71d86ab826b4ce31450982b768c Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Tue, 5 Nov 2024 11:50:52 +0200 Subject: [PATCH 20/23] Adds to Makefile clean target --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d9728a6..e441197 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ clean: - rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__ + rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__ src/syntactes/parser/__pycache__ rm -rf dist src/syntactes.egg-info test: From aaae5326d74832cfad7fc7cd720187b78bdca934 Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Tue, 5 Nov 2024 12:40:42 +0200 Subject: [PATCH 21/23] Adds from_grammar method to parser --- src/syntactes/parser/parser.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/syntactes/parser/parser.py b/src/syntactes/parser/parser.py index adebd2e..1c4c087 100644 --- a/src/syntactes/parser/parser.py +++ b/src/syntactes/parser/parser.py @@ -1,7 +1,7 @@ from collections import deque from typing import Iterable -from syntactes import Token +from syntactes import Grammar, LR0Generator, 
SLRGenerator, Token from syntactes._action import Action, ActionType from syntactes._state import LR0State from syntactes.parser import ( @@ -10,7 +10,7 @@ ParserError, UnexpectedTokenError, ) -from syntactes.table import LR0ParsingTable, SLRParsingTable +from syntactes.table import LR0ParsingTable class LR0Parser: @@ -24,6 +24,16 @@ def __init__(self, table: LR0ParsingTable) -> None: self._state_stack: deque[LR0State] = deque() self._token_stream: deque[Token] = deque() + @staticmethod + def from_grammar(grammar: Grammar) -> "LR0Parser": + """ + Create a parser for the given grammar. + """ + generator = LR0Generator(grammar) + parsing_table = generator.generate() + parser = LR0Parser(parsing_table) + return parser + def parse(self, stream: Iterable[Token]) -> None: """ Parses the given stream of tokens. Expects the EOF token as the last one. @@ -101,3 +111,13 @@ class SLRParser(LR0Parser): """ Parses streams of tokens based on the configured parsing table. """ + + @staticmethod + def from_grammar(grammar: Grammar) -> "SLRParser": + """ + Create a parser for the given grammar. 
+ """ + generator = SLRGenerator(grammar) + parsing_table = generator.generate() + parser = SLRParser(parsing_table) + return parser From e1bf75428b3e9f6e433c2ac1f8d5987a7fe7780f Mon Sep 17 00:00:00 2001 From: Maximos Nikiforakis Date: Tue, 5 Nov 2024 12:40:54 +0200 Subject: [PATCH 22/23] Adds parser example --- examples/parser.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 examples/parser.py diff --git a/examples/parser.py b/examples/parser.py new file mode 100644 index 0000000..35143a4 --- /dev/null +++ b/examples/parser.py @@ -0,0 +1,49 @@ +from syntactes import Grammar, Rule, Token +from syntactes.parser import ParserError, SLRParser, execute_on + +EOF = Token.eof() +S = Token("S", is_terminal=False) +E = Token("E", False) +T = Token("T", False) +x = Token("x", True, 1) # value of token is 1 +PLUS = Token("+", True) + +tokens = {EOF, S, E, T, x, PLUS} + +# 0. S -> E $ +# 1. E -> T + E +# 2. E -> T +# 3. T -> x +rule_1 = Rule(0, S, E, EOF) +rule_2 = Rule(1, E, T, PLUS, E) +rule_3 = Rule(2, E, T) +rule_4 = Rule(4, T, x) + +rules = (rule_1, rule_2, rule_3, rule_4) + +grammar = Grammar(rule_1, rules, tokens) + +parser = SLRParser.from_grammar(grammar) + + +@execute_on(rule_4) +def push_value(x_token): + # Add and argument for every token on the right-hand side of the rule. 
+    # Add an argument for every token on the right-hand side of the rule.
+    # Add an argument for every token on the right-hand side of the rule.