From 7cb836e10eb863a8496261febf19252eccf6c64f Mon Sep 17 00:00:00 2001 From: Scott K Logan Date: Fri, 6 Sep 2024 16:02:01 -0500 Subject: [PATCH] Re-work EmPy token caching (#662) The previous approach was to re-implement Interpreter.parse() to iterate over cached tokens where possible. This proved to be a problem when the implementation changed in EmPy 4.x. The approach implemented here is to create a shim between the Interpreter and Scanner API and record/inject the tokens there, which improves the compatibility by working only at the API boundary and not duplicating chunks of the upstream implementation. Co-authored-by: Chris Lalancette --- colcon_core/shell/template/__init__.py | 50 ++++++++++++++------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/colcon_core/shell/template/__init__.py b/colcon_core/shell/template/__init__.py index c6ba3476..7f2c69bc 100644 --- a/colcon_core/shell/template/__init__.py +++ b/colcon_core/shell/template/__init__.py @@ -4,6 +4,7 @@ from io import StringIO import os +from colcon_core.generic_decorator import GenericDecorator from colcon_core.logging import colcon_logger try: from em import Interpreter @@ -70,25 +71,30 @@ def installProxy(self): # noqa: D102 N802 class CachingInterpreter(BypassStdoutInterpreter): - """Interpreter for EmPy which which caches parsed tokens.""" - - def parse(self, scanner, locals=None): # noqa: A002 D102 - global cached_tokens - data = scanner.buffer - # try to use cached tokens - tokens = cached_tokens.get(data) - if tokens is None: - # collect tokens and cache them - tokens = [] - while True: - token = scanner.one() - if token is None: - break - tokens.append(token) - cached_tokens[data] = tokens - - # reimplement the parse method using the (cached) tokens - self.invoke('atParse', scanner=scanner, locals=locals) - for token in tokens: - self.invoke('atToken', token=token) - token.run(self, locals) + """Interpreter for EmPy which caches parsed tokens.""" + + class _CachingScannerDecorator(GenericDecorator): + + def __init__(self, decoree, cache): + super().__init__(decoree, _cache=cache, _idx=0) + + def one(self, *args, **kwargs): + if self._idx < len(self._cache): + token, count = self._cache[self._idx] + self.advance(count) + self.sync() + else: + count = len(self._decoree) + token = self._decoree.one(*args, **kwargs) + count -= len(self._decoree) + self._cache.append((token, count)) + + self._idx += 1 + return token + + def parse(self, scanner, *args, **kwargs): # noqa: A002 D102 + cache = cached_tokens.setdefault(scanner.buffer, []) + return super().parse( + CachingInterpreter._CachingScannerDecorator(scanner, cache), + *args, + **kwargs)