Re-work EmPy token caching (#662)

The previous approach was to re-implement Interpreter.parse() to iterate over cached tokens where possible. This proved to be a problem when the implementation changed in EmPy 4.x. The approach implemented here is to create a shim between the Interpreter and Scanner APIs and record/inject the tokens there, which improves compatibility by working only at the API boundary and not duplicating chunks of the upstream implementation.

Co-authored-by: Chris Lalancette <clalancette@gmail.com>
colcon · Sep 6, 2024 · 7cb836e · 7cb836e
1 parent 090e650
commit 7cb836e
Showing 1 changed file with 28 additions and 22 deletions.
diff --git a/colcon_core/shell/template/__init__.py b/colcon_core/shell/template/__init__.py
@@ -4,6 +4,7 @@
 from io import StringIO
 import os
 
+from colcon_core.generic_decorator import GenericDecorator
 from colcon_core.logging import colcon_logger
 try:
  from em import Interpreter
@@ -70,25 +71,30 @@ def installProxy(self): # noqa: D102 N802
 
 
 class CachingInterpreter(BypassStdoutInterpreter):
- """Interpreter for EmPy which which caches parsed tokens."""
-
- def parse(self, scanner, locals=None): # noqa: A002 D102
- global cached_tokens
- data = scanner.buffer
- # try to use cached tokens
- tokens = cached_tokens.get(data)
- if tokens is None:
- # collect tokens and cache them
- tokens = []
- while True:
- token = scanner.one()
- if token is None:
- break
- tokens.append(token)
- cached_tokens[data] = tokens
-
- # reimplement the parse method using the (cached) tokens
- self.invoke('atParse', scanner=scanner, locals=locals)
- for token in tokens:
- self.invoke('atToken', token=token)
- token.run(self, locals)
+ """Interpreter for EmPy which caches parsed tokens."""
+
+ class _CachingScannerDecorator(GenericDecorator):
+
+ def __init__(self, decoree, cache):
+ super().__init__(decoree, _cache=cache, _idx=0)
+
+ def one(self, *args, **kwargs):
+ if self._idx < len(self._cache):
+ token, count = self._cache[self._idx]
+ self.advance(count)
+ self.sync()
+ else:
+ count = len(self._decoree)
+ token = self._decoree.one(*args, **kwargs)
+ count -= len(self._decoree)
+ self._cache.append((token, count))
+
+ self._idx += 1
+ return token
+
+ def parse(self, scanner, *args, **kwargs): # noqa: A002 D102
+ cache = cached_tokens.setdefault(scanner.buffer, [])
+ return super().parse(
+ CachingInterpreter._CachingScannerDecorator(scanner, cache),
+ *args,
+ **kwargs)