From 7cb836e10eb863a8496261febf19252eccf6c64f Mon Sep 17 00:00:00 2001
From: Scott K Logan <logans@cottsay.net>
Date: Fri, 6 Sep 2024 16:02:01 -0500
Subject: [PATCH] Re-work EmPy token caching (#662)

The previous approach was to re-implement Interpreter.parse() to iterate
over cached tokens where possible. This proved to be a problem when the
implementation changed in EmPy 4.x.

The approach implemented here is to create a shim between the
Interpreter and Scanner API and record/inject the tokens there, which
improves the compatibility by working only at the API boundary and not
duplicating chunks of the upstream implementation.

Co-authored-by: Chris Lalancette <clalancette@gmail.com>
---
 colcon_core/shell/template/__init__.py | 50 ++++++++++++++------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/colcon_core/shell/template/__init__.py b/colcon_core/shell/template/__init__.py
index c6ba3476..7f2c69bc 100644
--- a/colcon_core/shell/template/__init__.py
+++ b/colcon_core/shell/template/__init__.py
@@ -4,6 +4,7 @@
 from io import StringIO
 import os
 
+from colcon_core.generic_decorator import GenericDecorator
 from colcon_core.logging import colcon_logger
 try:
     from em import Interpreter
@@ -70,25 +71,30 @@ def installProxy(self):  # noqa: D102 N802
 
 
 class CachingInterpreter(BypassStdoutInterpreter):
-    """Interpreter for EmPy which which caches parsed tokens."""
-
-    def parse(self, scanner, locals=None):  # noqa: A002 D102
-        global cached_tokens
-        data = scanner.buffer
-        # try to use cached tokens
-        tokens = cached_tokens.get(data)
-        if tokens is None:
-            # collect tokens and cache them
-            tokens = []
-            while True:
-                token = scanner.one()
-                if token is None:
-                    break
-                tokens.append(token)
-            cached_tokens[data] = tokens
-
-        # reimplement the parse method using the (cached) tokens
-        self.invoke('atParse', scanner=scanner, locals=locals)
-        for token in tokens:
-            self.invoke('atToken', token=token)
-            token.run(self, locals)
+    """Interpreter for EmPy which caches parsed tokens."""
+
+    class _CachingScannerDecorator(GenericDecorator):  # record/replay shim
+
+        def __init__(self, decoree, cache):
+            super().__init__(decoree, _cache=cache, _idx=0)
+
+        def one(self, *args, **kwargs):  # next token, from cache if present
+            if self._idx < len(self._cache):  # replay a recorded token
+                token, count = self._cache[self._idx]
+                self.advance(count)  # count = length recorded on first scan
+                self.sync()  # NOTE(review): advance/sync not defined here
+            else:  # not cached yet: scan and record
+                count = len(self._decoree)  # length before scanning
+                token = self._decoree.one(*args, **kwargs)
+                count -= len(self._decoree)  # drop in length during one()
+                self._cache.append((token, count))  # record for replay
+
+            self._idx += 1
+            return token
+
+    def parse(self, scanner, *args, **kwargs):  # noqa: A002 D102
+        cache = cached_tokens.setdefault(scanner.buffer, [])  # keyed by buffer
+        return super().parse(  # parent parse() runs against the shim
+            CachingInterpreter._CachingScannerDecorator(scanner, cache),
+            *args,
+            **kwargs)