From d2b419fb8d1c6f3e59e403992297dc406c62b2fc Mon Sep 17 00:00:00 2001 From: Tomas Capretto Date: Sun, 18 Feb 2024 16:11:35 -0300 Subject: [PATCH] Interpret True, False, and None as python literals --- formulae/expr.py | 2 +- formulae/parser.py | 2 ++ formulae/scanner.py | 8 +++++++- formulae/terms/call_resolver.py | 17 ++++++----------- tests/test_parser.py | 17 +++++++++++++++++ 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/formulae/expr.py b/formulae/expr.py index 92391e6..ddb99de 100644 --- a/formulae/expr.py +++ b/formulae/expr.py @@ -19,7 +19,7 @@ def __repr__(self): # pragma: no cover def __str__(self): # pragma: no cover right = " ".join(str(self.value).splitlines(True)) - return f"Assign(name={self.name}, value={right}\n)" + return f"Assign(name={self.name}, value={right})" def accept(self, visitor): return visitor.visitAssignExpr(self) diff --git a/formulae/parser.py b/formulae/parser.py index e7ac14d..d707f69 100644 --- a/formulae/parser.py +++ b/formulae/parser.py @@ -195,6 +195,8 @@ def primary(self): # pylint: disable=too-many-return-statements return Literal(token.literal, lexeme=token.lexeme) elif self.match("BQNAME"): return QuotedName(self.previous()) + elif self.match("PYTHON_LITERAL"): + return Literal(self.previous().literal) elif self.match("LEFT_PAREN"): expr = self.expression() self.consume("RIGHT_PAREN", "Expect ')' after expression.") diff --git a/formulae/scanner.py b/formulae/scanner.py index c550333..e3de458 100644 --- a/formulae/scanner.py +++ b/formulae/scanner.py @@ -188,11 +188,17 @@ def number(self): self.add_token("NUMBER", token) + # pylint: disable=eval-used def identifier(self): # 'mod.function' is also an identifier while self.peek().isalnum() or self.peek() in [".", "_"]: self.advance() - self.add_token("IDENTIFIER") + + token = self.code[self.start : self.current] + if token in ("True", "False", "None"): # These are actually literals, not variable names + self.add_token("PYTHON_LITERAL", eval(token)) # Pass literals, not strings + else: + self.add_token("IDENTIFIER") def char(self): while self.peek() not in ["'", '"'] and not self.at_end(): diff --git a/formulae/terms/call_resolver.py b/formulae/terms/call_resolver.py index 71ff69b..23cd4ad 100644 --- a/formulae/terms/call_resolver.py +++ b/formulae/terms/call_resolver.py @@ -93,8 +93,6 @@ class LazyVariable: The name of the variable it represents. """ - BUILTINS = {"True": True, "False": False, "None": None} - def __init__(self, name): self.name = name @@ -129,16 +127,13 @@ def eval(self, data_mask, env): result: The value represented by this name in either the data mask or the environment. """ - if self.name in self.BUILTINS: - result = self.BUILTINS[self.name] - else: + try: + result = data_mask[self.name] + except KeyError: try: - result = data_mask[self.name] - except KeyError: - try: - result = env.namespace[self.name] - except KeyError as e: - raise e + result = env.namespace[self.name] + except KeyError as e: + raise e return result diff --git a/tests/test_parser.py b/tests/test_parser.py index 0965c46..7e2a501 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -264,3 +264,20 @@ def test_unclosed_function_call(): data = pd.DataFrame({"y": [1, 2], "x": [1, 2]}) with pytest.raises(ParseError, match="after arguments"): design_matrices("y ~ f(x", data) + + +def test_parse_python_literals(): + result = Parser(Scanner("f(x, True, y=False, z=None)").scan(False)).parse() + + assert isinstance(result.args[0], Variable) + + assert isinstance(result.args[1], Literal) + assert result.args[1].value is True + + assert isinstance(result.args[2], Assign) + assert isinstance(result.args[2].value, Literal) + assert result.args[2].value.value is False + + assert isinstance(result.args[3], Assign) + assert isinstance(result.args[3].value, Literal) + assert result.args[3].value.value is None \ No newline at end of file