From 17e4a888882a366930e9d00df626bded20c3d49a Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun <tbennun@users.noreply.github.com>
Date: Wed, 13 Nov 2024 04:35:04 -0800
Subject: [PATCH 1/5] Fix temporary transient counter during Python parsing of
 nested calls (#1745)

Fixes #1139
---
 dace/frontend/python/newast.py                |  3 +
 dace/sdfg/sdfg.py                             | 14 ++++
 tests/npbench/misc/mandelbrot1_test.py        |  2 +-
 tests/npbench/misc/mandelbrot2_test.py        |  2 +-
 .../nested_name_accesses_test.py              | 70 +++++++++++++++----
 5 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py
index b4e83cc1e7..1cbb8e67c9 100644
--- a/dace/frontend/python/newast.py
+++ b/dace/frontend/python/newast.py
@@ -3940,6 +3940,9 @@ def _parse_sdfg_call(self, funcname: str, func: Union[SDFG, SDFGConvertible], no
         for arg in args_to_remove:
             args.remove(arg)
 
+        # Refresh temporary transient counter of the nested SDFG
+        sdfg.refresh_temp_transients()
+
         # Change connector names
         updated_args = []
         arrays_before = list(sdfg.arrays.items())
diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py
index 1b449403d5..927f033584 100644
--- a/dace/sdfg/sdfg.py
+++ b/dace/sdfg/sdfg.py
@@ -1952,6 +1952,20 @@ def temp_data_name(self):
         self._temp_transients += 1
         return name
 
+    def refresh_temp_transients(self):
+        """
+        Updates the temporary transient counter of this SDFG by querying the maximum number among the
+        ``__tmp###`` data descriptors.
+        """
+        temp_transients = [k[5:] for k in self.arrays.keys() if k.startswith('__tmp')]
+        max_temp_transient = 0
+        for arr_suffix in temp_transients:
+            try:
+                max_temp_transient = max(max_temp_transient, int(arr_suffix))
+            except ValueError:  # Not of the form __tmp###
+                continue
+        self._temp_transients = max_temp_transient + 1
+
     def add_temp_transient(self,
                            shape,
                            dtype,
diff --git a/tests/npbench/misc/mandelbrot1_test.py b/tests/npbench/misc/mandelbrot1_test.py
index 521d41c560..6fb8262aa2 100644
--- a/tests/npbench/misc/mandelbrot1_test.py
+++ b/tests/npbench/misc/mandelbrot1_test.py
@@ -97,7 +97,7 @@ def run_mandelbrot1(device_type: dace.dtypes.DeviceType):
         Z, N = sdfg(xmin, xmax, ymin, ymax, maxiter, horizon)
 
     # Compute ground truth and validate
-    Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, xn, yn, maxiter)
+    Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, XN, YN, maxiter)
     assert np.allclose(Z, Z_ref)
     assert np.allclose(N, N_ref)
     return sdfg
diff --git a/tests/npbench/misc/mandelbrot2_test.py b/tests/npbench/misc/mandelbrot2_test.py
index aaca2c6db9..1be84d0c1c 100644
--- a/tests/npbench/misc/mandelbrot2_test.py
+++ b/tests/npbench/misc/mandelbrot2_test.py
@@ -145,7 +145,7 @@ def run_mandelbrot2(device_type: dace.dtypes.DeviceType):
         Z, N = sdfg(xmin, xmax, ymin, ymax, maxiter, horizon)
 
     # Compute ground truth and validate
-    Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, xn, yn, maxiter)
+    Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, XN, YN, maxiter)
     assert np.allclose(Z, Z_ref)
     assert np.allclose(N, N_ref)
     return sdfg
diff --git a/tests/python_frontend/nested_name_accesses_test.py b/tests/python_frontend/nested_name_accesses_test.py
index 1e5b18b840..424f280771 100644
--- a/tests/python_frontend/nested_name_accesses_test.py
+++ b/tests/python_frontend/nested_name_accesses_test.py
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
 import dace as dc
 import numpy as np
 import os
@@ -30,6 +30,7 @@ def test_nested_name_accesses():
 
 
 def test_nested_offset_access():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 5]):
         out = np.zeros((5, 5, 5), np.float64)
@@ -46,6 +47,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]):
 
 
 def test_nested_offset_access_dappy():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 5]):
         out = np.zeros((5, 5, 5), np.float64)
@@ -66,6 +68,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]):
 
 
 def test_nested_multi_offset_access():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 10]):
         out = np.zeros((5, 5, 10), np.float64)
@@ -83,6 +86,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 10]):
 
 
 def test_nested_multi_offset_access_dappy():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 10]):
         out = np.zeros((5, 5, 10), np.float64)
@@ -104,6 +108,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 10]):
 
 
 def test_nested_dec_offset_access():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 5]):
         out = np.zeros((5, 5, 5), np.float64)
@@ -120,6 +125,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]):
 
 
 def test_nested_dec_offset_access_dappy():
+
     @dc.program
     def nested_offset_access(inp: dc.float64[6, 5, 5]):
         out = np.zeros((5, 5, 5), np.float64)
@@ -140,6 +146,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]):
 
 
 def test_nested_offset_access_nested_dependency():
+
     @dc.program
     def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 5]):
         out = np.zeros((5, 5, 5), np.float64)
@@ -161,6 +168,7 @@ def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 5]):
 
 
 def test_nested_offset_access_nested_dependency_dappy():
+
     @dc.program
     def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 10]):
         out = np.zeros((5, 5, 10), np.float64)
@@ -188,19 +196,19 @@ def test_access_to_nested_transient():
     NBLOCKS = 5
 
     @dc.program
-    def small_wip(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]):
+    def small_wip(inp: dc.float64[KLEV + 1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]):
         for jn in dc.map[0:NBLOCKS]:
-            tmp = np.zeros([KLEV+1, KLON])
+            tmp = np.zeros([KLEV + 1, KLON])
             for jl in range(KLON):
                 for jk in range(KLEV):
-                    tmp[jk, jl] = inp[jk, jl, jn] + inp[jk+1, jl, jn]
+                    tmp[jk, jl] = inp[jk, jl, jn] + inp[jk + 1, jl, jn]
 
             for jl in range(KLON):
                 for jk in range(KLEV):
-                    out[jk, jl, jn] = tmp[jk, jl] + tmp[jk+1, jl]
-    
+                    out[jk, jl, jn] = tmp[jk, jl] + tmp[jk + 1, jl]
+
     rng = np.random.default_rng(42)
-    inp = rng.random((KLEV+1, KLON, NBLOCKS))
+    inp = rng.random((KLEV + 1, KLON, NBLOCKS))
     ref = np.zeros((KLEV, KLON, NBLOCKS))
     val = np.zeros((KLEV, KLON, NBLOCKS))
 
@@ -217,14 +225,14 @@ def test_access_to_nested_transient_dappy():
     NBLOCKS = 5
 
     @dc.program
-    def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]):
+    def small_wip_dappy(inp: dc.float64[KLEV + 1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]):
         for jn in dc.map[0:NBLOCKS]:
-            tmp = np.zeros([KLEV+1, KLON])
+            tmp = np.zeros([KLEV + 1, KLON])
             for jl in range(KLON):
                 for jk in range(KLEV):
                     with dc.tasklet():
                         in1 << inp[jk, jl, jn]
-                        in2 << inp[jk+1, jl, jn]
+                        in2 << inp[jk + 1, jl, jn]
                         out1 >> tmp[jk, jl]
                         out1 = in1 + in2
 
@@ -232,12 +240,12 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV
                 for jk in range(KLEV):
                     with dc.tasklet():
                         in1 << tmp[jk, jl]
-                        in2 << tmp[jk+1, jl]
+                        in2 << tmp[jk + 1, jl]
                         out1 >> out[jk, jl, jn]
                         out1 = in1 + in2
-    
+
     rng = np.random.default_rng(42)
-    inp = rng.random((KLEV+1, KLON, NBLOCKS))
+    inp = rng.random((KLEV + 1, KLON, NBLOCKS))
     ref = np.zeros((KLEV, KLON, NBLOCKS))
     val = np.zeros((KLEV, KLON, NBLOCKS))
 
@@ -247,6 +255,41 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV
     assert np.allclose(val, ref)
 
 
+def test_issue_1139():
+    """
+    Regression test generated from issue #1139.
+
+    The origin of the bug was in the Python frontend: An SDFG parsed by the frontend kept
+    a number called ``_temp_transients`` that specifies how many ``__tmp*`` arrays have been created.
+    This number is used to avoid name clashes when inlining SDFGs (although unnecessary).
+    However, if a nested SDFG had already been simplified, where transformations may change the number
+    of transients (or add new ones via inlining, which is happening in this bug), the ``_temp_transients``
+    field becomes out of date and renaming the fields during inlining removes data descriptors.
+    """
+    XN = dc.symbol('XN')
+    YN = dc.symbol('YN')
+    N = dc.symbol('N')
+
+    @dc.program
+    def nester(start: dc.float64, stop: dc.float64, X: dc.float64[N]):
+        dist = (stop - start) / (N - 1)
+        for i in dc.map[0:N]:
+            X[i] = start + i * dist
+
+    @dc.program
+    def tester(xmin: dc.float64, xmax: dc.float64):
+        a = np.ndarray((XN, YN), dtype=np.int64)
+        b = np.ndarray((XN, YN), dtype=np.int64)
+        c = np.ndarray((XN, ), dtype=np.float64)
+        nester(xmin, xmax, c)
+        return c
+
+    xmin = 0.123
+    xmax = 4.567
+    c = tester(xmin, xmax, XN=30, YN=40)
+    assert np.allclose(c, np.linspace(xmin, xmax, 30))
+
+
 if __name__ == "__main__":
     test_nested_name_accesses()
     test_nested_offset_access()
@@ -259,3 +302,4 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV
     test_nested_offset_access_nested_dependency_dappy()
     test_access_to_nested_transient()
     test_access_to_nested_transient_dappy()
+    test_issue_1139()

From c83f601f817fbb8e905e0557ec9fbb6756743a33 Mon Sep 17 00:00:00 2001
From: Philipp Schaad <schaad.phil@gmail.com>
Date: Thu, 14 Nov 2024 19:31:45 +0100
Subject: [PATCH 2/5] Fix `pystr_to_symbolic` not correctly interpreting
 constants as boolean values in boolean comparisons (#1756)

Strings like `not ((N > 20) != 0)` (== `Not(Ne(Gt(N, 20), 0))`) were
incorrectly interpreted by `sympy.sympify` as constant "False". This is
a limitation of sympy, which does not treat integer 0 as falsy and
enforces exact equivalence (or difference) checks with `Ne`. To get
around this limitation, the DaCe internal AST preprocessor now replaces
constants with boolean values if they are arguments to comparison
operations where the other operand is also a comparison operation and
thus returns a boolean.

This fixes an issue with `DeadStateElimination`, closing issue #1129.
---
 dace/symbolic.py                           | 36 +++++++++++++++++++++-
 tests/passes/dead_code_elimination_test.py | 23 +++++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/dace/symbolic.py b/dace/symbolic.py
index 9737080c52..98ffa008d3 100644
--- a/dace/symbolic.py
+++ b/dace/symbolic.py
@@ -1,6 +1,7 @@
-# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
 import ast
 from functools import lru_cache
+import sys
 import sympy
 import pickle
 import re
@@ -982,6 +983,32 @@ def _process_is(elem: Union[Is, IsNot]):
     return expr
 
 
+# Depending on the Python version we need to handle different AST nodes to correctly interpret and detect falsy / truthy
+# values.
+if sys.version_info < (3, 8):
+    _SimpleASTNode = (ast.Constant, ast.Name, ast.NameConstant, ast.Num)
+    _SimpleASTNodeT = Union[ast.Constant, ast.Name, ast.NameConstant, ast.Num]
+
+    def __comp_convert_truthy_falsy(node: _SimpleASTNodeT):
+        if isinstance(node, ast.Num):
+            node_val = node.n
+        elif isinstance(node, ast.Name):
+            node_val = node.id
+        else:
+            node_val = node.value
+        return ast.copy_location(ast.NameConstant(bool(node_val)), node)
+else:
+    _SimpleASTNode = (ast.Constant, ast.Name)
+    _SimpleASTNodeT = Union[ast.Constant, ast.Name]
+
+    def __comp_convert_truthy_falsy(node: _SimpleASTNodeT):  # NOTE: ast.Name stores its payload in `.id`, not `.value`
+        return ast.copy_location(ast.Constant(bool(node.id if isinstance(node, ast.Name) else node.value)), node)
+
+# Convert simple AST node (constant) into a falsy / truthy. Anything other than 0, None, and an empty string '' is
+# considered a truthy, while the listed exceptions are considered falsy values - following the semantics of Python's
+# bool() builtin.
+_convert_truthy_falsy = __comp_convert_truthy_falsy
+
 class PythonOpToSympyConverter(ast.NodeTransformer):
     """ 
     Replaces various operations with the appropriate SymPy functions to avoid non-symbolic evaluation.
@@ -1067,6 +1094,13 @@ def visit_Compare(self, node: ast.Compare):
             raise NotImplementedError
         op = node.ops[0]
         arguments = [node.left, node.comparators[0]]
+
+        # Ensure constant values in boolean comparisons are interpreted as booleans.
+        if isinstance(node.left, ast.Compare) and isinstance(node.comparators[0], _SimpleASTNode):
+            arguments[1] = _convert_truthy_falsy(node.comparators[0])
+        elif isinstance(node.left, _SimpleASTNode) and isinstance(node.comparators[0], ast.Compare):
+            arguments[0] = _convert_truthy_falsy(node.left)
+
         func_node = ast.copy_location(ast.Name(id=self._ast_to_sympy_comparators[type(op)], ctx=ast.Load()), node)
         new_node = ast.Call(func=func_node, args=[self.visit(arg) for arg in arguments], keywords=[])
         return ast.copy_location(new_node, node)
diff --git a/tests/passes/dead_code_elimination_test.py b/tests/passes/dead_code_elimination_test.py
index a41a11c4d6..1832ad8321 100644
--- a/tests/passes/dead_code_elimination_test.py
+++ b/tests/passes/dead_code_elimination_test.py
@@ -1,4 +1,4 @@
-# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved.
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
 """ Various tests for dead code elimination passes. """
 
 import numpy as np
@@ -45,6 +45,26 @@ def test_dse_unconditional():
     assert set(sdfg.states()) == {s, s2, e}
 
 
+def test_dse_edge_condition_with_integer_as_boolean_regression():
+    """
+    This is a regression test for issue #1129, which describes dead state elimination incorrectly eliminating interstate
+    edges when integers are used as boolean values in interstate edge conditions. Code taken from issue #1129.
+    """
+    sdfg = dace.SDFG('dse_edge_condition_with_integer_as_boolean_regression')
+    sdfg.add_scalar('N', dtype=dace.int32, transient=True)
+    sdfg.add_scalar('result', dtype=dace.int32)
+    state_init = sdfg.add_state()
+    state_middle = sdfg.add_state()
+    state_end = sdfg.add_state()
+    sdfg.add_edge(state_init, state_end, dace.InterstateEdge(condition='(not ((N > 20) != 0))',
+                                                             assignments={'result': 'N'}))
+    sdfg.add_edge(state_init, state_middle, dace.InterstateEdge(condition='((N > 20) != 0)'))
+    sdfg.add_edge(state_middle, state_end, dace.InterstateEdge(assignments={'result': '20'}))
+
+    res = DeadStateElimination().apply_pass(sdfg, {})
+    assert res is None
+
+
 def test_dde_simple():
 
     @dace.program
@@ -307,6 +327,7 @@ def test_dce_add_type_hint_of_variable(dtype):
 if __name__ == '__main__':
     test_dse_simple()
     test_dse_unconditional()
+    test_dse_edge_condition_with_integer_as_boolean_regression()
     test_dde_simple()
     test_dde_libnode()
     test_dde_access_node_in_scope(False)

From f7576870a2133e9f118085d8860a789a24e8cb6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?=
 <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:11:57 +0100
Subject: [PATCH 3/5] Fixed `dace::math::pi` and `dace::math::nan` on GPU
 (#1759)

Both $\pi$ and `NaN` are implemented as classes in DaCe.
However, these classes were not marked as device code, so they were only
available on the host.

This PR:
- Fixes this for `NaN` and $\pi$.
- Extends the implementation.
- Adds tests for them.
---
 dace/runtime/include/dace/math.h  |   2 +-
 dace/runtime/include/dace/nan.h   | 100 ++++---
 dace/runtime/include/dace/pi.h    | 473 ++++++++++++++++--------------
 dace/runtime/include/dace/types.h |   6 +
 tests/numpy/constants_test.py     | 170 +++++++++++
 5 files changed, 494 insertions(+), 257 deletions(-)
 create mode 100644 tests/numpy/constants_test.py

diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h
index 4dae494a8a..9cc131ee62 100644
--- a/dace/runtime/include/dace/math.h
+++ b/dace/runtime/include/dace/math.h
@@ -496,7 +496,7 @@ namespace dace
 {
     namespace math
     {       
-        static DACE_CONSTEXPR typeless_pi pi{};
+        static DACE_CONSTEXPR DACE_HostDev typeless_pi pi{};
         static DACE_CONSTEXPR typeless_nan nan{};
         //////////////////////////////////////////////////////
         template<typename T>
diff --git a/dace/runtime/include/dace/nan.h b/dace/runtime/include/dace/nan.h
index a8d1eb4c52..b4bac93980 100644
--- a/dace/runtime/include/dace/nan.h
+++ b/dace/runtime/include/dace/nan.h
@@ -13,101 +13,119 @@ namespace dace
         // Defines a typeless Pi
         struct typeless_nan
         {
+            DACE_CONSTEXPR DACE_HDFI typeless_nan() noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_nan(const typeless_nan&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_nan(typeless_nan&&) noexcept = default;
+            DACE_HDFI ~typeless_nan() noexcept = default;
+
+#ifndef DACE_XILINX
+            DACE_CONSTEXPR DACE_HDFI typeless_nan& operator=(const typeless_nan&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_nan& operator=(typeless_nan&&) noexcept = default;
+#endif
+
             operator int() const = delete;
-            operator float() const
+            DACE_CONSTEXPR DACE_HDFI operator float() const
             {
                 return std::numeric_limits<float>::quiet_NaN();
             }
-            operator double() const
+            DACE_CONSTEXPR DACE_HDFI operator double() const
             {
                 return std::numeric_limits<double>::quiet_NaN();
             }
-            operator long double() const
+
+#if !( defined(__CUDACC__) || defined(__HIPCC__) )
+            //There is no long double on the GPU
+            DACE_CONSTEXPR DACE_HDFI operator long double() const
             {
                 return std::numeric_limits<long double>::quiet_NaN();
             }
-            typeless_nan operator+() const
+#endif
+            DACE_CONSTEXPR DACE_HDFI typeless_nan operator+() const
             {
                 return typeless_nan{};
             }
-            typeless_nan operator-() const
+            DACE_CONSTEXPR DACE_HDFI typeless_nan operator-() const
             {
                 return typeless_nan{};
             }
         };
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator*(const T&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator*(const T&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator*(const typeless_nan&,  const T&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator*(const typeless_nan&,  const T&) noexcept { return typeless_nan{}; }
 
-        inline typeless_nan
-        operator*(const typeless_nan&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI typeless_nan
+        operator*(const typeless_nan&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator+(const T&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator+(const T&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator+(const typeless_nan&,  const T&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator+(const typeless_nan&,  const T&) noexcept { return typeless_nan{}; }
 
-        inline typeless_nan
-        operator+(const typeless_nan&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI typeless_nan
+        operator+(const typeless_nan&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator-(const T&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator-(const T&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator-(const typeless_nan&,  const T&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator-(const typeless_nan&,  const T&) noexcept { return typeless_nan{}; }
 
-        inline typeless_nan
-        operator-(const typeless_nan&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI typeless_nan
+        operator-(const typeless_nan&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator/(const T&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator/(const T&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator/(const typeless_nan&,  const T&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator/(const typeless_nan&,  const T&) noexcept { return typeless_nan{}; }
 
-        inline typeless_nan
-        operator/(const typeless_nan&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI typeless_nan
+        operator/(const typeless_nan&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator%(const T&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator%(const T&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
         template<typename T>
-        DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
-        operator%(const typeless_nan&,  const T&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI std::enable_if_t<std::is_floating_point<T>::value || std::is_integral<T>::value, typeless_nan>
+        operator%(const typeless_nan&,  const T&) noexcept { return typeless_nan{}; }
 
-        inline typeless_nan
-        operator%(const typeless_nan&,  const typeless_nan&) { return typeless_nan{}; }
+        DACE_CONSTEXPR DACE_HDFI typeless_nan
+        operator%(const typeless_nan&,  const typeless_nan&) noexcept { return typeless_nan{}; }
 
-    }
-}
+        DACE_HDFI typeless_nan ipow(const typeless_nan&, const unsigned int&) {
+            return typeless_nan{};
+        }
 
 	//These functions allows to perfrom operations with `typeless_nan` instances.
-#	define FADAPT(F) DACE_CONSTEXPR ::dace::math::typeless_nan F (::dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }
-#	define FADAPT2(F) template<typename T1> DACE_CONSTEXPR dace::math::typeless_nan F (T1&&, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }; \
-			  template<typename T2> DACE_CONSTEXPR dace::math::typeless_nan F (dace::math::typeless_nan, T2&&) { return ::dace::math::typeless_nan{}; }; \
-			  DACE_CONSTEXPR ::dace::math::typeless_nan F (dace::math::typeless_nan, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }
+#	define FADAPT(F) DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&) noexcept { return typeless_nan{}; }
+#	define FADAPT2(F) template<typename T1> DACE_CONSTEXPR DACE_HDFI typeless_nan F (T1&&, dace::math::typeless_nan) noexcept { return typeless_nan{}; }; \
+			  template<typename T2> DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&, T2&&) noexcept { return typeless_nan{}; }; \
+			  DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; }
         FADAPT(tanh); FADAPT(cos); FADAPT(sin); FADAPT(sqrt); FADAPT(tan);
         FADAPT(acos); FADAPT(asin); FADAPT(atan); FADAPT(log); FADAPT(exp);
         FADAPT(floor); FADAPT(ceil); FADAPT(round); FADAPT(abs);
         FADAPT2(max); FADAPT2(min);
 #       undef FADAPT2
 #	undef FADAPT
+    }
+}
+
 
 #endif  // __DACE_NAN_H
diff --git a/dace/runtime/include/dace/pi.h b/dace/runtime/include/dace/pi.h
index 331b8c1636..818a22f6d0 100644
--- a/dace/runtime/include/dace/pi.h
+++ b/dace/runtime/include/dace/pi.h
@@ -2,6 +2,8 @@
 #ifndef __DACE_PI_H
 #define __DACE_PI_H
 
+#include <type_traits>
+
 // Classes that are used to define a typeless Pi
 
 //#define _USE_MATH_DEFINES
@@ -16,233 +18,274 @@ namespace dace
     {
         //////////////////////////////////////////////////////
         // Defines a typeless Pi
-        struct typeless_pi
+
+        template<typename T>
+        struct is_typeless_pi { static constexpr bool value = false; };
+        #define MAKE_TYPELESS_PI(type) template<> struct is_typeless_pi<type> { static constexpr bool value = true; }
+
+        struct typeless_pi;
+
+        /* Represents $m * \pi$. */
+        struct typeless_pi_mult
         {
-            double value() const { return M_PI; }
-            operator int() const
-            {
-                return int(this->value());
-            }
-            operator float() const
-            {
-                return float(this->value());
-            }
-            operator double() const
-            {
-                return double(this->value());
-            }
+            int mult;
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(int m): mult(m) {}
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult() noexcept: typeless_pi_mult(1) {};
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(const typeless_pi&) noexcept: typeless_pi_mult(1) {};
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(const typeless_pi_mult&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(typeless_pi_mult&&) noexcept = default;
+            DACE_HDFI ~typeless_pi_mult() noexcept = default;
+
+#ifndef DACE_XILINX
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult& operator=(const typeless_pi_mult&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult& operator=(typeless_pi_mult&&) noexcept = default;
+#endif
+
+            template<
+                typename T,
+                typename = std::enable_if_t<std::is_integral<T>::value>
+            >
+            DACE_CONSTEXPR DACE_HDFI operator T() const noexcept
+            { return T(mult * M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI operator float() const noexcept
+            { return float(mult * M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI operator double() const noexcept
+            { return mult * M_PI; }
+
+            DACE_CONSTEXPR DACE_HDFI operator long double() const noexcept
+            { return (long double)(mult * M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+() const noexcept
+            { return *this; }
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator-() const noexcept
+            { return typeless_pi_mult(-this->mult); }
         };
-        struct typeless_pi_mult : typeless_pi
+        MAKE_TYPELESS_PI(typeless_pi_mult);
+
+        /* Represents $\pi$ */
+        struct typeless_pi
         {
-            int mult; typeless_pi_mult(int m = 1) : mult(m) {}
-            double value() const { return mult * M_PI; }
-
-            operator int() const
-            {
-                return int(this->value());
-            }
-            operator float() const
-            {
-                return float(this->value());
-            }
-            operator double() const
-            {
-                return double(this->value());
-            }
+            DACE_CONSTEXPR DACE_HDFI typeless_pi() noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi(const typeless_pi&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi(typeless_pi&&) noexcept = default;
+            DACE_HDFI ~typeless_pi() noexcept = default;
+#ifndef DACE_XILINX
+            DACE_CONSTEXPR DACE_HDFI typeless_pi& operator=(const typeless_pi&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi& operator=(typeless_pi&&) noexcept = default;
+#endif
+
+            template<
+                typename T,
+                typename = std::enable_if_t<std::is_integral<T>::value>
+            >
+            DACE_CONSTEXPR DACE_HDFI operator T() const noexcept
+            { return T(M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI operator float() const noexcept
+            { return float(M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI operator double() const noexcept
+            { return M_PI; }
+
+            DACE_CONSTEXPR DACE_HDFI operator long double() const noexcept
+            { return (long double)(M_PI); }
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi operator+() const noexcept
+            { return *this; }
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator-() const noexcept
+            { return typeless_pi_mult(-1); }
         };
-        struct typeless_pi_exp : typeless_pi_mult
+        MAKE_TYPELESS_PI(typeless_pi);
+
+        /* Represents $m * \pi^{e}$ */
+        struct typeless_pi_exp
         {
-            int mult, exp; typeless_pi_exp(int m = 1, int e = 1) : mult(m), exp(e) {}
-            double value() const { return mult * std::pow(M_PI, exp); }
-            operator int() const
-            {
-                return int(this->value());
-            }
-            operator float() const
-            {
-                return float(this->value());
-            }
-            operator double() const
-            {
-                return double(this->value());
-            }
+            int mult, exp;
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(int m, int e): mult(m), exp(e) {}
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp() noexcept: typeless_pi_exp(1, 1) {};
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(const typeless_pi_exp&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(typeless_pi_exp&&) noexcept = default;
+            DACE_HDFI ~typeless_pi_exp() noexcept = default;
+
+#ifndef DACE_XILINX
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp& operator=(const typeless_pi_exp&) noexcept = default;
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp& operator=(typeless_pi_exp&&) noexcept = default;
+#endif
+
+            template<
+                typename T,
+                typename = std::enable_if_t<std::is_integral<T>::value>  // integral targets only
+            >
+            DACE_CONSTEXPR DACE_HDFI operator T() const noexcept
+            { return T(mult * std::pow(static_cast<double>(M_PI), exp)); }  // exponentiate in double; truncate only the final value
+
+
+            /* We have to do the selection this way, because it seems that NVIDIA does
+             *  not provide `powl` and `powf` in the std namespace. */
+            DACE_CONSTEXPR DACE_HDFI operator float() const
+            { using std::pow; return mult * pow(static_cast<float>(M_PI), exp); }
+
+            DACE_CONSTEXPR DACE_HDFI operator double() const
+            { using std::pow; return mult * std::pow(static_cast<double>(M_PI), exp); }
+
+#if !( defined(__CUDACC__) || defined(__HIPCC__) )
+            //There is no long double on the GPU
+            DACE_CONSTEXPR DACE_HDFI operator long double() const
+            { using std::pow; return mult * std::pow(static_cast<long double>(M_PI), exp); }
+#endif
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator+() const
+            { return *this; }
+
+            DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator-() const
+            { return typeless_pi_exp(-this->mult, this->exp); }
         };
-        inline typeless_pi_mult operator*(const typeless_pi&, const int& num)
-        {
-            return typeless_pi_mult(num);
-        }
-        inline typeless_pi_mult operator*(const typeless_pi_mult& p, const int& num)
-        {
-            return typeless_pi_mult(p.mult * num);
-        }
-        inline typeless_pi_exp operator*(const typeless_pi_exp& p, const int& num)
-        {
-            return typeless_pi_exp(p.mult * num, p.exp);
-        }
-        inline typeless_pi_mult operator*(const int& num, const typeless_pi&)
-        {
-            return typeless_pi_mult(num);
-        }
-        inline typeless_pi_mult operator*(const int& num, const typeless_pi_mult& p)
-        {
-            return typeless_pi_mult(num * p.mult);
-        }
-        inline typeless_pi_exp operator*(const int& num, const typeless_pi_exp& p)
-        {
-            return typeless_pi_exp(num * p.mult, p.exp);
-        }
-        template <typename T>
-        T operator+(const typeless_pi& p, const T& num)
-        {
-            return T(p.value()) + num;
-        }
-        template <typename T>
-        T operator-(const typeless_pi& p, const T& num)
-        {
-            return T(p.value()) - num;
-        }
+        MAKE_TYPELESS_PI(typeless_pi_exp);
 
-        template <typename T>
-        T operator*(const typeless_pi& p, const T& num)
-        {
-            return T(p.value()) * num;
-        }
-        template <typename T>
-        T operator/(const typeless_pi& p, const T& num)
-        {
-            return T(p.value()) / num;
-        }
-        template <typename T>
-        T operator+(const T& num, const typeless_pi& p)
-        {
-            return num + T(p.value());
-        }
-        template <typename T>
-        T operator-(const T& num, const typeless_pi& p)
-        {
-            return num - T(p.value());
-        }
-        template <typename T>
-        T operator*(const T& num, const typeless_pi& p)
-        {
-            return num * T(p.value());
-        }
-        template <typename T>
-        T operator/(const T& num, const typeless_pi& p)
-        {
-            return num / T(p.value());
-        }
-        template <typename T>
-        T operator+(const typeless_pi_mult& p, const T& num)
-        {
-            return T(p.value()) + num;
-        }
-        template <typename T>
-        T operator-(const typeless_pi_mult& p, const T& num)
-        {
-            return T(p.value()) - num;
-        }
 
-        template <typename T>
-        T operator*(const typeless_pi_mult& p, const T& num)
-        {
-            return T(p.value()) * num;
-        }
-        template <typename T>
-        T operator/(const typeless_pi_mult& p, const T& num)
-        {
-            return T(p.value()) / num;
-        }
-        template <typename T>
-        T operator+(const T& num, const typeless_pi_mult& p)
-        {
-            return num + T(p.value());
-        }
-        template <typename T>
-        T operator-(const T& num, const typeless_pi_mult& p)
-        {
-            return num - T(p.value());
-        }
-        template <typename T>
-        T operator*(const T& num, const typeless_pi_mult& p)
-        {
-            return num * T(p.value());
-        }
-        template <typename T>
-        T operator/(const T& num, const typeless_pi_mult& p)
-        {
-            return num / T(p.value());
-        }
-        template <typename T>
-        T operator+(const typeless_pi_exp& p, const T& num)
-        {
-            return T(p.value()) + num;
-        }
-        template <typename T>
-        T operator-(const typeless_pi_exp& p, const T& num)
-        {
-            return T(p.value()) - num;
-        }
+        DACE_CONSTEXPR DACE_HDFI int operator/(const typeless_pi&, const typeless_pi&) noexcept
+        { return 1; }
 
-        template <typename T>
-        T operator*(const typeless_pi_exp& p, const T& num)
-        {
-            return T(p.value()) * num;
-        }
-        template <typename T>
-        T operator/(const typeless_pi_exp& p, const T& num)
-        {
-            return T(p.value()) / num;
-        }
-        template <typename T>
-        T operator+(const T& num, const typeless_pi_exp& p)
-        {
-            return num + T(p.value());
-        }
-        template <typename T>
-        T operator-(const T& num, const typeless_pi_exp& p)
-        {
-            return num - T(p.value());
-        }
-        template <typename T>
-        T operator*(const T& num, const typeless_pi_exp& p)
-        {
-            return num * T(p.value());
-        }
-        template <typename T>
-        T operator/(const T& num, const typeless_pi_exp& p)
-        {
-            return num / T(p.value());
-        }
-        inline typeless_pi_mult operator-(const typeless_pi&)
-        {
-            return typeless_pi_mult(-1);
-        }
-        template <typename T>
-        typeless_pi_mult operator+(const typeless_pi&, const typeless_pi&)
-        {
-            return typeless_pi_mult(2);
-        }
-        template <typename T>
-        typeless_pi_mult operator+(const typeless_pi_mult& p1, const typeless_pi_mult& p2)
-        {
-            return typeless_pi_mult(p1.mult + p2.mult);
-        }
-        template <typename T>
-        typeless_pi_exp operator*(const typeless_pi_mult& p1, const typeless_pi_mult& p2)
-        {
-            return typeless_pi_exp(p1.mult * p2.mult, 2);
+        DACE_CONSTEXPR DACE_HDFI int operator-(const typeless_pi&, const typeless_pi&) noexcept
+        { return 0; }
+
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const typeless_pi&, const int& num) noexcept
+        { return typeless_pi_mult(num); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const int& num, const typeless_pi&) noexcept
+        { return typeless_pi_mult(num); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const typeless_pi_mult& p, const int& num) noexcept
+        { return typeless_pi_mult(p.mult * num); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const int& num, const typeless_pi_mult& p) noexcept
+        { return typeless_pi_mult(p.mult * num); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi&, const typeless_pi&) noexcept
+        { return typeless_pi_mult(2); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi&, const typeless_pi_mult& pi) noexcept
+        { return typeless_pi_mult(pi.mult + 1); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi_mult& pi, const typeless_pi&) noexcept
+        { return typeless_pi_mult(pi.mult + 1); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept
+        { return typeless_pi_mult(pl.mult + pr.mult); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator-(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept
+        { return typeless_pi_mult(pl.mult - pr.mult); }
+
+        DACE_CONSTEXPR DACE_HDFI int operator/(const typeless_pi_mult& pl, const typeless_pi&) noexcept
+        { return pl.mult; }
+
+        DACE_CONSTEXPR DACE_HDFI double operator/(const typeless_pi& pl, const typeless_pi_mult& pr) noexcept
+        { return 1.0 / pr.mult; }
+
+
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi&, const typeless_pi&) noexcept
+        { return typeless_pi_exp(1, 2); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept
+        { return typeless_pi_exp(pl.mult * pr.mult, 2); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi&) noexcept
+        { return typeless_pi_exp(pl.mult, 2); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi& pl, const typeless_pi_mult& pr) noexcept
+        { return typeless_pi_exp(pr.mult, 2); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const typeless_pi_mult& pr) noexcept
+        { return typeless_pi_exp(pl.mult * pr.mult, pl.exp + 1); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi_exp& pr) noexcept
+        { return pr * pl; }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const typeless_pi_exp& pr) noexcept
+        { return typeless_pi_exp(pl.mult * pr.mult, pr.exp + pl.exp); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const int& num) noexcept
+        { return typeless_pi_exp(pl.mult * num, pl.exp); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const int& num, const typeless_pi_exp& pr) noexcept
+        { return typeless_pi_exp(pr.mult * num, pr.exp); }
+
+        DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator/(const typeless_pi_exp& pl, const typeless_pi&) noexcept
+        { return typeless_pi_exp(pl.mult, pl.exp - 1); }
+
+
+        // The code generator guarantees us that `b > 0`.
+        DACE_HDFI typeless_pi_exp ipow(const typeless_pi_mult& pi, const unsigned int& b) {
+            return typeless_pi_exp(pow(pi.mult, b), b);
         }
-        template <typename T>
-        typeless_pi_exp operator*(const typeless_pi&, const typeless_pi&)
-        {
-            return typeless_pi_exp(1, 2);
+        DACE_HDFI typeless_pi_exp ipow(const typeless_pi& pi, const unsigned int& b) {
+            return typeless_pi_exp(1, b);
         }
-        template <typename T>
-        typeless_pi_exp operator*(const typeless_pi_exp& p1, const typeless_pi_exp& p2)
-        {
-            return typeless_pi_exp(p1.mult * p2.mult, p1.exp + p2.exp);
+        DACE_HDFI typeless_pi_exp ipow(const typeless_pi_exp& pi, const unsigned int& b) {
+            return typeless_pi_exp(pow(pi.mult, b), pi.exp * b);
         }
+
+#       define DEF_PI_OPS(op) 										\
+	template<typename T, typename PI, typename = std::enable_if_t<is_typeless_pi<PI>::value && (!is_typeless_pi<T>::value)> >	\
+	DACE_CONSTEXPR DACE_HDFI T operator op (const T& lhs, const PI& pi) noexcept			\
+	{ return lhs op (static_cast<T>(pi)); }								\
+	template<typename PI, typename T, typename = std::enable_if_t<is_typeless_pi<PI>::value && (!is_typeless_pi<T>::value)> >	\
+	DACE_CONSTEXPR DACE_HDFI T operator op (const PI& pi, const T& rhs) noexcept			\
+	{ return (static_cast<T>(pi)) op rhs; }
+
+	DEF_PI_OPS(+);
+	DEF_PI_OPS(-);
+	DEF_PI_OPS(/);
+	DEF_PI_OPS(*);
+
+        DACE_CONSTEXPR DACE_HDFI int sin(const typeless_pi&) noexcept
+        { return 0; }
+
+        DACE_CONSTEXPR DACE_HDFI int sin(const typeless_pi_mult& pi) noexcept
+        { return 0; }
+
+	DACE_HDFI double sin(const typeless_pi_exp& pi) noexcept
+	{ return std::sin(static_cast<double>(pi)); }
+
+        DACE_CONSTEXPR DACE_HDFI int cos(const typeless_pi&) noexcept
+        { return -1; }  // cos(pi) == -1, matching cos(typeless_pi_mult) for mult == 1
+
+        DACE_CONSTEXPR DACE_HDFI int cos(const typeless_pi_mult& pi) noexcept
+        { return (pi.mult % 2 == 0) ? 1 : (-1); }
+
+	DACE_HDFI double cos(const typeless_pi_exp& pi) noexcept
+	{ return std::cos(static_cast<double>(pi)); }
+
+
+#       define DEF_PI_TRIGO(F)						\
+	DACE_HDFI double F (const typeless_pi& pi) noexcept		\
+	{ return std:: F( static_cast<double>(pi) ); }			\
+	DACE_HDFI double F (const typeless_pi_mult& pi) noexcept	\
+	{ return std:: F( static_cast<double>(pi) ); }			\
+	DACE_HDFI double F (const typeless_pi_exp& pi) noexcept		\
+	{ return std:: F( static_cast<double>(pi) ); }
+
+        DEF_PI_TRIGO(asin);
+        DEF_PI_TRIGO(acos);
+        DEF_PI_TRIGO(tan);
+        DEF_PI_TRIGO(atan);
+        DEF_PI_TRIGO(exp);
+        DEF_PI_TRIGO(log);
+
+
+#       undef DEF_PI_TRIGO
+#       undef DEF_PI_OPS
+#	undef MAKE_TYPELESS_PI
     }
 }
 
diff --git a/dace/runtime/include/dace/types.h b/dace/runtime/include/dace/types.h
index e5eed1e35e..9a8676e0d4 100644
--- a/dace/runtime/include/dace/types.h
+++ b/dace/runtime/include/dace/types.h
@@ -49,7 +49,13 @@
     #define DACE_HDFI __host__ __device__ __forceinline__
     #define DACE_HFI __host__ __forceinline__
     #define DACE_DFI __device__ __forceinline__
+    #define DACE_HostDev __host__ __device__
+    #define DACE_Host __host__ 
+    #define DACE_Dev __device__
 #else
+    #define DACE_HostDev
+    #define DACE_Host 
+    #define DACE_Dev
     #define DACE_HDFI inline
     #define DACE_HFI inline
     #define DACE_DFI inline
diff --git a/tests/numpy/constants_test.py b/tests/numpy/constants_test.py
new file mode 100644
index 0000000000..6aa3b15621
--- /dev/null
+++ b/tests/numpy/constants_test.py
@@ -0,0 +1,170 @@
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
+import dace
+import numpy as np
+import uuid
+import math
+import pytest
+
+def _make_sdfg(
+        code: str,
+        dtype = dace.float64,
+) -> dace.SDFG:
+    """Generates an SDFG that writes an expression to an array.
+    """
+    sdfg = dace.SDFG(name=f"const_test_{str(uuid.uuid1()).replace('-', '_')}")
+    state = sdfg.add_state(is_start_block=True)
+    sdfg.add_array(
+            "out",
+            shape=(10,),
+            dtype=dtype,
+            transient=False,
+    )
+
+    state.add_mapped_tasklet(
+            "comput",
+            map_ranges={"__i": "0:10"},
+            inputs={},
+            code=f"__out = {code}",
+            outputs={"__out": dace.Memlet("out[__i]")},
+            external_edges=True,
+    )
+    sdfg.validate()
+    return sdfg
+
+
+def _test_sdfg(
+        sdfg: dace.SDFG,
+        expected,
+        dtype = np.float64,
+):
+    out = np.zeros(10, dtype=dtype)
+    sdfg.apply_gpu_transformations()
+    sdfg(out=out)
+    assert np.allclose(out, expected, equal_nan=True), f"Expected {expected}, but got {out[0]}"
+
+
+def _perform_test(
+        code,
+        expected,
+        dtype = np.float64,
+):
+    print(f"PERFORM: {code}")
+    dace_dtype = dace.dtypes.dtype_to_typeclass(dtype)
+    sdfg = _make_sdfg(code=code, dtype=dace_dtype)
+    _test_sdfg(sdfg=sdfg, expected=expected, dtype=dtype)
+
+
+@pytest.mark.gpu
+def test_constant_pi_simple():
+    _perform_test(
+            code="math.pi",
+            expected=math.pi
+    )
+
+
+@pytest.mark.gpu
+def test_constant_pi_add():
+    _perform_test(
+            code="-math.pi",
+            expected=-math.pi
+    )
+    _perform_test(
+            code="math.pi + math.pi",
+            expected=2 * math.pi
+    )
+    _perform_test(
+            code="math.pi - math.pi",
+            expected=0.
+    )
+
+
+@pytest.mark.gpu
+def test_constant_pi_mult():
+    _perform_test(
+            code="(math.pi ** 2) * 2",
+            expected=math.pi * math.pi * 2.0
+    )
+    _perform_test(
+            code="math.pi * 2",
+            expected=2 * math.pi
+    )
+    _perform_test(
+            code="math.pi * 2 + math.pi",
+            expected=2 * math.pi + math.pi
+    )
+    _perform_test(
+            code="math.pi * math.pi * 2",
+            expected=math.pi * math.pi * 2.0
+    )
+    _perform_test(
+            code="math.pi / math.pi ",
+            expected=1
+    )
+    _perform_test(
+            code="(math.pi + math.pi) / math.pi ",
+            expected=2
+    )
+    _perform_test(
+            code="(math.pi * math.pi) / math.pi ",
+            expected=math.pi
+    )
+
+
+@pytest.mark.gpu
+def test_constant_pi_fun():
+    _perform_test(
+            code="math.sin(math.pi)",
+            expected=0,
+    )
+    _perform_test(
+            code="math.sin(math.pi * 4)",
+            expected=math.sin(math.pi * 4),
+    )
+    _perform_test(
+            code="math.sin(math.pi * 5)",
+            expected=math.sin(math.pi * 5),
+    )
+    _perform_test(
+            code="math.cos(math.pi * 4)",
+            expected=math.cos(math.pi * 4),
+    )
+    _perform_test(
+            code="math.cos(math.pi * 5)",
+            expected=math.cos(math.pi * 5),
+    )
+    _perform_test(
+            code="math.log(math.pi)",
+            expected=math.log(math.pi),
+    )
+
+
+@pytest.mark.gpu
+def test_constant_nan():
+    _perform_test(
+            code="math.nan",
+            expected=math.nan
+    )
+    _perform_test(
+            code="math.nan + 2",
+            expected=math.nan
+    )
+    _perform_test(
+            code="math.nan + 2.0",
+            expected=math.nan
+    )
+    _perform_test(
+            code="math.sin(math.nan + 2.0)",
+            expected=math.nan
+    )
+    _perform_test(
+            code="math.sin(math.nan + 2.0) ** 2",
+            expected=math.nan
+    )
+
+
+if __name__ == "__main__":
+    test_constant_pi_simple()
+    test_constant_pi_add()
+    test_constant_pi_mult()
+    test_constant_pi_fun()
+    test_constant_nan()

From b5f91e18c3ff6b599793645507f54a68b3f07c1f Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun <tbennun@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:29:14 -0800
Subject: [PATCH 4/5] Make scalar to symbol promotion robust to node order in
 state (#1766)

Fixes #1727
---
 dace/sdfg/analysis/schedule_tree/treenodes.py |  2 ++
 .../transformation/passes/scalar_to_symbol.py |  4 ++-
 tests/passes/scalar_to_symbol_test.py         | 30 +++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/dace/sdfg/analysis/schedule_tree/treenodes.py b/dace/sdfg/analysis/schedule_tree/treenodes.py
index 3b447fa15a..dabd436b56 100644
--- a/dace/sdfg/analysis/schedule_tree/treenodes.py
+++ b/dace/sdfg/analysis/schedule_tree/treenodes.py
@@ -41,6 +41,8 @@ def __init__(self, children: Optional[List['ScheduleTreeNode']] = None):
         if self.children:
             for child in children:
                 child.parent = self
+        self.containers = {}
+        self.symbols = {}
 
     def as_string(self, indent: int = 0):
         if not self.children:
diff --git a/dace/transformation/passes/scalar_to_symbol.py b/dace/transformation/passes/scalar_to_symbol.py
index 33712c8a1c..a37729ca7c 100644
--- a/dace/transformation/passes/scalar_to_symbol.py
+++ b/dace/transformation/passes/scalar_to_symbol.py
@@ -522,6 +522,8 @@ def remove_scalar_reads(sdfg: sd.SDFG, array_names: Dict[str, str]):
     for state in sdfg.states():
         scalar_nodes = [n for n in state.nodes() if isinstance(n, nodes.AccessNode) and n.data in array_names]
         for node in scalar_nodes:
+            if node not in state:
+                continue
             symname = array_names[node.data]
             for out_edge in state.out_edges(node):
                 for e in state.memlet_tree(out_edge):
@@ -649,7 +651,7 @@ def apply_pass(self, sdfg: SDFG, _: Dict[Any, Any]) -> Set[str]:
             scalar_nodes = [n for n in state.nodes() if isinstance(n, nodes.AccessNode) and n.data in to_promote]
             # Step 2: Assignment tasklets
             for node in scalar_nodes:
-                if state.in_degree(node) == 0:
+                if node not in state or state.in_degree(node) == 0:
                     continue
                 in_edge = state.in_edges(node)[0]
                 input = in_edge.src
diff --git a/tests/passes/scalar_to_symbol_test.py b/tests/passes/scalar_to_symbol_test.py
index 7fdfbdf737..36decceba2 100644
--- a/tests/passes/scalar_to_symbol_test.py
+++ b/tests/passes/scalar_to_symbol_test.py
@@ -729,6 +729,35 @@ def test_double_index_bug():
                         assert getattr(sympy_node, "name", None) != "indices"
 
 
+def test_reversed_order():
+    """
+    Tests a failure reported in issue #1727.
+    """
+    sdfg = dace.SDFG('tester')
+    sdfg.add_array('inputs', [1], dace.int32)
+    sdfg.add_transient('a', [1], dace.int32)
+    sdfg.add_transient('b', [1], dace.int32)
+    sdfg.add_array('output', [1], dace.int32)
+    initstate = sdfg.add_state()
+    state = sdfg.add_state_after(initstate)
+    finistate = sdfg.add_state_after(state)
+
+    # Note the order here
+    w = state.add_write('b')
+    t = state.add_tasklet('assign', {'inp'}, {'out'}, 'out = inp')
+    r = state.add_read('a')
+    state.add_edge(t, 'out', w, None, dace.Memlet('b'))
+    state.add_edge(r, None, t, 'inp', dace.Memlet('a'))
+
+    initstate.add_nedge(initstate.add_read('inputs'), initstate.add_write('a'), dace.Memlet('inputs'))
+    finistate.add_nedge(finistate.add_read('b'), finistate.add_write('output'), dace.Memlet('output'))
+
+    sdfg.validate()
+    promoted = scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(sdfg, {})
+    assert promoted == {'a', 'b'}
+    sdfg.compile()
+
+
 if __name__ == '__main__':
     test_find_promotable()
     test_promote_simple()
@@ -753,3 +782,4 @@ def test_double_index_bug():
     test_ternary_expression(False)
     test_ternary_expression(True)
     test_double_index_bug()
+    test_reversed_order()

From 4f8eb9225e11aef2c8111628423bc0a847caf224 Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun <tbennun@gmail.com>
Date: Fri, 15 Nov 2024 23:30:07 -0800
Subject: [PATCH 5/5] Match CMake version requirements

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c228ae4558..9c9bc020bb 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@
         output = subprocess.check_output([cmake_path, '--version']).decode('utf-8')
         cmake_version = tuple(int(t) for t in output.splitlines()[0].split(' ')[-1].split('.'))
         # If version meets minimum requirements, CMake is not necessary
-        if cmake_version >= (3, 15):
+        if cmake_version >= (3, 17):
             cmake_requires = []
 except (subprocess.CalledProcessError, OSError, IndexError, ValueError):
     # Any failure in getting the CMake version counts as "not found"