From 17e4a888882a366930e9d00df626bded20c3d49a Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Wed, 13 Nov 2024 04:35:04 -0800 Subject: [PATCH 1/5] Fix temporary transient counter during Python parsing of nested calls (#1745) Fixes #1139 --- dace/frontend/python/newast.py | 3 + dace/sdfg/sdfg.py | 14 ++++ tests/npbench/misc/mandelbrot1_test.py | 2 +- tests/npbench/misc/mandelbrot2_test.py | 2 +- .../nested_name_accesses_test.py | 70 +++++++++++++++---- 5 files changed, 76 insertions(+), 15 deletions(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index b4e83cc1e7..1cbb8e67c9 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -3940,6 +3940,9 @@ def _parse_sdfg_call(self, funcname: str, func: Union[SDFG, SDFGConvertible], no for arg in args_to_remove: args.remove(arg) + # Refresh temporary transient counter of the nested SDFG + sdfg.refresh_temp_transients() + # Change connector names updated_args = [] arrays_before = list(sdfg.arrays.items()) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 1b449403d5..927f033584 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1952,6 +1952,20 @@ def temp_data_name(self): self._temp_transients += 1 return name + def refresh_temp_transients(self): + """ + Updates the temporary transient counter of this SDFG by querying the maximum number among the + ``__tmp###`` data descriptors. 
+ """ + temp_transients = [k[5:] for k in self.arrays.keys() if k.startswith('__tmp')] + max_temp_transient = 0 + for arr_suffix in temp_transients: + try: + max_temp_transient = max(max_temp_transient, int(arr_suffix)) + except ValueError: # Not of the form __tmp### + continue + self._temp_transients = max_temp_transient + 1 + def add_temp_transient(self, shape, dtype, diff --git a/tests/npbench/misc/mandelbrot1_test.py b/tests/npbench/misc/mandelbrot1_test.py index 521d41c560..6fb8262aa2 100644 --- a/tests/npbench/misc/mandelbrot1_test.py +++ b/tests/npbench/misc/mandelbrot1_test.py @@ -97,7 +97,7 @@ def run_mandelbrot1(device_type: dace.dtypes.DeviceType): Z, N = sdfg(xmin, xmax, ymin, ymax, maxiter, horizon) # Compute ground truth and validate - Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, xn, yn, maxiter) + Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, XN, YN, maxiter) assert np.allclose(Z, Z_ref) assert np.allclose(N, N_ref) return sdfg diff --git a/tests/npbench/misc/mandelbrot2_test.py b/tests/npbench/misc/mandelbrot2_test.py index aaca2c6db9..1be84d0c1c 100644 --- a/tests/npbench/misc/mandelbrot2_test.py +++ b/tests/npbench/misc/mandelbrot2_test.py @@ -145,7 +145,7 @@ def run_mandelbrot2(device_type: dace.dtypes.DeviceType): Z, N = sdfg(xmin, xmax, ymin, ymax, maxiter, horizon) # Compute ground truth and validate - Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, xn, yn, maxiter) + Z_ref, N_ref = ground_truth(xmin, xmax, ymin, ymax, XN, YN, maxiter) assert np.allclose(Z, Z_ref) assert np.allclose(N, N_ref) return sdfg diff --git a/tests/python_frontend/nested_name_accesses_test.py b/tests/python_frontend/nested_name_accesses_test.py index 1e5b18b840..424f280771 100644 --- a/tests/python_frontend/nested_name_accesses_test.py +++ b/tests/python_frontend/nested_name_accesses_test.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. 
All rights reserved. import dace as dc import numpy as np import os @@ -30,6 +30,7 @@ def test_nested_name_accesses(): def test_nested_offset_access(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 5]): out = np.zeros((5, 5, 5), np.float64) @@ -46,6 +47,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]): def test_nested_offset_access_dappy(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 5]): out = np.zeros((5, 5, 5), np.float64) @@ -66,6 +68,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]): def test_nested_multi_offset_access(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 10]): out = np.zeros((5, 5, 10), np.float64) @@ -83,6 +86,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 10]): def test_nested_multi_offset_access_dappy(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 10]): out = np.zeros((5, 5, 10), np.float64) @@ -104,6 +108,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 10]): def test_nested_dec_offset_access(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 5]): out = np.zeros((5, 5, 5), np.float64) @@ -120,6 +125,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]): def test_nested_dec_offset_access_dappy(): + @dc.program def nested_offset_access(inp: dc.float64[6, 5, 5]): out = np.zeros((5, 5, 5), np.float64) @@ -140,6 +146,7 @@ def nested_offset_access(inp: dc.float64[6, 5, 5]): def test_nested_offset_access_nested_dependency(): + @dc.program def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 5]): out = np.zeros((5, 5, 5), np.float64) @@ -161,6 +168,7 @@ def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 5]): def test_nested_offset_access_nested_dependency_dappy(): + @dc.program def nested_offset_access_nested_dep(inp: dc.float64[6, 5, 10]): out = np.zeros((5, 5, 10), np.float64) @@ -188,19 +196,19 @@ def test_access_to_nested_transient(): NBLOCKS = 5 @dc.program - def small_wip(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV, 
KLON, NBLOCKS]): + def small_wip(inp: dc.float64[KLEV + 1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]): for jn in dc.map[0:NBLOCKS]: - tmp = np.zeros([KLEV+1, KLON]) + tmp = np.zeros([KLEV + 1, KLON]) for jl in range(KLON): for jk in range(KLEV): - tmp[jk, jl] = inp[jk, jl, jn] + inp[jk+1, jl, jn] + tmp[jk, jl] = inp[jk, jl, jn] + inp[jk + 1, jl, jn] for jl in range(KLON): for jk in range(KLEV): - out[jk, jl, jn] = tmp[jk, jl] + tmp[jk+1, jl] - + out[jk, jl, jn] = tmp[jk, jl] + tmp[jk + 1, jl] + rng = np.random.default_rng(42) - inp = rng.random((KLEV+1, KLON, NBLOCKS)) + inp = rng.random((KLEV + 1, KLON, NBLOCKS)) ref = np.zeros((KLEV, KLON, NBLOCKS)) val = np.zeros((KLEV, KLON, NBLOCKS)) @@ -217,14 +225,14 @@ def test_access_to_nested_transient_dappy(): NBLOCKS = 5 @dc.program - def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]): + def small_wip_dappy(inp: dc.float64[KLEV + 1, KLON, NBLOCKS], out: dc.float64[KLEV, KLON, NBLOCKS]): for jn in dc.map[0:NBLOCKS]: - tmp = np.zeros([KLEV+1, KLON]) + tmp = np.zeros([KLEV + 1, KLON]) for jl in range(KLON): for jk in range(KLEV): with dc.tasklet(): in1 << inp[jk, jl, jn] - in2 << inp[jk+1, jl, jn] + in2 << inp[jk + 1, jl, jn] out1 >> tmp[jk, jl] out1 = in1 + in2 @@ -232,12 +240,12 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV for jk in range(KLEV): with dc.tasklet(): in1 << tmp[jk, jl] - in2 << tmp[jk+1, jl] + in2 << tmp[jk + 1, jl] out1 >> out[jk, jl, jn] out1 = in1 + in2 - + rng = np.random.default_rng(42) - inp = rng.random((KLEV+1, KLON, NBLOCKS)) + inp = rng.random((KLEV + 1, KLON, NBLOCKS)) ref = np.zeros((KLEV, KLON, NBLOCKS)) val = np.zeros((KLEV, KLON, NBLOCKS)) @@ -247,6 +255,41 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV assert np.allclose(val, ref) +def test_issue_1139(): + """ + Regression test generated from issue #1139. 
+ + The origin of the bug was in the Python frontend: An SDFG parsed by the frontend kept + a number called ``_temp_transients`` that specifies how many ``__tmp*`` arrays have been created. + This number is used to avoid name clashes when inlining SDFGs (although unnecessary). + However, if a nested SDFG had already been simplified, where transformations may change the number + of transients (or add new ones via inlining, which is happening in this bug), the ``_temp_transients`` + field becomes out of date and renaming the fields during inlining removes data descriptors. + """ + XN = dc.symbol('XN') + YN = dc.symbol('YN') + N = dc.symbol('N') + + @dc.program + def nester(start: dc.float64, stop: dc.float64, X: dc.float64[N]): + dist = (stop - start) / (N - 1) + for i in dc.map[0:N]: + X[i] = start + i * dist + + @dc.program + def tester(xmin: dc.float64, xmax: dc.float64): + a = np.ndarray((XN, YN), dtype=np.int64) + b = np.ndarray((XN, YN), dtype=np.int64) + c = np.ndarray((XN, ), dtype=np.float64) + nester(xmin, xmax, c) + return c + + xmin = 0.123 + xmax = 4.567 + c = tester(xmin, xmax, XN=30, YN=40) + assert np.allclose(c, np.linspace(xmin, xmax, 30)) + + if __name__ == "__main__": test_nested_name_accesses() test_nested_offset_access() @@ -259,3 +302,4 @@ def small_wip_dappy(inp: dc.float64[KLEV+1, KLON, NBLOCKS], out: dc.float64[KLEV test_nested_offset_access_nested_dependency_dappy() test_access_to_nested_transient() test_access_to_nested_transient_dappy() + test_issue_1139() From c83f601f817fbb8e905e0557ec9fbb6756743a33 Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Thu, 14 Nov 2024 19:31:45 +0100 Subject: [PATCH 2/5] Fix `pystr_to_symbolic` not correctly interpreting constants as boolean values in boolean comparisons (#1756) Strings like `not ((N > 20) != 0)` (== `Not(Ne(Gt(N, 20), 0))`) were incorrectly interpreted by `sympy.sympify` as constant "False". 
This is a limitation of sympy, which does not treat integer 0 as falsy, and enforces exact equivalence (or difference) checks with `Ne`. To get around this limitation, the DaCe internal AST preprocessor now replaces constants with boolean values if they are arguments to Comparison operations, where the other operand is also a comparison operation, thus returning a boolean. This fixes an issue with `DeadStateElimination`, closing issue #1129. --- dace/symbolic.py | 36 +++++++++++++++++++++- tests/passes/dead_code_elimination_test.py | 23 +++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/dace/symbolic.py b/dace/symbolic.py index 9737080c52..98ffa008d3 100644 --- a/dace/symbolic.py +++ b/dace/symbolic.py @@ -1,6 +1,7 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. import ast from functools import lru_cache +import sys import sympy import pickle import re @@ -982,6 +983,32 @@ def _process_is(elem: Union[Is, IsNot]): return expr +# Depending on the Python version we need to handle different AST nodes to correctly interpret and detect falsy / truthy +# values. +if sys.version_info < (3, 8): + _SimpleASTNode = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) + _SimpleASTNodeT = Union[ast.Constant, ast.Name, ast.NameConstant, ast.Num] + + def __comp_convert_truthy_falsy(node: _SimpleASTNodeT): + if isinstance(node, ast.Num): + node_val = node.n + elif isinstance(node, ast.Name): + node_val = node.id + else: + node_val = node.value + return ast.copy_location(ast.NameConstant(bool(node_val)), node) +else: + _SimpleASTNode = (ast.Constant, ast.Name) + _SimpleASTNodeT = Union[ast.Constant, ast.Name] + + def __comp_convert_truthy_falsy(node: _SimpleASTNodeT): + return ast.copy_location(ast.Constant(bool(node.value)), node) + +# Convert simple AST node (constant) into a falsy / truthy. 
Anything other than 0, None, and an empty string '' is +# considered a truthy, while the listed exceptions are considered falsy values - following the semantics of Python's +# bool() builtin. +_convert_truthy_falsy = __comp_convert_truthy_falsy + class PythonOpToSympyConverter(ast.NodeTransformer): """ Replaces various operations with the appropriate SymPy functions to avoid non-symbolic evaluation. @@ -1067,6 +1094,13 @@ def visit_Compare(self, node: ast.Compare): raise NotImplementedError op = node.ops[0] arguments = [node.left, node.comparators[0]] + + # Ensure constant values in boolean comparisons are interpreted as booleans. + if isinstance(node.left, ast.Compare) and isinstance(node.comparators[0], _SimpleASTNode): + arguments[1] = _convert_truthy_falsy(node.comparators[0]) + elif isinstance(node.left, _SimpleASTNode) and isinstance(node.comparators[0], ast.Compare): + arguments[0] = _convert_truthy_falsy(node.left) + func_node = ast.copy_location(ast.Name(id=self._ast_to_sympy_comparators[type(op)], ctx=ast.Load()), node) new_node = ast.Call(func=func_node, args=[self.visit(arg) for arg in arguments], keywords=[]) return ast.copy_location(new_node, node) diff --git a/tests/passes/dead_code_elimination_test.py b/tests/passes/dead_code_elimination_test.py index a41a11c4d6..1832ad8321 100644 --- a/tests/passes/dead_code_elimination_test.py +++ b/tests/passes/dead_code_elimination_test.py @@ -1,4 +1,4 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Various tests for dead code elimination passes. 
""" import numpy as np @@ -45,6 +45,26 @@ def test_dse_unconditional(): assert set(sdfg.states()) == {s, s2, e} +def test_dse_edge_condition_with_integer_as_boolean_regression(): + """ + This is a regression test for issue #1129, which describes dead state elimination incorrectly eliminating interstate + edges when integers are used as boolean values in interstate edge conditions. Code taken from issue #1129. + """ + sdfg = dace.SDFG('dse_edge_condition_with_integer_as_boolean_regression') + sdfg.add_scalar('N', dtype=dace.int32, transient=True) + sdfg.add_scalar('result', dtype=dace.int32) + state_init = sdfg.add_state() + state_middle = sdfg.add_state() + state_end = sdfg.add_state() + sdfg.add_edge(state_init, state_end, dace.InterstateEdge(condition='(not ((N > 20) != 0))', + assignments={'result': 'N'})) + sdfg.add_edge(state_init, state_middle, dace.InterstateEdge(condition='((N > 20) != 0)')) + sdfg.add_edge(state_middle, state_end, dace.InterstateEdge(assignments={'result': '20'})) + + res = DeadStateElimination().apply_pass(sdfg, {}) + assert res is None + + def test_dde_simple(): @dace.program @@ -307,6 +327,7 @@ def test_dce_add_type_hint_of_variable(dtype): if __name__ == '__main__': test_dse_simple() test_dse_unconditional() + test_dse_edge_condition_with_integer_as_boolean_regression() test_dde_simple() test_dde_libnode() test_dde_access_node_in_scope(False) From f7576870a2133e9f118085d8860a789a24e8cb6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:11:57 +0100 Subject: [PATCH 3/5] Fixed `dace::math::pi` and `dace::math::nan` on GPU (#1759) Both $\pi$ and `NaN` are implemented as classes in DaCe. However, these classes were not marked as device, thus they are only available on the host. This PR: - Fixes this for `NaN` and $\pi$. - Extends the implementation. - Adds tests for them. 
--- dace/runtime/include/dace/math.h | 2 +- dace/runtime/include/dace/nan.h | 100 ++++--- dace/runtime/include/dace/pi.h | 473 ++++++++++++++++-------------- dace/runtime/include/dace/types.h | 6 + tests/numpy/constants_test.py | 170 +++++++++++ 5 files changed, 494 insertions(+), 257 deletions(-) create mode 100644 tests/numpy/constants_test.py diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h index 4dae494a8a..9cc131ee62 100644 --- a/dace/runtime/include/dace/math.h +++ b/dace/runtime/include/dace/math.h @@ -496,7 +496,7 @@ namespace dace { namespace math { - static DACE_CONSTEXPR typeless_pi pi{}; + static DACE_CONSTEXPR DACE_HostDev typeless_pi pi{}; static DACE_CONSTEXPR typeless_nan nan{}; ////////////////////////////////////////////////////// template diff --git a/dace/runtime/include/dace/nan.h b/dace/runtime/include/dace/nan.h index a8d1eb4c52..b4bac93980 100644 --- a/dace/runtime/include/dace/nan.h +++ b/dace/runtime/include/dace/nan.h @@ -13,101 +13,119 @@ namespace dace // Defines a typeless Pi struct typeless_nan { + DACE_CONSTEXPR DACE_HDFI typeless_nan() noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_nan(const typeless_nan&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_nan(typeless_nan&&) noexcept = default; + DACE_HDFI ~typeless_nan() noexcept = default; + +#ifndef DACE_XILINX + DACE_CONSTEXPR DACE_HDFI typeless_nan& operator=(const typeless_nan&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_nan& operator=(typeless_nan&&) noexcept = default; +#endif + operator int() const = delete; - operator float() const + DACE_CONSTEXPR DACE_HDFI operator float() const { return std::numeric_limits::quiet_NaN(); } - operator double() const + DACE_CONSTEXPR DACE_HDFI operator double() const { return std::numeric_limits::quiet_NaN(); } - operator long double() const + +#if !( defined(__CUDACC__) || defined(__HIPCC__) ) + //There is no long double on the GPU + DACE_CONSTEXPR DACE_HDFI operator long 
double() const { return std::numeric_limits::quiet_NaN(); } - typeless_nan operator+() const +#endif + DACE_CONSTEXPR DACE_HDFI typeless_nan operator+() const { return typeless_nan{}; } - typeless_nan operator-() const + DACE_CONSTEXPR DACE_HDFI typeless_nan operator-() const { return typeless_nan{}; } }; template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator*(const T&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator*(const T&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator*(const typeless_nan&, const T&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator*(const typeless_nan&, const T&) noexcept { return typeless_nan{}; } - inline typeless_nan - operator*(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI typeless_nan + operator*(const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator+(const T&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator+(const T&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator+(const typeless_nan&, const T&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator+(const typeless_nan&, const T&) noexcept { return typeless_nan{}; } - inline typeless_nan - operator+(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI typeless_nan + operator+(const typeless_nan&, 
const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator-(const T&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator-(const T&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator-(const typeless_nan&, const T&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator-(const typeless_nan&, const T&) noexcept { return typeless_nan{}; } - inline typeless_nan - operator-(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI typeless_nan + operator-(const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator/(const T&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator/(const T&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator/(const typeless_nan&, const T&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator/(const typeless_nan&, const T&) noexcept { return typeless_nan{}; } - inline typeless_nan - operator/(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI typeless_nan + operator/(const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator%(const T&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI 
std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator%(const T&, const typeless_nan&) noexcept { return typeless_nan{}; } template - DACE_CONSTEXPR typename std::enable_if::value, typeless_nan>::type - operator%(const typeless_nan&, const T&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI std::enable_if_t::value || std::is_integral::value, typeless_nan> + operator%(const typeless_nan&, const T&) noexcept { return typeless_nan{}; } - inline typeless_nan - operator%(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; } + DACE_CONSTEXPR DACE_HDFI typeless_nan + operator%(const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; } - } -} + DACE_HDFI typeless_nan ipow(const typeless_nan&, const unsigned int&) { + return typeless_nan{}; + } //These functions allows to perfrom operations with `typeless_nan` instances. -# define FADAPT(F) DACE_CONSTEXPR ::dace::math::typeless_nan F (::dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; } -# define FADAPT2(F) template DACE_CONSTEXPR dace::math::typeless_nan F (T1&&, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }; \ - template DACE_CONSTEXPR dace::math::typeless_nan F (dace::math::typeless_nan, T2&&) { return ::dace::math::typeless_nan{}; }; \ - DACE_CONSTEXPR ::dace::math::typeless_nan F (dace::math::typeless_nan, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; } +# define FADAPT(F) DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&) noexcept { return typeless_nan{}; } +# define FADAPT2(F) template DACE_CONSTEXPR DACE_HDFI typeless_nan F (T1&&, dace::math::typeless_nan) noexcept { return typeless_nan{}; }; \ + template DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&, T2&&) noexcept { return typeless_nan{}; }; \ + DACE_CONSTEXPR DACE_HDFI typeless_nan F (const typeless_nan&, const typeless_nan&) noexcept { return typeless_nan{}; } FADAPT(tanh); FADAPT(cos); FADAPT(sin); FADAPT(sqrt); 
FADAPT(tan); FADAPT(acos); FADAPT(asin); FADAPT(atan); FADAPT(log); FADAPT(exp); FADAPT(floor); FADAPT(ceil); FADAPT(round); FADAPT(abs); FADAPT2(max); FADAPT2(min); # undef FADAPT2 # undef FADAPT + } +} + #endif // __DACE_NAN_H diff --git a/dace/runtime/include/dace/pi.h b/dace/runtime/include/dace/pi.h index 331b8c1636..818a22f6d0 100644 --- a/dace/runtime/include/dace/pi.h +++ b/dace/runtime/include/dace/pi.h @@ -2,6 +2,8 @@ #ifndef __DACE_PI_H #define __DACE_PI_H +#include + // Classes that are used to define a typeless Pi //#define _USE_MATH_DEFINES @@ -16,233 +18,274 @@ namespace dace { ////////////////////////////////////////////////////// // Defines a typeless Pi - struct typeless_pi + + template + struct is_typeless_pi { static constexpr bool value = false; }; + #define MAKE_TYPELESS_PI(type) template<> struct is_typeless_pi { static constexpr bool value = true; } + + struct typeless_pi; + + /* Represents $m * \pi$. */ + struct typeless_pi_mult { - double value() const { return M_PI; } - operator int() const - { - return int(this->value()); - } - operator float() const - { - return float(this->value()); - } - operator double() const - { - return double(this->value()); - } + int mult; + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(int m): mult(m) {} + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult() noexcept: typeless_pi_mult(1) {}; + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(const typeless_pi&) noexcept: typeless_pi_mult(1) {}; + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(const typeless_pi_mult&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult(typeless_pi_mult&&) noexcept = default; + DACE_HDFI ~typeless_pi_mult() noexcept = default; + +#ifndef DACE_XILINX + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult& operator=(const typeless_pi_mult&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult& operator=(typeless_pi_mult&&) noexcept = default; +#endif + + template< + typename T, + typename = std::enable_if_t::value> + > + DACE_CONSTEXPR 
DACE_HDFI operator T() const noexcept + { return T(mult * M_PI); } + + DACE_CONSTEXPR DACE_HDFI operator float() const noexcept + { return float(mult * M_PI); } + + DACE_CONSTEXPR DACE_HDFI operator double() const noexcept + { return mult * M_PI; } + + DACE_CONSTEXPR DACE_HDFI operator long double() const noexcept + { return (long double)(mult * M_PI); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+() const noexcept + { return *this; } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator-() const noexcept + { return typeless_pi_mult(-this->mult); } }; - struct typeless_pi_mult : typeless_pi + MAKE_TYPELESS_PI(typeless_pi_mult); + + /* Represents $\pi$ */ + struct typeless_pi { - int mult; typeless_pi_mult(int m = 1) : mult(m) {} - double value() const { return mult * M_PI; } - - operator int() const - { - return int(this->value()); - } - operator float() const - { - return float(this->value()); - } - operator double() const - { - return double(this->value()); - } + DACE_CONSTEXPR DACE_HDFI typeless_pi() noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi(const typeless_pi&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi(typeless_pi&&) noexcept = default; + DACE_HDFI ~typeless_pi() noexcept = default; +#ifndef DACE_XILINX + DACE_CONSTEXPR DACE_HDFI typeless_pi& operator=(const typeless_pi&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi& operator=(typeless_pi&&) noexcept = default; +#endif + + template< + typename T, + typename = std::enable_if_t::value> + > + DACE_CONSTEXPR DACE_HDFI operator T() const noexcept + { return T(M_PI); } + + DACE_CONSTEXPR DACE_HDFI operator float() const noexcept + { return float(M_PI); } + + DACE_CONSTEXPR DACE_HDFI operator double() const noexcept + { return M_PI; } + + DACE_CONSTEXPR DACE_HDFI operator long double() const noexcept + { return (long double)(M_PI); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi operator+() const noexcept + { return *this; } + + DACE_CONSTEXPR DACE_HDFI 
typeless_pi_mult operator-() const noexcept + { return typeless_pi_mult(-1); } }; - struct typeless_pi_exp : typeless_pi_mult + MAKE_TYPELESS_PI(typeless_pi); + + /* Represents $m * \pi^{e}$ */ + struct typeless_pi_exp { - int mult, exp; typeless_pi_exp(int m = 1, int e = 1) : mult(m), exp(e) {} - double value() const { return mult * std::pow(M_PI, exp); } - operator int() const - { - return int(this->value()); - } - operator float() const - { - return float(this->value()); - } - operator double() const - { - return double(this->value()); - } + int mult, exp; + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(int m, int e): mult(m), exp(e) {} + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp() noexcept: typeless_pi_exp(1, 1) {}; + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(const typeless_pi_exp&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp(typeless_pi_exp&&) noexcept = default; + DACE_HDFI ~typeless_pi_exp() noexcept = default; + +#ifndef DACE_XILINX + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp& operator=(const typeless_pi_exp&) noexcept = default; + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp& operator=(typeless_pi_exp&&) noexcept = default; +#endif + + template< + typename T, + typename = std::enable_if_t::value> + > + DACE_CONSTEXPR DACE_HDFI operator T() const noexcept + { return T(mult * std::pow(static_cast(M_PI), exp)); } + + + /* We have to do the selection this way, because it seems as nvidia does + * not provide `powl` and `powf` in the std namespace */ + DACE_CONSTEXPR DACE_HDFI operator float() const + { using std::pow; return mult * pow(static_cast(M_PI), exp); } + + DACE_CONSTEXPR DACE_HDFI operator double() const + { using std::pow; return mult * std::pow(static_cast(M_PI), exp); } + +#if !( defined(__CUDACC__) || defined(__HIPCC__) ) + //There is no long double on the GPU + DACE_CONSTEXPR DACE_HDFI operator long double() const + { using std::pow; return mult * std::pow(static_cast(M_PI), exp); } +#endif + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp 
operator+() const + { return *this; } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator-() const + { return typeless_pi_exp(-this->mult, this->exp); } }; - inline typeless_pi_mult operator*(const typeless_pi&, const int& num) - { - return typeless_pi_mult(num); - } - inline typeless_pi_mult operator*(const typeless_pi_mult& p, const int& num) - { - return typeless_pi_mult(p.mult * num); - } - inline typeless_pi_exp operator*(const typeless_pi_exp& p, const int& num) - { - return typeless_pi_exp(p.mult * num, p.exp); - } - inline typeless_pi_mult operator*(const int& num, const typeless_pi&) - { - return typeless_pi_mult(num); - } - inline typeless_pi_mult operator*(const int& num, const typeless_pi_mult& p) - { - return typeless_pi_mult(num * p.mult); - } - inline typeless_pi_exp operator*(const int& num, const typeless_pi_exp& p) - { - return typeless_pi_exp(num * p.mult, p.exp); - } - template - T operator+(const typeless_pi& p, const T& num) - { - return T(p.value()) + num; - } - template - T operator-(const typeless_pi& p, const T& num) - { - return T(p.value()) - num; - } + MAKE_TYPELESS_PI(typeless_pi_exp); - template - T operator*(const typeless_pi& p, const T& num) - { - return T(p.value()) * num; - } - template - T operator/(const typeless_pi& p, const T& num) - { - return T(p.value()) / num; - } - template - T operator+(const T& num, const typeless_pi& p) - { - return num + T(p.value()); - } - template - T operator-(const T& num, const typeless_pi& p) - { - return num - T(p.value()); - } - template - T operator*(const T& num, const typeless_pi& p) - { - return num * T(p.value()); - } - template - T operator/(const T& num, const typeless_pi& p) - { - return num / T(p.value()); - } - template - T operator+(const typeless_pi_mult& p, const T& num) - { - return T(p.value()) + num; - } - template - T operator-(const typeless_pi_mult& p, const T& num) - { - return T(p.value()) - num; - } - template - T operator*(const typeless_pi_mult& p, const T& num) - { 
- return T(p.value()) * num; - } - template - T operator/(const typeless_pi_mult& p, const T& num) - { - return T(p.value()) / num; - } - template - T operator+(const T& num, const typeless_pi_mult& p) - { - return num + T(p.value()); - } - template - T operator-(const T& num, const typeless_pi_mult& p) - { - return num - T(p.value()); - } - template - T operator*(const T& num, const typeless_pi_mult& p) - { - return num * T(p.value()); - } - template - T operator/(const T& num, const typeless_pi_mult& p) - { - return num / T(p.value()); - } - template - T operator+(const typeless_pi_exp& p, const T& num) - { - return T(p.value()) + num; - } - template - T operator-(const typeless_pi_exp& p, const T& num) - { - return T(p.value()) - num; - } + DACE_CONSTEXPR DACE_HDFI int operator/(const typeless_pi&, const typeless_pi&) noexcept + { return 1; } - template - T operator*(const typeless_pi_exp& p, const T& num) - { - return T(p.value()) * num; - } - template - T operator/(const typeless_pi_exp& p, const T& num) - { - return T(p.value()) / num; - } - template - T operator+(const T& num, const typeless_pi_exp& p) - { - return num + T(p.value()); - } - template - T operator-(const T& num, const typeless_pi_exp& p) - { - return num - T(p.value()); - } - template - T operator*(const T& num, const typeless_pi_exp& p) - { - return num * T(p.value()); - } - template - T operator/(const T& num, const typeless_pi_exp& p) - { - return num / T(p.value()); - } - inline typeless_pi_mult operator-(const typeless_pi&) - { - return typeless_pi_mult(-1); - } - template - typeless_pi_mult operator+(const typeless_pi&, const typeless_pi&) - { - return typeless_pi_mult(2); - } - template - typeless_pi_mult operator+(const typeless_pi_mult& p1, const typeless_pi_mult& p2) - { - return typeless_pi_mult(p1.mult + p2.mult); - } - template - typeless_pi_exp operator*(const typeless_pi_mult& p1, const typeless_pi_mult& p2) - { - return typeless_pi_exp(p1.mult * p2.mult, 2); + DACE_CONSTEXPR 
DACE_HDFI int operator-(const typeless_pi&, const typeless_pi&) noexcept + { return 0; } + + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const typeless_pi&, const int& num) noexcept + { return typeless_pi_mult(num); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const int& num, const typeless_pi&) noexcept + { return typeless_pi_mult(num); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const typeless_pi_mult& p, const int& num) noexcept + { return typeless_pi_mult(p.mult * num); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator*(const int& num, const typeless_pi_mult& p) noexcept + { return typeless_pi_mult(p.mult * num); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi&, const typeless_pi&) noexcept + { return typeless_pi_mult(2); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi&, const typeless_pi_mult& pi) noexcept + { return typeless_pi_mult(pi.mult + 1); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi_mult& pi, const typeless_pi&) noexcept + { return typeless_pi_mult(pi.mult + 1); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator+(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept + { return typeless_pi_mult(pl.mult + pr.mult); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_mult operator-(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept + { return typeless_pi_mult(pl.mult - pr.mult); } + + DACE_CONSTEXPR DACE_HDFI int operator/(const typeless_pi_mult& pl, const typeless_pi&) noexcept + { return pl.mult; } + + DACE_CONSTEXPR DACE_HDFI double operator/(const typeless_pi& pl, const typeless_pi_mult& pr) noexcept + { return 1.0 / pr.mult; } + + + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi&, const typeless_pi&) noexcept + { return typeless_pi_exp(1, 2); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi_mult& pr) noexcept + { return 
typeless_pi_exp(pl.mult * pr.mult, 2); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi&) noexcept + { return typeless_pi_exp(pl.mult, 2); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi& pl, const typeless_pi_mult& pr) noexcept + { return typeless_pi_exp(pr.mult, 2); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const typeless_pi_mult& pr) noexcept + { return typeless_pi_exp(pl.mult * pr.mult, pl.exp + 1); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_mult& pl, const typeless_pi_exp& pr) noexcept + { return pr * pl; } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const typeless_pi_exp& pr) noexcept + { return typeless_pi_exp(pl.mult * pr.mult, pr.exp + pl.exp); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const typeless_pi_exp& pl, const int& num) noexcept + { return typeless_pi_exp(pl.mult * num, pl.exp); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator*(const int& num, const typeless_pi_exp& pr) noexcept + { return typeless_pi_exp(pr.mult * num, pr.exp); } + + DACE_CONSTEXPR DACE_HDFI typeless_pi_exp operator/(const typeless_pi_exp& pl, const typeless_pi&) noexcept + { return typeless_pi_exp(pl.mult, pl.exp - 1); } + + + // The code generator guarantees us that `b > 0`. 
+ DACE_HDFI typeless_pi_exp ipow(const typeless_pi_mult& pi, const unsigned int& b) { + return typeless_pi_exp(pow(pi.mult, b), b); } - template - typeless_pi_exp operator*(const typeless_pi&, const typeless_pi&) - { - return typeless_pi_exp(1, 2); + DACE_HDFI typeless_pi_exp ipow(const typeless_pi& pi, const unsigned int& b) { + return typeless_pi_exp(1, b); } - template - typeless_pi_exp operator*(const typeless_pi_exp& p1, const typeless_pi_exp& p2) - { - return typeless_pi_exp(p1.mult * p2.mult, p1.exp + p2.exp); + DACE_HDFI typeless_pi_exp ipow(const typeless_pi_exp& pi, const unsigned int& b) { + return typeless_pi_exp(pow(pi.mult, b), pi.exp * b); } + +# define DEF_PI_OPS(op) \ + template::value && (!is_typeless_pi::value)> > \ + DACE_CONSTEXPR DACE_HDFI T operator op (const T& lhs, const PI& pi) noexcept \ + { return lhs op (static_cast(pi)); } \ + template::value && (!is_typeless_pi::value)> > \ + DACE_CONSTEXPR DACE_HDFI T operator op (const PI& pi, const T& rhs) noexcept \ + { return (static_cast(pi)) op rhs; } + + DEF_PI_OPS(+); + DEF_PI_OPS(-); + DEF_PI_OPS(/); + DEF_PI_OPS(*); + + DACE_CONSTEXPR DACE_HDFI int sin(const typeless_pi&) noexcept + { return 0; } + + DACE_CONSTEXPR DACE_HDFI int sin(const typeless_pi_mult& pi) noexcept + { return 0; } + + DACE_HDFI double sin(const typeless_pi_exp& pi) noexcept + { return std::sin(static_cast(pi)); } + + DACE_CONSTEXPR DACE_HDFI int cos(const typeless_pi&) noexcept + { return 1; } + + DACE_CONSTEXPR DACE_HDFI int cos(const typeless_pi_mult& pi) noexcept + { return (pi.mult % 2 == 0) ? 
1 : (-1); } + + DACE_HDFI double cos(const typeless_pi_exp& pi) noexcept + { return std::cos(static_cast(pi)); } + + +# define DEF_PI_TRIGO(F) \ + DACE_HDFI double F (const typeless_pi& pi) noexcept \ + { return std:: F( static_cast(pi) ); } \ + DACE_HDFI double F (const typeless_pi_mult& pi) noexcept \ + { return std:: F( static_cast(pi) ); } \ + DACE_HDFI double F (const typeless_pi_exp& pi) noexcept \ + { return std:: F( static_cast(pi) ); } + + DEF_PI_TRIGO(asin); + DEF_PI_TRIGO(acos); + DEF_PI_TRIGO(tan); + DEF_PI_TRIGO(atan); + DEF_PI_TRIGO(exp); + DEF_PI_TRIGO(log); + + +# undef DEF_PI_TRIGO +# undef DEF_PI_OPS +# undef MAKE_TYPELESS_PI } } diff --git a/dace/runtime/include/dace/types.h b/dace/runtime/include/dace/types.h index e5eed1e35e..9a8676e0d4 100644 --- a/dace/runtime/include/dace/types.h +++ b/dace/runtime/include/dace/types.h @@ -49,7 +49,13 @@ #define DACE_HDFI __host__ __device__ __forceinline__ #define DACE_HFI __host__ __forceinline__ #define DACE_DFI __device__ __forceinline__ + #define DACE_HostDev __host__ __device__ + #define DACE_Host __host__ + #define DACE_Dev __device__ #else + #define DACE_HostDev + #define DACE_Host + #define DACE_Dev #define DACE_HDFI inline #define DACE_HFI inline #define DACE_DFI inline diff --git a/tests/numpy/constants_test.py b/tests/numpy/constants_test.py new file mode 100644 index 0000000000..6aa3b15621 --- /dev/null +++ b/tests/numpy/constants_test.py @@ -0,0 +1,170 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import numpy as np +import uuid +import math +import pytest + +def _make_sdfg( + code: str, + dtype = dace.float64, +) -> dace.SDFG: + """Generates an SDFG that writes an expression to an array. 
+ """ + sdfg = dace.SDFG(name=f"const_test_{str(uuid.uuid1()).replace('-', '_')}") + state = sdfg.add_state(is_start_block=True) + sdfg.add_array( + "out", + shape=(10,), + dtype=dtype, + transient=False, + ) + + state.add_mapped_tasklet( + "comput", + map_ranges={"__i": "0:10"}, + inputs={}, + code=f"__out = {code}", + outputs={"__out": dace.Memlet("out[__i]")}, + external_edges=True, + ) + sdfg.validate() + return sdfg + + +def _test_sdfg( + sdfg: dace.SDFG, + expected, + dtype = np.float64, +): + out = np.zeros(10, dtype=dtype) + sdfg.apply_gpu_transformations() + sdfg(out=out) + assert np.allclose(out, expected, equal_nan=True), f"Expected {expected}, but got {out[0]}" + + +def _perform_test( + code, + expected, + dtype = np.float64, +): + print(f"PERFORM: {code}") + dace_dtype = dace.dtypes.dtype_to_typeclass(dtype) + sdfg = _make_sdfg(code=code, dtype=dace_dtype) + _test_sdfg(sdfg=sdfg, expected=expected, dtype=dtype) + + +@pytest.mark.gpu +def test_constant_pi_simple(): + _perform_test( + code="math.pi", + expected=math.pi + ) + + +@pytest.mark.gpu +def test_constant_pi_add(): + _perform_test( + code="-math.pi", + expected=-math.pi + ) + _perform_test( + code="math.pi + math.pi", + expected=2 * math.pi + ) + _perform_test( + code="math.pi - math.pi", + expected=0. 
+ ) + + +@pytest.mark.gpu +def test_constant_pi_mult(): + _perform_test( + code="(math.pi ** 2) * 2", + expected=math.pi * math.pi * 2.0 + ) + _perform_test( + code="math.pi * 2", + expected=2 * math.pi + ) + _perform_test( + code="math.pi * 2 + math.pi", + expected=2 * math.pi + math.pi + ) + _perform_test( + code="math.pi * math.pi * 2", + expected=math.pi * math.pi * 2.0 + ) + _perform_test( + code="math.pi / math.pi ", + expected=1 + ) + _perform_test( + code="(math.pi + math.pi) / math.pi ", + expected=2 + ) + _perform_test( + code="(math.pi * math.pi) / math.pi ", + expected=math.pi + ) + + +@pytest.mark.gpu +def test_constant_pi_fun(): + _perform_test( + code="math.sin(math.pi)", + expected=0, + ) + _perform_test( + code="math.sin(math.pi * 4)", + expected=math.sin(math.pi * 4), + ) + _perform_test( + code="math.sin(math.pi * 5)", + expected=math.sin(math.pi * 5), + ) + _perform_test( + code="math.cos(math.pi * 4)", + expected=math.cos(math.pi * 4), + ) + _perform_test( + code="math.cos(math.pi * 5)", + expected=math.cos(math.pi * 5), + ) + _perform_test( + code="math.log(math.pi)", + expected=math.log(math.pi), + ) + + +@pytest.mark.gpu +def test_constant_nan(): + _perform_test( + code="math.nan", + expected=math.nan + ) + _perform_test( + code="math.nan + 2", + expected=math.nan + ) + _perform_test( + code="math.nan + 2.0", + expected=math.nan + ) + _perform_test( + code="math.sin(math.nan + 2.0)", + expected=math.nan + ) + _perform_test( + code="math.sin(math.nan + 2.0) ** 2", + expected=math.nan + ) + + +if __name__ == "__main__": + test_constant_pi_simple() + test_constant_pi_add() + test_constant_pi_mult() + test_constant_pi_fun() + test_constant_nan() From b5f91e18c3ff6b599793645507f54a68b3f07c1f Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 15 Nov 2024 10:29:14 -0800 Subject: [PATCH 4/5] Make scalar to symbol promotion robust to node order in state (#1766) Fixes #1727 --- dace/sdfg/analysis/schedule_tree/treenodes.py | 2 ++ 
.../transformation/passes/scalar_to_symbol.py | 4 ++- tests/passes/scalar_to_symbol_test.py | 30 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/dace/sdfg/analysis/schedule_tree/treenodes.py b/dace/sdfg/analysis/schedule_tree/treenodes.py index 3b447fa15a..dabd436b56 100644 --- a/dace/sdfg/analysis/schedule_tree/treenodes.py +++ b/dace/sdfg/analysis/schedule_tree/treenodes.py @@ -41,6 +41,8 @@ def __init__(self, children: Optional[List['ScheduleTreeNode']] = None): if self.children: for child in children: child.parent = self + self.containers = {} + self.symbols = {} def as_string(self, indent: int = 0): if not self.children: diff --git a/dace/transformation/passes/scalar_to_symbol.py b/dace/transformation/passes/scalar_to_symbol.py index 33712c8a1c..a37729ca7c 100644 --- a/dace/transformation/passes/scalar_to_symbol.py +++ b/dace/transformation/passes/scalar_to_symbol.py @@ -522,6 +522,8 @@ def remove_scalar_reads(sdfg: sd.SDFG, array_names: Dict[str, str]): for state in sdfg.states(): scalar_nodes = [n for n in state.nodes() if isinstance(n, nodes.AccessNode) and n.data in array_names] for node in scalar_nodes: + if node not in state: + continue symname = array_names[node.data] for out_edge in state.out_edges(node): for e in state.memlet_tree(out_edge): @@ -649,7 +651,7 @@ def apply_pass(self, sdfg: SDFG, _: Dict[Any, Any]) -> Set[str]: scalar_nodes = [n for n in state.nodes() if isinstance(n, nodes.AccessNode) and n.data in to_promote] # Step 2: Assignment tasklets for node in scalar_nodes: - if state.in_degree(node) == 0: + if node not in state or state.in_degree(node) == 0: continue in_edge = state.in_edges(node)[0] input = in_edge.src diff --git a/tests/passes/scalar_to_symbol_test.py b/tests/passes/scalar_to_symbol_test.py index 7fdfbdf737..36decceba2 100644 --- a/tests/passes/scalar_to_symbol_test.py +++ b/tests/passes/scalar_to_symbol_test.py @@ -729,6 +729,35 @@ def test_double_index_bug(): assert getattr(sympy_node, 
"name", None) != "indices" +def test_reversed_order(): + """ + Tests a failure reported in issue #1727. + """ + sdfg = dace.SDFG('tester') + sdfg.add_array('inputs', [1], dace.int32) + sdfg.add_transient('a', [1], dace.int32) + sdfg.add_transient('b', [1], dace.int32) + sdfg.add_array('output', [1], dace.int32) + initstate = sdfg.add_state() + state = sdfg.add_state_after(initstate) + finistate = sdfg.add_state_after(state) + + # Note the order here + w = state.add_write('b') + t = state.add_tasklet('assign', {'inp'}, {'out'}, 'out = inp') + r = state.add_read('a') + state.add_edge(t, 'out', w, None, dace.Memlet('b')) + state.add_edge(r, None, t, 'inp', dace.Memlet('a')) + + initstate.add_nedge(initstate.add_read('inputs'), initstate.add_write('a'), dace.Memlet('inputs')) + finistate.add_nedge(finistate.add_read('b'), finistate.add_write('output'), dace.Memlet('output')) + + sdfg.validate() + promoted = scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(sdfg, {}) + assert promoted == {'a', 'b'} + sdfg.compile() + + if __name__ == '__main__': test_find_promotable() test_promote_simple() @@ -753,3 +782,4 @@ def test_double_index_bug(): test_ternary_expression(False) test_ternary_expression(True) test_double_index_bug() + test_reversed_order() From 4f8eb9225e11aef2c8111628423bc0a847caf224 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 15 Nov 2024 23:30:07 -0800 Subject: [PATCH 5/5] Match CMake version requirements --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c228ae4558..9c9bc020bb 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ output = subprocess.check_output([cmake_path, '--version']).decode('utf-8') cmake_version = tuple(int(t) for t in output.splitlines()[0].split(' ')[-1].split('.')) # If version meets minimum requirements, CMake is not necessary - if cmake_version >= (3, 15): + if cmake_version >= (3, 17): cmake_requires = [] except (subprocess.CalledProcessError, OSError, IndexError, 
ValueError): # Any failure in getting the CMake version counts as "not found"