From 2408ad0816b2e2612902f45d399f7fa5b2a0b4d8 Mon Sep 17 00:00:00 2001 From: Yakup Budanaz Date: Tue, 10 Dec 2024 17:24:58 +0100 Subject: [PATCH] Improve validation and type checks and fix bugs --- dace/codegen/targets/cpu.py | 2 +- dace/codegen/targets/cuda.py | 7 ++-- dace/data.py | 2 +- dace/sdfg/sdfg.py | 2 +- dace/sdfg/validation.py | 4 ++ tests/deferred_alloc_test.py | 72 +++++++++++++++++++++++++++++++----- 6 files changed, 73 insertions(+), 16 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 85c51cb8b1..2172bbc0da 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -344,7 +344,7 @@ def declare_array(self, size_desc_name = sdfg.arrays[name].size_desc_name if size_desc_name is not None: size_desc = sdfg.arrays[size_desc_name] - size_ctypedef = dtypes.pointer(size_desc.dtype).ctype + size_ctypedef = size_desc.dtype.ctype self._dispatcher.declared_arrays.add(size_desc_name, DefinedType.Pointer, size_ctypedef) return elif nodedesc.storage is dtypes.StorageType.CPU_ThreadLocal: diff --git a/dace/codegen/targets/cuda.py b/dace/codegen/targets/cuda.py index eb205d30c8..8bc51d0418 100644 --- a/dace/codegen/targets/cuda.py +++ b/dace/codegen/targets/cuda.py @@ -1503,7 +1503,7 @@ def generate_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: StateSub # make dynamic map inputs constant # TODO move this into _get_const_params(dfg_scope) # Do not add src as const if the size is being red (src_conn is _read_size) - const_params |= set((str(e.src)) for e in dace.sdfg.dynamic_map_inputs(state, scope_entry) if not e.src_conn.endswith("size")) + const_params |= set((str(e.src)) for e in dace.sdfg.dynamic_map_inputs(state, scope_entry) if e.src_conn is None or (e.src_conn is not None and e.src_conn == "_read_size")) # Store init/exit code streams old_entry_stream = self.scope_entry_stream @@ -1626,8 +1626,9 @@ def generate_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: StateSub for i in range(size_arr.shape[0]): if f"__{arr_name}_dim{i}_size" not in dyn_args: dyn_args.append(f"__{arr_name}_dim{i}_size") - dyn_args_typed.append(f"const {arg.dtype.ctype} __{arr_name}_dim{i}_size") - needed_size_scalars_declaration.append(f"const {arg.dtype.ctype} __{arr_name}_dim{i}_size = {size_desc_name}[{i}];") + size_desc = sdfg.arrays[size_desc_name] + dyn_args_typed.append(f"const {size_desc.dtype.ctype} __{arr_name}_dim{i}_size") + needed_size_scalars_declaration.append(f"const {size_desc.dtype.ctype} __{arr_name}_dim{i}_size = {size_desc_name}[{i}];") self._localcode.write( '__global__ void %s %s(%s) {\n' % diff --git a/dace/data.py b/dace/data.py index a3b008f150..1678721062 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1442,7 +1442,7 @@ def __init__(self, else: self.offset = [0] * len(shape) - self.is_deferred_array = any(["__dace_defer" in str(dim) for dim in self.shape]) + self.is_deferred_array = any([str(dim).startswith("__dace_defer") for dim in self.shape]) self.validate() diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index a08f572782..6e80270ea8 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1795,7 +1795,7 @@ def add_array(self, # convert strings to int if possible, unless it is not the reserved symbol for deferred allocation newshape = [] for i, s in enumerate(shape): - if isinstance(s, str) and s == "__dace_defer": + if isinstance(s, str) and s.startswith("__dace_defer"): newshape.append(dace.symbolic.pystr_to_symbolic(f"{s}_dim{i}")) else: try: diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index c4173dd181..55ed7570db 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -573,11 +573,15 @@ def validate_state(state: 'dace.sdfg.SDFGState', write_size_edges = list(state.edges_by_connector(node, insize)) # Reading-Writing the size is valid only if the array is transient and has the storage type CPU_Heap or GPU_Global + has_writes = len(write_size_edges) > 0 has_writes_or_reads = len(read_size_edges) + len(write_size_edges) > 0 size_access_allowed = arr.transient and (arr.storage == dtypes.StorageType.CPU_Heap or arr.storage == dtypes.StorageType.GPU_Global) if has_writes_or_reads and not size_access_allowed: raise InvalidSDFGNodeError('Reading the size of an array, or changing (writing to) the size of an array ' 'is only valid if the array is transient and the storage is CPU_Heap or GPU_Global', sdfg, state_id, nid) + if has_writes and scope[node] is not None: + raise InvalidSDFGNodeError('Resizing array is not allowed within a scope (e.g. not inside maps)', sdfg, state_id, nid) + if len(write_size_edges) > 1: raise InvalidSDFGNodeError('One node can have at maximum one edge writing to its size descriptior', sdfg, state_id, nid) diff --git a/tests/deferred_alloc_test.py b/tests/deferred_alloc_test.py index adc5427a9a..9aa8d86c14 100644 --- a/tests/deferred_alloc_test.py +++ b/tests/deferred_alloc_test.py @@ -4,6 +4,7 @@ import numpy import pytest + @pytest.fixture(params=[dace.dtypes.StorageType.CPU_Heap, dace.dtypes.StorageType.GPU_Global]) def storage_type(request): return request.param @@ -73,7 +74,7 @@ def _get_assign_map_sdfg(storage_type: dace.dtypes.StorageType, transient: bool, arrn = state.add_access(arr_name) if storage_type == dace.dtypes.StorageType.CPU_Heap: - assert (schedule_type == dace.dtypes.ScheduleType.Sequential) + assert (schedule_type == dace.dtypes.ScheduleType.Sequential or schedule_type == dace.dtypes.ScheduleType.CPU_Multicore) elif storage_type == dace.dtypes.StorageType.GPU_Global: assert (schedule_type == dace.dtypes.ScheduleType.GPU_Device) @@ -188,9 +189,55 @@ def test_trivial_realloc_cpu(transient: bool): _test_trivial_realloc(dace.dtypes.StorageType.CPU_Heap, transient) -def test_realloc_inside_map(): +def _add_realloc_inside_map(sdfg: dace.SDFG, schedule_type: dace.dtypes.ScheduleType): + pre_state = sdfg.states()[0] + state = sdfg.add_state("s2") + sdfg.add_edge(pre_state, state, dace.InterstateEdge(None, None)) + + map_entry, map_exit = state.add_map(name="map2",ndrange={"i":dace.subsets.Range([(0,4,1)])}, + schedule=schedule_type) + an_2 = state.add_access('A') + an_2.add_in_connector("_write_size") + + t1 = state.add_tasklet(name="assign", inputs={}, outputs={"__out"}, code="_out=8") + t1.add_out_connector("__out") + + _, _ = sdfg.add_array("tmp0", shape=(2, ), dtype=numpy.uint64, transient=True) + sca = state.add_access("tmp0") + + state.add_edge(map_entry, None, t1, None, dace.Memlet(None)) + state.add_edge(t1, "__out", sca, None, dace.Memlet("tmp0[0]")) + state.add_edge(sca, None, an_2, "_write_size", dace.Memlet("tmp0")) + state.add_edge(an_2, None, map_exit, None, dace.Memlet(None)) + + +def test_realloc_inside_map_gpu(): + sdfg =_get_assign_map_sdfg(dace.dtypes.StorageType.GPU_Global, True, dace.dtypes.ScheduleType.GPU_Device) + _add_realloc_inside_map(sdfg, dace.dtypes.ScheduleType.GPU_Device) + try: + sdfg.validate() + except Exception: + return + + pytest.fail("Realloc-use with non-transient data and incomplete write did not fail when it was expected to.") + +def test_realloc_inside_map_cpu(): + sdfg =_get_assign_map_sdfg(dace.dtypes.StorageType.CPU_Heap, True, dace.dtypes.ScheduleType.CPU_Multicore) + _add_realloc_inside_map(sdfg, dace.dtypes.ScheduleType.CPU_Multicore) + try: + sdfg.validate() + except Exception: + return + + pytest.fail("Realloc-use with non-transient data and incomplete write did not fail when it was expected to.") + +def test_conditional_alloc_gpu(): + pass + +def test_conditional_alloc_cpu(): pass + def test_incomplete_write_dimensions_1(): sdfg = _get_trivial_alloc_sdfg(dace.dtypes.StorageType.CPU_Heap, True, "1:2") try: @@ -211,23 +258,28 @@ def test_incomplete_write_dimensions_2(): if __name__ == "__main__": + print(f"Trivial Realloc within map {dace.dtypes.StorageType.CPU_Multicore}") + test_realloc_inside_map_cpu() + print(f"Trivial Realloc within map {dace.dtypes.StorageType.GPU_Device}") + test_realloc_inside_map_gpu() + print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap}") - test_trivial_realloc_cpu(dace.dtypes.StorageType.CPU_Heap, True) + test_trivial_realloc_cpu(True) print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap}") - test_realloc_use_cpu(dace.dtypes.StorageType.CPU_Heap, True, dace.dtypes.ScheduleType.Sequential) + test_realloc_use_cpu(True) print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global}") - test_trivial_realloc_gpu(dace.dtypes.StorageType.GPU_Global, True) + test_trivial_realloc_gpu(True) print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global}") - test_realloc_use_gpu(dace.dtypes.StorageType.GPU_Global, True, dace.dtypes.ScheduleType.GPU_Device) + test_realloc_use_gpu(True) print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data") - test_trivial_realloc_cpu(dace.dtypes.StorageType.CPU_Heap, False) + test_trivial_realloc_cpu(False) print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data") - test_realloc_use_cpu(dace.dtypes.StorageType.CPU_Heap, False, dace.dtypes.ScheduleType.Sequential) + test_realloc_use_cpu(False) print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data") - test_trivial_realloc_gpu(dace.dtypes.StorageType.GPU_Global, False) + test_trivial_realloc_gpu(False) print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data") - test_realloc_use_gpu(dace.dtypes.StorageType.GPU_Global, False, dace.dtypes.ScheduleType.GPU_Device) + test_realloc_use_gpu(False) print(f"Realloc with incomplete write 1") test_incomplete_write_dimensions_1()