From 8c2f12d2e8d51f065cf80a9a3bc9d0fb99972bff Mon Sep 17 00:00:00 2001 From: Yakup Budanaz Date: Wed, 11 Dec 2024 14:09:52 +0100 Subject: [PATCH] Add tests, refactor, improve size calculation --- dace/codegen/targets/cpp.py | 23 +++++++++++++++++++++++ dace/codegen/targets/cpu.py | 21 ++++++--------------- dace/codegen/targets/cuda.py | 23 +++++++++-------------- tests/deferred_alloc_test.py | 34 +++++++++++++++++----------------- 4 files changed, 55 insertions(+), 46 deletions(-) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index cfd1b202c7..26b34637a3 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -612,6 +612,29 @@ def check_dace_defer(elements): deferred_size_names.append(elem) return deferred_size_names if deferred_size_names is not None and len(deferred_size_names) > 0 else None +def _get_realloc_dimensions(size_array_name:str, new_size_array_name:str, shape): + # Only consider the offsets with __dace_defer in original dim + mask_array = ["__dace_defer" in str(dim) for dim in shape] + + # In case the size does not only consist of a "__dace_defer" symbol but from an expression involving "__dace_defer" + # The size array is only updated with the symbol, and while calculating the expression, we only replace the __dace_defer_dim pattern + # With the corresponding access from the size array + size_assignment_strs = [] + new_size_strs = [] + old_size_strs = [] + for i, mask in enumerate(mask_array): + if mask: + new_size_str = sym2cpp(shape[i]) + pattern = r'__dace_defer_dim(\d+)' + new_size_strs.append(re.sub(pattern, lambda m: f'{new_size_array_name}[{m.group(1)}]', new_size_str)) + old_size_strs.append(re.sub(pattern, lambda m: f"{size_array_name}[{m.group(1)}]", new_size_str)) + size_assignment_strs.append( + f"{size_array_name}[{i}] = {new_size_array_name}[{i}];" + ) + else: + new_size_strs.append(sym2cpp(shape[i])) + return size_assignment_strs, new_size_strs, old_size_strs + def cpp_array_expr(sdfg, memlet, with_brackets=True, diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 0798abc5e6..6b052fb577 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -703,21 +703,12 @@ def reallocate( dtype = sdfg.arrays[data_name].dtype - # Only consider the offsets with __dace_defer in original dim - mask_array = ["__dace_defer" in str(dim) for dim in data.shape] - - # In case the size does not only consist of a "__dace_defer" symbol but from an expression involving "__dace_defer" - # The size array is only updated with the symbol, and while calculating the expression, we only replace the __dace_defer_dim pattern - # With the corresponding access from the size array - new_size_strs = [] - for i, mask in enumerate(mask_array): - if mask: - new_size_str = cpp.sym2cpp(data.shape[i]) - pattern = r'__dace_defer_dim(\d+)' - new_size_strs.append(re.sub(pattern, lambda m: f'{new_size_array_name}[{m.group(1)}]', new_size_str)) - callsite_stream.write( - f"{size_array_name}[{i}] = {new_size_array_name}[{i}];" - ) + size_assignment_strs, new_size_strs, _ = cpp._get_realloc_dimensions( + size_array_name, new_size_array_name, data.shape + ) + + for size_assignment in size_assignment_strs: + callsite_stream.write(size_assignment) size_str = " * ".join(new_size_strs) callsite_stream.write( diff --git a/dace/codegen/targets/cuda.py b/dace/codegen/targets/cuda.py index fb27a4d870..2222c2a002 100644 --- a/dace/codegen/targets/cuda.py +++ b/dace/codegen/targets/cuda.py @@ -1,6 +1,7 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. import ctypes import functools +import re import warnings from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union @@ -2793,15 +2794,16 @@ def reallocate( dtype = sdfg.arrays[data_name].dtype - # Only consider the offsets with __dace_defer in original dim - mask_array = ["__dace_defer" in str(dim) for dim in data.shape] - print(mask_array) + size_assignment_strs, new_size_strs, old_size_strs = cpp._get_realloc_dimensions( + size_array_name, new_size_array_name, data.shape + ) + # Call realloc only after no __dace_defer is left in size_array (must be true) # Save new and old sizes before registering them, because we need both to compute the bound of the new array - old_size_str = " * ".join([f"{size_array_name}[{i}]" for i in range(len(data.shape))]) + old_size_str = " * ".join(old_size_strs) old_size_str += f" * sizeof({dtype.ctype})" - new_size_str = " * ".join([f"{new_size_array_name}[{i}]" if mask_array[i] else f"{size_array_name}[{i}]" for i in range(len(data.shape)) ]) + new_size_str = " * ".join(new_size_strs) new_size_str += f" * sizeof({dtype.ctype})" tmp_storage_name = "__tmp_realloc_move_storage" @@ -2830,15 +2832,8 @@ def reallocate( s += "}\n" callsite_stream.write(s) - new_size_strs = [] - for i, mask in enumerate(mask_array): - if mask: - new_size_str = cpp.sym2cpp(data.shape[i]) - pattern = r'__dace_defer_dim(\d+)' - new_size_strs.append(re.sub(pattern, lambda m: f'{new_size_array_name}[{m.group(1)}]', new_size_str)) - callsite_stream.write( - f"{size_array_name}[{i}] = {new_size_array_name}[{i}];" - ) + for size_assignment in size_assignment_strs: + callsite_stream.write(size_assignment) ######################################################################## ######################################################################## diff --git a/tests/deferred_alloc_test.py b/tests/deferred_alloc_test.py index 3a6ec97a9e..eee4482ae5 100644 --- a/tests/deferred_alloc_test.py +++ b/tests/deferred_alloc_test.py @@ -387,40 +387,40 @@ def test_incomplete_write_dimensions_2(): if __name__ == "__main__": - print(f"Trivial Realloc within map {dace.dtypes.StorageType.CPU_Multicore}") + print(f"Trivial Realloc within map, cpu") test_realloc_inside_map_cpu() - print(f"Trivial Realloc within map {dace.dtypes.StorageType.GPU_Device}") + print(f"Trivial Realloc within map, gpu") test_realloc_inside_map_gpu() - print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap}") + print(f"Trivial Realloc with storage, cpu") test_trivial_realloc_cpu(True) - print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap}") + print(f"Trivial Realloc-Use with storage, cpu") test_realloc_use_cpu(True) - print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global}") + + print(f"Trivial Realloc with storage, gpu") test_trivial_realloc_gpu(True) - print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global}") + print(f"Trivial Realloc-Use with storage, gpu") test_realloc_use_gpu(True) - - print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data") + print(f"Trivial Realloc with storage, cpu, on non-transient data") test_trivial_realloc_cpu(False) - print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data") + print(f"Trivial Realloc-Use with storage, cpu, on non-transient data") test_realloc_use_cpu(False) - print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data") + print(f"Trivial Realloc with storage, gpu, on non-transient data") test_trivial_realloc_gpu(False) - print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data") + print(f"Trivial Realloc-Use with storage, gpu, on non-transient data") test_realloc_use_gpu(False) - print(f"Realloc with incomplete write 1") + print(f"Realloc with incomplete write one, validation") test_incomplete_write_dimensions_1() - print(f"Realloc with incomplete write 2") + print(f"Realloc with incomplete write two, validation") test_incomplete_write_dimensions_2() - print(f"Test conditional alloc with use cpu") + print(f"Test conditional alloc with use, cpu") test_conditional_alloc_cpu() - print(f"Test conditional alloc with use gpu") + print(f"Test conditional alloc with use, gpu") test_conditional_alloc_gpu() - print(f"Test conditional alloc with use and the shape as a non-trivial expression cpu") + print(f"Test conditional alloc with use and the shape as a non-trivial expression, cpu") test_conditional_alloc_with_expr_cpu() - print(f"Test conditional alloc with use and the shape as a non-trivial expression gpu") + print(f"Test conditional alloc with use and the shape as a non-trivial expression, gpu") test_conditional_alloc_with_expr_gpu() \ No newline at end of file