Skip to content

Commit

Permalink
Improve validation and type checks and fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
ThrudPrimrose committed Dec 10, 2024
1 parent 3854c82 commit 2408ad0
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 16 deletions.
2 changes: 1 addition & 1 deletion dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def declare_array(self,
size_desc_name = sdfg.arrays[name].size_desc_name
if size_desc_name is not None:
size_desc = sdfg.arrays[size_desc_name]
size_ctypedef = dtypes.pointer(size_desc.dtype).ctype
size_ctypedef = size_desc.dtype.ctype
self._dispatcher.declared_arrays.add(size_desc_name, DefinedType.Pointer, size_ctypedef)
return
elif nodedesc.storage is dtypes.StorageType.CPU_ThreadLocal:
Expand Down
7 changes: 4 additions & 3 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,7 @@ def generate_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: StateSub
# make dynamic map inputs constant
# TODO move this into _get_const_params(dfg_scope)
# Do not add src as const if the size is being read (src_conn is _read_size)
const_params |= set((str(e.src)) for e in dace.sdfg.dynamic_map_inputs(state, scope_entry) if not e.src_conn.endswith("size"))
const_params |= set((str(e.src)) for e in dace.sdfg.dynamic_map_inputs(state, scope_entry) if e.src_conn is None or (e.src_conn is not None and e.src_conn == "_read_size"))

# Store init/exit code streams
old_entry_stream = self.scope_entry_stream
Expand Down Expand Up @@ -1626,8 +1626,9 @@ def generate_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: StateSub
for i in range(size_arr.shape[0]):
if f"__{arr_name}_dim{i}_size" not in dyn_args:
dyn_args.append(f"__{arr_name}_dim{i}_size")
dyn_args_typed.append(f"const {arg.dtype.ctype} __{arr_name}_dim{i}_size")
needed_size_scalars_declaration.append(f"const {arg.dtype.ctype} __{arr_name}_dim{i}_size = {size_desc_name}[{i}];")
size_desc = sdfg.arrays[size_desc_name]
dyn_args_typed.append(f"const {size_desc.dtype.ctype} __{arr_name}_dim{i}_size")
needed_size_scalars_declaration.append(f"const {size_desc.dtype.ctype} __{arr_name}_dim{i}_size = {size_desc_name}[{i}];")

self._localcode.write(
'__global__ void %s %s(%s) {\n' %
Expand Down
2 changes: 1 addition & 1 deletion dace/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,7 @@ def __init__(self,
else:
self.offset = [0] * len(shape)

self.is_deferred_array = any(["__dace_defer" in str(dim) for dim in self.shape])
self.is_deferred_array = any([str(dim).startswith("__dace_defer") for dim in self.shape])

self.validate()

Expand Down
2 changes: 1 addition & 1 deletion dace/sdfg/sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -1795,7 +1795,7 @@ def add_array(self,
# convert strings to int if possible, unless it is not the reserved symbol for deferred allocation
newshape = []
for i, s in enumerate(shape):
if isinstance(s, str) and s == "__dace_defer":
if isinstance(s, str) and s.startswith("__dace_defer"):
newshape.append(dace.symbolic.pystr_to_symbolic(f"{s}_dim{i}"))
else:
try:
Expand Down
4 changes: 4 additions & 0 deletions dace/sdfg/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,11 +573,15 @@ def validate_state(state: 'dace.sdfg.SDFGState',
write_size_edges = list(state.edges_by_connector(node, insize))

# Reading-Writing the size is valid only if the array is transient and has the storage type CPU_Heap or GPU_Global
has_writes = len(write_size_edges) > 0
has_writes_or_reads = len(read_size_edges) + len(write_size_edges) > 0
size_access_allowed = arr.transient and (arr.storage == dtypes.StorageType.CPU_Heap or arr.storage == dtypes.StorageType.GPU_Global)
if has_writes_or_reads and not size_access_allowed:
raise InvalidSDFGNodeError('Reading the size of an array, or changing (writing to) the size of an array '
'is only valid if the array is transient and the storage is CPU_Heap or GPU_Global', sdfg, state_id, nid)
if has_writes and scope[node] is not None:
raise InvalidSDFGNodeError('Resizing array is not allowed within a scope (e.g. not inside maps)', sdfg, state_id, nid)


if len(write_size_edges) > 1:
raise InvalidSDFGNodeError('One node can have at maximum one edge writing to its size descriptior', sdfg, state_id, nid)
Expand Down
72 changes: 62 additions & 10 deletions tests/deferred_alloc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy
import pytest


@pytest.fixture(params=[dace.dtypes.StorageType.CPU_Heap, dace.dtypes.StorageType.GPU_Global])
def storage_type(request):
return request.param
Expand Down Expand Up @@ -73,7 +74,7 @@ def _get_assign_map_sdfg(storage_type: dace.dtypes.StorageType, transient: bool,
arrn = state.add_access(arr_name)

if storage_type == dace.dtypes.StorageType.CPU_Heap:
assert (schedule_type == dace.dtypes.ScheduleType.Sequential)
assert (schedule_type == dace.dtypes.ScheduleType.Sequential or schedule_type == dace.dtypes.ScheduleType.CPU_Multicore)
elif storage_type == dace.dtypes.StorageType.GPU_Global:
assert (schedule_type == dace.dtypes.ScheduleType.GPU_Device)

Expand Down Expand Up @@ -188,9 +189,55 @@ def test_trivial_realloc_cpu(transient: bool):
_test_trivial_realloc(dace.dtypes.StorageType.CPU_Heap, transient)


def test_realloc_inside_map():
def _add_realloc_inside_map(sdfg: dace.SDFG, schedule_type: dace.dtypes.ScheduleType):
    """Append a second state to *sdfg* that resizes array ``A`` from inside a map scope.

    The produced SDFG is intentionally invalid: writing to an array's size
    descriptor (the ``_write_size`` connector) within a map scope is rejected
    by validation, so callers use this helper to assert that
    ``sdfg.validate()`` raises.

    :param sdfg: SDFG (as built by ``_get_assign_map_sdfg``) to extend in place.
    :param schedule_type: Schedule of the map wrapping the size write.
    """
    pre_state = sdfg.states()[0]
    state = sdfg.add_state("s2")
    sdfg.add_edge(pre_state, state, dace.InterstateEdge(None, None))

    map_entry, map_exit = state.add_map(name="map2", ndrange={"i": dace.subsets.Range([(0, 4, 1)])},
                                        schedule=schedule_type)
    an_2 = state.add_access('A')
    an_2.add_in_connector("_write_size")

    # The tasklet's output connector is "__out", so its code must assign
    # "__out" (the previous code "_out=8" never wrote the connector).
    t1 = state.add_tasklet(name="assign", inputs={}, outputs={"__out"}, code="__out=8")
    t1.add_out_connector("__out")

    # Scalar-like transient holding the new size value to feed into "_write_size".
    _, _ = sdfg.add_array("tmp0", shape=(2, ), dtype=numpy.uint64, transient=True)
    sca = state.add_access("tmp0")

    state.add_edge(map_entry, None, t1, None, dace.Memlet(None))
    state.add_edge(t1, "__out", sca, None, dace.Memlet("tmp0[0]"))
    state.add_edge(sca, None, an_2, "_write_size", dace.Memlet("tmp0"))
    state.add_edge(an_2, None, map_exit, None, dace.Memlet(None))


def test_realloc_inside_map_gpu():
    """Validation must reject resizing an array from inside a GPU map scope."""
    sdfg = _get_assign_map_sdfg(dace.dtypes.StorageType.GPU_Global, True, dace.dtypes.ScheduleType.GPU_Device)
    _add_realloc_inside_map(sdfg, dace.dtypes.ScheduleType.GPU_Device)
    try:
        sdfg.validate()
    except Exception:
        # Expected: resizing within a map scope is invalid.
        return

    pytest.fail("Reallocating an array inside a map scope did not fail validation when it was expected to.")

def test_realloc_inside_map_cpu():
    """Validation must reject resizing an array from inside a CPU-multicore map scope."""
    sdfg = _get_assign_map_sdfg(dace.dtypes.StorageType.CPU_Heap, True, dace.dtypes.ScheduleType.CPU_Multicore)
    _add_realloc_inside_map(sdfg, dace.dtypes.ScheduleType.CPU_Multicore)
    try:
        sdfg.validate()
    except Exception:
        # Expected: resizing within a map scope is invalid.
        return

    pytest.fail("Reallocating an array inside a map scope did not fail validation when it was expected to.")

def test_conditional_alloc_gpu():
    # TODO(review): placeholder — conditional deferred allocation with
    # GPU_Global storage is not covered yet.
    pass

def test_conditional_alloc_cpu():
    # TODO(review): placeholder — conditional deferred allocation with
    # CPU_Heap storage is not covered yet.
    pass


def test_incomplete_write_dimensions_1():
sdfg = _get_trivial_alloc_sdfg(dace.dtypes.StorageType.CPU_Heap, True, "1:2")
try:
Expand All @@ -211,23 +258,28 @@ def test_incomplete_write_dimensions_2():


if __name__ == "__main__":
print(f"Trivial Realloc within map {dace.dtypes.StorageType.CPU_Multicore}")
test_realloc_inside_map_cpu()
print(f"Trivial Realloc within map {dace.dtypes.StorageType.GPU_Device}")
test_realloc_inside_map_gpu()

print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap}")
test_trivial_realloc_cpu(dace.dtypes.StorageType.CPU_Heap, True)
test_trivial_realloc_cpu(True)
print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap}")
test_realloc_use_cpu(dace.dtypes.StorageType.CPU_Heap, True, dace.dtypes.ScheduleType.Sequential)
test_realloc_use_cpu(True)
print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global}")
test_trivial_realloc_gpu(dace.dtypes.StorageType.GPU_Global, True)
test_trivial_realloc_gpu(True)
print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global}")
test_realloc_use_gpu(dace.dtypes.StorageType.GPU_Global, True, dace.dtypes.ScheduleType.GPU_Device)
test_realloc_use_gpu(True)

print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data")
test_trivial_realloc_cpu(dace.dtypes.StorageType.CPU_Heap, False)
test_trivial_realloc_cpu(False)
print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap} on non-transient data")
test_realloc_use_cpu(dace.dtypes.StorageType.CPU_Heap, False, dace.dtypes.ScheduleType.Sequential)
test_realloc_use_cpu(False)
print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data")
test_trivial_realloc_gpu(dace.dtypes.StorageType.GPU_Global, False)
test_trivial_realloc_gpu(False)
print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global} on non-transient data")
test_realloc_use_gpu(dace.dtypes.StorageType.GPU_Global, False, dace.dtypes.ScheduleType.GPU_Device)
test_realloc_use_gpu(False)

print(f"Realloc with incomplete write 1")
test_incomplete_write_dimensions_1()
Expand Down

0 comments on commit 2408ad0

Please sign in to comment.