Skip to content

Commit

Permalink
Improve proposal according to PR comments, improve support for more c…
Browse files Browse the repository at this point in the history
…omplex shapes, add tests
  • Loading branch information
ThrudPrimrose committed Dec 11, 2024
1 parent 62bc08c commit f50382b
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 67 deletions.
3 changes: 0 additions & 3 deletions dace/codegen/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,10 +635,7 @@ def dispatch_reallocate(self, src_node: nodes.Node, node: nodes.Node, edge: Mult
state = cfg.state(state_id)
target = self.get_reallocate_dispatcher(node, edge, sdfg, state)
assert target is not None
if target is None:
return

# Dispatch reallocate
self._used_targets.add(target)
target.reallocate(sdfg, cfg, dfg, state_id, src_node, node, edge, function_stream, output_stream)

Expand Down
31 changes: 26 additions & 5 deletions dace/codegen/targets/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,8 +548,8 @@ def ndcopy_to_strided_copy(
return None


def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None, packed_veclen=1, indices=None,
deferred_size_names=None):
def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None,
packed_veclen=1, indices=None, deferred_size_names=None):
""" Creates a C++ expression that can be added to a pointer in order
to offset it to the beginning of the given subset and offset.
Expand Down Expand Up @@ -579,7 +579,7 @@ def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None, packed
if packed_veclen > 1:
index /= packed_veclen

if not (deferred_size_names is None):
if deferred_size_names is not None:
access_str_with_deferred_vars = sym2cpp(index)
def replace_pattern(match):
number = match.group(1)
Expand All @@ -591,6 +591,27 @@ def replace_pattern(match):
return sym2cpp(index)


def _get_deferred_size_names(desc, name):
    """Build the per-dimension size expressions of a deferred-allocation array.

    :param desc: Data descriptor; its ``storage``, ``transient``, ``shape`` and
                 ``size_desc_name`` attributes are read.
    :param name: Name of the data container, used to form the GPU size
                 variable names (``__<name>_dim<i>_size``).
    :return: ``None`` if no dimension mentions ``__dace_defer`` or the storage
             is neither ``GPU_Global`` nor ``CPU_Heap``. Otherwise a list with
             one entry per dimension: a size-variable string for every
             deferred dimension and the untouched shape element for the rest.
    """
    on_gpu = desc.storage == dtypes.StorageType.GPU_Global
    on_heap = desc.storage == dtypes.StorageType.CPU_Heap

    # Neither a deferrable storage type nor a transient: nothing to resolve.
    if not (on_gpu or on_heap or desc.transient):
        return None

    # No dimension references the reserved deferred-allocation symbol.
    if not any("__dace_defer" in str(dim) for dim in desc.shape):
        return None

    # Size names are only produced for the two supported storage types.
    if not (on_gpu or on_heap):
        return None

    size_exprs = []
    for axis, dim in enumerate(desc.shape):
        if "__dace_defer" not in str(dim):
            size_exprs.append(dim)
        elif on_gpu:
            size_exprs.append(f"__{name}_dim{axis}_size")
        else:
            size_exprs.append(f"{desc.size_desc_name}[{axis}]")
    return size_exprs or None

def cpp_array_expr(sdfg,
memlet,
with_brackets=True,
Expand All @@ -600,14 +621,14 @@ def cpp_array_expr(sdfg,
use_other_subset=False,
indices=None,
referenced_array=None,
codegen=None,
deferred_size_names=None):
codegen=None):
""" Converts an Indices/Range object to a C++ array access string. """
subset = memlet.subset if not use_other_subset else memlet.other_subset
s = subset if relative_offset else subsets.Indices(offset)
o = offset if relative_offset else None
desc : dace.Data = (sdfg.arrays[memlet.data] if referenced_array is None else referenced_array)
desc_name = memlet.data
deferred_size_names = _get_deferred_size_names(desc, desc_name)
offset_cppstr = cpp_offset_expr(desc, s, o, packed_veclen, indices=indices, deferred_size_names=deferred_size_names)

# NOTE: Are there any cases where a mix of '.' and '->' is needed when traversing nested structs?
Expand Down
63 changes: 18 additions & 45 deletions dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
from copy import deepcopy
import re
from dace.sdfg.graph import MultiConnectorEdge
from dace.sdfg.state import ControlFlowRegion, SDFGState, StateSubgraphView
import functools
Expand Down Expand Up @@ -404,7 +405,7 @@ def allocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphV

# Compute array size
arrsize = nodedesc.total_size
deferred_allocation = any([s for s in nodedesc.shape if str(s).startswith("__dace_defer")])
deferred_allocation = any([s for s in nodedesc.shape if "__dace_defer" in str(s)])
arrsize_bytes = None
if not isinstance(nodedesc.dtype, dtypes.opaque):
arrsize_bytes = arrsize * nodedesc.dtype.bytes
Expand Down Expand Up @@ -703,15 +704,22 @@ def reallocate(
dtype = sdfg.arrays[data_name].dtype

# Only consider the offsets with __dace_defer in original dim
mask_array = [str(dim).startswith("__dace_defer") for dim in data.shape]
mask_array = ["__dace_defer" in str(dim) for dim in data.shape]

# The size may be an expression involving a "__dace_defer" symbol rather than just the bare symbol.
# The size array is only updated with the symbol's value; when computing the full expression, we replace
# each __dace_defer_dim<N> pattern with the corresponding access into the new size array.
new_size_strs = []
for i, mask in enumerate(mask_array):
if mask:
new_size_str = cpp.sym2cpp(data.shape[i])
pattern = r'__dace_defer_dim(\d+)'
new_size_strs.append(re.sub(pattern, lambda m: f'{new_size_array_name}[{m.group(1)}]', new_size_str))
callsite_stream.write(
f"{size_array_name}[{i}] = {new_size_array_name}[{i}];"
)

# Call realloc only after no __dace_defer is left in size_array ?
size_str = " * ".join([f"{size_array_name}[{i}]" for i in range(len(data.shape))])
size_str = " * ".join(new_size_strs)
callsite_stream.write(
f"{dst_node.data} = static_cast<{dtype} *>(std::realloc(static_cast<void *>({dst_node.data}), {size_str} * sizeof({dtype})));"
)
Expand Down Expand Up @@ -749,34 +757,22 @@ def _emit_copy(

if isinstance(dst_node, nodes.Tasklet):
# Copy into tasklet
desc = sdfg.arrays[memlet.data]
deferred_size_names = self._get_deferred_size_names(desc, memlet)
stream.write(
" " + self.memlet_definition(sdfg, memlet, False, vconn, dst_node.in_connectors[vconn], deferred_size_names=deferred_size_names),
" " + self.memlet_definition(sdfg, memlet, False, vconn, dst_node.in_connectors[vconn]),
cfg,
state_id,
[src_node, dst_node],
)
if deferred_size_names is not None:
stream.write(
"// Size uses deferred allocation"
)

return
elif isinstance(src_node, nodes.Tasklet):
# Copy out of tasklet
desc = sdfg.arrays[memlet.data]
deferred_size_names = self._get_deferred_size_names(desc, memlet)
stream.write(
" " + self.memlet_definition(sdfg, memlet, True, uconn, src_node.out_connectors[uconn], deferred_size_names=deferred_size_names),
" " + self.memlet_definition(sdfg, memlet, True, uconn, src_node.out_connectors[uconn]),
cfg,
state_id,
[src_node, dst_node],
)
if deferred_size_names is not None:
stream.write(
"// Size uses deferred allocation"
)
return
else: # Copy array-to-array
src_nodedesc = src_node.desc(sdfg)
Expand Down Expand Up @@ -1044,27 +1040,6 @@ def write_and_resolve_expr(self, sdfg: SDFG, memlet: mmlt.Memlet, nc: bool, outn
custom_reduction = cpp.unparse_cr(sdfg, memlet.wcr, dtype)
return (f'dace::wcr_custom<{dtype.ctype}>:: template {func}({custom_reduction}, {ptr}, {inname})')

def _get_deferred_size_names(self, desc, memlet):
    """Build the per-dimension size expressions of a deferred-allocation array.

    :param desc: Data descriptor; its ``storage``, ``transient``, ``shape`` and
                 ``size_desc_name`` attributes are read.
    :param memlet: Memlet whose ``data`` name is used to form the GPU size
                   variable names (``__<data>_dim<i>_size``).
    :return: ``None`` if no dimension is a symbol starting with
             ``__dace_defer`` or the storage is neither ``GPU_Global`` nor
             ``CPU_Heap``. Otherwise a list with one entry per dimension: a
             size-variable string for every dimension whose string form starts
             with ``__dace_defer`` and the untouched shape element for the rest.
    """
    on_gpu = desc.storage == dtypes.StorageType.GPU_Global
    on_heap = desc.storage == dtypes.StorageType.CPU_Heap

    # Neither a deferrable storage type nor a transient: nothing to resolve.
    if not (on_gpu or on_heap or desc.transient):
        return None

    # At least one dimension must be a bare deferred-allocation symbol.
    has_deferred_symbol = any(
        isinstance(dim, symbolic.symbol) and str(dim).startswith("__dace_defer") for dim in desc.shape)
    if not has_deferred_symbol:
        return None

    # Size names are only produced for the two supported storage types.
    if not (on_gpu or on_heap):
        return None

    size_exprs = []
    for axis, dim in enumerate(desc.shape):
        if not str(dim).startswith("__dace_defer"):
            size_exprs.append(dim)
        elif on_gpu:
            size_exprs.append(f"__{memlet.data}_dim{axis}_size")
        else:
            size_exprs.append(f"{desc.size_desc_name}[{axis}]")
    return size_exprs or None

def process_out_memlets(self,
sdfg: SDFG,
cfg: ControlFlowRegion,
Expand Down Expand Up @@ -1201,8 +1176,7 @@ def process_out_memlets(self,
# If the storage type is CPU_Heap or GPU_Global, the array might require deferred allocation.
# We can check whether the array requires special access using A_size[0] (CPU) or __A_dim0_size (GPU)
# by going through the shape and checking for symbols containing __dace_defer
deferred_size_names = self._get_deferred_size_names(desc, memlet)
expr = cpp.cpp_array_expr(sdfg, memlet, codegen=self._frame, deferred_size_names=deferred_size_names)
expr = cpp.cpp_array_expr(sdfg, memlet, codegen=self._frame)
write_expr = codegen.make_ptr_assignment(in_local_name, conntype, expr, desc_dtype)

# Write out
Expand Down Expand Up @@ -1339,8 +1313,7 @@ def memlet_definition(self,
local_name: str,
conntype: Union[data.Data, dtypes.typeclass] = None,
allow_shadowing: bool = False,
codegen: 'CPUCodeGen' = None,
deferred_size_names = None):
codegen: 'CPUCodeGen' = None):
# TODO: Robust rule set
if conntype is None:
raise ValueError('Cannot define memlet for "%s" without connector type' % local_name)
Expand Down Expand Up @@ -1389,7 +1362,7 @@ def memlet_definition(self,
decouple_array_interfaces=decouple_array_interfaces)

result = ''
expr = (cpp.cpp_array_expr(sdfg, memlet, with_brackets=False, codegen=self._frame, deferred_size_names=deferred_size_names)
expr = (cpp.cpp_array_expr(sdfg, memlet, with_brackets=False, codegen=self._frame)
if var_type in [DefinedType.Pointer, DefinedType.StreamArray, DefinedType.ArrayInterface] else ptr)

if expr != ptr:
Expand Down Expand Up @@ -1433,7 +1406,7 @@ def memlet_definition(self,
if not memlet.dynamic and memlet.num_accesses == 1:
if not output:
if isinstance(desc, data.Stream) and desc.is_stream_array():
index = cpp.cpp_offset_expr(desc, memlet.subset, deferred_size_names=deferred_size_names)
index = cpp.cpp_offset_expr(desc, memlet.subset)
expr = f"{memlet.data}[{index}]"
result += f'{memlet_type} {local_name} = ({expr}).pop();'
defined = DefinedType.Scalar
Expand Down
9 changes: 7 additions & 2 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def allocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphV
is_dynamically_sized = symbolic.issymbolic(arrsize, sdfg.constants)
arrsize_malloc = '%s * sizeof(%s)' % (sym2cpp(arrsize), nodedesc.dtype.ctype)
ctypedef = '%s *' % nodedesc.dtype.ctype
deferred_allocation = any([s for s in nodedesc.shape if str(s).startswith("__dace_defer")])
deferred_allocation = any([s for s in nodedesc.shape if "__dace_defer" in str(s)])

# Different types of GPU arrays
if nodedesc.storage == dtypes.StorageType.GPU_Global:
Expand Down Expand Up @@ -2794,7 +2794,8 @@ def reallocate(
dtype = sdfg.arrays[data_name].dtype

# Only consider the offsets with __dace_defer in original dim
mask_array = [str(dim).startswith("__dace_defer") for dim in data.shape]
mask_array = ["__dace_defer" in str(dim) for dim in data.shape]
print(mask_array)

# Call realloc only after no __dace_defer is left in size_array (must be true)
# Save new and old sizes before registering them, because we need both to compute the bound of the new array
Expand Down Expand Up @@ -2829,8 +2830,12 @@ def reallocate(
s += "}\n"
callsite_stream.write(s)

new_size_strs = []
for i, mask in enumerate(mask_array):
if mask:
new_size_str = cpp.sym2cpp(data.shape[i])
pattern = r'__dace_defer_dim(\d+)'
new_size_strs.append(re.sub(pattern, lambda m: f'{new_size_array_name}[{m.group(1)}]', new_size_str))
callsite_stream.write(
f"{size_array_name}[{i}] = {new_size_array_name}[{i}];"
)
Expand Down
3 changes: 2 additions & 1 deletion dace/codegen/targets/framecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,8 @@ def generate_code(self,
if len(array) == 1:
array = array[0]
if type(array) == data.Array and array.is_deferred_array:
dimensions = ["0" if cpp.sym2cpp(dim).startswith("__dace_defer") else cpp.sym2cpp(dim) for dim in array.shape]
# 0 is a placeholder value, it is not important what the value is
dimensions = ["0" if "__dace_defer" in cpp.sym2cpp(dim) else cpp.sym2cpp(dim) for dim in array.shape]
size_str = ",".join(dimensions)
assert len(size_nodedesc.shape) == 1
alloc_str = f'{ctypedef} {size_desc_name}[{size_nodedesc.shape[0]}]{{{size_str}}};\n'
Expand Down
2 changes: 1 addition & 1 deletion dace/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,7 @@ def __init__(self,
else:
self.offset = [0] * len(shape)

self.is_deferred_array = any([str(dim).startswith("__dace_defer") for dim in self.shape])
self.is_deferred_array = any(["__dace_defer" in str(dim) for dim in self.shape])

self.validate()

Expand Down
3 changes: 1 addition & 2 deletions dace/sdfg/sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,6 @@ def __init__(self,
self._parent_sdfg = None
self._parent_nsdfg_node = None
self._arrays = NestedDict() # type: Dict[str, dt.Array]
self._arrays = NestedDict()
self.arg_names = []
self._labels: Set[str] = set()
self.global_code = {'frame': CodeBlock("", dtypes.Language.CPP)}
Expand Down Expand Up @@ -1795,7 +1794,7 @@ def add_array(self,
# convert strings to int if possible, unless the string contains the reserved symbol for deferred allocation
newshape = []
for i, s in enumerate(shape):
if isinstance(s, str) and s.startswith("__dace_defer"):
if isinstance(s, str) and "__dace_defer" in s:
newshape.append(dace.symbolic.pystr_to_symbolic(f"{s}_dim{i}"))
else:
try:
Expand Down
3 changes: 1 addition & 2 deletions dace/sdfg/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,7 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto
# Trace through scope entry using IN_# -> OUT_#
if isinstance(curedge.dst, (nd.EntryNode, nd.ExitNode)):
if curedge.dst_conn is None:
#raise ValueError("Destination connector cannot be None for {}".format(curedge.dst))
break
raise ValueError("Destination connector cannot be None for {}".format(curedge.dst))
if not curedge.dst_conn.startswith("IN_"): # Map variable
break
next_edge = next(e for e in state.out_edges(curedge.dst) if e.src_conn == "OUT_" + curedge.dst_conn[3:])
Expand Down
27 changes: 25 additions & 2 deletions dace/sdfg/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,29 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context
, sdfg, None
)

if isinstance(desc, dt.Array): #is_deferred_array and is_size_array are only defined for dt.Array
if desc.is_deferred_array:
if desc.is_size_array:
raise InvalidSDFGError(
f"A deferred array can't be used as a size array for another array. Data descriptor name: {desc}."
, sdfg, None
)
if not desc.transient:
raise InvalidSDFGError(
f"Deferred arrays need to be transient."
, sdfg, None
)
if "__return" in name:
raise InvalidSDFGError(
f"Deferred arrays can't be returned. {desc} has __return in its name."
, sdfg, None
)
if desc.storage is not dtypes.StorageType.GPU_Global and desc.storage is not dtypes.StorageType.CPU_Heap:
raise InvalidSDFGError(
f"Deferred arrays are supported only for {dtypes.StorageType.GPU_Global} and {dtypes.StorageType.CPU_Heap} storage types for {desc}."
, sdfg, None
)

# Check if SDFG is located within a GPU kernel
context['in_gpu'] = is_devicelevel_gpu(sdfg, None, None)
context['in_fpga'] = is_devicelevel_fpga(sdfg, None, None)
Expand All @@ -349,7 +372,7 @@ def _accessible(sdfg: 'dace.sdfg.SDFG', container: str, context: Dict[str, bool]
"""
Helper function that returns False if a data container cannot be accessed in the current SDFG context.
"""
storage = sdfg.arrays[container].storage if container in sdfg.arrays else sdfg.arrays[container].storage
storage = sdfg.arrays[container].storage
if storage == dtypes.StorageType.GPU_Global or storage in dtypes.GPU_STORAGES:
return context.get('in_gpu', False)
if storage == dtypes.StorageType.FPGA_Global or storage in dtypes.FPGA_STORAGES:
Expand Down Expand Up @@ -929,7 +952,7 @@ def validate_state(state: 'dace.sdfg.SDFGState',

# Check dimensionality of memory access
if isinstance(e.data.subset, (sbs.Range, sbs.Indices)):
desc = sdfg.arrays[e.data.data] if e.data.data in sdfg.arrays else sdfg.arrays[e.data.data]
desc = sdfg.arrays[e.data.data]
if e.data.subset.dims() != len(desc.shape):
raise InvalidSDFGEdgeError(
"Memlet subset uses the wrong dimensions"
Expand Down
Loading

0 comments on commit f50382b

Please sign in to comment.