From 57abd284500c1990ad2744160eb92aab5d08756d Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 14:41:47 +0200 Subject: [PATCH 01/48] Added NestedDataClassProperty for nested data. --- dace/properties.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/dace/properties.py b/dace/properties.py index 6e883f8549..30a3e0913b 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1381,6 +1381,45 @@ def from_json(obj, context=None): raise TypeError("Cannot parse type from: {}".format(obj)) +class NestedDataClassProperty(Property): + """ Custom property type for nested data. """ + + def __get__(self, obj, objtype=None) -> 'Data': + return super().__get__(obj, objtype) + + @property + def dtype(self): + return pydoc.locate("dace.data.Data") + + @staticmethod + def from_string(s): + dtype = pydoc.locate("dace.data.{}".format(s)) + if dtype is None or not isinstance(dtype, pydoc.locate("dace.data.Data")): + raise ValueError("Not a valid data type: {}".format(s)) + return dtype + + @staticmethod + def to_string(obj): + return obj.to_string() + + def to_json(self, obj): + if obj is None: + return None + return obj.dtype.to_json() + + @staticmethod + def from_json(obj, context=None): + if obj is None: + return None + elif isinstance(obj, str): + return NestedDataClassProperty.from_string(obj) + elif isinstance(obj, dict): + # Let the deserializer handle this + return dace.serialize.from_json(obj) + else: + raise TypeError("Cannot parse type from: {}".format(obj)) + + class LibraryImplementationProperty(Property): """ Property for choosing an implementation type for a library node. On the From 09465d242fbf33036ebf35e1c9b43357c60648ca Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 14:42:33 +0200 Subject: [PATCH 02/48] Added Structures and StructArrays. 
--- dace/data.py | 121 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/dace/data.py b/dace/data.py index 2fc5f334c6..886fed75de 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1,10 +1,10 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import copy as cp import ctypes import functools -import re + from numbers import Number -from typing import Any, Dict, Optional, Sequence, Set, Tuple +from typing import Any, Dict, Optional, Sequence, Set, Tuple, Union import numpy import sympy as sp @@ -17,9 +17,8 @@ import dace.dtypes as dtypes from dace import serialize, symbolic from dace.codegen import cppunparse -from dace.properties import (CodeProperty, DebugInfoProperty, DictProperty, EnumProperty, ListProperty, Property, - ReferenceProperty, ShapeProperty, SubsetProperty, SymbolicProperty, TypeClassProperty, - make_properties) +from dace.properties import (DebugInfoProperty, DictProperty, EnumProperty, ListProperty, NestedDataClassProperty, + Property, ShapeProperty, SymbolicProperty, TypeClassProperty, make_properties) def create_datadescriptor(obj, no_custom_desc=False): @@ -342,6 +341,86 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): return new_desc +class Structure(Data): + """ Base class for structures. 
""" + + def __init__(self, + shape: Sequence[Union[int, symbolic.SymbolicType]] = None, + transient: bool = False, + storage: dtypes.StorageType = dtypes.StorageType.Default, + location: Dict[str, str] = None, + lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, + debuginfo: dtypes.DebugInfo = None): + fields = { + attr: getattr(self, attr) + for attr in dir(self) if ( + not attr in dir(Data) and + not attr.startswith("_") and + not attr in ('total_size', 'offset', 'start_offset', 'strides'))} + fields_and_types = dict() + symbols = set() + for attr in dir(self): + if (attr in dir(Data) or attr.startswith("__") or + attr in ('total_size', 'offset', 'start_offset', 'strides')): + continue + value = getattr(self, attr) + if isinstance(value, Array): + symbols |= value.free_symbols + fields_and_types[attr] = (dtypes.pointer(value.dtype), str(_prod(value.shape))) + elif isinstance(value, Scalar): + symbols |= value.free_symbols + fields_and_types[attr] = value.dtype + elif isinstance(value, (sp.Basic, symbolic.SymExpr)): + symbols |= value.free_symbols + fields_and_types[attr] = symbolic.symtype(value) + elif isinstance(value, (int, numpy.integer)): + fields_and_types[attr] = dtypes.typeclass(type(value)) + else: + raise TypeError(f"Attribute {attr}'s value {value} has unsupported type: {type(value)}") + for s in symbols: + if str(s) in fields_and_types: + continue + if hasattr(s, "dtype"): + fields_and_types[str(s)] = s.dtype + else: + fields_and_types[str(s)] = dtypes.int32 + dtype = dtypes.struct(self.__class__.__name__, **fields_and_types) + shape = shape or (1,) + super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) + + @property + def total_size(self): + return -1 + + @property + def offset(self): + return [0] + + @property + def start_offset(self): + return 0 + + @property + def strides(self): + return [1] + + def as_arg(self, with_types=True, for_call=False, name=None): + if self.storage is 
dtypes.StorageType.GPU_Global: + return Array(self.dtype, [1]).as_arg(with_types, for_call, name) + if not with_types or for_call: + return name + return self.dtype.as_arg(name) + + def __getitem__(self, s): + """ This is syntactic sugar that allows us to define an array type + with the following syntax: ``Structure[N,M]`` + :return: A ``data.Array`` data descriptor. + """ + if isinstance(s, list) or isinstance(s, tuple): + return StructArray(self, tuple(s)) + return StructArray(self, (s, )) + + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. """ @@ -902,6 +981,36 @@ def free_symbols(self): return result +@make_properties +class StructArray(Array): + """ Array of Structures. """ + + stype = NestedDataClassProperty(allow_none=True, default=None) + + def __init__(self, + stype, + shape, + transient=False, + allow_conflicts=False, + storage=dtypes.StorageType.Default, + location=None, + strides=None, + offset=None, + may_alias=False, + lifetime=dtypes.AllocationLifetime.Scope, + alignment=0, + debuginfo=None, + total_size=-1, + start_offset=None, + optional=None, + pool=False): + + self.stype = stype + dtype = stype.dtype + super(StructArray, self).__init__(dtype, shape, transient, allow_conflicts, storage, location, strides, offset, + may_alias, lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) + + @make_properties class View(Array): """ From 51776a1b746126194fc1eebcece20adbe88be302 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 15:09:00 +0200 Subject: [PATCH 03/48] Break array lengths down to their symbolic tokents. 
--- dace/dtypes.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dace/dtypes.py b/dace/dtypes.py index dee2283f25..230197bc6f 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -791,6 +791,7 @@ def from_json(json_obj, context=None): return ret def _parse_field_and_types(self, **fields_and_types): + from dace.symbolic import pystr_to_symbolic self._data = dict() self._length = dict() self.bytes = 0 @@ -799,8 +800,12 @@ def _parse_field_and_types(self, **fields_and_types): t, l = v if not isinstance(t, pointer): raise TypeError("Only pointer types may have a length.") - if l not in fields_and_types.keys(): - raise ValueError("Length {} not a field of struct {}".format(l, self.name)) + sym_tokens = pystr_to_symbolic(l).free_symbols + for sym in sym_tokens: + if str(sym) not in fields_and_types.keys(): + raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") + # if l not in fields_and_types.keys(): + # raise ValueError("Length {} not a field of struct {}".format(l, self.name)) self._data[k] = t self._length[k] = l self.bytes += t.bytes From b23ed86de823398321ef6f620e3db0d3fd7f857b Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 15:11:09 +0200 Subject: [PATCH 04/48] Allow structures to have fields whose name doesn't start with underscore. --- dace/properties.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dace/properties.py b/dace/properties.py index 30a3e0913b..679c0b9596 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import ast from collections import OrderedDict import copy @@ -412,12 +412,12 @@ def initialize_properties(obj, *args, **kwargs): except AttributeError: if not prop.unmapped: raise PropertyError("Property {} is unassigned in __init__ for {}".format(name, cls.__name__)) - # Assert that there are no fields in the object not captured by - # properties, unless they are prefixed with "_" - for name, prop in obj.__dict__.items(): - if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): - raise PropertyError("{} : Variable {} is neither a Property nor " - "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) + # Assert that there are no fields in the object not captured by properties, unless they are prefixed with "_" + if not isinstance(obj, dace.data.Structure): + for name, prop in obj.__dict__.items(): + if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): + raise PropertyError("{} : Variable {} is neither a Property nor " + "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) # Replace the __init__ method cls.__init__ = initialize_properties From 777821f0a940bc2f981ef5c04749c0f49968e0d1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:21:54 +0200 Subject: [PATCH 05/48] Structures now have a "members" dictionary. Their dtype is a pointer to the corresponding dtypes.struct typeclass. 
--- dace/data.py | 64 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/dace/data.py b/dace/data.py index 886fed75de..0f1ef1f266 100644 --- a/dace/data.py +++ b/dace/data.py @@ -341,42 +341,54 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): return new_desc +def _arrays_to_json(arrays): + if arrays is None: + return None + return {k: serialize.to_json(v) for k, v in arrays.items()} + + +def _arrays_from_json(obj, context=None): + if obj is None: + return {} + return {k: serialize.from_json(v, context) for k, v in obj.items()} + + +@make_properties class Structure(Data): """ Base class for structures. """ + members = Property(dtype=dict, + desc="Dictionary of structure members", + from_json=_arrays_from_json, + to_json=_arrays_to_json) + def __init__(self, - shape: Sequence[Union[int, symbolic.SymbolicType]] = None, + members: Dict[str, Any], transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - fields = { - attr: getattr(self, attr) - for attr in dir(self) if ( - not attr in dir(Data) and - not attr.startswith("_") and - not attr in ('total_size', 'offset', 'start_offset', 'strides'))} + self.members = members or {} fields_and_types = dict() symbols = set() - for attr in dir(self): - if (attr in dir(Data) or attr.startswith("__") or - attr in ('total_size', 'offset', 'start_offset', 'strides')): - continue - value = getattr(self, attr) - if isinstance(value, Array): - symbols |= value.free_symbols - fields_and_types[attr] = (dtypes.pointer(value.dtype), str(_prod(value.shape))) - elif isinstance(value, Scalar): - symbols |= value.free_symbols - fields_and_types[attr] = value.dtype - elif isinstance(value, (sp.Basic, symbolic.SymExpr)): - symbols |= value.free_symbols - fields_and_types[attr] = 
symbolic.symtype(value) - elif isinstance(value, (int, numpy.integer)): - fields_and_types[attr] = dtypes.typeclass(type(value)) + for k, v in members.items(): + if isinstance(v, Structure): + symbols |= v.free_symbols + fields_and_types[k] = (v.dtype, str(v.total_size)) + elif isinstance(v, Array): + symbols |= v.free_symbols + fields_and_types[k] = (dtypes.pointer(v.dtype), str(_prod(v.shape))) + elif isinstance(v, Scalar): + symbols |= v.free_symbols + fields_and_types[k] = v.dtype + elif isinstance(v, (sp.Basic, symbolic.SymExpr)): + symbols |= v.free_symbols + fields_and_types[k] = symbolic.symtype(v) + elif isinstance(v, (int, numpy.integer)): + fields_and_types[k] = dtypes.typeclass(type(v)) else: - raise TypeError(f"Attribute {attr}'s value {value} has unsupported type: {type(value)}") + raise TypeError(f"Attribute {k}'s value {v} has unsupported type: {type(v)}") for s in symbols: if str(s) in fields_and_types: continue @@ -384,8 +396,8 @@ def __init__(self, fields_and_types[str(s)] = s.dtype else: fields_and_types[str(s)] = dtypes.int32 - dtype = dtypes.struct(self.__class__.__name__, **fields_and_types) - shape = shape or (1,) + dtype = dtypes.pointer(dtypes.struct(self.__class__.__name__, **fields_and_types)) + shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) @property From ebf72068e4b27ed777fb835bc75c835980d502d6 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:24:37 +0200 Subject: [PATCH 06/48] dtype.structs store their ctype in `_FFI_CTYPES`. --- dace/dtypes.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dace/dtypes.py b/dace/dtypes.py index 230197bc6f..d01209469f 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
""" A module that contains various DaCe type definitions. """ from __future__ import print_function import ctypes @@ -654,6 +654,8 @@ def from_json(json_obj, context=None): def as_ctypes(self): """ Returns the ctypes version of the typeclass. """ + if isinstance(self._typeclass, struct): + return ctypes.POINTER(self._typeclass.as_ctypes()) return ctypes.POINTER(_FFI_CTYPES[self.type]) def as_numpy_dtype(self): @@ -804,8 +806,6 @@ def _parse_field_and_types(self, **fields_and_types): for sym in sym_tokens: if str(sym) not in fields_and_types.keys(): raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") - # if l not in fields_and_types.keys(): - # raise ValueError("Length {} not a field of struct {}".format(l, self.name)) self._data[k] = t self._length[k] = l self.bytes += t.bytes @@ -817,16 +817,24 @@ def _parse_field_and_types(self, **fields_and_types): def as_ctypes(self): """ Returns the ctypes version of the typeclass. """ + if self in _FFI_CTYPES: + return _FFI_CTYPES[self] # Populate the ctype fields for the struct class. fields = [] for k, v in self._data.items(): if isinstance(v, pointer): - fields.append((k, ctypes.c_void_p)) # ctypes.POINTER(_FFI_CTYPES[v.type]))) + if isinstance(v._typeclass, struct): + fields.append((k, ctypes.POINTER(v._typeclass.as_ctypes()))) + else: + fields.append((k, ctypes.c_void_p)) + elif isinstance(v, struct): + fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) + _FFI_CTYPES[self] = struct_class return struct_class def as_numpy_dtype(self): From c52a48257ffbb7933aec3b04fd7029cdafce77a8 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:26:03 +0200 Subject: [PATCH 07/48] Reverted underscore exception for Structures. 
--- dace/properties.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dace/properties.py b/dace/properties.py index 679c0b9596..2225b6d853 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -413,11 +413,10 @@ def initialize_properties(obj, *args, **kwargs): if not prop.unmapped: raise PropertyError("Property {} is unassigned in __init__ for {}".format(name, cls.__name__)) # Assert that there are no fields in the object not captured by properties, unless they are prefixed with "_" - if not isinstance(obj, dace.data.Structure): - for name, prop in obj.__dict__.items(): - if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): - raise PropertyError("{} : Variable {} is neither a Property nor " - "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) + for name, prop in obj.__dict__.items(): + if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): + raise PropertyError("{} : Variable {} is neither a Property nor " + "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) # Replace the __init__ method cls.__init__ = initialize_properties From 40cc858f992d71a49730d934268c31d380d8e82b Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:26:40 +0200 Subject: [PATCH 08/48] Small fixes. 
--- dace/codegen/compiled_sdfg.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index d0d29cfa1e..863e804802 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -452,9 +452,10 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # GPU scalars are pointers, so this is fine if atype.storage != dtypes.StorageType.GPU_Global: raise TypeError('Passing an array to a scalar (type %s) in argument "%s"' % (atype.dtype.ctype, a)) - elif not isinstance(atype, dt.Array) and not isinstance(atype.dtype, dtypes.callback) and not isinstance( - arg, - (atype.dtype.type, sp.Basic)) and not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype): + elif (not isinstance(atype, (dt.Array, dt.Structure)) and + not isinstance(atype.dtype, dtypes.callback) and + not isinstance(arg, (atype.dtype.type, sp.Basic)) and + not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype)): if isinstance(arg, int) and atype.dtype.type == np.int64: pass elif isinstance(arg, float) and atype.dtype.type == np.float64: @@ -521,7 +522,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # Construct init args, which only consist of the symbols symbols = self._free_symbols initargs = tuple( - actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg + actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg for arg, actype, atype, aname in callparams if aname in symbols) # Replace arrays with their base host/device pointers @@ -531,7 +532,8 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: try: newargs = tuple( - actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg for arg, actype, atype in newargs) + actype(arg) if not isinstance(arg, (ctypes._SimpleCData)) else arg + for arg, actype, atype in newargs) except TypeError: # Pinpoint bad argument for i, (arg, actype, _) in 
enumerate(newargs): From dd73aaa8816864958fc4fd547e16d5372519f167 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:27:17 +0200 Subject: [PATCH 09/48] WIP: Replace ',' with '->' to quickly support nested data. --- dace/codegen/targets/cpp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index afbc6fca12..7d54e985f5 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -370,6 +370,8 @@ def make_const(expr: str) -> str: # Register defined variable dispatcher.defined_vars.add(pointer_name, defined_type, typedef, allow_shadowing=True) + expr = expr.replace('.', '->') + return (typedef + ref, pointer_name, expr) From 623a7f88838f0a3bc033333bef28e4de03544d37 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:28:08 +0200 Subject: [PATCH 10/48] Recursively add to arglist nested data descriptors. --- dace/codegen/targets/cpu.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index eb7d232966..2759c9744c 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -55,10 +55,30 @@ def __init__(self, frame_codegen, sdfg): # Keep track of generated NestedSDG, and the name of the assigned function self._generated_nested_sdfg = dict() - # Keeps track of generated connectors, so we know how to access them in - # nested scopes + def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): + for k, v in struct.members.items(): + if isinstance(v, data.Structure): + _visit_structure(v, args, f'{prefix}.{k}') + elif isinstance(v, data.Data): + args[f'{prefix}.{k}'] = v + + # Keeps track of generated connectors, so we know how to access them in nested scopes + arglist = dict(self._frame.arglist) for name, arg_type in self._frame.arglist.items(): - if isinstance(arg_type, data.Scalar): + if 
isinstance(arg_type, data.Structure): + desc = sdfg.arrays[name] + _visit_structure(arg_type, arglist, name) + elif isinstance(arg_type, data.StructArray): + desc = sdfg.arrays[name] + desc = desc.stype + for attr in dir(desc): + value = getattr(desc, attr) + if isinstance(value, data.Data): + assert attr in sdfg.arrays + arglist[attr] = value + + for name, arg_type in arglist.items(): + if isinstance(arg_type, (data.Scalar, data.Structure)): # GPU global memory is only accessed via pointers # TODO(later): Fix workaround somehow if arg_type.storage is dtypes.StorageType.GPU_Global: From 1e5baddcbda6e0d78bd9526af7e1a0b78627a4e3 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:28:50 +0200 Subject: [PATCH 11/48] Recursively look into nested data to emit definitions. --- dace/codegen/targets/framecode.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 6f302c11ba..be6b85602a 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -150,15 +150,23 @@ def generate_fileheader(self, sdfg: SDFG, global_stream: CodeIOStream, backend: for _, arrname, arr in sdfg.arrays_recursive(): if arr is not None: datatypes.add(arr.dtype) + + def _emit_definitions(dtype: dtypes.typeclass, wrote_something: bool) -> bool: + if isinstance(dtype, dtypes.pointer): + wrote_something = _emit_definitions(dtype._typeclass, wrote_something) + elif isinstance(dtype, dtypes.struct): + for field in dtype.fields.values(): + wrote_something = _emit_definitions(field, wrote_something) + if hasattr(dtype, 'emit_definition'): + if not wrote_something: + global_stream.write("", sdfg) + global_stream.write(dtype.emit_definition(), sdfg) + return wrote_something # Emit unique definitions wrote_something = False for typ in datatypes: - if hasattr(typ, 'emit_definition'): - if not wrote_something: - global_stream.write("", sdfg) - 
wrote_something = True - global_stream.write(typ.emit_definition(), sdfg) + wrote_something = _emit_definitions(typ, wrote_something) if wrote_something: global_stream.write("", sdfg) From 36d4e826ac769f1cb99ecc3c8fe8206c0690cdab Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:30:21 +0200 Subject: [PATCH 12/48] SDFG data (_arrays) are now stored in a NestedDict. --- dace/sdfg/sdfg.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 18763e385a..6e4c3587f4 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -48,6 +48,35 @@ from dace.codegen.compiled_sdfg import CompiledSDFG +class NestedDict(dict): + + def __init__(self): + super(NestedDict, self).__init__() + + def __getitem__(self, key): + tokens = key.split('.') + token = tokens.pop(0) + result = super(NestedDict, self).__getitem__(token) + while tokens: + token = tokens.pop(0) + result = result.members[token] + return result + + def __contains__(self, key): + tokens = key.split('.') + token = tokens.pop(0) + result = super(NestedDict, self).__contains__(token) + desc = None + while tokens and result: + if desc is None: + desc = super(NestedDict, self).__getitem__(token) + else: + desc = desc.members[token] + token = tokens.pop(0) + result = token in desc.members + return result + + def _arrays_to_json(arrays): if arrays is None: return None @@ -375,7 +404,7 @@ class SDFG(OrderedDiGraph[SDFGState, InterstateEdge]): name = Property(dtype=str, desc="Name of the SDFG") arg_names = ListProperty(element_type=str, desc='Ordered argument names (used for calling conventions).') constants_prop = Property(dtype=dict, default={}, desc="Compile-time constants") - _arrays = Property(dtype=dict, + _arrays = Property(dtype=NestedDict, desc="Data descriptors for this SDFG", to_json=_arrays_to_json, from_json=_arrays_from_json) @@ -456,7 +485,7 @@ def __init__(self, self._sdfg_list = [self] 
self._start_state: Optional[int] = None self._cached_start_state: Optional[SDFGState] = None - self._arrays = {} # type: Dict[str, dt.Array] + self._arrays = NestedDict() # type: Dict[str, dt.Array] self._labels: Set[str] = set() self.global_code = {'frame': CodeBlock("", dtypes.Language.CPP)} self.init_code = {'frame': CodeBlock("", dtypes.Language.CPP)} From 38a4265a29c64f6100e03f536aecdd09fd160dca Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:31:11 +0200 Subject: [PATCH 13/48] Adjusted the matching check for memlet data and src/dst nodes to not fail for Structures. --- dace/sdfg/validation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index 3bac646479..c963df9d7e 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -587,9 +587,14 @@ def validate_state(state: 'dace.sdfg.SDFGState', break # Check if memlet data matches src or dst nodes - if (e.data.data is not None and (isinstance(src_node, nd.AccessNode) or isinstance(dst_node, nd.AccessNode)) - and (not isinstance(src_node, nd.AccessNode) or e.data.data != src_node.data) - and (not isinstance(dst_node, nd.AccessNode) or e.data.data != dst_node.data)): + name = e.data.data + if isinstance(src_node, nd.AccessNode) and isinstance(sdfg.arrays[src_node.data], dt.Structure): + name = None + if isinstance(dst_node, nd.AccessNode) and isinstance(sdfg.arrays[dst_node.data], dt.Structure): + name = None + if (name is not None and (isinstance(src_node, nd.AccessNode) or isinstance(dst_node, nd.AccessNode)) + and (not isinstance(src_node, nd.AccessNode) or (name != src_node.data and name != e.src_conn)) + and (not isinstance(dst_node, nd.AccessNode) or (name != dst_node.data and name != e.dst_conn))): raise InvalidSDFGEdgeError( "Memlet data does not match source or destination " "data nodes)", From 479cb2ad240dd167a7b26d2665527e04727cffe6 Mon Sep 17 00:00:00 2001 From: Alexandros 
Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:32:51 +0200 Subject: [PATCH 14/48] Added tests. --- tests/sdfg/data/structure_test.py | 240 ++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 tests/sdfg/data/structure_test.py diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py new file mode 100644 index 0000000000..3783a98068 --- /dev/null +++ b/tests/sdfg/data/structure_test.py @@ -0,0 +1,240 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import numpy as np + +from scipy import sparse + + +def create_structure(name: str, **members) -> dace.data.Structure: + + StructureClass = type(name, (dace.data.Structure, ), {}) + return StructureClass(members) + + +def test_read_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + + sdfg = dace.SDFG('csr_to_dense') + + sdfg.add_datadesc('A', CSR) + sdfg.add_array('B', [M, N], dace.float32) + + sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) + sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) + sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', CSR.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', CSR.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', CSR.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + 
jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = CSR.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) + + func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +def test_write_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', CSR) + + sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) + sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) + sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = 
sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 
'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outB = CSR.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + + func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A, B.toarray()) + + +def test_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + Wrapper = create_structure('WrapperClass', csr=CSR) + + sdfg = dace.SDFG('nested_csr_to_dense') + + sdfg.add_datadesc('A', Wrapper) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = Wrapper.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = 
state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = CSR.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = Wrapper.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +if __name__ == "__main__": + test_read_structure() + test_write_structure() + test_read_nested_structure() From 8365ab34926a01d65a67d93d1b1bbaf2e67eac11 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 20:25:26 +0200 Subject: [PATCH 15/48] Serialization fixes. 
--- dace/sdfg/sdfg.py | 13 ++++++++++--- tests/sdfg/data/structure_test.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 6e4c3587f4..b5598870ec 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -50,8 +50,9 @@ class NestedDict(dict): - def __init__(self): - super(NestedDict, self).__init__() + def __init__(self, mapping=None): + mapping = mapping or {} + super(NestedDict, self).__init__(mapping) def __getitem__(self, key): tokens = key.split('.') @@ -89,6 +90,12 @@ def _arrays_from_json(obj, context=None): return {k: dace.serialize.from_json(v, context) for k, v in obj.items()} +def _nested_arrays_from_json(obj, context=None): + if obj is None: + return NestedDict({}) + return NestedDict({k: dace.serialize.from_json(v, context) for k, v in obj.items()}) + + def _replace_dict_keys(d, old, new): if old in d: if new in d: @@ -407,7 +414,7 @@ class SDFG(OrderedDiGraph[SDFGState, InterstateEdge]): _arrays = Property(dtype=NestedDict, desc="Data descriptors for this SDFG", to_json=_arrays_to_json, - from_json=_arrays_from_json) + from_json=_nested_arrays_from_json) symbols = DictProperty(str, dtypes.typeclass, desc="Global symbols for this SDFG") instrument = EnumProperty(dtype=dtypes.InstrumentationType, diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 3783a98068..5348ecaa5a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -2,12 +2,29 @@ import dace import numpy as np +from dace import serialize +from dace.properties import make_properties from scipy import sparse def create_structure(name: str, **members) -> dace.data.Structure: StructureClass = type(name, (dace.data.Structure, ), {}) + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != name: + raise TypeError("Invalid data type") + + # Create dummy object + ret = StructureClass({}) + 
serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret + + setattr(StructureClass, 'from_json', from_json) + StructureClass = make_properties(StructureClass) + return StructureClass(members) From 14ba6655c883f2f0761ca4ccacfb722d82b7eac3 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 20:29:36 +0200 Subject: [PATCH 16/48] Fixed NestedDict for non-str keys. --- dace/sdfg/sdfg.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index b5598870ec..a4c29c2e89 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -55,16 +55,17 @@ def __init__(self, mapping=None): super(NestedDict, self).__init__(mapping) def __getitem__(self, key): - tokens = key.split('.') + tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) result = super(NestedDict, self).__getitem__(token) while tokens: token = tokens.pop(0) result = result.members[token] return result + def __contains__(self, key): - tokens = key.split('.') + tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) result = super(NestedDict, self).__contains__(token) desc = None From 80d6f10af1efe172560d64b976c451a91670b2fb Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 14:56:21 +0200 Subject: [PATCH 17/48] Added support for transient Structures. 
--- dace/codegen/targets/cpu.py | 16 ++++++++++++++-- dace/data.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 2759c9744c..7ff91cbc7b 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -286,16 +286,17 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d name = node.data alloc_name = cpp.ptr(name, nodedesc, sdfg, self._frame) name = alloc_name + alloc_name = alloc_name.replace('.', '->') if nodedesc.transient is False: return # Check if array is already allocated - if self._dispatcher.defined_vars.has(alloc_name): + if self._dispatcher.defined_vars.has(name): return # Check if array is already declared - declared = self._dispatcher.declared_arrays.has(alloc_name) + declared = self._dispatcher.declared_arrays.has(name) define_var = self._dispatcher.defined_vars.add if nodedesc.lifetime in (dtypes.AllocationLifetime.Persistent, dtypes.AllocationLifetime.External): @@ -308,6 +309,17 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d if not isinstance(nodedesc.dtype, dtypes.opaque): arrsize_bytes = arrsize * nodedesc.dtype.bytes + if isinstance(nodedesc, data.Structure): + declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") + define_var(name, DefinedType.Pointer, nodedesc.ctype) + for k, v in nodedesc.members.items(): + if isinstance(v, data.Data): + ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype + defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer + self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef) + self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream, + declaration_stream, allocation_stream) + return if isinstance(nodedesc, data.View): return self.allocate_view(sdfg, dfg, state_id, 
node, function_stream, declaration_stream, allocation_stream) if isinstance(nodedesc, data.Reference): diff --git a/dace/data.py b/dace/data.py index 0f1ef1f266..838fc43542 100644 --- a/dace/data.py +++ b/dace/data.py @@ -369,7 +369,10 @@ def __init__(self, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): + # TODO: Should we make a deep-copy here? self.members = members or {} + for k, v in self.members.items(): + v.transient = transient fields_and_types = dict() symbols = set() for k, v in members.items(): @@ -433,6 +436,31 @@ def __getitem__(self, s): return StructArray(self, (s, )) +@make_properties +class StructureView(Structure): + """ + Data descriptor that acts as a reference (or view) of another structure. + """ + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'StructureView': + raise TypeError("Invalid data type") + + # Create dummy object + ret = StructureView({}) + serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for Views') + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. """ From 9658c2236b7ba154bccbbd3b839944f4f88c2668 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 14:56:40 +0200 Subject: [PATCH 18/48] Edited tests. 
--- tests/sdfg/data/structure_test.py | 346 +++++++++++++++++++++++++++--- 1 file changed, 321 insertions(+), 25 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 5348ecaa5a..462c6a8e7b 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -7,7 +7,7 @@ from scipy import sparse -def create_structure(name: str, **members) -> dace.data.Structure: +def create_structure(name: str) -> dace.data.Structure: StructureClass = type(name, (dace.data.Structure, ), {}) @@ -25,28 +25,28 @@ def from_json(json_obj, context=None): setattr(StructureClass, 'from_json', from_json) StructureClass = make_properties(StructureClass) - return StructureClass(members) + return StructureClass def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz], rows=M, cols=N, - nnz=nnz) + nnz=nnz)) sdfg = dace.SDFG('csr_to_dense') - sdfg.add_datadesc('A', CSR) + sdfg.add_datadesc('A', csr_obj) sdfg.add_array('B', [M, N], dace.float32) - sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) - sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) - sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) state = sdfg.add_state() @@ -57,9 +57,9 @@ def test_read_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', CSR.members['indptr'])) - 
state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', CSR.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', CSR.members['data'])) + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -79,7 +79,7 @@ def test_read_structure(): A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) B = np.zeros((20, 20), dtype=np.float32) - inpA = CSR.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], data=A.data.__array_interface__['data'][0], rows=A.shape[0], @@ -97,22 +97,22 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz], rows=M, cols=N, - nnz=nnz) + nnz=nnz)) sdfg = dace.SDFG('dense_to_csr') sdfg.add_array('A', [M, N], dace.float32) - sdfg.add_datadesc('B', CSR) + sdfg.add_datadesc('B', csr_obj) - sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) - sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) - sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', 
csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) # Make If if_before = sdfg.add_state('if_before') @@ -167,7 +167,7 @@ def test_write_structure(): B.indices[:] = -1 B.data[:] = -1 - outB = CSR.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], data=B.data.__array_interface__['data'][0], rows=tmp.shape[0], @@ -181,7 +181,204 @@ def test_write_structure(): assert np.allclose(A, B.toarray()) +def test_local_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + tmp_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), transient=True) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', csr_obj) + sdfg.add_datadesc('tmp', tmp_obj) + + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + sdfg.add_view('tmp_vindptr', tmp_obj.members['indptr'].shape, tmp_obj.members['indptr'].dtype) + sdfg.add_view('tmp_vindices', tmp_obj.members['indices'].shape, tmp_obj.members['indices'].dtype) + sdfg.add_view('tmp_vdata', tmp_obj.members['data'].shape, tmp_obj.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, 
if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + tmp = if_body.add_access('tmp') + indices = if_body.add_access('tmp_vindices') + data = if_body.add_access('tmp_vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', tmp, 'data', dace.Memlet(data='tmp.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='tmp_vindices', subset='idx')) + if_body.add_edge(indices, 'views', tmp, 'indices', dace.Memlet(data='tmp.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + tmp = i_guard.add_access('tmp') + indptr = i_guard.add_access('tmp_vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + tmp = i_after.add_access('tmp') + indptr = i_after.add_access('tmp_vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='M')) + i_after.add_edge(indptr, 'views', tmp, 'indptr', 
dace.Memlet(data='tmp.indptr', subset='0:M+1')) + + set_B = sdfg.add_state('set_B') + sdfg.add_edge(i_after, set_B, dace.InterstateEdge()) + tmp = set_B.add_access('tmp') + tmp_indptr = set_B.add_access('tmp_vindptr') + tmp_indices = set_B.add_access('tmp_vindices') + tmp_data = set_B.add_access('tmp_vdata') + set_B.add_edge(tmp, 'indptr', tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + set_B.add_edge(tmp, 'indices', tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) + set_B.add_edge(tmp, 'data', tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) + B = set_B.add_access('B') + B_indptr = set_B.add_access('vindptr') + B_indices = set_B.add_access('vindices') + B_data = set_B.add_access('vdata') + set_B.add_edge(B_indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + set_B.add_edge(B_indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) + set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) + t, me, mx = set_B.add_mapped_tasklet('set_data', + {'idx': '0:nnz'}, + {'__inp': dace.Memlet(data='tmp_vdata', subset='idx')}, + '__out = 2 * __inp', + {'__out': dace.Memlet(data='vdata', subset='idx')}, + external_edges=True, + input_nodes={'tmp_vdata': tmp_data}, + output_nodes={'vdata': B_data}) + + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], 
+ rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + + func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A * 2, B.toarray()) + + def test_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + Wrapper = create_structure('WrapperClass') + wrapper_obj = Wrapper(dict(csr=csr_obj)) + + sdfg = dace.SDFG('nested_csr_to_dense') + + sdfg.add_datadesc('A', wrapper_obj) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), 
dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = csr_obj.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +def test_read_nested_structure_2(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) CSR = create_structure('CSRMatrix', indptr=dace.int32[M + 1], @@ -190,14 +387,16 @@ def test_read_nested_structure(): rows=M, cols=N, nnz=nnz) + CSRView = dace.data.StructureView(CSR.members, transient=True) Wrapper = create_structure('WrapperClass', csr=CSR) - sdfg = dace.SDFG('nested_csr_to_dense') + sdfg = dace.SDFG('nested_csr_to_dense_2') sdfg.add_datadesc('A', Wrapper) sdfg.add_array('B', [M, N], dace.float32) spmat = Wrapper.members['csr'] + sdfg.add_datadesc('vcsr', CSRView) sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) @@ -207,13 +406,15 @@ def test_read_nested_structure(): A = state.add_access('A') B = state.add_access('B') + 
csr = state.add_access('vcsr') indptr = state.add_access('vindptr') indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + state.add_edge(A, 'csr', csr, 'views', dace.Memlet.from_array('A.csr', spmat)) + state.add_edge(csr, 'indptr', indptr, 'views', dace.Memlet.from_array('vcsr.indptr', spmat.members['indptr'])) + state.add_edge(csr, 'indices', indices, 'views', dace.Memlet.from_array('vcsr.indices', spmat.members['indices'])) + state.add_edge(csr, 'data', data, 'views', dace.Memlet.from_array('vcsr.data', spmat.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -251,7 +452,102 @@ def test_read_nested_structure(): assert np.allclose(B, ref) +def test_write_nested_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + Wrapper = create_structure('WrapperClass') + wrapper_obj = Wrapper(dict(csr=csr_obj)) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', wrapper_obj) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = 
sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.csr.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.csr.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 'views', B, 'indptr', 
dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + import ctypes + outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) + + func(A=A, B=outW, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A, B.toarray()) + + if __name__ == "__main__": test_read_structure() test_write_structure() + test_local_structure() test_read_nested_structure() + # test_read_nested_structure_2() + test_write_nested_structure() From b1dbb6b385c5186ac16b5be1ea3d394953c6bf17 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 15:32:40 +0200 Subject: [PATCH 19/48] Structures have name attribute (instead of subclassing). 
--- dace/data.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index 838fc43542..e424aca66a 100644 --- a/dace/data.py +++ b/dace/data.py @@ -361,9 +361,11 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) + name = Property(dtype=str, desc="Structure name") def __init__(self, members: Dict[str, Any], + name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, @@ -373,6 +375,7 @@ def __init__(self, self.members = members or {} for k, v in self.members.items(): v.transient = transient + self.name = name fields_and_types = dict() symbols = set() for k, v in members.items(): @@ -399,9 +402,20 @@ def __init__(self, fields_and_types[str(s)] = s.dtype else: fields_and_types[str(s)] = dtypes.int32 - dtype = dtypes.pointer(dtypes.struct(self.__class__.__name__, **fields_and_types)) + dtype = dtypes.pointer(dtypes.struct(name, **fields_and_types)) shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'Structure': + raise TypeError("Invalid data type") + + # Create dummy object + ret = Structure({}) + serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret @property def total_size(self): From 5de2ae35d25b9f78eeecb0080504be34b6577cec Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 15:32:59 +0200 Subject: [PATCH 20/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 192 +++++++++++++++--------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 462c6a8e7b..b3d72b9d7a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -1,6 +1,7 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace import numpy as np +import pytest from dace import serialize from dace.properties import make_properties @@ -21,7 +22,7 @@ def from_json(json_obj, context=None): serialize.set_properties_from_json(ret, json_obj, context=context) return ret - + setattr(StructureClass, 'from_json', from_json) StructureClass = make_properties(StructureClass) @@ -31,13 +32,13 @@ def from_json(json_obj, context=None): def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -80,13 +81,13 @@ def test_read_structure(): B = np.zeros((20, 20), dtype=np.float32) inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], - indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - N=A.shape[1], - nnz=A.nnz) + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) ref = A.toarray() @@ -97,13 +98,13 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in 
('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -168,13 +169,13 @@ def test_write_structure(): B.data[:] = -1 outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -182,23 +183,25 @@ def test_write_structure(): def test_local_structure(): - - M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - tmp_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), transient=True) - sdfg = dace.SDFG('dense_to_csr') + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('dense_to_csr_local') sdfg.add_array('A', [M, N], dace.float32) 
sdfg.add_datadesc('B', csr_obj) @@ -273,16 +276,13 @@ def test_local_structure(): set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) - t, me, mx = set_B.add_mapped_tasklet('set_data', - {'idx': '0:nnz'}, + t, me, mx = set_B.add_mapped_tasklet('set_data', {'idx': '0:nnz'}, {'__inp': dace.Memlet(data='tmp_vdata', subset='idx')}, - '__out = 2 * __inp', - {'__out': dace.Memlet(data='vdata', subset='idx')}, + '__out = 2 * __inp', {'__out': dace.Memlet(data='vdata', subset='idx')}, external_edges=True, input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) - func = sdfg.compile() rng = np.random.default_rng(42) @@ -294,13 +294,13 @@ def test_local_structure(): B.data[:] = -1 outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -309,15 +309,14 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - Wrapper = create_structure('WrapperClass') - wrapper_obj = Wrapper(dict(csr=csr_obj)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + 
nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('nested_csr_to_dense') @@ -378,24 +377,25 @@ def test_read_nested_structure(): assert np.allclose(B, ref) +@pytest.mark.skip def test_read_nested_structure_2(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz) - CSRView = dace.data.StructureView(CSR.members, transient=True) - Wrapper = create_structure('WrapperClass', csr=CSR) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + CSRView = dace.data.StructureView(csr_obj.members, transient=True) + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('nested_csr_to_dense_2') - sdfg.add_datadesc('A', Wrapper) + sdfg.add_datadesc('A', wrapper_obj) sdfg.add_array('B', [M, N], dace.float32) - spmat = Wrapper.members['csr'] + spmat = wrapper_obj.members['csr'] sdfg.add_datadesc('vcsr', CSRView) sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) @@ -428,13 +428,14 @@ def test_read_nested_structure_2(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) B = np.zeros((20, 20), dtype=np.float32) - structclass = CSR.dtype._typeclass.as_ctypes() + structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], 
indices=A.indices.__array_interface__['data'][0], data=A.data.__array_interface__['data'][0], @@ -444,7 +445,7 @@ def test_read_nested_structure_2(): K=A.shape[1], nnz=A.nnz) import ctypes - inpW = Wrapper.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) ref = A.toarray() @@ -455,15 +456,14 @@ def test_read_nested_structure_2(): def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - Wrapper = create_structure('WrapperClass') - wrapper_obj = Wrapper(dict(csr=csr_obj)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('dense_to_csr') @@ -529,13 +529,13 @@ def test_write_nested_structure(): B.data[:] = -1 outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) import ctypes outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) @@ -549,5 +549,5 @@ def test_write_nested_structure(): test_write_structure() test_local_structure() test_read_nested_structure() - # test_read_nested_structure_2() + test_read_nested_structure_2() test_write_nested_structure() From 1fbc45f66ebcff4979f7cb05566de56b70e2b1b9 Mon Sep 
17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:08:26 +0200 Subject: [PATCH 21/48] Removed nested data connectors. --- tests/sdfg/data/structure_test.py | 56 +++++++++++++++++-------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index b3d72b9d7a..8636dc1602 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -58,9 +58,9 @@ def test_read_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -74,6 +74,7 @@ def test_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -129,10 +130,10 @@ def test_write_structure(): indices = if_body.add_access('vindices') data = if_body.add_access('vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + if_body.add_edge(data, 
'views', B, None, dace.Memlet(data='B.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) - if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', B, None, dace.Memlet(data='B.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -151,13 +152,14 @@ def test_write_structure(): indptr = i_guard.add_access('vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) B = i_after.add_access('B') indptr = i_after.add_access('vindptr') t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) - i_after.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -229,10 +231,10 @@ def test_local_structure(): indices = if_body.add_access('tmp_vindices') data = if_body.add_access('tmp_vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', tmp, 'data', dace.Memlet(data='tmp.data', subset='0:nnz')) + if_body.add_edge(data, 'views', tmp, None, dace.Memlet(data='tmp.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='tmp_vindices', subset='idx')) - if_body.add_edge(indices, 'views', 
tmp, 'indices', dace.Memlet(data='tmp.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', tmp, None, dace.Memlet(data='tmp.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -251,12 +253,12 @@ def test_local_structure(): indptr = i_guard.add_access('tmp_vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', tmp, None, dace.Memlet(data='tmp.indptr', subset='0:M+1')) tmp = i_after.add_access('tmp') indptr = i_after.add_access('tmp_vindptr') t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='M')) - i_after.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', tmp, None, dace.Memlet(data='tmp.indptr', subset='0:M+1')) set_B = sdfg.add_state('set_B') sdfg.add_edge(i_after, set_B, dace.InterstateEdge()) @@ -264,16 +266,16 @@ def test_local_structure(): tmp_indptr = set_B.add_access('tmp_vindptr') tmp_indices = set_B.add_access('tmp_vindices') tmp_data = set_B.add_access('tmp_vdata') - set_B.add_edge(tmp, 'indptr', tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) - set_B.add_edge(tmp, 'indices', tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) - set_B.add_edge(tmp, 'data', tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) + set_B.add_edge(tmp, None, tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + set_B.add_edge(tmp, None, tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) + set_B.add_edge(tmp, None, tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) B = set_B.add_access('B') 
B_indptr = set_B.add_access('vindptr') B_indices = set_B.add_access('vindices') B_data = set_B.add_access('vdata') - set_B.add_edge(B_indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) - set_B.add_edge(B_indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) - set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + set_B.add_edge(B_indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) + set_B.add_edge(B_indices, 'views', B, None, dace.Memlet(data='B.indices', subset='0:nnz')) + set_B.add_edge(B_data, 'views', B, None, dace.Memlet(data='B.data', subset='0:nnz')) set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) t, me, mx = set_B.add_mapped_tasklet('set_data', {'idx': '0:nnz'}, @@ -283,6 +285,7 @@ def test_local_structure(): input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -337,9 +340,9 @@ def test_read_nested_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) 
@@ -353,6 +356,7 @@ def test_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -429,6 +433,7 @@ def test_read_nested_structure_2(): state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') sdfg.view() + return func = sdfg.compile() rng = np.random.default_rng(42) @@ -489,10 +494,10 @@ def test_write_nested_structure(): indices = if_body.add_access('vindices') data = if_body.add_access('vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.csr.data', subset='0:nnz')) + if_body.add_edge(data, 'views', B, None, dace.Memlet(data='B.csr.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) - if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.csr.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', B, None, dace.Memlet(data='B.csr.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -511,13 +516,14 @@ def test_write_nested_structure(): indptr = i_guard.add_access('vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) B = i_after.add_access('B') indptr = i_after.add_access('vindptr') t = i_after.add_tasklet('set_indptr', {}, 
{'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) - i_after.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -549,5 +555,5 @@ def test_write_nested_structure(): test_write_structure() test_local_structure() test_read_nested_structure() - test_read_nested_structure_2() + # test_read_nested_structure_2() test_write_nested_structure() From 6fa7e53ea4c39752c60b386895a6c9ba4a542b80 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:27:41 +0200 Subject: [PATCH 22/48] Added support for direct access to nested data. --- dace/codegen/targets/cpu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 7ff91cbc7b..137de75c55 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1169,6 +1169,7 @@ def memlet_definition(self, if not types: types = self._dispatcher.defined_vars.get(ptr, is_global=True) var_type, ctypedef = types + ptr = ptr.replace('.', '->') if fpga.is_fpga_array(desc): decouple_array_interfaces = Config.get_bool("compiler", "xilinx", "decouple_array_interfaces") From 71d7c3db0f2391b79281a89732b64d0d4b861e14 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:28:20 +0200 Subject: [PATCH 23/48] WIP: Add nested data free symbols to SDFG. 
--- dace/sdfg/sdfg.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index a4c29c2e89..1f385a4b75 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2005,10 +2005,20 @@ def add_datadesc(self, name: str, datadesc: dt.Data, find_new_name=False) -> str raise NameError(f'Array or Stream with name "{name}" already exists in SDFG') self._arrays[name] = datadesc + def _add_symbols(desc: dt.Data): + if isinstance(desc, dt.Structure): + for v in desc.members.values(): + if isinstance(v, dt.Data): + _add_symbols(v) + for sym in desc.free_symbols: + if sym.name not in self.symbols: + self.add_symbol(sym.name, sym.dtype) + # Add free symbols to the SDFG global symbol storage - for sym in datadesc.free_symbols: - if sym.name not in self.symbols: - self.add_symbol(sym.name, sym.dtype) + # for sym in datadesc.free_symbols: + # if sym.name not in self.symbols: + # self.add_symbol(sym.name, sym.dtype) + _add_symbols(datadesc) return name From e0a4409ff4a2b909f901f1a1592d3b9669387807 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:29:13 +0200 Subject: [PATCH 24/48] Added test for direct nested data access. 
--- tests/sdfg/data/structure_test.py | 82 ++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 8636dc1602..3116a5764a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -550,10 +550,80 @@ def test_write_nested_structure(): assert np.allclose(A, B.toarray()) +def test_direct_read_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + + sdfg = dace.SDFG('csr_to_dense_direct') + + sdfg.add_datadesc('A', csr_obj) + sdfg.add_array('B', [M, N], dace.float32) + + # sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + # sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + # sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + state = sdfg.add_state() + + # A = state.add_access('A') + indptr = state.add_access('A.indptr') + indices = state.add_access('A.indices') + data = state.add_access('A.data') + B = state.add_access('B') + + # indptr = state.add_access('vindptr') + # indices = state.add_access('vindices') + # data = state.add_access('vdata') + + # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + 
+ state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.indptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.indptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='A.indices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.data', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + sdfg.view() + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) + + func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + if __name__ == "__main__": - test_read_structure() - test_write_structure() - test_local_structure() - test_read_nested_structure() - # test_read_nested_structure_2() - test_write_nested_structure() + # test_read_structure() + # test_write_structure() + # test_local_structure() + # test_read_nested_structure() + # test_write_nested_structure() + test_direct_read_structure() From 0593ea4f1a86951b210c727c95931ca3664f7423 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:55:42 +0200 Subject: [PATCH 25/48] Added test for direct double-nested data accesses. 
--- tests/sdfg/data/structure_test.py | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 3116a5764a..91429e8bbc 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -620,6 +620,80 @@ def test_direct_read_structure(): assert np.allclose(B, ref) +def test_direct_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') + + sdfg = dace.SDFG('nested_csr_to_dense_direct') + + sdfg.add_datadesc('A', wrapper_obj) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + # A = state.add_access('A') + indptr = state.add_access('A.csr.indptr') + indices = state.add_access('A.csr.indices') + data = state.add_access('A.csr.data') + B = state.add_access('B') + + # indptr = state.add_access('vindptr') + # indices = state.add_access('vindices') + # data = state.add_access('vdata') + + # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + 
jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.csr.indptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.csr.indptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='A.csr.indices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.csr.data', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + sdfg.view() + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = csr_obj.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + if __name__ == "__main__": # test_read_structure() # test_write_structure() @@ -627,3 +701,4 @@ def test_direct_read_structure(): # test_read_nested_structure() # test_write_nested_structure() test_direct_read_structure() + test_direct_read_nested_structure() From 0df9c3518c6d1ff307314a39dcbc8621423e3af4 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:17:02 +0200 Subject: [PATCH 26/48] Added free-symbols and repr. 
--- dace/data.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index e424aca66a..b54a4f9efb 100644 --- a/dace/data.py +++ b/dace/data.py @@ -364,7 +364,7 @@ class Structure(Data): name = Property(dtype=str, desc="Structure name") def __init__(self, - members: Dict[str, Any], + members: Dict[str, Data], name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, @@ -432,6 +432,17 @@ def start_offset(self): @property def strides(self): return [1] + + @property + def free_symbols(self) -> Set[symbolic.SymbolicType]: + """ Returns a set of undefined symbols in this data descriptor. """ + result = set(self.symbols.keys()) + for k, v in self.members.items(): + result |= v.free_symbols + return result + + def __repr__(self): + return f"{self.name} ({', '.join([f'{k}: {v}' for k, v in self.members.items()])})" def as_arg(self, with_types=True, for_call=False, name=None): if self.storage is dtypes.StorageType.GPU_Global: From 909c1aaafd76622cecd4972cd2b3718caf2c261f Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:18:17 +0200 Subject: [PATCH 27/48] Recursively add free symbols from nested data. --- dace/sdfg/sdfg.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 1f385a4b75..ae85bff5d1 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2015,9 +2015,6 @@ def _add_symbols(desc: dt.Data): self.add_symbol(sym.name, sym.dtype) # Add free symbols to the SDFG global symbol storage - # for sym in datadesc.free_symbols: - # if sym.name not in self.symbols: - # self.add_symbol(sym.name, sym.dtype) _add_symbols(datadesc) return name From e2b0d8b410e699692c1bf4863ae36a0b6f932e27 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:23:48 +0200 Subject: [PATCH 28/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 234 +++--------------------------- 1 file changed, 22 insertions(+), 212 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 91429e8bbc..2646fe3d03 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -8,36 +8,10 @@ from scipy import sparse -def create_structure(name: str) -> dace.data.Structure: - - StructureClass = type(name, (dace.data.Structure, ), {}) - - @staticmethod - def from_json(json_obj, context=None): - if json_obj['type'] != name: - raise TypeError("Invalid data type") - - # Create dummy object - ret = StructureClass({}) - serialize.set_properties_from_json(ret, json_obj, context=context) - - return ret - - setattr(StructureClass, 'from_json', from_json) - StructureClass = make_properties(StructureClass) - - return StructureClass - - def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -83,14 +57,9 @@ def test_read_structure(): inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - N=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) - func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) @@ -99,12 +68,7 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - 
indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -172,12 +136,7 @@ def test_write_structure(): outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -187,19 +146,9 @@ def test_write_structure(): def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') - tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix', transient=True) @@ -298,12 +247,7 @@ def test_local_structure(): outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -312,12 +256,7 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -366,93 +305,11 @@ def test_read_nested_structure(): structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) import ctypes inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) - ref = A.toarray() - - assert np.allclose(B, ref) - - -@pytest.mark.skip -def test_read_nested_structure_2(): - M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), - name='CSRMatrix') - CSRView = dace.data.StructureView(csr_obj.members, transient=True) - wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') - - sdfg = dace.SDFG('nested_csr_to_dense_2') - - sdfg.add_datadesc('A', wrapper_obj) - sdfg.add_array('B', [M, N], dace.float32) - - spmat = wrapper_obj.members['csr'] - sdfg.add_datadesc('vcsr', CSRView) - sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) - sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) - sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) - - state = sdfg.add_state() - - A = state.add_access('A') - B = state.add_access('B') - - csr = state.add_access('vcsr') - indptr = state.add_access('vindptr') - indices = 
state.add_access('vindices') - data = state.add_access('vdata') - - state.add_edge(A, 'csr', csr, 'views', dace.Memlet.from_array('A.csr', spmat)) - state.add_edge(csr, 'indptr', indptr, 'views', dace.Memlet.from_array('vcsr.indptr', spmat.members['indptr'])) - state.add_edge(csr, 'indices', indices, 'views', dace.Memlet.from_array('vcsr.indices', spmat.members['indices'])) - state.add_edge(csr, 'data', data, 'views', dace.Memlet.from_array('vcsr.data', spmat.members['data'])) - - ime, imx = state.add_map('i', dict(i='0:M')) - jme, jmx = state.add_map('idx', dict(idx='start:stop')) - jme.add_in_connector('start') - jme.add_in_connector('stop') - t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') - - state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') - state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') - state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') - state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') - state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - - sdfg.view() - return - func = sdfg.compile() - - rng = np.random.default_rng(42) - A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) - B = np.zeros((20, 20), dtype=np.float32) - - structclass = csr_obj.dtype._typeclass.as_ctypes() - inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], - indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) - import ctypes - inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpW, B=B, M=A.shape[0], 
N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) @@ -461,12 +318,7 @@ def test_read_nested_structure_2(): def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -536,12 +388,7 @@ def test_write_nested_structure(): outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) import ctypes outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) @@ -553,12 +400,7 @@ def test_write_nested_structure(): def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ -566,26 +408,13 @@ def test_direct_read_structure(): sdfg.add_datadesc('A', csr_obj) sdfg.add_array('B', [M, N], dace.float32) - # sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) - # sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) - # sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) - state = sdfg.add_state() - # A = state.add_access('A') indptr = 
state.add_access('A.indptr') indices = state.add_access('A.indices') data = state.add_access('A.data') B = state.add_access('B') - # indptr = state.add_access('vindptr') - # indices = state.add_access('vindices') - # data = state.add_access('vdata') - - # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) - # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) - # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) - ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) jme.add_in_connector('start') @@ -622,12 +451,7 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -643,20 +467,11 @@ def test_direct_read_nested_structure(): state = sdfg.add_state() - # A = state.add_access('A') indptr = state.add_access('A.csr.indptr') indices = state.add_access('A.csr.indices') data = state.add_access('A.csr.data') B = state.add_access('B') - # indptr = state.add_access('vindptr') - # indices = state.add_access('vindices') - # data = state.add_access('vdata') - - # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) - ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = 
state.add_map('idx', dict(idx='start:stop')) jme.add_in_connector('start') @@ -679,26 +494,21 @@ def test_direct_read_nested_structure(): structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) import ctypes inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpW, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) if __name__ == "__main__": - # test_read_structure() - # test_write_structure() - # test_local_structure() - # test_read_nested_structure() - # test_write_nested_structure() + test_read_structure() + test_write_structure() + test_local_structure() + test_read_nested_structure() + test_write_nested_structure() test_direct_read_structure() test_direct_read_nested_structure() From 52afc7250b02fb4b85eb3a62bf5104dce9a72995 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:24:14 +0200 Subject: [PATCH 29/48] Scrapped structure private symbols for now. --- dace/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index b54a4f9efb..9d3b6b86f3 100644 --- a/dace/data.py +++ b/dace/data.py @@ -436,7 +436,7 @@ def strides(self): @property def free_symbols(self) -> Set[symbolic.SymbolicType]: """ Returns a set of undefined symbols in this data descriptor. """ - result = set(self.symbols.keys()) + result = set() for k, v in self.members.items(): result |= v.free_symbols return result From 09246442f6e456b4b090651b895be53e3414a512 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:26:10 +0200 Subject: [PATCH 30/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 2646fe3d03..02b8f0c174 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -48,7 +48,6 @@ def test_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -123,7 +122,6 @@ def test_write_structure(): i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -234,7 +232,6 @@ def test_local_structure(): input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -295,7 +292,6 @@ def test_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -375,7 +371,6 @@ def test_write_nested_structure(): i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -427,7 +422,6 @@ def test_direct_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.data', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), 
src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -484,7 +478,6 @@ def test_direct_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.csr.data', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) From 8296a6de765b2209cbd644b6017d68304016ef3c Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:29:06 +0200 Subject: [PATCH 31/48] Added setitem. --- dace/sdfg/sdfg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index ae85bff5d1..23964dbe41 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -62,8 +62,12 @@ def __getitem__(self, key): token = tokens.pop(0) result = result.members[token] return result - + def __setitem__(self, key, val): + if isinstance(key, str) and '.' in key: + raise KeyError('NestedDict does not support setting nested keys') + super(NestedDict, self).__setitem__(key, val) + def __contains__(self, key): tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) From a98fce07b7e78b0bf1a0bc53d17e37e38c22b3dc Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 20:58:42 +0200 Subject: [PATCH 32/48] Serialize Structure members and struct data/length as list of tuples. 
--- dace/data.py | 5 +++-- dace/dtypes.py | 11 +++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dace/data.py b/dace/data.py index 9d3b6b86f3..fd7cdaf8e3 100644 --- a/dace/data.py +++ b/dace/data.py @@ -344,13 +344,14 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): def _arrays_to_json(arrays): if arrays is None: return None - return {k: serialize.to_json(v) for k, v in arrays.items()} + sorted_keys = sorted(arrays.keys()) + return [(k, serialize.to_json(arrays[k])) for k in sorted_keys] def _arrays_from_json(obj, context=None): if obj is None: return {} - return {k: serialize.from_json(v, context) for k, v in obj.items()} + return {k: serialize.from_json(v, context) for k, v in obj} @make_properties diff --git a/dace/dtypes.py b/dace/dtypes.py index d01209469f..9c483d5df1 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -768,13 +768,12 @@ def fields(self): return self._data def to_json(self): + sorted_keys = sorted(self._data.keys()) return { 'type': 'struct', 'name': self.name, - 'data': {k: v.to_json() - for k, v in self._data.items()}, - 'length': {k: v - for k, v in self._length.items()}, + 'data': [(k, self._data[k].to_json()) for k in sorted_keys], + 'length': [(k, self._length[k]) for k in sorted_keys if k in self._length], 'bytes': self.bytes } @@ -786,8 +785,8 @@ def from_json(json_obj, context=None): import dace.serialize # Avoid import loop ret = struct(json_obj['name']) - ret._data = {k: json_to_typeclass(v, context) for k, v in json_obj['data'].items()} - ret._length = {k: v for k, v in json_obj['length'].items()} + ret._data = {k: json_to_typeclass(v, context) for k, v in json_obj['data']} + ret._length = {k: v for k, v in json_obj['length']} ret.bytes = json_obj['bytes'] return ret From f431a8df0c99890d5dbeef48674157aa196d6a3e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 10:29:11 +0200 Subject: [PATCH 33/48] Switched Structures and structs to OrderedDicts. 
--- dace/data.py | 40 ++++++++++++++++++++----------- dace/dtypes.py | 26 ++++++++++---------- tests/sdfg/data/structure_test.py | 8 +++++++ 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/dace/data.py b/dace/data.py index fd7cdaf8e3..b20f9f7db5 100644 --- a/dace/data.py +++ b/dace/data.py @@ -3,8 +3,9 @@ import ctypes import functools +from collections import OrderedDict from numbers import Number -from typing import Any, Dict, Optional, Sequence, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple import numpy import sympy as sp @@ -344,40 +345,47 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): def _arrays_to_json(arrays): if arrays is None: return None - sorted_keys = sorted(arrays.keys()) - return [(k, serialize.to_json(arrays[k])) for k in sorted_keys] + return [(k, serialize.to_json(v)) for k, v in arrays.items()] def _arrays_from_json(obj, context=None): if obj is None: return {} - return {k: serialize.from_json(v, context) for k, v in obj} + return OrderedDict((k, serialize.from_json(v, context)) for k, v in obj) @make_properties class Structure(Data): """ Base class for structures. 
""" - members = Property(dtype=dict, + members = Property(dtype=OrderedDict, desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) + order = ListProperty(element_type=str, desc="Order of structure members") name = Property(dtype=str, desc="Structure name") def __init__(self, members: Dict[str, Data], + order: List[str] = None, name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): + + self.order = order or list(members.keys()) + if set(members.keys()) != set(self.order): + raise ValueError('Order must contain all members of the structure.') + # TODO: Should we make a deep-copy here? - self.members = members or {} + self.members = OrderedDict((k, members[k]) for k in self.order) + for k, v in self.members.items(): v.transient = transient self.name = name - fields_and_types = dict() + fields_and_types = OrderedDict() symbols = set() for k, v in members.items(): if isinstance(v, Structure): @@ -396,13 +404,17 @@ def __init__(self, fields_and_types[k] = dtypes.typeclass(type(v)) else: raise TypeError(f"Attribute {k}'s value {v} has unsupported type: {type(v)}") - for s in symbols: - if str(s) in fields_and_types: - continue - if hasattr(s, "dtype"): - fields_and_types[str(s)] = s.dtype - else: - fields_and_types[str(s)] = dtypes.int32 + + # NOTE: We will not store symbols in the dtype for now, but leaving it as a comment to investigate later. + # NOTE: See discussion about data/object symbols. 
+ # for s in symbols: + # if str(s) in fields_and_types: + # continue + # if hasattr(s, "dtype"): + # fields_and_types[str(s)] = s.dtype + # else: + # fields_and_types[str(s)] = dtypes.int32 + dtype = dtypes.pointer(dtypes.struct(name, **fields_and_types)) shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) diff --git a/dace/dtypes.py b/dace/dtypes.py index 9c483d5df1..678f2f59b0 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -7,6 +7,7 @@ import itertools import numpy import re +from collections import OrderedDict from functools import wraps from typing import Any from dace.config import Config @@ -768,12 +769,11 @@ def fields(self): return self._data def to_json(self): - sorted_keys = sorted(self._data.keys()) return { 'type': 'struct', 'name': self.name, - 'data': [(k, self._data[k].to_json()) for k in sorted_keys], - 'length': [(k, self._length[k]) for k in sorted_keys if k in self._length], + 'data': [(k, v.to_json()) for k, v in self._data.items()], + 'length': [(k, v) for k, v in self._length.items()], 'bytes': self.bytes } @@ -792,19 +792,21 @@ def from_json(json_obj, context=None): return ret def _parse_field_and_types(self, **fields_and_types): - from dace.symbolic import pystr_to_symbolic - self._data = dict() - self._length = dict() + # from dace.symbolic import pystr_to_symbolic + self._data = OrderedDict() + self._length = OrderedDict() self.bytes = 0 for k, v in fields_and_types.items(): if isinstance(v, tuple): t, l = v if not isinstance(t, pointer): raise TypeError("Only pointer types may have a length.") - sym_tokens = pystr_to_symbolic(l).free_symbols - for sym in sym_tokens: - if str(sym) not in fields_and_types.keys(): - raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") + # TODO: Do we need the free symbols of the length in the struct? + # NOTE: It is needed for the old use of dtype.struct. Are we deprecating that? 
+ # sym_tokens = pystr_to_symbolic(l).free_symbols + # for sym in sym_tokens: + # if str(sym) not in fields_and_types.keys(): + # raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") self._data[k] = t self._length[k] = l self.bytes += t.bytes @@ -830,7 +832,7 @@ def as_ctypes(self): fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) - fields = sorted(fields, key=lambda f: f[0]) + # fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) _FFI_CTYPES[self] = struct_class @@ -844,7 +846,7 @@ def emit_definition(self): {typ} }};""".format( name=self.name, - typ='\n'.join([" %s %s;" % (t.ctype, tname) for tname, t in sorted(self._data.items())]), + typ='\n'.join([" %s %s;" % (t.ctype, tname) for tname, t in self._data.items()]), ) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 02b8f0c174..995aacb2fd 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -12,6 +12,7 @@ def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -68,6 +69,7 @@ def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -145,8 +147,10 @@ def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') tmp_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix', transient=True) @@ -254,6 +258,7 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -315,6 +320,7 @@ def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -396,6 +402,7 @@ def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ -446,6 +453,7 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') From 86d9cf2180c0b599b0a025447f1a36b7f9a05ecf Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 10:31:32 +0200 Subject: [PATCH 34/48] Removed order from properties. 
--- dace/data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dace/data.py b/dace/data.py index b20f9f7db5..d8f2d52998 100644 --- a/dace/data.py +++ b/dace/data.py @@ -362,7 +362,6 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) - order = ListProperty(element_type=str, desc="Order of structure members") name = Property(dtype=str, desc="Structure name") def __init__(self, @@ -375,12 +374,12 @@ def __init__(self, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - self.order = order or list(members.keys()) - if set(members.keys()) != set(self.order): + order = order or list(members.keys()) + if set(members.keys()) != set(order): raise ValueError('Order must contain all members of the structure.') # TODO: Should we make a deep-copy here? - self.members = OrderedDict((k, members[k]) for k in self.order) + self.members = OrderedDict((k, members[k]) for k in order) for k, v in self.members.items(): v.transient = transient From 76d6266cead9f7b3de58e8fc879a7d978ddbe757 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 12:05:50 +0200 Subject: [PATCH 35/48] `_argminmax` now creates a struct with the members ordered as accessed in the related tasklets. 
--- dace/frontend/python/replacements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index 9eac240a87..b325a2ea7e 100644 --- a/dace/frontend/python/replacements.py +++ b/dace/frontend/python/replacements.py @@ -975,7 +975,7 @@ def _argminmax(pv: ProgramVisitor, reduced_shape = list(copy.deepcopy(a_arr.shape)) reduced_shape.pop(axis) - val_and_idx = dace.struct('_val_and_idx', val=a_arr.dtype, idx=result_type) + val_and_idx = dace.struct('_val_and_idx', idx=result_type, val=a_arr.dtype) # HACK: since identity cannot be specified for structs, we have to init the output array reduced_structs, reduced_struct_arr = sdfg.add_temp_transient(reduced_shape, val_and_idx) From 1d3db91f7104e51dd90ce41da3f84a0140ab69e4 Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 08:38:15 +0200 Subject: [PATCH 36/48] Update dependency --- dace/external/hlslib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/external/hlslib b/dace/external/hlslib index 1b5b3aee5d..1403cd016c 160000 --- a/dace/external/hlslib +++ b/dace/external/hlslib @@ -1 +1 @@ -Subproject commit 1b5b3aee5dab19adcc443fa9a7cd45244bd246b1 +Subproject commit 1403cd016ce63a9961eeb3899bea70c873a929ce From b47d82b72decce012b088602acc9b8290da04f8e Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 13:55:34 +0200 Subject: [PATCH 37/48] Add fix plus testcase --- dace/frontend/fortran/fortran_parser.py | 1 + tests/fortran/array_test.py | 50 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/dace/frontend/fortran/fortran_parser.py b/dace/frontend/fortran/fortran_parser.py index 6d1be7138a..d7112892fe 100644 --- a/dace/frontend/fortran/fortran_parser.py +++ b/dace/frontend/fortran/fortran_parser.py @@ -463,6 +463,7 @@ def subroutine2sdfg(self, node: ast_internal_classes.Subroutine_Subprogram_Node, if i.type == "ALL": shape.append(array.shape[indices]) mysize = 
mysize * array.shape[indices] + index_list.append(None) else: raise NotImplementedError("Index in ParDecl should be ALL") else: diff --git a/tests/fortran/array_test.py b/tests/fortran/array_test.py index 8685628012..a8ece680a6 100644 --- a/tests/fortran/array_test.py +++ b/tests/fortran/array_test.py @@ -11,6 +11,7 @@ from dace.frontend.fortran import fortran_parser from fparser.two.symbol_table import SymbolTable from dace.sdfg import utils as sdutil +from dace.sdfg.nodes import AccessNode import dace.frontend.fortran.ast_components as ast_components import dace.frontend.fortran.ast_transforms as ast_transforms @@ -167,6 +168,54 @@ def test_fortran_frontend_input_output_connector(): assert (a[1, 2] == 0) +def test_fortran_frontend_memlet_in_map_test(): + """ + Tests that no assumption is made where the iteration variable is inside a memlet subset + """ + test_string = """ + PROGRAM memlet_range_test + implicit None + REAL INP(100, 10) + REAL OUT(100, 10) + CALL memlet_range_test_routine(INP, OUT) + END PROGRAM + + SUBROUTINE memlet_range_test_routine(INP, OUT) + REAL INP(100, 10) + REAL OUT(100, 10) + DO I=1,100 + CALL inner_loops(INP(I, :), OUT(I, :)) + ENDDO + END SUBROUTINE memlet_range_test_routine + + SUBROUTINE inner_loops(INP, OUT) + REAL INP(10) + REAL OUT(10) + DO J=1,10 + OUT(J) = INP(J) + 1 + ENDDO + END SUBROUTINE inner_loops + + """ + sdfg = fortran_parser.create_sdfg_from_string(test_string, "memlet_range_test") + sdfg.simplify() + # Expect that start is begin of for loop -> only one out edge to guard defining iterator variable + assert len(sdfg.out_edges(sdfg.start_state)) == 1 + iter_var = symbolic.symbol(list(sdfg.out_edges(sdfg.start_state)[0].data.assignments.keys())[0]) + + for state in sdfg.states(): + if len(state.nodes()) > 1: + for node in state.nodes(): + if isinstance(node, AccessNode) and node.data in ['INP', 'OUT']: + edges = [*state.in_edges(node), *state.out_edges(node)] + # There should be only one edge in/to the access node + 
assert len(edges) == 1 + memlet = edges[0].data + # Check that the correct memlet has the iteration variable + assert memlet.subset[0] == (iter_var, iter_var, 1) + assert memlet.subset[1] == (1, 10, 1) + + if __name__ == "__main__": test_fortran_frontend_array_3dmap() @@ -174,3 +223,4 @@ def test_fortran_frontend_input_output_connector(): test_fortran_frontend_input_output_connector() test_fortran_frontend_array_ranges() test_fortran_frontend_twoconnector() + test_fortran_frontend_memlet_in_map_test() From 4c824a310a53c2aefd6d03113dda091f4c48bad8 Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 13:59:20 +0200 Subject: [PATCH 38/48] Tried to undo wrong update of dependency --- dace/external/hlslib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/external/hlslib b/dace/external/hlslib index 1403cd016c..1b5b3aee5d 160000 --- a/dace/external/hlslib +++ b/dace/external/hlslib @@ -1 +1 @@ -Subproject commit 1403cd016ce63a9961eeb3899bea70c873a929ce +Subproject commit 1b5b3aee5dab19adcc443fa9a7cd45244bd246b1 From 22718af782d2e36ea7004aa00c79b8fce176fe03 Mon Sep 17 00:00:00 2001 From: Cliff Hodel <111381329+hodelcl@users.noreply.github.com> Date: Wed, 16 Aug 2023 14:15:03 +0200 Subject: [PATCH 39/48] Work Depth Analysis for SDFGs (#1327) * initial push of work_depth analysis script * adding tests to work_depth analysis * rename work depth analysis * todos added * code ready for PR * yapf for formatting * put tests into dace/tests/sdfg * fixed import after merge * merged propgatate_states_symbolically into propagate_states * fixed format issue in work_depth.py * small bugfix --------- Co-authored-by: Cliff Hodel Co-authored-by: Cliff Hodel Co-authored-by: Philipp Schaad --- dace/sdfg/propagation.py | 51 +- dace/sdfg/work_depth_analysis/helpers.py | 331 ++++++++++ dace/sdfg/work_depth_analysis/work_depth.py | 653 ++++++++++++++++++++ tests/sdfg/work_depth_tests.py | 201 ++++++ 4 files changed, 1224 insertions(+), 12 deletions(-) create 
mode 100644 dace/sdfg/work_depth_analysis/helpers.py create mode 100644 dace/sdfg/work_depth_analysis/work_depth.py create mode 100644 tests/sdfg/work_depth_tests.py diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 89ba6928c7..0fec4812b7 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -10,7 +10,7 @@ import itertools import functools import sympy -from sympy import ceiling +from sympy import ceiling, Symbol from sympy.concrete.summations import Sum import warnings import networkx as nx @@ -564,8 +564,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): Annotate each valid for loop construct with its loop variable ranges. :param sdfg: The SDFG in which to look. - :param unannotated_cycle_states: List of states in cycles without valid - for loop ranges. + :param unannotated_cycle_states: List of lists. Each sub-list contains the states of one unannotated cycle. """ # We import here to avoid cyclic imports. @@ -652,7 +651,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): res = find_for_loop(sdfg, guard, begin, itervar=itvar) if res is None: # No range detected, mark as unbounded. - unannotated_cycle_states.extend(cycle) + unannotated_cycle_states.append(cycle) else: itervar, rng, _ = res @@ -674,10 +673,10 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): else: # There's no guard state, so this cycle marks all states in it as # dynamically unbounded. - unannotated_cycle_states.extend(cycle) + unannotated_cycle_states.append(cycle) -def propagate_states(sdfg) -> None: +def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: """ Annotate the states of an SDFG with the number of executions. @@ -728,6 +727,9 @@ def propagate_states(sdfg) -> None: once. :param sdfg: The SDFG to annotate. + :param concretize_dynamic_unbounded: If True, we annotate dyncamic unbounded states with symbols of the + form "num_execs_{sdfg_id}_{loop_start_state_id}". 
Hence, for each + unbounded loop its states will have the same number of symbolic executions. :note: This operates on the SDFG in-place. """ @@ -759,6 +761,9 @@ def propagate_states(sdfg) -> None: # cycle should be marked as unannotated. unannotated_cycle_states = [] _annotate_loop_ranges(sdfg, unannotated_cycle_states) + if not concretize_dynamic_unbounded: + # Flatten the list. This keeps the old behavior of propagate_states. + unannotated_cycle_states = [state for cycle in unannotated_cycle_states for state in cycle] # Keep track of states that fully merge a previous conditional split. We do # this so we can remove the dynamic executions flag for those states. @@ -800,7 +805,7 @@ def propagate_states(sdfg) -> None: # The only exception to this rule: If the state is in an # unannotated loop, i.e. should be annotated as dynamic # unbounded instead, we do that. - if (state in unannotated_cycle_states): + if (not concretize_dynamic_unbounded) and state in unannotated_cycle_states: state.executions = 0 state.dynamic_executions = True else: @@ -872,17 +877,39 @@ def propagate_states(sdfg) -> None: else: # Conditional split or unannotated (dynamic unbounded) loop. unannotated_loop_edge = None - for oedge in out_edges: - if oedge.dst in unannotated_cycle_states: - # This is an unannotated loop down this branch. - unannotated_loop_edge = oedge + if concretize_dynamic_unbounded: + to_remove = [] + for oedge in out_edges: + for cycle in unannotated_cycle_states: + if oedge.dst in cycle: + # This is an unannotated loop down this branch. + unannotated_loop_edge = oedge + # remove cycle, since it is now annotated with symbol + to_remove.append(cycle) + + for c in to_remove: + unannotated_cycle_states.remove(c) + else: + for oedge in out_edges: + if oedge.dst in unannotated_cycle_states: + # This is an unannotated loop down this branch. + unannotated_loop_edge = oedge if unannotated_loop_edge is not None: # Traverse as an unbounded loop. 
out_edges.remove(unannotated_loop_edge) for oedge in out_edges: traversal_q.append((oedge.dst, state.executions, False, itvar_stack)) - traversal_q.append((unannotated_loop_edge.dst, 0, True, itvar_stack)) + if concretize_dynamic_unbounded: + # Here we introduce the num_exec symbol and propagate it down the loop. + # We can always assume these symbols to be non-negative. + traversal_q.append( + (unannotated_loop_edge.dst, + Symbol(f'num_execs_{sdfg.sdfg_id}_{sdfg.node_id(unannotated_loop_edge.dst)}', + nonnegative=True), False, itvar_stack)) + else: + # Propagate dynamic unbounded. + traversal_q.append((unannotated_loop_edge.dst, 0, True, itvar_stack)) else: # Traverse as a conditional split. proposed_executions = state.executions diff --git a/dace/sdfg/work_depth_analysis/helpers.py b/dace/sdfg/work_depth_analysis/helpers.py new file mode 100644 index 0000000000..a80e769f64 --- /dev/null +++ b/dace/sdfg/work_depth_analysis/helpers.py @@ -0,0 +1,331 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Helper functions used by the work depth analysis. 
""" + +from dace import SDFG, SDFGState, nodes +from collections import deque +from typing import List, Dict, Set, Tuple, Optional, Union +import networkx as nx + +NodeT = str +EdgeT = Tuple[NodeT, NodeT] + + +class NodeCycle: + + nodes: Set[NodeT] = [] + + def __init__(self, nodes: List[NodeT]) -> None: + self.nodes = set(nodes) + + @property + def length(self) -> int: + return len(self.nodes) + + +UUID_SEPARATOR = '/' + + +def ids_to_string(sdfg_id, state_id=-1, node_id=-1, edge_id=-1): + return (str(sdfg_id) + UUID_SEPARATOR + str(state_id) + UUID_SEPARATOR + str(node_id) + UUID_SEPARATOR + + str(edge_id)) + + +def get_uuid(element, state=None): + if isinstance(element, SDFG): + return ids_to_string(element.sdfg_id) + elif isinstance(element, SDFGState): + return ids_to_string(element.parent.sdfg_id, element.parent.node_id(element)) + elif isinstance(element, nodes.Node): + return ids_to_string(state.parent.sdfg_id, state.parent.node_id(state), state.node_id(element)) + else: + return ids_to_string(-1) + + +def get_domtree(graph: nx.DiGraph, start_node: str, idom: Dict[str, str] = None): + idom = idom or nx.immediate_dominators(graph, start_node) + + alldominated = {n: set() for n in graph.nodes} + domtree = nx.DiGraph() + + for node, dom in idom.items(): + if node is dom: + continue + domtree.add_edge(dom, node) + alldominated[dom].add(node) + + nextidom = idom[dom] + ndom = nextidom if nextidom != dom else None + + while ndom: + alldominated[ndom].add(node) + nextidom = idom[ndom] + ndom = nextidom if nextidom != ndom else None + + # 'Rank' the tree, i.e., annotate each node with the level it is on. 
+ q = deque() + q.append((start_node, 0)) + while q: + node, level = q.popleft() + domtree.add_node(node, level=level) + for s in domtree.successors(node): + q.append((s, level + 1)) + + return alldominated, domtree + + +def get_backedges(graph: nx.DiGraph, + start: Optional[NodeT], + strict: bool = False) -> Union[Set[EdgeT], Tuple[Set[EdgeT], Set[EdgeT]]]: + '''Find all backedges in a directed graph. + + Note: + This algorithm has an algorithmic complexity of O((|V|+|E|)*C) for a + graph with vertices V, edges E, and C cycles. + + Args: + graph (nx.DiGraph): The graph for which to search backedges. + start (str): Start node of the graph. If no start is provided, a node + with no incoming edges is used as the start. If no such node can + be found, a `ValueError` is raised. + + Returns: + A set of backedges in the graph. + + Raises: + ValueError: If no `start` is provided and the graph contains no nodes + with no incoming edges. + ''' + backedges = set() + eclipsed_backedges = set() + + if start is None: + for node in graph.nodes(): + if graph.in_degree(node) == 0: + start = node + break + if start is None: + raise ValueError('No start node provided and no start node could ' + 'be determined automatically') + + # Gather all cycles in the graph. Cycles are represented as a sequence of + # nodes. + # O((|V|+|E|)*(C+1)), for C cycles. + all_cycles_nx: List[List[NodeT]] = nx.cycles.simple_cycles(graph) + #all_cycles_nx: List[List[NodeT]] = nx.simple_cycles(graph) + all_cycles: Set[NodeCycle] = set() + for cycle in all_cycles_nx: + all_cycles.add(NodeCycle(cycle)) + + # Construct a dictionary mapping a node to the cycles containing that node. + # O(|V|*|C|) + cycle_map: Dict[NodeT, Set[NodeCycle]] = dict() + for cycle in all_cycles: + for node in cycle.nodes: + try: + cycle_map[node].add(cycle) + except KeyError: + cycle_map[node] = set([cycle]) + + # Do a BFS traversal of the graph to detect the back edges. 
+ # For each node that is part of an (unhandled) cycle, find the longest + # still unhandled cycle and try to use it to find the back edge for it. + bfs_frontier = [start] + visited: Set[NodeT] = set([start]) + handled_cycles: Set[NodeCycle] = set() + unhandled_cycles = all_cycles + while bfs_frontier: + node = bfs_frontier.pop(0) + pred = [p for p in graph.predecessors(node) if p not in visited] + longest_cycles: Dict[NodeT, NodeCycle] = dict() + try: + cycles = cycle_map[node] + remove_cycles = set() + for cycle in cycles: + if cycle not in handled_cycles: + for p in pred: + if p in cycle.nodes: + if p not in longest_cycles: + longest_cycles[p] = cycle + else: + if cycle.length > longest_cycles[p].length: + longest_cycles[p] = cycle + else: + remove_cycles.add(cycle) + for cycle in remove_cycles: + cycles.remove(cycle) + except KeyError: + longest_cycles = dict() + + # For the current node, find the incoming edge which belongs to the + # cycle and has not been visited yet, which indicates a backedge. + node_backedge_candidates: Set[Tuple[EdgeT, NodeCycle]] = set() + for p, longest_cycle in longest_cycles.items(): + handled_cycles.add(longest_cycle) + unhandled_cycles.remove(longest_cycle) + cycle_map[node].remove(longest_cycle) + backedge_candidates = graph.in_edges(node) + for candidate in backedge_candidates: + src = candidate[0] + dst = candidate[0] + if src not in visited and src in longest_cycle.nodes: + node_backedge_candidates.add((candidate, longest_cycle)) + if not strict: + backedges.add(candidate) + + # Make sure that any cycle containing this back edge is + # not evaluated again, i.e., mark as handled. 
+ remove_cycles = set() + for cycle in unhandled_cycles: + if src in cycle.nodes and dst in cycle.nodes: + handled_cycles.add(cycle) + remove_cycles.add(cycle) + for cycle in remove_cycles: + unhandled_cycles.remove(cycle) + + # If strict is set, we only report the longest cycle's back edges for + # any given node, and separately return any other backedges as + # 'eclipsed' backedges. In the case of a while-loop, for example, + # the loop edge is considered a backedge, while a continue inside the + # loop is considered an 'eclipsed' backedge. + if strict: + longest_candidate: Tuple[EdgeT, NodeCycle] = None + eclipsed_candidates = set() + for be_candidate in node_backedge_candidates: + if longest_candidate is None: + longest_candidate = be_candidate + elif longest_candidate[1].length < be_candidate[1].length: + eclipsed_candidates.add(longest_candidate[0]) + longest_candidate = be_candidate + else: + eclipsed_candidates.add(be_candidate[0]) + if longest_candidate is not None: + backedges.add(longest_candidate[0]) + if eclipsed_candidates: + eclipsed_backedges.update(eclipsed_candidates) + + # Continue BFS. + for neighbour in graph.successors(node): + if neighbour not in visited: + visited.add(neighbour) + bfs_frontier.append(neighbour) + + if strict: + return backedges, eclipsed_backedges + else: + return backedges + + +def find_loop_guards_tails_exits(sdfg_nx: nx.DiGraph): + """ + Detects loops in a SDFG. For each loop, it identifies (node, oNode, exit). + We know that there is a backedge from oNode to node that creates the loop and that exit is the exit state of the loop. + + :param sdfg_nx: The networkx representation of a SDFG. 
+ """ + + # preparation phase: compute dominators, backedges etc + for node in sdfg_nx.nodes(): + if sdfg_nx.in_degree(node) == 0: + start = node + break + if start is None: + raise ValueError('No start node could be determined') + + # sdfg can have multiple end nodes --> not good for postDomTree + # --> add a new end node + artificial_end_node = 'artificial_end_node' + sdfg_nx.add_node(artificial_end_node) + for node in sdfg_nx.nodes(): + if sdfg_nx.out_degree(node) == 0 and node != artificial_end_node: + # this is an end node of the sdfg + sdfg_nx.add_edge(node, artificial_end_node) + + # sanity check: + if sdfg_nx.in_degree(artificial_end_node) == 0: + raise ValueError('No end node could be determined in the SDFG') + + # compute dominators and backedges + iDoms = nx.immediate_dominators(sdfg_nx, start) + allDom, domTree = get_domtree(sdfg_nx, start, iDoms) + + reversed_sdfg_nx = sdfg_nx.reverse() + iPostDoms = nx.immediate_dominators(reversed_sdfg_nx, artificial_end_node) + allPostDoms, postDomTree = get_domtree(reversed_sdfg_nx, artificial_end_node, iPostDoms) + + backedges = get_backedges(sdfg_nx, start) + backedgesDstDict = {} + for be in backedges: + if be[1] in backedgesDstDict: + backedgesDstDict[be[1]].add(be) + else: + backedgesDstDict[be[1]] = set([be]) + + # This list will be filled with triples (node, oNode, exit), one triple for each loop construct in the SDFG. + # There will always be a backedge from oNode to node. Either node or oNode will be the corresponding loop guard, + # depending on whether it is a while-do or a do-while loop. exit will always be the exit state of the loop. + nodes_oNodes_exits = [] + + # iterate over all nodes + for node in sdfg_nx.nodes(): + # Check if any backedge ends in node. 
+        if node in backedgesDstDict:
+            inc_backedges = backedgesDstDict[node]
+
+            # gather all successors of node that are not reached by backedges
+            successors = []
+            for edge in sdfg_nx.out_edges(node):
+                if not edge in backedges:
+                    successors.append(edge[1])
+
+            # For each incoming backedge, we want to find oNode and exit. There can be multiple backedges, in case
+            # we have a continue statement in the original code. But we can handle these backedges normally.
+            for be in inc_backedges:
+                # since node has an incoming backedge, it is either a loop guard or loop tail
+                # oNode will exactly be the other thing
+                oNode = be[0]
+                exitCandidates = set()
+                # search for exit candidates:
+                # a state is an exit candidate if:
+                # - it is in successor and it does not dominate oNode (else it dominates
+                # the last loop state, and hence is inside the loop itself)
+                # - it is a successor of oNode (but not node)
+                # This handles both cases of while-do and do-while loops
+                for succ in successors:
+                    if succ != oNode and oNode not in allDom[succ]:
+                        exitCandidates.add(succ)
+                for succ in sdfg_nx.successors(oNode):
+                    if succ != node:
+                        exitCandidates.add(succ)
+
+                if len(exitCandidates) == 0:
+                    raise ValueError('failed to find any exit nodes')
+                elif len(exitCandidates) > 1:
+                    # Find the exit candidate that sits highest up in the
+                    # postdominator tree (i.e., has the lowest level).
+                    # That must be the exit node (it must post-dominate
+                    # everything inside the loop). If there are multiple
+                    # candidates on the lowest level (i.e., disjoint set of
+                    # postdominated nodes), there are multiple exit paths,
+                    # and they all share one level.
+ cand = exitCandidates.pop() + minSet = set([cand]) + minLevel = nx.get_node_attributes(postDomTree, 'level')[cand] + for cand in exitCandidates: + curr_level = nx.get_node_attributes(postDomTree, 'level')[cand] + if curr_level < minLevel: + # new minimum found + minLevel = curr_level + minSet.clear() + minSet.add(cand) + elif curr_level == minLevel: + # add cand to curr set + minSet.add(cand) + + if len(minSet) > 0: + exitCandidates = minSet + else: + raise ValueError('failed to find exit minSet') + + # now we have a triple (node, oNode, exitCandidates) + nodes_oNodes_exits.append((node, oNode, exitCandidates)) + + return nodes_oNodes_exits diff --git a/dace/sdfg/work_depth_analysis/work_depth.py b/dace/sdfg/work_depth_analysis/work_depth.py new file mode 100644 index 0000000000..a05fe10266 --- /dev/null +++ b/dace/sdfg/work_depth_analysis/work_depth.py @@ -0,0 +1,653 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Work depth analysis for any input SDFG. Can be used with the DaCe VS Code extension or +from command line as a Python script. """ + +import argparse +from collections import deque +from dace.sdfg import nodes as nd, propagation, InterstateEdge +from dace import SDFG, SDFGState, dtypes +from dace.subsets import Range +from typing import Tuple, Dict +import os +import sympy as sp +from copy import deepcopy +from dace.libraries.blas import MatMul +from dace.libraries.standard import Reduce, Transpose +from dace.symbolic import pystr_to_symbolic +import ast +import astunparse +import warnings + +from dace.sdfg.work_depth_analysis.helpers import get_uuid, find_loop_guards_tails_exits + + +def get_array_size_symbols(sdfg): + """ + Returns all symbols that appear isolated in shapes of the SDFG's arrays. + These symbols can then be assumed to be positive. + + :note: This only works if a symbol appears in isolation, i.e. array A[N]. + If we have A[N+1], we cannot assume N to be positive. 
+ :param sdfg: The SDFG in which it searches for symbols. + :return: A set containing symbols which we can assume to be positive. + """ + symbols = set() + for _, _, arr in sdfg.arrays_recursive(): + for s in arr.shape: + if isinstance(s, sp.Symbol): + symbols.add(s) + return symbols + + +def posify_certain_symbols(expr, syms_to_posify): + """ + Takes an expression and evaluates it while assuming that certain symbols are positive. + + :param expr: The expression to evaluate. + :param syms_to_posify: List of symbols we assume to be positive. + :note: This is adapted from the Sympy function posify. + """ + + expr = sp.sympify(expr) + + reps = {s: sp.Dummy(s.name, positive=True, **s.assumptions0) for s in syms_to_posify if s.is_positive is None} + expr = expr.subs(reps) + return expr.subs({r: s for s, r in reps.items()}) + + +def symeval(val, symbols): + """ + Takes a sympy expression and substitutes its symbols according to a dict { old_symbol: new_symbol}. + + :param val: The expression we are updating. + :param symbols: Dictionary of key value pairs { old_symbol: new_symbol}. 
+ """ + first_replacement = {pystr_to_symbolic(k): pystr_to_symbolic('__REPLSYM_' + k) for k in symbols.keys()} + second_replacement = {pystr_to_symbolic('__REPLSYM_' + k): v for k, v in symbols.items()} + return val.subs(first_replacement).subs(second_replacement) + + +def evaluate_symbols(base, new): + result = {} + for k, v in new.items(): + result[k] = symeval(v, base) + return result + + +def count_work_matmul(node, symbols, state): + A_memlet = next(e for e in state.in_edges(node) if e.dst_conn == '_a') + B_memlet = next(e for e in state.in_edges(node) if e.dst_conn == '_b') + C_memlet = next(e for e in state.out_edges(node) if e.src_conn == '_c') + result = 2 # Multiply, add + # Batch + if len(C_memlet.data.subset) == 3: + result *= symeval(C_memlet.data.subset.size()[0], symbols) + # M*N + result *= symeval(C_memlet.data.subset.size()[-2], symbols) + result *= symeval(C_memlet.data.subset.size()[-1], symbols) + # K + result *= symeval(A_memlet.data.subset.size()[-1], symbols) + return result + + +def count_work_reduce(node, symbols, state): + result = 0 + if node.wcr is not None: + result += count_arithmetic_ops_code(node.wcr) + in_memlet = None + in_edges = state.in_edges(node) + if in_edges is not None and len(in_edges) == 1: + in_memlet = in_edges[0] + if in_memlet is not None and in_memlet.data.volume is not None: + result *= in_memlet.data.volume + else: + result = 0 + return result + + +LIBNODES_TO_WORK = { + MatMul: count_work_matmul, + Transpose: lambda *args: 0, + Reduce: count_work_reduce, +} + + +def count_depth_matmul(node, symbols, state): + # For now we set it equal to work: see comments in count_depth_reduce just below + return count_work_matmul(node, symbols, state) + + +def count_depth_reduce(node, symbols, state): + # depth of reduction is log2 of the work + # TODO: Can we actually assume this? Or is it equal to the work? + # Another thing to consider is that we essetially do NOT count wcr edges as operations for now... 
+ + # return sp.ceiling(sp.log(count_work_reduce(node, symbols, state), 2)) + # set it equal to work for now + return count_work_reduce(node, symbols, state) + + +LIBNODES_TO_DEPTH = { + MatMul: count_depth_matmul, + Transpose: lambda *args: 0, + Reduce: count_depth_reduce, +} + +bigo = sp.Function('bigo') +PYFUNC_TO_ARITHMETICS = { + 'float': 0, + 'dace.float64': 0, + 'dace.int64': 0, + 'math.exp': 1, + 'exp': 1, + 'math.tanh': 1, + 'sin': 1, + 'cos': 1, + 'tanh': 1, + 'math.sqrt': 1, + 'sqrt': 1, + 'atan2:': 1, + 'min': 0, + 'max': 0, + 'ceiling': 0, + 'floor': 0, + 'abs': 0 +} + + +class ArithmeticCounter(ast.NodeVisitor): + + def __init__(self): + self.count = 0 + + def visit_BinOp(self, node): + if isinstance(node.op, ast.MatMult): + raise NotImplementedError('MatMult op count requires shape ' + 'inference') + self.count += 1 + return self.generic_visit(node) + + def visit_UnaryOp(self, node): + self.count += 1 + return self.generic_visit(node) + + def visit_Call(self, node): + fname = astunparse.unparse(node.func)[:-1] + if fname not in PYFUNC_TO_ARITHMETICS: + print( + 'WARNING: Unrecognized python function "%s". If this is a type conversion, like "dace.float64", then this is fine.' + % fname) + return self.generic_visit(node) + self.count += PYFUNC_TO_ARITHMETICS[fname] + return self.generic_visit(node) + + def visit_AugAssign(self, node): + return self.visit_BinOp(node) + + def visit_For(self, node): + raise NotImplementedError + + def visit_While(self, node): + raise NotImplementedError + + +def count_arithmetic_ops_code(code): + ctr = ArithmeticCounter() + if isinstance(code, (tuple, list)): + for stmt in code: + ctr.visit(stmt) + elif isinstance(code, str): + ctr.visit(ast.parse(code)) + else: + ctr.visit(code) + return ctr.count + + +class DepthCounter(ast.NodeVisitor): + # so far this is identical to the ArithmeticCounter above. 
+ def __init__(self): + self.count = 0 + + def visit_BinOp(self, node): + if isinstance(node.op, ast.MatMult): + raise NotImplementedError('MatMult op count requires shape ' + 'inference') + self.count += 1 + return self.generic_visit(node) + + def visit_UnaryOp(self, node): + self.count += 1 + return self.generic_visit(node) + + def visit_Call(self, node): + fname = astunparse.unparse(node.func)[:-1] + if fname not in PYFUNC_TO_ARITHMETICS: + print( + 'WARNING: Unrecognized python function "%s". If this is a type conversion, like "dace.float64", then this is fine.' + % fname) + return self.generic_visit(node) + self.count += PYFUNC_TO_ARITHMETICS[fname] + return self.generic_visit(node) + + def visit_AugAssign(self, node): + return self.visit_BinOp(node) + + def visit_For(self, node): + raise NotImplementedError + + def visit_While(self, node): + raise NotImplementedError + + +def count_depth_code(code): + # so far this is the same as the work counter, since work = depth for each tasklet, as we can't assume any parallelism + ctr = ArithmeticCounter() + if isinstance(code, (tuple, list)): + for stmt in code: + ctr.visit(stmt) + elif isinstance(code, str): + ctr.visit(ast.parse(code)) + else: + ctr.visit(code) + return ctr.count + + +def tasklet_work(tasklet_node, state): + if tasklet_node.code.language == dtypes.Language.CPP: + for oedge in state.out_edges(tasklet_node): + return bigo(oedge.data.num_accesses) + + elif tasklet_node.code.language == dtypes.Language.Python: + return count_arithmetic_ops_code(tasklet_node.code.code) + else: + # other languages not implemented, count whole tasklet as work of 1 + warnings.warn('Work of tasklets only properly analyzed for Python or CPP. For all other ' + 'languages work = 1 will be counted for each tasklet.') + return 1 + + +def tasklet_depth(tasklet_node, state): + # TODO: how to get depth of CPP tasklets? 
+ # For now we use depth == work: + if tasklet_node.code.language == dtypes.Language.CPP: + for oedge in state.out_edges(tasklet_node): + return bigo(oedge.data.num_accesses) + if tasklet_node.code.language == dtypes.Language.Python: + return count_depth_code(tasklet_node.code.code) + else: + # other languages not implemented, count whole tasklet as work of 1 + warnings.warn('Depth of tasklets only properly analyzed for Python code. For all other ' + 'languages depth = 1 will be counted for each tasklet.') + return 1 + + +def get_tasklet_work(node, state): + return tasklet_work(node, state), -1 + + +def get_tasklet_work_depth(node, state): + return tasklet_work(node, state), tasklet_depth(node, state) + + +def get_tasklet_avg_par(node, state): + return tasklet_work(node, state), tasklet_depth(node, state) + + +def sdfg_work_depth(sdfg: SDFG, w_d_map: Dict[str, Tuple[sp.Expr, sp.Expr]], analyze_tasklet, + symbols) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a given SDFG. + First we determine the work and depth of each state. Then we break loops in the state machine, such that we get a DAG. + Lastly, we compute the path with most work and the path with the most depth in order to get the total work depth. + + :param sdfg: The SDFG to analyze. + :param w_d_map: Dictionary which will save the result. + :param analyze_tasklet: Function used to analyze tasklet nodes. + :param symbols: A dictionary mapping local nested SDFG symbols to global symbols. + :return: A tuple containing the work and depth of the SDFG. + """ + + # First determine the work and depth of each state individually. + # Keep track of the work and depth for each state in a dictionary, where work and depth are multiplied by the number + # of times the state will be executed. 
+ state_depths: Dict[SDFGState, sp.Expr] = {} + state_works: Dict[SDFGState, sp.Expr] = {} + for state in sdfg.nodes(): + state_work, state_depth = state_work_depth(state, w_d_map, analyze_tasklet, symbols) + state_works[state] = sp.simplify(state_work * state.executions) + state_depths[state] = sp.simplify(state_depth * state.executions) + w_d_map[get_uuid(state)] = (state_works[state], state_depths[state]) + + # Prepare the SDFG for a depth analysis by breaking loops. This removes the edge between the last loop state and + # the guard, and instead places an edge between the last loop state and the exit state. + # This transforms the state machine into a DAG. Hence, we can find the "heaviest" and "deepest" paths in linear time. + # Additionally, construct a dummy exit state and connect every state that has no outgoing edges to it. + + # identify all loops in the SDFG + nodes_oNodes_exits = find_loop_guards_tails_exits(sdfg._nx) + + # Now we need to go over each triple (node, oNode, exits). For each triple, we + # - remove edge (oNode, node), i.e. the backward edge + # - for all exits e, add edge (oNode, e). This edge may already exist + for node, oNode, exits in nodes_oNodes_exits: + sdfg.remove_edge(sdfg.edges_between(oNode, node)[0]) + for e in exits: + if len(sdfg.edges_between(oNode, e)) == 0: + # no edge there yet + sdfg.add_edge(oNode, e, InterstateEdge()) + + # add a dummy exit to the SDFG, such that each path ends there. + dummy_exit = sdfg.add_state('dummy_exit') + for state in sdfg.nodes(): + if len(sdfg.out_edges(state)) == 0 and state != dummy_exit: + sdfg.add_edge(state, dummy_exit, InterstateEdge()) + + # These two dicts save the current length of the "heaviest", resp. "deepest", paths at each state. + work_map: Dict[SDFGState, sp.Expr] = {} + depth_map: Dict[SDFGState, sp.Expr] = {} + # The dummy state has 0 work and depth. 
+    state_depths[dummy_exit] = sp.sympify(0)
+    state_works[dummy_exit] = sp.sympify(0)
+
+    # Perform a BFS traversal of the state machine and calculate the maximum work / depth at each state. Only advance to
+    # the next state in the BFS if all incoming edges have been visited, to ensure the maximum work / depth expressions
+    # have been calculated.
+    traversal_q = deque()
+    traversal_q.append((sdfg.start_state, sp.sympify(0), sp.sympify(0), None))
+    visited = set()
+    while traversal_q:
+        state, depth, work, ie = traversal_q.popleft()
+
+        if ie is not None:
+            visited.add(ie)
+
+        n_depth = sp.simplify(depth + state_depths[state])
+        n_work = sp.simplify(work + state_works[state])
+
+        # If we are analysing average parallelism, we don't search "heaviest" and "deepest" paths separately, but we want one
+        # single path with the least average parallelism (of all paths with more than 0 work).
+        if analyze_tasklet == get_tasklet_avg_par:
+            if state in depth_map:  # and hence also state in work_map
+                # if current path has 0 depth, we don't do anything.
+ if n_depth != 0: + # see if we need to update the work and depth of the current state + # we update if avg parallelism of new incoming path is less than current avg parallelism + old_avg_par = sp.simplify(work_map[state] / depth_map[state]) + new_avg_par = sp.simplify(n_work / n_depth) + + if depth_map[state] == 0 or new_avg_par < old_avg_par: + # old value was divided by zero or new path gives actually worse avg par, then we keep new value + depth_map[state] = n_depth + work_map[state] = n_work + else: + depth_map[state] = n_depth + work_map[state] = n_work + else: + # search heaviest and deepest path separately + if state in depth_map: # and consequently also in work_map + depth_map[state] = sp.Max(depth_map[state], n_depth) + work_map[state] = sp.Max(work_map[state], n_work) + else: + depth_map[state] = n_depth + work_map[state] = n_work + + out_edges = sdfg.out_edges(state) + # only advance after all incoming edges were visited (meaning that current work depth values of state are final). + if any(iedge not in visited for iedge in sdfg.in_edges(state)): + pass + else: + for oedge in out_edges: + traversal_q.append((oedge.dst, depth_map[state], work_map[state], oedge)) + + try: + max_depth = depth_map[dummy_exit] + max_work = work_map[dummy_exit] + except KeyError: + # If we get a KeyError above, this means that the traversal never reached the dummy_exit state. + # This happens if the loops were not properly detected and broken. + raise Exception( + 'Analysis failed, since not all loops got detected. It may help to use more structured loop constructs.') + + sdfg_result = (sp.simplify(max_work), sp.simplify(max_depth)) + w_d_map[get_uuid(sdfg)] = sdfg_result + return sdfg_result + + +def scope_work_depth(state: SDFGState, + w_d_map: Dict[str, sp.Expr], + analyze_tasklet, + symbols, + entry: nd.EntryNode = None) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a scope. 
+    This works by traversing through the scope analyzing the work and depth of each encountered node.
+    Depending on what kind of node we encounter, we do the following:
+        - EntryNode: Recursively analyze work depth of scope.
+        - Tasklet: use analyze_tasklet to get work depth of tasklet node.
+        - NestedSDFG: After translating its local symbols to global symbols, we analyze the nested SDFG recursively.
+        - LibraryNode: Library nodes are analyzed with special functions depending on their type.
+    Work inside a state can simply be summed up, but for the depth we need to find the longest path. Since dataflow is a DAG,
+    this can be done in linear time by traversing the graph in topological order.
+
+    :param state: The state in which the scope to analyze is contained.
+    :param symbols: A dictionary mapping symbols to their values.
+    :param entry: The entry node of the scope to analyze. If None, the entire state is analyzed.
+    :return: A tuple containing the work and depth of the scope.
+    """
+
+    # find the work and depth of each node
+    # for maps and nested SDFG, we do it recursively
+    work = sp.sympify(0)
+    max_depth = sp.sympify(0)
+    scope_nodes = state.scope_children()[entry]
+    scope_exit = None if entry is None else state.exit_node(entry)
+    for node in scope_nodes:
+        # add node to map
+        w_d_map[get_uuid(node, state)] = (sp.sympify(0), sp.sympify(0))
+        if isinstance(node, nd.EntryNode):
+            # If the scope contains an entry node, we need to recursively analyze the sub-scope of the entry node first.
+            # The resulting work/depth are summarized into the entry node
+            s_work, s_depth = scope_work_depth(state, w_d_map, analyze_tasklet, symbols, node)
+            # add up work for whole state, but also save work for this sub-scope in w_d_map
+            work += s_work
+            w_d_map[get_uuid(node, state)] = (s_work, s_depth)
+        elif node == scope_exit:
+            # don't do anything for exit nodes, everything handled already in the corresponding entry node.
+ pass + elif isinstance(node, nd.Tasklet): + # add up work for whole state, but also save work for this node in w_d_map + t_work, t_depth = analyze_tasklet(node, state) + work += t_work + w_d_map[get_uuid(node, state)] = (sp.sympify(t_work), sp.sympify(t_depth)) + elif isinstance(node, nd.NestedSDFG): + # keep track of nested symbols: "symbols" maps local nested SDFG symbols to global symbols. + # We only want global symbols in our final work depth expressions. + nested_syms = {} + nested_syms.update(symbols) + nested_syms.update(evaluate_symbols(symbols, node.symbol_mapping)) + # Nested SDFGs are recursively analyzed first. + nsdfg_work, nsdfg_depth = sdfg_work_depth(node.sdfg, w_d_map, analyze_tasklet, nested_syms) + + # add up work for whole state, but also save work for this nested SDFG in w_d_map + work += nsdfg_work + w_d_map[get_uuid(node, state)] = (nsdfg_work, nsdfg_depth) + elif isinstance(node, nd.LibraryNode): + lib_node_work = LIBNODES_TO_WORK[type(node)](node, symbols, state) + work += lib_node_work + lib_node_depth = -1 # not analyzed + if analyze_tasklet != get_tasklet_work: + # we are analyzing depth + lib_node_depth = LIBNODES_TO_DEPTH[type(node)](node, symbols, state) + w_d_map[get_uuid(node, state)] = (lib_node_work, lib_node_depth) + + if entry is not None: + # If the scope being analyzed is a map, multiply the work by the number of iterations of the map. + if isinstance(entry, nd.MapEntry): + nmap: nd.Map = entry.map + range: Range = nmap.range + n_exec = range.num_elements_exact() + work = work * sp.simplify(n_exec) + else: + print('WARNING: Only Map scopes are supported in work analysis for now. Assuming 1 iteration.') + + # Work inside a state can simply be summed up. But now we need to find the depth of a state (i.e. longest path). + # Since dataflow graph is a DAG, this can be done in linear time. 
+    max_depth = sp.sympify(0)
+    # only do this if we are analyzing depth
+    if analyze_tasklet == get_tasklet_work_depth or analyze_tasklet == get_tasklet_avg_par:
+        # Calculate the maximum depth of the scope by finding the 'deepest' path from the source to the sink. This is done by
+        # a traversal in topological order, where each node propagates its current max depth for all incoming paths.
+        traversal_q = deque()
+        visited = set()
+        # find all starting nodes
+        if entry:
+            # the entry is the starting node
+            traversal_q.append((entry, sp.sympify(0), None))
+        else:
+            for node in scope_nodes:
+                if len(state.in_edges(node)) == 0:
+                    # This node is a start node of the traversal
+                    traversal_q.append((node, sp.sympify(0), None))
+        # this map keeps track of the length of the longest path ending at each node so far seen.
+        depth_map = {}
+        while traversal_q:
+            node, in_depth, in_edge = traversal_q.popleft()
+
+            if in_edge is not None:
+                visited.add(in_edge)
+
+            n_depth = sp.simplify(in_depth + w_d_map[get_uuid(node, state)][1])
+
+            if node in depth_map:
+                depth_map[node] = sp.Max(depth_map[node], n_depth)
+            else:
+                depth_map[node] = n_depth
+
+            out_edges = state.out_edges(node)
+            # Only advance to next node, if all incoming edges have been visited or the current node is the entry (aka starting node).
+            # If the current node is the exit of the scope, we stop, such that we don't leave the scope.
+            if (all(iedge in visited for iedge in state.in_edges(node)) or node == entry) and node != scope_exit:
+                # If we encounter a nested map, we must not analyze its contents (as they have already been recursively analyzed).
+                # Hence, we continue from the outgoing edges of the corresponding exit.
+ if isinstance(node, nd.EntryNode) and node != entry: + exit_node = state.exit_node(node) + # replace out_edges with the out_edges of the scope exit node + out_edges = state.out_edges(exit_node) + for oedge in out_edges: + traversal_q.append((oedge.dst, depth_map[node], oedge)) + if len(out_edges) == 0 or node == scope_exit: + # We have reached an end node --> update max_depth + max_depth = sp.Max(max_depth, depth_map[node]) + + # summarise work / depth of the whole scope in the dictionary + scope_result = (sp.simplify(work), sp.simplify(max_depth)) + w_d_map[get_uuid(state)] = scope_result + return scope_result + + +def state_work_depth(state: SDFGState, w_d_map: Dict[str, sp.Expr], analyze_tasklet, + symbols) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a state. + + :param state: The state to analyze. + :param w_d_map: The result will be saved to this map. + :param analyze_tasklet: Function used to analyze tasklet nodes. + :param symbols: A dictionary mapping local nested SDFG symbols to global symbols. + :return: A tuple containing the work and depth of the state. + """ + work, depth = scope_work_depth(state, w_d_map, analyze_tasklet, symbols, None) + return work, depth + + +def analyze_sdfg(sdfg: SDFG, w_d_map: Dict[str, sp.Expr], analyze_tasklet) -> None: + """ + Analyze a given SDFG. We can either analyze work, work and depth or average parallelism. + + :note: SDFGs should have split interstate edges. This means there should be no interstate edges containing both a + condition and an assignment. + :param sdfg: The SDFG to analyze. + :param w_d_map: Dictionary of SDFG elements to (work, depth) tuples. Result will be saved in here. + :param analyze_tasklet: The function used to analyze tasklet nodes. Analyzes either just work, work and depth or average parallelism. + """ + + # deepcopy such that original sdfg not changed + sdfg = deepcopy(sdfg) + + # Run state propagation for all SDFGs recursively. 
This is necessary to determine the number of times each state + # will be executed, or to determine upper bounds for that number (such as in the case of branching) + for sd in sdfg.all_sdfgs_recursive(): + propagation.propagate_states(sd, concretize_dynamic_unbounded=True) + + # Analyze the work and depth of the SDFG. + symbols = {} + sdfg_work_depth(sdfg, w_d_map, analyze_tasklet, symbols) + + # Note: This posify could be done more often to improve performance. + array_symbols = get_array_size_symbols(sdfg) + for k, (v_w, v_d) in w_d_map.items(): + # The symeval replaces nested SDFG symbols with their global counterparts. + v_w = posify_certain_symbols(symeval(v_w, symbols), array_symbols) + v_d = posify_certain_symbols(symeval(v_d, symbols), array_symbols) + w_d_map[k] = (v_w, v_d) + + +################################################################################ +# Utility functions for running the analysis from the command line ############# +################################################################################ + + +def main() -> None: + + parser = argparse.ArgumentParser('work_depth', + usage='python work_depth.py [-h] filename --analyze {work,workDepth,avgPar}', + description='Analyze the work/depth of an SDFG.') + + parser.add_argument('filename', type=str, help='The SDFG file to analyze.') + parser.add_argument('--analyze', + choices=['work', 'workDepth', 'avgPar'], + default='workDepth', + help='Choose what to analyze. 
Default: workDepth') + + args = parser.parse_args() + + if not os.path.exists(args.filename): + print(args.filename, 'does not exist.') + exit() + + if args.analyze == 'workDepth': + analyze_tasklet = get_tasklet_work_depth + elif args.analyze == 'avgPar': + analyze_tasklet = get_tasklet_avg_par + elif args.analyze == 'work': + analyze_tasklet = get_tasklet_work + + sdfg = SDFG.from_file(args.filename) + work_depth_map = {} + analyze_sdfg(sdfg, work_depth_map, analyze_tasklet) + + if args.analyze == 'workDepth': + for k, v, in work_depth_map.items(): + work_depth_map[k] = (str(sp.simplify(v[0])), str(sp.simplify(v[1]))) + elif args.analyze == 'work': + for k, v, in work_depth_map.items(): + work_depth_map[k] = str(sp.simplify(v[0])) + elif args.analyze == 'avgPar': + for k, v, in work_depth_map.items(): + work_depth_map[k] = str(sp.simplify(v[0] / v[1]) if str(v[1]) != '0' else 0) # work / depth = avg par + + result_whole_sdfg = work_depth_map[get_uuid(sdfg)] + + print(80 * '-') + if args.analyze == 'workDepth': + print("Work:\t", result_whole_sdfg[0]) + print("Depth:\t", result_whole_sdfg[1]) + elif args.analyze == 'work': + print("Work:\t", result_whole_sdfg) + elif args.analyze == 'avgPar': + print("Average Parallelism:\t", result_whole_sdfg) + print(80 * '-') + + +if __name__ == '__main__': + main() diff --git a/tests/sdfg/work_depth_tests.py b/tests/sdfg/work_depth_tests.py new file mode 100644 index 0000000000..133afe8ae4 --- /dev/null +++ b/tests/sdfg/work_depth_tests.py @@ -0,0 +1,201 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Contains test cases for the work depth analysis. 
""" +import dace as dc +from dace.sdfg.work_depth_analysis.work_depth import analyze_sdfg, get_tasklet_work_depth +from dace.sdfg.work_depth_analysis.helpers import get_uuid +import sympy as sp + +from dace.transformation.interstate import NestSDFG +from dace.transformation.dataflow import MapExpansion + +# TODO: add tests for library nodes (e.g. reduce, matMul) + +N = dc.symbol('N') +M = dc.symbol('M') +K = dc.symbol('K') + + +@dc.program +def single_map(x: dc.float64[N], y: dc.float64[N], z: dc.float64[N]): + z[:] = x + y + + +@dc.program +def single_for_loop(x: dc.float64[N], y: dc.float64[N]): + for i in range(N): + x[i] += y[i] + + +@dc.program +def if_else(x: dc.int64[1000], y: dc.int64[1000], z: dc.int64[1000], sum: dc.int64[1]): + if x[10] > 50: + z[:] = x + y # 1000 work, 1 depth + else: + for i in range(100): # 100 work, 100 depth + sum += x[i] + + +@dc.program +def if_else_sym(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], sum: dc.int64[1]): + if x[10] > 50: + z[:] = x + y # N work, 1 depth + else: + for i in range(K): # K work, K depth + sum += x[i] + + +@dc.program +def nested_sdfg(x: dc.float64[N], y: dc.float64[N], z: dc.float64[N]): + single_map(x, y, z) + single_for_loop(x, y) + + +@dc.program +def nested_maps(x: dc.float64[N, M], y: dc.float64[N, M], z: dc.float64[N, M]): + z[:, :] = x + y + + +@dc.program +def nested_for_loops(x: dc.float64[N], y: dc.float64[K]): + for i in range(N): + for j in range(K): + x[i] += y[j] + + +@dc.program +def nested_if_else(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], sum: dc.int64[1]): + if x[10] > 50: + if x[9] > 50: + z[:] = x + y # N work, 1 depth + z[:] += 2 * x # 2*N work, 2 depth --> total outer if: 3*N work, 3 depth + else: + if y[9] > 50: + for i in range(K): + sum += x[i] # K work, K depth + else: + for j in range(M): + sum += x[j] # M work, M depth + z[:] = x + y # N work, depth 1 --> total inner else: M+N work, M+1 depth + # --> total outer else: Max(K, M+N) work, Max(K, M+1) depth + # --> total 
over both branches: Max(K, M+N, 3*N) work, Max(K, M+1, 3) depth + + +@dc.program +def max_of_positive_symbol(x: dc.float64[N]): + if x[0] > 0: + for i in range(2 * N): # work 2*N^2, depth 2*N + x += 1 + else: + for j in range(3 * N): # work 3*N^2, depth 3*N + x += 1 + # total is work 3*N^2, depth 3*N without any max + + +@dc.program +def multiple_array_sizes(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], x2: dc.int64[M], y2: dc.int64[M], + z2: dc.int64[M], x3: dc.int64[K], y3: dc.int64[K], z3: dc.int64[K]): + if x[0] > 0: + z[:] = 2 * x + y # work 2*N, depth 2 + elif x[1] > 0: + z2[:] = 2 * x2 + y2 # work 2*M + 3, depth 5 + z2[0] += 3 + z[1] + z[2] + elif x[2] > 0: + z3[:] = 2 * x3 + y3 # work 2*K, depth 2 + elif x[3] > 0: + z[:] = 3 * x + y + 1 # work 3*N, depth 3 + # --> work= Max(3*N, 2*M, 2*K) and depth = 5 + + +@dc.program +def unbounded_while_do(x: dc.float64[N]): + while x[0] < 100: + x += 1 + + +@dc.program +def unbounded_do_while(x: dc.float64[N]): + while True: + x += 1 + if x[0] >= 100: + break + + +@dc.program +def unbounded_nonnegify(x: dc.float64[N]): + while x[0] < 100: + if x[1] < 42: + x += 3 * x + else: + x += x + + +@dc.program +def continue_for_loop(x: dc.float64[N]): + for i in range(N): + if x[i] > 100: + continue + x += 1 + + +@dc.program +def break_for_loop(x: dc.float64[N]): + for i in range(N): + if x[i] > 100: + break + x += 1 + + +@dc.program +def break_while_loop(x: dc.float64[N]): + while x[0] > 10: + if x[1] > 100: + break + x += 1 + + +tests_cases = [ + (single_map, (N, 1)), + (single_for_loop, (N, N)), + (if_else, (1000, 100)), + (if_else_sym, (sp.Max(K, N), sp.Max(1, K))), + (nested_sdfg, (2 * N, N + 1)), + (nested_maps, (M * N, 1)), + (nested_for_loops, (K * N, K * N)), + (nested_if_else, (sp.Max(K, 3 * N, M + N), sp.Max(3, K, M + 1))), + (max_of_positive_symbol, (3 * N**2, 3 * N)), + (multiple_array_sizes, (sp.Max(2 * K, 3 * N, 2 * M + 3), 5)), + (unbounded_while_do, (sp.Symbol('num_execs_0_2', nonnegative=True) * N, 
sp.Symbol('num_execs_0_2', + nonnegative=True))), + # We get this Max(1, num_execs), since it is a do-while loop, but the num_execs symbol does not capture this. + (unbounded_do_while, (sp.Max(1, sp.Symbol('num_execs_0_1', nonnegative=True)) * N, + sp.Max(1, sp.Symbol('num_execs_0_1', nonnegative=True)))), + (unbounded_nonnegify, (2 * sp.Symbol('num_execs_0_7', nonnegative=True) * N, + 2 * sp.Symbol('num_execs_0_7', nonnegative=True))), + (continue_for_loop, (sp.Symbol('num_execs_0_6', nonnegative=True) * N, sp.Symbol('num_execs_0_6', + nonnegative=True))), + (break_for_loop, (N**2, N)), + (break_while_loop, (sp.Symbol('num_execs_0_5', nonnegative=True) * N, sp.Symbol('num_execs_0_5', nonnegative=True))) +] + + +def test_work_depth(): + good = 0 + failed = 0 + exception = 0 + failed_tests = [] + for test, correct in tests_cases: + w_d_map = {} + sdfg = test.to_sdfg() + if 'nested_sdfg' in test.name: + sdfg.apply_transformations(NestSDFG) + if 'nested_maps' in test.name: + sdfg.apply_transformations(MapExpansion) + + analyze_sdfg(sdfg, w_d_map, get_tasklet_work_depth) + res = w_d_map[get_uuid(sdfg)] + # check result + assert correct == res + + +if __name__ == '__main__': + test_work_depth() From 1cb9f9fa459390df0267b1f9365bb62793563b95 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 13:58:33 +0200 Subject: [PATCH 40/48] Added support for StructureViews. 
--- dace/codegen/compiled_sdfg.py | 2 +- dace/codegen/dispatcher.py | 4 ++-- dace/codegen/targets/cpu.py | 20 ++++++++++++++++---- dace/codegen/targets/framecode.py | 2 +- dace/data.py | 1 + dace/sdfg/utils.py | 2 +- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index 863e804802..9ee0772eeb 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -473,7 +473,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: else: warnings.warn(f'Casting scalar argument "{a}" from {type(arg).__name__} to {atype.dtype.type}') arglist[i] = atype.dtype.type(arg) - elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray) + elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray) and not isinstance(atype, dt.StructArray) and atype.dtype.as_numpy_dtype() != arg.dtype): # Make exception for vector types if (isinstance(atype.dtype, dtypes.vector) and atype.dtype.vtype.as_numpy_dtype() == arg.dtype): diff --git a/dace/codegen/dispatcher.py b/dace/codegen/dispatcher.py index 0b4f58d5ef..5972f5759d 100644 --- a/dace/codegen/dispatcher.py +++ b/dace/codegen/dispatcher.py @@ -504,11 +504,11 @@ def get_copy_dispatcher(self, src_node, dst_node, edge, sdfg, state): dst_is_data = True # Skip copies to/from views where edge matches - if src_is_data and isinstance(src_node.desc(sdfg), dt.View): + if src_is_data and isinstance(src_node.desc(sdfg), (dt.StructureView, dt.View)): e = sdutil.get_view_edge(state, src_node) if e is edge: return None - if dst_is_data and isinstance(dst_node.desc(sdfg), dt.View): + if dst_is_data and isinstance(dst_node.desc(sdfg), (dt.StructureView, dt.View)): e = sdutil.get_view_edge(state, dst_node) if e is edge: return None diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 3cd262e050..1fa4778806 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -215,9 +215,21 @@ def 
allocate_view(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.A ancestor=0, is_write=is_write) if not declared: - declaration_stream.write(f'{atype} {aname};', sdfg, state_id, node) ctypedef = dtypes.pointer(nodedesc.dtype).ctype self._dispatcher.declared_arrays.add(aname, DefinedType.Pointer, ctypedef) + if isinstance(nodedesc, data.StructureView): + for k, v in nodedesc.members.items(): + if isinstance(v, data.Data): + ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype + defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer + self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef) + self._dispatcher.defined_vars.add(f"{name}.{k}", defined_type, ctypedef) + # TODO: Find a better way to do this (the issue is with pointers of pointers) + if atype.endswith('*'): + atype = atype[:-1] + if value.startswith('&'): + value = value[1:] + declaration_stream.write(f'{atype} {aname};', sdfg, state_id, node) allocation_stream.write(f'{aname} = {value};', sdfg, state_id, node) def allocate_reference(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.AccessNode, @@ -311,7 +323,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d if not isinstance(nodedesc.dtype, dtypes.opaque): arrsize_bytes = arrsize * nodedesc.dtype.bytes - if isinstance(nodedesc, data.Structure): + if isinstance(nodedesc, data.Structure) and not isinstance(nodedesc, data.StructureView): declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") define_var(name, DefinedType.Pointer, nodedesc.ctype) for k, v in nodedesc.members.items(): @@ -322,7 +334,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream, declaration_stream, allocation_stream) return - if isinstance(nodedesc, data.View): + if 
isinstance(nodedesc, (data.StructureView, data.View)): return self.allocate_view(sdfg, dfg, state_id, node, function_stream, declaration_stream, allocation_stream) if isinstance(nodedesc, data.Reference): return self.allocate_reference(sdfg, dfg, state_id, node, function_stream, declaration_stream, @@ -487,7 +499,7 @@ def deallocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, dtypes.AllocationLifetime.External) self._dispatcher.declared_arrays.remove(alloc_name, is_global=is_global) - if isinstance(nodedesc, (data.Scalar, data.View, data.Stream, data.Reference)): + if isinstance(nodedesc, (data.Scalar, data.StructureView, data.View, data.Stream, data.Reference)): return elif (nodedesc.storage == dtypes.StorageType.CPU_Heap or (nodedesc.storage == dtypes.StorageType.Register and symbolic.issymbolic(arrsize, sdfg.constants))): diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 52915f51b5..9ee5c2ef17 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -749,7 +749,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG): instances = access_instances[sdfg.sdfg_id][name] # A view gets "allocated" everywhere it appears - if isinstance(desc, data.View): + if isinstance(desc, (data.StructureView, data.View)): for s, n in instances: self.to_allocate[s].append((sdfg, s, n, False, True, False)) self.to_allocate[s].append((sdfg, s, n, False, False, True)) diff --git a/dace/data.py b/dace/data.py index 99d7ffc774..bf771db1d4 100644 --- a/dace/data.py +++ b/dace/data.py @@ -510,6 +510,7 @@ def validate(self): if self.lifetime != dtypes.AllocationLifetime.Scope: raise ValueError('Only Scope allocation lifetime is supported for Views') + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. 
""" diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index d08518b10c..3396335ece 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -1396,7 +1396,7 @@ def is_nonfree_sym_dependent(node: nd.AccessNode, desc: dt.Data, state: SDFGStat :param state: the state that contains the node :param fsymbols: the free symbols to check against """ - if isinstance(desc, dt.View): + if isinstance(desc, (dt.StructureView, dt.View)): # Views can be non-free symbol dependent due to the adjacent edges. e = get_view_edge(state, node) if e.data: From 5a2c4602c2341f057a5159c3cbe2437f33ab24e8 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 13:58:58 +0200 Subject: [PATCH 41/48] Added tests for StructArrays. --- tests/sdfg/data/struct_array_test.py | 184 +++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 tests/sdfg/data/struct_array_test.py diff --git a/tests/sdfg/data/struct_array_test.py b/tests/sdfg/data/struct_array_test.py new file mode 100644 index 0000000000..9b40379e53 --- /dev/null +++ b/tests/sdfg/data/struct_array_test.py @@ -0,0 +1,184 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+import ctypes +import dace +import numpy as np + +from scipy import sparse + + +def test_read_struct_array(): + + L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('array_of_csr_to_dense') + + sdfg.add_datadesc('A', csr_obj[L]) + sdfg.add_array('B', [L, M, N], dace.float32) + + sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + bme, bmx = state.add_map('b', dict(b='0:L')) + bme.map.schedule = dace.ScheduleType.Sequential + + vcsr = state.add_access('vcsr') + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_memlet_path(A, bme, vcsr, dst_conn='views', memlet=dace.Memlet(data='A', subset='b')) + state.add_edge(vcsr, None, indptr, 'views', memlet=dace.Memlet.from_array('vcsr.indptr', csr_obj.members['indptr'])) + state.add_edge(vcsr, None, indices, 'views', memlet=dace.Memlet.from_array('vcsr.indices', csr_obj.members['indices'])) + state.add_edge(vcsr, None, data, 'views', memlet=dace.Memlet.from_array('vcsr.data', csr_obj.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = 
state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, bmx, B, memlet=dace.Memlet(data='B', subset='b, 0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = np.ndarray((10,), dtype=sparse.csr_matrix) + dace_A = np.ndarray((10,), dtype=ctypes.c_void_p) + B = np.zeros((10, 20, 20), dtype=np.float32) + + ctypes_A = [] + for b in range(10): + A[b] = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + ctypes_obj = csr_obj.dtype._typeclass.as_ctypes()(indptr=A[b].indptr.__array_interface__['data'][0], + indices=A[b].indices.__array_interface__['data'][0], + data=A[b].data.__array_interface__['data'][0]) + ctypes_A.append(ctypes_obj) # This is needed to keep the object alive ... 
+ dace_A[b] = ctypes.addressof(ctypes_obj) + + func(A=dace_A, B=B, L=A.shape[0], M=A[0].shape[0], N=A[0].shape[1], nnz=A[0].nnz) + ref = np.ndarray((10, 20, 20), dtype=np.float32) + for b in range(10): + ref[b] = A[b].toarray() + + assert np.allclose(B, ref) + + +def test_write_struct_array(): + + L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('array_dense_to_csr') + + sdfg.add_array('A', [L, M, N], dace.float32) + sdfg.add_datadesc('B', csr_obj[L]) + + sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[k, i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[k, i, j] == 0')) + A = if_body.add_access('A') + vcsr = if_body.add_access('vcsr') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='k, i, j', other_subset='idx')) + if_body.add_edge(data, 'views', vcsr, None, 
dace.Memlet(data='vcsr.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', vcsr, None, dace.Memlet(data='vcsr.indices', subset='0:nnz')) + if_body.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + vcsr = i_guard.add_access('vcsr') + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', vcsr, None, dace.Memlet(data='vcsr.indptr', subset='0:M+1')) + i_guard.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + vcsr = i_after.add_access('vcsr') + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 'views', vcsr, None, dace.Memlet(data='vcsr.indptr', subset='0:M+1')) + i_after.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + + k_before, k_guard, k_after = sdfg.add_loop(None, i_before, None, 'k', '0', 'k < L', 'k + 1', loop_end_state=i_after) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + B = np.ndarray((10,), dtype=sparse.csr_matrix) + dace_B = np.ndarray((10,), dtype=ctypes.c_void_p) + A 
= np.empty((10, 20, 20), dtype=np.float32) + + ctypes_B = [] + for b in range(10): + B[b] = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A[b] = B[b].toarray() + nnz = B[b].nnz + B[b].indptr[:] = -1 + B[b].indices[:] = -1 + B[b].data[:] = -1 + ctypes_obj = csr_obj.dtype._typeclass.as_ctypes()(indptr=B[b].indptr.__array_interface__['data'][0], + indices=B[b].indices.__array_interface__['data'][0], + data=B[b].data.__array_interface__['data'][0]) + ctypes_B.append(ctypes_obj) # This is needed to keep the object alive ... + dace_B[b] = ctypes.addressof(ctypes_obj) + + func(A=A, B=dace_B, L=B.shape[0], M=B[0].shape[0], N=B[0].shape[1], nnz=nnz) + for b in range(10): + assert np.allclose(A[b], B[b].toarray()) + + +if __name__ == '__main__': + test_read_struct_array() + test_write_struct_array() From f1b0c73dffee4468119cd1575edecc9f1fa7bdab Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 15:15:24 +0200 Subject: [PATCH 42/48] Fixed serialization. 
--- dace/data.py | 22 +++++++++++++++++++++- dace/properties.py | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dace/data.py b/dace/data.py index bf771db1d4..37d532ac44 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1102,9 +1102,29 @@ def __init__(self, pool=False): self.stype = stype - dtype = stype.dtype + if stype: + dtype = stype.dtype + else: + dtype = dtypes.int8 super(StructArray, self).__init__(dtype, shape, transient, allow_conflicts, storage, location, strides, offset, may_alias, lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) + + @classmethod + def from_json(cls, json_obj, context=None): + # Create dummy object + ret = cls(None, ()) + serialize.set_properties_from_json(ret, json_obj, context=context) + + # Default shape-related properties + if not ret.offset: + ret.offset = [0] * len(ret.shape) + if not ret.strides: + # Default strides are C-ordered + ret.strides = [_prod(ret.shape[i + 1:]) for i in range(len(ret.shape))] + if ret.total_size == 0: + ret.total_size = _prod(ret.shape) + + return ret @make_properties diff --git a/dace/properties.py b/dace/properties.py index fb37ec7a7c..0bec65d0ec 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1408,7 +1408,7 @@ def to_string(obj): def to_json(self, obj): if obj is None: return None - return obj.dtype.to_json() + return obj.to_json() @staticmethod def from_json(obj, context=None): From 82c2bb82315fdb94a2033b84295ed888859c5b62 Mon Sep 17 00:00:00 2001 From: Tiziano De Matteis <5871117+TizianoDeMatteis@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:44:27 +0200 Subject: [PATCH 43/48] Have memory type as argument for fpga auto interleave (#1352) Co-authored-by: Tiziano De Matteis --- dace/transformation/auto/fpga.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dace/transformation/auto/fpga.py b/dace/transformation/auto/fpga.py index 4295699cdb..573341e1f6 100644 --- a/dace/transformation/auto/fpga.py +++ 
b/dace/transformation/auto/fpga.py @@ -44,24 +44,28 @@ def fpga_global_to_local(sdfg: SDFG, max_size: int = 1048576) -> None: print(f'Applied {len(converted)} Global-To-Local{": " if len(converted)>0 else "."} {", ".join(converted)}') -def fpga_rr_interleave_containers_to_banks(sdfg: SDFG, num_banks: int = 4): +def fpga_rr_interleave_containers_to_banks(sdfg: SDFG, num_banks: int = 4, memory_type: str = "DDR"): """ Allocates the (global) arrays to FPGA off-chip memory banks, interleaving them in a Round-Robin (RR) fashion. This applies to all the arrays in the SDFG hierarchy. :param sdfg: The SDFG to operate on. :param num_banks: number of off-chip memory banks to consider + :param memory_type: type of off-chip memory, either "DDR" or "HBM" (if the target FPGA supports it) :return: a list containing the number of (transient) arrays allocated to each bank :note: Operates in-place on the SDFG. """ + if memory_type.upper() not in {"DDR", "HBM"}: + raise ValueError("Memory type should be either \"DDR\" or \"HBM\"") + # keep track of memory allocated to each bank num_allocated = [0 for i in range(num_banks)] i = 0 for sd, aname, desc in sdfg.arrays_recursive(): if not isinstance(desc, dt.Stream) and desc.storage == dtypes.StorageType.FPGA_Global and desc.transient: - desc.location["memorytype"] = "ddr" + desc.location["memorytype"] = memory_type.upper() desc.location["bank"] = str(i % num_banks) num_allocated[i % num_banks] = num_allocated[i % num_banks] + 1 i = i + 1 From c5889a4e3092a89a5466f6b8c2fe29d3ea3ad1a1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 21 Aug 2023 17:20:43 +0200 Subject: [PATCH 44/48] Addressed comments. 
--- dace/codegen/targets/cpp.py | 2 ++ dace/codegen/targets/cpu.py | 15 +++++++++------ dace/data.py | 6 +++--- dace/dtypes.py | 2 +- dace/properties.py | 8 +++++--- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index 093a324d9a..d3d4f50ccd 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -370,6 +370,8 @@ def make_const(expr: str) -> str: # Register defined variable dispatcher.defined_vars.add(pointer_name, defined_type, typedef, allow_shadowing=True) + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and structures. + # NOTE: Since structures are implemented as pointers, we replace dots with arrows. expr = expr.replace('.', '->') return (typedef + ref, pointer_name, expr) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 20615a3136..0464672390 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -55,10 +55,13 @@ def __init__(self, frame_codegen, sdfg): # Keep track of generated NestedSDG, and the name of the assigned function self._generated_nested_sdfg = dict() + # NOTE: Multi-nesting with StructArrays must be further investigated. 
def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): for k, v in struct.members.items(): if isinstance(v, data.Structure): _visit_structure(v, args, f'{prefix}.{k}') + elif isinstance(v, data.StructArray): + _visit_structure(v.stype, args, f'{prefix}.{k}') elif isinstance(v, data.Data): args[f'{prefix}.{k}'] = v @@ -71,11 +74,7 @@ def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): elif isinstance(arg_type, data.StructArray): desc = sdfg.arrays[name] desc = desc.stype - for attr in dir(desc): - value = getattr(desc, attr) - if isinstance(value, data.Data): - assert attr in sdfg.arrays - arglist[attr] = value + _visit_structure(desc, arglist, name) for name, arg_type in arglist.items(): if isinstance(arg_type, (data.Scalar, data.Structure)): @@ -300,6 +299,8 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d name = node.data alloc_name = cpp.ptr(name, nodedesc, sdfg, self._frame) name = alloc_name + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and + # NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows. 
alloc_name = alloc_name.replace('.', '->') if nodedesc.transient is False: @@ -324,7 +325,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d arrsize_bytes = arrsize * nodedesc.dtype.bytes if isinstance(nodedesc, data.Structure) and not isinstance(nodedesc, data.StructureView): - declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") + declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type};\n") define_var(name, DefinedType.Pointer, nodedesc.ctype) for k, v in nodedesc.members.items(): if isinstance(v, data.Data): @@ -1183,6 +1184,8 @@ def memlet_definition(self, if not types: types = self._dispatcher.defined_vars.get(ptr, is_global=True) var_type, ctypedef = types + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and + # NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows. ptr = ptr.replace('.', '->') if fpga.is_fpga_array(desc): diff --git a/dace/data.py b/dace/data.py index 37d532ac44..5f05cbfcc8 100644 --- a/dace/data.py +++ b/dace/data.py @@ -374,7 +374,7 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) - name = Property(dtype=str, desc="Structure name") + name = Property(dtype=str, desc="Structure type name") def __init__(self, members: Dict[str, Data], @@ -478,7 +478,7 @@ def as_arg(self, with_types=True, for_call=False, name=None): def __getitem__(self, s): """ This is syntactic sugar that allows us to define an array type with the following syntax: ``Structure[N,M]`` - :return: A ``data.Array`` data descriptor. + :return: A ``data.StructArray`` data descriptor. 
""" if isinstance(s, list) or isinstance(s, tuple): return StructArray(self, tuple(s)) @@ -1084,7 +1084,7 @@ class StructArray(Array): stype = NestedDataClassProperty(allow_none=True, default=None) def __init__(self, - stype, + stype: Structure, shape, transient=False, allow_conflicts=False, diff --git a/dace/dtypes.py b/dace/dtypes.py index 888f74f6b9..f0bac23958 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -835,9 +835,9 @@ def as_ctypes(self): fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) - # fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) + # NOTE: Each call to `type` returns a different class, so we need to cache it to ensure uniqueness. _FFI_CTYPES[self] = struct_class return struct_class diff --git a/dace/properties.py b/dace/properties.py index 0bec65d0ec..0adcfe3e97 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1392,12 +1392,14 @@ def __get__(self, obj, objtype=None) -> 'Data': @property def dtype(self): - return pydoc.locate("dace.data.Data") + from dace import data as dt + return dt.Data @staticmethod def from_string(s): - dtype = pydoc.locate("dace.data.{}".format(s)) - if dtype is None or not isinstance(dtype, pydoc.locate("dace.data.Data")): + from dace import data as dt + dtype = getattr(dt, s, None) + if dtype is None or not isinstance(dtype, dt.Data): raise ValueError("Not a valid data type: {}".format(s)) return dtype From eabbd1d6cd451556813ffea93cfa771767ef8561 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 22 Aug 2023 15:52:45 +0200 Subject: [PATCH 45/48] Addressed comments. 
--- dace/data.py | 27 +++++++++++---------------- dace/properties.py | 4 ++++ tests/sdfg/data/struct_array_test.py | 23 +++++++++++------------ tests/sdfg/data/structure_test.py | 8 -------- 4 files changed, 26 insertions(+), 36 deletions(-) diff --git a/dace/data.py b/dace/data.py index 5f05cbfcc8..3b571e6537 100644 --- a/dace/data.py +++ b/dace/data.py @@ -5,7 +5,7 @@ from collections import OrderedDict from numbers import Number -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import numpy import sympy as sp @@ -19,7 +19,8 @@ from dace import serialize, symbolic from dace.codegen import cppunparse from dace.properties import (DebugInfoProperty, DictProperty, EnumProperty, ListProperty, NestedDataClassProperty, - Property, ShapeProperty, SymbolicProperty, TypeClassProperty, make_properties) + OrderedDictProperty, Property, ShapeProperty, SymbolicProperty, TypeClassProperty, + make_properties) def create_datadescriptor(obj, no_custom_desc=False): @@ -370,15 +371,14 @@ def _arrays_from_json(obj, context=None): class Structure(Data): """ Base class for structures. 
""" - members = Property(dtype=OrderedDict, - desc="Dictionary of structure members", - from_json=_arrays_from_json, - to_json=_arrays_to_json) + members = OrderedDictProperty(default=OrderedDict(), + desc="Dictionary of structure members", + from_json=_arrays_from_json, + to_json=_arrays_to_json) name = Property(dtype=str, desc="Structure type name") def __init__(self, - members: Dict[str, Data], - order: List[str] = None, + members: Union[Dict[str, Data], List[Tuple[str, Data]]], name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, @@ -386,19 +386,14 @@ def __init__(self, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - order = order or list(members.keys()) - if set(members.keys()) != set(order): - raise ValueError('Order must contain all members of the structure.') - - # TODO: Should we make a deep-copy here? - self.members = OrderedDict((k, members[k]) for k in order) - + self.members = OrderedDict(members) for k, v in self.members.items(): v.transient = transient + self.name = name fields_and_types = OrderedDict() symbols = set() - for k, v in members.items(): + for k, v in self.members.items(): if isinstance(v, Structure): symbols |= v.free_symbols fields_and_types[k] = (v.dtype, str(v.total_size)) diff --git a/dace/properties.py b/dace/properties.py index 0adcfe3e97..61e569341f 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -145,11 +145,15 @@ def fs(obj, *args, **kwargs): self._from_json = lambda *args, **kwargs: dace.serialize.from_json(*args, known_type=dtype, **kwargs) else: self._from_json = from_json + if self.from_json != from_json: + self.from_json = from_json if to_json is None: self._to_json = dace.serialize.to_json else: self._to_json = to_json + if self.to_json != to_json: + self.to_json = to_json if meta_to_json is None: diff --git a/tests/sdfg/data/struct_array_test.py b/tests/sdfg/data/struct_array_test.py index 
9b40379e53..8e0f2f4739 100644 --- a/tests/sdfg/data/struct_array_test.py +++ b/tests/sdfg/data/struct_array_test.py @@ -10,12 +10,11 @@ def test_read_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') - csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix', - transient=True) + csr_obj_view = dace.data.StructureView( + [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], + name='CSRMatrix', + transient=True) sdfg = dace.SDFG('array_of_csr_to_dense') @@ -84,13 +83,13 @@ def test_read_struct_array(): def test_write_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix') - csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix', - transient=True) + csr_obj = dace.data.Structure( + [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView( + dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + name='CSRMatrix', + transient=True) sdfg = dace.SDFG('array_dense_to_csr') diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 995aacb2fd..02b8f0c174 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -12,7 +12,6 @@ def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -69,7 +68,6 @@ def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -147,10 +145,8 @@ def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix', transient=True) @@ -258,7 +254,6 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -320,7 +315,6 @@ def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -402,7 +396,6 @@ def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ 
-453,7 +446,6 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') From c5ca99ad37e7ceef6da71026c3c8bb579f64117f Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 29 Aug 2023 23:05:10 -0700 Subject: [PATCH 46/48] Eliminate extraneous branch-end gotos in code generation (#1355) --- dace/codegen/control_flow.py | 77 +++++++++++++++----- dace/codegen/targets/framecode.py | 2 +- tests/codegen/control_flow_detection_test.py | 29 ++++++++ 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/dace/codegen/control_flow.py b/dace/codegen/control_flow.py index 182604c892..1b97241e47 100644 --- a/dace/codegen/control_flow.py +++ b/dace/codegen/control_flow.py @@ -82,6 +82,9 @@ class ControlFlow: # a string with its generated code. 
dispatch_state: Callable[[SDFGState], str] + # The parent control flow block of this one, used to avoid generating extraneous ``goto``s + parent: Optional['ControlFlow'] + @property def first_state(self) -> SDFGState: """ @@ -222,11 +225,18 @@ def as_cpp(self, codegen, symbols) -> str: out_edges = sdfg.out_edges(elem.state) for j, e in enumerate(out_edges): if e not in self.gotos_to_ignore: - # If this is the last generated edge and it leads - # to the next state, skip emitting goto + # Skip gotos to immediate successors successor = None - if (j == (len(out_edges) - 1) and (i + 1) < len(self.elements)): - successor = self.elements[i + 1].first_state + # If this is the last generated edge + if j == (len(out_edges) - 1): + if (i + 1) < len(self.elements): + # If last edge leads to next state in block + successor = self.elements[i + 1].first_state + elif i == len(self.elements) - 1: + # If last edge leads to first state in next block + next_block = _find_next_block(self) + if next_block is not None: + successor = next_block.first_state expr += elem.generate_transition(sdfg, e, successor) else: @@ -478,13 +488,14 @@ def children(self) -> List[ControlFlow]: def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[InterstateEdge], leave_edge: Edge[InterstateEdge], back_edges: List[Edge[InterstateEdge]], - dispatch_state: Callable[[SDFGState], str]) -> Union[ForScope, WhileScope]: + dispatch_state: Callable[[SDFGState], + str], parent_block: GeneralBlock) -> Union[ForScope, WhileScope]: """ Helper method that constructs the correct structured loop construct from a set of states. Can construct for or while loops. 
""" - body = GeneralBlock(dispatch_state, [], [], [], [], [], True) + body = GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True) guard_inedges = sdfg.in_edges(guard) increment_edges = [e for e in guard_inedges if e in back_edges] @@ -535,10 +546,10 @@ def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[Intersta # Also ignore assignments in increment edge (handled in for stmt) body.assignments_to_ignore.append(increment_edge) - return ForScope(dispatch_state, itvar, guard, init, condition, update, body, init_edges) + return ForScope(dispatch_state, parent_block, itvar, guard, init, condition, update, body, init_edges) # Otherwise, it is a while loop - return WhileScope(dispatch_state, guard, condition, body) + return WhileScope(dispatch_state, parent_block, guard, condition, body) def _cases_from_branches( @@ -617,6 +628,31 @@ def _child_of(node: SDFGState, parent: SDFGState, ptree: Dict[SDFGState, SDFGSta return False +def _find_next_block(block: ControlFlow) -> Optional[ControlFlow]: + """ + Returns the immediate successor control flow block. + """ + # Find block in parent + parent = block.parent + if parent is None: + return None + ind = next(i for i, b in enumerate(parent.children) if b is block) + if ind == len(parent.children) - 1 or isinstance(parent, (IfScope, IfElseChain, SwitchCaseScope)): + # If last block, or other children are not reachable from current node (branches), + # recursively continue upwards + return _find_next_block(parent) + return parent.children[ind + 1] + + +def _reset_block_parents(block: ControlFlow): + """ + Fixes block parents after processing. 
+ """ + for child in block.children: + child.parent = block + _reset_block_parents(child) + + def _structured_control_flow_traversal(sdfg: SDFG, start: SDFGState, ptree: Dict[SDFGState, SDFGState], @@ -645,7 +681,7 @@ def _structured_control_flow_traversal(sdfg: SDFG, """ def make_empty_block(): - return GeneralBlock(dispatch_state, [], [], [], [], [], True) + return GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True) # Traverse states in custom order visited = set() if visited is None else visited @@ -657,7 +693,7 @@ def make_empty_block(): if node in visited or node is stop: continue visited.add(node) - stateblock = SingleState(dispatch_state, node) + stateblock = SingleState(dispatch_state, parent_block, node) oe = sdfg.out_edges(node) if len(oe) == 0: # End state @@ -708,12 +744,14 @@ def make_empty_block(): if (len(oe) == 2 and oe[0].data.condition_sympy() == sp.Not(oe[1].data.condition_sympy())): # If without else if oe[0].dst is mergestate: - branch_block = IfScope(dispatch_state, sdfg, node, oe[1].data.condition, cblocks[oe[1]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[1].data.condition, + cblocks[oe[1]]) elif oe[1].dst is mergestate: - branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition, + cblocks[oe[0]]) else: - branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]], - cblocks[oe[1]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition, + cblocks[oe[0]], cblocks[oe[1]]) else: # If there are 2 or more edges (one is not the negation of the # other): @@ -721,10 +759,10 @@ def make_empty_block(): if switch: # If all edges are of form "x == y" for a single x and # integer y, it is a switch/case - branch_block = SwitchCaseScope(dispatch_state, sdfg, node, switch[0], switch[1]) + branch_block = SwitchCaseScope(dispatch_state, 
parent_block, sdfg, node, switch[0], switch[1]) else: # Otherwise, create if/else if/.../else goto exit chain - branch_block = IfElseChain(dispatch_state, sdfg, node, + branch_block = IfElseChain(dispatch_state, parent_block, sdfg, node, [(e.data.condition, cblocks[e] if e in cblocks else make_empty_block()) for e in oe]) # End of branch classification @@ -739,11 +777,11 @@ def make_empty_block(): loop_exit = None scope = None if ptree[oe[0].dst] == node and ptree[oe[1].dst] != node: - scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state) + scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state, parent_block) body_start = oe[0].dst loop_exit = oe[1].dst elif ptree[oe[1].dst] == node and ptree[oe[0].dst] != node: - scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state) + scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state, parent_block) body_start = oe[1].dst loop_exit = oe[0].dst @@ -836,7 +874,8 @@ def structured_control_flow_tree(sdfg: SDFG, dispatch_state: Callable[[SDFGState if len(common_frontier) == 1: branch_merges[state] = next(iter(common_frontier)) - root_block = GeneralBlock(dispatch_state, [], [], [], [], [], True) + root_block = GeneralBlock(dispatch_state, None, [], [], [], [], [], True) _structured_control_flow_traversal(sdfg, sdfg.start_state, ptree, branch_merges, back_edges, dispatch_state, root_block) + _reset_block_parents(root_block) return root_block diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 9ee5c2ef17..dfdbbb392b 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -471,7 +471,7 @@ def dispatch_state(state: SDFGState) -> str: # If disabled, generate entire graph as general control flow block states_topological = list(sdfg.topological_sort(sdfg.start_state)) last = states_topological[-1] - cft = cflow.GeneralBlock(dispatch_state, + cft = 
cflow.GeneralBlock(dispatch_state, None, [cflow.SingleState(dispatch_state, s, s is last) for s in states_topological], [], [], [], [], False) diff --git a/tests/codegen/control_flow_detection_test.py b/tests/codegen/control_flow_detection_test.py index 99d6a39b29..982140f7ed 100644 --- a/tests/codegen/control_flow_detection_test.py +++ b/tests/codegen/control_flow_detection_test.py @@ -120,6 +120,33 @@ def test_single_outedge_branch(): assert np.allclose(res, 2) +def test_extraneous_goto(): + + @dace.program + def tester(a: dace.float64[20]): + if a[0] < 0: + a[1] = 1 + a[2] = 1 + + sdfg = tester.to_sdfg(simplify=True) + assert 'goto' not in sdfg.generate_code()[0].code + + +def test_extraneous_goto_nested(): + + @dace.program + def tester(a: dace.float64[20]): + if a[0] < 0: + if a[0] < 1: + a[1] = 1 + else: + a[1] = 2 + a[2] = 1 + + sdfg = tester.to_sdfg(simplify=True) + assert 'goto' not in sdfg.generate_code()[0].code + + if __name__ == '__main__': test_for_loop_detection() test_invalid_for_loop_detection() @@ -128,3 +155,5 @@ def test_single_outedge_branch(): test_edge_sympy_function('TrueFalse') test_edge_sympy_function('SwitchCase') test_single_outedge_branch() + test_extraneous_goto() + test_extraneous_goto_nested() From c34de8e3336343b0f11bddd0b61099ab1f22eb47 Mon Sep 17 00:00:00 2001 From: Lukas Truemper Date: Sat, 2 Sep 2023 15:34:08 +0200 Subject: [PATCH 47/48] TaskletFusion: Fix additional edges in case of none-connectors --- .../transformation/dataflow/tasklet_fusion.py | 3 ++ tests/transformations/tasklet_fusion_test.py | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/dace/transformation/dataflow/tasklet_fusion.py b/dace/transformation/dataflow/tasklet_fusion.py index 99f8f625be..d6b4a3039b 100644 --- a/dace/transformation/dataflow/tasklet_fusion.py +++ b/dace/transformation/dataflow/tasklet_fusion.py @@ -249,6 +249,9 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): t1.language) for in_edge in graph.in_edges(t1): + 
if in_edge.src_conn is None and isinstance(in_edge.src, dace.nodes.EntryNode): + if len(new_tasklet.in_connectors) > 0: + continue graph.add_edge(in_edge.src, in_edge.src_conn, new_tasklet, in_edge.dst_conn, in_edge.data) for in_edge in graph.in_edges(t2): diff --git a/tests/transformations/tasklet_fusion_test.py b/tests/transformations/tasklet_fusion_test.py index c7fd6802d5..743010e8c9 100644 --- a/tests/transformations/tasklet_fusion_test.py +++ b/tests/transformations/tasklet_fusion_test.py @@ -213,6 +213,49 @@ def test_map_with_tasklets(language: str, with_data: bool): ref = map_with_tasklets.f(A, B) assert (np.allclose(C, ref)) +def test_none_connector(): + @dace.program + def sdfg_none_connector(A: dace.float32[32], B: dace.float32[32]): + tmp = dace.define_local([32], dace.float32) + for i in dace.map[0:32]: + with dace.tasklet: + a >> tmp[i] + a = 0 + + tmp2 = dace.define_local([32], dace.float32) + for i in dace.map[0:32]: + with dace.tasklet: + a << A[i] + b >> tmp2[i] + b = a + 1 + + + for i in dace.map[0:32]: + with dace.tasklet: + a << tmp[i] + b << tmp2[i] + c >> B[i] + c = a + b + + sdfg = sdfg_none_connector.to_sdfg() + sdfg.simplify() + applied = sdfg.apply_transformations_repeated(MapFusion) + assert applied == 2 + + map_entry = None + for node in sdfg.start_state.nodes(): + if isinstance(node, dace.nodes.MapEntry): + map_entry = node + break + + assert map_entry is not None + assert len([edge.src_conn for edge in sdfg.start_state.out_edges(map_entry) if edge.src_conn is None]) == 1 + + applied = sdfg.apply_transformations_repeated(TaskletFusion) + assert applied == 2 + + assert sdfg.start_state.out_degree(map_entry) == 1 + assert len([edge.src_conn for edge in sdfg.start_state.out_edges(map_entry) if edge.src_conn is None]) == 0 if __name__ == '__main__': test_basic() @@ -224,3 +267,4 @@ def test_map_with_tasklets(language: str, with_data: bool): test_map_with_tasklets(language='Python', with_data=True) test_map_with_tasklets(language='CPP', 
with_data=False) test_map_with_tasklets(language='CPP', with_data=True) + test_none_connector() From f95f8162a4e77d7a386ccd20c9e4ef71a3ad9787 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Sep 2023 23:58:33 -0700 Subject: [PATCH 48/48] Fix dynamic memlet propagation condition (#1364) --- dace/sdfg/propagation.py | 4 ++-- tests/python_frontend/argument_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 0fec4812b7..0554775dcd 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -1477,8 +1477,8 @@ def propagate_subset(memlets: List[Memlet], new_memlet.volume = simplify(sum(m.volume for m in memlets) * functools.reduce(lambda a, b: a * b, rng.size(), 1)) if any(m.dynamic for m in memlets): new_memlet.dynamic = True - elif symbolic.issymbolic(new_memlet.volume) and any(s not in defined_variables - for s in new_memlet.volume.free_symbols): + if symbolic.issymbolic(new_memlet.volume) and any(s not in defined_variables + for s in new_memlet.volume.free_symbols): new_memlet.dynamic = True new_memlet.volume = 0 diff --git a/tests/python_frontend/argument_test.py b/tests/python_frontend/argument_test.py index 1f43337eb8..cb47188029 100644 --- a/tests/python_frontend/argument_test.py +++ b/tests/python_frontend/argument_test.py @@ -2,6 +2,7 @@ import dace import pytest +import numpy as np N = dace.symbol('N') @@ -16,5 +17,29 @@ def test_extra_args(): imgcpy([[1, 2], [3, 4]], [[4, 3], [2, 1]], 0.0, 1.0) +def test_missing_arguments_regression(): + + def nester(a, b, T): + for i, j in dace.map[0:20, 0:20]: + start = 0 + end = min(T, 6) + + elem: dace.float64 = 0 + for ii in range(start, end): + if ii % 2 == 0: + elem += b[ii] + + a[j, i] = elem + + @dace.program + def tester(x: dace.float64[20, 20]): + gdx = np.ones((10, ), dace.float64) + for T in range(2): + nester(x, gdx, T) + + tester.to_sdfg().compile() + + if __name__ == '__main__': 
test_extra_args() + test_missing_arguments_regression()