From 57abd284500c1990ad2744160eb92aab5d08756d Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 14:41:47 +0200 Subject: [PATCH 01/48] Added NestedDataClassProperty for nested data. --- dace/properties.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/dace/properties.py b/dace/properties.py index 6e883f8549..30a3e0913b 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1381,6 +1381,45 @@ def from_json(obj, context=None): raise TypeError("Cannot parse type from: {}".format(obj)) +class NestedDataClassProperty(Property): + """ Custom property type for nested data. """ + + def __get__(self, obj, objtype=None) -> 'Data': + return super().__get__(obj, objtype) + + @property + def dtype(self): + return pydoc.locate("dace.data.Data") + + @staticmethod + def from_string(s): + dtype = pydoc.locate("dace.data.{}".format(s)) + if dtype is None or not isinstance(dtype, pydoc.locate("dace.data.Data")): + raise ValueError("Not a valid data type: {}".format(s)) + return dtype + + @staticmethod + def to_string(obj): + return obj.to_string() + + def to_json(self, obj): + if obj is None: + return None + return obj.dtype.to_json() + + @staticmethod + def from_json(obj, context=None): + if obj is None: + return None + elif isinstance(obj, str): + return NestedDataClassProperty.from_string(obj) + elif isinstance(obj, dict): + # Let the deserializer handle this + return dace.serialize.from_json(obj) + else: + raise TypeError("Cannot parse type from: {}".format(obj)) + + class LibraryImplementationProperty(Property): """ Property for choosing an implementation type for a library node. On the From 09465d242fbf33036ebf35e1c9b43357c60648ca Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 14:42:33 +0200 Subject: [PATCH 02/48] Added Structures and StructArrays. 
--- dace/data.py | 121 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/dace/data.py b/dace/data.py index 2fc5f334c6..886fed75de 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1,10 +1,10 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import copy as cp import ctypes import functools -import re + from numbers import Number -from typing import Any, Dict, Optional, Sequence, Set, Tuple +from typing import Any, Dict, Optional, Sequence, Set, Tuple, Union import numpy import sympy as sp @@ -17,9 +17,8 @@ import dace.dtypes as dtypes from dace import serialize, symbolic from dace.codegen import cppunparse -from dace.properties import (CodeProperty, DebugInfoProperty, DictProperty, EnumProperty, ListProperty, Property, - ReferenceProperty, ShapeProperty, SubsetProperty, SymbolicProperty, TypeClassProperty, - make_properties) +from dace.properties import (DebugInfoProperty, DictProperty, EnumProperty, ListProperty, NestedDataClassProperty, + Property, ShapeProperty, SymbolicProperty, TypeClassProperty, make_properties) def create_datadescriptor(obj, no_custom_desc=False): @@ -342,6 +341,86 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): return new_desc +class Structure(Data): + """ Base class for structures. 
""" + + def __init__(self, + shape: Sequence[Union[int, symbolic.SymbolicType]] = None, + transient: bool = False, + storage: dtypes.StorageType = dtypes.StorageType.Default, + location: Dict[str, str] = None, + lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, + debuginfo: dtypes.DebugInfo = None): + fields = { + attr: getattr(self, attr) + for attr in dir(self) if ( + not attr in dir(Data) and + not attr.startswith("_") and + not attr in ('total_size', 'offset', 'start_offset', 'strides'))} + fields_and_types = dict() + symbols = set() + for attr in dir(self): + if (attr in dir(Data) or attr.startswith("__") or + attr in ('total_size', 'offset', 'start_offset', 'strides')): + continue + value = getattr(self, attr) + if isinstance(value, Array): + symbols |= value.free_symbols + fields_and_types[attr] = (dtypes.pointer(value.dtype), str(_prod(value.shape))) + elif isinstance(value, Scalar): + symbols |= value.free_symbols + fields_and_types[attr] = value.dtype + elif isinstance(value, (sp.Basic, symbolic.SymExpr)): + symbols |= value.free_symbols + fields_and_types[attr] = symbolic.symtype(value) + elif isinstance(value, (int, numpy.integer)): + fields_and_types[attr] = dtypes.typeclass(type(value)) + else: + raise TypeError(f"Attribute {attr}'s value {value} has unsupported type: {type(value)}") + for s in symbols: + if str(s) in fields_and_types: + continue + if hasattr(s, "dtype"): + fields_and_types[str(s)] = s.dtype + else: + fields_and_types[str(s)] = dtypes.int32 + dtype = dtypes.struct(self.__class__.__name__, **fields_and_types) + shape = shape or (1,) + super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) + + @property + def total_size(self): + return -1 + + @property + def offset(self): + return [0] + + @property + def start_offset(self): + return 0 + + @property + def strides(self): + return [1] + + def as_arg(self, with_types=True, for_call=False, name=None): + if self.storage is 
dtypes.StorageType.GPU_Global: + return Array(self.dtype, [1]).as_arg(with_types, for_call, name) + if not with_types or for_call: + return name + return self.dtype.as_arg(name) + + def __getitem__(self, s): + """ This is syntactic sugar that allows us to define an array type + with the following syntax: ``Structure[N,M]`` + :return: A ``data.Array`` data descriptor. + """ + if isinstance(s, list) or isinstance(s, tuple): + return StructArray(self, tuple(s)) + return StructArray(self, (s, )) + + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. """ @@ -902,6 +981,36 @@ def free_symbols(self): return result +@make_properties +class StructArray(Array): + """ Array of Structures. """ + + stype = NestedDataClassProperty(allow_none=True, default=None) + + def __init__(self, + stype, + shape, + transient=False, + allow_conflicts=False, + storage=dtypes.StorageType.Default, + location=None, + strides=None, + offset=None, + may_alias=False, + lifetime=dtypes.AllocationLifetime.Scope, + alignment=0, + debuginfo=None, + total_size=-1, + start_offset=None, + optional=None, + pool=False): + + self.stype = stype + dtype = stype.dtype + super(StructArray, self).__init__(dtype, shape, transient, allow_conflicts, storage, location, strides, offset, + may_alias, lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) + + @make_properties class View(Array): """ From 51776a1b746126194fc1eebcece20adbe88be302 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 15:09:00 +0200 Subject: [PATCH 03/48] Break array lengths down to their symbolic tokents. 
--- dace/dtypes.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dace/dtypes.py b/dace/dtypes.py index dee2283f25..230197bc6f 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -791,6 +791,7 @@ def from_json(json_obj, context=None): return ret def _parse_field_and_types(self, **fields_and_types): + from dace.symbolic import pystr_to_symbolic self._data = dict() self._length = dict() self.bytes = 0 @@ -799,8 +800,12 @@ def _parse_field_and_types(self, **fields_and_types): t, l = v if not isinstance(t, pointer): raise TypeError("Only pointer types may have a length.") - if l not in fields_and_types.keys(): - raise ValueError("Length {} not a field of struct {}".format(l, self.name)) + sym_tokens = pystr_to_symbolic(l).free_symbols + for sym in sym_tokens: + if str(sym) not in fields_and_types.keys(): + raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") + # if l not in fields_and_types.keys(): + # raise ValueError("Length {} not a field of struct {}".format(l, self.name)) self._data[k] = t self._length[k] = l self.bytes += t.bytes From b23ed86de823398321ef6f620e3db0d3fd7f857b Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 18 Jul 2023 15:11:09 +0200 Subject: [PATCH 04/48] Allow structures to have fields whose name doesn't start with underscore. --- dace/properties.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dace/properties.py b/dace/properties.py index 30a3e0913b..679c0b9596 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import ast from collections import OrderedDict import copy @@ -412,12 +412,12 @@ def initialize_properties(obj, *args, **kwargs): except AttributeError: if not prop.unmapped: raise PropertyError("Property {} is unassigned in __init__ for {}".format(name, cls.__name__)) - # Assert that there are no fields in the object not captured by - # properties, unless they are prefixed with "_" - for name, prop in obj.__dict__.items(): - if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): - raise PropertyError("{} : Variable {} is neither a Property nor " - "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) + # Assert that there are no fields in the object not captured by properties, unless they are prefixed with "_" + if not isinstance(obj, dace.data.Structure): + for name, prop in obj.__dict__.items(): + if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): + raise PropertyError("{} : Variable {} is neither a Property nor " + "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) # Replace the __init__ method cls.__init__ = initialize_properties From 777821f0a940bc2f981ef5c04749c0f49968e0d1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:21:54 +0200 Subject: [PATCH 05/48] Structures now have a "members" dictionary. Their dtype is a pointer to the corresponding dtypes.struct typeclass. 
--- dace/data.py | 64 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/dace/data.py b/dace/data.py index 886fed75de..0f1ef1f266 100644 --- a/dace/data.py +++ b/dace/data.py @@ -341,42 +341,54 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): return new_desc +def _arrays_to_json(arrays): + if arrays is None: + return None + return {k: serialize.to_json(v) for k, v in arrays.items()} + + +def _arrays_from_json(obj, context=None): + if obj is None: + return {} + return {k: serialize.from_json(v, context) for k, v in obj.items()} + + +@make_properties class Structure(Data): """ Base class for structures. """ + members = Property(dtype=dict, + desc="Dictionary of structure members", + from_json=_arrays_from_json, + to_json=_arrays_to_json) + def __init__(self, - shape: Sequence[Union[int, symbolic.SymbolicType]] = None, + members: Dict[str, Any], transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - fields = { - attr: getattr(self, attr) - for attr in dir(self) if ( - not attr in dir(Data) and - not attr.startswith("_") and - not attr in ('total_size', 'offset', 'start_offset', 'strides'))} + self.members = members or {} fields_and_types = dict() symbols = set() - for attr in dir(self): - if (attr in dir(Data) or attr.startswith("__") or - attr in ('total_size', 'offset', 'start_offset', 'strides')): - continue - value = getattr(self, attr) - if isinstance(value, Array): - symbols |= value.free_symbols - fields_and_types[attr] = (dtypes.pointer(value.dtype), str(_prod(value.shape))) - elif isinstance(value, Scalar): - symbols |= value.free_symbols - fields_and_types[attr] = value.dtype - elif isinstance(value, (sp.Basic, symbolic.SymExpr)): - symbols |= value.free_symbols - fields_and_types[attr] = 
symbolic.symtype(value) - elif isinstance(value, (int, numpy.integer)): - fields_and_types[attr] = dtypes.typeclass(type(value)) + for k, v in members.items(): + if isinstance(v, Structure): + symbols |= v.free_symbols + fields_and_types[k] = (v.dtype, str(v.total_size)) + elif isinstance(v, Array): + symbols |= v.free_symbols + fields_and_types[k] = (dtypes.pointer(v.dtype), str(_prod(v.shape))) + elif isinstance(v, Scalar): + symbols |= v.free_symbols + fields_and_types[k] = v.dtype + elif isinstance(v, (sp.Basic, symbolic.SymExpr)): + symbols |= v.free_symbols + fields_and_types[k] = symbolic.symtype(v) + elif isinstance(v, (int, numpy.integer)): + fields_and_types[k] = dtypes.typeclass(type(v)) else: - raise TypeError(f"Attribute {attr}'s value {value} has unsupported type: {type(value)}") + raise TypeError(f"Attribute {k}'s value {v} has unsupported type: {type(v)}") for s in symbols: if str(s) in fields_and_types: continue @@ -384,8 +396,8 @@ def __init__(self, fields_and_types[str(s)] = s.dtype else: fields_and_types[str(s)] = dtypes.int32 - dtype = dtypes.struct(self.__class__.__name__, **fields_and_types) - shape = shape or (1,) + dtype = dtypes.pointer(dtypes.struct(self.__class__.__name__, **fields_and_types)) + shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) @property From ebf72068e4b27ed777fb835bc75c835980d502d6 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:24:37 +0200 Subject: [PATCH 06/48] dtype.structs store their ctype in `_FFI_CTYPES`. --- dace/dtypes.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dace/dtypes.py b/dace/dtypes.py index 230197bc6f..d01209469f 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
""" A module that contains various DaCe type definitions. """ from __future__ import print_function import ctypes @@ -654,6 +654,8 @@ def from_json(json_obj, context=None): def as_ctypes(self): """ Returns the ctypes version of the typeclass. """ + if isinstance(self._typeclass, struct): + return ctypes.POINTER(self._typeclass.as_ctypes()) return ctypes.POINTER(_FFI_CTYPES[self.type]) def as_numpy_dtype(self): @@ -804,8 +806,6 @@ def _parse_field_and_types(self, **fields_and_types): for sym in sym_tokens: if str(sym) not in fields_and_types.keys(): raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") - # if l not in fields_and_types.keys(): - # raise ValueError("Length {} not a field of struct {}".format(l, self.name)) self._data[k] = t self._length[k] = l self.bytes += t.bytes @@ -817,16 +817,24 @@ def _parse_field_and_types(self, **fields_and_types): def as_ctypes(self): """ Returns the ctypes version of the typeclass. """ + if self in _FFI_CTYPES: + return _FFI_CTYPES[self] # Populate the ctype fields for the struct class. fields = [] for k, v in self._data.items(): if isinstance(v, pointer): - fields.append((k, ctypes.c_void_p)) # ctypes.POINTER(_FFI_CTYPES[v.type]))) + if isinstance(v._typeclass, struct): + fields.append((k, ctypes.POINTER(v._typeclass.as_ctypes()))) + else: + fields.append((k, ctypes.c_void_p)) + elif isinstance(v, struct): + fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) + _FFI_CTYPES[self] = struct_class return struct_class def as_numpy_dtype(self): From c52a48257ffbb7933aec3b04fd7029cdafce77a8 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:26:03 +0200 Subject: [PATCH 07/48] Reverted underscore exception for Structures. 
--- dace/properties.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dace/properties.py b/dace/properties.py index 679c0b9596..2225b6d853 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -413,11 +413,10 @@ def initialize_properties(obj, *args, **kwargs): if not prop.unmapped: raise PropertyError("Property {} is unassigned in __init__ for {}".format(name, cls.__name__)) # Assert that there are no fields in the object not captured by properties, unless they are prefixed with "_" - if not isinstance(obj, dace.data.Structure): - for name, prop in obj.__dict__.items(): - if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): - raise PropertyError("{} : Variable {} is neither a Property nor " - "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) + for name, prop in obj.__dict__.items(): + if (name not in properties and not name.startswith("_") and name not in dir(type(obj))): + raise PropertyError("{} : Variable {} is neither a Property nor " + "an internal variable (prefixed with \"_\")".format(str(type(obj)), name)) # Replace the __init__ method cls.__init__ = initialize_properties From 40cc858f992d71a49730d934268c31d380d8e82b Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:26:40 +0200 Subject: [PATCH 08/48] Small fixes. 
--- dace/codegen/compiled_sdfg.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index d0d29cfa1e..863e804802 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -452,9 +452,10 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # GPU scalars are pointers, so this is fine if atype.storage != dtypes.StorageType.GPU_Global: raise TypeError('Passing an array to a scalar (type %s) in argument "%s"' % (atype.dtype.ctype, a)) - elif not isinstance(atype, dt.Array) and not isinstance(atype.dtype, dtypes.callback) and not isinstance( - arg, - (atype.dtype.type, sp.Basic)) and not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype): + elif (not isinstance(atype, (dt.Array, dt.Structure)) and + not isinstance(atype.dtype, dtypes.callback) and + not isinstance(arg, (atype.dtype.type, sp.Basic)) and + not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype)): if isinstance(arg, int) and atype.dtype.type == np.int64: pass elif isinstance(arg, float) and atype.dtype.type == np.float64: @@ -521,7 +522,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # Construct init args, which only consist of the symbols symbols = self._free_symbols initargs = tuple( - actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg + actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg for arg, actype, atype, aname in callparams if aname in symbols) # Replace arrays with their base host/device pointers @@ -531,7 +532,8 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: try: newargs = tuple( - actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg for arg, actype, atype in newargs) + actype(arg) if not isinstance(arg, (ctypes._SimpleCData)) else arg + for arg, actype, atype in newargs) except TypeError: # Pinpoint bad argument for i, (arg, actype, _) in 
enumerate(newargs): From dd73aaa8816864958fc4fd547e16d5372519f167 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:27:17 +0200 Subject: [PATCH 09/48] WIP: Replace ',' with '->' to quickly support nested data. --- dace/codegen/targets/cpp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index afbc6fca12..7d54e985f5 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -370,6 +370,8 @@ def make_const(expr: str) -> str: # Register defined variable dispatcher.defined_vars.add(pointer_name, defined_type, typedef, allow_shadowing=True) + expr = expr.replace('.', '->') + return (typedef + ref, pointer_name, expr) From 623a7f88838f0a3bc033333bef28e4de03544d37 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:28:08 +0200 Subject: [PATCH 10/48] Recursively add to arglist nested data descriptors. --- dace/codegen/targets/cpu.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index eb7d232966..2759c9744c 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -55,10 +55,30 @@ def __init__(self, frame_codegen, sdfg): # Keep track of generated NestedSDG, and the name of the assigned function self._generated_nested_sdfg = dict() - # Keeps track of generated connectors, so we know how to access them in - # nested scopes + def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): + for k, v in struct.members.items(): + if isinstance(v, data.Structure): + _visit_structure(v, args, f'{prefix}.{k}') + elif isinstance(v, data.Data): + args[f'{prefix}.{k}'] = v + + # Keeps track of generated connectors, so we know how to access them in nested scopes + arglist = dict(self._frame.arglist) for name, arg_type in self._frame.arglist.items(): - if isinstance(arg_type, data.Scalar): + if 
isinstance(arg_type, data.Structure): + desc = sdfg.arrays[name] + _visit_structure(arg_type, arglist, name) + elif isinstance(arg_type, data.StructArray): + desc = sdfg.arrays[name] + desc = desc.stype + for attr in dir(desc): + value = getattr(desc, attr) + if isinstance(value, data.Data): + assert attr in sdfg.arrays + arglist[attr] = value + + for name, arg_type in arglist.items(): + if isinstance(arg_type, (data.Scalar, data.Structure)): # GPU global memory is only accessed via pointers # TODO(later): Fix workaround somehow if arg_type.storage is dtypes.StorageType.GPU_Global: From 1e5baddcbda6e0d78bd9526af7e1a0b78627a4e3 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:28:50 +0200 Subject: [PATCH 11/48] Recursively look into nested data to emit definitions. --- dace/codegen/targets/framecode.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 6f302c11ba..be6b85602a 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -150,15 +150,23 @@ def generate_fileheader(self, sdfg: SDFG, global_stream: CodeIOStream, backend: for _, arrname, arr in sdfg.arrays_recursive(): if arr is not None: datatypes.add(arr.dtype) + + def _emit_definitions(dtype: dtypes.typeclass, wrote_something: bool) -> bool: + if isinstance(dtype, dtypes.pointer): + wrote_something = _emit_definitions(dtype._typeclass, wrote_something) + elif isinstance(dtype, dtypes.struct): + for field in dtype.fields.values(): + wrote_something = _emit_definitions(field, wrote_something) + if hasattr(dtype, 'emit_definition'): + if not wrote_something: + global_stream.write("", sdfg) + global_stream.write(dtype.emit_definition(), sdfg) + return wrote_something # Emit unique definitions wrote_something = False for typ in datatypes: - if hasattr(typ, 'emit_definition'): - if not wrote_something: - global_stream.write("", sdfg) - 
wrote_something = True - global_stream.write(typ.emit_definition(), sdfg) + wrote_something = _emit_definitions(typ, wrote_something) if wrote_something: global_stream.write("", sdfg) From 36d4e826ac769f1cb99ecc3c8fe8206c0690cdab Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:30:21 +0200 Subject: [PATCH 12/48] SDFG data (_arrays) are now stored in a NestedDict. --- dace/sdfg/sdfg.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 18763e385a..6e4c3587f4 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -48,6 +48,35 @@ from dace.codegen.compiled_sdfg import CompiledSDFG +class NestedDict(dict): + + def __init__(self): + super(NestedDict, self).__init__() + + def __getitem__(self, key): + tokens = key.split('.') + token = tokens.pop(0) + result = super(NestedDict, self).__getitem__(token) + while tokens: + token = tokens.pop(0) + result = result.members[token] + return result + + def __contains__(self, key): + tokens = key.split('.') + token = tokens.pop(0) + result = super(NestedDict, self).__contains__(token) + desc = None + while tokens and result: + if desc is None: + desc = super(NestedDict, self).__getitem__(token) + else: + desc = desc.members[token] + token = tokens.pop(0) + result = token in desc.members + return result + + def _arrays_to_json(arrays): if arrays is None: return None @@ -375,7 +404,7 @@ class SDFG(OrderedDiGraph[SDFGState, InterstateEdge]): name = Property(dtype=str, desc="Name of the SDFG") arg_names = ListProperty(element_type=str, desc='Ordered argument names (used for calling conventions).') constants_prop = Property(dtype=dict, default={}, desc="Compile-time constants") - _arrays = Property(dtype=dict, + _arrays = Property(dtype=NestedDict, desc="Data descriptors for this SDFG", to_json=_arrays_to_json, from_json=_arrays_from_json) @@ -456,7 +485,7 @@ def __init__(self, self._sdfg_list = [self] 
self._start_state: Optional[int] = None self._cached_start_state: Optional[SDFGState] = None - self._arrays = {} # type: Dict[str, dt.Array] + self._arrays = NestedDict() # type: Dict[str, dt.Array] self._labels: Set[str] = set() self.global_code = {'frame': CodeBlock("", dtypes.Language.CPP)} self.init_code = {'frame': CodeBlock("", dtypes.Language.CPP)} From 38a4265a29c64f6100e03f536aecdd09fd160dca Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:31:11 +0200 Subject: [PATCH 13/48] Adjusted the matching check for memlet data and src/dst nodes to not fail for Structures. --- dace/sdfg/validation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index 3bac646479..c963df9d7e 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -587,9 +587,14 @@ def validate_state(state: 'dace.sdfg.SDFGState', break # Check if memlet data matches src or dst nodes - if (e.data.data is not None and (isinstance(src_node, nd.AccessNode) or isinstance(dst_node, nd.AccessNode)) - and (not isinstance(src_node, nd.AccessNode) or e.data.data != src_node.data) - and (not isinstance(dst_node, nd.AccessNode) or e.data.data != dst_node.data)): + name = e.data.data + if isinstance(src_node, nd.AccessNode) and isinstance(sdfg.arrays[src_node.data], dt.Structure): + name = None + if isinstance(dst_node, nd.AccessNode) and isinstance(sdfg.arrays[dst_node.data], dt.Structure): + name = None + if (name is not None and (isinstance(src_node, nd.AccessNode) or isinstance(dst_node, nd.AccessNode)) + and (not isinstance(src_node, nd.AccessNode) or (name != src_node.data and name != e.src_conn)) + and (not isinstance(dst_node, nd.AccessNode) or (name != dst_node.data and name != e.dst_conn))): raise InvalidSDFGEdgeError( "Memlet data does not match source or destination " "data nodes)", From 479cb2ad240dd167a7b26d2665527e04727cffe6 Mon Sep 17 00:00:00 2001 From: Alexandros 
Nikolaos Ziogas Date: Wed, 19 Jul 2023 19:32:51 +0200 Subject: [PATCH 14/48] Added tests. --- tests/sdfg/data/structure_test.py | 240 ++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 tests/sdfg/data/structure_test.py diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py new file mode 100644 index 0000000000..3783a98068 --- /dev/null +++ b/tests/sdfg/data/structure_test.py @@ -0,0 +1,240 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import numpy as np + +from scipy import sparse + + +def create_structure(name: str, **members) -> dace.data.Structure: + + StructureClass = type(name, (dace.data.Structure, ), {}) + return StructureClass(members) + + +def test_read_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + + sdfg = dace.SDFG('csr_to_dense') + + sdfg.add_datadesc('A', CSR) + sdfg.add_array('B', [M, N], dace.float32) + + sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) + sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) + sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', CSR.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', CSR.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', CSR.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + 
jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = CSR.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) + + func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +def test_write_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', CSR) + + sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) + sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) + sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = 
sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 
'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outB = CSR.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + + func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A, B.toarray()) + + +def test_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix', + indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz) + Wrapper = create_structure('WrapperClass', csr=CSR) + + sdfg = dace.SDFG('nested_csr_to_dense') + + sdfg.add_datadesc('A', Wrapper) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = Wrapper.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = 
state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = CSR.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = Wrapper.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +if __name__ == "__main__": + test_read_structure() + test_write_structure() + test_read_nested_structure() From 8365ab34926a01d65a67d93d1b1bbaf2e67eac11 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 20:25:26 +0200 Subject: [PATCH 15/48] Serialization fixes. 
--- dace/sdfg/sdfg.py | 13 ++++++++++--- tests/sdfg/data/structure_test.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 6e4c3587f4..b5598870ec 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -50,8 +50,9 @@ class NestedDict(dict): - def __init__(self): - super(NestedDict, self).__init__() + def __init__(self, mapping=None): + mapping = mapping or {} + super(NestedDict, self).__init__(mapping) def __getitem__(self, key): tokens = key.split('.') @@ -89,6 +90,12 @@ def _arrays_from_json(obj, context=None): return {k: dace.serialize.from_json(v, context) for k, v in obj.items()} +def _nested_arrays_from_json(obj, context=None): + if obj is None: + return NestedDict({}) + return NestedDict({k: dace.serialize.from_json(v, context) for k, v in obj.items()}) + + def _replace_dict_keys(d, old, new): if old in d: if new in d: @@ -407,7 +414,7 @@ class SDFG(OrderedDiGraph[SDFGState, InterstateEdge]): _arrays = Property(dtype=NestedDict, desc="Data descriptors for this SDFG", to_json=_arrays_to_json, - from_json=_arrays_from_json) + from_json=_nested_arrays_from_json) symbols = DictProperty(str, dtypes.typeclass, desc="Global symbols for this SDFG") instrument = EnumProperty(dtype=dtypes.InstrumentationType, diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 3783a98068..5348ecaa5a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -2,12 +2,29 @@ import dace import numpy as np +from dace import serialize +from dace.properties import make_properties from scipy import sparse def create_structure(name: str, **members) -> dace.data.Structure: StructureClass = type(name, (dace.data.Structure, ), {}) + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != name: + raise TypeError("Invalid data type") + + # Create dummy object + ret = StructureClass({}) + 
serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret + + setattr(StructureClass, 'from_json', from_json) + StructureClass = make_properties(StructureClass) + return StructureClass(members) From 14ba6655c883f2f0761ca4ccacfb722d82b7eac3 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 19 Jul 2023 20:29:36 +0200 Subject: [PATCH 16/48] Fixed NestedDict for non-str keys. --- dace/sdfg/sdfg.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index b5598870ec..a4c29c2e89 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -55,16 +55,17 @@ def __init__(self, mapping=None): super(NestedDict, self).__init__(mapping) def __getitem__(self, key): - tokens = key.split('.') + tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) result = super(NestedDict, self).__getitem__(token) while tokens: token = tokens.pop(0) result = result.members[token] return result + def __contains__(self, key): - tokens = key.split('.') + tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) result = super(NestedDict, self).__contains__(token) desc = None From 80d6f10af1efe172560d64b976c451a91670b2fb Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 14:56:21 +0200 Subject: [PATCH 17/48] Added support for transient Structures. 
--- dace/codegen/targets/cpu.py | 16 ++++++++++++++-- dace/data.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 2759c9744c..7ff91cbc7b 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -286,16 +286,17 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d name = node.data alloc_name = cpp.ptr(name, nodedesc, sdfg, self._frame) name = alloc_name + alloc_name = alloc_name.replace('.', '->') if nodedesc.transient is False: return # Check if array is already allocated - if self._dispatcher.defined_vars.has(alloc_name): + if self._dispatcher.defined_vars.has(name): return # Check if array is already declared - declared = self._dispatcher.declared_arrays.has(alloc_name) + declared = self._dispatcher.declared_arrays.has(name) define_var = self._dispatcher.defined_vars.add if nodedesc.lifetime in (dtypes.AllocationLifetime.Persistent, dtypes.AllocationLifetime.External): @@ -308,6 +309,17 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d if not isinstance(nodedesc.dtype, dtypes.opaque): arrsize_bytes = arrsize * nodedesc.dtype.bytes + if isinstance(nodedesc, data.Structure): + declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") + define_var(name, DefinedType.Pointer, nodedesc.ctype) + for k, v in nodedesc.members.items(): + if isinstance(v, data.Data): + ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype + defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer + self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef) + self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream, + declaration_stream, allocation_stream) + return if isinstance(nodedesc, data.View): return self.allocate_view(sdfg, dfg, state_id, 
node, function_stream, declaration_stream, allocation_stream) if isinstance(nodedesc, data.Reference): diff --git a/dace/data.py b/dace/data.py index 0f1ef1f266..838fc43542 100644 --- a/dace/data.py +++ b/dace/data.py @@ -369,7 +369,10 @@ def __init__(self, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): + # TODO: Should we make a deep-copy here? self.members = members or {} + for k, v in self.members.items(): + v.transient = transient fields_and_types = dict() symbols = set() for k, v in members.items(): @@ -433,6 +436,31 @@ def __getitem__(self, s): return StructArray(self, (s, )) +@make_properties +class StructureView(Structure): + """ + Data descriptor that acts as a reference (or view) of another structure. + """ + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'StructureView': + raise TypeError("Invalid data type") + + # Create dummy object + ret = StructureView({}) + serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for Views') + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. """ From 9658c2236b7ba154bccbbd3b839944f4f88c2668 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 14:56:40 +0200 Subject: [PATCH 18/48] Edited tests. 
--- tests/sdfg/data/structure_test.py | 346 +++++++++++++++++++++++++++--- 1 file changed, 321 insertions(+), 25 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 5348ecaa5a..462c6a8e7b 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -7,7 +7,7 @@ from scipy import sparse -def create_structure(name: str, **members) -> dace.data.Structure: +def create_structure(name: str) -> dace.data.Structure: StructureClass = type(name, (dace.data.Structure, ), {}) @@ -25,28 +25,28 @@ def from_json(json_obj, context=None): setattr(StructureClass, 'from_json', from_json) StructureClass = make_properties(StructureClass) - return StructureClass(members) + return StructureClass def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz], rows=M, cols=N, - nnz=nnz) + nnz=nnz)) sdfg = dace.SDFG('csr_to_dense') - sdfg.add_datadesc('A', CSR) + sdfg.add_datadesc('A', csr_obj) sdfg.add_array('B', [M, N], dace.float32) - sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) - sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) - sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) state = sdfg.add_state() @@ -57,9 +57,9 @@ def test_read_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', CSR.members['indptr'])) - 
state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', CSR.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', CSR.members['data'])) + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -79,7 +79,7 @@ def test_read_structure(): A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) B = np.zeros((20, 20), dtype=np.float32) - inpA = CSR.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], data=A.data.__array_interface__['data'][0], rows=A.shape[0], @@ -97,22 +97,22 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz], rows=M, cols=N, - nnz=nnz) + nnz=nnz)) sdfg = dace.SDFG('dense_to_csr') sdfg.add_array('A', [M, N], dace.float32) - sdfg.add_datadesc('B', CSR) + sdfg.add_datadesc('B', csr_obj) - sdfg.add_view('vindptr', CSR.members['indptr'].shape, CSR.members['indptr'].dtype) - sdfg.add_view('vindices', CSR.members['indices'].shape, CSR.members['indices'].dtype) - sdfg.add_view('vdata', CSR.members['data'].shape, CSR.members['data'].dtype) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', 
csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) # Make If if_before = sdfg.add_state('if_before') @@ -167,7 +167,7 @@ def test_write_structure(): B.indices[:] = -1 B.data[:] = -1 - outB = CSR.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], data=B.data.__array_interface__['data'][0], rows=tmp.shape[0], @@ -181,7 +181,204 @@ def test_write_structure(): assert np.allclose(A, B.toarray()) +def test_local_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + tmp_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), transient=True) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', csr_obj) + sdfg.add_datadesc('tmp', tmp_obj) + + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + sdfg.add_view('tmp_vindptr', tmp_obj.members['indptr'].shape, tmp_obj.members['indptr'].dtype) + sdfg.add_view('tmp_vindices', tmp_obj.members['indices'].shape, tmp_obj.members['indices'].dtype) + sdfg.add_view('tmp_vdata', tmp_obj.members['data'].shape, tmp_obj.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, 
if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + tmp = if_body.add_access('tmp') + indices = if_body.add_access('tmp_vindices') + data = if_body.add_access('tmp_vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', tmp, 'data', dace.Memlet(data='tmp.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='tmp_vindices', subset='idx')) + if_body.add_edge(indices, 'views', tmp, 'indices', dace.Memlet(data='tmp.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + tmp = i_guard.add_access('tmp') + indptr = i_guard.add_access('tmp_vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + tmp = i_after.add_access('tmp') + indptr = i_after.add_access('tmp_vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='M')) + i_after.add_edge(indptr, 'views', tmp, 'indptr', 
dace.Memlet(data='tmp.indptr', subset='0:M+1')) + + set_B = sdfg.add_state('set_B') + sdfg.add_edge(i_after, set_B, dace.InterstateEdge()) + tmp = set_B.add_access('tmp') + tmp_indptr = set_B.add_access('tmp_vindptr') + tmp_indices = set_B.add_access('tmp_vindices') + tmp_data = set_B.add_access('tmp_vdata') + set_B.add_edge(tmp, 'indptr', tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + set_B.add_edge(tmp, 'indices', tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) + set_B.add_edge(tmp, 'data', tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) + B = set_B.add_access('B') + B_indptr = set_B.add_access('vindptr') + B_indices = set_B.add_access('vindices') + B_data = set_B.add_access('vdata') + set_B.add_edge(B_indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + set_B.add_edge(B_indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) + set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) + t, me, mx = set_B.add_mapped_tasklet('set_data', + {'idx': '0:nnz'}, + {'__inp': dace.Memlet(data='tmp_vdata', subset='idx')}, + '__out = 2 * __inp', + {'__out': dace.Memlet(data='vdata', subset='idx')}, + external_edges=True, + input_nodes={'tmp_vdata': tmp_data}, + output_nodes={'vdata': B_data}) + + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], 
+ rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + + func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A * 2, B.toarray()) + + def test_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + Wrapper = create_structure('WrapperClass') + wrapper_obj = Wrapper(dict(csr=csr_obj)) + + sdfg = dace.SDFG('nested_csr_to_dense') + + sdfg.add_datadesc('A', wrapper_obj) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), 
dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = csr_obj.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +def test_read_nested_structure_2(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) CSR = create_structure('CSRMatrix', indptr=dace.int32[M + 1], @@ -190,14 +387,16 @@ def test_read_nested_structure(): rows=M, cols=N, nnz=nnz) + CSRView = dace.data.StructureView(CSR.members, transient=True) Wrapper = create_structure('WrapperClass', csr=CSR) - sdfg = dace.SDFG('nested_csr_to_dense') + sdfg = dace.SDFG('nested_csr_to_dense_2') sdfg.add_datadesc('A', Wrapper) sdfg.add_array('B', [M, N], dace.float32) spmat = Wrapper.members['csr'] + sdfg.add_datadesc('vcsr', CSRView) sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) @@ -207,13 +406,15 @@ def test_read_nested_structure(): A = state.add_access('A') B = state.add_access('B') + 
csr = state.add_access('vcsr') indptr = state.add_access('vindptr') indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + state.add_edge(A, 'csr', csr, 'views', dace.Memlet.from_array('A.csr', spmat)) + state.add_edge(csr, 'indptr', indptr, 'views', dace.Memlet.from_array('vcsr.indptr', spmat.members['indptr'])) + state.add_edge(csr, 'indices', indices, 'views', dace.Memlet.from_array('vcsr.indices', spmat.members['indices'])) + state.add_edge(csr, 'data', data, 'views', dace.Memlet.from_array('vcsr.data', spmat.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -251,7 +452,102 @@ def test_read_nested_structure(): assert np.allclose(B, ref) +def test_write_nested_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + CSR = create_structure('CSRMatrix') + csr_obj = CSR(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz)) + Wrapper = create_structure('WrapperClass') + wrapper_obj = Wrapper(dict(csr=csr_obj)) + + sdfg = dace.SDFG('dense_to_csr') + + sdfg.add_array('A', [M, N], dace.float32) + sdfg.add_datadesc('B', wrapper_obj) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = 
sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[i, j] == 0')) + A = if_body.add_access('A') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) + if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.csr.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.csr.indices', subset='0:nnz')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 'views', B, 'indptr', 
dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + tmp = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A = tmp.toarray() + B = tmp.tocsr(copy=True) + B.indptr[:] = -1 + B.indices[:] = -1 + B.data[:] = -1 + + outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) + import ctypes + outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) + + func(A=A, B=outW, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) + + assert np.allclose(A, B.toarray()) + + if __name__ == "__main__": test_read_structure() test_write_structure() + test_local_structure() test_read_nested_structure() + # test_read_nested_structure_2() + test_write_nested_structure() From b1dbb6b385c5186ac16b5be1ea3d394953c6bf17 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 15:32:40 +0200 Subject: [PATCH 19/48] Structures have name attribute (instead of subclassing). 
--- dace/data.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index 838fc43542..e424aca66a 100644 --- a/dace/data.py +++ b/dace/data.py @@ -361,9 +361,11 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) + name = Property(dtype=str, desc="Structure name") def __init__(self, members: Dict[str, Any], + name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, @@ -373,6 +375,7 @@ def __init__(self, self.members = members or {} for k, v in self.members.items(): v.transient = transient + self.name = name fields_and_types = dict() symbols = set() for k, v in members.items(): @@ -399,9 +402,20 @@ def __init__(self, fields_and_types[str(s)] = s.dtype else: fields_and_types[str(s)] = dtypes.int32 - dtype = dtypes.pointer(dtypes.struct(self.__class__.__name__, **fields_and_types)) + dtype = dtypes.pointer(dtypes.struct(name, **fields_and_types)) shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'Structure': + raise TypeError("Invalid data type") + + # Create dummy object + ret = Structure({}) + serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret @property def total_size(self): From 5de2ae35d25b9f78eeecb0080504be34b6577cec Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 15:32:59 +0200 Subject: [PATCH 20/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 192 +++++++++++++++--------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 462c6a8e7b..b3d72b9d7a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -1,6 +1,7 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace import numpy as np +import pytest from dace import serialize from dace.properties import make_properties @@ -21,7 +22,7 @@ def from_json(json_obj, context=None): serialize.set_properties_from_json(ret, json_obj, context=context) return ret - + setattr(StructureClass, 'from_json', from_json) StructureClass = make_properties(StructureClass) @@ -31,13 +32,13 @@ def from_json(json_obj, context=None): def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -80,13 +81,13 @@ def test_read_structure(): B = np.zeros((20, 20), dtype=np.float32) inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], - indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - N=A.shape[1], - nnz=A.nnz) + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) ref = A.toarray() @@ -97,13 +98,13 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in 
('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -168,13 +169,13 @@ def test_write_structure(): B.data[:] = -1 outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -182,23 +183,25 @@ def test_write_structure(): def test_local_structure(): - - M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - tmp_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), transient=True) - sdfg = dace.SDFG('dense_to_csr') + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('dense_to_csr_local') sdfg.add_array('A', [M, N], dace.float32) 
sdfg.add_datadesc('B', csr_obj) @@ -273,16 +276,13 @@ def test_local_structure(): set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) - t, me, mx = set_B.add_mapped_tasklet('set_data', - {'idx': '0:nnz'}, + t, me, mx = set_B.add_mapped_tasklet('set_data', {'idx': '0:nnz'}, {'__inp': dace.Memlet(data='tmp_vdata', subset='idx')}, - '__out = 2 * __inp', - {'__out': dace.Memlet(data='vdata', subset='idx')}, + '__out = 2 * __inp', {'__out': dace.Memlet(data='vdata', subset='idx')}, external_edges=True, input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) - func = sdfg.compile() rng = np.random.default_rng(42) @@ -294,13 +294,13 @@ def test_local_structure(): B.data[:] = -1 outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -309,15 +309,14 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - Wrapper = create_structure('WrapperClass') - wrapper_obj = Wrapper(dict(csr=csr_obj)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + 
nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('nested_csr_to_dense') @@ -378,24 +377,25 @@ def test_read_nested_structure(): assert np.allclose(B, ref) +@pytest.mark.skip def test_read_nested_structure_2(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix', - indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz) - CSRView = dace.data.StructureView(CSR.members, transient=True) - Wrapper = create_structure('WrapperClass', csr=CSR) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + CSRView = dace.data.StructureView(csr_obj.members, transient=True) + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('nested_csr_to_dense_2') - sdfg.add_datadesc('A', Wrapper) + sdfg.add_datadesc('A', wrapper_obj) sdfg.add_array('B', [M, N], dace.float32) - spmat = Wrapper.members['csr'] + spmat = wrapper_obj.members['csr'] sdfg.add_datadesc('vcsr', CSRView) sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) @@ -428,13 +428,14 @@ def test_read_nested_structure_2(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) B = np.zeros((20, 20), dtype=np.float32) - structclass = CSR.dtype._typeclass.as_ctypes() + structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], 
indices=A.indices.__array_interface__['data'][0], data=A.data.__array_interface__['data'][0], @@ -444,7 +445,7 @@ def test_read_nested_structure_2(): K=A.shape[1], nnz=A.nnz) import ctypes - inpW = Wrapper.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) ref = A.toarray() @@ -455,15 +456,14 @@ def test_read_nested_structure_2(): def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - CSR = create_structure('CSRMatrix') - csr_obj = CSR(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz)) - Wrapper = create_structure('WrapperClass') - wrapper_obj = Wrapper(dict(csr=csr_obj)) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') sdfg = dace.SDFG('dense_to_csr') @@ -529,13 +529,13 @@ def test_write_nested_structure(): B.data[:] = -1 outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], - indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + indices=B.indices.__array_interface__['data'][0], + data=B.data.__array_interface__['data'][0], + rows=tmp.shape[0], + cols=tmp.shape[1], + M=tmp.shape[0], + N=tmp.shape[1], + nnz=tmp.nnz) import ctypes outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) @@ -549,5 +549,5 @@ def test_write_nested_structure(): test_write_structure() test_local_structure() test_read_nested_structure() - # test_read_nested_structure_2() + test_read_nested_structure_2() test_write_nested_structure() From 1fbc45f66ebcff4979f7cb05566de56b70e2b1b9 Mon Sep 
17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:08:26 +0200 Subject: [PATCH 21/48] Removed nested data connectors. --- tests/sdfg/data/structure_test.py | 56 +++++++++++++++++-------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index b3d72b9d7a..8636dc1602 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -58,9 +58,9 @@ def test_read_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) @@ -74,6 +74,7 @@ def test_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -129,10 +130,10 @@ def test_write_structure(): indices = if_body.add_access('vindices') data = if_body.add_access('vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + if_body.add_edge(data, 
'views', B, None, dace.Memlet(data='B.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) - if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', B, None, dace.Memlet(data='B.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -151,13 +152,14 @@ def test_write_structure(): indptr = i_guard.add_access('vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) B = i_after.add_access('B') indptr = i_after.add_access('vindptr') t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) - i_after.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -229,10 +231,10 @@ def test_local_structure(): indices = if_body.add_access('tmp_vindices') data = if_body.add_access('tmp_vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', tmp, 'data', dace.Memlet(data='tmp.data', subset='0:nnz')) + if_body.add_edge(data, 'views', tmp, None, dace.Memlet(data='tmp.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='tmp_vindices', subset='idx')) - if_body.add_edge(indices, 'views', 
tmp, 'indices', dace.Memlet(data='tmp.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', tmp, None, dace.Memlet(data='tmp.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -251,12 +253,12 @@ def test_local_structure(): indptr = i_guard.add_access('tmp_vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', tmp, None, dace.Memlet(data='tmp.indptr', subset='0:M+1')) tmp = i_after.add_access('tmp') indptr = i_after.add_access('tmp_vindptr') t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='tmp_vindptr', subset='M')) - i_after.add_edge(indptr, 'views', tmp, 'indptr', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', tmp, None, dace.Memlet(data='tmp.indptr', subset='0:M+1')) set_B = sdfg.add_state('set_B') sdfg.add_edge(i_after, set_B, dace.InterstateEdge()) @@ -264,16 +266,16 @@ def test_local_structure(): tmp_indptr = set_B.add_access('tmp_vindptr') tmp_indices = set_B.add_access('tmp_vindices') tmp_data = set_B.add_access('tmp_vdata') - set_B.add_edge(tmp, 'indptr', tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) - set_B.add_edge(tmp, 'indices', tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) - set_B.add_edge(tmp, 'data', tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) + set_B.add_edge(tmp, None, tmp_indptr, 'views', dace.Memlet(data='tmp.indptr', subset='0:M+1')) + set_B.add_edge(tmp, None, tmp_indices, 'views', dace.Memlet(data='tmp.indices', subset='0:nnz')) + set_B.add_edge(tmp, None, tmp_data, 'views', dace.Memlet(data='tmp.data', subset='0:nnz')) B = set_B.add_access('B') 
B_indptr = set_B.add_access('vindptr') B_indices = set_B.add_access('vindices') B_data = set_B.add_access('vdata') - set_B.add_edge(B_indptr, 'views', B, 'indptr', dace.Memlet(data='B.indptr', subset='0:M+1')) - set_B.add_edge(B_indices, 'views', B, 'indices', dace.Memlet(data='B.indices', subset='0:nnz')) - set_B.add_edge(B_data, 'views', B, 'data', dace.Memlet(data='B.data', subset='0:nnz')) + set_B.add_edge(B_indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) + set_B.add_edge(B_indices, 'views', B, None, dace.Memlet(data='B.indices', subset='0:nnz')) + set_B.add_edge(B_data, 'views', B, None, dace.Memlet(data='B.data', subset='0:nnz')) set_B.add_edge(tmp_indptr, None, B_indptr, None, dace.Memlet(data='tmp_vindptr', subset='0:M+1')) set_B.add_edge(tmp_indices, None, B_indices, None, dace.Memlet(data='tmp_vindices', subset='0:nnz')) t, me, mx = set_B.add_mapped_tasklet('set_data', {'idx': '0:nnz'}, @@ -283,6 +285,7 @@ def test_local_structure(): input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -337,9 +340,9 @@ def test_read_nested_structure(): indices = state.add_access('vindices') data = state.add_access('vdata') - state.add_edge(A, 'indptr', indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - state.add_edge(A, 'indices', indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - state.add_edge(A, 'data', data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) 
@@ -353,6 +356,7 @@ def test_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -429,6 +433,7 @@ def test_read_nested_structure_2(): state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') sdfg.view() + return func = sdfg.compile() rng = np.random.default_rng(42) @@ -489,10 +494,10 @@ def test_write_nested_structure(): indices = if_body.add_access('vindices') data = if_body.add_access('vdata') if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='i, j', other_subset='idx')) - if_body.add_edge(data, 'views', B, 'data', dace.Memlet(data='B.csr.data', subset='0:nnz')) + if_body.add_edge(data, 'views', B, None, dace.Memlet(data='B.csr.data', subset='0:nnz')) t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) - if_body.add_edge(indices, 'views', B, 'indices', dace.Memlet(data='B.csr.indices', subset='0:nnz')) + if_body.add_edge(indices, 'views', B, None, dace.Memlet(data='B.csr.indices', subset='0:nnz')) # Make For Loop for j j_before, j_guard, j_after = sdfg.add_loop(None, if_before, @@ -511,13 +516,14 @@ def test_write_nested_structure(): indptr = i_guard.add_access('vindptr') t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) - i_guard.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + i_guard.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) B = i_after.add_access('B') indptr = i_after.add_access('vindptr') t = i_after.add_tasklet('set_indptr', {}, 
{'__out'}, '__out = nnz') i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) - i_after.add_edge(indptr, 'views', B, 'indptr', dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) + sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -549,5 +555,5 @@ def test_write_nested_structure(): test_write_structure() test_local_structure() test_read_nested_structure() - test_read_nested_structure_2() + # test_read_nested_structure_2() test_write_nested_structure() From 6fa7e53ea4c39752c60b386895a6c9ba4a542b80 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:27:41 +0200 Subject: [PATCH 22/48] Added support for direct access to nested data. --- dace/codegen/targets/cpu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 7ff91cbc7b..137de75c55 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1169,6 +1169,7 @@ def memlet_definition(self, if not types: types = self._dispatcher.defined_vars.get(ptr, is_global=True) var_type, ctypedef = types + ptr = ptr.replace('.', '->') if fpga.is_fpga_array(desc): decouple_array_interfaces = Config.get_bool("compiler", "xilinx", "decouple_array_interfaces") From 71d7c3db0f2391b79281a89732b64d0d4b861e14 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:28:20 +0200 Subject: [PATCH 23/48] WIP: Add nested data free symbols to SDFG. 
--- dace/sdfg/sdfg.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index a4c29c2e89..1f385a4b75 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2005,10 +2005,20 @@ def add_datadesc(self, name: str, datadesc: dt.Data, find_new_name=False) -> str raise NameError(f'Array or Stream with name "{name}" already exists in SDFG') self._arrays[name] = datadesc + def _add_symbols(desc: dt.Data): + if isinstance(desc, dt.Structure): + for v in desc.members.values(): + if isinstance(v, dt.Data): + _add_symbols(v) + for sym in desc.free_symbols: + if sym.name not in self.symbols: + self.add_symbol(sym.name, sym.dtype) + # Add free symbols to the SDFG global symbol storage - for sym in datadesc.free_symbols: - if sym.name not in self.symbols: - self.add_symbol(sym.name, sym.dtype) + # for sym in datadesc.free_symbols: + # if sym.name not in self.symbols: + # self.add_symbol(sym.name, sym.dtype) + _add_symbols(datadesc) return name From e0a4409ff4a2b909f901f1a1592d3b9669387807 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:29:13 +0200 Subject: [PATCH 24/48] Added test for direct nested data access. 
--- tests/sdfg/data/structure_test.py | 82 ++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 8636dc1602..3116a5764a 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -550,10 +550,80 @@ def test_write_nested_structure(): assert np.allclose(A, B.toarray()) +def test_direct_read_structure(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + + sdfg = dace.SDFG('csr_to_dense_direct') + + sdfg.add_datadesc('A', csr_obj) + sdfg.add_array('B', [M, N], dace.float32) + + # sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + # sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + # sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + state = sdfg.add_state() + + # A = state.add_access('A') + indptr = state.add_access('A.indptr') + indices = state.add_access('A.indices') + data = state.add_access('A.data') + B = state.add_access('B') + + # indptr = state.add_access('vindptr') + # indices = state.add_access('vindices') + # data = state.add_access('vdata') + + # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) + # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) + # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + 
+ state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.indptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.indptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='A.indices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.data', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + sdfg.view() + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + N=A.shape[1], + nnz=A.nnz) + + func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + if __name__ == "__main__": - test_read_structure() - test_write_structure() - test_local_structure() - test_read_nested_structure() - # test_read_nested_structure_2() - test_write_nested_structure() + # test_read_structure() + # test_write_structure() + # test_local_structure() + # test_read_nested_structure() + # test_write_nested_structure() + test_direct_read_structure() From 0593ea4f1a86951b210c727c95931ca3664f7423 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 20 Jul 2023 20:55:42 +0200 Subject: [PATCH 25/48] Added test for direct double-nested data accesses. 
--- tests/sdfg/data/structure_test.py | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 3116a5764a..91429e8bbc 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -620,6 +620,80 @@ def test_direct_read_structure(): assert np.allclose(B, ref) +def test_direct_read_nested_structure(): + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], + indices=dace.int32[nnz], + data=dace.float32[nnz], + rows=M, + cols=N, + nnz=nnz), + name='CSRMatrix') + wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') + + sdfg = dace.SDFG('nested_csr_to_dense_direct') + + sdfg.add_datadesc('A', wrapper_obj) + sdfg.add_array('B', [M, N], dace.float32) + + spmat = wrapper_obj.members['csr'] + sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) + sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) + sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) + + state = sdfg.add_state() + + # A = state.add_access('A') + indptr = state.add_access('A.csr.indptr') + indices = state.add_access('A.csr.indices') + data = state.add_access('A.csr.data') + B = state.add_access('B') + + # indptr = state.add_access('vindptr') + # indices = state.add_access('vindices') + # data = state.add_access('vdata') + + # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) + # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) + # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + 
jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.csr.indptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='A.csr.indptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='A.csr.indices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.csr.data', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + sdfg.view() + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + structclass = csr_obj.dtype._typeclass.as_ctypes() + inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], + indices=A.indices.__array_interface__['data'][0], + data=A.data.__array_interface__['data'][0], + rows=A.shape[0], + cols=A.shape[1], + M=A.shape[0], + K=A.shape[1], + nnz=A.nnz) + import ctypes + inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) + + func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + if __name__ == "__main__": # test_read_structure() # test_write_structure() @@ -627,3 +701,4 @@ def test_direct_read_structure(): # test_read_nested_structure() # test_write_nested_structure() test_direct_read_structure() + test_direct_read_nested_structure() From 0df9c3518c6d1ff307314a39dcbc8621423e3af4 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:17:02 +0200 Subject: [PATCH 26/48] Added free-symbols and repr. 
--- dace/data.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index e424aca66a..b54a4f9efb 100644 --- a/dace/data.py +++ b/dace/data.py @@ -364,7 +364,7 @@ class Structure(Data): name = Property(dtype=str, desc="Structure name") def __init__(self, - members: Dict[str, Any], + members: Dict[str, Data], name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, @@ -432,6 +432,17 @@ def start_offset(self): @property def strides(self): return [1] + + @property + def free_symbols(self) -> Set[symbolic.SymbolicType]: + """ Returns a set of undefined symbols in this data descriptor. """ + result = set(self.symbols.keys()) + for k, v in self.members.items(): + result |= v.free_symbols + return result + + def __repr__(self): + return f"{self.name} ({', '.join([f'{k}: {v}' for k, v in self.members.items()])})" def as_arg(self, with_types=True, for_call=False, name=None): if self.storage is dtypes.StorageType.GPU_Global: From 909c1aaafd76622cecd4972cd2b3718caf2c261f Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:18:17 +0200 Subject: [PATCH 27/48] Recursively add free symbols from nested data. --- dace/sdfg/sdfg.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 1f385a4b75..ae85bff5d1 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2015,9 +2015,6 @@ def _add_symbols(desc: dt.Data): self.add_symbol(sym.name, sym.dtype) # Add free symbols to the SDFG global symbol storage - # for sym in datadesc.free_symbols: - # if sym.name not in self.symbols: - # self.add_symbol(sym.name, sym.dtype) _add_symbols(datadesc) return name From e2b0d8b410e699692c1bf4863ae36a0b6f932e27 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:23:48 +0200 Subject: [PATCH 28/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 234 +++--------------------------- 1 file changed, 22 insertions(+), 212 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 91429e8bbc..2646fe3d03 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -8,36 +8,10 @@ from scipy import sparse -def create_structure(name: str) -> dace.data.Structure: - - StructureClass = type(name, (dace.data.Structure, ), {}) - - @staticmethod - def from_json(json_obj, context=None): - if json_obj['type'] != name: - raise TypeError("Invalid data type") - - # Create dummy object - ret = StructureClass({}) - serialize.set_properties_from_json(ret, json_obj, context=context) - - return ret - - setattr(StructureClass, 'from_json', from_json) - StructureClass = make_properties(StructureClass) - - return StructureClass - - def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -83,14 +57,9 @@ def test_read_structure(): inpA = csr_obj.dtype._typeclass.as_ctypes()(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - N=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) - func(A=inpA, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) @@ -99,12 +68,7 @@ def test_read_structure(): def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - 
indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -172,12 +136,7 @@ def test_write_structure(): outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -187,19 +146,9 @@ def test_write_structure(): def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') - tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix', transient=True) @@ -298,12 +247,7 @@ def test_local_structure(): outB = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) func(A=A, B=outB, M=tmp.shape[0], N=tmp.shape[1], nnz=tmp.nnz) @@ -312,12 +256,7 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -366,93 +305,11 @@ def test_read_nested_structure(): structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) import ctypes inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) - ref = A.toarray() - - assert np.allclose(B, ref) - - -@pytest.mark.skip -def test_read_nested_structure_2(): - M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), - name='CSRMatrix') - CSRView = dace.data.StructureView(csr_obj.members, transient=True) - wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') - - sdfg = dace.SDFG('nested_csr_to_dense_2') - - sdfg.add_datadesc('A', wrapper_obj) - sdfg.add_array('B', [M, N], dace.float32) - - spmat = wrapper_obj.members['csr'] - sdfg.add_datadesc('vcsr', CSRView) - sdfg.add_view('vindptr', spmat.members['indptr'].shape, spmat.members['indptr'].dtype) - sdfg.add_view('vindices', spmat.members['indices'].shape, spmat.members['indices'].dtype) - sdfg.add_view('vdata', spmat.members['data'].shape, spmat.members['data'].dtype) - - state = sdfg.add_state() - - A = state.add_access('A') - B = state.add_access('B') - - csr = state.add_access('vcsr') - indptr = state.add_access('vindptr') - indices = 
state.add_access('vindices') - data = state.add_access('vdata') - - state.add_edge(A, 'csr', csr, 'views', dace.Memlet.from_array('A.csr', spmat)) - state.add_edge(csr, 'indptr', indptr, 'views', dace.Memlet.from_array('vcsr.indptr', spmat.members['indptr'])) - state.add_edge(csr, 'indices', indices, 'views', dace.Memlet.from_array('vcsr.indices', spmat.members['indices'])) - state.add_edge(csr, 'data', data, 'views', dace.Memlet.from_array('vcsr.data', spmat.members['data'])) - - ime, imx = state.add_map('i', dict(i='0:M')) - jme, jmx = state.add_map('idx', dict(idx='start:stop')) - jme.add_in_connector('start') - jme.add_in_connector('stop') - t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') - - state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') - state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') - state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') - state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') - state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - - sdfg.view() - return - func = sdfg.compile() - - rng = np.random.default_rng(42) - A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) - B = np.zeros((20, 20), dtype=np.float32) - - structclass = csr_obj.dtype._typeclass.as_ctypes() - inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], - indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) - import ctypes - inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpW, B=B, M=A.shape[0], 
N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) @@ -461,12 +318,7 @@ def test_read_nested_structure_2(): def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -536,12 +388,7 @@ def test_write_nested_structure(): outCSR = csr_obj.dtype._typeclass.as_ctypes()(indptr=B.indptr.__array_interface__['data'][0], indices=B.indices.__array_interface__['data'][0], - data=B.data.__array_interface__['data'][0], - rows=tmp.shape[0], - cols=tmp.shape[1], - M=tmp.shape[0], - N=tmp.shape[1], - nnz=tmp.nnz) + data=B.data.__array_interface__['data'][0]) import ctypes outW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(outCSR)) @@ -553,12 +400,7 @@ def test_write_nested_structure(): def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ -566,26 +408,13 @@ def test_direct_read_structure(): sdfg.add_datadesc('A', csr_obj) sdfg.add_array('B', [M, N], dace.float32) - # sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) - # sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) - # sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) - state = sdfg.add_state() - # A = state.add_access('A') indptr = 
state.add_access('A.indptr') indices = state.add_access('A.indices') data = state.add_access('A.data') B = state.add_access('B') - # indptr = state.add_access('vindptr') - # indices = state.add_access('vindices') - # data = state.add_access('vdata') - - # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.indptr', csr_obj.members['indptr'])) - # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.indices', csr_obj.members['indices'])) - # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.data', csr_obj.members['data'])) - ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = state.add_map('idx', dict(idx='start:stop')) jme.add_in_connector('start') @@ -622,12 +451,7 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], - indices=dace.int32[nnz], - data=dace.float32[nnz], - rows=M, - cols=N, - nnz=nnz), + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -643,20 +467,11 @@ def test_direct_read_nested_structure(): state = sdfg.add_state() - # A = state.add_access('A') indptr = state.add_access('A.csr.indptr') indices = state.add_access('A.csr.indices') data = state.add_access('A.csr.data') B = state.add_access('B') - # indptr = state.add_access('vindptr') - # indices = state.add_access('vindices') - # data = state.add_access('vdata') - - # state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.csr.indptr', spmat.members['indptr'])) - # state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.csr.indices', spmat.members['indices'])) - # state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.csr.data', spmat.members['data'])) - ime, imx = state.add_map('i', dict(i='0:M')) jme, jmx = 
state.add_map('idx', dict(idx='start:stop')) jme.add_in_connector('start') @@ -679,26 +494,21 @@ def test_direct_read_nested_structure(): structclass = csr_obj.dtype._typeclass.as_ctypes() inpCSR = structclass(indptr=A.indptr.__array_interface__['data'][0], indices=A.indices.__array_interface__['data'][0], - data=A.data.__array_interface__['data'][0], - rows=A.shape[0], - cols=A.shape[1], - M=A.shape[0], - K=A.shape[1], - nnz=A.nnz) + data=A.data.__array_interface__['data'][0]) import ctypes inpW = wrapper_obj.dtype._typeclass.as_ctypes()(csr=ctypes.pointer(inpCSR)) - func(A=inpW, B=B, M=20, N=20, nnz=A.nnz) + func(A=inpW, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() assert np.allclose(B, ref) if __name__ == "__main__": - # test_read_structure() - # test_write_structure() - # test_local_structure() - # test_read_nested_structure() - # test_write_nested_structure() + test_read_structure() + test_write_structure() + test_local_structure() + test_read_nested_structure() + test_write_nested_structure() test_direct_read_structure() test_direct_read_nested_structure() From 52afc7250b02fb4b85eb3a62bf5104dce9a72995 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:24:14 +0200 Subject: [PATCH 29/48] Scrapped structure private symbols for now. --- dace/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index b54a4f9efb..9d3b6b86f3 100644 --- a/dace/data.py +++ b/dace/data.py @@ -436,7 +436,7 @@ def strides(self): @property def free_symbols(self) -> Set[symbolic.SymbolicType]: """ Returns a set of undefined symbols in this data descriptor. """ - result = set(self.symbols.keys()) + result = set() for k, v in self.members.items(): result |= v.free_symbols return result From 09246442f6e456b4b090651b895be53e3414a512 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:26:10 +0200 Subject: [PATCH 30/48] Updated tests. 
--- tests/sdfg/data/structure_test.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 2646fe3d03..02b8f0c174 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -48,7 +48,6 @@ def test_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -123,7 +122,6 @@ def test_write_structure(): i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.indptr', subset='0:M+1')) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -234,7 +232,6 @@ def test_local_structure(): input_nodes={'tmp_vdata': tmp_data}, output_nodes={'vdata': B_data}) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -295,7 +292,6 @@ def test_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -375,7 +371,6 @@ def test_write_nested_structure(): i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) i_after.add_edge(indptr, 'views', B, None, dace.Memlet(data='B.csr.indptr', subset='0:M+1')) - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -427,7 +422,6 @@ def test_direct_read_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.data', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), 
src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) @@ -484,7 +478,6 @@ def test_direct_read_nested_structure(): state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='A.csr.data', subset='idx'), dst_conn='__val') state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') - sdfg.view() func = sdfg.compile() rng = np.random.default_rng(42) From 8296a6de765b2209cbd644b6017d68304016ef3c Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 21 Jul 2023 18:29:06 +0200 Subject: [PATCH 31/48] Added setitem. --- dace/sdfg/sdfg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index ae85bff5d1..23964dbe41 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -62,8 +62,12 @@ def __getitem__(self, key): token = tokens.pop(0) result = result.members[token] return result - + def __setitem__(self, key, val): + if isinstance(key, str) and '.' in key: + raise KeyError('NestedDict does not support setting nested keys') + super(NestedDict, self).__setitem__(key, val) + def __contains__(self, key): tokens = key.split('.') if isinstance(key, str) else [key] token = tokens.pop(0) From a98fce07b7e78b0bf1a0bc53d17e37e38c22b3dc Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 20:58:42 +0200 Subject: [PATCH 32/48] Serialize Structure members and struct data/length as list of tuples. 
--- dace/data.py | 5 +++-- dace/dtypes.py | 11 +++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dace/data.py b/dace/data.py index 9d3b6b86f3..fd7cdaf8e3 100644 --- a/dace/data.py +++ b/dace/data.py @@ -344,13 +344,14 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): def _arrays_to_json(arrays): if arrays is None: return None - return {k: serialize.to_json(v) for k, v in arrays.items()} + sorted_keys = sorted(arrays.keys()) + return [(k, serialize.to_json(arrays[k])) for k in sorted_keys] def _arrays_from_json(obj, context=None): if obj is None: return {} - return {k: serialize.from_json(v, context) for k, v in obj.items()} + return {k: serialize.from_json(v, context) for k, v in obj} @make_properties diff --git a/dace/dtypes.py b/dace/dtypes.py index d01209469f..9c483d5df1 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -768,13 +768,12 @@ def fields(self): return self._data def to_json(self): + sorted_keys = sorted(self._data.keys()) return { 'type': 'struct', 'name': self.name, - 'data': {k: v.to_json() - for k, v in self._data.items()}, - 'length': {k: v - for k, v in self._length.items()}, + 'data': [(k, self._data[k].to_json()) for k in sorted_keys], + 'length': [(k, self._length[k]) for k in sorted_keys if k in self._length], 'bytes': self.bytes } @@ -786,8 +785,8 @@ def from_json(json_obj, context=None): import dace.serialize # Avoid import loop ret = struct(json_obj['name']) - ret._data = {k: json_to_typeclass(v, context) for k, v in json_obj['data'].items()} - ret._length = {k: v for k, v in json_obj['length'].items()} + ret._data = {k: json_to_typeclass(v, context) for k, v in json_obj['data']} + ret._length = {k: v for k, v in json_obj['length']} ret.bytes = json_obj['bytes'] return ret From f431a8df0c99890d5dbeef48674157aa196d6a3e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 10:29:11 +0200 Subject: [PATCH 33/48] Switched Structures and structs to OrderedDicts. 
--- dace/data.py | 40 ++++++++++++++++++++----------- dace/dtypes.py | 26 ++++++++++---------- tests/sdfg/data/structure_test.py | 8 +++++++ 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/dace/data.py b/dace/data.py index fd7cdaf8e3..b20f9f7db5 100644 --- a/dace/data.py +++ b/dace/data.py @@ -3,8 +3,9 @@ import ctypes import functools +from collections import OrderedDict from numbers import Number -from typing import Any, Dict, Optional, Sequence, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple import numpy import sympy as sp @@ -344,40 +345,47 @@ def add(X: dace.float32[10, 10] @ dace.StorageType.GPU_Global): def _arrays_to_json(arrays): if arrays is None: return None - sorted_keys = sorted(arrays.keys()) - return [(k, serialize.to_json(arrays[k])) for k in sorted_keys] + return [(k, serialize.to_json(v)) for k, v in arrays.items()] def _arrays_from_json(obj, context=None): if obj is None: return {} - return {k: serialize.from_json(v, context) for k, v in obj} + return OrderedDict((k, serialize.from_json(v, context)) for k, v in obj) @make_properties class Structure(Data): """ Base class for structures. 
""" - members = Property(dtype=dict, + members = Property(dtype=OrderedDict, desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) + order = ListProperty(element_type=str, desc="Order of structure members") name = Property(dtype=str, desc="Structure name") def __init__(self, members: Dict[str, Data], + order: List[str] = None, name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, location: Dict[str, str] = None, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): + + self.order = order or list(members.keys()) + if set(members.keys()) != set(self.order): + raise ValueError('Order must contain all members of the structure.') + # TODO: Should we make a deep-copy here? - self.members = members or {} + self.members = OrderedDict((k, members[k]) for k in self.order) + for k, v in self.members.items(): v.transient = transient self.name = name - fields_and_types = dict() + fields_and_types = OrderedDict() symbols = set() for k, v in members.items(): if isinstance(v, Structure): @@ -396,13 +404,17 @@ def __init__(self, fields_and_types[k] = dtypes.typeclass(type(v)) else: raise TypeError(f"Attribute {k}'s value {v} has unsupported type: {type(v)}") - for s in symbols: - if str(s) in fields_and_types: - continue - if hasattr(s, "dtype"): - fields_and_types[str(s)] = s.dtype - else: - fields_and_types[str(s)] = dtypes.int32 + + # NOTE: We will not store symbols in the dtype for now, but leaving it as a comment to investigate later. + # NOTE: See discussion about data/object symbols. 
+ # for s in symbols: + # if str(s) in fields_and_types: + # continue + # if hasattr(s, "dtype"): + # fields_and_types[str(s)] = s.dtype + # else: + # fields_and_types[str(s)] = dtypes.int32 + dtype = dtypes.pointer(dtypes.struct(name, **fields_and_types)) shape = (1,) super(Structure, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) diff --git a/dace/dtypes.py b/dace/dtypes.py index 9c483d5df1..678f2f59b0 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -7,6 +7,7 @@ import itertools import numpy import re +from collections import OrderedDict from functools import wraps from typing import Any from dace.config import Config @@ -768,12 +769,11 @@ def fields(self): return self._data def to_json(self): - sorted_keys = sorted(self._data.keys()) return { 'type': 'struct', 'name': self.name, - 'data': [(k, self._data[k].to_json()) for k in sorted_keys], - 'length': [(k, self._length[k]) for k in sorted_keys if k in self._length], + 'data': [(k, v.to_json()) for k, v in self._data.items()], + 'length': [(k, v) for k, v in self._length.items()], 'bytes': self.bytes } @@ -792,19 +792,21 @@ def from_json(json_obj, context=None): return ret def _parse_field_and_types(self, **fields_and_types): - from dace.symbolic import pystr_to_symbolic - self._data = dict() - self._length = dict() + # from dace.symbolic import pystr_to_symbolic + self._data = OrderedDict() + self._length = OrderedDict() self.bytes = 0 for k, v in fields_and_types.items(): if isinstance(v, tuple): t, l = v if not isinstance(t, pointer): raise TypeError("Only pointer types may have a length.") - sym_tokens = pystr_to_symbolic(l).free_symbols - for sym in sym_tokens: - if str(sym) not in fields_and_types.keys(): - raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") + # TODO: Do we need the free symbols of the length in the struct? + # NOTE: It is needed for the old use of dtype.struct. Are we deprecating that? 
+ # sym_tokens = pystr_to_symbolic(l).free_symbols + # for sym in sym_tokens: + # if str(sym) not in fields_and_types.keys(): + # raise ValueError(f"Symbol {sym} in {k}'s length {l} is not a field of struct {self.name}") self._data[k] = t self._length[k] = l self.bytes += t.bytes @@ -830,7 +832,7 @@ def as_ctypes(self): fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) - fields = sorted(fields, key=lambda f: f[0]) + # fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) _FFI_CTYPES[self] = struct_class @@ -844,7 +846,7 @@ def emit_definition(self): {typ} }};""".format( name=self.name, - typ='\n'.join([" %s %s;" % (t.ctype, tname) for tname, t in sorted(self._data.items())]), + typ='\n'.join([" %s %s;" % (t.ctype, tname) for tname, t in self._data.items()]), ) diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 02b8f0c174..995aacb2fd 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -12,6 +12,7 @@ def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -68,6 +69,7 @@ def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -145,8 +147,10 @@ def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') tmp_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix', transient=True) @@ -254,6 +258,7 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -315,6 +320,7 @@ def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -396,6 +402,7 @@ def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ -446,6 +453,7 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') From 86d9cf2180c0b599b0a025447f1a36b7f9a05ecf Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 10:31:32 +0200 Subject: [PATCH 34/48] Removed order from properties. 
--- dace/data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dace/data.py b/dace/data.py index b20f9f7db5..d8f2d52998 100644 --- a/dace/data.py +++ b/dace/data.py @@ -362,7 +362,6 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) - order = ListProperty(element_type=str, desc="Order of structure members") name = Property(dtype=str, desc="Structure name") def __init__(self, @@ -375,12 +374,12 @@ def __init__(self, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - self.order = order or list(members.keys()) - if set(members.keys()) != set(self.order): + order = order or list(members.keys()) + if set(members.keys()) != set(order): raise ValueError('Order must contain all members of the structure.') # TODO: Should we make a deep-copy here? - self.members = OrderedDict((k, members[k]) for k in self.order) + self.members = OrderedDict((k, members[k]) for k in order) for k, v in self.members.items(): v.transient = transient From 76d6266cead9f7b3de58e8fc879a7d978ddbe757 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 28 Jul 2023 12:05:50 +0200 Subject: [PATCH 35/48] `_argminmax` now creates a struct with the members ordered as accessed in the related tasklets. 
--- dace/frontend/python/replacements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index 9eac240a87..b325a2ea7e 100644 --- a/dace/frontend/python/replacements.py +++ b/dace/frontend/python/replacements.py @@ -975,7 +975,7 @@ def _argminmax(pv: ProgramVisitor, reduced_shape = list(copy.deepcopy(a_arr.shape)) reduced_shape.pop(axis) - val_and_idx = dace.struct('_val_and_idx', val=a_arr.dtype, idx=result_type) + val_and_idx = dace.struct('_val_and_idx', idx=result_type, val=a_arr.dtype) # HACK: since identity cannot be specified for structs, we have to init the output array reduced_structs, reduced_struct_arr = sdfg.add_temp_transient(reduced_shape, val_and_idx) From 1d3db91f7104e51dd90ce41da3f84a0140ab69e4 Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 08:38:15 +0200 Subject: [PATCH 36/48] Update dependency --- dace/external/hlslib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/external/hlslib b/dace/external/hlslib index 1b5b3aee5d..1403cd016c 160000 --- a/dace/external/hlslib +++ b/dace/external/hlslib @@ -1 +1 @@ -Subproject commit 1b5b3aee5dab19adcc443fa9a7cd45244bd246b1 +Subproject commit 1403cd016ce63a9961eeb3899bea70c873a929ce From b47d82b72decce012b088602acc9b8290da04f8e Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 13:55:34 +0200 Subject: [PATCH 37/48] Add fix plus testcase --- dace/frontend/fortran/fortran_parser.py | 1 + tests/fortran/array_test.py | 50 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/dace/frontend/fortran/fortran_parser.py b/dace/frontend/fortran/fortran_parser.py index 6d1be7138a..d7112892fe 100644 --- a/dace/frontend/fortran/fortran_parser.py +++ b/dace/frontend/fortran/fortran_parser.py @@ -463,6 +463,7 @@ def subroutine2sdfg(self, node: ast_internal_classes.Subroutine_Subprogram_Node, if i.type == "ALL": shape.append(array.shape[indices]) mysize = 
mysize * array.shape[indices] + index_list.append(None) else: raise NotImplementedError("Index in ParDecl should be ALL") else: diff --git a/tests/fortran/array_test.py b/tests/fortran/array_test.py index 8685628012..a8ece680a6 100644 --- a/tests/fortran/array_test.py +++ b/tests/fortran/array_test.py @@ -11,6 +11,7 @@ from dace.frontend.fortran import fortran_parser from fparser.two.symbol_table import SymbolTable from dace.sdfg import utils as sdutil +from dace.sdfg.nodes import AccessNode import dace.frontend.fortran.ast_components as ast_components import dace.frontend.fortran.ast_transforms as ast_transforms @@ -167,6 +168,54 @@ def test_fortran_frontend_input_output_connector(): assert (a[1, 2] == 0) +def test_fortran_frontend_memlet_in_map_test(): + """ + Tests that no assumption is made where the iteration variable is inside a memlet subset + """ + test_string = """ + PROGRAM memlet_range_test + implicit None + REAL INP(100, 10) + REAL OUT(100, 10) + CALL memlet_range_test_routine(INP, OUT) + END PROGRAM + + SUBROUTINE memlet_range_test_routine(INP, OUT) + REAL INP(100, 10) + REAL OUT(100, 10) + DO I=1,100 + CALL inner_loops(INP(I, :), OUT(I, :)) + ENDDO + END SUBROUTINE memlet_range_test_routine + + SUBROUTINE inner_loops(INP, OUT) + REAL INP(10) + REAL OUT(10) + DO J=1,10 + OUT(J) = INP(J) + 1 + ENDDO + END SUBROUTINE inner_loops + + """ + sdfg = fortran_parser.create_sdfg_from_string(test_string, "memlet_range_test") + sdfg.simplify() + # Expect that start is begin of for loop -> only one out edge to guard defining iterator variable + assert len(sdfg.out_edges(sdfg.start_state)) == 1 + iter_var = symbolic.symbol(list(sdfg.out_edges(sdfg.start_state)[0].data.assignments.keys())[0]) + + for state in sdfg.states(): + if len(state.nodes()) > 1: + for node in state.nodes(): + if isinstance(node, AccessNode) and node.data in ['INP', 'OUT']: + edges = [*state.in_edges(node), *state.out_edges(node)] + # There should be only one edge in/to the access node + 
assert len(edges) == 1 + memlet = edges[0].data + # Check that the correct memlet has the iteration variable + assert memlet.subset[0] == (iter_var, iter_var, 1) + assert memlet.subset[1] == (1, 10, 1) + + if __name__ == "__main__": test_fortran_frontend_array_3dmap() @@ -174,3 +223,4 @@ def test_fortran_frontend_input_output_connector(): test_fortran_frontend_input_output_connector() test_fortran_frontend_array_ranges() test_fortran_frontend_twoconnector() + test_fortran_frontend_memlet_in_map_test() From 4c824a310a53c2aefd6d03113dda091f4c48bad8 Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 3 Aug 2023 13:59:20 +0200 Subject: [PATCH 38/48] Tried to undo wrong update of dependency --- dace/external/hlslib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/external/hlslib b/dace/external/hlslib index 1403cd016c..1b5b3aee5d 160000 --- a/dace/external/hlslib +++ b/dace/external/hlslib @@ -1 +1 @@ -Subproject commit 1403cd016ce63a9961eeb3899bea70c873a929ce +Subproject commit 1b5b3aee5dab19adcc443fa9a7cd45244bd246b1 From 22718af782d2e36ea7004aa00c79b8fce176fe03 Mon Sep 17 00:00:00 2001 From: Cliff Hodel <111381329+hodelcl@users.noreply.github.com> Date: Wed, 16 Aug 2023 14:15:03 +0200 Subject: [PATCH 39/48] Work Depth Analysis for SDFGs (#1327) * initial push of work_depth analysis script * adding tests to work_depth analysis * rename work depth analysis * todos added * code ready for PR * yapf for formatting * put tests into dace/tests/sdfg * fixed import after merge * merged propgatate_states_symbolically into propagate_states * fixed format issue in work_depth.py * small bugfix --------- Co-authored-by: Cliff Hodel Co-authored-by: Cliff Hodel Co-authored-by: Philipp Schaad --- dace/sdfg/propagation.py | 51 +- dace/sdfg/work_depth_analysis/helpers.py | 331 ++++++++++ dace/sdfg/work_depth_analysis/work_depth.py | 653 ++++++++++++++++++++ tests/sdfg/work_depth_tests.py | 201 ++++++ 4 files changed, 1224 insertions(+), 12 deletions(-) create 
mode 100644 dace/sdfg/work_depth_analysis/helpers.py create mode 100644 dace/sdfg/work_depth_analysis/work_depth.py create mode 100644 tests/sdfg/work_depth_tests.py diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 89ba6928c7..0fec4812b7 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -10,7 +10,7 @@ import itertools import functools import sympy -from sympy import ceiling +from sympy import ceiling, Symbol from sympy.concrete.summations import Sum import warnings import networkx as nx @@ -564,8 +564,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): Annotate each valid for loop construct with its loop variable ranges. :param sdfg: The SDFG in which to look. - :param unannotated_cycle_states: List of states in cycles without valid - for loop ranges. + :param unannotated_cycle_states: List of lists. Each sub-list contains the states of one unannotated cycle. """ # We import here to avoid cyclic imports. @@ -652,7 +651,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): res = find_for_loop(sdfg, guard, begin, itervar=itvar) if res is None: # No range detected, mark as unbounded. - unannotated_cycle_states.extend(cycle) + unannotated_cycle_states.append(cycle) else: itervar, rng, _ = res @@ -674,10 +673,10 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): else: # There's no guard state, so this cycle marks all states in it as # dynamically unbounded. - unannotated_cycle_states.extend(cycle) + unannotated_cycle_states.append(cycle) -def propagate_states(sdfg) -> None: +def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: """ Annotate the states of an SDFG with the number of executions. @@ -728,6 +727,9 @@ def propagate_states(sdfg) -> None: once. :param sdfg: The SDFG to annotate. + :param concretize_dynamic_unbounded: If True, we annotate dyncamic unbounded states with symbols of the + form "num_execs_{sdfg_id}_{loop_start_state_id}". 
Hence, for each + unbounded loop its states will have the same number of symbolic executions. :note: This operates on the SDFG in-place. """ @@ -759,6 +761,9 @@ def propagate_states(sdfg) -> None: # cycle should be marked as unannotated. unannotated_cycle_states = [] _annotate_loop_ranges(sdfg, unannotated_cycle_states) + if not concretize_dynamic_unbounded: + # Flatten the list. This keeps the old behavior of propagate_states. + unannotated_cycle_states = [state for cycle in unannotated_cycle_states for state in cycle] # Keep track of states that fully merge a previous conditional split. We do # this so we can remove the dynamic executions flag for those states. @@ -800,7 +805,7 @@ def propagate_states(sdfg) -> None: # The only exception to this rule: If the state is in an # unannotated loop, i.e. should be annotated as dynamic # unbounded instead, we do that. - if (state in unannotated_cycle_states): + if (not concretize_dynamic_unbounded) and state in unannotated_cycle_states: state.executions = 0 state.dynamic_executions = True else: @@ -872,17 +877,39 @@ def propagate_states(sdfg) -> None: else: # Conditional split or unannotated (dynamic unbounded) loop. unannotated_loop_edge = None - for oedge in out_edges: - if oedge.dst in unannotated_cycle_states: - # This is an unannotated loop down this branch. - unannotated_loop_edge = oedge + if concretize_dynamic_unbounded: + to_remove = [] + for oedge in out_edges: + for cycle in unannotated_cycle_states: + if oedge.dst in cycle: + # This is an unannotated loop down this branch. + unannotated_loop_edge = oedge + # remove cycle, since it is now annotated with symbol + to_remove.append(cycle) + + for c in to_remove: + unannotated_cycle_states.remove(c) + else: + for oedge in out_edges: + if oedge.dst in unannotated_cycle_states: + # This is an unannotated loop down this branch. + unannotated_loop_edge = oedge if unannotated_loop_edge is not None: # Traverse as an unbounded loop. 
out_edges.remove(unannotated_loop_edge) for oedge in out_edges: traversal_q.append((oedge.dst, state.executions, False, itvar_stack)) - traversal_q.append((unannotated_loop_edge.dst, 0, True, itvar_stack)) + if concretize_dynamic_unbounded: + # Here we introduce the num_exec symbol and propagate it down the loop. + # We can always assume these symbols to be non-negative. + traversal_q.append( + (unannotated_loop_edge.dst, + Symbol(f'num_execs_{sdfg.sdfg_id}_{sdfg.node_id(unannotated_loop_edge.dst)}', + nonnegative=True), False, itvar_stack)) + else: + # Propagate dynamic unbounded. + traversal_q.append((unannotated_loop_edge.dst, 0, True, itvar_stack)) else: # Traverse as a conditional split. proposed_executions = state.executions diff --git a/dace/sdfg/work_depth_analysis/helpers.py b/dace/sdfg/work_depth_analysis/helpers.py new file mode 100644 index 0000000000..a80e769f64 --- /dev/null +++ b/dace/sdfg/work_depth_analysis/helpers.py @@ -0,0 +1,331 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Helper functions used by the work depth analysis. 
""" + +from dace import SDFG, SDFGState, nodes +from collections import deque +from typing import List, Dict, Set, Tuple, Optional, Union +import networkx as nx + +NodeT = str +EdgeT = Tuple[NodeT, NodeT] + + +class NodeCycle: + + nodes: Set[NodeT] = [] + + def __init__(self, nodes: List[NodeT]) -> None: + self.nodes = set(nodes) + + @property + def length(self) -> int: + return len(self.nodes) + + +UUID_SEPARATOR = '/' + + +def ids_to_string(sdfg_id, state_id=-1, node_id=-1, edge_id=-1): + return (str(sdfg_id) + UUID_SEPARATOR + str(state_id) + UUID_SEPARATOR + str(node_id) + UUID_SEPARATOR + + str(edge_id)) + + +def get_uuid(element, state=None): + if isinstance(element, SDFG): + return ids_to_string(element.sdfg_id) + elif isinstance(element, SDFGState): + return ids_to_string(element.parent.sdfg_id, element.parent.node_id(element)) + elif isinstance(element, nodes.Node): + return ids_to_string(state.parent.sdfg_id, state.parent.node_id(state), state.node_id(element)) + else: + return ids_to_string(-1) + + +def get_domtree(graph: nx.DiGraph, start_node: str, idom: Dict[str, str] = None): + idom = idom or nx.immediate_dominators(graph, start_node) + + alldominated = {n: set() for n in graph.nodes} + domtree = nx.DiGraph() + + for node, dom in idom.items(): + if node is dom: + continue + domtree.add_edge(dom, node) + alldominated[dom].add(node) + + nextidom = idom[dom] + ndom = nextidom if nextidom != dom else None + + while ndom: + alldominated[ndom].add(node) + nextidom = idom[ndom] + ndom = nextidom if nextidom != ndom else None + + # 'Rank' the tree, i.e., annotate each node with the level it is on. 
+ q = deque() + q.append((start_node, 0)) + while q: + node, level = q.popleft() + domtree.add_node(node, level=level) + for s in domtree.successors(node): + q.append((s, level + 1)) + + return alldominated, domtree + + +def get_backedges(graph: nx.DiGraph, + start: Optional[NodeT], + strict: bool = False) -> Union[Set[EdgeT], Tuple[Set[EdgeT], Set[EdgeT]]]: + '''Find all backedges in a directed graph. + + Note: + This algorithm has an algorithmic complexity of O((|V|+|E|)*C) for a + graph with vertices V, edges E, and C cycles. + + Args: + graph (nx.DiGraph): The graph for which to search backedges. + start (str): Start node of the graph. If no start is provided, a node + with no incoming edges is used as the start. If no such node can + be found, a `ValueError` is raised. + + Returns: + A set of backedges in the graph. + + Raises: + ValueError: If no `start` is provided and the graph contains no nodes + with no incoming edges. + ''' + backedges = set() + eclipsed_backedges = set() + + if start is None: + for node in graph.nodes(): + if graph.in_degree(node) == 0: + start = node + break + if start is None: + raise ValueError('No start node provided and no start node could ' + 'be determined automatically') + + # Gather all cycles in the graph. Cycles are represented as a sequence of + # nodes. + # O((|V|+|E|)*(C+1)), for C cycles. + all_cycles_nx: List[List[NodeT]] = nx.cycles.simple_cycles(graph) + #all_cycles_nx: List[List[NodeT]] = nx.simple_cycles(graph) + all_cycles: Set[NodeCycle] = set() + for cycle in all_cycles_nx: + all_cycles.add(NodeCycle(cycle)) + + # Construct a dictionary mapping a node to the cycles containing that node. + # O(|V|*|C|) + cycle_map: Dict[NodeT, Set[NodeCycle]] = dict() + for cycle in all_cycles: + for node in cycle.nodes: + try: + cycle_map[node].add(cycle) + except KeyError: + cycle_map[node] = set([cycle]) + + # Do a BFS traversal of the graph to detect the back edges. 
+ # For each node that is part of an (unhandled) cycle, find the longest + # still unhandled cycle and try to use it to find the back edge for it. + bfs_frontier = [start] + visited: Set[NodeT] = set([start]) + handled_cycles: Set[NodeCycle] = set() + unhandled_cycles = all_cycles + while bfs_frontier: + node = bfs_frontier.pop(0) + pred = [p for p in graph.predecessors(node) if p not in visited] + longest_cycles: Dict[NodeT, NodeCycle] = dict() + try: + cycles = cycle_map[node] + remove_cycles = set() + for cycle in cycles: + if cycle not in handled_cycles: + for p in pred: + if p in cycle.nodes: + if p not in longest_cycles: + longest_cycles[p] = cycle + else: + if cycle.length > longest_cycles[p].length: + longest_cycles[p] = cycle + else: + remove_cycles.add(cycle) + for cycle in remove_cycles: + cycles.remove(cycle) + except KeyError: + longest_cycles = dict() + + # For the current node, find the incoming edge which belongs to the + # cycle and has not been visited yet, which indicates a backedge. + node_backedge_candidates: Set[Tuple[EdgeT, NodeCycle]] = set() + for p, longest_cycle in longest_cycles.items(): + handled_cycles.add(longest_cycle) + unhandled_cycles.remove(longest_cycle) + cycle_map[node].remove(longest_cycle) + backedge_candidates = graph.in_edges(node) + for candidate in backedge_candidates: + src = candidate[0] + dst = candidate[0] + if src not in visited and src in longest_cycle.nodes: + node_backedge_candidates.add((candidate, longest_cycle)) + if not strict: + backedges.add(candidate) + + # Make sure that any cycle containing this back edge is + # not evaluated again, i.e., mark as handled. 
+ remove_cycles = set() + for cycle in unhandled_cycles: + if src in cycle.nodes and dst in cycle.nodes: + handled_cycles.add(cycle) + remove_cycles.add(cycle) + for cycle in remove_cycles: + unhandled_cycles.remove(cycle) + + # If strict is set, we only report the longest cycle's back edges for + # any given node, and separately return any other backedges as + # 'eclipsed' backedges. In the case of a while-loop, for example, + # the loop edge is considered a backedge, while a continue inside the + # loop is considered an 'eclipsed' backedge. + if strict: + longest_candidate: Tuple[EdgeT, NodeCycle] = None + eclipsed_candidates = set() + for be_candidate in node_backedge_candidates: + if longest_candidate is None: + longest_candidate = be_candidate + elif longest_candidate[1].length < be_candidate[1].length: + eclipsed_candidates.add(longest_candidate[0]) + longest_candidate = be_candidate + else: + eclipsed_candidates.add(be_candidate[0]) + if longest_candidate is not None: + backedges.add(longest_candidate[0]) + if eclipsed_candidates: + eclipsed_backedges.update(eclipsed_candidates) + + # Continue BFS. + for neighbour in graph.successors(node): + if neighbour not in visited: + visited.add(neighbour) + bfs_frontier.append(neighbour) + + if strict: + return backedges, eclipsed_backedges + else: + return backedges + + +def find_loop_guards_tails_exits(sdfg_nx: nx.DiGraph): + """ + Detects loops in a SDFG. For each loop, it identifies (node, oNode, exit). + We know that there is a backedge from oNode to node that creates the loop and that exit is the exit state of the loop. + + :param sdfg_nx: The networkx representation of a SDFG. 
+ """ + + # preparation phase: compute dominators, backedges etc + for node in sdfg_nx.nodes(): + if sdfg_nx.in_degree(node) == 0: + start = node + break + if start is None: + raise ValueError('No start node could be determined') + + # sdfg can have multiple end nodes --> not good for postDomTree + # --> add a new end node + artificial_end_node = 'artificial_end_node' + sdfg_nx.add_node(artificial_end_node) + for node in sdfg_nx.nodes(): + if sdfg_nx.out_degree(node) == 0 and node != artificial_end_node: + # this is an end node of the sdfg + sdfg_nx.add_edge(node, artificial_end_node) + + # sanity check: + if sdfg_nx.in_degree(artificial_end_node) == 0: + raise ValueError('No end node could be determined in the SDFG') + + # compute dominators and backedges + iDoms = nx.immediate_dominators(sdfg_nx, start) + allDom, domTree = get_domtree(sdfg_nx, start, iDoms) + + reversed_sdfg_nx = sdfg_nx.reverse() + iPostDoms = nx.immediate_dominators(reversed_sdfg_nx, artificial_end_node) + allPostDoms, postDomTree = get_domtree(reversed_sdfg_nx, artificial_end_node, iPostDoms) + + backedges = get_backedges(sdfg_nx, start) + backedgesDstDict = {} + for be in backedges: + if be[1] in backedgesDstDict: + backedgesDstDict[be[1]].add(be) + else: + backedgesDstDict[be[1]] = set([be]) + + # This list will be filled with triples (node, oNode, exit), one triple for each loop construct in the SDFG. + # There will always be a backedge from oNode to node. Either node or oNode will be the corresponding loop guard, + # depending on whether it is a while-do or a do-while loop. exit will always be the exit state of the loop. + nodes_oNodes_exits = [] + + # iterate over all nodes + for node in sdfg_nx.nodes(): + # Check if any backedge ends in node. 
+        if node in backedgesDstDict:
+            inc_backedges = backedgesDstDict[node]
+
+            # gather all successors of node that are not reached by backedges
+            successors = []
+            for edge in sdfg_nx.out_edges(node):
+                if not edge in backedges:
+                    successors.append(edge[1])
+
+            # For each incoming backedge, we want to find oNode and exit. There can be multiple backedges, in case
+            # we have a continue statement in the original code. But we can handle these backedges normally.
+            for be in inc_backedges:
+                # since node has an incoming backedge, it is either a loop guard or loop tail
+                # oNode will exactly be the other thing
+                oNode = be[0]
+                exitCandidates = set()
+                # search for exit candidates:
+                # a state is an exit candidate if:
+                # - it is in successor and it does not dominate oNode (else it dominates
+                # the last loop state, and hence is inside the loop itself)
+                # - it is a successor of oNode (but not node)
+                # This handles both cases of while-do and do-while loops
+                for succ in successors:
+                    if succ != oNode and oNode not in allDom[succ]:
+                        exitCandidates.add(succ)
+                for succ in sdfg_nx.successors(oNode):
+                    if succ != node:
+                        exitCandidates.add(succ)
+
+                if len(exitCandidates) == 0:
+                    raise ValueError('failed to find any exit nodes')
+                elif len(exitCandidates) > 1:
+                    # Find the exit candidate that sits highest up in the
+                    # postdominator tree (i.e., has the lowest level).
+                    # That must be the exit node (it must post-dominate
+                    # everything inside the loop). If there are multiple
+                    # candidates on the lowest level (i.e., disjoint set of
+                    # postdominated nodes), there are multiple exit paths,
+                    # and they all share one level.
+ cand = exitCandidates.pop() + minSet = set([cand]) + minLevel = nx.get_node_attributes(postDomTree, 'level')[cand] + for cand in exitCandidates: + curr_level = nx.get_node_attributes(postDomTree, 'level')[cand] + if curr_level < minLevel: + # new minimum found + minLevel = curr_level + minSet.clear() + minSet.add(cand) + elif curr_level == minLevel: + # add cand to curr set + minSet.add(cand) + + if len(minSet) > 0: + exitCandidates = minSet + else: + raise ValueError('failed to find exit minSet') + + # now we have a triple (node, oNode, exitCandidates) + nodes_oNodes_exits.append((node, oNode, exitCandidates)) + + return nodes_oNodes_exits diff --git a/dace/sdfg/work_depth_analysis/work_depth.py b/dace/sdfg/work_depth_analysis/work_depth.py new file mode 100644 index 0000000000..a05fe10266 --- /dev/null +++ b/dace/sdfg/work_depth_analysis/work_depth.py @@ -0,0 +1,653 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Work depth analysis for any input SDFG. Can be used with the DaCe VS Code extension or +from command line as a Python script. """ + +import argparse +from collections import deque +from dace.sdfg import nodes as nd, propagation, InterstateEdge +from dace import SDFG, SDFGState, dtypes +from dace.subsets import Range +from typing import Tuple, Dict +import os +import sympy as sp +from copy import deepcopy +from dace.libraries.blas import MatMul +from dace.libraries.standard import Reduce, Transpose +from dace.symbolic import pystr_to_symbolic +import ast +import astunparse +import warnings + +from dace.sdfg.work_depth_analysis.helpers import get_uuid, find_loop_guards_tails_exits + + +def get_array_size_symbols(sdfg): + """ + Returns all symbols that appear isolated in shapes of the SDFG's arrays. + These symbols can then be assumed to be positive. + + :note: This only works if a symbol appears in isolation, i.e. array A[N]. + If we have A[N+1], we cannot assume N to be positive. 
+ :param sdfg: The SDFG in which it searches for symbols. + :return: A set containing symbols which we can assume to be positive. + """ + symbols = set() + for _, _, arr in sdfg.arrays_recursive(): + for s in arr.shape: + if isinstance(s, sp.Symbol): + symbols.add(s) + return symbols + + +def posify_certain_symbols(expr, syms_to_posify): + """ + Takes an expression and evaluates it while assuming that certain symbols are positive. + + :param expr: The expression to evaluate. + :param syms_to_posify: List of symbols we assume to be positive. + :note: This is adapted from the Sympy function posify. + """ + + expr = sp.sympify(expr) + + reps = {s: sp.Dummy(s.name, positive=True, **s.assumptions0) for s in syms_to_posify if s.is_positive is None} + expr = expr.subs(reps) + return expr.subs({r: s for s, r in reps.items()}) + + +def symeval(val, symbols): + """ + Takes a sympy expression and substitutes its symbols according to a dict { old_symbol: new_symbol}. + + :param val: The expression we are updating. + :param symbols: Dictionary of key value pairs { old_symbol: new_symbol}. 
+ """ + first_replacement = {pystr_to_symbolic(k): pystr_to_symbolic('__REPLSYM_' + k) for k in symbols.keys()} + second_replacement = {pystr_to_symbolic('__REPLSYM_' + k): v for k, v in symbols.items()} + return val.subs(first_replacement).subs(second_replacement) + + +def evaluate_symbols(base, new): + result = {} + for k, v in new.items(): + result[k] = symeval(v, base) + return result + + +def count_work_matmul(node, symbols, state): + A_memlet = next(e for e in state.in_edges(node) if e.dst_conn == '_a') + B_memlet = next(e for e in state.in_edges(node) if e.dst_conn == '_b') + C_memlet = next(e for e in state.out_edges(node) if e.src_conn == '_c') + result = 2 # Multiply, add + # Batch + if len(C_memlet.data.subset) == 3: + result *= symeval(C_memlet.data.subset.size()[0], symbols) + # M*N + result *= symeval(C_memlet.data.subset.size()[-2], symbols) + result *= symeval(C_memlet.data.subset.size()[-1], symbols) + # K + result *= symeval(A_memlet.data.subset.size()[-1], symbols) + return result + + +def count_work_reduce(node, symbols, state): + result = 0 + if node.wcr is not None: + result += count_arithmetic_ops_code(node.wcr) + in_memlet = None + in_edges = state.in_edges(node) + if in_edges is not None and len(in_edges) == 1: + in_memlet = in_edges[0] + if in_memlet is not None and in_memlet.data.volume is not None: + result *= in_memlet.data.volume + else: + result = 0 + return result + + +LIBNODES_TO_WORK = { + MatMul: count_work_matmul, + Transpose: lambda *args: 0, + Reduce: count_work_reduce, +} + + +def count_depth_matmul(node, symbols, state): + # For now we set it equal to work: see comments in count_depth_reduce just below + return count_work_matmul(node, symbols, state) + + +def count_depth_reduce(node, symbols, state): + # depth of reduction is log2 of the work + # TODO: Can we actually assume this? Or is it equal to the work? + # Another thing to consider is that we essetially do NOT count wcr edges as operations for now... 
+ + # return sp.ceiling(sp.log(count_work_reduce(node, symbols, state), 2)) + # set it equal to work for now + return count_work_reduce(node, symbols, state) + + +LIBNODES_TO_DEPTH = { + MatMul: count_depth_matmul, + Transpose: lambda *args: 0, + Reduce: count_depth_reduce, +} + +bigo = sp.Function('bigo') +PYFUNC_TO_ARITHMETICS = { + 'float': 0, + 'dace.float64': 0, + 'dace.int64': 0, + 'math.exp': 1, + 'exp': 1, + 'math.tanh': 1, + 'sin': 1, + 'cos': 1, + 'tanh': 1, + 'math.sqrt': 1, + 'sqrt': 1, + 'atan2:': 1, + 'min': 0, + 'max': 0, + 'ceiling': 0, + 'floor': 0, + 'abs': 0 +} + + +class ArithmeticCounter(ast.NodeVisitor): + + def __init__(self): + self.count = 0 + + def visit_BinOp(self, node): + if isinstance(node.op, ast.MatMult): + raise NotImplementedError('MatMult op count requires shape ' + 'inference') + self.count += 1 + return self.generic_visit(node) + + def visit_UnaryOp(self, node): + self.count += 1 + return self.generic_visit(node) + + def visit_Call(self, node): + fname = astunparse.unparse(node.func)[:-1] + if fname not in PYFUNC_TO_ARITHMETICS: + print( + 'WARNING: Unrecognized python function "%s". If this is a type conversion, like "dace.float64", then this is fine.' + % fname) + return self.generic_visit(node) + self.count += PYFUNC_TO_ARITHMETICS[fname] + return self.generic_visit(node) + + def visit_AugAssign(self, node): + return self.visit_BinOp(node) + + def visit_For(self, node): + raise NotImplementedError + + def visit_While(self, node): + raise NotImplementedError + + +def count_arithmetic_ops_code(code): + ctr = ArithmeticCounter() + if isinstance(code, (tuple, list)): + for stmt in code: + ctr.visit(stmt) + elif isinstance(code, str): + ctr.visit(ast.parse(code)) + else: + ctr.visit(code) + return ctr.count + + +class DepthCounter(ast.NodeVisitor): + # so far this is identical to the ArithmeticCounter above. 
+ def __init__(self): + self.count = 0 + + def visit_BinOp(self, node): + if isinstance(node.op, ast.MatMult): + raise NotImplementedError('MatMult op count requires shape ' + 'inference') + self.count += 1 + return self.generic_visit(node) + + def visit_UnaryOp(self, node): + self.count += 1 + return self.generic_visit(node) + + def visit_Call(self, node): + fname = astunparse.unparse(node.func)[:-1] + if fname not in PYFUNC_TO_ARITHMETICS: + print( + 'WARNING: Unrecognized python function "%s". If this is a type conversion, like "dace.float64", then this is fine.' + % fname) + return self.generic_visit(node) + self.count += PYFUNC_TO_ARITHMETICS[fname] + return self.generic_visit(node) + + def visit_AugAssign(self, node): + return self.visit_BinOp(node) + + def visit_For(self, node): + raise NotImplementedError + + def visit_While(self, node): + raise NotImplementedError + + +def count_depth_code(code): + # so far this is the same as the work counter, since work = depth for each tasklet, as we can't assume any parallelism + ctr = ArithmeticCounter() + if isinstance(code, (tuple, list)): + for stmt in code: + ctr.visit(stmt) + elif isinstance(code, str): + ctr.visit(ast.parse(code)) + else: + ctr.visit(code) + return ctr.count + + +def tasklet_work(tasklet_node, state): + if tasklet_node.code.language == dtypes.Language.CPP: + for oedge in state.out_edges(tasklet_node): + return bigo(oedge.data.num_accesses) + + elif tasklet_node.code.language == dtypes.Language.Python: + return count_arithmetic_ops_code(tasklet_node.code.code) + else: + # other languages not implemented, count whole tasklet as work of 1 + warnings.warn('Work of tasklets only properly analyzed for Python or CPP. For all other ' + 'languages work = 1 will be counted for each tasklet.') + return 1 + + +def tasklet_depth(tasklet_node, state): + # TODO: how to get depth of CPP tasklets? 
+ # For now we use depth == work: + if tasklet_node.code.language == dtypes.Language.CPP: + for oedge in state.out_edges(tasklet_node): + return bigo(oedge.data.num_accesses) + if tasklet_node.code.language == dtypes.Language.Python: + return count_depth_code(tasklet_node.code.code) + else: + # other languages not implemented, count whole tasklet as work of 1 + warnings.warn('Depth of tasklets only properly analyzed for Python code. For all other ' + 'languages depth = 1 will be counted for each tasklet.') + return 1 + + +def get_tasklet_work(node, state): + return tasklet_work(node, state), -1 + + +def get_tasklet_work_depth(node, state): + return tasklet_work(node, state), tasklet_depth(node, state) + + +def get_tasklet_avg_par(node, state): + return tasklet_work(node, state), tasklet_depth(node, state) + + +def sdfg_work_depth(sdfg: SDFG, w_d_map: Dict[str, Tuple[sp.Expr, sp.Expr]], analyze_tasklet, + symbols) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a given SDFG. + First we determine the work and depth of each state. Then we break loops in the state machine, such that we get a DAG. + Lastly, we compute the path with most work and the path with the most depth in order to get the total work depth. + + :param sdfg: The SDFG to analyze. + :param w_d_map: Dictionary which will save the result. + :param analyze_tasklet: Function used to analyze tasklet nodes. + :param symbols: A dictionary mapping local nested SDFG symbols to global symbols. + :return: A tuple containing the work and depth of the SDFG. + """ + + # First determine the work and depth of each state individually. + # Keep track of the work and depth for each state in a dictionary, where work and depth are multiplied by the number + # of times the state will be executed. 
+ state_depths: Dict[SDFGState, sp.Expr] = {} + state_works: Dict[SDFGState, sp.Expr] = {} + for state in sdfg.nodes(): + state_work, state_depth = state_work_depth(state, w_d_map, analyze_tasklet, symbols) + state_works[state] = sp.simplify(state_work * state.executions) + state_depths[state] = sp.simplify(state_depth * state.executions) + w_d_map[get_uuid(state)] = (state_works[state], state_depths[state]) + + # Prepare the SDFG for a depth analysis by breaking loops. This removes the edge between the last loop state and + # the guard, and instead places an edge between the last loop state and the exit state. + # This transforms the state machine into a DAG. Hence, we can find the "heaviest" and "deepest" paths in linear time. + # Additionally, construct a dummy exit state and connect every state that has no outgoing edges to it. + + # identify all loops in the SDFG + nodes_oNodes_exits = find_loop_guards_tails_exits(sdfg._nx) + + # Now we need to go over each triple (node, oNode, exits). For each triple, we + # - remove edge (oNode, node), i.e. the backward edge + # - for all exits e, add edge (oNode, e). This edge may already exist + for node, oNode, exits in nodes_oNodes_exits: + sdfg.remove_edge(sdfg.edges_between(oNode, node)[0]) + for e in exits: + if len(sdfg.edges_between(oNode, e)) == 0: + # no edge there yet + sdfg.add_edge(oNode, e, InterstateEdge()) + + # add a dummy exit to the SDFG, such that each path ends there. + dummy_exit = sdfg.add_state('dummy_exit') + for state in sdfg.nodes(): + if len(sdfg.out_edges(state)) == 0 and state != dummy_exit: + sdfg.add_edge(state, dummy_exit, InterstateEdge()) + + # These two dicts save the current length of the "heaviest", resp. "deepest", paths at each state. + work_map: Dict[SDFGState, sp.Expr] = {} + depth_map: Dict[SDFGState, sp.Expr] = {} + # The dummy state has 0 work and depth. 
+    state_depths[dummy_exit] = sp.sympify(0)
+    state_works[dummy_exit] = sp.sympify(0)
+
+    # Perform a BFS traversal of the state machine and calculate the maximum work / depth at each state. Only advance to
+    # the next state in the BFS if all incoming edges have been visited, to ensure the maximum work / depth expressions
+    # have been calculated.
+    traversal_q = deque()
+    traversal_q.append((sdfg.start_state, sp.sympify(0), sp.sympify(0), None))
+    visited = set()
+    while traversal_q:
+        state, depth, work, ie = traversal_q.popleft()
+
+        if ie is not None:
+            visited.add(ie)
+
+        n_depth = sp.simplify(depth + state_depths[state])
+        n_work = sp.simplify(work + state_works[state])
+
+        # If we are analysing average parallelism, we don't search "heaviest" and "deepest" paths separately, but we want one
+        # single path with the least average parallelism (of all paths with more than 0 work).
+        if analyze_tasklet == get_tasklet_avg_par:
+            if state in depth_map:  # and hence also state in work_map
+                # if current path has 0 depth, we don't do anything.
+ if n_depth != 0: + # see if we need to update the work and depth of the current state + # we update if avg parallelism of new incoming path is less than current avg parallelism + old_avg_par = sp.simplify(work_map[state] / depth_map[state]) + new_avg_par = sp.simplify(n_work / n_depth) + + if depth_map[state] == 0 or new_avg_par < old_avg_par: + # old value was divided by zero or new path gives actually worse avg par, then we keep new value + depth_map[state] = n_depth + work_map[state] = n_work + else: + depth_map[state] = n_depth + work_map[state] = n_work + else: + # search heaviest and deepest path separately + if state in depth_map: # and consequently also in work_map + depth_map[state] = sp.Max(depth_map[state], n_depth) + work_map[state] = sp.Max(work_map[state], n_work) + else: + depth_map[state] = n_depth + work_map[state] = n_work + + out_edges = sdfg.out_edges(state) + # only advance after all incoming edges were visited (meaning that current work depth values of state are final). + if any(iedge not in visited for iedge in sdfg.in_edges(state)): + pass + else: + for oedge in out_edges: + traversal_q.append((oedge.dst, depth_map[state], work_map[state], oedge)) + + try: + max_depth = depth_map[dummy_exit] + max_work = work_map[dummy_exit] + except KeyError: + # If we get a KeyError above, this means that the traversal never reached the dummy_exit state. + # This happens if the loops were not properly detected and broken. + raise Exception( + 'Analysis failed, since not all loops got detected. It may help to use more structured loop constructs.') + + sdfg_result = (sp.simplify(max_work), sp.simplify(max_depth)) + w_d_map[get_uuid(sdfg)] = sdfg_result + return sdfg_result + + +def scope_work_depth(state: SDFGState, + w_d_map: Dict[str, sp.Expr], + analyze_tasklet, + symbols, + entry: nd.EntryNode = None) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a scope. 
+    This works by traversing through the scope analyzing the work and depth of each encountered node.
+    Depending on what kind of node we encounter, we do the following:
+        - EntryNode: Recursively analyze work depth of scope.
+        - Tasklet: use analyze_tasklet to get work depth of tasklet node.
+        - NestedSDFG: After translating its local symbols to global symbols, we analyze the nested SDFG recursively.
+        - LibraryNode: Library nodes are analyzed with special functions depending on their type.
+    Work inside a state can simply be summed up, but for the depth we need to find the longest path. Since dataflow is a DAG,
+    this can be done in linear time by traversing the graph in topological order.
+
+    :param state: The state in which the scope to analyze is contained.
+    :param symbols: A dictionary mapping symbols to their values.
+    :param entry: The entry node of the scope to analyze. If None, the entire state is analyzed.
+    :return: A tuple containing the work and depth of the scope.
+    """
+
+    # find the work and depth of each node
+    # for maps and nested SDFG, we do it recursively
+    work = sp.sympify(0)
+    max_depth = sp.sympify(0)
+    scope_nodes = state.scope_children()[entry]
+    scope_exit = None if entry is None else state.exit_node(entry)
+    for node in scope_nodes:
+        # add node to map
+        w_d_map[get_uuid(node, state)] = (sp.sympify(0), sp.sympify(0))
+        if isinstance(node, nd.EntryNode):
+            # If the scope contains an entry node, we need to recursively analyze the sub-scope of the entry node first.
+            # The resulting work/depth are summarized into the entry node
+            s_work, s_depth = scope_work_depth(state, w_d_map, analyze_tasklet, symbols, node)
+            # add up work for whole state, but also save work for this sub-scope in w_d_map
+            work += s_work
+            w_d_map[get_uuid(node, state)] = (s_work, s_depth)
+        elif node == scope_exit:
+            # don't do anything for exit nodes, everything handled already in the corresponding entry node.
+ pass + elif isinstance(node, nd.Tasklet): + # add up work for whole state, but also save work for this node in w_d_map + t_work, t_depth = analyze_tasklet(node, state) + work += t_work + w_d_map[get_uuid(node, state)] = (sp.sympify(t_work), sp.sympify(t_depth)) + elif isinstance(node, nd.NestedSDFG): + # keep track of nested symbols: "symbols" maps local nested SDFG symbols to global symbols. + # We only want global symbols in our final work depth expressions. + nested_syms = {} + nested_syms.update(symbols) + nested_syms.update(evaluate_symbols(symbols, node.symbol_mapping)) + # Nested SDFGs are recursively analyzed first. + nsdfg_work, nsdfg_depth = sdfg_work_depth(node.sdfg, w_d_map, analyze_tasklet, nested_syms) + + # add up work for whole state, but also save work for this nested SDFG in w_d_map + work += nsdfg_work + w_d_map[get_uuid(node, state)] = (nsdfg_work, nsdfg_depth) + elif isinstance(node, nd.LibraryNode): + lib_node_work = LIBNODES_TO_WORK[type(node)](node, symbols, state) + work += lib_node_work + lib_node_depth = -1 # not analyzed + if analyze_tasklet != get_tasklet_work: + # we are analyzing depth + lib_node_depth = LIBNODES_TO_DEPTH[type(node)](node, symbols, state) + w_d_map[get_uuid(node, state)] = (lib_node_work, lib_node_depth) + + if entry is not None: + # If the scope being analyzed is a map, multiply the work by the number of iterations of the map. + if isinstance(entry, nd.MapEntry): + nmap: nd.Map = entry.map + range: Range = nmap.range + n_exec = range.num_elements_exact() + work = work * sp.simplify(n_exec) + else: + print('WARNING: Only Map scopes are supported in work analysis for now. Assuming 1 iteration.') + + # Work inside a state can simply be summed up. But now we need to find the depth of a state (i.e. longest path). + # Since dataflow graph is a DAG, this can be done in linear time. 
+    max_depth = sp.sympify(0)
+    # only do this if we are analyzing depth
+    if analyze_tasklet == get_tasklet_work_depth or analyze_tasklet == get_tasklet_avg_par:
+        # Calculate the maximum depth of the scope by finding the 'deepest' path from the source to the sink. This is done by
+        # a traversal in topological order, where each node propagates its current max depth for all incoming paths.
+        traversal_q = deque()
+        visited = set()
+        # find all starting nodes
+        if entry:
+            # the entry is the starting node
+            traversal_q.append((entry, sp.sympify(0), None))
+        else:
+            for node in scope_nodes:
+                if len(state.in_edges(node)) == 0:
+                    # This node is a start node of the traversal
+                    traversal_q.append((node, sp.sympify(0), None))
+        # this map keeps track of the length of the longest path ending at each node so far seen.
+        depth_map = {}
+        while traversal_q:
+            node, in_depth, in_edge = traversal_q.popleft()
+
+            if in_edge is not None:
+                visited.add(in_edge)
+
+            n_depth = sp.simplify(in_depth + w_d_map[get_uuid(node, state)][1])
+
+            if node in depth_map:
+                depth_map[node] = sp.Max(depth_map[node], n_depth)
+            else:
+                depth_map[node] = n_depth
+
+            out_edges = state.out_edges(node)
+            # Only advance to next node, if all incoming edges have been visited or the current node is the entry (aka starting node).
+            # If the current node is the exit of the scope, we stop, such that we don't leave the scope.
+            if (all(iedge in visited for iedge in state.in_edges(node)) or node == entry) and node != scope_exit:
+                # If we encounter a nested map, we must not analyze its contents (as they have already been recursively analyzed).
+                # Hence, we continue from the outgoing edges of the corresponding exit.
+ if isinstance(node, nd.EntryNode) and node != entry: + exit_node = state.exit_node(node) + # replace out_edges with the out_edges of the scope exit node + out_edges = state.out_edges(exit_node) + for oedge in out_edges: + traversal_q.append((oedge.dst, depth_map[node], oedge)) + if len(out_edges) == 0 or node == scope_exit: + # We have reached an end node --> update max_depth + max_depth = sp.Max(max_depth, depth_map[node]) + + # summarise work / depth of the whole scope in the dictionary + scope_result = (sp.simplify(work), sp.simplify(max_depth)) + w_d_map[get_uuid(state)] = scope_result + return scope_result + + +def state_work_depth(state: SDFGState, w_d_map: Dict[str, sp.Expr], analyze_tasklet, + symbols) -> Tuple[sp.Expr, sp.Expr]: + """ + Analyze the work and depth of a state. + + :param state: The state to analyze. + :param w_d_map: The result will be saved to this map. + :param analyze_tasklet: Function used to analyze tasklet nodes. + :param symbols: A dictionary mapping local nested SDFG symbols to global symbols. + :return: A tuple containing the work and depth of the state. + """ + work, depth = scope_work_depth(state, w_d_map, analyze_tasklet, symbols, None) + return work, depth + + +def analyze_sdfg(sdfg: SDFG, w_d_map: Dict[str, sp.Expr], analyze_tasklet) -> None: + """ + Analyze a given SDFG. We can either analyze work, work and depth or average parallelism. + + :note: SDFGs should have split interstate edges. This means there should be no interstate edges containing both a + condition and an assignment. + :param sdfg: The SDFG to analyze. + :param w_d_map: Dictionary of SDFG elements to (work, depth) tuples. Result will be saved in here. + :param analyze_tasklet: The function used to analyze tasklet nodes. Analyzes either just work, work and depth or average parallelism. + """ + + # deepcopy such that original sdfg not changed + sdfg = deepcopy(sdfg) + + # Run state propagation for all SDFGs recursively. 
This is necessary to determine the number of times each state + # will be executed, or to determine upper bounds for that number (such as in the case of branching) + for sd in sdfg.all_sdfgs_recursive(): + propagation.propagate_states(sd, concretize_dynamic_unbounded=True) + + # Analyze the work and depth of the SDFG. + symbols = {} + sdfg_work_depth(sdfg, w_d_map, analyze_tasklet, symbols) + + # Note: This posify could be done more often to improve performance. + array_symbols = get_array_size_symbols(sdfg) + for k, (v_w, v_d) in w_d_map.items(): + # The symeval replaces nested SDFG symbols with their global counterparts. + v_w = posify_certain_symbols(symeval(v_w, symbols), array_symbols) + v_d = posify_certain_symbols(symeval(v_d, symbols), array_symbols) + w_d_map[k] = (v_w, v_d) + + +################################################################################ +# Utility functions for running the analysis from the command line ############# +################################################################################ + + +def main() -> None: + + parser = argparse.ArgumentParser('work_depth', + usage='python work_depth.py [-h] filename --analyze {work,workDepth,avgPar}', + description='Analyze the work/depth of an SDFG.') + + parser.add_argument('filename', type=str, help='The SDFG file to analyze.') + parser.add_argument('--analyze', + choices=['work', 'workDepth', 'avgPar'], + default='workDepth', + help='Choose what to analyze. 
Default: workDepth') + + args = parser.parse_args() + + if not os.path.exists(args.filename): + print(args.filename, 'does not exist.') + exit() + + if args.analyze == 'workDepth': + analyze_tasklet = get_tasklet_work_depth + elif args.analyze == 'avgPar': + analyze_tasklet = get_tasklet_avg_par + elif args.analyze == 'work': + analyze_tasklet = get_tasklet_work + + sdfg = SDFG.from_file(args.filename) + work_depth_map = {} + analyze_sdfg(sdfg, work_depth_map, analyze_tasklet) + + if args.analyze == 'workDepth': + for k, v, in work_depth_map.items(): + work_depth_map[k] = (str(sp.simplify(v[0])), str(sp.simplify(v[1]))) + elif args.analyze == 'work': + for k, v, in work_depth_map.items(): + work_depth_map[k] = str(sp.simplify(v[0])) + elif args.analyze == 'avgPar': + for k, v, in work_depth_map.items(): + work_depth_map[k] = str(sp.simplify(v[0] / v[1]) if str(v[1]) != '0' else 0) # work / depth = avg par + + result_whole_sdfg = work_depth_map[get_uuid(sdfg)] + + print(80 * '-') + if args.analyze == 'workDepth': + print("Work:\t", result_whole_sdfg[0]) + print("Depth:\t", result_whole_sdfg[1]) + elif args.analyze == 'work': + print("Work:\t", result_whole_sdfg) + elif args.analyze == 'avgPar': + print("Average Parallelism:\t", result_whole_sdfg) + print(80 * '-') + + +if __name__ == '__main__': + main() diff --git a/tests/sdfg/work_depth_tests.py b/tests/sdfg/work_depth_tests.py new file mode 100644 index 0000000000..133afe8ae4 --- /dev/null +++ b/tests/sdfg/work_depth_tests.py @@ -0,0 +1,201 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Contains test cases for the work depth analysis. 
""" +import dace as dc +from dace.sdfg.work_depth_analysis.work_depth import analyze_sdfg, get_tasklet_work_depth +from dace.sdfg.work_depth_analysis.helpers import get_uuid +import sympy as sp + +from dace.transformation.interstate import NestSDFG +from dace.transformation.dataflow import MapExpansion + +# TODO: add tests for library nodes (e.g. reduce, matMul) + +N = dc.symbol('N') +M = dc.symbol('M') +K = dc.symbol('K') + + +@dc.program +def single_map(x: dc.float64[N], y: dc.float64[N], z: dc.float64[N]): + z[:] = x + y + + +@dc.program +def single_for_loop(x: dc.float64[N], y: dc.float64[N]): + for i in range(N): + x[i] += y[i] + + +@dc.program +def if_else(x: dc.int64[1000], y: dc.int64[1000], z: dc.int64[1000], sum: dc.int64[1]): + if x[10] > 50: + z[:] = x + y # 1000 work, 1 depth + else: + for i in range(100): # 100 work, 100 depth + sum += x[i] + + +@dc.program +def if_else_sym(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], sum: dc.int64[1]): + if x[10] > 50: + z[:] = x + y # N work, 1 depth + else: + for i in range(K): # K work, K depth + sum += x[i] + + +@dc.program +def nested_sdfg(x: dc.float64[N], y: dc.float64[N], z: dc.float64[N]): + single_map(x, y, z) + single_for_loop(x, y) + + +@dc.program +def nested_maps(x: dc.float64[N, M], y: dc.float64[N, M], z: dc.float64[N, M]): + z[:, :] = x + y + + +@dc.program +def nested_for_loops(x: dc.float64[N], y: dc.float64[K]): + for i in range(N): + for j in range(K): + x[i] += y[j] + + +@dc.program +def nested_if_else(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], sum: dc.int64[1]): + if x[10] > 50: + if x[9] > 50: + z[:] = x + y # N work, 1 depth + z[:] += 2 * x # 2*N work, 2 depth --> total outer if: 3*N work, 3 depth + else: + if y[9] > 50: + for i in range(K): + sum += x[i] # K work, K depth + else: + for j in range(M): + sum += x[j] # M work, M depth + z[:] = x + y # N work, depth 1 --> total inner else: M+N work, M+1 depth + # --> total outer else: Max(K, M+N) work, Max(K, M+1) depth + # --> total 
over both branches: Max(K, M+N, 3*N) work, Max(K, M+1, 3) depth + + +@dc.program +def max_of_positive_symbol(x: dc.float64[N]): + if x[0] > 0: + for i in range(2 * N): # work 2*N^2, depth 2*N + x += 1 + else: + for j in range(3 * N): # work 3*N^2, depth 3*N + x += 1 + # total is work 3*N^2, depth 3*N without any max + + +@dc.program +def multiple_array_sizes(x: dc.int64[N], y: dc.int64[N], z: dc.int64[N], x2: dc.int64[M], y2: dc.int64[M], + z2: dc.int64[M], x3: dc.int64[K], y3: dc.int64[K], z3: dc.int64[K]): + if x[0] > 0: + z[:] = 2 * x + y # work 2*N, depth 2 + elif x[1] > 0: + z2[:] = 2 * x2 + y2 # work 2*M + 3, depth 5 + z2[0] += 3 + z[1] + z[2] + elif x[2] > 0: + z3[:] = 2 * x3 + y3 # work 2*K, depth 2 + elif x[3] > 0: + z[:] = 3 * x + y + 1 # work 3*N, depth 3 + # --> work= Max(3*N, 2*M, 2*K) and depth = 5 + + +@dc.program +def unbounded_while_do(x: dc.float64[N]): + while x[0] < 100: + x += 1 + + +@dc.program +def unbounded_do_while(x: dc.float64[N]): + while True: + x += 1 + if x[0] >= 100: + break + + +@dc.program +def unbounded_nonnegify(x: dc.float64[N]): + while x[0] < 100: + if x[1] < 42: + x += 3 * x + else: + x += x + + +@dc.program +def continue_for_loop(x: dc.float64[N]): + for i in range(N): + if x[i] > 100: + continue + x += 1 + + +@dc.program +def break_for_loop(x: dc.float64[N]): + for i in range(N): + if x[i] > 100: + break + x += 1 + + +@dc.program +def break_while_loop(x: dc.float64[N]): + while x[0] > 10: + if x[1] > 100: + break + x += 1 + + +tests_cases = [ + (single_map, (N, 1)), + (single_for_loop, (N, N)), + (if_else, (1000, 100)), + (if_else_sym, (sp.Max(K, N), sp.Max(1, K))), + (nested_sdfg, (2 * N, N + 1)), + (nested_maps, (M * N, 1)), + (nested_for_loops, (K * N, K * N)), + (nested_if_else, (sp.Max(K, 3 * N, M + N), sp.Max(3, K, M + 1))), + (max_of_positive_symbol, (3 * N**2, 3 * N)), + (multiple_array_sizes, (sp.Max(2 * K, 3 * N, 2 * M + 3), 5)), + (unbounded_while_do, (sp.Symbol('num_execs_0_2', nonnegative=True) * N, 
sp.Symbol('num_execs_0_2', + nonnegative=True))), + # We get this Max(1, num_execs), since it is a do-while loop, but the num_execs symbol does not capture this. + (unbounded_do_while, (sp.Max(1, sp.Symbol('num_execs_0_1', nonnegative=True)) * N, + sp.Max(1, sp.Symbol('num_execs_0_1', nonnegative=True)))), + (unbounded_nonnegify, (2 * sp.Symbol('num_execs_0_7', nonnegative=True) * N, + 2 * sp.Symbol('num_execs_0_7', nonnegative=True))), + (continue_for_loop, (sp.Symbol('num_execs_0_6', nonnegative=True) * N, sp.Symbol('num_execs_0_6', + nonnegative=True))), + (break_for_loop, (N**2, N)), + (break_while_loop, (sp.Symbol('num_execs_0_5', nonnegative=True) * N, sp.Symbol('num_execs_0_5', nonnegative=True))) +] + + +def test_work_depth(): + good = 0 + failed = 0 + exception = 0 + failed_tests = [] + for test, correct in tests_cases: + w_d_map = {} + sdfg = test.to_sdfg() + if 'nested_sdfg' in test.name: + sdfg.apply_transformations(NestSDFG) + if 'nested_maps' in test.name: + sdfg.apply_transformations(MapExpansion) + + analyze_sdfg(sdfg, w_d_map, get_tasklet_work_depth) + res = w_d_map[get_uuid(sdfg)] + # check result + assert correct == res + + +if __name__ == '__main__': + test_work_depth() From 1cb9f9fa459390df0267b1f9365bb62793563b95 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 13:58:33 +0200 Subject: [PATCH 40/48] Added support for StructureViews. 
--- dace/codegen/compiled_sdfg.py | 2 +- dace/codegen/dispatcher.py | 4 ++-- dace/codegen/targets/cpu.py | 20 ++++++++++++++++---- dace/codegen/targets/framecode.py | 2 +- dace/data.py | 1 + dace/sdfg/utils.py | 2 +- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index 863e804802..9ee0772eeb 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -473,7 +473,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: else: warnings.warn(f'Casting scalar argument "{a}" from {type(arg).__name__} to {atype.dtype.type}') arglist[i] = atype.dtype.type(arg) - elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray) + elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray) and not isinstance(atype, dt.StructArray) and atype.dtype.as_numpy_dtype() != arg.dtype): # Make exception for vector types if (isinstance(atype.dtype, dtypes.vector) and atype.dtype.vtype.as_numpy_dtype() == arg.dtype): diff --git a/dace/codegen/dispatcher.py b/dace/codegen/dispatcher.py index 0b4f58d5ef..5972f5759d 100644 --- a/dace/codegen/dispatcher.py +++ b/dace/codegen/dispatcher.py @@ -504,11 +504,11 @@ def get_copy_dispatcher(self, src_node, dst_node, edge, sdfg, state): dst_is_data = True # Skip copies to/from views where edge matches - if src_is_data and isinstance(src_node.desc(sdfg), dt.View): + if src_is_data and isinstance(src_node.desc(sdfg), (dt.StructureView, dt.View)): e = sdutil.get_view_edge(state, src_node) if e is edge: return None - if dst_is_data and isinstance(dst_node.desc(sdfg), dt.View): + if dst_is_data and isinstance(dst_node.desc(sdfg), (dt.StructureView, dt.View)): e = sdutil.get_view_edge(state, dst_node) if e is edge: return None diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 3cd262e050..1fa4778806 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -215,9 +215,21 @@ def 
allocate_view(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.A ancestor=0, is_write=is_write) if not declared: - declaration_stream.write(f'{atype} {aname};', sdfg, state_id, node) ctypedef = dtypes.pointer(nodedesc.dtype).ctype self._dispatcher.declared_arrays.add(aname, DefinedType.Pointer, ctypedef) + if isinstance(nodedesc, data.StructureView): + for k, v in nodedesc.members.items(): + if isinstance(v, data.Data): + ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype + defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer + self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef) + self._dispatcher.defined_vars.add(f"{name}.{k}", defined_type, ctypedef) + # TODO: Find a better way to do this (the issue is with pointers of pointers) + if atype.endswith('*'): + atype = atype[:-1] + if value.startswith('&'): + value = value[1:] + declaration_stream.write(f'{atype} {aname};', sdfg, state_id, node) allocation_stream.write(f'{aname} = {value};', sdfg, state_id, node) def allocate_reference(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.AccessNode, @@ -311,7 +323,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d if not isinstance(nodedesc.dtype, dtypes.opaque): arrsize_bytes = arrsize * nodedesc.dtype.bytes - if isinstance(nodedesc, data.Structure): + if isinstance(nodedesc, data.Structure) and not isinstance(nodedesc, data.StructureView): declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") define_var(name, DefinedType.Pointer, nodedesc.ctype) for k, v in nodedesc.members.items(): @@ -322,7 +334,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream, declaration_stream, allocation_stream) return - if isinstance(nodedesc, data.View): + if 
isinstance(nodedesc, (data.StructureView, data.View)): return self.allocate_view(sdfg, dfg, state_id, node, function_stream, declaration_stream, allocation_stream) if isinstance(nodedesc, data.Reference): return self.allocate_reference(sdfg, dfg, state_id, node, function_stream, declaration_stream, @@ -487,7 +499,7 @@ def deallocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, dtypes.AllocationLifetime.External) self._dispatcher.declared_arrays.remove(alloc_name, is_global=is_global) - if isinstance(nodedesc, (data.Scalar, data.View, data.Stream, data.Reference)): + if isinstance(nodedesc, (data.Scalar, data.StructureView, data.View, data.Stream, data.Reference)): return elif (nodedesc.storage == dtypes.StorageType.CPU_Heap or (nodedesc.storage == dtypes.StorageType.Register and symbolic.issymbolic(arrsize, sdfg.constants))): diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 52915f51b5..9ee5c2ef17 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -749,7 +749,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG): instances = access_instances[sdfg.sdfg_id][name] # A view gets "allocated" everywhere it appears - if isinstance(desc, data.View): + if isinstance(desc, (data.StructureView, data.View)): for s, n in instances: self.to_allocate[s].append((sdfg, s, n, False, True, False)) self.to_allocate[s].append((sdfg, s, n, False, False, True)) diff --git a/dace/data.py b/dace/data.py index 99d7ffc774..bf771db1d4 100644 --- a/dace/data.py +++ b/dace/data.py @@ -510,6 +510,7 @@ def validate(self): if self.lifetime != dtypes.AllocationLifetime.Scope: raise ValueError('Only Scope allocation lifetime is supported for Views') + @make_properties class Scalar(Data): """ Data descriptor of a scalar value. 
""" diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index d08518b10c..3396335ece 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -1396,7 +1396,7 @@ def is_nonfree_sym_dependent(node: nd.AccessNode, desc: dt.Data, state: SDFGStat :param state: the state that contains the node :param fsymbols: the free symbols to check against """ - if isinstance(desc, dt.View): + if isinstance(desc, (dt.StructureView, dt.View)): # Views can be non-free symbol dependent due to the adjacent edges. e = get_view_edge(state, node) if e.data: From 5a2c4602c2341f057a5159c3cbe2437f33ab24e8 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 13:58:58 +0200 Subject: [PATCH 41/48] Added tests for StructArrays. --- tests/sdfg/data/struct_array_test.py | 184 +++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 tests/sdfg/data/struct_array_test.py diff --git a/tests/sdfg/data/struct_array_test.py b/tests/sdfg/data/struct_array_test.py new file mode 100644 index 0000000000..9b40379e53 --- /dev/null +++ b/tests/sdfg/data/struct_array_test.py @@ -0,0 +1,184 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+import ctypes +import dace +import numpy as np + +from scipy import sparse + + +def test_read_struct_array(): + + L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('array_of_csr_to_dense') + + sdfg.add_datadesc('A', csr_obj[L]) + sdfg.add_array('B', [L, M, N], dace.float32) + + sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + bme, bmx = state.add_map('b', dict(b='0:L')) + bme.map.schedule = dace.ScheduleType.Sequential + + vcsr = state.add_access('vcsr') + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_memlet_path(A, bme, vcsr, dst_conn='views', memlet=dace.Memlet(data='A', subset='b')) + state.add_edge(vcsr, None, indptr, 'views', memlet=dace.Memlet.from_array('vcsr.indptr', csr_obj.members['indptr'])) + state.add_edge(vcsr, None, indices, 'views', memlet=dace.Memlet.from_array('vcsr.indices', csr_obj.members['indices'])) + state.add_edge(vcsr, None, data, 'views', memlet=dace.Memlet.from_array('vcsr.data', csr_obj.members['data'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = 
state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, bmx, B, memlet=dace.Memlet(data='B', subset='b, 0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = np.ndarray((10,), dtype=sparse.csr_matrix) + dace_A = np.ndarray((10,), dtype=ctypes.c_void_p) + B = np.zeros((10, 20, 20), dtype=np.float32) + + ctypes_A = [] + for b in range(10): + A[b] = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + ctypes_obj = csr_obj.dtype._typeclass.as_ctypes()(indptr=A[b].indptr.__array_interface__['data'][0], + indices=A[b].indices.__array_interface__['data'][0], + data=A[b].data.__array_interface__['data'][0]) + ctypes_A.append(ctypes_obj) # This is needed to keep the object alive ... 
+ dace_A[b] = ctypes.addressof(ctypes_obj) + + func(A=dace_A, B=B, L=A.shape[0], M=A[0].shape[0], N=A[0].shape[1], nnz=A[0].nnz) + ref = np.ndarray((10, 20, 20), dtype=np.float32) + for b in range(10): + ref[b] = A[b].toarray() + + assert np.allclose(B, ref) + + +def test_write_struct_array(): + + L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) + csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + order=['indptr', 'indices', 'data'], + name='CSRMatrix', + transient=True) + + sdfg = dace.SDFG('array_dense_to_csr') + + sdfg.add_array('A', [L, M, N], dace.float32) + sdfg.add_datadesc('B', csr_obj[L]) + + sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) + sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) + sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) + + # Make If + if_before = sdfg.add_state('if_before') + if_guard = sdfg.add_state('if_guard') + if_body = sdfg.add_state('if_body') + if_after = sdfg.add_state('if_after') + sdfg.add_edge(if_before, if_guard, dace.InterstateEdge()) + sdfg.add_edge(if_guard, if_body, dace.InterstateEdge(condition='A[k, i, j] != 0')) + sdfg.add_edge(if_body, if_after, dace.InterstateEdge(assignments={'idx': 'idx + 1'})) + sdfg.add_edge(if_guard, if_after, dace.InterstateEdge(condition='A[k, i, j] == 0')) + A = if_body.add_access('A') + vcsr = if_body.add_access('vcsr') + B = if_body.add_access('B') + indices = if_body.add_access('vindices') + data = if_body.add_access('vdata') + if_body.add_edge(A, None, data, None, dace.Memlet(data='A', subset='k, i, j', other_subset='idx')) + if_body.add_edge(data, 'views', vcsr, None, 
dace.Memlet(data='vcsr.data', subset='0:nnz')) + t = if_body.add_tasklet('set_indices', {}, {'__out'}, '__out = j') + if_body.add_edge(t, '__out', indices, None, dace.Memlet(data='vindices', subset='idx')) + if_body.add_edge(indices, 'views', vcsr, None, dace.Memlet(data='vcsr.indices', subset='0:nnz')) + if_body.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + # Make For Loop for j + j_before, j_guard, j_after = sdfg.add_loop(None, + if_before, + None, + 'j', + '0', + 'j < N', + 'j + 1', + loop_end_state=if_after) + # Make For Loop for i + i_before, i_guard, i_after = sdfg.add_loop(None, j_before, None, 'i', '0', 'i < M', 'i + 1', loop_end_state=j_after) + sdfg.start_state = sdfg.node_id(i_before) + i_before_guard = sdfg.edges_between(i_before, i_guard)[0] + i_before_guard.data.assignments['idx'] = '0' + vcsr = i_guard.add_access('vcsr') + B = i_guard.add_access('B') + indptr = i_guard.add_access('vindptr') + t = i_guard.add_tasklet('set_indptr', {}, {'__out'}, '__out = idx') + i_guard.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='i')) + i_guard.add_edge(indptr, 'views', vcsr, None, dace.Memlet(data='vcsr.indptr', subset='0:M+1')) + i_guard.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + vcsr = i_after.add_access('vcsr') + B = i_after.add_access('B') + indptr = i_after.add_access('vindptr') + t = i_after.add_tasklet('set_indptr', {}, {'__out'}, '__out = nnz') + i_after.add_edge(t, '__out', indptr, None, dace.Memlet(data='vindptr', subset='M')) + i_after.add_edge(indptr, 'views', vcsr, None, dace.Memlet(data='vcsr.indptr', subset='0:M+1')) + i_after.add_edge(vcsr, 'views', B, None, dace.Memlet(data='B', subset='k')) + + k_before, k_guard, k_after = sdfg.add_loop(None, i_before, None, 'k', '0', 'k < L', 'k + 1', loop_end_state=i_after) + + func = sdfg.compile() + + rng = np.random.default_rng(42) + B = np.ndarray((10,), dtype=sparse.csr_matrix) + dace_B = np.ndarray((10,), dtype=ctypes.c_void_p) + A 
= np.empty((10, 20, 20), dtype=np.float32) + + ctypes_B = [] + for b in range(10): + B[b] = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + A[b] = B[b].toarray() + nnz = B[b].nnz + B[b].indptr[:] = -1 + B[b].indices[:] = -1 + B[b].data[:] = -1 + ctypes_obj = csr_obj.dtype._typeclass.as_ctypes()(indptr=B[b].indptr.__array_interface__['data'][0], + indices=B[b].indices.__array_interface__['data'][0], + data=B[b].data.__array_interface__['data'][0]) + ctypes_B.append(ctypes_obj) # This is needed to keep the object alive ... + dace_B[b] = ctypes.addressof(ctypes_obj) + + func(A=A, B=dace_B, L=B.shape[0], M=B[0].shape[0], N=B[0].shape[1], nnz=nnz) + for b in range(10): + assert np.allclose(A[b], B[b].toarray()) + + +if __name__ == '__main__': + test_read_struct_array() + test_write_struct_array() From f1b0c73dffee4468119cd1575edecc9f1fa7bdab Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 17 Aug 2023 15:15:24 +0200 Subject: [PATCH 42/48] Fixed serialization. 
--- dace/data.py | 22 +++++++++++++++++++++- dace/properties.py | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dace/data.py b/dace/data.py index bf771db1d4..37d532ac44 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1102,9 +1102,29 @@ def __init__(self, pool=False): self.stype = stype - dtype = stype.dtype + if stype: + dtype = stype.dtype + else: + dtype = dtypes.int8 super(StructArray, self).__init__(dtype, shape, transient, allow_conflicts, storage, location, strides, offset, may_alias, lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) + + @classmethod + def from_json(cls, json_obj, context=None): + # Create dummy object + ret = cls(None, ()) + serialize.set_properties_from_json(ret, json_obj, context=context) + + # Default shape-related properties + if not ret.offset: + ret.offset = [0] * len(ret.shape) + if not ret.strides: + # Default strides are C-ordered + ret.strides = [_prod(ret.shape[i + 1:]) for i in range(len(ret.shape))] + if ret.total_size == 0: + ret.total_size = _prod(ret.shape) + + return ret @make_properties diff --git a/dace/properties.py b/dace/properties.py index fb37ec7a7c..0bec65d0ec 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1408,7 +1408,7 @@ def to_string(obj): def to_json(self, obj): if obj is None: return None - return obj.dtype.to_json() + return obj.to_json() @staticmethod def from_json(obj, context=None): From 82c2bb82315fdb94a2033b84295ed888859c5b62 Mon Sep 17 00:00:00 2001 From: Tiziano De Matteis <5871117+TizianoDeMatteis@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:44:27 +0200 Subject: [PATCH 43/48] Have memory type as argument for fpga auto interleave (#1352) Co-authored-by: Tiziano De Matteis --- dace/transformation/auto/fpga.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dace/transformation/auto/fpga.py b/dace/transformation/auto/fpga.py index 4295699cdb..573341e1f6 100644 --- a/dace/transformation/auto/fpga.py +++ 
b/dace/transformation/auto/fpga.py @@ -44,24 +44,28 @@ def fpga_global_to_local(sdfg: SDFG, max_size: int = 1048576) -> None: print(f'Applied {len(converted)} Global-To-Local{": " if len(converted)>0 else "."} {", ".join(converted)}') -def fpga_rr_interleave_containers_to_banks(sdfg: SDFG, num_banks: int = 4): +def fpga_rr_interleave_containers_to_banks(sdfg: SDFG, num_banks: int = 4, memory_type: str = "DDR"): """ Allocates the (global) arrays to FPGA off-chip memory banks, interleaving them in a Round-Robin (RR) fashion. This applies to all the arrays in the SDFG hierarchy. :param sdfg: The SDFG to operate on. :param num_banks: number of off-chip memory banks to consider + :param memory_type: type of off-chip memory, either "DDR" or "HBM" (if the target FPGA supports it) :return: a list containing the number of (transient) arrays allocated to each bank :note: Operates in-place on the SDFG. """ + if memory_type.upper() not in {"DDR", "HBM"}: + raise ValueError("Memory type should be either \"DDR\" or \"HBM\"") + # keep track of memory allocated to each bank num_allocated = [0 for i in range(num_banks)] i = 0 for sd, aname, desc in sdfg.arrays_recursive(): if not isinstance(desc, dt.Stream) and desc.storage == dtypes.StorageType.FPGA_Global and desc.transient: - desc.location["memorytype"] = "ddr" + desc.location["memorytype"] = memory_type.upper() desc.location["bank"] = str(i % num_banks) num_allocated[i % num_banks] = num_allocated[i % num_banks] + 1 i = i + 1 From c5889a4e3092a89a5466f6b8c2fe29d3ea3ad1a1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 21 Aug 2023 17:20:43 +0200 Subject: [PATCH 44/48] Addressed comments. 
--- dace/codegen/targets/cpp.py | 2 ++ dace/codegen/targets/cpu.py | 15 +++++++++------ dace/data.py | 6 +++--- dace/dtypes.py | 2 +- dace/properties.py | 8 +++++--- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index 093a324d9a..d3d4f50ccd 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -370,6 +370,8 @@ def make_const(expr: str) -> str: # Register defined variable dispatcher.defined_vars.add(pointer_name, defined_type, typedef, allow_shadowing=True) + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and structures. + # NOTE: Since structures are implemented as pointers, we replace dots with arrows. expr = expr.replace('.', '->') return (typedef + ref, pointer_name, expr) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 20615a3136..0464672390 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -55,10 +55,13 @@ def __init__(self, frame_codegen, sdfg): # Keep track of generated NestedSDG, and the name of the assigned function self._generated_nested_sdfg = dict() + # NOTE: Multi-nesting with StructArrays must be further investigated. 
def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): for k, v in struct.members.items(): if isinstance(v, data.Structure): _visit_structure(v, args, f'{prefix}.{k}') + elif isinstance(v, data.StructArray): + _visit_structure(v.stype, args, f'{prefix}.{k}') elif isinstance(v, data.Data): args[f'{prefix}.{k}'] = v @@ -71,11 +74,7 @@ def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): elif isinstance(arg_type, data.StructArray): desc = sdfg.arrays[name] desc = desc.stype - for attr in dir(desc): - value = getattr(desc, attr) - if isinstance(value, data.Data): - assert attr in sdfg.arrays - arglist[attr] = value + _visit_structure(desc, arglist, name) for name, arg_type in arglist.items(): if isinstance(arg_type, (data.Scalar, data.Structure)): @@ -300,6 +299,8 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d name = node.data alloc_name = cpp.ptr(name, nodedesc, sdfg, self._frame) name = alloc_name + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and + # NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows. 
alloc_name = alloc_name.replace('.', '->') if nodedesc.transient is False: @@ -324,7 +325,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d arrsize_bytes = arrsize * nodedesc.dtype.bytes if isinstance(nodedesc, data.Structure) and not isinstance(nodedesc, data.StructureView): - declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type}();\n") + declaration_stream.write(f"{nodedesc.ctype} {name} = new {nodedesc.dtype.base_type};\n") define_var(name, DefinedType.Pointer, nodedesc.ctype) for k, v in nodedesc.members.items(): if isinstance(v, data.Data): @@ -1183,6 +1184,8 @@ def memlet_definition(self, if not types: types = self._dispatcher.defined_vars.get(ptr, is_global=True) var_type, ctypedef = types + # NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and + # NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows. ptr = ptr.replace('.', '->') if fpga.is_fpga_array(desc): diff --git a/dace/data.py b/dace/data.py index 37d532ac44..5f05cbfcc8 100644 --- a/dace/data.py +++ b/dace/data.py @@ -374,7 +374,7 @@ class Structure(Data): desc="Dictionary of structure members", from_json=_arrays_from_json, to_json=_arrays_to_json) - name = Property(dtype=str, desc="Structure name") + name = Property(dtype=str, desc="Structure type name") def __init__(self, members: Dict[str, Data], @@ -478,7 +478,7 @@ def as_arg(self, with_types=True, for_call=False, name=None): def __getitem__(self, s): """ This is syntactic sugar that allows us to define an array type with the following syntax: ``Structure[N,M]`` - :return: A ``data.Array`` data descriptor. + :return: A ``data.StructArray`` data descriptor. 
""" if isinstance(s, list) or isinstance(s, tuple): return StructArray(self, tuple(s)) @@ -1084,7 +1084,7 @@ class StructArray(Array): stype = NestedDataClassProperty(allow_none=True, default=None) def __init__(self, - stype, + stype: Structure, shape, transient=False, allow_conflicts=False, diff --git a/dace/dtypes.py b/dace/dtypes.py index 888f74f6b9..f0bac23958 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -835,9 +835,9 @@ def as_ctypes(self): fields.append((k, v.as_ctypes())) else: fields.append((k, _FFI_CTYPES[v.type])) - # fields = sorted(fields, key=lambda f: f[0]) # Create new struct class. struct_class = type("NewStructClass", (ctypes.Structure, ), {"_fields_": fields}) + # NOTE: Each call to `type` returns a different class, so we need to cache it to ensure uniqueness. _FFI_CTYPES[self] = struct_class return struct_class diff --git a/dace/properties.py b/dace/properties.py index 0bec65d0ec..0adcfe3e97 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1392,12 +1392,14 @@ def __get__(self, obj, objtype=None) -> 'Data': @property def dtype(self): - return pydoc.locate("dace.data.Data") + from dace import data as dt + return dt.Data @staticmethod def from_string(s): - dtype = pydoc.locate("dace.data.{}".format(s)) - if dtype is None or not isinstance(dtype, pydoc.locate("dace.data.Data")): + from dace import data as dt + dtype = getattr(dt, s, None) + if dtype is None or not isinstance(dtype, dt.Data): raise ValueError("Not a valid data type: {}".format(s)) return dtype From eabbd1d6cd451556813ffea93cfa771767ef8561 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 22 Aug 2023 15:52:45 +0200 Subject: [PATCH 45/48] Addressed comments. 
--- dace/data.py | 27 +++++++++++---------------- dace/properties.py | 4 ++++ tests/sdfg/data/struct_array_test.py | 23 +++++++++++------------ tests/sdfg/data/structure_test.py | 8 -------- 4 files changed, 26 insertions(+), 36 deletions(-) diff --git a/dace/data.py b/dace/data.py index 5f05cbfcc8..3b571e6537 100644 --- a/dace/data.py +++ b/dace/data.py @@ -5,7 +5,7 @@ from collections import OrderedDict from numbers import Number -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import numpy import sympy as sp @@ -19,7 +19,8 @@ from dace import serialize, symbolic from dace.codegen import cppunparse from dace.properties import (DebugInfoProperty, DictProperty, EnumProperty, ListProperty, NestedDataClassProperty, - Property, ShapeProperty, SymbolicProperty, TypeClassProperty, make_properties) + OrderedDictProperty, Property, ShapeProperty, SymbolicProperty, TypeClassProperty, + make_properties) def create_datadescriptor(obj, no_custom_desc=False): @@ -370,15 +371,14 @@ def _arrays_from_json(obj, context=None): class Structure(Data): """ Base class for structures. 
""" - members = Property(dtype=OrderedDict, - desc="Dictionary of structure members", - from_json=_arrays_from_json, - to_json=_arrays_to_json) + members = OrderedDictProperty(default=OrderedDict(), + desc="Dictionary of structure members", + from_json=_arrays_from_json, + to_json=_arrays_to_json) name = Property(dtype=str, desc="Structure type name") def __init__(self, - members: Dict[str, Data], - order: List[str] = None, + members: Union[Dict[str, Data], List[Tuple[str, Data]]], name: str = 'Structure', transient: bool = False, storage: dtypes.StorageType = dtypes.StorageType.Default, @@ -386,19 +386,14 @@ def __init__(self, lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, debuginfo: dtypes.DebugInfo = None): - order = order or list(members.keys()) - if set(members.keys()) != set(order): - raise ValueError('Order must contain all members of the structure.') - - # TODO: Should we make a deep-copy here? - self.members = OrderedDict((k, members[k]) for k in order) - + self.members = OrderedDict(members) for k, v in self.members.items(): v.transient = transient + self.name = name fields_and_types = OrderedDict() symbols = set() - for k, v in members.items(): + for k, v in self.members.items(): if isinstance(v, Structure): symbols |= v.free_symbols fields_and_types[k] = (v.dtype, str(v.total_size)) diff --git a/dace/properties.py b/dace/properties.py index 0adcfe3e97..61e569341f 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -145,11 +145,15 @@ def fs(obj, *args, **kwargs): self._from_json = lambda *args, **kwargs: dace.serialize.from_json(*args, known_type=dtype, **kwargs) else: self._from_json = from_json + if self.from_json != from_json: + self.from_json = from_json if to_json is None: self._to_json = dace.serialize.to_json else: self._to_json = to_json + if self.to_json != to_json: + self.to_json = to_json if meta_to_json is None: diff --git a/tests/sdfg/data/struct_array_test.py b/tests/sdfg/data/struct_array_test.py index 
9b40379e53..8e0f2f4739 100644 --- a/tests/sdfg/data/struct_array_test.py +++ b/tests/sdfg/data/struct_array_test.py @@ -10,12 +10,11 @@ def test_read_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') - csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix', - transient=True) + csr_obj_view = dace.data.StructureView( + [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], + name='CSRMatrix', + transient=True) sdfg = dace.SDFG('array_of_csr_to_dense') @@ -84,13 +83,13 @@ def test_read_struct_array(): def test_write_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) - csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix') - csr_obj_view = dace.data.StructureView(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], - name='CSRMatrix', - transient=True) + csr_obj = dace.data.Structure( + [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], + name='CSRMatrix') + csr_obj_view = dace.data.StructureView( + dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), + name='CSRMatrix', + transient=True) sdfg = dace.SDFG('array_dense_to_csr') diff --git a/tests/sdfg/data/structure_test.py b/tests/sdfg/data/structure_test.py index 995aacb2fd..02b8f0c174 100644 --- a/tests/sdfg/data/structure_test.py +++ b/tests/sdfg/data/structure_test.py @@ -12,7 +12,6 @@ def test_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = 
dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense') @@ -69,7 +68,6 @@ def test_write_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('dense_to_csr') @@ -147,10 +145,8 @@ def test_local_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') tmp_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix', transient=True) @@ -258,7 +254,6 @@ def test_local_structure(): def test_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -320,7 +315,6 @@ def test_write_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') @@ -402,7 +396,6 @@ def test_direct_read_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') sdfg = dace.SDFG('csr_to_dense_direct') @@ 
-453,7 +446,6 @@ def test_direct_read_structure(): def test_direct_read_nested_structure(): M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - order=['indptr', 'indices', 'data'], name='CSRMatrix') wrapper_obj = dace.data.Structure(dict(csr=csr_obj), name='Wrapper') From c5ca99ad37e7ceef6da71026c3c8bb579f64117f Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 29 Aug 2023 23:05:10 -0700 Subject: [PATCH 46/48] Eliminate extraneous branch-end gotos in code generation (#1355) --- dace/codegen/control_flow.py | 77 +++++++++++++++----- dace/codegen/targets/framecode.py | 2 +- tests/codegen/control_flow_detection_test.py | 29 ++++++++ 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/dace/codegen/control_flow.py b/dace/codegen/control_flow.py index 182604c892..1b97241e47 100644 --- a/dace/codegen/control_flow.py +++ b/dace/codegen/control_flow.py @@ -82,6 +82,9 @@ class ControlFlow: # a string with its generated code. 
dispatch_state: Callable[[SDFGState], str] + # The parent control flow block of this one, used to avoid generating extraneous ``goto``s + parent: Optional['ControlFlow'] + @property def first_state(self) -> SDFGState: """ @@ -222,11 +225,18 @@ def as_cpp(self, codegen, symbols) -> str: out_edges = sdfg.out_edges(elem.state) for j, e in enumerate(out_edges): if e not in self.gotos_to_ignore: - # If this is the last generated edge and it leads - # to the next state, skip emitting goto + # Skip gotos to immediate successors successor = None - if (j == (len(out_edges) - 1) and (i + 1) < len(self.elements)): - successor = self.elements[i + 1].first_state + # If this is the last generated edge + if j == (len(out_edges) - 1): + if (i + 1) < len(self.elements): + # If last edge leads to next state in block + successor = self.elements[i + 1].first_state + elif i == len(self.elements) - 1: + # If last edge leads to first state in next block + next_block = _find_next_block(self) + if next_block is not None: + successor = next_block.first_state expr += elem.generate_transition(sdfg, e, successor) else: @@ -478,13 +488,14 @@ def children(self) -> List[ControlFlow]: def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[InterstateEdge], leave_edge: Edge[InterstateEdge], back_edges: List[Edge[InterstateEdge]], - dispatch_state: Callable[[SDFGState], str]) -> Union[ForScope, WhileScope]: + dispatch_state: Callable[[SDFGState], + str], parent_block: GeneralBlock) -> Union[ForScope, WhileScope]: """ Helper method that constructs the correct structured loop construct from a set of states. Can construct for or while loops. 
""" - body = GeneralBlock(dispatch_state, [], [], [], [], [], True) + body = GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True) guard_inedges = sdfg.in_edges(guard) increment_edges = [e for e in guard_inedges if e in back_edges] @@ -535,10 +546,10 @@ def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[Intersta # Also ignore assignments in increment edge (handled in for stmt) body.assignments_to_ignore.append(increment_edge) - return ForScope(dispatch_state, itvar, guard, init, condition, update, body, init_edges) + return ForScope(dispatch_state, parent_block, itvar, guard, init, condition, update, body, init_edges) # Otherwise, it is a while loop - return WhileScope(dispatch_state, guard, condition, body) + return WhileScope(dispatch_state, parent_block, guard, condition, body) def _cases_from_branches( @@ -617,6 +628,31 @@ def _child_of(node: SDFGState, parent: SDFGState, ptree: Dict[SDFGState, SDFGSta return False +def _find_next_block(block: ControlFlow) -> Optional[ControlFlow]: + """ + Returns the immediate successor control flow block. + """ + # Find block in parent + parent = block.parent + if parent is None: + return None + ind = next(i for i, b in enumerate(parent.children) if b is block) + if ind == len(parent.children) - 1 or isinstance(parent, (IfScope, IfElseChain, SwitchCaseScope)): + # If last block, or other children are not reachable from current node (branches), + # recursively continue upwards + return _find_next_block(parent) + return parent.children[ind + 1] + + +def _reset_block_parents(block: ControlFlow): + """ + Fixes block parents after processing. 
+ """ + for child in block.children: + child.parent = block + _reset_block_parents(child) + + def _structured_control_flow_traversal(sdfg: SDFG, start: SDFGState, ptree: Dict[SDFGState, SDFGState], @@ -645,7 +681,7 @@ def _structured_control_flow_traversal(sdfg: SDFG, """ def make_empty_block(): - return GeneralBlock(dispatch_state, [], [], [], [], [], True) + return GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True) # Traverse states in custom order visited = set() if visited is None else visited @@ -657,7 +693,7 @@ def make_empty_block(): if node in visited or node is stop: continue visited.add(node) - stateblock = SingleState(dispatch_state, node) + stateblock = SingleState(dispatch_state, parent_block, node) oe = sdfg.out_edges(node) if len(oe) == 0: # End state @@ -708,12 +744,14 @@ def make_empty_block(): if (len(oe) == 2 and oe[0].data.condition_sympy() == sp.Not(oe[1].data.condition_sympy())): # If without else if oe[0].dst is mergestate: - branch_block = IfScope(dispatch_state, sdfg, node, oe[1].data.condition, cblocks[oe[1]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[1].data.condition, + cblocks[oe[1]]) elif oe[1].dst is mergestate: - branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition, + cblocks[oe[0]]) else: - branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]], - cblocks[oe[1]]) + branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition, + cblocks[oe[0]], cblocks[oe[1]]) else: # If there are 2 or more edges (one is not the negation of the # other): @@ -721,10 +759,10 @@ def make_empty_block(): if switch: # If all edges are of form "x == y" for a single x and # integer y, it is a switch/case - branch_block = SwitchCaseScope(dispatch_state, sdfg, node, switch[0], switch[1]) + branch_block = SwitchCaseScope(dispatch_state, 
parent_block, sdfg, node, switch[0], switch[1]) else: # Otherwise, create if/else if/.../else goto exit chain - branch_block = IfElseChain(dispatch_state, sdfg, node, + branch_block = IfElseChain(dispatch_state, parent_block, sdfg, node, [(e.data.condition, cblocks[e] if e in cblocks else make_empty_block()) for e in oe]) # End of branch classification @@ -739,11 +777,11 @@ def make_empty_block(): loop_exit = None scope = None if ptree[oe[0].dst] == node and ptree[oe[1].dst] != node: - scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state) + scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state, parent_block) body_start = oe[0].dst loop_exit = oe[1].dst elif ptree[oe[1].dst] == node and ptree[oe[0].dst] != node: - scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state) + scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state, parent_block) body_start = oe[1].dst loop_exit = oe[0].dst @@ -836,7 +874,8 @@ def structured_control_flow_tree(sdfg: SDFG, dispatch_state: Callable[[SDFGState if len(common_frontier) == 1: branch_merges[state] = next(iter(common_frontier)) - root_block = GeneralBlock(dispatch_state, [], [], [], [], [], True) + root_block = GeneralBlock(dispatch_state, None, [], [], [], [], [], True) _structured_control_flow_traversal(sdfg, sdfg.start_state, ptree, branch_merges, back_edges, dispatch_state, root_block) + _reset_block_parents(root_block) return root_block diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 9ee5c2ef17..dfdbbb392b 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -471,7 +471,7 @@ def dispatch_state(state: SDFGState) -> str: # If disabled, generate entire graph as general control flow block states_topological = list(sdfg.topological_sort(sdfg.start_state)) last = states_topological[-1] - cft = cflow.GeneralBlock(dispatch_state, + cft = 
cflow.GeneralBlock(dispatch_state, None, [cflow.SingleState(dispatch_state, s, s is last) for s in states_topological], [], [], [], [], False) diff --git a/tests/codegen/control_flow_detection_test.py b/tests/codegen/control_flow_detection_test.py index 99d6a39b29..982140f7ed 100644 --- a/tests/codegen/control_flow_detection_test.py +++ b/tests/codegen/control_flow_detection_test.py @@ -120,6 +120,33 @@ def test_single_outedge_branch(): assert np.allclose(res, 2) +def test_extraneous_goto(): + + @dace.program + def tester(a: dace.float64[20]): + if a[0] < 0: + a[1] = 1 + a[2] = 1 + + sdfg = tester.to_sdfg(simplify=True) + assert 'goto' not in sdfg.generate_code()[0].code + + +def test_extraneous_goto_nested(): + + @dace.program + def tester(a: dace.float64[20]): + if a[0] < 0: + if a[0] < 1: + a[1] = 1 + else: + a[1] = 2 + a[2] = 1 + + sdfg = tester.to_sdfg(simplify=True) + assert 'goto' not in sdfg.generate_code()[0].code + + if __name__ == '__main__': test_for_loop_detection() test_invalid_for_loop_detection() @@ -128,3 +155,5 @@ def test_single_outedge_branch(): test_edge_sympy_function('TrueFalse') test_edge_sympy_function('SwitchCase') test_single_outedge_branch() + test_extraneous_goto() + test_extraneous_goto_nested() From c34de8e3336343b0f11bddd0b61099ab1f22eb47 Mon Sep 17 00:00:00 2001 From: Lukas Truemper Date: Sat, 2 Sep 2023 15:34:08 +0200 Subject: [PATCH 47/48] TaskletFusion: Fix additional edges in case of none-connectors --- .../transformation/dataflow/tasklet_fusion.py | 3 ++ tests/transformations/tasklet_fusion_test.py | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/dace/transformation/dataflow/tasklet_fusion.py b/dace/transformation/dataflow/tasklet_fusion.py index 99f8f625be..d6b4a3039b 100644 --- a/dace/transformation/dataflow/tasklet_fusion.py +++ b/dace/transformation/dataflow/tasklet_fusion.py @@ -249,6 +249,9 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): t1.language) for in_edge in graph.in_edges(t1): + 
if in_edge.src_conn is None and isinstance(in_edge.src, dace.nodes.EntryNode): + if len(new_tasklet.in_connectors) > 0: + continue graph.add_edge(in_edge.src, in_edge.src_conn, new_tasklet, in_edge.dst_conn, in_edge.data) for in_edge in graph.in_edges(t2): diff --git a/tests/transformations/tasklet_fusion_test.py b/tests/transformations/tasklet_fusion_test.py index c7fd6802d5..743010e8c9 100644 --- a/tests/transformations/tasklet_fusion_test.py +++ b/tests/transformations/tasklet_fusion_test.py @@ -213,6 +213,49 @@ def test_map_with_tasklets(language: str, with_data: bool): ref = map_with_tasklets.f(A, B) assert (np.allclose(C, ref)) +def test_none_connector(): + @dace.program + def sdfg_none_connector(A: dace.float32[32], B: dace.float32[32]): + tmp = dace.define_local([32], dace.float32) + for i in dace.map[0:32]: + with dace.tasklet: + a >> tmp[i] + a = 0 + + tmp2 = dace.define_local([32], dace.float32) + for i in dace.map[0:32]: + with dace.tasklet: + a << A[i] + b >> tmp2[i] + b = a + 1 + + + for i in dace.map[0:32]: + with dace.tasklet: + a << tmp[i] + b << tmp2[i] + c >> B[i] + c = a + b + + sdfg = sdfg_none_connector.to_sdfg() + sdfg.simplify() + applied = sdfg.apply_transformations_repeated(MapFusion) + assert applied == 2 + + map_entry = None + for node in sdfg.start_state.nodes(): + if isinstance(node, dace.nodes.MapEntry): + map_entry = node + break + + assert map_entry is not None + assert len([edge.src_conn for edge in sdfg.start_state.out_edges(map_entry) if edge.src_conn is None]) == 1 + + applied = sdfg.apply_transformations_repeated(TaskletFusion) + assert applied == 2 + + assert sdfg.start_state.out_degree(map_entry) == 1 + assert len([edge.src_conn for edge in sdfg.start_state.out_edges(map_entry) if edge.src_conn is None]) == 0 if __name__ == '__main__': test_basic() @@ -224,3 +267,4 @@ def test_map_with_tasklets(language: str, with_data: bool): test_map_with_tasklets(language='Python', with_data=True) test_map_with_tasklets(language='CPP', 
with_data=False) test_map_with_tasklets(language='CPP', with_data=True) + test_none_connector() From f95f8162a4e77d7a386ccd20c9e4ef71a3ad9787 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Sep 2023 23:58:33 -0700 Subject: [PATCH 48/48] Fix dynamic memlet propagation condition (#1364) --- dace/sdfg/propagation.py | 4 ++-- tests/python_frontend/argument_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 0fec4812b7..0554775dcd 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -1477,8 +1477,8 @@ def propagate_subset(memlets: List[Memlet], new_memlet.volume = simplify(sum(m.volume for m in memlets) * functools.reduce(lambda a, b: a * b, rng.size(), 1)) if any(m.dynamic for m in memlets): new_memlet.dynamic = True - elif symbolic.issymbolic(new_memlet.volume) and any(s not in defined_variables - for s in new_memlet.volume.free_symbols): + if symbolic.issymbolic(new_memlet.volume) and any(s not in defined_variables + for s in new_memlet.volume.free_symbols): new_memlet.dynamic = True new_memlet.volume = 0 diff --git a/tests/python_frontend/argument_test.py b/tests/python_frontend/argument_test.py index 1f43337eb8..cb47188029 100644 --- a/tests/python_frontend/argument_test.py +++ b/tests/python_frontend/argument_test.py @@ -2,6 +2,7 @@ import dace import pytest +import numpy as np N = dace.symbol('N') @@ -16,5 +17,29 @@ def test_extra_args(): imgcpy([[1, 2], [3, 4]], [[4, 3], [2, 1]], 0.0, 1.0) +def test_missing_arguments_regression(): + + def nester(a, b, T): + for i, j in dace.map[0:20, 0:20]: + start = 0 + end = min(T, 6) + + elem: dace.float64 = 0 + for ii in range(start, end): + if ii % 2 == 0: + elem += b[ii] + + a[j, i] = elem + + @dace.program + def tester(x: dace.float64[20, 20]): + gdx = np.ones((10, ), dace.float64) + for T in range(2): + nester(x, gdx, T) + + tester.to_sdfg().compile() + + if __name__ == '__main__': 
test_extra_args() + test_missing_arguments_regression()