From e2246ee5b534cca54300ebb6be087226b62582e2 Mon Sep 17 00:00:00 2001
From: Vaughn Kottler <vaughnkottler@gmail.com>
Date: Mon, 16 Oct 2023 19:19:42 -0500
Subject: [PATCH 1/2] 3.1.2 - Heuristics to improve output

---
 .github/workflows/python-package.yml |  2 +-
 README.md                            |  4 +-
 ifgen/__init__.py                    |  4 +-
 ifgen/svd/group/base.py              | 44 +++++++++++++----
 ifgen/svd/group/fields.py            | 71 ++++++++++++++++++++--------
 ifgen/svd/model/field.py             | 55 ++++++++++++++++++++-
 ifgen/svd/model/peripheral.py        | 35 ++++++++++++++
 local/variables/package.yaml         |  2 +-
 pyproject.toml                       |  2 +-
 9 files changed, 182 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 3867cb8..9aa2918 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -77,7 +77,7 @@ jobs:
 
       - run: |
           mk python-release owner=vkottler \
-                            repo=ifgen version=3.1.1
+                            repo=ifgen version=3.1.2
         if: |
           matrix.python-version == '3.11'
           && matrix.system == 'ubuntu-latest'
diff --git a/README.md b/README.md
index 83d8792..6113e6a 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,11 @@
     =====================================
     generator=datazen
     version=3.1.4
-    hash=9fcb7464a9f8a23b45f51e6f91e5dcfd
+    hash=975b48a74c4be1ac5e11b915ab1b99fd
     =====================================
 -->
 
-# ifgen ([3.1.1](https://pypi.org/project/ifgen/))
+# ifgen ([3.1.2](https://pypi.org/project/ifgen/))
 
 [![python](https://img.shields.io/pypi/pyversions/ifgen.svg)](https://pypi.org/project/ifgen/)
 ![Build Status](https://github.com/vkottler/ifgen/workflows/Python%20Package/badge.svg)
diff --git a/ifgen/__init__.py b/ifgen/__init__.py
index e0a7ea3..0ce1f64 100644
--- a/ifgen/__init__.py
+++ b/ifgen/__init__.py
@@ -1,7 +1,7 @@
 # =====================================
 # generator=datazen
 # version=3.1.4
-# hash=45c636153116b5681132452943461260
+# hash=206c822433faa199cbbeaf11941043ba
 # =====================================
 
 """
@@ -10,4 +10,4 @@
 
 DESCRIPTION = "An interface generator for distributed computing."
 PKG_NAME = "ifgen"
-VERSION = "3.1.1"
+VERSION = "3.1.2"
diff --git a/ifgen/svd/group/base.py b/ifgen/svd/group/base.py
index d7b754e..301fe56 100644
--- a/ifgen/svd/group/base.py
+++ b/ifgen/svd/group/base.py
@@ -4,7 +4,7 @@
 
 # built-in
 from dataclasses import dataclass
-from typing import Iterator
+from typing import Iterator, Optional
 
 # internal
 from ifgen.svd.model.peripheral import Peripheral
@@ -24,6 +24,32 @@ def peripherals(self) -> Iterator[Peripheral]:
         yield from self.derivatives
 
 
+def get_derived(
+    peripheral: Peripheral, peripherals: list[Peripheral]
+) -> Optional[Peripheral]:
+    """Determine if this peripheral is derived from any other peripheral."""
+
+    result = None
+
+    if peripheral.derived:
+        result = peripheral.derived_elem
+
+    # Check if this peripheral is equivalent to some other peripheral.
+    else:
+        for other in peripherals:
+            # Always return None if you get far enough to see yourself in the
+            # list. That way this peripheral becomes the effective 'root'.
+            if other is peripheral:
+                break
+
+            if not other.is_alternate() and not other.derived:
+                if other == peripheral:
+                    result = other
+                    break
+
+    return result
+
+
 def peripheral_groups(
     peripherals: dict[str, Peripheral]
 ) -> dict[str, PeripheralGroup]:
@@ -31,10 +57,13 @@ def peripheral_groups(
 
     result: dict[str, PeripheralGroup] = {}
 
-    for peripheral in peripherals.values():
+    peripherals_list = list(peripherals[x] for x in sorted(peripherals))
+    for peripheral in peripherals_list:
         name = peripheral.base_name()
-        if peripheral.derived:
-            name = peripheral.derived_elem.base_name()
+
+        derived = get_derived(peripheral, peripherals_list)
+        if derived is not None:
+            name = derived.base_name()
 
         if name not in result:
             # Validate this later.
@@ -43,11 +72,10 @@ def peripheral_groups(
         group = result[name]
 
         if group.root is None:
-            group.root = peripheral
+            group.root = derived if derived is not None else peripheral
         else:
-            result[peripheral.derived_elem.base_name()].derivatives.append(
-                peripheral
-            )
+            assert derived is not None
+            result[derived.base_name()].derivatives.append(peripheral)
 
     # Validate groups.
     for name, group in result.items():
diff --git a/ifgen/svd/group/fields.py b/ifgen/svd/group/fields.py
index 234f59d..3546dd9 100644
--- a/ifgen/svd/group/fields.py
+++ b/ifgen/svd/group/fields.py
@@ -7,7 +7,6 @@
 
 # internal
 from ifgen.svd.model.enum import EnumeratedValues
-from ifgen.svd.model.field import Field
 from ifgen.svd.model.peripheral import Cluster, Register, RegisterData
 
 StructMap = dict[str, Any]
@@ -86,31 +85,59 @@ def handle_cluster(
 
 RegisterMap = dict[str, Register]
 
+IGNORE_WORDS = {
+    "the",
+    "as",
+    "a",
+    "is",
+    "will",
+    "but",
+    "are",
+    "yet",
+    "that",
+    "to",
+    "and",
+    "in",
+    "of",
+}
+
+
+def is_name_part(value: str) -> bool:
+    """Determine if a word should be part of an enumeration value name."""
+    return bool(value) and value not in IGNORE_WORDS
+
+
+def as_alnum(word: str) -> str:
+    """Get a word's alpha-numeric contents only."""
+
+    result = ""
+    for char in word:
+        if char.isalnum():
+            result += char
+
+    return result
+
+
+def handle_enum_name(name: str, description: str = None) -> str:
+    """Attempt to generate more useful enumeration names."""
 
-def bit_field_data(field: Field, output: dict[str, Any]) -> None:
-    """Populate bit-field data."""
+    if name.startswith("value") and description:
+        new_name = description.replace("-", "_")
 
-    field.handle_description(output)
+        alnum_parts = [as_alnum(x.strip().lower()) for x in new_name.split()]
 
-    # We don't currently handle arrays of bit-fields.
-    assert "dim" not in field.raw_data
+        # Prune some words if the description is very long.
+        if len(alnum_parts) > 1:
+            alnum_parts = list(filter(is_name_part, alnum_parts))
 
-    if "bitRange" in field.raw_data:
-        msb_str, lsb_str = field.raw_data["bitRange"].split(":")
-        lsb = int(lsb_str.replace("]", ""))
-        msb = int(msb_str.replace("[", ""))
-    elif "lsb" in field.raw_data:
-        lsb = int(field.raw_data["lsb"])
-        msb = int(field.raw_data["msb"])
+        assert alnum_parts, (name, description)
 
-    output["index"] = lsb
+        new_name = "_".join(alnum_parts)
 
-    width = (msb - lsb) + 1
-    assert width >= 1, (msb, lsb, field.name)
-    output["width"] = width
+        assert new_name, (name, description)
+        name = new_name
 
-    output["read"] = "read" in field.access
-    output["write"] = "write" in field.access
+    return name
 
 
 def translate_enums(enum: EnumeratedValues) -> dict[str, Any]:
@@ -140,7 +167,9 @@ def translate_enums(enum: EnumeratedValues) -> dict[str, Any]:
         else:
             enum_data["value"] = int(value_str)
 
-        result[name] = enum_data
+        result[
+            handle_enum_name(name, value.raw_data.get("description"))
+        ] = enum_data
 
     return result
 
@@ -169,7 +198,7 @@ def process_bit_fields(
     # Process fields.
     for name, field in register.fields.items():
         field_data: dict[str, Any] = {"name": name}
-        bit_field_data(field, field_data)
+        field_data.update(field.ifgen_data)
         result.append(field_data)
 
         # Handle creating an enumeration.
diff --git a/ifgen/svd/model/field.py b/ifgen/svd/model/field.py
index 87066ed..e0e1220 100644
--- a/ifgen/svd/model/field.py
+++ b/ifgen/svd/model/field.py
@@ -4,7 +4,8 @@
 
 # built-in
 from dataclasses import dataclass
-from typing import Iterable, Optional
+from functools import cached_property
+from typing import Any, Iterable, Optional
 from xml.etree import ElementTree
 
 # internal
@@ -21,6 +22,24 @@ class Field(DerivedMixin):
     derived_from: Optional["Field"]
     enum: Optional[EnumeratedValues]
 
+    def __eq__(self, other) -> bool:
+        """Determine if two fields are equivalent."""
+
+        result = False
+
+        if isinstance(other, Field):
+            our_data = self.ifgen_data
+            their_data = other.ifgen_data
+
+            result = (
+                our_data["index"] == their_data["index"]
+                and our_data["width"] == their_data["width"]
+                and our_data["read"] == their_data["read"]
+                and our_data["write"] == their_data["write"]
+            )
+
+        return result
+
     @property
     def access(self) -> str:
         """Get this instance's access property."""
@@ -52,6 +71,40 @@ def string_keys(cls) -> Iterable[StringKeyVal]:
             StringKeyVal("readAction", False),
         ]
 
+    @cached_property
+    def ifgen_data(self) -> dict[str, Any]:
+        """Get bit-field data for this instance."""
+
+        output: dict[str, Any] = {}
+
+        self.handle_description(output)
+
+        # We don't currently handle arrays of bit-fields.
+        assert "dim" not in self.raw_data
+
+        lsb = -1
+        msb = -1
+        if "bitRange" in self.raw_data:
+            msb_str, lsb_str = self.raw_data["bitRange"].split(":")
+            lsb = int(lsb_str.replace("]", ""))
+            msb = int(msb_str.replace("[", ""))
+        elif "lsb" in self.raw_data:
+            lsb = int(self.raw_data["lsb"])
+            msb = int(self.raw_data["msb"])
+
+        assert lsb != -1 and msb != -1
+
+        output["index"] = lsb
+
+        width = (msb - lsb) + 1
+        assert width >= 1, (msb, lsb, self.name)
+        output["width"] = width
+
+        output["read"] = "read" in self.access
+        output["write"] = "write" in self.access
+
+        return output
+
 
 FieldMap = dict[str, Field]
 
diff --git a/ifgen/svd/model/peripheral.py b/ifgen/svd/model/peripheral.py
index 96a05a0..2b90d44 100644
--- a/ifgen/svd/model/peripheral.py
+++ b/ifgen/svd/model/peripheral.py
@@ -26,6 +26,13 @@ class Cluster(DerivedMixin):
     children: RegisterData
     peripheral: "Peripheral"
 
+    def __eq__(self, other) -> bool:
+        """Determine if two clusters are equivalent."""
+
+        return isinstance(other, Cluster) and all(
+            x == y for x, y in zip(self.children, other.children)
+        )
+
     @classmethod
     def string_keys(cls) -> Iterable[StringKeyVal]:
         """Get string keys for this instance type."""
@@ -43,6 +50,20 @@ def string_keys(cls) -> Iterable[StringKeyVal]:
         )
 
 
+def fields_equal(left: Optional[FieldMap], right: Optional[FieldMap]) -> bool:
+    """Determine if two field maps are equivalent."""
+
+    result = left is None and right is None
+
+    if left is not None and right is not None and len(left) == len(right):
+        for lkey, lvalue in left.items():
+            result = lkey in right and lvalue == right[lkey]
+            if not result:
+                break
+
+    return result
+
+
 @dataclass
 class Register(DerivedMixin):
     """A container for register information."""
@@ -51,6 +72,13 @@ class Register(DerivedMixin):
     fields: Optional[FieldMap]
     peripheral: "Peripheral"
 
+    def __eq__(self, other) -> bool:
+        """Determine if two registers are equivalent."""
+
+        return isinstance(other, Register) and fields_equal(
+            self.fields, other.fields
+        )
+
     @property
     def bits(self) -> int:
         """Get the size of this register in bits."""
@@ -127,6 +155,13 @@ class Peripheral(DerivedMixin):
 
     registers: RegisterData
 
+    def __eq__(self, other) -> bool:
+        """Determine if two peripherals are equivalent."""
+
+        return isinstance(other, Peripheral) and all(
+            x == y for x, y in zip(self.registers, other.registers)
+        )
+
     @property
     def bits(self) -> Optional[int]:
         """Get size for this peripheral in bits."""
diff --git a/local/variables/package.yaml b/local/variables/package.yaml
index bf5283b..e8a31af 100644
--- a/local/variables/package.yaml
+++ b/local/variables/package.yaml
@@ -1,5 +1,5 @@
 ---
 major: 3
 minor: 1
-patch: 1
+patch: 2
 entry: ig
diff --git a/pyproject.toml b/pyproject.toml
index bde405a..b2e7160 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta:__legacy__"
 
 [project]
 name = "ifgen"
-version = "3.1.1"
+version = "3.1.2"
 description = "An interface generator for distributed computing."
 readme = "README.md"
 requires-python = ">=3.11"

From 76b9406ea80d88fb3304aa0eeceebb2aa129d9f5 Mon Sep 17 00:00:00 2001
From: Vaughn Kottler <vaughnkottler@gmail.com>
Date: Mon, 16 Oct 2023 20:05:37 -0500
Subject: [PATCH 2/2] Finish enumeration coalescing

---
 ifgen/svd/group/enums.py  | 130 ++++++++++++++++++++++++++++++++++++++
 ifgen/svd/group/fields.py | 116 ++++------------------------------
 2 files changed, 142 insertions(+), 104 deletions(-)
 create mode 100644 ifgen/svd/group/enums.py

diff --git a/ifgen/svd/group/enums.py b/ifgen/svd/group/enums.py
new file mode 100644
index 0000000..28b8af0
--- /dev/null
+++ b/ifgen/svd/group/enums.py
@@ -0,0 +1,130 @@
+"""
+A module for handling SVD bit-field enumerations.
+"""
+
+# built-in
+from typing import Any
+
+# internal
+from ifgen.svd.model.enum import EnumeratedValues
+
+EnumValues = dict[str, Any]
+ENUM_DEFAULTS: dict[str, Any] = {
+    "unit_test": False,
+    "json": False,
+    "use_map": False,
+    "identifier": False,
+}
+
+BY_HASH: dict[str, dict[int, str]] = {}
+
+
+def get_enum_name(name: str, peripheral: str, raw_mapping: EnumValues) -> str:
+    """Get an enumeration's name, re-used for equal value mappings."""
+
+    hashed = hash(
+        ",".join(
+            name + f"={val['value']}" for name, val in raw_mapping.items()
+        )
+    )
+
+    BY_HASH.setdefault(peripheral, {})
+
+    for_periph = BY_HASH[peripheral]
+    for_periph.setdefault(hashed, name)
+
+    return for_periph[hashed]
+
+
+IGNORE_WORDS = {
+    "the",
+    "as",
+    "a",
+    "is",
+    "will",
+    "but",
+    "are",
+    "yet",
+    "that",
+    "to",
+    "and",
+    "in",
+    "of",
+    "on",
+    "for",
+    "from",
+    "its",
+    "it",
+}
+
+
+def is_name_part(value: str) -> bool:
+    """Determine if a word should be part of an enumeration value name."""
+    return bool(value) and value not in IGNORE_WORDS
+
+
+def as_alnum(word: str) -> str:
+    """Get a word's alpha-numeric and underscore characters only."""
+
+    result = ""
+    for char in word:
+        if char.isalnum() or char == "_":
+            result += char
+
+    return result
+
+
+def handle_enum_name(name: str, description: str = None) -> str:
+    """Attempt to generate more useful enumeration names."""
+
+    if name.startswith("value") and description:
+        new_name = description.replace("-", "_")
+
+        alnum_parts = [as_alnum(x.strip().lower()) for x in new_name.split()]
+
+        # Prune filler words when the description has more than one word.
+        if len(alnum_parts) > 1:
+            alnum_parts = list(filter(is_name_part, alnum_parts))
+
+        assert alnum_parts, (name, description)
+
+        new_name = "_".join(alnum_parts)
+
+        assert new_name, (name, description)
+        name = new_name
+
+    return name
+
+
+def translate_enums(enum: EnumeratedValues) -> EnumValues:
+    """Generate an enumeration definition."""
+
+    result: dict[str, Any] = {}
+    enum.handle_description(result)
+
+    for name, value in enum.derived_elem.enum.items():
+        enum_data: dict[str, Any] = {}
+        value.handle_description(enum_data)
+
+        value_str: str = value.raw_data["value"]
+
+        prefix = ""
+        for possible_prefix in ("#", "0b", "0x"):
+            if value_str.startswith(possible_prefix):
+                prefix = possible_prefix
+                break
+
+        if prefix in ("#", "0b"):
+            enum_data["value"] = int(
+                value_str[len(prefix) :].replace("X", "1"), 2
+            )
+        elif prefix == "0x":
+            enum_data["value"] = int(value_str[len(prefix) :], 16)
+        else:
+            enum_data["value"] = int(value_str)
+
+        result[
+            handle_enum_name(name, value.raw_data.get("description"))
+        ] = enum_data
+
+    return result
diff --git a/ifgen/svd/group/fields.py b/ifgen/svd/group/fields.py
index 3546dd9..e9d335a 100644
--- a/ifgen/svd/group/fields.py
+++ b/ifgen/svd/group/fields.py
@@ -6,7 +6,7 @@
 from typing import Any, Iterable
 
 # internal
-from ifgen.svd.model.enum import EnumeratedValues
+from ifgen.svd.group.enums import ENUM_DEFAULTS, get_enum_name, translate_enums
 from ifgen.svd.model.peripheral import Cluster, Register, RegisterData
 
 StructMap = dict[str, Any]
@@ -85,102 +85,6 @@ def handle_cluster(
 
 RegisterMap = dict[str, Register]
 
-IGNORE_WORDS = {
-    "the",
-    "as",
-    "a",
-    "is",
-    "will",
-    "but",
-    "are",
-    "yet",
-    "that",
-    "to",
-    "and",
-    "in",
-    "of",
-}
-
-
-def is_name_part(value: str) -> bool:
-    """Determine if a word should be part of an enumeration value name."""
-    return bool(value) and value not in IGNORE_WORDS
-
-
-def as_alnum(word: str) -> str:
-    """Get a word's alpha-numeric contents only."""
-
-    result = ""
-    for char in word:
-        if char.isalnum():
-            result += char
-
-    return result
-
-
-def handle_enum_name(name: str, description: str = None) -> str:
-    """Attempt to generate more useful enumeration names."""
-
-    if name.startswith("value") and description:
-        new_name = description.replace("-", "_")
-
-        alnum_parts = [as_alnum(x.strip().lower()) for x in new_name.split()]
-
-        # Prune some words if the description is very long.
-        if len(alnum_parts) > 1:
-            alnum_parts = list(filter(is_name_part, alnum_parts))
-
-        assert alnum_parts, (name, description)
-
-        new_name = "_".join(alnum_parts)
-
-        assert new_name, (name, description)
-        name = new_name
-
-    return name
-
-
-def translate_enums(enum: EnumeratedValues) -> dict[str, Any]:
-    """Generate an enumeration definition."""
-
-    result: dict[str, Any] = {}
-    enum.handle_description(result)
-
-    for name, value in enum.derived_elem.enum.items():
-        enum_data: dict[str, Any] = {}
-        value.handle_description(enum_data)
-
-        value_str: str = value.raw_data["value"]
-
-        prefix = ""
-        for possible_prefix in ("#", "0b", "0x"):
-            if value_str.startswith(possible_prefix):
-                prefix = possible_prefix
-                break
-
-        if prefix in ("#", "0b"):
-            enum_data["value"] = int(
-                value_str[len(prefix) :].replace("X", "1"), 2
-            )
-        elif prefix == "0x":
-            enum_data["value"] = int(value_str[len(prefix) :], 16)
-        else:
-            enum_data["value"] = int(value_str)
-
-        result[
-            handle_enum_name(name, value.raw_data.get("description"))
-        ] = enum_data
-
-    return result
-
-
-ENUM_DEFAULTS: dict[str, Any] = {
-    "unit_test": False,
-    "json": False,
-    "use_map": False,
-    "identifier": False,
-}
-
 
 def process_bit_fields(
     register: Register,
@@ -203,20 +107,24 @@ def process_bit_fields(
 
         # Handle creating an enumeration.
         if field.enum is not None:
-            enum_name = f"{peripheral}_{register.name}_{name}".replace(
-                "[%s]", ""
-            )
-            field_data["type"] = enum_name
-
             # Register enumeration.
-            new_enum: dict[str, Any] = {"enum": translate_enums(field.enum)}
+            raw = translate_enums(field.enum)
+            new_enum: dict[str, Any] = {"enum": raw}
             new_enum.update(ENUM_DEFAULTS)
 
             # Increase size of underlying if necessary.
             if field_data["width"] > 8:
                 new_enum["underlying"] = "uint16_t"
 
-            enums[enum_name] = new_enum
+            # Check if enum is unique.
+            enum_name = get_enum_name(
+                f"{peripheral}_{register.name}_{name}".replace("[%s]", ""),
+                peripheral,
+                raw,
+            )
+            field_data["type"] = enum_name
+            if enum_name not in enums:
+                enums[enum_name] = new_enum
 
     if result:
         output["fields"] = result