Skip to content

Commit

Permalink
Versioning and migration for GufeTokenizables (#227)
Browse files Browse the repository at this point in the history
* Versioning and migration for `GufeTokenizable`s

* use pop w/ default instead of if-else

* Example for moving keys within settings

* Fix existing tests (stable keys; expect versions)

* add GufeTokenizable test mixin for migrations

* Make mypy happy

* Fix ligand network tests

* Detailed tests for migration helpers

* update keys for changed module name

* Docstrings

* _version => _schema_version

---------

Co-authored-by: David L. Dotson <dotsdl@gmail.com>
  • Loading branch information
dwhswenson and dotsdl authored Nov 17, 2023
1 parent f87e236 commit af0d3bd
Show file tree
Hide file tree
Showing 6 changed files with 522 additions and 9 deletions.
11 changes: 11 additions & 0 deletions docs/api/serialize.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,14 @@ GUFE Serialization API
----------------------

.. autoclass:: gufe.tokenization.GufeTokenizable

Serialization migration
~~~~~~~~~~~~~~~~~~~~~~~

.. currentmodule:: gufe.tokenization

.. autofunction:: new_key_added
.. autofunction:: old_key_removed
.. autofunction:: key_renamed
.. autofunction:: nested_key_moved

6 changes: 3 additions & 3 deletions gufe/tests/data/ligand_network.graphml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
<key id="d0" for="node" attr.name="moldict" attr.type="string" />
<graph edgedefault="directed">
<node id="mol0">
<data key="d0">{"__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol1">
<data key="d0">{"__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol2">
<data key="d0">{"__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<edge source="mol0" target="mol2" id="0">
<data key="d1">[[0, 0]]</data>
Expand Down
4 changes: 2 additions & 2 deletions gufe/tests/test_ligand_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ def network_container(

class TestLigandNetwork(GufeTokenizableTestsMixin):
cls = LigandNetwork
key = "LigandNetwork-8d9c3198d7fbfc29e73cb09911bccc7f"
repr = "<LigandNetwork-8d9c3198d7fbfc29e73cb09911bccc7f>"
key = "LigandNetwork-c597016564f85a3c42445bd1dabd91b3"
repr = "<LigandNetwork-c597016564f85a3c42445bd1dabd91b3>"

@pytest.fixture
def instance(self, simple_network):
Expand Down
266 changes: 266 additions & 0 deletions gufe/tests/test_serialization_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
import pytest
import copy

from gufe.tokenization import (
GufeTokenizable,
new_key_added,
old_key_removed,
key_renamed,
nested_key_moved,
from_dict,
_label_to_parts,
_pop_nested,
_set_nested,
)

from gufe.tests.test_tokenization import GufeTokenizableTestsMixin
from pydantic import BaseModel

from typing import Optional, Any, Type


@pytest.fixture
def nested_data():
    """Provide a fresh nested dict/list structure for the nested-label tests.

    A new object is built on every call, so tests that mutate it in place
    do not affect each other.
    """
    foo_branch = {"foo2": [{"foo3": "foo4"}, "foo5"]}
    bar_branch = ["bar2", "bar3"]
    return {"foo": foo_branch, "bar": bar_branch}

@pytest.mark.parametrize(
    'label, expected',
    [
        ("foo", ["foo"]),
        ("foo.foo2", ["foo", "foo2"]),
        ("foo.foo2[0]", ["foo", "foo2", 0]),
        ("foo.foo2[0].foo3", ["foo", "foo2", 0, "foo3"]),
    ],
)
def test_label_to_parts(label, expected):
    """Check that a dotted/indexed label is split into the expected parts.

    Dict keys are expected as strings and ``[i]`` list indices as ints.
    """
    parts = _label_to_parts(label)
    assert parts == expected

@pytest.mark.parametrize(
    'label, popped, remaining',
    [
        ("foo", {"foo2": [{"foo3": "foo4"}, "foo5"]}, {}),
        ("foo.foo2", [{"foo3": "foo4"}, "foo5"], {"foo": {}}),
        ("foo.foo2[0]", {"foo3": "foo4"}, {"foo": {"foo2": ["foo5"]}}),
        ("foo.foo2[0].foo3", "foo4", {"foo": {"foo2": [{}, "foo5"]}}),
        ("foo.foo2[1]", "foo5", {"foo": {"foo2": [{"foo3": "foo4"}]}}),
    ],
)
def test_pop_nested(nested_data, label, popped, remaining):
    """Check the value returned by ``_pop_nested`` and what is left behind.

    ``remaining`` describes only the expected ``foo`` branch after the pop;
    the ``bar`` branch is never addressed by any label and must be untouched.
    """
    expected_remaining = {"bar": ["bar2", "bar3"], **remaining}

    val = _pop_nested(nested_data, label)

    assert val == popped
    # the pop must have modified the fixture dict in place
    assert nested_data == expected_remaining

@pytest.mark.parametrize(
    "label, expected_foo",
    [
        ("foo", {"foo": 10}),
        ("foo.foo2", {"foo": {"foo2": 10}}),
        ("foo.foo2[0]", {"foo": {"foo2": [10, "foo5"]}}),
        ("foo.foo2[0].foo3", {"foo": {"foo2": [{"foo3": 10}, "foo5"]}}),
        ("foo.foo2[1]", {"foo": {"foo2": [{"foo3": "foo4"}, 10]}}),
    ],
)
def test_set_nested(nested_data, label, expected_foo):
    """Check that ``_set_nested`` writes the value 10 at the labelled location.

    ``expected_foo`` is the expected ``foo`` branch after the write; the
    ``bar`` branch must be left unchanged.
    """
    _set_nested(nested_data, label, 10)

    # the write must have modified the fixture dict in place
    assert nested_data == {"bar": ["bar2", "bar3"], **expected_foo}


class _DefaultBase(GufeTokenizable):
    """Shared boilerplate for the example tokenizables in this module.

    Supplies ``_from_dict``, ``_defaults`` and ``_schema_version`` so each
    example class only has to define ``__init__``, ``_to_dict`` and its
    ``serialization_migration``.
    """

    @classmethod
    def _schema_version(cls):
        # The examples model the "new" layout; the old-format fixture dicts
        # in this module are tagged with ``':version:': 1``.
        return 2

    @classmethod
    def _defaults(cls):
        # Defer entirely to the parent implementation.
        return super()._defaults()

    @classmethod
    def _from_dict(cls, dct):
        # The dict keys are passed straight through as constructor kwargs.
        return cls(**dct)


# Serialized form of an "original" (version 1) object that has the fields
# ``foo`` and ``bar``. The class-location entries are placeholders; each test
# fills them in for the class under test.
_SERIALIZED_OLD = {
    "__module__": None,  # define in each test
    "__qualname__": None,  # define in each test
    "foo": "foo",
    "bar": "bar",
    ":version:": 1,
}


class KeyAdded(_DefaultBase):
    """Example object whose new layout gained a ``qux`` key (default 10)."""

    def __init__(self, foo, bar, qux=10):
        self.foo = foo
        self.bar = bar
        self.qux = qux

    def _to_dict(self):
        return dict(foo=self.foo, bar=self.bar, qux=self.qux)

    @classmethod
    def serialization_migration(cls, dct, version):
        """Upgrade a version-1 dict by adding ``qux`` via ``new_key_added``.

        Dicts of any other version are returned as given.
        """
        if version != 1:
            return dct

        return new_key_added(dct, 'qux', 10)


class KeyRemoved(_DefaultBase):
    """Example object whose new layout no longer has the ``bar`` key."""

    def __init__(self, foo):
        self.foo = foo

    def _to_dict(self):
        return dict(foo=self.foo)

    @classmethod
    def serialization_migration(cls, dct, version):
        """Upgrade a version-1 dict by dropping ``bar`` via ``old_key_removed``.

        The helper is called with ``should_warn=True``. Dicts of any other
        version are returned as given.
        """
        if version != 1:
            return dct

        return old_key_removed(dct, "bar", should_warn=True)


class KeyRenamed(_DefaultBase):
    """Example object whose ``bar`` key is called ``baz`` in the new layout."""

    def __init__(self, foo, baz):
        self.foo = foo
        self.baz = baz

    def _to_dict(self):
        return dict(foo=self.foo, baz=self.baz)

    @classmethod
    def serialization_migration(cls, dct, version):
        """Upgrade a version-1 dict by renaming ``bar`` to ``baz``.

        Uses ``key_renamed``. Dicts of any other version are returned as
        given.
        """
        if version != 1:
            return dct

        return key_renamed(dct, "bar", "baz")


class MigrationTester(GufeTokenizableTestsMixin):
    """Reusable tests for a class whose serialized layout changed.

    Subclasses set ``cls`` (the class under test), ``input_dict`` (an
    old-format serialized dict), ``kwargs`` (what the migrated dict should
    look like / how to build the equivalent object) and ``key``.
    """
    input_dict: Optional[dict[str, Any]] = None
    """Initial input dict (except class name info)"""
    kwargs: Optional[dict[str, Any]] = None
    """kwargs to create an equivalent object from scratch"""

    # NOTE(review): presumably opts out of the mixin's repr comparison --
    # confirm against GufeTokenizableTestsMixin.
    repr = None

    @pytest.fixture
    def instance(self):
        """Build the expected object directly from ``kwargs``."""
        return self.cls(**self.kwargs)

    def _prep_dct(self, dct):
        """Return a deep copy of ``dct`` with the class location filled in.

        ``__module__`` and ``__qualname__`` are set from ``self.cls``. The
        input is copied first because the fixture dicts are module-level
        objects shared between test classes and must not be mutated.
        """
        # Copy the argument itself; this used to ignore ``dct`` and always
        # copy ``self.input_dict``.
        dct = copy.deepcopy(dct)
        dct['__module__'] = self.cls.__module__
        dct['__qualname__'] = self.cls.__qualname__
        return dct

    def test_serialization_migration(self):
        """The migration hook alone turns the old dict into ``kwargs``."""
        # in these examples, self.kwargs is the same as the output of
        # serialization_migration (not necessarily true for all classes)
        dct = self._prep_dct(self.input_dict)
        # the hook is called here with only the data keys, so the class
        # location and version entries are stripped first
        del dct['__module__']
        del dct['__qualname__']
        version = dct.pop(':version:')
        assert self.cls.serialization_migration(dct, version) == self.kwargs

    def test_migration(self, instance):
        """``from_dict`` on the old dict equals the directly-built object."""
        dct = self._prep_dct(self.input_dict)
        reconstructed = from_dict(dct)
        expected = instance
        assert expected == reconstructed

class TestKeyAdded(MigrationTester):
    # Migration case: the version-1 dict lacks ``qux``; it is expected to
    # appear with the value 10.
    cls = KeyAdded
    key = "KeyAdded-43d61e49c793a863b8b4f96b0e0b2876"
    input_dict = _SERIALIZED_OLD
    kwargs = dict(foo="foo", bar="bar", qux=10)


class TestKeyRemoved(MigrationTester):
    # Migration case: the version-1 dict still carries ``bar``; only ``foo``
    # is expected to remain.
    cls = KeyRemoved
    key = "KeyRemoved-93a689cdf75976b83507e4d1ded1ad7b"
    input_dict = _SERIALIZED_OLD
    kwargs = dict(foo="foo")


class TestKeyRenamed(MigrationTester):
    # Migration case: the version-1 value stored under ``bar`` is expected
    # under ``baz``.
    cls = KeyRenamed
    key = "KeyRenamed-55860190714f16df7b9b6aab47346619"
    input_dict = _SERIALIZED_OLD
    kwargs = dict(foo="foo", baz="bar")


# Old (version 1) serialized form for the nested-settings example. In the
# new layout the child value moves from the son's settings to the daughter's
# settings (some sort of family issues, idk). The class-location entries are
# placeholders that each test overwrites.
_SERIALIZED_NESTED_OLD = {
    "__module__": ...,
    "__qualname__": ...,
    ":version:": 1,
    "settings": {
        "son": {"son_child": 10},
        "daughter": {},
    },
}


# New-layout settings model for the "son" branch; it declares no fields.
class SonSettings(BaseModel):
    """v2 model is empty"""


# New-layout settings model for the "daughter" branch.
class DaughterSettings(BaseModel):
    """v2 model has child; v1 would not"""
    # required integer field; the old-format fixture has no such entry
    daughter_child: int


# Top-level settings model: one nested model per branch.
class GrandparentSettings(BaseModel):
    son: SonSettings
    daughter: DaughterSettings


class Grandparent(_DefaultBase):
    """Example tokenizable that wraps a nested ``GrandparentSettings`` model.

    Used to exercise ``nested_key_moved``: a version-1 dict stores the child
    value at ``settings.son.son_child``, while this layout expects it at
    ``settings.daughter.daughter_child``.
    """

    def __init__(self, settings: GrandparentSettings):
        self.settings = settings

    @classmethod
    def _schema_version(cls):
        return 2

    @classmethod
    def serialization_migration(cls, dct, version):
        """Upgrade a version-1 dict by moving the nested child key.

        Dicts of any other version are returned as given.
        """
        if version != 1:
            return dct

        return nested_key_moved(
            dct,
            old_name="settings.son.son_child",
            new_name="settings.daughter.daughter_child",
        )

    def _to_dict(self):
        # the settings model is stored as a plain nested dict
        settings_dict = self.settings.dict()
        return {'settings': settings_dict}

    @classmethod
    def _from_dict(cls, dct):
        # rebuild the settings model from its plain-dict form
        return cls(settings=GrandparentSettings.parse_obj(dct['settings']))


class TestNestedKeyMoved(MigrationTester):
    # Migration case: the child value 10 is expected to move from
    # settings.son.son_child to settings.daughter.daughter_child.
    cls = Grandparent
    key = "Grandparent-0e2ea10e853d7ac730e7b2ae477a3801"
    input_dict = _SERIALIZED_NESTED_OLD
    kwargs = {
        'settings': {
            'son': {},
            'daughter': {'daughter_child': 10},
        },
    }

    @pytest.fixture
    def instance(self):
        """Build the expected object from settings models.

        Overrides the base fixture because ``Grandparent`` is constructed
        here from a ``GrandparentSettings`` instance rather than from the
        plain ``kwargs`` dict.
        """
        son = SonSettings()
        daughter = DaughterSettings(daughter_child=10)
        return self.cls(GrandparentSettings(son=son, daughter=daughter))
9 changes: 6 additions & 3 deletions gufe/tests/test_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,15 @@ def setup_method(self):

def leaf_dict(a):
return {'__module__': __name__, '__qualname__': "Leaf", "a": a,
"b": 2}
"b": 2, ':version:': 1}

self.expected_deep = {
'__qualname__': "Container",
'__module__': __name__,
'obj': leaf_dict(leaf_dict("foo")),
'lst': [leaf_dict("foo"), 0],
'dct': {"leaf": leaf_dict("foo"), "a": "b"}
'dct': {"leaf": leaf_dict("foo"), "a": "b"},
':version:': 1,
}

self.expected_shallow = {
Expand All @@ -187,14 +188,16 @@ def leaf_dict(a):
'obj': bar,
'lst': [leaf, 0],
'dct': {'leaf': leaf, 'a': 'b'},
':version:': 1,
}

self.expected_keyed = {
'__qualname__': "Container",
'__module__': __name__,
'obj': {":gufe-key:": bar.key},
'lst': [{":gufe-key:": leaf.key}, 0],
'dct': {'leaf': {":gufe-key:": leaf.key}, 'a': 'b'}
'dct': {'leaf': {":gufe-key:": leaf.key}, 'a': 'b'},
':version:': 1,
}

def test_set_key(self):
Expand Down
Loading

0 comments on commit af0d3bd

Please sign in to comment.