Merge pull request #96 from Netflix-Skunkworks/model_dump
Subtle change in the pydantic v2 dump methods: `dict()`/`json()` become `model_dump()`/`model_dump_json()`.
abersnaze authored Nov 22, 2024
2 parents 0b25d59 + 2777f5a commit f0df08a
Showing 9 changed files with 48 additions and 38 deletions.
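For orientation: pydantic v2 renames the serialization methods this codebase relied on. A minimal sketch of the rename, assuming pydantic v2 is installed (the `Example` model is illustrative, not from this repo):

```python
from pydantic import BaseModel


class Example(BaseModel):
    name: str = "default"
    count: int = 0


e = Example(count=3)
# pydantic v1 spelling (deprecated in v2): e.dict(exclude_unset=True)
# pydantic v2 spelling:
print(e.model_dump(exclude_unset=True))  # {'count': 3}
# likewise e.json(...) becomes:
print(e.model_dump_json())  # {"name":"default","count":3}
```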
9 changes: 7 additions & 2 deletions README.md
@@ -83,7 +83,7 @@ requirements = cap_plan.requirements
least_regret = cap_plan.least_regret

# Show the range of requirements for a single zone
-pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
+pprint.pprint(requirements.zonal[0].model_dump())

# Show our least regretful choices of hardware in least regret order
# So for example if we can buy the first set of computers we would prefer
@@ -92,7 +92,7 @@ pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
for choice in range(3):
    num_clusters = len(least_regret[choice].candidate_clusters.zonal)
    print(f"Our #{choice + 1} choice is {num_clusters} zones of:")
-    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].dict(exclude_unset=True))
+    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].model_dump())

```

@@ -102,6 +102,8 @@ use case, but each model (e.g. Cassandra) supplies reasonable defaults.
For example we can specify a lot more information

```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+
db_desires = CapacityDesires(
# This service is important to the business, not critical (tier 0)
service_tier=1,
@@ -152,6 +154,9 @@ In this example we tweak the QPS up, the CPU time of operations down
and SLO down. This more closely approximates a caching workload

```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+from service_capacity_modeling.capacity_planner import planner
+
cache_desires = CapacityDesires(
service_tier=1,
query_pattern=QueryPattern(
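Note: the README examples above can drop the explicit `exclude_unset=True` because `ExcludeUnsetModel` in `service_capacity_modeling/interface.py` (changed later in this diff) makes that the default for `model_dump()`; a usage sketch follows that section.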
2 changes: 1 addition & 1 deletion notebooks/demo.ipynb
@@ -71,7 +71,7 @@
"\n",
"import pprint\n",
"def mprint(x):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)"
" pprint.pprint(x.model_dump(), sort_dicts=False)"
]
},
{
10 changes: 5 additions & 5 deletions notebooks/io2.ipynb
@@ -87,7 +87,7 @@
"de.size_gib = 100\n",
"de.read_io_per_s = 33000\n",
"print(de.annual_cost)\n",
"pprint(shapes.hardware.regions['us-east-1'].drives['io2'].dict())"
"pprint(shapes.hardware.regions['us-east-1'].drives['io2'].model_dump())"
]
},
{
@@ -263,7 +263,7 @@
"least_regret = cap_plan.least_regret\n",
"\n",
"# Show the range of requirements for a single zone\n",
"pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
"pprint.pprint(requirements.zonal[0].model_dump())\n",
"\n",
"# Show our least regretful choices of hardware in least regret order\n",
"# So for example if we can buy the first set of computers we would prefer\n",
@@ -278,7 +278,7 @@
" if cluster.cluster_type in seen:\n",
" continue\n",
" seen.add(cluster.cluster_type)\n",
" pprint.pprint(cluster.dict(exclude_unset=True))"
" pprint.pprint(cluster.model_dump())"
]
},
{
@@ -397,7 +397,7 @@
"least_regret = cap_plan.least_regret\n",
"\n",
"# Show the range of requirements for a single zone\n",
"pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
"pprint.pprint(requirements.zonal[0].model_dump())\n",
"\n",
"# Show our least regretful choices of hardware in least regret order\n",
"# So for example if we can buy the first set of computers we would prefer\n",
@@ -412,7 +412,7 @@
" if cluster.cluster_type in seen:\n",
" continue\n",
" seen.add(cluster.cluster_type)\n",
" pprint.pprint(cluster.dict(exclude_unset=True))"
" pprint.pprint(cluster.model_dump())"
]
},
{
2 changes: 1 addition & 1 deletion notebooks/kafka_demo.ipynb
@@ -13,7 +13,7 @@
"\n",
"import pprint\n",
"def mprint(x):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
" pprint.pprint(x.model_dump(), sort_dicts=False)\n",
" \n",
"def do_summarize(cluster, regret):\n",
" cost = cluster.candidate_clusters.total_annual_cost\n",
2 changes: 1 addition & 1 deletion notebooks/visualize_regret.ipynb
@@ -12,7 +12,7 @@
"\n",
"def mprint(x):\n",
" if isinstance(x, BaseModel):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
" pprint.pprint(x.model_dump(), sort_dicts=False)\n",
" else:\n",
" pprint.pprint(x)\n",
" \n",
36 changes: 11 additions & 25 deletions service_capacity_modeling/interface.py
@@ -1,6 +1,5 @@
from __future__ import annotations

-import json
import sys
from decimal import Decimal
from enum import Enum
@@ -16,6 +15,7 @@

import numpy as np
from pydantic import BaseModel
+from pydantic import computed_field
from pydantic import ConfigDict
from pydantic import Field

@@ -25,15 +25,15 @@


class ExcludeUnsetModel(BaseModel):
-    def dict(self, *args, **kwargs):
+    def model_dump(self, *args, **kwargs):
        if "exclude_unset" not in kwargs:
            kwargs["exclude_unset"] = True
-        return super().dict(*args, **kwargs)
+        return super().model_dump(*args, **kwargs)

-    def json(self, *args, **kwargs):
+    def model_dump_json(self, *args, **kwargs):
        if "exclude_unset" not in kwargs:
            kwargs["exclude_unset"] = True
-        return super().json(*args, **kwargs)
+        return super().model_dump_json(*args, **kwargs)


###############################################################################
@@ -198,6 +198,7 @@ class Drive(ExcludeUnsetModel):
size_gib: int = 0
read_io_per_s: Optional[int] = None
write_io_per_s: Optional[int] = None
+throughput: Optional[int] = None
# If this drive has single tenant IO capacity, for example a single
# physical drive versus a virtualised drive
single_tenant: bool = True
@@ -252,6 +253,7 @@ def max_io_per_s(self):
else:
return sys.maxsize

+@computed_field(return_type=float)  # type: ignore
@property
def annual_cost(self):
size = self.size_gib or 0
@@ -687,8 +689,8 @@ class CapacityDesires(ExcludeUnsetModel):

def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
# Now merge with the models default
-desires_dict = self.dict(exclude_unset=True)
-default_dict = defaults.dict(exclude_unset=True)
+desires_dict = self.model_dump()
+default_dict = defaults.model_dump()

default_dict.get("query_pattern", {}).update(
desires_dict.pop("query_pattern", {})
@@ -701,7 +703,7 @@ def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
# If user gave state item count but not size or size but not count
# calculate the missing one from the other
user_size = (
-    self.dict(exclude_unset=True)
+    self.model_dump()
    .get("data_shape", {})
    .get("estimated_state_size_gib", None)
)
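The merge above relies on plain-dict semantics: user-set values win, model defaults fill the gaps. A self-contained sketch with illustrative field names:

```python
defaults = {"query_pattern": {"qps": 100, "latency_ms": 10}, "tier": 1}
user = {"query_pattern": {"qps": 5000}}

# mirror of merge_with: merge the nested section first, then the top level
defaults["query_pattern"].update(user.pop("query_pattern", {}))
defaults.update(user)
print(defaults)  # {'query_pattern': {'qps': 5000, 'latency_ms': 10}, 'tier': 1}
```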
@@ -790,27 +792,11 @@ class Clusters(ExcludeUnsetModel):
services: Sequence[ServiceCapacity] = []

# Backwards compatibility for total_annual_cost
+@computed_field(return_type=float)  # type: ignore
@property
def total_annual_cost(self) -> Decimal:
return cast(Decimal, round(sum(self.annual_costs.values()), 2))

-# TODO(josephl): Once https://github.com/pydantic/pydantic/issues/935
-#   resolves use w.e. that does to make it so total_annual_cost
-#   is present in the JSON. For now we do this hack.
-def dict(self, *args, **kwargs):
-    attribs = super().dict(*args, **kwargs)
-    attribs["total_annual_cost"] = self.total_annual_cost
-    return attribs
-
-def json(self, *args, **kwargs):
-    # I can't figure out how to get all of pydantics JSON
-    # serialization goodness (e.g. handling Decimals and nested
-    # models) without just roundtriping ... let's wait for #935
-    pydantic_json = super().json(*args, **kwargs)
-    data = json.loads(pydantic_json)
-    data["total_annual_cost"] = float(round(self.total_annual_cost, 2))
-    return json.dumps(data)
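With `@computed_field`, pydantic v2 includes the property in both `model_dump()` and `model_dump_json()` output, which is what lets this commit delete the `dict()`/`json()` hack above. A simplified sketch (plain floats instead of the repo's `Decimal` costs):

```python
from typing import Dict

from pydantic import BaseModel, computed_field


class ClustersSketch(BaseModel):
    annual_costs: Dict[str, float] = {}

    @computed_field(return_type=float)  # type: ignore
    @property
    def total_annual_cost(self) -> float:
        return round(sum(self.annual_costs.values()), 2)


c = ClustersSketch(annual_costs={"zonal": 1234.0, "regional": 234.0})
print(c.model_dump())  # includes 'total_annual_cost': 1468.0 automatically
```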


class CapacityPlan(ExcludeUnsetModel):
requirements: Requirements
4 changes: 2 additions & 2 deletions service_capacity_modeling/models/org/netflix/evcache.py
@@ -417,9 +417,9 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
)

estimated_read_size: Interval = Interval(
-    **user_desires.query_pattern.dict(exclude_unset=True).get(
+    **user_desires.query_pattern.model_dump().get(
        "estimated_mean_read_size_bytes",
-        user_desires.query_pattern.dict(exclude_unset=True).get(
+        user_desires.query_pattern.model_dump().get(
            "estimated_mean_write_size_bytes",
            {"low": 16, "mid": 1024, "high": 65536, "confidence": 0.95},
        ),
    ),
2 changes: 1 addition & 1 deletion service_capacity_modeling/models/org/netflix/kafka.py
@@ -427,7 +427,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
concurrent_readers = max(
1, int(user_desires.query_pattern.estimated_read_per_second.mid)
)
-query_pattern = user_desires.query_pattern.dict(exclude_unset=True)
+query_pattern = user_desires.query_pattern.model_dump()
if "estimated_mean_write_size_bytes" in query_pattern:
write_bytes = Interval(**query_pattern["estimated_mean_write_size_bytes"])
else:
19 changes: 19 additions & 0 deletions tests/test_model_dump.py
@@ -0,0 +1,19 @@
+import json
+from decimal import Decimal
+
+from service_capacity_modeling.interface import Clusters
+
+
+def test_total_annual_cost():
+    """make sure total_annual_cost is calculated and dumped correctly"""
+    cluster = Clusters(
+        annual_costs={"right-zonal": Decimal(1234), "right-regional": Decimal(234)}
+    )
+    expected_total = float(cluster.total_annual_cost)
+
+    assert expected_total == cluster.model_dump().get("total_annual_cost")
+    assert expected_total == cluster.dict().get("total_annual_cost")
+    assert expected_total == json.loads(cluster.model_dump_json()).get(
+        "total_annual_cost"
+    )
+    assert expected_total == json.loads(cluster.json()).get("total_annual_cost")
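The `.dict()` and `.json()` assertions still pass on pydantic v2 because those methods survive as deprecated aliases that delegate to `model_dump()` and `model_dump_json()`, so pre-existing callers keep seeing `total_annual_cost` without the removed hack.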
