From 2777f5aebd98ccbd2ee0f87f8c154c2e59fadfe0 Mon Sep 17 00:00:00 2001
From: George Campbell
Date: Thu, 21 Nov 2024 16:56:25 -0800
Subject: [PATCH] Adapt to subtle changes in the pydantic v2 dump methods

---
 README.md                              |  9 +++--
 notebooks/demo.ipynb                   |  2 +-
 notebooks/io2.ipynb                    | 10 +++---
 notebooks/kafka_demo.ipynb             |  2 +-
 notebooks/visualize_regret.ipynb       |  2 +-
 service_capacity_modeling/interface.py | 36 ++++++-------------
 .../models/org/netflix/evcache.py      |  4 +--
 .../models/org/netflix/kafka.py        |  2 +-
 tests/test_model_dump.py               | 19 ++++++++++
 9 files changed, 48 insertions(+), 38 deletions(-)
 create mode 100644 tests/test_model_dump.py

diff --git a/README.md b/README.md
index 5172c3d..5021edf 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ requirements = cap_plan.requirements
 least_regret = cap_plan.least_regret
 
 # Show the range of requirements for a single zone
-pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
+pprint.pprint(requirements.zonal[0].model_dump())
 
 # Show our least regretful choices of hardware in least regret order
 # So for example if we can buy the first set of computers we would prefer
@@ -92,7 +92,7 @@ pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
 for choice in range(3):
     num_clusters = len(least_regret[choice].candidate_clusters.zonal)
     print(f"Our #{choice + 1} choice is {num_clusters} zones of:")
-    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].dict(exclude_unset=True))
+    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].model_dump())
 ```
 
@@ -102,6 +102,8 @@ use case, but each model (e.g. Cassandra) supplies reasonable defaults. For
 example we can specify a lot more information
 
 ```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+
 db_desires = CapacityDesires(
     # This service is important to the business, not critical (tier 0)
     service_tier=1,
@@ -152,6 +154,9 @@ In this example we tweak the QPS up, on CPU time of operations down and SLO down.
 This more closely approximates a caching workload
 
 ```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+from service_capacity_modeling.capacity_planner import planner
+
 cache_desires = CapacityDesires(
     service_tier=1,
     query_pattern=QueryPattern(
diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb
index fe15057..165e43b 100644
--- a/notebooks/demo.ipynb
+++ b/notebooks/demo.ipynb
@@ -71,7 +71,7 @@
     "\n",
     "import pprint\n",
     "def mprint(x):\n",
-    "    pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)"
+    "    pprint.pprint(x.model_dump(), sort_dicts=False)"
    ]
   },
   {
diff --git a/notebooks/io2.ipynb b/notebooks/io2.ipynb
index 3c29ae6..41860b6 100644
--- a/notebooks/io2.ipynb
+++ b/notebooks/io2.ipynb
@@ -87,7 +87,7 @@
     "de.size_gib = 100\n",
     "de.read_io_per_s = 33000\n",
     "print(de.annual_cost)\n",
-    "pprint(shapes.hardware.regions['us-east-1'].drives['io2'].dict())"
+    "pprint(shapes.hardware.regions['us-east-1'].drives['io2'].model_dump())"
    ]
   },
   {
@@ -263,7 +263,7 @@
     "least_regret = cap_plan.least_regret\n",
     "\n",
     "# Show the range of requirements for a single zone\n",
-    "pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
+    "pprint.pprint(requirements.zonal[0].model_dump())\n",
     "\n",
     "# Show our least regretful choices of hardware in least regret order\n",
     "# So for example if we can buy the first set of computers we would prefer\n",
@@ -278,7 +278,7 @@
     "    if cluster.cluster_type in seen:\n",
     "        continue\n",
     "    seen.add(cluster.cluster_type)\n",
-    "    pprint.pprint(cluster.dict(exclude_unset=True))"
+    "    pprint.pprint(cluster.model_dump())"
    ]
   },
   {
@@ -397,7 +397,7 @@
     "least_regret = cap_plan.least_regret\n",
     "\n",
     "# Show the range of requirements for a single zone\n",
-    "pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
+    "pprint.pprint(requirements.zonal[0].model_dump())\n",
     "\n",
     "# Show our least regretful choices of hardware in least regret order\n",
     "# So for example if we can buy the first set of computers we would prefer\n",
@@ -412,7 +412,7 @@
     "    if cluster.cluster_type in seen:\n",
     "        continue\n",
     "    seen.add(cluster.cluster_type)\n",
-    "    pprint.pprint(cluster.dict(exclude_unset=True))"
+    "    pprint.pprint(cluster.model_dump())"
    ]
   },
   {
diff --git a/notebooks/kafka_demo.ipynb b/notebooks/kafka_demo.ipynb
index 78c3402..4055ea8 100644
--- a/notebooks/kafka_demo.ipynb
+++ b/notebooks/kafka_demo.ipynb
@@ -13,7 +13,7 @@
     "\n",
     "import pprint\n",
     "def mprint(x):\n",
-    "    pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
+    "    pprint.pprint(x.model_dump(), sort_dicts=False)\n",
     "    \n",
     "def do_summarize(cluster, regret):\n",
     "    cost = cluster.candidate_clusters.total_annual_cost\n",
diff --git a/notebooks/visualize_regret.ipynb b/notebooks/visualize_regret.ipynb
index 995593c..db27821 100644
--- a/notebooks/visualize_regret.ipynb
+++ b/notebooks/visualize_regret.ipynb
@@ -12,7 +12,7 @@
     "\n",
     "def mprint(x):\n",
     "    if isinstance(x, BaseModel):\n",
-    "        pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
+    "        pprint.pprint(x.model_dump(), sort_dicts=False)\n",
     "    else:\n",
     "        pprint.pprint(x)\n",
     "    \n",
diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py
index e2bdee2..db09b53 100644
--- a/service_capacity_modeling/interface.py
+++ b/service_capacity_modeling/interface.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import json
 import sys
 from decimal import Decimal
 from enum import Enum
@@ -16,6 +15,7 @@ import numpy as np
 from pydantic import BaseModel
+from pydantic import computed_field
 from pydantic import ConfigDict
 from pydantic import Field
 
@@ -25,15 +25,15 @@ class ExcludeUnsetModel(BaseModel):
-    def dict(self, *args, **kwargs):
+    def model_dump(self, *args, **kwargs):
         if "exclude_unset" not in kwargs:
             kwargs["exclude_unset"] = True
-        return super().dict(*args, **kwargs)
+        return super().model_dump(*args, **kwargs)
 
-    def json(self, *args, **kwargs):
+    def model_dump_json(self, *args, **kwargs):
         if "exclude_unset" not in kwargs:
             kwargs["exclude_unset"] = True
-        return super().json(*args, **kwargs)
+        return super().model_dump_json(*args, **kwargs)
 
 
 ###############################################################################
@@ -198,6 +198,7 @@ class Drive(ExcludeUnsetModel):
     size_gib: int = 0
     read_io_per_s: Optional[int] = None
     write_io_per_s: Optional[int] = None
+    throughput: Optional[int] = None
     # If this drive has single tenant IO capacity, for example a single
     # physical drive versus a virtualised drive
     single_tenant: bool = True
@@ -252,6 +253,7 @@ def max_io_per_s(self):
         else:
             return sys.maxsize
 
+    @computed_field(return_type=float)  # type: ignore
     @property
     def annual_cost(self):
         size = self.size_gib or 0
@@ -687,8 +689,8 @@ class CapacityDesires(ExcludeUnsetModel):
 
     def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
         # Now merge with the models default
-        desires_dict = self.dict(exclude_unset=True)
-        default_dict = defaults.dict(exclude_unset=True)
+        desires_dict = self.model_dump()
+        default_dict = defaults.model_dump()
 
         default_dict.get("query_pattern", {}).update(
             desires_dict.pop("query_pattern", {})
@@ -701,7 +703,7 @@ def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
         # If user gave state item count but not size or size but not count
         # calculate the missing one from the other
         user_size = (
-            self.dict(exclude_unset=True)
+            self.model_dump()
             .get("data_shape", {})
             .get("estimated_state_size_gib", None)
         )
@@ -790,27 +792,11 @@ class Clusters(ExcludeUnsetModel):
     services: Sequence[ServiceCapacity] = []
 
     # Backwards compatibility for total_annual_cost
+    @computed_field(return_type=float)  # type: ignore
     @property
     def total_annual_cost(self) -> Decimal:
         return cast(Decimal, round(sum(self.annual_costs.values()), 2))
 
-    # TODO(josephl): Once https://github.com/pydantic/pydantic/issues/935
-    # resolves use w.e. that does to make it so total_annual_cost
-    # is present in the JSON. For now we do this hack.
-    def dict(self, *args, **kwargs):
-        attribs = super().dict(*args, **kwargs)
-        attribs["total_annual_cost"] = self.total_annual_cost
-        return attribs
-
-    def json(self, *args, **kwargs):
-        # I can't figure out how to get all of pydantics JSON
-        # serialization goodness (e.g. handling Decimals and nested
-        # models) without just roundtriping ... let's wait for #935
-        pydantic_json = super().json(*args, **kwargs)
-        data = json.loads(pydantic_json)
-        data["total_annual_cost"] = float(round(self.total_annual_cost, 2))
-        return json.dumps(data)
-
 
 class CapacityPlan(ExcludeUnsetModel):
     requirements: Requirements
diff --git a/service_capacity_modeling/models/org/netflix/evcache.py b/service_capacity_modeling/models/org/netflix/evcache.py
index 43d90d5..b342269 100644
--- a/service_capacity_modeling/models/org/netflix/evcache.py
+++ b/service_capacity_modeling/models/org/netflix/evcache.py
@@ -417,9 +417,9 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
     )
 
     estimated_read_size: Interval = Interval(
-        **user_desires.query_pattern.dict(exclude_unset=True).get(
+        **user_desires.query_pattern.model_dump().get(
             "estimated_mean_read_size_bytes",
-            user_desires.query_pattern.dict(exclude_unset=True).get(
+            user_desires.query_pattern.model_dump().get(
                 "estimated_mean_write_size_bytes",
                 {"low": 16, "mid": 1024, "high": 65536, "confidence": 0.95},
             ),
diff --git a/service_capacity_modeling/models/org/netflix/kafka.py b/service_capacity_modeling/models/org/netflix/kafka.py
index 00a3fd0..0576808 100644
--- a/service_capacity_modeling/models/org/netflix/kafka.py
+++ b/service_capacity_modeling/models/org/netflix/kafka.py
@@ -427,7 +427,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
     concurrent_readers = max(
         1, int(user_desires.query_pattern.estimated_read_per_second.mid)
     )
-    query_pattern = user_desires.query_pattern.dict(exclude_unset=True)
+    query_pattern = user_desires.query_pattern.model_dump()
     if "estimated_mean_write_size_bytes" in query_pattern:
         write_bytes = Interval(**query_pattern["estimated_mean_write_size_bytes"])
     else:
diff --git a/tests/test_model_dump.py b/tests/test_model_dump.py
new file mode 100644
index 0000000..acd1e39
--- /dev/null
+++ b/tests/test_model_dump.py
@@ -0,0 +1,19 @@
+import json
+from decimal import Decimal
+
+from service_capacity_modeling.interface import Clusters
+
+
+def test_total_annual_cost():
+    """make sure total_annual_cost is calculated and dumped correctly"""
+    cluster = Clusters(
+        annual_costs={"right-zonal": Decimal(1234), "right-regional": Decimal(234)}
+    )
+    expected_total = float(cluster.total_annual_cost)
+
+    assert expected_total == cluster.model_dump().get("total_annual_cost")
+    assert expected_total == cluster.dict().get("total_annual_cost")
+    assert expected_total == json.loads(cluster.model_dump_json()).get(
+        "total_annual_cost"
+    )
+    assert expected_total == json.loads(cluster.json()).get("total_annual_cost")
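
For reviewers who want to see the two pydantic v2 patterns from this patch in isolation, here is a minimal standalone sketch: overriding `model_dump`/`model_dump_json` so dumps default to `exclude_unset=True`, and using `@computed_field` so a derived property such as `total_annual_cost` shows up in dumps without the old v1 `dict()`/`json()` override hack. The `Costs` model below is a toy stand-in for `Clusters`, not part of the library.

```python
from decimal import Decimal
from typing import Dict

from pydantic import BaseModel, computed_field


class ExcludeUnsetModel(BaseModel):
    # Mirror of the patched base class: dumps default to exclude_unset=True
    # unless the caller explicitly passes a value.
    def model_dump(self, *args, **kwargs):
        kwargs.setdefault("exclude_unset", True)
        return super().model_dump(*args, **kwargs)

    def model_dump_json(self, *args, **kwargs):
        kwargs.setdefault("exclude_unset", True)
        return super().model_dump_json(*args, **kwargs)


class Costs(ExcludeUnsetModel):
    # Hypothetical stand-in for Clusters: one stored field, one derived field.
    annual_costs: Dict[str, Decimal] = {}

    # @computed_field includes the derived value in both model_dump() and
    # model_dump_json() even though it is a property, replacing the removed
    # dict()/json() overrides on Clusters.
    @computed_field(return_type=float)  # type: ignore
    @property
    def total_annual_cost(self) -> float:
        return float(round(sum(self.annual_costs.values()), 2))


costs = Costs(annual_costs={"zonal": Decimal(1234), "regional": Decimal(234)})
print(costs.model_dump())       # {'annual_costs': {...}, 'total_annual_cost': 1468.0}
print(costs.model_dump_json())  # total_annual_cost is present in the JSON too
```

Note that the deprecated v1 spellings `.dict()` and `.json()` still exist on pydantic v2 models and delegate to the new dump methods, which is why the new test exercises both spellings against the same expected total.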