Merge pull request #96 from Netflix-Skunkworks/model_dump
Subtle change in the pydantic v2 dump methods: `dict()`/`json()` become `model_dump()`/`model_dump_json()`.
abersnaze authored Nov 22, 2024
2 parents 0b25d59 + 2777f5a commit f0df08a
Showing 9 changed files with 48 additions and 38 deletions.
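For orientation: pydantic v2 renames the serialization methods this codebase relied on. A minimal sketch of the rename, assuming pydantic v2 is installed (the `Example` model is illustrative, not from this repo):

```python
from pydantic import BaseModel


class Example(BaseModel):
    name: str = "default"
    count: int = 0


e = Example(count=3)
# pydantic v1 spelling (deprecated in v2): e.dict(exclude_unset=True)
# pydantic v2 spelling:
print(e.model_dump(exclude_unset=True))  # {'count': 3}
# likewise e.json(...) becomes:
print(e.model_dump_json())  # {"name":"default","count":3}
```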
9 changes: 7 additions & 2 deletions README.md
@@ -83,7 +83,7 @@ requirements = cap_plan.requirements
least_regret = cap_plan.least_regret

# Show the range of requirements for a single zone
-pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
+pprint.pprint(requirements.zonal[0].model_dump())

# Show our least regretful choices of hardware in least regret order
# So for example if we can buy the first set of computers we would prefer
@@ -92,7 +92,7 @@ pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))
for choice in range(3):
    num_clusters = len(least_regret[choice].candidate_clusters.zonal)
    print(f"Our #{choice + 1} choice is {num_clusters} zones of:")
-    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].dict(exclude_unset=True))
+    pprint.pprint(least_regret[choice].candidate_clusters.zonal[0].model_dump())

```

@@ -102,6 +102,8 @@ use case, but each model (e.g. Cassandra) supplies reasonable defaults.
For example we can specify a lot more information

```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+
db_desires = CapacityDesires(
# This service is important to the business, not critical (tier 0)
service_tier=1,
@@ -152,6 +154,9 @@ In this example we tweak the QPS up, the CPU time of operations down
and SLO down. This more closely approximates a caching workload

```python
+from service_capacity_modeling.interface import CapacityDesires, QueryPattern, Interval, FixedInterval, DataShape
+from service_capacity_modeling.capacity_planner import planner
+
cache_desires = CapacityDesires(
service_tier=1,
query_pattern=QueryPattern(
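Note: the README examples above can drop the explicit `exclude_unset=True` because `ExcludeUnsetModel` in `service_capacity_modeling/interface.py` (changed later in this diff) makes that the default for `model_dump()`; a usage sketch follows that section.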
2 changes: 1 addition & 1 deletion notebooks/demo.ipynb
@@ -71,7 +71,7 @@
"\n",
"import pprint\n",
"def mprint(x):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)"
" pprint.pprint(x.model_dump(), sort_dicts=False)"
]
},
{
10 changes: 5 additions & 5 deletions notebooks/io2.ipynb
@@ -87,7 +87,7 @@
"de.size_gib = 100\n",
"de.read_io_per_s = 33000\n",
"print(de.annual_cost)\n",
"pprint(shapes.hardware.regions['us-east-1'].drives['io2'].dict())"
"pprint(shapes.hardware.regions['us-east-1'].drives['io2'].model_dump())"
]
},
{
@@ -263,7 +263,7 @@
"least_regret = cap_plan.least_regret\n",
"\n",
"# Show the range of requirements for a single zone\n",
"pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
"pprint.pprint(requirements.zonal[0].model_dump())\n",
"\n",
"# Show our least regretful choices of hardware in least regret order\n",
"# So for example if we can buy the first set of computers we would prefer\n",
@@ -278,7 +278,7 @@
" if cluster.cluster_type in seen:\n",
" continue\n",
" seen.add(cluster.cluster_type)\n",
" pprint.pprint(cluster.dict(exclude_unset=True))"
" pprint.pprint(cluster.model_dump())"
]
},
{
@@ -397,7 +397,7 @@
"least_regret = cap_plan.least_regret\n",
"\n",
"# Show the range of requirements for a single zone\n",
"pprint.pprint(requirements.zonal[0].dict(exclude_unset=True))\n",
"pprint.pprint(requirements.zonal[0].model_dump())\n",
"\n",
"# Show our least regretful choices of hardware in least regret order\n",
"# So for example if we can buy the first set of computers we would prefer\n",
@@ -412,7 +412,7 @@
" if cluster.cluster_type in seen:\n",
" continue\n",
" seen.add(cluster.cluster_type)\n",
" pprint.pprint(cluster.dict(exclude_unset=True))"
" pprint.pprint(cluster.model_dump())"
]
},
{
2 changes: 1 addition & 1 deletion notebooks/kafka_demo.ipynb
@@ -13,7 +13,7 @@
"\n",
"import pprint\n",
"def mprint(x):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
" pprint.pprint(x.model_dump(), sort_dicts=False)\n",
" \n",
"def do_summarize(cluster, regret):\n",
" cost = cluster.candidate_clusters.total_annual_cost\n",
2 changes: 1 addition & 1 deletion notebooks/visualize_regret.ipynb
@@ -12,7 +12,7 @@
"\n",
"def mprint(x):\n",
" if isinstance(x, BaseModel):\n",
" pprint.pprint(x.dict(exclude_unset=True), sort_dicts=False)\n",
" pprint.pprint(x.model_dump(), sort_dicts=False)\n",
" else:\n",
" pprint.pprint(x)\n",
" \n",
36 changes: 11 additions & 25 deletions service_capacity_modeling/interface.py
@@ -1,6 +1,5 @@
from __future__ import annotations

-import json
import sys
from decimal import Decimal
from enum import Enum
@@ -16,6 +15,7 @@

import numpy as np
from pydantic import BaseModel
+from pydantic import computed_field
from pydantic import ConfigDict
from pydantic import Field

@@ -25,15 +25,15 @@


class ExcludeUnsetModel(BaseModel):
-    def dict(self, *args, **kwargs):
+    def model_dump(self, *args, **kwargs):
        if "exclude_unset" not in kwargs:
            kwargs["exclude_unset"] = True
-        return super().dict(*args, **kwargs)
+        return super().model_dump(*args, **kwargs)

-    def json(self, *args, **kwargs):
+    def model_dump_json(self, *args, **kwargs):
        if "exclude_unset" not in kwargs:
            kwargs["exclude_unset"] = True
-        return super().json(*args, **kwargs)
+        return super().model_dump_json(*args, **kwargs)


###############################################################################
@@ -198,6 +198,7 @@ class Drive(ExcludeUnsetModel):
size_gib: int = 0
read_io_per_s: Optional[int] = None
write_io_per_s: Optional[int] = None
+throughput: Optional[int] = None
# If this drive has single tenant IO capacity, for example a single
# physical drive versus a virtualised drive
single_tenant: bool = True
@@ -252,6 +253,7 @@ def max_io_per_s(self):
else:
return sys.maxsize

+@computed_field(return_type=float)  # type: ignore
@property
def annual_cost(self):
size = self.size_gib or 0
@@ -687,8 +689,8 @@ class CapacityDesires(ExcludeUnsetModel):

def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
# Now merge with the models default
-desires_dict = self.dict(exclude_unset=True)
-default_dict = defaults.dict(exclude_unset=True)
+desires_dict = self.model_dump()
+default_dict = defaults.model_dump()

default_dict.get("query_pattern", {}).update(
desires_dict.pop("query_pattern", {})
@@ -701,7 +703,7 @@ def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
# If user gave state item count but not size or size but not count
# calculate the missing one from the other
user_size = (
-    self.dict(exclude_unset=True)
+    self.model_dump()
    .get("data_shape", {})
    .get("estimated_state_size_gib", None)
)
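The merge above relies on plain-dict semantics: user-set values win, model defaults fill the gaps. A self-contained sketch with illustrative field names:

```python
defaults = {"query_pattern": {"qps": 100, "latency_ms": 10}, "tier": 1}
user = {"query_pattern": {"qps": 5000}}

# mirror of merge_with: merge the nested section first, then the top level
defaults["query_pattern"].update(user.pop("query_pattern", {}))
defaults.update(user)
print(defaults)  # {'query_pattern': {'qps': 5000, 'latency_ms': 10}, 'tier': 1}
```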
@@ -790,27 +792,11 @@ class Clusters(ExcludeUnsetModel):
services: Sequence[ServiceCapacity] = []

# Backwards compatibility for total_annual_cost
+@computed_field(return_type=float)  # type: ignore
@property
def total_annual_cost(self) -> Decimal:
return cast(Decimal, round(sum(self.annual_costs.values()), 2))

-# TODO(josephl): Once https://github.com/pydantic/pydantic/issues/935
-#   resolves use w.e. that does to make it so total_annual_cost
-#   is present in the JSON. For now we do this hack.
-def dict(self, *args, **kwargs):
-    attribs = super().dict(*args, **kwargs)
-    attribs["total_annual_cost"] = self.total_annual_cost
-    return attribs
-
-def json(self, *args, **kwargs):
-    # I can't figure out how to get all of pydantics JSON
-    # serialization goodness (e.g. handling Decimals and nested
-    # models) without just roundtriping ... let's wait for #935
-    pydantic_json = super().json(*args, **kwargs)
-    data = json.loads(pydantic_json)
-    data["total_annual_cost"] = float(round(self.total_annual_cost, 2))
-    return json.dumps(data)
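With `@computed_field`, pydantic v2 includes the property in both `model_dump()` and `model_dump_json()` output, which is what lets this commit delete the `dict()`/`json()` hack above. A simplified sketch (plain floats instead of the repo's `Decimal` costs):

```python
from typing import Dict

from pydantic import BaseModel, computed_field


class ClustersSketch(BaseModel):
    annual_costs: Dict[str, float] = {}

    @computed_field(return_type=float)  # type: ignore
    @property
    def total_annual_cost(self) -> float:
        return round(sum(self.annual_costs.values()), 2)


c = ClustersSketch(annual_costs={"zonal": 1234.0, "regional": 234.0})
print(c.model_dump())  # includes 'total_annual_cost': 1468.0 automatically
```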


class CapacityPlan(ExcludeUnsetModel):
requirements: Requirements
4 changes: 2 additions & 2 deletions service_capacity_modeling/models/org/netflix/evcache.py
@@ -417,9 +417,9 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
)

estimated_read_size: Interval = Interval(
-    **user_desires.query_pattern.dict(exclude_unset=True).get(
+    **user_desires.query_pattern.model_dump().get(
        "estimated_mean_read_size_bytes",
-        user_desires.query_pattern.dict(exclude_unset=True).get(
+        user_desires.query_pattern.model_dump().get(
            "estimated_mean_write_size_bytes",
            {"low": 16, "mid": 1024, "high": 65536, "confidence": 0.95},
        ),
    ),
2 changes: 1 addition & 1 deletion service_capacity_modeling/models/org/netflix/kafka.py
@@ -427,7 +427,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
concurrent_readers = max(
1, int(user_desires.query_pattern.estimated_read_per_second.mid)
)
-query_pattern = user_desires.query_pattern.dict(exclude_unset=True)
+query_pattern = user_desires.query_pattern.model_dump()
if "estimated_mean_write_size_bytes" in query_pattern:
write_bytes = Interval(**query_pattern["estimated_mean_write_size_bytes"])
else:
19 changes: 19 additions & 0 deletions tests/test_model_dump.py
@@ -0,0 +1,19 @@
+import json
+from decimal import Decimal
+
+from service_capacity_modeling.interface import Clusters
+
+
+def test_total_annual_cost():
+    """make sure total_annual_cost is calculated and dumped correctly"""
+    cluster = Clusters(
+        annual_costs={"right-zonal": Decimal(1234), "right-regional": Decimal(234)}
+    )
+    expected_total = float(cluster.total_annual_cost)
+
+    assert expected_total == cluster.model_dump().get("total_annual_cost")
+    assert expected_total == cluster.dict().get("total_annual_cost")
+    assert expected_total == json.loads(cluster.model_dump_json()).get(
+        "total_annual_cost"
+    )
+    assert expected_total == json.loads(cluster.json()).get("total_annual_cost")
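The `.dict()` and `.json()` assertions still pass on pydantic v2 because those methods survive as deprecated aliases that delegate to `model_dump()` and `model_dump_json()`, so pre-existing callers keep seeing `total_annual_cost` without the removed hack.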
