From 592def649a4586e5a2e9b10001721d63cf09ea2f Mon Sep 17 00:00:00 2001
From: ramsrivatsak
Date: Thu, 19 Oct 2023 07:39:32 -0700
Subject: [PATCH] consolidate current instance type (str), count (interval), cpu% (interval) into a new model object

---
 Notes (text between the "---" line and the diff is ignored by git am):

 * interface.py: adds the CurrentCapacity model (current_instance_type,
   current_cluster_size, current_instance, cpu_utilization) and exposes it
   as CapacityDesires.current_capacity; the former top-level
   current_instance_type / current_instance fields are removed.
 * capacity_planner.py: _plan_certain resolves the current instance by name
   and stores it on desires.current_capacity. The emptiness guard uses
   `!=` rather than `is not`: identity comparison against a str literal is
   implementation-dependent and emits SyntaxWarning on Python 3.8+.
 * cassandra.py: the CPU-based sizing branch reads
   desires.current_capacity.cpu_utilization.high and
   desires.current_capacity.current_instance instead of the
   "max_cpu_utilization" extra model argument and a current_instance
   parameter; required_cluster_size becomes an optional keyword defaulting
   to None.
 * test_cassandra.py: test_plan_certain supplies the same values through
   CurrentCapacity.

 service_capacity_modeling/capacity_planner.py |  6 +++---
 service_capacity_modeling/interface.py        | 13 ++++++++++---
 .../models/org/netflix/cassandra.py           | 18 +++++++-----------
 tests/netflix/test_cassandra.py               | 12 +++++++++---
 4 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/service_capacity_modeling/capacity_planner.py b/service_capacity_modeling/capacity_planner.py
index 29cf8d8..118ef4b 100644
--- a/service_capacity_modeling/capacity_planner.py
+++ b/service_capacity_modeling/capacity_planner.py
@@ -504,10 +504,10 @@ def _plan_certain(
         allowed_drives.update(hardware.drives.keys())
 
     # Get current instance object if exists
-    if desires.current_instance_type is not "":
+    if desires.current_capacity.current_instance_type != "":
         for instance in hardware.instances.values():
-            if instance.name == desires.current_instance_type:
-                desires.current_instance = instance
+            if instance.name == desires.current_capacity.current_instance_type:
+                desires.current_capacity.current_instance = instance
 
     plans = []
     if model.run_hardware_simulation():
diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py
index 9c47d85..27e354c 100644
--- a/service_capacity_modeling/interface.py
+++ b/service_capacity_modeling/interface.py
@@ -619,6 +619,13 @@ class DataShape(ExcludeUnsetModel):
     )
 
 
+class CurrentCapacity(ExcludeUnsetModel):
+    current_instance_type: str = ""
+    current_cluster_size: int = 0
+    current_instance: Instance = None  # type: ignore
+    cpu_utilization: Interval = certain_float(0.0)
+
+
 class CapacityDesires(ExcludeUnsetModel):
     # How critical is this cluster, impacts how much "extra" we provision
     # 0 = Critical to the product (Product does not function)
@@ -633,14 +640,14 @@ class CapacityDesires(ExcludeUnsetModel):
     # What will the state look like
     data_shape: DataShape = DataShape()
 
+    # What is the current microarchitectural/system configuration of the system
+    current_capacity: CurrentCapacity = CurrentCapacity()
+
     # When users are providing latency estimates, what is the typical
     # instance core frequency we are comparing to. Databases use i3s a lot
     # hence this default
     core_reference_ghz: float = 2.3
 
-    current_instance_type: str = ""
-    current_instance: Instance = None  # type: ignore
-
     def merge_with(self, defaults: "CapacityDesires") -> "CapacityDesires":
         # Now merge with the models default
         desires_dict = self.dict(exclude_unset=True)
diff --git a/service_capacity_modeling/models/org/netflix/cassandra.py b/service_capacity_modeling/models/org/netflix/cassandra.py
index 03d3b91..f5b7530 100644
--- a/service_capacity_modeling/models/org/netflix/cassandra.py
+++ b/service_capacity_modeling/models/org/netflix/cassandra.py
@@ -64,13 +64,11 @@ def _write_buffer_gib_zone(
 
 def _estimate_cassandra_requirement(
     instance: Instance,
-    max_cpu_utilization: Optional[float],
-    required_cluster_size: Optional[int],
-    current_instance: Optional[Instance],
     desires: CapacityDesires,
     working_set: float,
     reads_per_second: float,
     max_rps_to_disk: int,
+    required_cluster_size: Optional[int] = None,
     zones_per_region: int = 3,
     copies_per_region: int = 3,
 ) -> CapacityRequirement:
@@ -80,8 +78,11 @@ def _estimate_cassandra_requirement(
     return the zonal capacity requirement
     """
     # Keep half of the cores free for background work (compaction, backup, repair)
-    if max_cpu_utilization is not None and current_instance is not None and required_cluster_size is not None:
-        needed_cores = (current_instance.cpu * required_cluster_size * zones_per_region) * (max_cpu_utilization / 20)
+    if desires.current_capacity.cpu_utilization.high is not None \
+            and desires.current_capacity.current_instance is not None \
+            and required_cluster_size is not None:
+        needed_cores = (desires.current_capacity.current_instance.cpu * required_cluster_size *
+                        zones_per_region) * (desires.current_capacity.cpu_utilization.high / 20)
     else:
         needed_cores = sqrt_staffed_cores(desires) * 2
     # Keep half of the bandwidth available for backup
@@ -175,7 +176,6 @@ def _upsert_params(cluster, params):
 # flake8: noqa: C901
 def _estimate_cassandra_cluster_zonal(
     instance: Instance,
-    max_cpu_utilization: Optional[float],
     drive: Drive,
     context: RegionContext,
     desires: CapacityDesires,
@@ -240,13 +240,11 @@ def _estimate_cassandra_cluster_zonal(
 
     requirement = _estimate_cassandra_requirement(
         instance=instance,
-        max_cpu_utilization=max_cpu_utilization,
-        required_cluster_size=required_cluster_size,
-        current_instance=desires.current_instance,
         desires=desires,
         working_set=working_set,
         reads_per_second=rps,
         max_rps_to_disk=max_rps_to_disk,
+        required_cluster_size=required_cluster_size,
         zones_per_region=zones_per_region,
         copies_per_region=copies_per_region,
     )
@@ -502,7 +500,6 @@ def capacity_plan(
         max_table_buffer_percent: float = min(
             0.5, extra_model_arguments.get("max_table_buffer_percent", 0.11)
         )
-        max_cpu_utilization: Optional[float] = extra_model_arguments.get("max_cpu_utilization", None)
 
         # Adjust heap defaults for high write clusters
         if (
@@ -514,7 +511,6 @@ def capacity_plan(
 
         return _estimate_cassandra_cluster_zonal(
             instance=instance,
-            max_cpu_utilization=max_cpu_utilization,
             drive=drive,
             context=context,
             desires=desires,
diff --git a/tests/netflix/test_cassandra.py b/tests/netflix/test_cassandra.py
index 8e9e1f2..21d4a18 100644
--- a/tests/netflix/test_cassandra.py
+++ b/tests/netflix/test_cassandra.py
@@ -1,5 +1,5 @@
 from service_capacity_modeling.capacity_planner import planner
-from service_capacity_modeling.interface import AccessConsistency
+from service_capacity_modeling.interface import AccessConsistency, CurrentCapacity
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
@@ -310,7 +310,13 @@ def test_plan_certain():
     """
     worn_desire = CapacityDesires(
         service_tier=1,
-        current_instance_type="i4i.8xlarge",
+        current_capacity=CurrentCapacity(
+            current_instance_type="i4i.8xlarge",
+            current_cluster_size=8,
+            cpu_utilization=Interval(
+                low=10.12, mid=13.2, high=14.194801291058118, confidence=1
+            ),
+        ),
         query_pattern=QueryPattern(
             access_pattern=AccessPattern(
                 AccessPattern.latency
@@ -340,9 +346,9 @@ def test_plan_certain():
         desires=worn_desire,
         extra_model_arguments={
             "required_cluster_size": 8,
-            "max_cpu_utilization": 14.194801291058118,
         },
     )
+    print(cap_plan)
 
     lr_clusters = cap_plan[0].candidate_clusters.zonal[0]
     assert lr_clusters.count == 8