From 5c5f9c2928d1cddaf4ff75f117aa44c94edcfc27 Mon Sep 17 00:00:00 2001
From: George Campbell
Date: Thu, 19 Oct 2023 15:03:29 -0700
Subject: [PATCH] update the entity model to provision crdb+kv

---
 .../models/org/netflix/entity.py | 20 ++++++++++++++++++-
 tests/netflix/test_entity.py     | 17 +++++++++++++++-
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/service_capacity_modeling/models/org/netflix/entity.py b/service_capacity_modeling/models/org/netflix/entity.py
index 3561167..02b96a6 100644
--- a/service_capacity_modeling/models/org/netflix/entity.py
+++ b/service_capacity_modeling/models/org/netflix/entity.py
@@ -59,6 +59,23 @@ def extra_model_arguments_schema() -> Dict[str, Any]:
     def compose_with(
         user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any]
     ) -> Tuple[Tuple[str, Callable[[CapacityDesires], CapacityDesires]], ...]:
+        def _modify_crdb_desires(
+            user_desires: CapacityDesires,
+        ) -> CapacityDesires:
+            relaxed = user_desires.copy(deep=True)
+            item_count = relaxed.data_shape.estimated_state_item_count
+            # based on the nts cluster where the version store is 10x the prime store
+            if item_count is None:
+                # assume 10 KB items
+                item_count = user_desires.data_shape.estimated_state_size_gib.scale(
+                    1024 * 1024 / 10
+                )
+            # assume 512 B to track the id/version of each item
+            relaxed.data_shape.estimated_state_size_gib = item_count.scale(
+                512 / 1024**3  # bytes per item in GiB
+            )
+            return relaxed
+
         def _modify_elasticsearch_desires(
             user_desires: CapacityDesires,
         ) -> CapacityDesires:
@@ -69,7 +86,8 @@ def _modify_elasticsearch_desires(
             return relaxed
 
         return (
-            ("org.netflix.cassandra", lambda x: x),
+            ("org.netflix.cockroachdb", _modify_crdb_desires),
+            ("org.netflix.key-value", lambda x: x),
             ("org.netflix.elasticsearch", _modify_elasticsearch_desires),
         )
 
diff --git a/tests/netflix/test_entity.py b/tests/netflix/test_entity.py
index 6646bf4..16afa24 100644
--- a/tests/netflix/test_entity.py
+++ b/tests/netflix/test_entity.py
@@ -21,7 +21,7 @@ def test_entity_increasing_qps_simple():
         ),
         data_shape=DataShape(
             estimated_state_size_gib=Interval(
-                low=20, mid=200, high=2000, confidence=0.98
+                low=2, mid=20, high=200, confidence=0.98
             ),
         ),
     )
@@ -33,6 +33,21 @@ def test_entity_increasing_qps_simple():
         simulations=256,
     )
 
+    # the set of cluster types the planner chose
+    types = {
+        c.cluster_type
+        for c in list(cap_plan.least_regret[0].candidate_clusters.regional)
+        + list(cap_plan.least_regret[0].candidate_clusters.zonal)
+    }
+    assert types == {
+        "dgwentity",
+        "dgwkv",
+        "cockroachdb",
+        "cassandra",
+        "elasticsearch-data",
+        "elasticsearch-master",
+    }
+
     # Check the Java cluster
     entity_plan = next(
         filter(
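
Reviewer note: the sizing heuristic in _modify_crdb_desires is easy to sanity-check by hand. Below is a minimal sketch, not repo code: plain floats stand in for the planner's Interval type, the ~10 KiB item size and 512 B per-item version record are the assumptions stated in the patch comments, and crdb_version_store_gib / GIB are hypothetical names local to this sketch.

    import math

    # Sketch of the math in _modify_crdb_desires, with floats standing in
    # for Interval. Assumes ~10 KiB per item and 512 B per version record,
    # per the comments in the patch.

    GIB = 1024**3  # bytes per GiB

    def crdb_version_store_gib(state_size_gib: float) -> float:
        # Derive an item count from the prime store size at ~10 KiB/item:
        # 1 GiB = 1024 * 1024 KiB, so 1024 * 1024 / 10 items per GiB.
        item_count = state_size_gib * (1024 * 1024 / 10)
        # Charge 512 bytes per item for the id/version record, back in GiB.
        return item_count * 512 / GIB

    # The test's new mid estimate of 20 GiB of entity state implies about
    # 2.1M items and 1 GiB of CockroachDB version-tracking state.
    assert math.isclose(crdb_version_store_gib(20.0), 1.0)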