Adds test for high disk usage #71

Merged
merged 3 commits on Oct 10, 2023
service_capacity_modeling/models/org/netflix/evcache.py (11 changes: 6 additions & 5 deletions)
@@ -162,7 +162,7 @@ def _estimate_evcache_cluster_zonal(
     zones_per_region: int = 3,
     copies_per_region: int = 3,
     max_local_disk_gib: int = 2048,
-    max_regional_size: int = 999,
+    max_regional_size: int = 10000,
     min_instance_memory_gib: int = 12,
     cross_region_replication: Replication = Replication.none,
 ) -> Optional[CapacityPlan]:
@@ -175,6 +175,9 @@ def _estimate_evcache_cluster_zonal(
     if instance.ram_gib < min_instance_memory_gib:
         return None
 
+    if instance.drive is None:
+        return None
+
     # Based on the disk latency and the read latency SLOs we adjust our
     # working set to keep more or less data in RAM. Faster drives need
     # less fronting RAM.
@@ -234,8 +237,6 @@ def reserve_memory(instance_mem_gib):
         needed_network_mbps=requirement.network_mbps.mid,
         # EVCache doesn't use cloud drives to store data, we will have
         # accounted for the data going on drives or memory via working set
-        required_disk_ios=lambda x, y: (0, 0),
-        required_disk_space=lambda x: 0,
         max_local_disk_gib=max_local_disk_gib,
         # EVCache clusters should be balanced per zone
         cluster_size=lambda x: next_n(x, zones_per_region),
@@ -305,7 +306,7 @@ class NflxEVCacheArguments(BaseModel):
         " this will be deduced from tier",
     )
     max_regional_size: int = Field(
-        default=999,
+        default=10000,
         description="What is the maximum size of a cluster in this region",
     )
     max_local_disk_gib: int = Field(
@@ -341,7 +342,7 @@ def capacity_plan(
         copies_per_region: int = extra_model_arguments.get(
             "copies_per_region", default_copies
         )
-        max_regional_size: int = extra_model_arguments.get("max_regional_size", 999)
+        max_regional_size: int = extra_model_arguments.get("max_regional_size", 10000)
         # Very large nodes are hard to cache warm
         max_local_disk_gib: int = extra_model_arguments.get(
             "max_local_disk_gib", 1024 * 6
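
The practical effect of the evcache.py changes is easier to see outside the diff: candidate instance types without a local drive are now rejected up front, and removing the two zeroed-out lambdas lets the planner's normal disk accounting run instead of pretending EVCache needs no disk at all. A minimal sketch of the new guard logic, using hypothetical Instance/Drive stand-ins rather than the library's real shape types:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Drive:
        size_gib: int

    @dataclass
    class Instance:
        name: str
        ram_gib: float
        drive: Optional[Drive]  # None for RAM-only instance shapes

    def eligible(instance: Instance, min_instance_memory_gib: int = 12) -> bool:
        # Mirrors the two early returns in _estimate_evcache_cluster_zonal:
        # not enough RAM, or no local drive to hold the on-disk working set.
        if instance.ram_gib < min_instance_memory_gib:
            return False
        if instance.drive is None:
            return False
        return True

    assert not eligible(Instance("ram-only", ram_gib=32, drive=None))
    assert eligible(Instance("storage-heavy", ram_gib=32, drive=Drive(size_gib=2500)))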
tests/netflix/test_evcache.py (40 changes: 39 additions & 1 deletion)
@@ -295,4 +295,42 @@ def test_evcache_ondisk_disk_usage():
         total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \
             candidate.candidate_clusters.zonal[0].count
 
-        assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
\ No newline at end of file
+        assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
+
+def test_evcache_ondisk_high_disk_usage():
+    high_disk_usage_rps = CapacityDesires(
+        service_tier=0,
+        query_pattern=QueryPattern(
+            estimated_read_per_second=Interval(
+                low=284, mid=7110000, high=7110000 * 1.2, confidence=1.0
+            ),
+            estimated_write_per_second=Interval(
+                low=0, mid=2620000, high=2620000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=12000, mid=12000, high=12000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=16000, mid=16000, high=16000 * 1.2, confidence=1.0
+            ),
+        ),
+        data_shape=DataShape(
+            estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0
+            ),
+        ),
+    )
+
+    plan = planner.plan_certain(
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=high_disk_usage_rps,
+    )
+
+    for candidate in plan:
+        if candidate.candidate_clusters.zonal[0].instance.drive is not None:
+            total_disk = candidate.candidate_clusters.zonal[0].instance.drive.size_gib * \
+                candidate.candidate_clusters.zonal[0].count
+
+            assert total_disk > high_disk_usage_rps.data_shape.estimated_state_size_gib.mid
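
For scale: the new test pins roughly 2.2 PiB of state (2,306,867 GiB), which is also why max_regional_size had to grow past the old cap of 999. A back-of-the-envelope check, assuming a hypothetical 7,500 GiB local drive per node and treating the regional cap as a limit on total nodes across all three zones:

    # Illustrative arithmetic only; the 7,500 GiB drive size is an assumption,
    # not a value taken from the library's instance catalog.
    state_gib = 2_306_867                 # estimated_state_size_gib.mid in the test
    drive_gib = 7_500                     # assumed local disk per node
    zones_per_region = 3                  # model default

    nodes_per_zone = state_gib / drive_gib            # ~308 nodes to hold one copy
    regional_nodes = nodes_per_zone * zones_per_region
    print(round(regional_nodes))                      # ~923, already brushing the
                                                      # old cap of 999 before any
                                                      # headroom or growth buffer

To run just this test: pytest tests/netflix/test_evcache.py -k high_disk_usage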