Add new constant loader & Fix poisson loader issue #158

Merged: 10 commits, Oct 18, 2024
21 changes: 13 additions & 8 deletions evals/benchmark/README.md
@@ -12,11 +12,15 @@ This Tool provides a microservice benchmarking framework that uses YAML configur

## Table of Contents

- [Installation](#installation)
- [Usage](#usage)
- [Configuration](#configuration)
- [Test Suite Configuration](#test-suite-configuration)
- [Test Cases](#test-cases)
- [OPEA Benchmark Tool](#opea-benchmark-tool)
- [Features](#features)
- [Table of Contents](#table-of-contents)
- [Installation](#installation)
- [Prerequisites](#prerequisites)
- [Usage](#usage)
- [Configuration](#configuration)
- [Test Suite Configuration](#test-suite-configuration)
- [Test Cases](#test-cases)


## Installation
@@ -66,10 +70,11 @@ test_suite_config:
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Poisson load shape specific parameters, activate only if load_shape is poisson
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson
arrival-rate: 1.0 # Request arrival rate
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden and constant load will be generated based on arrival_rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate
warm_ups: 0 # Number of test requests for warm-ups
run_time: 60m # Total runtime for the test suite
seed: # The seed for all RNGs
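As the updated comments describe, the constant shape can now be driven either by a fixed user count or by a request arrival rate. An illustrative sketch of the two variants (keys follow the snippet above; values are examples only, not defaults):

```yaml
# a) Fixed concurrency -- Locust's default constant shape
load_shape:
  name: constant
  params:
    constant:
      concurrent_level: 4      # 4 simulated users
---
# b) Constant request arrival rate -- overrides concurrent_level
load_shape:
  name: constant
  params:
    constant:
      arrival_rate: 2.0        # ~2 requests per second
```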
7 changes: 4 additions & 3 deletions evals/benchmark/benchmark.yaml
@@ -19,10 +19,11 @@ test_suite_config: # Overall configuration settings for the test suite
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Poisson load shape specific parameters, activate only if load_shape is poisson
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson
arrival-rate: 1.0 # Request arrival rate
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden and constant load will be generated based on arrival_rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate

test_cases:
chatqna:
33 changes: 18 additions & 15 deletions evals/benchmark/stresscli/commands/load_test.py
@@ -142,11 +142,17 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
load_shape_conf = run_settings.get("load-shape", global_settings.get("load-shape", locust_defaults["load-shape"]))
try:
load_shape = load_shape_conf["name"]
runspec["load-shape"] = load_shape_conf
except KeyError:
load_shape = DEFAULT_LOADSHAPE

runspec["load-shape"] = load_shape
if load_shape == DEFAULT_LOADSHAPE:
load_shape_params = None
try:
load_shape_params = load_shape_conf["params"][load_shape]
except KeyError:
console_logger.info(f"The specified load shape not found: {load_shape}")

if load_shape == DEFAULT_LOADSHAPE and (load_shape_params is None or "arrival_rate" not in load_shape_params):
# constant load is Locust's default load shape, do nothing.
console_logger.info("Use default load shape.")
else:
@@ -155,7 +161,10 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
if os.path.isfile(f_custom_load_shape):
# Add the locust file of the custom load shape into classpath
runspec["locustfile"] += f",{f_custom_load_shape}"
console_logger.info("Use custom load shape: {load_shape}")
if load_shape == DEFAULT_LOADSHAPE:
console_logger.info("Use default load shape based on request arrival rate")
else:
console_logger.info("Use custom load shape: {load_shape}")
else:
console_logger.error(
f"Unsupported load shape: {load_shape}."
@@ -180,15 +189,9 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in

spawn_rate = 100 if runspec["users"] > 100 else runspec["users"]

load_shape_params = None
try:
load_shape_params = load_shape_conf["params"][load_shape]
except KeyError:
console_logger.info(f"The specified load shape not found: {load_shape}")

# Dynamically allocate Locust processes to fit different loads
processes = 2
if load_shape == "constant":
if load_shape == "constant" and (load_shape_params is None or "arrival_rate" not in load_shape_params):
if runspec["max_requests"] > 0:
processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes
else:
Expand All @@ -197,11 +200,11 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
concurrent_level = int(load_shape_params["concurrent_level"])
processes = 10 if concurrent_level > 400 else 5 if concurrent_level > 200 else processes
elif load_shape == "poisson":
if load_shape_params and "arrival-rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5))
if load_shape_params and "arrival_rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5))
else:
if load_shape_params and "arrival-rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5))
if load_shape_params and "arrival_rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5))
elif runspec["max_requests"] > 0:
processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes

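The allocation rule above sizes the Locust worker pool from the requested arrival rate: roughly one worker process per five requests per second, never fewer than two. A quick check of that arithmetic (standalone sketch, illustrative values only):

```python
import math

def processes_for_arrival_rate(arrival_rate: float) -> int:
    # Mirrors the rule above: ~1 worker process per 5 req/s, minimum of 2.
    return max(2, math.ceil(int(arrival_rate) / 5))

print(processes_for_arrival_rate(1.0))   # -> 2
print(processes_for_arrival_rate(12))    # -> 3
print(processes_for_arrival_rate(60))    # -> 12
```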
@@ -214,7 +217,7 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
"--run-time",
runspec["runtime"],
"--load-shape",
runspec["load-shape"],
load_shape,
"--dataset",
runspec["dataset"],
"--seed",
19 changes: 14 additions & 5 deletions evals/benchmark/stresscli/locust/aistress.py
@@ -87,9 +87,9 @@ def __init__(self, *args, **kwargs):
def bench_main(self):
max_request = self.environment.parsed_options.max_request
if max_request >= 0 and AiStressUser.request >= max_request:
# For poisson load shape, a user only sends single request before it stops.
# For custom load shape based on arrival_rate, new users spawned after exceeding max_request is reached will be stopped.
# TODO: user should not care about load shape
if self.environment.parsed_options.load_shape == "poisson":
if "arrival_rate" in self.environment.parsed_options:
self.stop(force=True)

time.sleep(1)
@@ -186,10 +186,10 @@ def bench_main(self):
self.environment.runner.stats.log_request("POST", url, time.perf_counter() - start_ts, 0)
self.environment.runner.stats.log_error("POST", url, "Locust Request error")

# For poisson load shape, a user only sends single request before it stops.
# For custom load shape based on arrival_rate, a user only sends single request before it sleeps.
# TODO: user should not care about load shape
if self.environment.parsed_options.load_shape == "poisson":
self.stop(force=True)
if "arrival_rate" in self.environment.parsed_options:
time.sleep(365 * 60 * 60)

# def on_stop(self) -> None:

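A minimal, self-contained sketch of the parking behaviour above. The host, endpoint, and payload are placeholders, and the real AiStressUser additionally enforces max_request and shares request counters across worker processes:

```python
import time

from locust import HttpUser, constant, task


class OneShotUser(HttpUser):
    host = "http://localhost:8888"  # placeholder host
    wait_time = constant(0)

    @task
    def bench_main(self):
        self.client.post("/v1/chatqna", json={"messages": "hello"})  # placeholder request
        if "arrival_rate" in self.environment.parsed_options:
            # One request per user: park instead of stopping, so the load shape
            # (which keeps adding users) is the only thing driving the request
            # rate and Locust does not respawn a replacement user.
            time.sleep(365 * 60 * 60)
```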
@@ -220,6 +220,7 @@ def on_locust_init(environment, **_kwargs):
environment.runner.register_message("worker_reqsent", on_reqsent)
if not isinstance(environment.runner, MasterRunner):
environment.runner.register_message("all_reqcnt", on_reqcount)
environment.runner.register_message("test_quit", on_quit)


@events.quitting.add_listener
@@ -249,12 +250,20 @@ def on_reqcount(msg, **kwargs):
AiStressUser.request = msg.data


def on_quit(environment, msg, **kwargs):
logging.debug(f"Test quitting, set stop_timeout to 0...")
environment.runner.environment.stop_timeout = 0


def checker(environment):
while environment.runner.state not in [STATE_STOPPING, STATE_STOPPED, STATE_CLEANUP]:
time.sleep(1)
max_request = environment.parsed_options.max_request
if max_request >= 0 and environment.runner.stats.num_requests >= max_request:
logging.info(f"Exceed the max-request number:{environment.runner.stats.num_requests}, Exit...")
# Remove stop_timeout after test quit to avoid Locust user stop exception with custom load shape
environment.runner.send_message("test_quit", None)
environment.runner.environment.stop_timeout = 0
# while environment.runner.user_count > 0:
time.sleep(5)
environment.runner.quit()
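The new test_quit message relies on Locust's custom-message API so that every worker clears stop_timeout before the run quits, which keeps long-sleeping (parked) users from raising stop-timeout errors. A stripped-down sketch of that pattern (handler and message names mirror the diff; the surrounding locustfile is assumed):

```python
from locust import events
from locust.runners import MasterRunner


def on_quit(environment, msg, **kwargs):
    # Workers drop stop_timeout so parked users can be killed immediately.
    environment.runner.environment.stop_timeout = 0


@events.init.add_listener
def on_locust_init(environment, **_kwargs):
    if not isinstance(environment.runner, MasterRunner):
        environment.runner.register_message("test_quit", on_quit)


# On the master, just before quitting:
#   environment.runner.send_message("test_quit", None)
```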
50 changes: 50 additions & 0 deletions evals/benchmark/stresscli/locust/constant_load_shape.py
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging

import locust
from locust import LoadTestShape, events

logger = logging.getLogger(__name__)


@events.init_command_line_parser.add_listener
def _(parser):
parser.add_argument(
"--arrival_rate",
type=float,
default=1.0,
)


class ConstantRPSLoadShape(LoadTestShape):
use_common_options = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.arrival_rate = locust.argument_parser.parse_options().arrival_rate
self.last_tick = 0

def tick(self):
if self.last_tick == 0:
logger.info("Constant load shape arrival rate: {arrival_rate}".format(arrival_rate=self.arrival_rate))

run_time = self.get_run_time()
if run_time < self.runner.environment.parsed_options.run_time:
self.last_tick = run_time

new_users = int(self.arrival_rate)
current_users = self.get_current_user_count()
user_count = current_users + new_users
logger.debug(
"Current users: {current_users}, New users: {new_users}, Target users: {target_users}".format(
current_users=current_users, new_users=new_users, target_users=user_count
)
)
# Avoid illegal spawn_rate value of 0
spawn_rate = max(0.01, new_users)
return (user_count, spawn_rate)

self.runner.environment.stop_timeout = 0
return None
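ConstantRPSLoadShape ramps users linearly: Locust polls tick() roughly once per second, each call adds arrival_rate new users, and each user (per the aistress.py change) sends exactly one request before parking. A toy simulation of that growth, illustrative only:

```python
arrival_rate = 4
users = 0
for second in range(1, 6):          # pretend Locust polls tick() once per second
    users += int(arrival_rate)      # each new user issues exactly one request
    print(f"t={second}s  total_users={users}  (~{int(arrival_rate)} new requests this second)")
```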
2 changes: 1 addition & 1 deletion evals/benchmark/stresscli/locust/poisson_load_shape.py
@@ -13,7 +13,7 @@
@events.init_command_line_parser.add_listener
def _(parser):
parser.add_argument(
"--arrival-rate",
"--arrival_rate",
type=float,
default=1.0,
)