diff --git a/evals/benchmark/README.md b/evals/benchmark/README.md index 726513e9..ac2f3870 100644 --- a/evals/benchmark/README.md +++ b/evals/benchmark/README.md @@ -66,10 +66,11 @@ test_suite_config: load_shape: # Tenant concurrency pattern name: constant # poisson or constant(locust default load shape) params: # Loadshape-specific parameters - constant: # Poisson load shape specific parameters, activate only if load_shape is poisson + constant: # Constant load shape specific parameters, activate only if load_shape.name is constant concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users - poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson - arrival-rate: 1.0 # Request arrival rate + # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate warm_ups: 0 # Number of test requests for warm-ups run_time: 60m # Total runtime for the test suite seed: # The seed for all RNGs diff --git a/evals/benchmark/benchmark.py b/evals/benchmark/benchmark.py index 8ce76c83..ccb73a3c 100644 --- a/evals/benchmark/benchmark.py +++ b/evals/benchmark/benchmark.py @@ -248,7 +248,7 @@ def run_service_test(example, service_type, service, test_suite_config): deployment_type, test_suite_config.get("service_ip"), test_suite_config.get("service_port"), - test_suite_config.get("namespace") + test_suite_config.get("namespace"), ) base_url = f"http://{svc_ip}:{port}" diff --git a/evals/benchmark/benchmark.yaml b/evals/benchmark/benchmark.yaml index 26c1189c..f7c61d84 100644 --- a/evals/benchmark/benchmark.yaml +++ b/evals/benchmark/benchmark.yaml @@ -19,10 +19,11 @@ test_suite_config: # Overall configuration settings for the test suite load_shape: # Tenant concurrency pattern name: constant # poisson or constant(locust default load shape) params: # Loadshape-specific parameters - constant: # Poisson load shape specific parameters, activate only if load_shape is poisson + constant: # Constant load shape specific parameters, activate only if load_shape.name is constant concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users - poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson - arrival-rate: 1.0 # Request arrival rate + # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate namespace: "" # Fill the user-defined namespace. Otherwise, it will be default. test_cases: diff --git a/evals/benchmark/stresscli/commands/load_test.py b/evals/benchmark/stresscli/commands/load_test.py index 4c015454..748347c0 100644 --- a/evals/benchmark/stresscli/commands/load_test.py +++ b/evals/benchmark/stresscli/commands/load_test.py @@ -142,11 +142,17 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in load_shape_conf = run_settings.get("load-shape", global_settings.get("load-shape", locust_defaults["load-shape"])) try: load_shape = load_shape_conf["name"] + runspec["load-shape"] = load_shape_conf except KeyError: load_shape = DEFAULT_LOADSHAPE - runspec["load-shape"] = load_shape - if load_shape == DEFAULT_LOADSHAPE: + load_shape_params = None + try: + load_shape_params = load_shape_conf["params"][load_shape] + except KeyError: + console_logger.info(f"The specified load shape not found: {load_shape}") + + if load_shape == DEFAULT_LOADSHAPE and (load_shape_params is None or "arrival_rate" not in load_shape_params): # constant load is Locust's default load shape, do nothing. console_logger.info("Use default load shape.") else: @@ -155,7 +161,10 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in if os.path.isfile(f_custom_load_shape): # Add the locust file of the custom load shape into classpath runspec["locustfile"] += f",{f_custom_load_shape}" - console_logger.info("Use custom load shape: {load_shape}") + if load_shape == DEFAULT_LOADSHAPE: + console_logger.info("Use default load shape based on request arrival rate") + else: + console_logger.info("Use custom load shape: {load_shape}") else: console_logger.error( f"Unsupported load shape: {load_shape}." @@ -180,15 +189,9 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in spawn_rate = 100 if runspec["users"] > 100 else runspec["users"] - load_shape_params = None - try: - load_shape_params = load_shape_conf["params"][load_shape] - except KeyError: - console_logger.info(f"The specified load shape not found: {load_shape}") - # Dynamically allocate Locust processes to fit different loads processes = 2 - if load_shape == "constant": + if load_shape == "constant" and (load_shape_params is None or "arrival_rate" not in load_shape_params): if runspec["max_requests"] > 0: processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes else: @@ -197,11 +200,11 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in concurrent_level = int(load_shape_params["concurrent_level"]) processes = 10 if concurrent_level > 400 else 5 if concurrent_level > 200 else processes elif load_shape == "poisson": - if load_shape_params and "arrival-rate" in load_shape_params: - processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5)) + if load_shape_params and "arrival_rate" in load_shape_params: + processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5)) else: - if load_shape_params and "arrival-rate" in load_shape_params: - processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5)) + if load_shape_params and "arrival_rate" in load_shape_params: + processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5)) elif runspec["max_requests"] > 0: processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes @@ -214,7 +217,7 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in "--run-time", runspec["runtime"], "--load-shape", - runspec["load-shape"], + load_shape, "--dataset", runspec["dataset"], "--seed", diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py index 52c030ba..4dbc220a 100644 --- a/evals/benchmark/stresscli/locust/aistress.py +++ b/evals/benchmark/stresscli/locust/aistress.py @@ -87,9 +87,9 @@ def __init__(self, *args, **kwargs): def bench_main(self): max_request = self.environment.parsed_options.max_request if max_request >= 0 and AiStressUser.request >= max_request: - # For poisson load shape, a user only sends single request before it stops. + # For custom load shape based on arrival_rate, new users spawned after exceeding max_request is reached will be stopped. # TODO: user should not care about load shape - if self.environment.parsed_options.load_shape == "poisson": + if "arrival_rate" in self.environment.parsed_options: self.stop(force=True) time.sleep(1) @@ -186,10 +186,10 @@ def bench_main(self): self.environment.runner.stats.log_request("POST", url, time.perf_counter() - start_ts, 0) self.environment.runner.stats.log_error("POST", url, "Locust Request error") - # For poisson load shape, a user only sends single request before it stops. + # For custom load shape based on arrival_rate, a user only sends single request before it sleeps. # TODO: user should not care about load shape - if self.environment.parsed_options.load_shape == "poisson": - self.stop(force=True) + if "arrival_rate" in self.environment.parsed_options: + time.sleep(365 * 60 * 60) # def on_stop(self) -> None: @@ -220,6 +220,7 @@ def on_locust_init(environment, **_kwargs): environment.runner.register_message("worker_reqsent", on_reqsent) if not isinstance(environment.runner, MasterRunner): environment.runner.register_message("all_reqcnt", on_reqcount) + environment.runner.register_message("test_quit", on_quit) @events.quitting.add_listener @@ -249,12 +250,20 @@ def on_reqcount(msg, **kwargs): AiStressUser.request = msg.data +def on_quit(environment, msg, **kwargs): + logging.debug("Test quitting, set stop_timeout to 0...") + environment.runner.environment.stop_timeout = 0 + + def checker(environment): while environment.runner.state not in [STATE_STOPPING, STATE_STOPPED, STATE_CLEANUP]: time.sleep(1) max_request = environment.parsed_options.max_request if max_request >= 0 and environment.runner.stats.num_requests >= max_request: logging.info(f"Exceed the max-request number:{environment.runner.stats.num_requests}, Exit...") + # Remove stop_timeout after test quit to avoid Locust user stop exception with custom load shape + environment.runner.send_message("test_quit", None) + environment.runner.environment.stop_timeout = 0 # while environment.runner.user_count > 0: time.sleep(5) environment.runner.quit() diff --git a/evals/benchmark/stresscli/locust/constant_load_shape.py b/evals/benchmark/stresscli/locust/constant_load_shape.py new file mode 100644 index 00000000..ec6239d6 --- /dev/null +++ b/evals/benchmark/stresscli/locust/constant_load_shape.py @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging + +import locust +from locust import LoadTestShape, events + +logger = logging.getLogger(__name__) + + +@events.init_command_line_parser.add_listener +def _(parser): + parser.add_argument( + "--arrival_rate", + type=float, + default=1.0, + ) + + +class ConstantRPSLoadShape(LoadTestShape): + use_common_options = True + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.arrival_rate = locust.argument_parser.parse_options().arrival_rate + self.last_tick = 0 + + def tick(self): + if self.last_tick == 0: + logger.info("Constant load shape arrival rate: {arrival_rate}".format(arrival_rate=self.arrival_rate)) + + run_time = self.get_run_time() + if run_time < self.runner.environment.parsed_options.run_time: + self.last_tick = run_time + + new_users = int(self.arrival_rate) + current_users = self.get_current_user_count() + user_count = current_users + new_users + logger.debug( + "Current users: {current_users}, New users: {new_users}, Target users: {target_users}".format( + current_users=current_users, new_users=new_users, target_users=user_count + ) + ) + # Avoid illegal spawn_rate value of 0 + spawn_rate = max(0.01, new_users) + return (user_count, spawn_rate) + + self.runner.environment.stop_timeout = 0 + return None diff --git a/evals/benchmark/stresscli/locust/poisson_load_shape.py b/evals/benchmark/stresscli/locust/poisson_load_shape.py index 35a8b64c..80fc0e99 100644 --- a/evals/benchmark/stresscli/locust/poisson_load_shape.py +++ b/evals/benchmark/stresscli/locust/poisson_load_shape.py @@ -13,7 +13,7 @@ @events.init_command_line_parser.add_listener def _(parser): parser.add_argument( - "--arrival-rate", + "--arrival_rate", type=float, default=1.0, ) diff --git a/evals/benchmark/utils.py b/evals/benchmark/utils.py index 2256f0c3..d66212a0 100644 --- a/evals/benchmark/utils.py +++ b/evals/benchmark/utils.py @@ -29,8 +29,10 @@ def write_json(data, filename): logging.error(f"Failed to write {filename}: {e}") return False + from kubernetes import client, config + def get_service_cluster_ip(service_name, namespace="default"): # Load the Kubernetes configuration config.load_kube_config() # or use config.load_incluster_config() if running inside a Kubernetes pod