From 9149a1f3cd42d453ec0d28d1de9a4bc84c2a8eea Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 10:34:49 +0100
Subject: [PATCH 1/6] feat: implemented load test logic

---
 .gitignore                                    |   1 +
 .../helpers/login_user.py                     |  17 +++
 ingestion/src/metadata/test_load.py           |  54 ++++++++
 ingestion/tests/load/README.md                | 129 ++++++++++++++++++
 ingestion/tests/load/__init__.py              |   0
 ingestion/tests/load/summaries/.gitkeep       |   0
 ingestion/tests/load/test_load.py             |  63 +++++++++
 .../tests/load/test_resources/__init__.py     |   0
 .../load/test_resources/all_resources.py      |  55 ++++++++
 .../tests/load/test_resources/manifest.yaml   |  19 +++
 .../load/test_resources/tasks/__init__.py     |   0
 .../tasks/test_case_result_tasks.py           |  70 ++++++++++
 .../test_resources/tasks/test_case_tasks.py   |  37 +++++
 ingestion/tests/load/utils.py                 |  21 +++
 14 files changed, 466 insertions(+)
 create mode 100644 ingestion/src/_openmetadata_testutils/helpers/login_user.py
 create mode 100644 ingestion/src/metadata/test_load.py
 create mode 100644 ingestion/tests/load/README.md
 create mode 100644 ingestion/tests/load/__init__.py
 create mode 100644 ingestion/tests/load/summaries/.gitkeep
 create mode 100644 ingestion/tests/load/test_load.py
 create mode 100644 ingestion/tests/load/test_resources/__init__.py
 create mode 100644 ingestion/tests/load/test_resources/all_resources.py
 create mode 100644 ingestion/tests/load/test_resources/manifest.yaml
 create mode 100644 ingestion/tests/load/test_resources/tasks/__init__.py
 create mode 100644 ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
 create mode 100644 ingestion/tests/load/test_resources/tasks/test_case_tasks.py
 create mode 100644 ingestion/tests/load/utils.py

diff --git a/.gitignore b/.gitignore
index 61d78475dc45..e06c0afbb541 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,6 +103,7 @@ ingestion/requirements.txt
 ingestion/.python-version
 ingestion/venv2/**
 .python-version
+ingestion/tests/load/summaries/*.csv
 
 # MLFlow
 mlruns/
diff --git a/ingestion/src/_openmetadata_testutils/helpers/login_user.py b/ingestion/src/_openmetadata_testutils/helpers/login_user.py
new file mode 100644
index 000000000000..3cad8206323d
--- /dev/null
+++ b/ingestion/src/_openmetadata_testutils/helpers/login_user.py
@@ -0,0 +1,17 @@
+"""Base class for resource tests."""
+
+from locust.contrib.fasthttp import FastHttpSession
+from requests.auth import AuthBase
+
+class BearerAuth(AuthBase):
+    def __init__(self, token):
+        self.token = token
+
+    def __call__(self, r):
+        r.headers["authorization"] = "Bearer " + self.token
+        return r
+
+def login_user(client: FastHttpSession) -> BearerAuth:
+    resp = client.post("/api/v1/users/login", json={"email":"admin@open-metadata.org","password":"YWRtaW4="})
+    token = resp.json().get("accessToken")
+    return BearerAuth(token)
diff --git a/ingestion/src/metadata/test_load.py b/ingestion/src/metadata/test_load.py
new file mode 100644
index 000000000000..b21d1b4483ae
--- /dev/null
+++ b/ingestion/src/metadata/test_load.py
@@ -0,0 +1,54 @@
+"""Run test case result resource load test"""
+
+from pathlib import Path
+import csv
+import yaml
+
+def run_all_resources(summary_file: str, locust_file: str):
+    """Test test case result resource"""
+    args = [
+            "locust",
+            "--headless",
+            "-H",
+            "http://localhost:8585",
+            "--user",
+            "10",
+            "--spawn-rate",
+            "2",
+            "-f",
+            str(locust_file),
+            "--run-time",
+            "1m",
+            "--only-summary",
+            "--csv",
+            str(summary_file),
+        ]
+
+    run_load_test(args)
+
+class TestAllResources(TestCase):
+
+    def test_all_resources(self):
+        directory = Path(__file__).parent
+        test_resources_dir = directory.joinpath("test_resources")
+        
+        locust_file = test_resources_dir.joinpath("all_resources.py")
+        summary_file = directory.parent.joinpath("summaries/all_")
+        manifest_file = directory.parent.joinpath("manifests/manifest.json")
+
+        run_all_resources(str(summary_file), str(locust_file))
+
+        with open(manifest_file, "r") as f:
+            manifest = yaml.safe_load(f)
+
+        with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+
+            for row in reader:
+                if row.get("name") in manifest:
+                    assert row.get("fails") == "0"
+                    ninety_ninth_percentile = row.get("99%")
+                    if ninety_ninth_percentile:
+                        ninety_ninth_percentile = int(ninety_ninth_percentile)
+                        assert ninety_ninth_percentile <= 100 # 99% of the requests should be below 100ms
+    
\ No newline at end of file
diff --git a/ingestion/tests/load/README.md b/ingestion/tests/load/README.md
new file mode 100644
index 000000000000..5625ed1d8571
--- /dev/null
+++ b/ingestion/tests/load/README.md
@@ -0,0 +1,129 @@
+## Adding a new resource to load tests
+Add a new `*.py` file to `test_resources/tasks`. The naming does not matter, but we use the resource name as defined in Java, but seperated by `_` (e.g. `TestCaseResource` becomes `test_case_tasks.py`).
+
+In your newly created file, you'll need to import at minimum 1 package
+```python
+from locust import task, TaskSet
+```
+`task` will be used as a decorator to define our task that will run as part of our load test. `TaskSet` wil be inherited by our task set class.
+
+Here is an example of a locust task definition. The integer argument in `@task` will give a specific weigth to the task (i.e. increasing its probability to be ran)
+```
+class TestCaseResultTasks(TaskSet):
+    """Test case result resource load test"""
+
+    def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str):
+        """List test case results for a given time range
+
+        Args:
+            start_ts (int): start timestamp
+            end_ts (int): end timestamp
+            range (str): 
+        """
+        for test_case in self.test_cases:
+            fqn = test_case.get("fullyQualifiedName")
+            if fqn:
+                self.client.get(
+                    f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}",
+                    params={ # type: ignore
+                        "startTs": start_ts,
+                        "endTs": end_ts,
+                    },
+                    auth=self.bearer,
+                    name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"
+                )
+
+    @task(3)
+    def list_test_case_results_30_days(self):
+        """List test case results for the last 30 days. Weighted 3"""
+        now = datetime.now()
+        last_30_days = int((now - timedelta(days=30)).timestamp() * 1000)
+        self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days")
+```
+
+Notice how we use `self.client.get` to perform the request. This is provided by locust `HttpSession`. If the request needs to be authenticated, you can use `auth=self.bearer`. You will need to first define `self.bearer`, you can achieve this using the `on_start` hook from locust.
+
+```python
+from _openmetadata_testutils.helpers.login_user import login_user
+
+class TestCaseResultTasks(TaskSet):
+    """Test case result resource load test"""
+    [...]
+
+    def on_start(self):
+        """Get a list of test cases to fetch results for"""
+        self.bearer = login_user(self.client)
+        resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer)
+        json = resp.json()
+        self.test_cases = json.get("data", [])
+```
+
+**IMPORTANT**
+You MUST define a `def stop(self)` methodd in your `TaskSet` class as shown below so that control is given back to the parent user class.
+
+```python
+class TestCaseResultTasks(TaskSet):
+    """Test case result resource load test"""
+    [...]
+
+    @task
+    def stop(self):
+        self.interrupt()
+```
+ 
+If your request contains a parameter (i.e. `/api/v1/dataQuality/testCases/testCaseResults/{fqn}`) you can name your request so all the request sent you will be grouped together like this
+
+```python
+self.client.get(
+    f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}",
+    params={ # type: ignore
+        "startTs": start_ts,
+        "endTs": end_ts,
+    },
+    auth=self.bearer,
+    name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"
+)
+```
+
+Notice the argument `name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"`, this will define under which name the requests will be grouped. Example of statistics summary below grouped by the request `name`
+
+```csv
+Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
+GET,/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days,3510,0,13,16.2354597524217,5.146791999997902,100.67633299999557,84567.57407407407,49.30531562959204,0.0,13,17,20,21,28,35,45,56,92,100,100
+```
+
+As a final step in `test_resources/manifest.yaml` add the resources, the metrics and the thresholds you want to test.
+
+```yaml
+/api/v1/dataQuality/testCases/testCaseResults/[fqn]/30_days:
+  type: GET
+  99%: 100
+
+/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days:
+  type: GET
+  99%: 100
+```
+
+This will test that our GET request for the defined resources are running 99% of the time in less than 100 milliseconds (0.1 seconds).
+
+Below is a list of all the metrics you can use:
+- Request Count
+- Failure Count
+- Median Response Time
+- Average Response Time
+- Min Response Time
+- Max Response Time
+- Average Content Size
+- Requests/s
+- Failures/s
+- 50%
+- 66%
+- 75%
+- 80%
+- 90%
+- 95%
+- 98%
+- 99%
+- 99.9%
+- 99.99%
+- 100%
diff --git a/ingestion/tests/load/__init__.py b/ingestion/tests/load/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/tests/load/summaries/.gitkeep b/ingestion/tests/load/summaries/.gitkeep
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py
new file mode 100644
index 000000000000..8a667429274a
--- /dev/null
+++ b/ingestion/tests/load/test_load.py
@@ -0,0 +1,63 @@
+"""Run test case result resource load test"""
+
+import os
+from pathlib import Path
+import csv
+from unittest import TestCase
+import yaml
+
+from ingestion.tests.load.utils import run_load_test
+
+def run_all_resources(summary_file: str, locust_file: str):
+    """Test test case result resource"""
+    args = [
+            "locust",
+            "--headless",
+            "-H",
+            "http://localhost:8585",
+            "--user",
+            os.getenv("LOCUST_USER", "50"),
+            "--spawn-rate",
+            "1",
+            "-f",
+            str(locust_file),
+            "--run-time",
+            os.getenv("LOCUST_RUNTIME", "1m"),
+            "--only-summary",
+            "--csv",
+            str(summary_file),
+        ]
+
+    run_load_test(args)
+
+class TestAllResources(TestCase):
+    """Test class to run all resources load test"""
+    def test_all_resources(self):
+        """Test all resources"""
+        directory = Path(__file__).parent
+        test_resources_dir = directory.joinpath("test_resources")
+
+        locust_file = test_resources_dir.joinpath("all_resources.py")
+        summary_file = directory.parent.joinpath("load/summaries/all_")
+        manifest_file = test_resources_dir.joinpath("manifest.yaml")
+
+        run_all_resources(str(summary_file), str(locust_file))
+
+        with open(manifest_file, "r", encoding="utf-8") as f:
+            manifest = yaml.safe_load(f)
+
+        with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+
+            for row in reader:
+                name = row.get("Name")
+                if name in manifest:
+                    resource = manifest.get(name)
+                    type_ = resource.get("type")
+                    if type_ == row.get("Type"):
+                        for metric, threshold in resource.items():
+                            with self.subTest(stat=metric, resource=name, type_=type_):
+                                stat = row.get(metric)
+                                if stat:
+                                    stat = int(stat)
+                                    self.assertLessEqual(stat, threshold, msg=f"{metric} for {name} is greater than threshold")
diff --git a/ingestion/tests/load/test_resources/__init__.py b/ingestion/tests/load/test_resources/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/tests/load/test_resources/all_resources.py b/ingestion/tests/load/test_resources/all_resources.py
new file mode 100644
index 000000000000..dd7bc9dee9d5
--- /dev/null
+++ b/ingestion/tests/load/test_resources/all_resources.py
@@ -0,0 +1,55 @@
+"""Test class to run all resources load test"""
+
+from typing import List
+import importlib.util
+from pathlib import Path
+import logging
+import inspect
+
+from locust import TaskSet, constant, HttpUser
+
+TASKS_DIR = "tasks"
+
+logger = logging.getLogger(__name__)
+
+def get_all_tasks_set() -> List:
+    resource_classes = []
+    wd = Path(__file__).parent.joinpath(TASKS_DIR)
+    for file_path in wd.glob("*.py"):
+        try:
+            # parent = file_path.parent
+            # if parent not in sys.path:
+            #     sys.path.insert(0, str(parent))
+            if not str(file_path).startswith("base_"):
+                module_path = str(file_path)
+                module_name = file_path.stem
+                spec = importlib.util.spec_from_file_location(module_name, module_path)
+                if not spec:
+                    logger.error(f"Could not load module {module_name}")
+                    continue
+                module = importlib.util.module_from_spec(spec)
+                spec.loader.exec_module(module) # type: ignore
+
+                for _, obj in inspect.getmembers(module, inspect.isclass):
+                    if obj.__module__ == module_name:
+                        resource_classes.append(obj)
+        finally:
+            pass
+            # sys.path.remove(str(parent))
+
+    return resource_classes
+
+
+class AllResources(TaskSet):
+    """Execute tasks for all resources"""
+
+    @classmethod
+    def set_tasks(cls):
+        tasks = get_all_tasks_set()
+        cls.tasks = set(tasks)
+
+class All(HttpUser):
+    host = "http://localhost:8585"
+    wait_time = constant(1)
+    AllResources.set_tasks()
+    tasks = [AllResources]
diff --git a/ingestion/tests/load/test_resources/manifest.yaml b/ingestion/tests/load/test_resources/manifest.yaml
new file mode 100644
index 000000000000..fd0beb890577
--- /dev/null
+++ b/ingestion/tests/load/test_resources/manifest.yaml
@@ -0,0 +1,19 @@
+# Description: This file contains the manifest for the test resources.
+# You can add as a key of a resource any metric available in `summaries/all__summaries.csv` file.
+# times should be expressed in milliseconds (e.g. 1000ms = 1s)
+
+/api/v1/dataQuality/testCases/testCaseResults/[fqn]/30_days:
+  type: GET
+  99%: 1000
+
+/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days:
+  type: GET
+  99%: 1000
+
+/api/v1/dataQuality/testCases/testCaseResults/[fqn]/180_days:
+  type: GET
+  99%: 1000
+
+/api/v1/dataQuality/testCases?limit=10:
+  type: GET
+  99%: 6000
diff --git a/ingestion/tests/load/test_resources/tasks/__init__.py b/ingestion/tests/load/test_resources/tasks/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
new file mode 100644
index 000000000000..5448ab98f886
--- /dev/null
+++ b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
@@ -0,0 +1,70 @@
+"""Load test for the test case result resources"""
+from datetime import datetime, timedelta
+
+from _openmetadata_testutils.helpers.login_user import login_user
+from locust import task, TaskSet
+
+
+TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults"
+TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
+
+class TestCaseResultTasks(TaskSet):
+    """Test case result resource load test"""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.test_cases = []
+
+    def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str):
+        """List test case results for a given time range
+
+        Args:
+            start_ts (int): start timestamp
+            end_ts (int): end timestamp
+            range (str): 
+        """
+        for test_case in self.test_cases:
+            fqn = test_case.get("fullyQualifiedName")
+            if fqn:
+                self.client.get(
+                    f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}",
+                    params={ # type: ignore
+                        "startTs": start_ts,
+                        "endTs": end_ts,
+                    },
+                    auth=self.bearer,
+                    name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"
+                )
+
+    @task(3)
+    def list_test_case_results_30_days(self):
+        """List test case results for the last 30 days. Weighted 3"""
+        now = datetime.now()
+        last_30_days = int((now - timedelta(days=30)).timestamp() * 1000)
+        self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days")
+
+
+    @task(2)
+    def list_test_case_results_60_days(self):
+        """List test case results for the last 60 days. Weighted 2"""
+        now = datetime.now()
+        last_60_days = int((now - timedelta(days=60)).timestamp() * 1000)
+        self._list_test_case_results(last_60_days, int(now.timestamp() * 1000), "60_days")
+
+
+    @task
+    def list_test_case_results_180_days(self):
+        """List test case results for the last 180 days"""
+        now = datetime.now()
+        last_180_days = int((now - timedelta(days=180)).timestamp() * 1000)
+        self._list_test_case_results(last_180_days, int(now.timestamp() * 1000), "180_days")
+
+    @task
+    def stop(self):
+        self.interrupt()
+
+    def on_start(self):
+        """Get a list of test cases to fetch results for"""
+        self.bearer = login_user(self.client)
+        resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer, name=f"{TEST_CASE_RESOURCE_PATH}?limit=100")
+        json = resp.json()
+        self.test_cases = json.get("data", [])
diff --git a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py
new file mode 100644
index 000000000000..17ab80ac24f3
--- /dev/null
+++ b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py
@@ -0,0 +1,37 @@
+"""Load test for the test case resources"""
+
+from _openmetadata_testutils.helpers.login_user import login_user
+from locust import task, TaskSet
+
+TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
+
+class TestCaseTasks(TaskSet):
+    """Test case resource load test"""
+
+    def _list_test_cases(self):
+        """Paginate through the test cases"""
+        resp = self.client.get(
+            f"{TEST_CASE_RESOURCE_PATH}",
+            params={"limit": 10},
+            auth=self.bearer,
+            name=f"{TEST_CASE_RESOURCE_PATH}?limit=10")
+        after = resp.json().get("paging", {}).get("after")
+        while after:
+            resp = self.client.get(
+                f"{TEST_CASE_RESOURCE_PATH}",
+                params={"limit": 10,"after": after},
+                auth=self.bearer,
+                name=f"{TEST_CASE_RESOURCE_PATH}?limit=10")
+            after = resp.json().get("paging", {}).get("after")
+
+    @task(2)
+    def list_test_cases(self):
+        """List test cases. Weighted 2"""
+        self._list_test_cases()
+
+    @task
+    def stop(self):
+        self.interrupt()
+
+    def on_start(self):
+        self.bearer = login_user(self.client)
diff --git a/ingestion/tests/load/utils.py b/ingestion/tests/load/utils.py
new file mode 100644
index 000000000000..cdb6fdab90b4
--- /dev/null
+++ b/ingestion/tests/load/utils.py
@@ -0,0 +1,21 @@
+"""Utils functions for load testing."""
+
+from typing import List
+import sys
+
+import pytest
+from locust import main
+
+TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
+TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults"
+
+def run_load_test(args: List[str]):
+    """Test test case result resource"""
+    original_argv = sys.argv
+    try:
+        sys.argv = args
+        with pytest.raises(SystemExit) as excinfo:
+            main.main()
+        assert excinfo.value.code == 0
+    finally:
+        sys.argv = original_argv

From b5d2b81c36262eee342807f5078cd5710132a5bc Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 11:03:34 +0100
Subject: [PATCH 2/6] style: ran python linting

---
 .../helpers/login_user.py                     |  7 ++-
 ingestion/src/metadata/test_load.py           | 54 -------------------
 ingestion/tests/load/test_load.py             | 46 +++++++++-------
 .../load/test_resources/all_resources.py      | 45 +++++++---------
 .../tasks/test_case_result_tasks.py           | 33 ++++++++----
 .../test_resources/tasks/test_case_tasks.py   | 12 +++--
 ingestion/tests/load/utils.py                 |  3 +-
 7 files changed, 85 insertions(+), 115 deletions(-)
 delete mode 100644 ingestion/src/metadata/test_load.py

diff --git a/ingestion/src/_openmetadata_testutils/helpers/login_user.py b/ingestion/src/_openmetadata_testutils/helpers/login_user.py
index 3cad8206323d..c32ab327f3d1 100644
--- a/ingestion/src/_openmetadata_testutils/helpers/login_user.py
+++ b/ingestion/src/_openmetadata_testutils/helpers/login_user.py
@@ -3,6 +3,7 @@
 from locust.contrib.fasthttp import FastHttpSession
 from requests.auth import AuthBase
 
+
 class BearerAuth(AuthBase):
     def __init__(self, token):
         self.token = token
@@ -11,7 +12,11 @@ def __call__(self, r):
         r.headers["authorization"] = "Bearer " + self.token
         return r
 
+
 def login_user(client: FastHttpSession) -> BearerAuth:
-    resp = client.post("/api/v1/users/login", json={"email":"admin@open-metadata.org","password":"YWRtaW4="})
+    resp = client.post(
+        "/api/v1/users/login",
+        json={"email": "admin@open-metadata.org", "password": "YWRtaW4="},
+    )
     token = resp.json().get("accessToken")
     return BearerAuth(token)
diff --git a/ingestion/src/metadata/test_load.py b/ingestion/src/metadata/test_load.py
deleted file mode 100644
index b21d1b4483ae..000000000000
--- a/ingestion/src/metadata/test_load.py
+++ /dev/null
@@ -1,54 +0,0 @@
-"""Run test case result resource load test"""
-
-from pathlib import Path
-import csv
-import yaml
-
-def run_all_resources(summary_file: str, locust_file: str):
-    """Test test case result resource"""
-    args = [
-            "locust",
-            "--headless",
-            "-H",
-            "http://localhost:8585",
-            "--user",
-            "10",
-            "--spawn-rate",
-            "2",
-            "-f",
-            str(locust_file),
-            "--run-time",
-            "1m",
-            "--only-summary",
-            "--csv",
-            str(summary_file),
-        ]
-
-    run_load_test(args)
-
-class TestAllResources(TestCase):
-
-    def test_all_resources(self):
-        directory = Path(__file__).parent
-        test_resources_dir = directory.joinpath("test_resources")
-        
-        locust_file = test_resources_dir.joinpath("all_resources.py")
-        summary_file = directory.parent.joinpath("summaries/all_")
-        manifest_file = directory.parent.joinpath("manifests/manifest.json")
-
-        run_all_resources(str(summary_file), str(locust_file))
-
-        with open(manifest_file, "r") as f:
-            manifest = yaml.safe_load(f)
-
-        with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f:
-            reader = csv.DictReader(f)
-
-            for row in reader:
-                if row.get("name") in manifest:
-                    assert row.get("fails") == "0"
-                    ninety_ninth_percentile = row.get("99%")
-                    if ninety_ninth_percentile:
-                        ninety_ninth_percentile = int(ninety_ninth_percentile)
-                        assert ninety_ninth_percentile <= 100 # 99% of the requests should be below 100ms
-    
\ No newline at end of file
diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py
index 8a667429274a..0a5e039c0bd5 100644
--- a/ingestion/tests/load/test_load.py
+++ b/ingestion/tests/load/test_load.py
@@ -1,37 +1,41 @@
 """Run test case result resource load test"""
 
+import csv
 import os
 from pathlib import Path
-import csv
 from unittest import TestCase
+
 import yaml
 
 from ingestion.tests.load.utils import run_load_test
 
+
 def run_all_resources(summary_file: str, locust_file: str):
     """Test test case result resource"""
     args = [
-            "locust",
-            "--headless",
-            "-H",
-            "http://localhost:8585",
-            "--user",
-            os.getenv("LOCUST_USER", "50"),
-            "--spawn-rate",
-            "1",
-            "-f",
-            str(locust_file),
-            "--run-time",
-            os.getenv("LOCUST_RUNTIME", "1m"),
-            "--only-summary",
-            "--csv",
-            str(summary_file),
-        ]
+        "locust",
+        "--headless",
+        "-H",
+        "http://localhost:8585",
+        "--user",
+        os.getenv("LOCUST_USER", "50"),
+        "--spawn-rate",
+        "1",
+        "-f",
+        str(locust_file),
+        "--run-time",
+        os.getenv("LOCUST_RUNTIME", "1m"),
+        "--only-summary",
+        "--csv",
+        str(summary_file),
+    ]
 
     run_load_test(args)
 
+
 class TestAllResources(TestCase):
     """Test class to run all resources load test"""
+
     def test_all_resources(self):
         """Test all resources"""
         directory = Path(__file__).parent
@@ -46,7 +50,7 @@ def test_all_resources(self):
         with open(manifest_file, "r", encoding="utf-8") as f:
             manifest = yaml.safe_load(f)
 
-        with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f:
+        with open(str(summary_file) + "_stats.csv", "r", encoding="utf-8") as f:
             reader = csv.DictReader(f)
 
             for row in reader:
@@ -60,4 +64,8 @@ def test_all_resources(self):
                                 stat = row.get(metric)
                                 if stat:
                                     stat = int(stat)
-                                    self.assertLessEqual(stat, threshold, msg=f"{metric} for {name} is greater than threshold")
+                                    self.assertLessEqual(
+                                        stat,
+                                        threshold,
+                                        msg=f"{metric} for {name} is greater than threshold",
+                                    )
diff --git a/ingestion/tests/load/test_resources/all_resources.py b/ingestion/tests/load/test_resources/all_resources.py
index dd7bc9dee9d5..e7d251f26de4 100644
--- a/ingestion/tests/load/test_resources/all_resources.py
+++ b/ingestion/tests/load/test_resources/all_resources.py
@@ -1,41 +1,35 @@
 """Test class to run all resources load test"""
 
-from typing import List
 import importlib.util
-from pathlib import Path
-import logging
 import inspect
+import logging
+from pathlib import Path
+from typing import List
 
-from locust import TaskSet, constant, HttpUser
+from locust import HttpUser, TaskSet, constant
 
 TASKS_DIR = "tasks"
 
 logger = logging.getLogger(__name__)
 
+
 def get_all_tasks_set() -> List:
     resource_classes = []
     wd = Path(__file__).parent.joinpath(TASKS_DIR)
     for file_path in wd.glob("*.py"):
-        try:
-            # parent = file_path.parent
-            # if parent not in sys.path:
-            #     sys.path.insert(0, str(parent))
-            if not str(file_path).startswith("base_"):
-                module_path = str(file_path)
-                module_name = file_path.stem
-                spec = importlib.util.spec_from_file_location(module_name, module_path)
-                if not spec:
-                    logger.error(f"Could not load module {module_name}")
-                    continue
-                module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(module) # type: ignore
-
-                for _, obj in inspect.getmembers(module, inspect.isclass):
-                    if obj.__module__ == module_name:
-                        resource_classes.append(obj)
-        finally:
-            pass
-            # sys.path.remove(str(parent))
+        if not str(file_path).startswith("base_"):
+            module_path = str(file_path)
+            module_name = file_path.stem
+            spec = importlib.util.spec_from_file_location(module_name, module_path)
+            if not spec:
+                logger.error(f"Could not load module {module_name}")
+                continue
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)  # type: ignore
+
+            for _, obj in inspect.getmembers(module, inspect.isclass):
+                if obj.__module__ == module_name:
+                    resource_classes.append(obj)
 
     return resource_classes
 
@@ -48,8 +42,9 @@ def set_tasks(cls):
         tasks = get_all_tasks_set()
         cls.tasks = set(tasks)
 
+
 class All(HttpUser):
     host = "http://localhost:8585"
-    wait_time = constant(1)
+    wait_time = constant(1)  # closed workload
     AllResources.set_tasks()
     tasks = [AllResources]
diff --git a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
index 5448ab98f886..2e398d695f9a 100644
--- a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
+++ b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py
@@ -1,15 +1,17 @@
 """Load test for the test case result resources"""
 from datetime import datetime, timedelta
 
-from _openmetadata_testutils.helpers.login_user import login_user
-from locust import task, TaskSet
+from locust import TaskSet, task
 
+from _openmetadata_testutils.helpers.login_user import login_user
 
 TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults"
 TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
 
+
 class TestCaseResultTasks(TaskSet):
     """Test case result resource load test"""
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.test_cases = []
@@ -20,19 +22,19 @@ def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str):
         Args:
             start_ts (int): start timestamp
             end_ts (int): end timestamp
-            range (str): 
+            range (str):
         """
         for test_case in self.test_cases:
             fqn = test_case.get("fullyQualifiedName")
             if fqn:
                 self.client.get(
                     f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}",
-                    params={ # type: ignore
+                    params={  # type: ignore
                         "startTs": start_ts,
                         "endTs": end_ts,
                     },
                     auth=self.bearer,
-                    name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"
+                    name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}",
                 )
 
     @task(3)
@@ -40,23 +42,27 @@ def list_test_case_results_30_days(self):
         """List test case results for the last 30 days. Weighted 3"""
         now = datetime.now()
         last_30_days = int((now - timedelta(days=30)).timestamp() * 1000)
-        self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days")
-
+        self._list_test_case_results(
+            last_30_days, int(now.timestamp() * 1000), "30_days"
+        )
 
     @task(2)
     def list_test_case_results_60_days(self):
         """List test case results for the last 60 days. Weighted 2"""
         now = datetime.now()
         last_60_days = int((now - timedelta(days=60)).timestamp() * 1000)
-        self._list_test_case_results(last_60_days, int(now.timestamp() * 1000), "60_days")
-
+        self._list_test_case_results(
+            last_60_days, int(now.timestamp() * 1000), "60_days"
+        )
 
     @task
     def list_test_case_results_180_days(self):
         """List test case results for the last 180 days"""
         now = datetime.now()
         last_180_days = int((now - timedelta(days=180)).timestamp() * 1000)
-        self._list_test_case_results(last_180_days, int(now.timestamp() * 1000), "180_days")
+        self._list_test_case_results(
+            last_180_days, int(now.timestamp() * 1000), "180_days"
+        )
 
     @task
     def stop(self):
@@ -65,6 +71,11 @@ def stop(self):
     def on_start(self):
         """Get a list of test cases to fetch results for"""
         self.bearer = login_user(self.client)
-        resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer, name=f"{TEST_CASE_RESOURCE_PATH}?limit=100")
+        resp = self.client.get(
+            f"{TEST_CASE_RESOURCE_PATH}",
+            params={"limit": 100},
+            auth=self.bearer,
+            name=f"{TEST_CASE_RESOURCE_PATH}?limit=100",
+        )
         json = resp.json()
         self.test_cases = json.get("data", [])
diff --git a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py
index 17ab80ac24f3..faa7a0fae875 100644
--- a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py
+++ b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py
@@ -1,10 +1,12 @@
 """Load test for the test case resources"""
 
+from locust import TaskSet, task
+
 from _openmetadata_testutils.helpers.login_user import login_user
-from locust import task, TaskSet
 
 TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
 
+
 class TestCaseTasks(TaskSet):
     """Test case resource load test"""
 
@@ -14,14 +16,16 @@ def _list_test_cases(self):
             f"{TEST_CASE_RESOURCE_PATH}",
             params={"limit": 10},
             auth=self.bearer,
-            name=f"{TEST_CASE_RESOURCE_PATH}?limit=10")
+            name=f"{TEST_CASE_RESOURCE_PATH}?limit=10",
+        )
         after = resp.json().get("paging", {}).get("after")
         while after:
             resp = self.client.get(
                 f"{TEST_CASE_RESOURCE_PATH}",
-                params={"limit": 10,"after": after},
+                params={"limit": 10, "after": after},
                 auth=self.bearer,
-                name=f"{TEST_CASE_RESOURCE_PATH}?limit=10")
+                name=f"{TEST_CASE_RESOURCE_PATH}?limit=10",
+            )
             after = resp.json().get("paging", {}).get("after")
 
     @task(2)
diff --git a/ingestion/tests/load/utils.py b/ingestion/tests/load/utils.py
index cdb6fdab90b4..b73bb1aab421 100644
--- a/ingestion/tests/load/utils.py
+++ b/ingestion/tests/load/utils.py
@@ -1,7 +1,7 @@
 """Utils functions for load testing."""
 
-from typing import List
 import sys
+from typing import List
 
 import pytest
 from locust import main
@@ -9,6 +9,7 @@
 TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases"
 TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults"
 
+
 def run_load_test(args: List[str]):
     """Test test case result resource"""
     original_argv = sys.argv

From 3576bb967ae5328f95e299accd6e2d151cf6ff73 Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 11:34:05 +0100
Subject: [PATCH 3/6] fix: added locust dependency in test

---
 ingestion/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ingestion/setup.py b/ingestion/setup.py
index 966bf70fcade..578b0275a91b 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -346,6 +346,7 @@
 test = {
     # Install Airflow as it's not part of `all` plugin
     "opentelemetry-exporter-otlp==1.27.0",
+    "locust~=2.32.0",
     VERSIONS["airflow"],
     "boto3-stubs",
     "mypy-boto3-glue",

From 8688d35714ddacdf5bd1181c2df6faa46e119f9b Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 12:14:40 +0100
Subject: [PATCH 4/6] fix: skip locust in 3.8 as not supported

---
 ingestion/setup.py                | 5 ++++-
 ingestion/tests/load/test_load.py | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/ingestion/setup.py b/ingestion/setup.py
index 578b0275a91b..be3a895347dc 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -13,6 +13,7 @@
 Python Dependencies
 """
 
+import sys
 from typing import Dict, List, Set
 
 from setuptools import setup
@@ -346,7 +347,6 @@
 test = {
     # Install Airflow as it's not part of `all` plugin
     "opentelemetry-exporter-otlp==1.27.0",
-    "locust~=2.32.0",
     VERSIONS["airflow"],
     "boto3-stubs",
     "mypy-boto3-glue",
@@ -401,6 +401,9 @@
     *plugins["mssql"],
 }
 
+if sys.version_info >= (3, 9):
+    test.add("locust~=2.32.0")
+
 e2e_test = {
     # playwright dependencies
     "pytest-playwright",
diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py
index 0a5e039c0bd5..d8bd1460d15c 100644
--- a/ingestion/tests/load/test_load.py
+++ b/ingestion/tests/load/test_load.py
@@ -2,8 +2,9 @@
 
 import csv
 import os
+import sys
 from pathlib import Path
-from unittest import TestCase
+from unittest import TestCase, skipIf
 
 import yaml
 
@@ -36,6 +37,7 @@ def run_all_resources(summary_file: str, locust_file: str):
 class TestAllResources(TestCase):
     """Test class to run all resources load test"""
 
+    @skipIf(sys.version_info < (3, 9), "locust is not supported on python 3.8")
     def test_all_resources(self):
         """Test all resources"""
         directory = Path(__file__).parent

From 0db2c11c20ff3587863f8b796a74a2238330129c Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 13:47:18 +0100
Subject: [PATCH 5/6] fix: update gcsfs version

---
 ingestion/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ingestion/setup.py b/ingestion/setup.py
index be3a895347dc..3bf8499ba4dd 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -27,7 +27,7 @@
     "geoalchemy2": "GeoAlchemy2~=0.12",
     "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0",
     "google-cloud-storage": "google-cloud-storage==1.43.0",
-    "gcsfs": "gcsfs>=2023.1.0",
+    "gcsfs": "gcsfs>=2024.10.0",
     "great-expectations": "great-expectations>=0.18.0,<0.18.14",
     "grpc-tools": "grpcio-tools>=1.47.2",
     "msal": "msal~=1.2",

From 8817215166289b22233db0a924ed2ff6a5dc5623 Mon Sep 17 00:00:00 2001
From: Teddy Crepineau <teddy.crepineau@gmail.com>
Date: Fri, 20 Dec 2024 14:09:37 +0100
Subject: [PATCH 6/6] fix: revert gcsfs versionning

---
 ingestion/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ingestion/setup.py b/ingestion/setup.py
index 3bf8499ba4dd..be3a895347dc 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -27,7 +27,7 @@
     "geoalchemy2": "GeoAlchemy2~=0.12",
     "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0",
     "google-cloud-storage": "google-cloud-storage==1.43.0",
-    "gcsfs": "gcsfs>=2024.10.0",
+    "gcsfs": "gcsfs>=2023.1.0",
     "great-expectations": "great-expectations>=0.18.0,<0.18.14",
     "grpc-tools": "grpcio-tools>=1.47.2",
     "msal": "msal~=1.2",