From 9149a1f3cd42d453ec0d28d1de9a4bc84c2a8eea Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 10:34:49 +0100 Subject: [PATCH 1/6] feat: implemented load test logic --- .gitignore | 1 + .../helpers/login_user.py | 17 +++ ingestion/src/metadata/test_load.py | 54 ++++++++ ingestion/tests/load/README.md | 129 ++++++++++++++++++ ingestion/tests/load/__init__.py | 0 ingestion/tests/load/summaries/.gitkeep | 0 ingestion/tests/load/test_load.py | 63 +++++++++ .../tests/load/test_resources/__init__.py | 0 .../load/test_resources/all_resources.py | 55 ++++++++ .../tests/load/test_resources/manifest.yaml | 19 +++ .../load/test_resources/tasks/__init__.py | 0 .../tasks/test_case_result_tasks.py | 70 ++++++++++ .../test_resources/tasks/test_case_tasks.py | 37 +++++ ingestion/tests/load/utils.py | 21 +++ 14 files changed, 466 insertions(+) create mode 100644 ingestion/src/_openmetadata_testutils/helpers/login_user.py create mode 100644 ingestion/src/metadata/test_load.py create mode 100644 ingestion/tests/load/README.md create mode 100644 ingestion/tests/load/__init__.py create mode 100644 ingestion/tests/load/summaries/.gitkeep create mode 100644 ingestion/tests/load/test_load.py create mode 100644 ingestion/tests/load/test_resources/__init__.py create mode 100644 ingestion/tests/load/test_resources/all_resources.py create mode 100644 ingestion/tests/load/test_resources/manifest.yaml create mode 100644 ingestion/tests/load/test_resources/tasks/__init__.py create mode 100644 ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py create mode 100644 ingestion/tests/load/test_resources/tasks/test_case_tasks.py create mode 100644 ingestion/tests/load/utils.py diff --git a/.gitignore b/.gitignore index 61d78475dc45..e06c0afbb541 100644 --- a/.gitignore +++ b/.gitignore @@ -103,6 +103,7 @@ ingestion/requirements.txt ingestion/.python-version ingestion/venv2/** .python-version +ingestion/tests/load/summaries/*.csv # MLFlow mlruns/ diff --git a/ingestion/src/_openmetadata_testutils/helpers/login_user.py b/ingestion/src/_openmetadata_testutils/helpers/login_user.py new file mode 100644 index 000000000000..3cad8206323d --- /dev/null +++ b/ingestion/src/_openmetadata_testutils/helpers/login_user.py @@ -0,0 +1,17 @@ +"""Base class for resource tests.""" + +from locust.contrib.fasthttp import FastHttpSession +from requests.auth import AuthBase + +class BearerAuth(AuthBase): + def __init__(self, token): + self.token = token + + def __call__(self, r): + r.headers["authorization"] = "Bearer " + self.token + return r + +def login_user(client: FastHttpSession) -> BearerAuth: + resp = client.post("/api/v1/users/login", json={"email":"admin@open-metadata.org","password":"YWRtaW4="}) + token = resp.json().get("accessToken") + return BearerAuth(token) diff --git a/ingestion/src/metadata/test_load.py b/ingestion/src/metadata/test_load.py new file mode 100644 index 000000000000..b21d1b4483ae --- /dev/null +++ b/ingestion/src/metadata/test_load.py @@ -0,0 +1,54 @@ +"""Run test case result resource load test""" + +from pathlib import Path +import csv +import yaml + +def run_all_resources(summary_file: str, locust_file: str): + """Test test case result resource""" + args = [ + "locust", + "--headless", + "-H", + "http://localhost:8585", + "--user", + "10", + "--spawn-rate", + "2", + "-f", + str(locust_file), + "--run-time", + "1m", + "--only-summary", + "--csv", + str(summary_file), + ] + + run_load_test(args) + +class TestAllResources(TestCase): + + def test_all_resources(self): + directory = Path(__file__).parent + test_resources_dir = directory.joinpath("test_resources") + + locust_file = test_resources_dir.joinpath("all_resources.py") + summary_file = directory.parent.joinpath("summaries/all_") + manifest_file = directory.parent.joinpath("manifests/manifest.json") + + run_all_resources(str(summary_file), str(locust_file)) + + with open(manifest_file, "r") as f: + manifest = yaml.safe_load(f) + + with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + + for row in reader: + if row.get("name") in manifest: + assert row.get("fails") == "0" + ninety_ninth_percentile = row.get("99%") + if ninety_ninth_percentile: + ninety_ninth_percentile = int(ninety_ninth_percentile) + assert ninety_ninth_percentile <= 100 # 99% of the requests should be below 100ms + \ No newline at end of file diff --git a/ingestion/tests/load/README.md b/ingestion/tests/load/README.md new file mode 100644 index 000000000000..5625ed1d8571 --- /dev/null +++ b/ingestion/tests/load/README.md @@ -0,0 +1,129 @@ +## Adding a new resource to load tests +Add a new `*.py` file to `test_resources/tasks`. The naming does not matter, but we use the resource name as defined in Java, but seperated by `_` (e.g. `TestCaseResource` becomes `test_case_tasks.py`). + +In your newly created file, you'll need to import at minimum 1 package +```python +from locust import task, TaskSet +``` +`task` will be used as a decorator to define our task that will run as part of our load test. `TaskSet` wil be inherited by our task set class. + +Here is an example of a locust task definition. The integer argument in `@task` will give a specific weigth to the task (i.e. increasing its probability to be ran) +``` +class TestCaseResultTasks(TaskSet): + """Test case result resource load test""" + + def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str): + """List test case results for a given time range + + Args: + start_ts (int): start timestamp + end_ts (int): end timestamp + range (str): + """ + for test_case in self.test_cases: + fqn = test_case.get("fullyQualifiedName") + if fqn: + self.client.get( + f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}", + params={ # type: ignore + "startTs": start_ts, + "endTs": end_ts, + }, + auth=self.bearer, + name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}" + ) + + @task(3) + def list_test_case_results_30_days(self): + """List test case results for the last 30 days. Weighted 3""" + now = datetime.now() + last_30_days = int((now - timedelta(days=30)).timestamp() * 1000) + self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days") +``` + +Notice how we use `self.client.get` to perform the request. This is provided by locust `HttpSession`. If the request needs to be authenticated, you can use `auth=self.bearer`. You will need to first define `self.bearer`, you can achieve this using the `on_start` hook from locust. + +```python +from _openmetadata_testutils.helpers.login_user import login_user + +class TestCaseResultTasks(TaskSet): + """Test case result resource load test""" + [...] + + def on_start(self): + """Get a list of test cases to fetch results for""" + self.bearer = login_user(self.client) + resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer) + json = resp.json() + self.test_cases = json.get("data", []) +``` + +**IMPORTANT** +You MUST define a `def stop(self)` methodd in your `TaskSet` class as shown below so that control is given back to the parent user class. + +```python +class TestCaseResultTasks(TaskSet): + """Test case result resource load test""" + [...] + + @task + def stop(self): + self.interrupt() +``` + +If your request contains a parameter (i.e. `/api/v1/dataQuality/testCases/testCaseResults/{fqn}`) you can name your request so all the request sent you will be grouped together like this + +```python +self.client.get( + f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}", + params={ # type: ignore + "startTs": start_ts, + "endTs": end_ts, + }, + auth=self.bearer, + name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}" +) +``` + +Notice the argument `name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}"`, this will define under which name the requests will be grouped. Example of statistics summary below grouped by the request `name` + +```csv +Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100% +GET,/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days,3510,0,13,16.2354597524217,5.146791999997902,100.67633299999557,84567.57407407407,49.30531562959204,0.0,13,17,20,21,28,35,45,56,92,100,100 +``` + +As a final step in `test_resources/manifest.yaml` add the resources, the metrics and the thresholds you want to test. + +```yaml +/api/v1/dataQuality/testCases/testCaseResults/[fqn]/30_days: + type: GET + 99%: 100 + +/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days: + type: GET + 99%: 100 +``` + +This will test that our GET request for the defined resources are running 99% of the time in less than 100 milliseconds (0.1 seconds). + +Below is a list of all the metrics you can use: +- Request Count +- Failure Count +- Median Response Time +- Average Response Time +- Min Response Time +- Max Response Time +- Average Content Size +- Requests/s +- Failures/s +- 50% +- 66% +- 75% +- 80% +- 90% +- 95% +- 98% +- 99% +- 99.9% +- 99.99% +- 100% diff --git a/ingestion/tests/load/__init__.py b/ingestion/tests/load/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/load/summaries/.gitkeep b/ingestion/tests/load/summaries/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py new file mode 100644 index 000000000000..8a667429274a --- /dev/null +++ b/ingestion/tests/load/test_load.py @@ -0,0 +1,63 @@ +"""Run test case result resource load test""" + +import os +from pathlib import Path +import csv +from unittest import TestCase +import yaml + +from ingestion.tests.load.utils import run_load_test + +def run_all_resources(summary_file: str, locust_file: str): + """Test test case result resource""" + args = [ + "locust", + "--headless", + "-H", + "http://localhost:8585", + "--user", + os.getenv("LOCUST_USER", "50"), + "--spawn-rate", + "1", + "-f", + str(locust_file), + "--run-time", + os.getenv("LOCUST_RUNTIME", "1m"), + "--only-summary", + "--csv", + str(summary_file), + ] + + run_load_test(args) + +class TestAllResources(TestCase): + """Test class to run all resources load test""" + def test_all_resources(self): + """Test all resources""" + directory = Path(__file__).parent + test_resources_dir = directory.joinpath("test_resources") + + locust_file = test_resources_dir.joinpath("all_resources.py") + summary_file = directory.parent.joinpath("load/summaries/all_") + manifest_file = test_resources_dir.joinpath("manifest.yaml") + + run_all_resources(str(summary_file), str(locust_file)) + + with open(manifest_file, "r", encoding="utf-8") as f: + manifest = yaml.safe_load(f) + + with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + + for row in reader: + name = row.get("Name") + if name in manifest: + resource = manifest.get(name) + type_ = resource.get("type") + if type_ == row.get("Type"): + for metric, threshold in resource.items(): + with self.subTest(stat=metric, resource=name, type_=type_): + stat = row.get(metric) + if stat: + stat = int(stat) + self.assertLessEqual(stat, threshold, msg=f"{metric} for {name} is greater than threshold") diff --git a/ingestion/tests/load/test_resources/__init__.py b/ingestion/tests/load/test_resources/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/load/test_resources/all_resources.py b/ingestion/tests/load/test_resources/all_resources.py new file mode 100644 index 000000000000..dd7bc9dee9d5 --- /dev/null +++ b/ingestion/tests/load/test_resources/all_resources.py @@ -0,0 +1,55 @@ +"""Test class to run all resources load test""" + +from typing import List +import importlib.util +from pathlib import Path +import logging +import inspect + +from locust import TaskSet, constant, HttpUser + +TASKS_DIR = "tasks" + +logger = logging.getLogger(__name__) + +def get_all_tasks_set() -> List: + resource_classes = [] + wd = Path(__file__).parent.joinpath(TASKS_DIR) + for file_path in wd.glob("*.py"): + try: + # parent = file_path.parent + # if parent not in sys.path: + # sys.path.insert(0, str(parent)) + if not str(file_path).startswith("base_"): + module_path = str(file_path) + module_name = file_path.stem + spec = importlib.util.spec_from_file_location(module_name, module_path) + if not spec: + logger.error(f"Could not load module {module_name}") + continue + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore + + for _, obj in inspect.getmembers(module, inspect.isclass): + if obj.__module__ == module_name: + resource_classes.append(obj) + finally: + pass + # sys.path.remove(str(parent)) + + return resource_classes + + +class AllResources(TaskSet): + """Execute tasks for all resources""" + + @classmethod + def set_tasks(cls): + tasks = get_all_tasks_set() + cls.tasks = set(tasks) + +class All(HttpUser): + host = "http://localhost:8585" + wait_time = constant(1) + AllResources.set_tasks() + tasks = [AllResources] diff --git a/ingestion/tests/load/test_resources/manifest.yaml b/ingestion/tests/load/test_resources/manifest.yaml new file mode 100644 index 000000000000..fd0beb890577 --- /dev/null +++ b/ingestion/tests/load/test_resources/manifest.yaml @@ -0,0 +1,19 @@ +# Description: This file contains the manifest for the test resources. +# You can add as a key of a resource any metric available in `summaries/all__summaries.csv` file. +# times should be expressed in milliseconds (e.g. 1000ms = 1s) + +/api/v1/dataQuality/testCases/testCaseResults/[fqn]/30_days: + type: GET + 99%: 1000 + +/api/v1/dataQuality/testCases/testCaseResults/[fqn]/60_days: + type: GET + 99%: 1000 + +/api/v1/dataQuality/testCases/testCaseResults/[fqn]/180_days: + type: GET + 99%: 1000 + +/api/v1/dataQuality/testCases?limit=10: + type: GET + 99%: 6000 diff --git a/ingestion/tests/load/test_resources/tasks/__init__.py b/ingestion/tests/load/test_resources/tasks/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py new file mode 100644 index 000000000000..5448ab98f886 --- /dev/null +++ b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py @@ -0,0 +1,70 @@ +"""Load test for the test case result resources""" +from datetime import datetime, timedelta + +from _openmetadata_testutils.helpers.login_user import login_user +from locust import task, TaskSet + + +TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults" +TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" + +class TestCaseResultTasks(TaskSet): + """Test case result resource load test""" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.test_cases = [] + + def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str): + """List test case results for a given time range + + Args: + start_ts (int): start timestamp + end_ts (int): end timestamp + range (str): + """ + for test_case in self.test_cases: + fqn = test_case.get("fullyQualifiedName") + if fqn: + self.client.get( + f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}", + params={ # type: ignore + "startTs": start_ts, + "endTs": end_ts, + }, + auth=self.bearer, + name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}" + ) + + @task(3) + def list_test_case_results_30_days(self): + """List test case results for the last 30 days. Weighted 3""" + now = datetime.now() + last_30_days = int((now - timedelta(days=30)).timestamp() * 1000) + self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days") + + + @task(2) + def list_test_case_results_60_days(self): + """List test case results for the last 60 days. Weighted 2""" + now = datetime.now() + last_60_days = int((now - timedelta(days=60)).timestamp() * 1000) + self._list_test_case_results(last_60_days, int(now.timestamp() * 1000), "60_days") + + + @task + def list_test_case_results_180_days(self): + """List test case results for the last 180 days""" + now = datetime.now() + last_180_days = int((now - timedelta(days=180)).timestamp() * 1000) + self._list_test_case_results(last_180_days, int(now.timestamp() * 1000), "180_days") + + @task + def stop(self): + self.interrupt() + + def on_start(self): + """Get a list of test cases to fetch results for""" + self.bearer = login_user(self.client) + resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer, name=f"{TEST_CASE_RESOURCE_PATH}?limit=100") + json = resp.json() + self.test_cases = json.get("data", []) diff --git a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py new file mode 100644 index 000000000000..17ab80ac24f3 --- /dev/null +++ b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py @@ -0,0 +1,37 @@ +"""Load test for the test case resources""" + +from _openmetadata_testutils.helpers.login_user import login_user +from locust import task, TaskSet + +TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" + +class TestCaseTasks(TaskSet): + """Test case resource load test""" + + def _list_test_cases(self): + """Paginate through the test cases""" + resp = self.client.get( + f"{TEST_CASE_RESOURCE_PATH}", + params={"limit": 10}, + auth=self.bearer, + name=f"{TEST_CASE_RESOURCE_PATH}?limit=10") + after = resp.json().get("paging", {}).get("after") + while after: + resp = self.client.get( + f"{TEST_CASE_RESOURCE_PATH}", + params={"limit": 10,"after": after}, + auth=self.bearer, + name=f"{TEST_CASE_RESOURCE_PATH}?limit=10") + after = resp.json().get("paging", {}).get("after") + + @task(2) + def list_test_cases(self): + """List test cases. Weighted 2""" + self._list_test_cases() + + @task + def stop(self): + self.interrupt() + + def on_start(self): + self.bearer = login_user(self.client) diff --git a/ingestion/tests/load/utils.py b/ingestion/tests/load/utils.py new file mode 100644 index 000000000000..cdb6fdab90b4 --- /dev/null +++ b/ingestion/tests/load/utils.py @@ -0,0 +1,21 @@ +"""Utils functions for load testing.""" + +from typing import List +import sys + +import pytest +from locust import main + +TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" +TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults" + +def run_load_test(args: List[str]): + """Test test case result resource""" + original_argv = sys.argv + try: + sys.argv = args + with pytest.raises(SystemExit) as excinfo: + main.main() + assert excinfo.value.code == 0 + finally: + sys.argv = original_argv From b5d2b81c36262eee342807f5078cd5710132a5bc Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 11:03:34 +0100 Subject: [PATCH 2/6] style: ran python linting --- .../helpers/login_user.py | 7 ++- ingestion/src/metadata/test_load.py | 54 ------------------- ingestion/tests/load/test_load.py | 46 +++++++++------- .../load/test_resources/all_resources.py | 45 +++++++--------- .../tasks/test_case_result_tasks.py | 33 ++++++++---- .../test_resources/tasks/test_case_tasks.py | 12 +++-- ingestion/tests/load/utils.py | 3 +- 7 files changed, 85 insertions(+), 115 deletions(-) delete mode 100644 ingestion/src/metadata/test_load.py diff --git a/ingestion/src/_openmetadata_testutils/helpers/login_user.py b/ingestion/src/_openmetadata_testutils/helpers/login_user.py index 3cad8206323d..c32ab327f3d1 100644 --- a/ingestion/src/_openmetadata_testutils/helpers/login_user.py +++ b/ingestion/src/_openmetadata_testutils/helpers/login_user.py @@ -3,6 +3,7 @@ from locust.contrib.fasthttp import FastHttpSession from requests.auth import AuthBase + class BearerAuth(AuthBase): def __init__(self, token): self.token = token @@ -11,7 +12,11 @@ def __call__(self, r): r.headers["authorization"] = "Bearer " + self.token return r + def login_user(client: FastHttpSession) -> BearerAuth: - resp = client.post("/api/v1/users/login", json={"email":"admin@open-metadata.org","password":"YWRtaW4="}) + resp = client.post( + "/api/v1/users/login", + json={"email": "admin@open-metadata.org", "password": "YWRtaW4="}, + ) token = resp.json().get("accessToken") return BearerAuth(token) diff --git a/ingestion/src/metadata/test_load.py b/ingestion/src/metadata/test_load.py deleted file mode 100644 index b21d1b4483ae..000000000000 --- a/ingestion/src/metadata/test_load.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Run test case result resource load test""" - -from pathlib import Path -import csv -import yaml - -def run_all_resources(summary_file: str, locust_file: str): - """Test test case result resource""" - args = [ - "locust", - "--headless", - "-H", - "http://localhost:8585", - "--user", - "10", - "--spawn-rate", - "2", - "-f", - str(locust_file), - "--run-time", - "1m", - "--only-summary", - "--csv", - str(summary_file), - ] - - run_load_test(args) - -class TestAllResources(TestCase): - - def test_all_resources(self): - directory = Path(__file__).parent - test_resources_dir = directory.joinpath("test_resources") - - locust_file = test_resources_dir.joinpath("all_resources.py") - summary_file = directory.parent.joinpath("summaries/all_") - manifest_file = directory.parent.joinpath("manifests/manifest.json") - - run_all_resources(str(summary_file), str(locust_file)) - - with open(manifest_file, "r") as f: - manifest = yaml.safe_load(f) - - with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f: - reader = csv.DictReader(f) - - for row in reader: - if row.get("name") in manifest: - assert row.get("fails") == "0" - ninety_ninth_percentile = row.get("99%") - if ninety_ninth_percentile: - ninety_ninth_percentile = int(ninety_ninth_percentile) - assert ninety_ninth_percentile <= 100 # 99% of the requests should be below 100ms - \ No newline at end of file diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py index 8a667429274a..0a5e039c0bd5 100644 --- a/ingestion/tests/load/test_load.py +++ b/ingestion/tests/load/test_load.py @@ -1,37 +1,41 @@ """Run test case result resource load test""" +import csv import os from pathlib import Path -import csv from unittest import TestCase + import yaml from ingestion.tests.load.utils import run_load_test + def run_all_resources(summary_file: str, locust_file: str): """Test test case result resource""" args = [ - "locust", - "--headless", - "-H", - "http://localhost:8585", - "--user", - os.getenv("LOCUST_USER", "50"), - "--spawn-rate", - "1", - "-f", - str(locust_file), - "--run-time", - os.getenv("LOCUST_RUNTIME", "1m"), - "--only-summary", - "--csv", - str(summary_file), - ] + "locust", + "--headless", + "-H", + "http://localhost:8585", + "--user", + os.getenv("LOCUST_USER", "50"), + "--spawn-rate", + "1", + "-f", + str(locust_file), + "--run-time", + os.getenv("LOCUST_RUNTIME", "1m"), + "--only-summary", + "--csv", + str(summary_file), + ] run_load_test(args) + class TestAllResources(TestCase): """Test class to run all resources load test""" + def test_all_resources(self): """Test all resources""" directory = Path(__file__).parent @@ -46,7 +50,7 @@ def test_all_resources(self): with open(manifest_file, "r", encoding="utf-8") as f: manifest = yaml.safe_load(f) - with open(str(summary_file)+"_stats.csv", "r", encoding="utf-8") as f: + with open(str(summary_file) + "_stats.csv", "r", encoding="utf-8") as f: reader = csv.DictReader(f) for row in reader: @@ -60,4 +64,8 @@ def test_all_resources(self): stat = row.get(metric) if stat: stat = int(stat) - self.assertLessEqual(stat, threshold, msg=f"{metric} for {name} is greater than threshold") + self.assertLessEqual( + stat, + threshold, + msg=f"{metric} for {name} is greater than threshold", + ) diff --git a/ingestion/tests/load/test_resources/all_resources.py b/ingestion/tests/load/test_resources/all_resources.py index dd7bc9dee9d5..e7d251f26de4 100644 --- a/ingestion/tests/load/test_resources/all_resources.py +++ b/ingestion/tests/load/test_resources/all_resources.py @@ -1,41 +1,35 @@ """Test class to run all resources load test""" -from typing import List import importlib.util -from pathlib import Path -import logging import inspect +import logging +from pathlib import Path +from typing import List -from locust import TaskSet, constant, HttpUser +from locust import HttpUser, TaskSet, constant TASKS_DIR = "tasks" logger = logging.getLogger(__name__) + def get_all_tasks_set() -> List: resource_classes = [] wd = Path(__file__).parent.joinpath(TASKS_DIR) for file_path in wd.glob("*.py"): - try: - # parent = file_path.parent - # if parent not in sys.path: - # sys.path.insert(0, str(parent)) - if not str(file_path).startswith("base_"): - module_path = str(file_path) - module_name = file_path.stem - spec = importlib.util.spec_from_file_location(module_name, module_path) - if not spec: - logger.error(f"Could not load module {module_name}") - continue - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) # type: ignore - - for _, obj in inspect.getmembers(module, inspect.isclass): - if obj.__module__ == module_name: - resource_classes.append(obj) - finally: - pass - # sys.path.remove(str(parent)) + if not str(file_path).startswith("base_"): + module_path = str(file_path) + module_name = file_path.stem + spec = importlib.util.spec_from_file_location(module_name, module_path) + if not spec: + logger.error(f"Could not load module {module_name}") + continue + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore + + for _, obj in inspect.getmembers(module, inspect.isclass): + if obj.__module__ == module_name: + resource_classes.append(obj) return resource_classes @@ -48,8 +42,9 @@ def set_tasks(cls): tasks = get_all_tasks_set() cls.tasks = set(tasks) + class All(HttpUser): host = "http://localhost:8585" - wait_time = constant(1) + wait_time = constant(1) # closed workload AllResources.set_tasks() tasks = [AllResources] diff --git a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py index 5448ab98f886..2e398d695f9a 100644 --- a/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py +++ b/ingestion/tests/load/test_resources/tasks/test_case_result_tasks.py @@ -1,15 +1,17 @@ """Load test for the test case result resources""" from datetime import datetime, timedelta -from _openmetadata_testutils.helpers.login_user import login_user -from locust import task, TaskSet +from locust import TaskSet, task +from _openmetadata_testutils.helpers.login_user import login_user TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults" TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" + class TestCaseResultTasks(TaskSet): """Test case result resource load test""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.test_cases = [] @@ -20,19 +22,19 @@ def _list_test_case_results(self, start_ts: int, end_ts: int, days_range: str): Args: start_ts (int): start timestamp end_ts (int): end timestamp - range (str): + range (str): """ for test_case in self.test_cases: fqn = test_case.get("fullyQualifiedName") if fqn: self.client.get( f"{TEST_CASE_RESULT_RESOURCE_PATH}/{fqn}", - params={ # type: ignore + params={ # type: ignore "startTs": start_ts, "endTs": end_ts, }, auth=self.bearer, - name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}" + name=f"{TEST_CASE_RESULT_RESOURCE_PATH}/[fqn]/{days_range}", ) @task(3) @@ -40,23 +42,27 @@ def list_test_case_results_30_days(self): """List test case results for the last 30 days. Weighted 3""" now = datetime.now() last_30_days = int((now - timedelta(days=30)).timestamp() * 1000) - self._list_test_case_results(last_30_days, int(now.timestamp() * 1000), "30_days") - + self._list_test_case_results( + last_30_days, int(now.timestamp() * 1000), "30_days" + ) @task(2) def list_test_case_results_60_days(self): """List test case results for the last 60 days. Weighted 2""" now = datetime.now() last_60_days = int((now - timedelta(days=60)).timestamp() * 1000) - self._list_test_case_results(last_60_days, int(now.timestamp() * 1000), "60_days") - + self._list_test_case_results( + last_60_days, int(now.timestamp() * 1000), "60_days" + ) @task def list_test_case_results_180_days(self): """List test case results for the last 180 days""" now = datetime.now() last_180_days = int((now - timedelta(days=180)).timestamp() * 1000) - self._list_test_case_results(last_180_days, int(now.timestamp() * 1000), "180_days") + self._list_test_case_results( + last_180_days, int(now.timestamp() * 1000), "180_days" + ) @task def stop(self): @@ -65,6 +71,11 @@ def stop(self): def on_start(self): """Get a list of test cases to fetch results for""" self.bearer = login_user(self.client) - resp = self.client.get(f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 100}, auth=self.bearer, name=f"{TEST_CASE_RESOURCE_PATH}?limit=100") + resp = self.client.get( + f"{TEST_CASE_RESOURCE_PATH}", + params={"limit": 100}, + auth=self.bearer, + name=f"{TEST_CASE_RESOURCE_PATH}?limit=100", + ) json = resp.json() self.test_cases = json.get("data", []) diff --git a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py index 17ab80ac24f3..faa7a0fae875 100644 --- a/ingestion/tests/load/test_resources/tasks/test_case_tasks.py +++ b/ingestion/tests/load/test_resources/tasks/test_case_tasks.py @@ -1,10 +1,12 @@ """Load test for the test case resources""" +from locust import TaskSet, task + from _openmetadata_testutils.helpers.login_user import login_user -from locust import task, TaskSet TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" + class TestCaseTasks(TaskSet): """Test case resource load test""" @@ -14,14 +16,16 @@ def _list_test_cases(self): f"{TEST_CASE_RESOURCE_PATH}", params={"limit": 10}, auth=self.bearer, - name=f"{TEST_CASE_RESOURCE_PATH}?limit=10") + name=f"{TEST_CASE_RESOURCE_PATH}?limit=10", + ) after = resp.json().get("paging", {}).get("after") while after: resp = self.client.get( f"{TEST_CASE_RESOURCE_PATH}", - params={"limit": 10,"after": after}, + params={"limit": 10, "after": after}, auth=self.bearer, - name=f"{TEST_CASE_RESOURCE_PATH}?limit=10") + name=f"{TEST_CASE_RESOURCE_PATH}?limit=10", + ) after = resp.json().get("paging", {}).get("after") @task(2) diff --git a/ingestion/tests/load/utils.py b/ingestion/tests/load/utils.py index cdb6fdab90b4..b73bb1aab421 100644 --- a/ingestion/tests/load/utils.py +++ b/ingestion/tests/load/utils.py @@ -1,7 +1,7 @@ """Utils functions for load testing.""" -from typing import List import sys +from typing import List import pytest from locust import main @@ -9,6 +9,7 @@ TEST_CASE_RESOURCE_PATH = "/api/v1/dataQuality/testCases" TEST_CASE_RESULT_RESOURCE_PATH = "/api/v1/dataQuality/testCases/testCaseResults" + def run_load_test(args: List[str]): """Test test case result resource""" original_argv = sys.argv From 3576bb967ae5328f95e299accd6e2d151cf6ff73 Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 11:34:05 +0100 Subject: [PATCH 3/6] fix: added locust dependency in test --- ingestion/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ingestion/setup.py b/ingestion/setup.py index 966bf70fcade..578b0275a91b 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -346,6 +346,7 @@ test = { # Install Airflow as it's not part of `all` plugin "opentelemetry-exporter-otlp==1.27.0", + "locust~=2.32.0", VERSIONS["airflow"], "boto3-stubs", "mypy-boto3-glue", From 8688d35714ddacdf5bd1181c2df6faa46e119f9b Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 12:14:40 +0100 Subject: [PATCH 4/6] fix: skip locust in 3.8 as not supported --- ingestion/setup.py | 5 ++++- ingestion/tests/load/test_load.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ingestion/setup.py b/ingestion/setup.py index 578b0275a91b..be3a895347dc 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -13,6 +13,7 @@ Python Dependencies """ +import sys from typing import Dict, List, Set from setuptools import setup @@ -346,7 +347,6 @@ test = { # Install Airflow as it's not part of `all` plugin "opentelemetry-exporter-otlp==1.27.0", - "locust~=2.32.0", VERSIONS["airflow"], "boto3-stubs", "mypy-boto3-glue", @@ -401,6 +401,9 @@ *plugins["mssql"], } +if sys.version_info >= (3, 9): + test.add("locust~=2.32.0") + e2e_test = { # playwright dependencies "pytest-playwright", diff --git a/ingestion/tests/load/test_load.py b/ingestion/tests/load/test_load.py index 0a5e039c0bd5..d8bd1460d15c 100644 --- a/ingestion/tests/load/test_load.py +++ b/ingestion/tests/load/test_load.py @@ -2,8 +2,9 @@ import csv import os +import sys from pathlib import Path -from unittest import TestCase +from unittest import TestCase, skipIf import yaml @@ -36,6 +37,7 @@ def run_all_resources(summary_file: str, locust_file: str): class TestAllResources(TestCase): """Test class to run all resources load test""" + @skipIf(sys.version_info < (3, 9), "locust is not supported on python 3.8") def test_all_resources(self): """Test all resources""" directory = Path(__file__).parent From 0db2c11c20ff3587863f8b796a74a2238330129c Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 13:47:18 +0100 Subject: [PATCH 5/6] fix: update gcsfs version --- ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion/setup.py b/ingestion/setup.py index be3a895347dc..3bf8499ba4dd 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -27,7 +27,7 @@ "geoalchemy2": "GeoAlchemy2~=0.12", "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0", "google-cloud-storage": "google-cloud-storage==1.43.0", - "gcsfs": "gcsfs>=2023.1.0", + "gcsfs": "gcsfs>=2024.10.0", "great-expectations": "great-expectations>=0.18.0,<0.18.14", "grpc-tools": "grpcio-tools>=1.47.2", "msal": "msal~=1.2", From 8817215166289b22233db0a924ed2ff6a5dc5623 Mon Sep 17 00:00:00 2001 From: Teddy Crepineau Date: Fri, 20 Dec 2024 14:09:37 +0100 Subject: [PATCH 6/6] fix: revert gcsfs versionning --- ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion/setup.py b/ingestion/setup.py index 3bf8499ba4dd..be3a895347dc 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -27,7 +27,7 @@ "geoalchemy2": "GeoAlchemy2~=0.12", "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0", "google-cloud-storage": "google-cloud-storage==1.43.0", - "gcsfs": "gcsfs>=2024.10.0", + "gcsfs": "gcsfs>=2023.1.0", "great-expectations": "great-expectations>=0.18.0,<0.18.14", "grpc-tools": "grpcio-tools>=1.47.2", "msal": "msal~=1.2",