From 036fca98378473e212b61e2b44d864c021834381 Mon Sep 17 00:00:00 2001
From: Ben Galewsky <ben@peartreestudio.net>
Date: Wed, 3 Jul 2024 09:33:31 -0500
Subject: [PATCH] Add example stress test DID Finder that returns large numbers
 of files in each response

---
 README.md                                |  8 +++
 tests/stresstest/stressful_did_finder.py | 74 ++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 tests/stresstest/stressful_did_finder.py

diff --git a/README.md b/README.md
index 844a088..46ace90 100644
--- a/README.md
+++ b/README.md
@@ -148,3 +148,11 @@ All the incoming DID's are expected to be URI's without the schema. As such, the
 * `get` - If the value is `all` (the default) then all files in the dataset must be returned. If the value is `available`, then only files that are accessible need be returned.
 
 As am example, if the following URI is given to ServiceX, "rucio://dataset_name?files=20&get=available", then the first 20 available files of the dataset will be processed by the rest of servicex.
+
+## Stressful DID Finder
+As an example, there is in this repo a simple DID finder that can be used to test the system. It is called `stressful_did_finder.py`. It will return a large number of files, and will take a long time to run. It is useful for testing the system under load.
+I'm not quite sure how to use it yet, but I'm sure it will be useful.
+
+It accepts the following arguments:
+* `--num-files` - The number of files to return as part of each request. Default is 10.
+* `--file-path` - The DID Finder returns the same file over and over. This is the file to return in the response
diff --git a/tests/stresstest/stressful_did_finder.py b/tests/stresstest/stressful_did_finder.py
new file mode 100644
index 0000000..3dc634a
--- /dev/null
+++ b/tests/stresstest/stressful_did_finder.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, IRIS-HEP
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import argparse
+import logging
+from typing import Any, Dict, Generator
+
+from servicex_did_finder_lib import DIDFinderApp
+
+__log = logging.getLogger(__name__)
+
+
+def find_files(did_name: str,
+               info: Dict[str, Any],
+               did_finder_args: dict = None) -> Generator[Dict[str, Any], None, None]:
+    for i in range(int(did_finder_args['num_files'])):
+        yield {
+            'paths': did_finder_args['file_path'],
+            'adler32': 0,  # No clue
+            'file_size': 0,  # Size in bytes if known
+            'file_events': i,  # Include clue of how far we've come
+        }
+
+
+def run_open_data():
+    # Parse the command line arguments
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--num-files', dest='num_files', action='store',
+                        default='10',
+                        help='Number of files to generate for each dataset')
+
+    parser.add_argument('--file-path', dest='file_path', action='store',
+                        default='',
+                        help='Path to a file to be returned in each response')
+
+    DIDFinderApp.add_did_finder_cnd_arguments(parser)
+
+    __log.info('Starting Stressful DID finder')
+    app = DIDFinderApp('stressful_did_finder', parsed_args=parser.parse_args())
+
+    @app.did_lookup_task(name="stressful_did_finder.lookup_dataset")
+    def lookup_dataset(self, did: str, dataset_id: int, endpoint: str) -> None:
+        self.do_lookup(did=did, dataset_id=dataset_id,
+                       endpoint=endpoint, user_did_finder=find_files)
+
+    app.start()
+
+
+if __name__ == "__main__":
+    run_open_data()