From a8fe4555cfb2b4621627667a63555ea291da617f Mon Sep 17 00:00:00 2001 From: Logan Ward Date: Mon, 30 Dec 2024 11:28:16 -0500 Subject: [PATCH] Overhaul documentation on structuring software (#3731) # Description Makes changes that we discussed during a community call back in May: - Emphasize keeping decorators out of the main library - Discuss benefits of modules for serializing functions - Propose defining separate "workflow-ready" functions in module - General clean up of discussion cc: @Andrew-S-Rosen # Changed Behaviour N/A # Fixes None ## Type of change - Update to human readable text: Documentation/error messages/comments --------- Co-authored-by: Ben Clifford --- docs/userguide/apps.rst | 4 + docs/userguide/examples/config.py | 13 --- docs/userguide/examples/library.py | 6 -- docs/userguide/examples/library/__init__.py | 0 docs/userguide/examples/library/app.py | 9 ++ docs/userguide/examples/library/config.py | 23 ++++ docs/userguide/examples/library/logic.py | 15 +++ docs/userguide/examples/pyproject.toml | 7 ++ docs/userguide/examples/run.py | 35 ++++++ docs/userguide/examples/run_increment.py | 9 -- docs/userguide/modularizing.rst | 113 ++++++++++++++++---- 11 files changed, 183 insertions(+), 51 deletions(-) delete mode 100644 docs/userguide/examples/config.py delete mode 100644 docs/userguide/examples/library.py create mode 100644 docs/userguide/examples/library/__init__.py create mode 100644 docs/userguide/examples/library/app.py create mode 100644 docs/userguide/examples/library/config.py create mode 100644 docs/userguide/examples/library/logic.py create mode 100644 docs/userguide/examples/pyproject.toml create mode 100644 docs/userguide/examples/run.py delete mode 100644 docs/userguide/examples/run_increment.py diff --git a/docs/userguide/apps.rst b/docs/userguide/apps.rst index 1ef105b4fe..41a988db6d 100644 --- a/docs/userguide/apps.rst +++ b/docs/userguide/apps.rst @@ -111,6 +111,8 @@ Practically, this means return input_list +.. 
_functions-from-modules: + Functions from Modules ++++++++++++++++++++++ @@ -194,6 +196,8 @@ Learn more about the types of data allowed in `the data section `_. Any changes to mutable input arguments will be ignored. +.. _special-kwargs: + Special Keyword Arguments +++++++++++++++++++++++++ diff --git a/docs/userguide/examples/config.py b/docs/userguide/examples/config.py deleted file mode 100644 index 68057d2b01..0000000000 --- a/docs/userguide/examples/config.py +++ /dev/null @@ -1,13 +0,0 @@ -from parsl.config import Config -from parsl.executors import HighThroughputExecutor -from parsl.providers import LocalProvider - -htex_config = Config( - executors=[ - HighThroughputExecutor( - label="htex_local", - cores_per_worker=1, - provider=LocalProvider(), - ) - ], -) diff --git a/docs/userguide/examples/library.py b/docs/userguide/examples/library.py deleted file mode 100644 index 2992a2dfc2..0000000000 --- a/docs/userguide/examples/library.py +++ /dev/null @@ -1,6 +0,0 @@ -from parsl import python_app - - -@python_app -def increment(x): - return x + 1 diff --git a/docs/userguide/examples/library/__init__.py b/docs/userguide/examples/library/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/userguide/examples/library/app.py b/docs/userguide/examples/library/app.py new file mode 100644 index 0000000000..d16746ab75 --- /dev/null +++ b/docs/userguide/examples/library/app.py @@ -0,0 +1,9 @@ +"""Functions used as part of the workflow""" +from typing import List, Tuple + +from .logic import convert_to_binary + + +def convert_many_to_binary(xs: List[int]) -> List[Tuple[bool, ...]]: + """Convert a list of nonnegative integers to binary""" + return [convert_to_binary(x) for x in xs] diff --git a/docs/userguide/examples/library/config.py b/docs/userguide/examples/library/config.py new file mode 100644 index 0000000000..4b2301fb66 --- /dev/null +++ b/docs/userguide/examples/library/config.py @@ -0,0 +1,23 @@ +from parsl.config import Config +from 
parsl.executors import HighThroughputExecutor +from parsl.providers import LocalProvider + + +def make_local_config(cores_per_worker: int = 1) -> Config: + """Generate a configuration which runs all tasks on the local system + + Args: + cores_per_worker: Number of cores to dedicate for each task + Returns: + Configuration object with the requested settings + """ + return Config( + executors=[ + HighThroughputExecutor( + label="htex_local", + cores_per_worker=cores_per_worker, + cpu_affinity='block', + provider=LocalProvider(), + ) + ], + ) diff --git a/docs/userguide/examples/library/logic.py b/docs/userguide/examples/library/logic.py new file mode 100644 index 0000000000..769e6158d5 --- /dev/null +++ b/docs/userguide/examples/library/logic.py @@ -0,0 +1,15 @@ +from typing import Tuple + + +def convert_to_binary(x: int) -> Tuple[bool, ...]: + """Convert a nonnegative integer into a binary + + Args: + x: Number to be converted + Returns: + The binary number represented as list of booleans + """ + if x < 0: + raise ValueError('`x` must be nonnegative') + bin_as_string = bin(x) + return tuple(i == '1' for i in bin_as_string[2:]) diff --git a/docs/userguide/examples/pyproject.toml b/docs/userguide/examples/pyproject.toml new file mode 100644 index 0000000000..4c1639bfb4 --- /dev/null +++ b/docs/userguide/examples/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "library" +version = '0.0.0' +description = 'Example library for Parsl documentation' + +[tool.setuptools.packages.find] +include = ['library*'] diff --git a/docs/userguide/examples/run.py b/docs/userguide/examples/run.py new file mode 100644 index 0000000000..9ca5b40219 --- /dev/null +++ b/docs/userguide/examples/run.py @@ -0,0 +1,35 @@ +from argparse import ArgumentParser + +import parsl + +from library.config import make_local_config +from library.app import convert_many_to_binary +from parsl.app.python import PythonApp + +# Protect the script from running twice. 
+# See "Safe importing of main module" in Python multiprocessing docs +# https://docs.python.org/3/library/multiprocessing.html#multiprocessing-programming +if __name__ == "__main__": + # Get user instructions + parser = ArgumentParser() + parser.add_argument('--numbers-per-batch', default=8, type=int) + parser.add_argument('numbers', nargs='+', type=int) + args = parser.parse_args() + + # Prepare the workflow functions + convert_app = PythonApp(convert_many_to_binary, cache=False) + + # Load the configuration + # As a context manager so resources are shutdown on exit + with parsl.load(make_local_config()): + + # Spawn tasks + futures = [ + convert_app(args.numbers[start:start + args.numbers_per_batch]) + for start in range(0, len(args.numbers), args.numbers_per_batch) + ] + + # Retrieve task results + for future in futures: + for x, b in zip(future.task_record['args'][0], future.result()): + print(f'{x} -> {"".join("1" if i else "0" for i in b)}') diff --git a/docs/userguide/examples/run_increment.py b/docs/userguide/examples/run_increment.py deleted file mode 100644 index b265640edc..0000000000 --- a/docs/userguide/examples/run_increment.py +++ /dev/null @@ -1,9 +0,0 @@ -from config import htex_config -from library import increment - -import parsl - -parsl.load(htex_config) - -for i in range(5): - print('{} + 1 = {}'.format(i, increment(i).result())) diff --git a/docs/userguide/modularizing.rst b/docs/userguide/modularizing.rst index 93b23575b9..143a4ebcd8 100644 --- a/docs/userguide/modularizing.rst +++ b/docs/userguide/modularizing.rst @@ -3,40 +3,107 @@ Structuring Parsl programs -------------------------- -Parsl programs can be developed in many ways. When developing a simple program it is -often convenient to include the app definitions and control logic in a single script. 
-However, as a program inevitably grows and changes, like any code, there are significant -benefits to be obtained by modularizing the program, including: +While it is convenient to build simple Parsl programs as a single Python file, +splitting a Parsl program into multiple files and a Python module +has significant benefits, including: 1. Better readability 2. Logical separation of components (e.g., apps, config, and control logic) 3. Ease of reuse of components -The following example illustrates how a Parsl project can be organized into modules. +Large applications that use Parsl often divide into several core components: -The configuration(s) can be defined in a module or file (e.g., ``config.py``) -which can be imported into the control script depending on which execution resources -should be used. +.. contents:: + :local: + :depth: 2 -.. literalinclude:: examples/config.py +The following sections use an example where each component is in a separate file: -Parsl apps can be defined in separate file(s) or module(s) (e.g., ``library.py``) -grouped by functionality. +.. code-block:: + library/logic.py + library/app.py + library/config.py + library/__init__.py + run.py + pyproject.toml -.. literalinclude:: examples/library.py +Run the application by first installing the Python library and then executing the "run.py" script. -Finally, the control logic for the Parsl program can then be implemented in a -separate file (e.g., ``run_increment.py``). This file must the import the -configuration from ``config.py`` before calling the ``increment`` app from -``library.py``: +.. code-block:: bash -.. literalinclude:: examples/run_increment.py + pip install . # Install module so it can be imported by workers + python run.py -Which produces the following output:: - 0 + 1 = 1 - 1 + 1 = 2 - 2 + 1 = 3 - 3 + 1 = 4 - 4 + 1 = 5 +Core application logic +====================== + +The core application logic should be developed without any deference to Parsl. 
+Implement capabilities, write unit tests, and prepare documentation +in whichever way works best for the problem at hand. + +Parallelization with Parsl will be easy if the software already follows best practices. + +The example defines a function to convert a single integer into binary. + +.. literalinclude:: examples/library/logic.py + :caption: library/logic.py + +Workflow functions +================== + +Tasks within a workflow may require unique combinations of core functions. +Functions to be run in parallel must also meet :ref:`specific requirements <function-rules>` +that may complicate writing the core logic effectively. +As such, separating functions to be used as Apps is often beneficial. + +The example includes a function to convert many integers into binary. + +Key points to note: + +- It is not necessary to have import statements inside the function. + Parsl will serialize this function by reference, as described in :ref:`functions-from-modules`. + +- The function is not yet marked as a Parsl PythonApp. + Keeping Parsl out of the function definitions simplifies testing + because you will not need to run Parsl when testing the code. + +- *Advanced*: Consider including Parsl decorators in the library if using complex workflow patterns, + such as :ref:`join apps <label-joinapp>` or functions which take :ref:`special arguments <special-kwargs>`. + +.. literalinclude:: examples/library/app.py + :caption: library/app.py + + +Parsl configuration functions +============================= + +Create Parsl configurations specific to your application needs as functions. +While not necessary, including the Parsl configuration functions inside the module +ensures they can be imported into other scripts easily. + +Generating Parsl :class:`~parsl.config.Config` objects from a function +makes it possible to change the configuration without editing the module. + +The example function provides a configuration suited for a single node. + +.. 
literalinclude:: examples/library/config.py + :caption: library/config.py + +Orchestration scripts +===================== + +The last file defines the workflow itself. + +Such orchestration scripts perform at least four tasks: + +1. *Load execution options* using a tool like :mod:`argparse`. +2. *Prepare workflow functions for execution* by creating :class:`~parsl.app.python.PythonApp` wrappers over each function. +3. *Create a configuration, then start Parsl* with the :meth:`parsl.load` function. +4. *Launch tasks and retrieve results* depending on the needs of the application. + +An example run script is as follows: + +.. literalinclude:: examples/run.py + :caption: run.py