From d71c544883ac783ba685fcb03333a92af2d21504 Mon Sep 17 00:00:00 2001 From: German <28149841+germa89@users.noreply.github.com> Date: Wed, 23 Oct 2024 18:11:40 +0200 Subject: [PATCH] docs: documenting using pymapdl on clusters (#3466) * feat: adding env vars needed for multinode * feat: adding env vars needed for multinode * feat: renaming hpc detection argument * docs: adding documentation * chore: adding changelog file 3466.documentation.md * feat: adding env vars needed for multinode * feat: renaming hpc detection argument * docs: adding documentation * chore: adding changelog file 3466.documentation.md * fix: vale issues * chore: To fix sphinx build Squashed commit of the following: commit c1d1a3ea278e6461bcc91e1c965f6e6a46d00bc3 Author: German <28149841+germa89@users.noreply.github.com> Date: Mon Oct 7 15:33:19 2024 +0200 ci: retrigger CICD commit b7b5c30a422413d203a31f5a29b7e57f93a0ab08 Author: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon Oct 7 13:31:55 2024 +0000 ci: auto fixes from pre-commit.com hooks. 
for more information, see https://pre-commit.ci commit 32a1c0203fc5101f429aafafba26a28cc06bf24c Author: Revathy Venugopal <104772255+Revathyvenugopal162@users.noreply.github.com> Date: Mon Oct 7 15:31:24 2024 +0200 fix: add suggestions Co-authored-by: German <28149841+germa89@users.noreply.github.com> commit 575a219ef8b135b234f2ec5f24a9585298845eca Merge: f2afe139f be1be2e2c Author: Revathyvenugopal162 Date: Mon Oct 7 15:09:01 2024 +0200 Merge branch 'fix/add-build-cheatsheet-as-env-varaible' of https://github.com/ansys/pymapdl into fix/add-build-cheatsheet-as-env-varaible commit f2afe139f693f4f1979506662c514692280487a9 Author: Revathyvenugopal162 Date: Mon Oct 7 15:08:58 2024 +0200 fix: precommit commit be1be2e2ca4f8736db0b180ab3d8cc6bff696412 Author: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Mon Oct 7 13:07:35 2024 +0000 chore: adding changelog file 3468.fixed.md commit f052a4dba77cb586be59232d2627d7814077f094 Author: Revathyvenugopal162 Date: Mon Oct 7 15:05:56 2024 +0200 fix: add build cheatsheet as env variable within doc-build * docs: expanding a bit troubleshooting advices and small format fix * docs: fix vale * fix: nproc tests * feat: adding env vars needed for multinode * feat: renaming hpc detection argument * docs: adding documentation * chore: adding changelog file 3466.documentation.md * fix: vale issues * docs: fix vale * docs: expanding a bit troubleshooting advices and small format fix * fix: nproc tests * revert: "chore: To fix sphinx build" This reverts commit e45d2e5d4fb97359605f445f462fa4b9cf76515a. * docs: clarifying where everything is running. 
* docs: expanding bash example * tests: fix * docs: adding `PYMAPDL_NPROC` to env var section * docs: fix vale issue * docs: fix vale issue * fix: replacing env var name * fix: unit tests * chore: adding changelog file 3466.documentation.md [dependabot-skip] * Apply suggestions from code review Co-authored-by: Camille <78221213+clatapie@users.noreply.github.com> * docs: apply suggestions from code review made by Kathy Co-authored-by: Kathy Pippert <84872299+PipKat@users.noreply.github.com> * docs: adding Kathy suggestion. --------- Co-authored-by: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Co-authored-by: Camille <78221213+clatapie@users.noreply.github.com> Co-authored-by: Kathy Pippert <84872299+PipKat@users.noreply.github.com> --- doc/changelog.d/3466.documentation.md | 1 + .../extended_examples/hpc/hpc_ml_ga.rst | 2 +- doc/source/user_guide/hpc/pymapdl.rst | 182 ++++++++++++++---- doc/source/user_guide/hpc/settings.rst | 52 +++-- doc/source/user_guide/hpc/troubleshooting.rst | 134 ++++++++++--- doc/source/user_guide/mapdl.rst | 182 ++++++++++-------- doc/source/user_guide/troubleshoot.rst | 1 + src/ansys/mapdl/core/launcher.py | 36 ++-- tests/test_launcher.py | 47 +++-- 9 files changed, 441 insertions(+), 196 deletions(-) create mode 100644 doc/changelog.d/3466.documentation.md diff --git a/doc/changelog.d/3466.documentation.md b/doc/changelog.d/3466.documentation.md new file mode 100644 index 0000000000..7c211ed0d1 --- /dev/null +++ b/doc/changelog.d/3466.documentation.md @@ -0,0 +1 @@ +docs: documenting using pymapdl on clusters \ No newline at end of file diff --git a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst index 30570b5c6b..fb87bb7e6d 100644 --- a/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst +++ b/doc/source/examples/extended_examples/hpc/hpc_ml_ga.rst @@ -251,7 +251,7 @@ this script. 
If you have problems when creating the virtual environment or accessing it from the compute nodes, - see :ref:`ref_hpc_pymapdl_job`. + see :ref:`ref_hpc_troubleshooting`. 3. Install the requirements for this example from the :download:`requirements.txt ` file. diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst index e0fddefa78..c6d0a6c307 100644 --- a/doc/source/user_guide/hpc/pymapdl.rst +++ b/doc/source/user_guide/hpc/pymapdl.rst @@ -1,84 +1,182 @@ -.. _ref_hpc_pymapdl: +.. _ref_hpc_pymapdl_job: -============================= -PyMAPDL on SLURM HPC clusters -============================= +======================= +PyMAPDL on HPC clusters +======================= -.. _ref_hpc_pymapdl_job: -Submit a PyMAPDL job -==================== +Introduction +============ -To submit a PyMAPDL job, you must create two files: +PyMAPDL communicates with MAPDL using the gRPC protocol. +This protocol offers the many advantages and features described in +:ref:`ref_project_page`. +One of these features is that it is not required to have both +PyMAPDL and MAPDL processes running on the same machine. +This possibility opens the door to many configurations, depending +on whether or not you run them both on the HPC compute nodes. +Additionally, you might be able to interact with them (``interactive`` mode) +or not (``batch`` mode). -- Python script with the PyMAPDL code -- Bash script that activates the virtual environment and calls the Python script +For information on supported configurations, see :ref:`ref_pymapdl_batch_in_cluster_hpc`. + + +Since v0.68.5, PyMAPDL can take advantage of the tight integration +between the scheduler and MAPDL to read the job configuration and +launch an MAPDL instance that can use all the resources allocated +to that job. +For instance, if a SLURM job has allocated 8 nodes with 4 cores each, +then PyMAPDL launches an MAPDL instance which uses 32 cores +spanning across those 8 nodes. 
+This behavior can be turned off by setting the :envvar:`PYMAPDL_ON_SLURM` +environment variable to ``FALSE`` or by passing the ``detect_HPC=False`` argument +to the :func:`launch_mapdl() ` function. + + +.. _ref_pymapdl_batch_in_cluster_hpc: + +Submit a PyMAPDL batch job to the cluster from the entrypoint node +================================================================== + +Many HPC clusters allow their users to log into a machine using +``ssh``, ``vnc``, ``rdp``, or similar technologies and then submit a job +to the cluster from there. +This entrypoint machine, sometimes known as the *head node* or *entrypoint node*, +might be a virtual machine (VDI/VM). + +In such cases, once the Python virtual environment with PyMAPDL is already +set and is accessible to all the compute nodes, launching a +PyMAPDL job from the entrypoint node is very easy to do using the ``sbatch`` command. +When the ``sbatch`` command is used, PyMAPDL runs and launches an MAPDL instance in +the compute nodes. +No changes are needed on a PyMAPDL script to run it on a SLURM cluster. + +First the virtual environment must be activated in the current terminal. + +.. code-block:: console + + user@entrypoint-machine:~$ export VENV_PATH=/my/path/to/the/venv + user@entrypoint-machine:~$ source $VENV_PATH/bin/activate -**Python script:** ``pymapdl_script.py`` +Once the virtual environment is activated, you can launch any Python +script that has the proper Python shebang (``#!/usr/bin/env python3``). + +For instance, assume that you want to launch the following ``main.py`` Python script: .. code-block:: python + :caption: main.py + + #!/usr/bin/env python3 from ansys.mapdl.core import launch_mapdl - # Number of processors must be lower than the - # number of CPUs allocated for the job. 
- mapdl = launch_mapdl(nproc=10) + mapdl = launch_mapdl(run_location="/home/ubuntu/tmp/tmp/mapdl", loglevel="debug") - mapdl.prep7() - n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") - print(f"Number of CPUs: {n_proc}") + print(mapdl.prep7()) + print(f'Number of CPU: {mapdl.get_value("ACTIVE", 0, "NUMCPU")}') mapdl.exit() +You can run this command in your console: -**Bash script:** ``job.sh`` - -.. code-block:: bash +.. code-block:: console - source /home/user/.venv/bin/activate - python pymapdl_script.py + (venv) user@entrypoint-machine:~$ sbatch main.py -To start the simulation, you use this code: +Alternatively, you can remove the shebang from the Python file and use a +Python executable call: .. code-block:: console - user@machine:~$ srun job.sh + (venv) user@entrypoint-machine:~$ sbatch python main.py + +Additionally, you can change the number of cores used in your +job by setting the :envvar:`PYMAPDL_NPROC` environment variable to the desired value. + +.. code-block:: console + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The bash script allows you to customize the environment before running the Python script. -This bash script performs such tasks as creating environment variables, moving to -different directories, and printing to ensure your configuration is correct. However, -this bash script is not mandatory. -You can avoid having the ``job.sh`` bash script if the virtual environment is activated -and you pass all the environment variables to the job: +You can also add ``sbatch`` options to the command: .. code-block:: console - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun python pymapdl_script.py --export=ALL + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py -The ``--export=ALL`` argument might not be needed, depending on the cluster configuration. 
-Furthermore, you can omit the Python call in the preceding command if you include the -Python shebang (``#!/usr/bin/python3``) in the first line of the ``pymapdl_script.py`` script. +For instance, to launch a PyMAPDL job that starts a four-core MAPDL instance +on a 10-CPU SLURM job, you can run this command: .. code-block:: console - user@machine:~$ source /home/user/.venv/bin/activate - (.venv) user@machine:~$ srun pymapdl_script.py --export=ALL + (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch --partition=qsmall --nodes=10 --ntasks-per-node=1 main.py -If you prefer to run the job in the background, you can use the ``sbatch`` -command instead of the ``srun`` command. However, in this case, the Bash file is needed: + +Using a submission script +------------------------- + +If you need to customize your PyMAPDL job further, you can create a SLURM +submission script for submitting it. +In this case, you must create two files: + +- Python script with the PyMAPDL code +- Bash script that activates the virtual environment and calls the + Python script + +.. code-block:: python + :caption: main.py + + from ansys.mapdl.core import launch_mapdl + + # Number of processors must be lower than the + # number of CPU allocated for the job. + mapdl = launch_mapdl(nproc=10) + + mapdl.prep7() + n_proc = mapdl.get_value("ACTIVE", 0, "NUMCPU") + print(f"Number of CPU: {n_proc}") + + mapdl.exit() + + +.. 
code-block:: bash + :caption: job.sh + + #!/bin/bash + # Set SLURM options + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --nodes=5 # Number of nodes + #SBATCH --ntasks-per-node=2 # Number of tasks (cores) per node + #SBATCH --time=04:00:00 # Set a time limit for the job (optional but recommended) + + # Set env vars + export MY_ENV_VAR=VALUE + + # Activate Python virtual environment + source /home/user/.venv/bin/activate + # Call Python script + python main.py + +To start the simulation, you use this code: .. code-block:: console user@machine:~$ sbatch job.sh - Submitted batch job 1 -Here is the expected output of the job: +In this case, the Python virtual environment does not need to be activated +before submission since it is activated later in the script. + +The expected output of the job follows: .. code-block:: text - Number of CPUs: 10.0 + Number of CPU: 10.0 + +The bash script allows you to customize the environment before running the +Python script. +This bash script performs tasks such as creating environment variables, +moving files to different directories, and printing to ensure your +configuration is correct. diff --git a/doc/source/user_guide/hpc/settings.rst b/doc/source/user_guide/hpc/settings.rst index 7f6af61c63..225b37d1c9 100644 --- a/doc/source/user_guide/hpc/settings.rst +++ b/doc/source/user_guide/hpc/settings.rst @@ -7,14 +7,16 @@ Setting PyMAPDL Requirements ============ -Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain requirements: +Using PyMAPDL in an HPC environment managed by SLURM scheduler has certain +requirements: -* **An Ansys installation must be accessible from all the compute nodes**. +* **An Ansys installation must be accessible from all the compute nodes.** This normally implies that the ``ANSYS`` installation directory is in a shared drive or directory. 
Your HPC cluster administrator should provide you with the path to the ``ANSYS`` directory. -* **A compatible Python installation must be accessible from all the compute nodes**. +* **A compatible Python installation must be accessible from all the compute + nodes.** For compatible Python versions, see :ref:`ref_pymapdl_installation`. Additionally, you must perform a few key steps to ensure efficient job @@ -23,8 +25,8 @@ execution and resource utilization. Subsequent topics describe these steps. Check the Python installation ============================= -The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in a virtual -environment that is accessible from the compute nodes. +The PyMAPDL Python package (``ansys-mapdl-core``) must be installed in +a virtual environment that is accessible from the compute nodes. To see where your Python distribution is installed, use this code: @@ -40,9 +42,10 @@ To print the version of Python you have available, use this code: user@machine:~$ python3 --version Python 3.9.16 -You should be aware that your machine might have installed other Python versions. -To find out if those installations are already in the ``PATH`` environment variable, -you can press the **Tab** key to use autocomplete: +You should be aware that your machine might have other Python versions +installed. +To find out if those installations are already in the ``PATH`` environment +variable, you can press the **Tab** key to use autocomplete: .. code-block:: console @@ -55,11 +58,20 @@ you can press the **Tab** key to use autocomplete: You should use a Python version that is compatible with PyMAPDL. For more information, see :ref:`ref_pymapdl_installation`. -The ``which`` command returns the path where the Python executable is installed. -You can use that executable to create your own Python virtual environment in a directory -that is accessible from all the compute nodes. 
-For most HPC clusters, the ``/home/$user`` directory is generally available to all nodes. -You can then create the virtual environment in the ``/home/user/.venv`` directory: +.. warning:: + + Contact your cluster administrator if you cannot find a Python version + compatible with PyMAPDL. + + +The ``which`` command returns the path where the Python executable is +installed. +You can use that executable to create your own Python virtual environment +in a directory that is accessible from all the compute nodes. +For most HPC clusters, the ``/home/$user`` directory is generally available +to all nodes. +You can then create the virtual environment in the ``/home/user/.venv`` +directory: .. code-block:: console @@ -67,11 +79,13 @@ You can then create the virtual environment in the ``/home/user/.venv`` director After activating the virtual environment, you can install PyMAPDL. +.. _ref_install_pymapdl_on_hpc: Install PyMAPDL =============== -To install PyMAPDL on the activated virtual environment, run the following commands: +To install PyMAPDL on the activated virtual environment, run the following +commands: .. code-block:: console @@ -107,8 +121,9 @@ then you can run that script using: user@machine:~$ srun test.sh -This command might take a minute or two to complete, depending on the amount of free -resources available in the cluster. +This command might take a minute or two to complete, depending on the amount of +free resources available in the cluster. + On the console, you should see this output: .. code-block:: text @@ -116,5 +131,6 @@ On the console, you should see this output: Testing Python! PyMAPDL version 0.68.1 was successfully imported. -If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, especially -:ref:`ref_python_venv_not_accesible`. +If you see an error in the output, see :ref:`ref_hpc_troubleshooting`, +especially :ref:`ref_python_venv_not_accesible`. 
+ diff --git a/doc/source/user_guide/hpc/troubleshooting.rst b/doc/source/user_guide/hpc/troubleshooting.rst index 3a41a60537..da2286431f 100644 --- a/doc/source/user_guide/hpc/troubleshooting.rst +++ b/doc/source/user_guide/hpc/troubleshooting.rst @@ -8,9 +8,41 @@ Troubleshooting Debugging jobs -------------- - Use ``--output`` and ``--error`` directives in batch scripts to capture - standard output and error messages. + standard output and error messages to specific files: + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py - Check SLURM logs for error messages and debugging information. +- It is also a good idea to print the environment variables in your bash script, using + the ``printenv`` *bash* command. + Additionally, you can filter its output using the ``grep`` *bash* command. + + .. code-block:: bash + + #!/bin/bash + #SBATCH --job-name=ansys_job # Job name + #SBATCH --partition=qsmall # Specify the queue/partition name + #SBATCH --output=ansys_job.out # Standard output file + #SBATCH --error=ansys_job.err # Standard error file + + printenv | grep "PYMAPDL" # Print env vars which contain 'PYMAPDL' + printenv | grep "SLURM" # Print env vars which contain 'SLURM' + source /home/user/pymapdl/.venv/bin/activate + python /home/user/pymapdl.py + +- Use PyMAPDL logging to print out valuable information. To activate this, see + :ref:`ref_debug_pymapdl`. + +- If you need more help, see :ref:`ref_troubleshooting`. .. 
_ref_python_venv_not_accesible: @@ -19,44 +51,89 @@ Python virtual environment is not accessible -------------------------------------------- If there is an error while testing the Python installation, it might mean that the Python environment is not accessible to the compute nodes. -For example, in the following output, PyMAPDL could not be found, meaning that the script -is not using the virtual environment (``/home/user/.venv``): +For example, assume you have the following `test.sh` *bash* script: + +.. code-block:: bash + + source /home/user/.venv/bin/activate + python -c "from ansys.mapdl import core as pymapdl; pymapdl.report()" + +The following output is shown after running this script in the terminal: .. code-block:: console user@machine:~$ srun test.sh + Testing Python! Traceback (most recent call last): File "", line 1, in ImportError: No module named ansys.mapdl -This could be for a number of reasons. One of them is that the system Python distribution -used to create the virtual environment is not accessible from the compute nodes -due to one of these reasons: +As the output shows, PyMAPDL could not be found, indicating one of the following problems: + +* The virtual environment does not have PyMAPDL installed. + See :ref:`ref_install_pymapdl_on_hpc`. -- The virtual environment has been created in a - directory that is not accessible from the nodes. -- The virtual environment has been created from a Python - executable that is not available to the compute nodes. - Hence, the virtual environment is not activated. For - example, you might be creating the virtual environment - using Python 3.10, but only Python 3.8 is available - from the compute nodes. +* The script did not properly activate the virtual environment + (``/home/user/.venv``). -You can test which Python executable the cluster is using by starting an interactive session in -a compute node with this code: +The second problem can occur due to a number of reasons. 
+One of them is that the system Python distribution used to create +the virtual environment is not accessible from the compute nodes +because of one of these situations: + +- The virtual environment has been created in a directory that is + not accessible from the nodes. In this case, your terminal might + also show that the ``activate`` file could not be found. + + .. code-block:: console + + user@machine:~$ srun test.sh + Testing Python! + bash: .venv/bin/activate: No such file or directory + + Depending on your terminal configuration, the preceding error might be + sufficient to exit the terminal process. If it is not, the execution continues, + and the subsequent ``python`` call is executed using the default Python executable. + It is very likely that the default Python executable does not have + PyMAPDL installed. Hence the ``ImportError`` error might + appear too. + +- The virtual environment has been created from a Python executable that is + not available to the compute nodes. Hence, the virtual environment is not + activated. + For example, you might be creating the virtual environment using + Python 3.10, but only Python 3.8 is available from the compute nodes. + You can test which Python executable the cluster is using by starting an + interactive session in a compute node with this code to list all commands + that start with ``python``: .. code-block:: console user@machine:~$ srun --pty /bin/bash - user@compute_node_01:~$ compgen -c | grep python # List all commands starting with python + user@compute_node_01:~$ compgen -c | grep python .. the approach to solve this comes from: https://stackoverflow.com/questions/64188693/problem-with-python-environment-and-slurm-srun-sbatch +It should be noted that the preceding approach assumes that all the nodes have similar +configurations. Hence, all of them should have the same Python installations +available. + +You can also use environment variable modules to activate Python installations. 
+For more information, see :ref:`ref_envvar_modules_on_hpc`. + + +.. _ref_envvar_modules_on_hpc: + +Using modules to load Python +---------------------------- + Many HPC infrastructures use environment managers to load and unload -software packages using modules and environment variables. -Hence, you might want to make sure that the correct module is loaded in your script. +software packages using modules and environment variables. +Hence, you might want to make sure that the correct module is loaded in your +script. + For information on two of the most common environment managers, see the `Modules documentation `_ and `Lmod documentation `_. Check your cluster documentation to know which environment @@ -76,12 +153,14 @@ Using the Ansys-provided Python installation **For development purposes only** -In certain HPC environments the possibility of installing a different Python version -is limited for security reasons. In such cases, the Python distribution available in -the Ansys installation can be used. -This Python distribution is a customized Python (CPython) -version for Ansys products use only. Its use is **discouraged** -except for very advanced users and special use cases. +In certain HPC environments the possibility of installing a different Python +version is limited for security reasons. +In such cases, the Python distribution available in the Ansys installation +can be used. +This Python distribution is a customized Python (CPython) version for use only by Ansys +products. +Its use is **discouraged** unless you are a very advanced user or have a special use +case. This Python distribution is in the following directory, where ``%MAPDL_VERSION%`` is the three-digit Ansys version: @@ -98,7 +177,8 @@ For example, here is the directory for Ansys 2024 R2: In Ansys 2024 R1 and later, the unified installer includes CPython 3.10. -Earlier versions include CPython 3.7 (``/commonfiles/CPython/3_7/linx64/Release/python``). 
+Earlier Ansys versions include CPython 3.7 +(``/commonfiles/CPython/3_7/linx64/Release/python``). Because the Ansys installation must be available to all the compute nodes to run simulations using them, this @@ -116,6 +196,8 @@ the compute nodes: user@machine:~$ export PY_PATH=/ansys_inc/v241/commonfiles/CPython/3_10/linx64/Release/Python + This path needs to be adapted to where Ansys is installed and also which version is used. + #. For only Ansys 2024 R1 and earlier, patch the ``PATH`` and ``LD_LIBRARY_PATH`` environment variables: diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst index bfc59931b5..c7ba053666 100644 --- a/doc/source/user_guide/mapdl.rst +++ b/doc/source/user_guide/mapdl.rst @@ -1097,83 +1097,105 @@ Environment variables ===================== There are several PyMAPDL-specific environment variables that can be -used to control the behavior or launching of PyMAPDL and MAPDL. -These are described in the following table: - -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_START_INSTANCE` | Override the behavior of the | -| | :func:`ansys.mapdl.core.launcher.launch_mapdl` function | -| | to only attempt to connect to existing | -| | instances of PyMAPDL. Generally used | -| | in combination with ``PYMAPDL_PORT``. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_START_INSTANCE=True | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_PORT` | Default port for PyMAPDL to connect to. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_PORT=50052 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_IP` | Default IP for PyMAPDL to connect to. | -| | | -| | **Example:** | -| | | -| | .. 
code:: console | -| | | -| | export PYMAPDL_IP=123.45.67.89 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`ANSYSLMD_LICENSE_FILE` | License file or IP address with port in the format | -| | ``PORT@IP``. Do not confuse with the ``IP`` and | -| | ``PORT`` where the MAPDL instance is running, which | -| | are specified using :envvar:`PYMAPDL_IP` and | -| | :envvar:`PYMAPDL_PORT`. | -| | This is helpful for supplying licensing for | -| | Docker. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export ANSYSLMD_LICENSE_FILE=1055@123.45.67.89 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAPDL_EXEC` | Executable path from where to launch MAPDL | -| | instances. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_MAPDL_EXEC=/ansys_inc/v241/ansys/bin/mapdl | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAPDL_VERSION` | Default MAPDL version to launch in case there | -| | are several versions availables. | -| | | -| | **Example:** | -| | | -| | .. code:: console | -| | | -| | export PYMAPDL_MAPDL_VERSION=22.2 | -| | | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_ON_SLURM` | With this environment variable set to ``FALSE``, you can avoid | -| | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | -+---------------------------------------+---------------------------------------------------------------------+ -| :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | -| | connection terminates when running | -| | PRNSOL or NLIST, raise this. In bytes, | -| | defaults to 256 MB. | -| | | -| | Only for developing purposes. 
| -+---------------------------------------+---------------------------------------------------------------------+ +used to control the default behavior of PyMAPDL or launching MAPDL. + +These environment variables do not have +priority over the arguments given in the corresponding functions. +Consider this command: + +.. code-block:: console + + user@machine:~$ export PYMAPDL_PORT=50052 + user@machine:~$ python -c "from ansys.mapdl.core import launch_mapdl; mapdl=launch_mapdl(port=60053)" + +This command launches an MAPDL instance on port 60053 +because the ``port`` argument has priority over the :envvar:`PYMAPDL_PORT` +environment variable. The following table describes all arguments. + + ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_START_INSTANCE` | Override the behavior of the | +| | :func:`ansys.mapdl.core.launcher.launch_mapdl` function | +| | to only attempt to connect to existing | +| | instances of PyMAPDL. Generally used | +| | in combination with ``PYMAPDL_PORT``. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_START_INSTANCE=True | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_PORT` | Default port for PyMAPDL to connect to. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_PORT=50052 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_IP` | Default IP for PyMAPDL to connect to. | +| | | +| | **Example:** | +| | | +| | .. 
code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_IP=123.45.67.89 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_NPROC` | Default number of cores for MAPDL to use. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_NPROC=10 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`ANSYSLMD_LICENSE_FILE` | License file or IP address with port in the format | +| | ``PORT@IP``. Do not confuse with the ``IP`` and | +| | ``PORT`` where the MAPDL instance is running, which | +| | are specified using :envvar:`PYMAPDL_IP` and | +| | :envvar:`PYMAPDL_PORT`. | +| | This is helpful for supplying licensing for | +| | Docker. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export ANSYSLMD_LICENSE_FILE=1055@123.45.67.89 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAPDL_EXEC` | Executable path from where to launch MAPDL | +| | instances. | +| | | +| | **Example:** | +| | | +| | .. code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_MAPDL_EXEC=/ansys_inc/v241/ansys/bin/mapdl | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAPDL_VERSION` | Default MAPDL version to launch in case there | +| | are several versions available. | +| | | +| | **Example:** | +| | | +| | .. 
code-block:: console | +| | | +| | user@machine:~$ export PYMAPDL_MAPDL_VERSION=22.2 | +| | | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_ON_SLURM` | With this environment variable set to ``FALSE``, you can prevent | +| | PyMAPDL from detecting that it is running on a SLURM HPC cluster. | ++---------------------------------------+----------------------------------------------------------------------------------+ +| :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH` | Maximum gRPC message length. If your | +| | connection terminates when running | +| | PRNSOL or NLIST, raise this. In bytes, | +| | defaults to 256 MB. | +| | | +| | Only for development purposes. | ++---------------------------------------+----------------------------------------------------------------------------------+ diff --git a/doc/source/user_guide/troubleshoot.rst b/doc/source/user_guide/troubleshoot.rst index 54cf12d0c7..74a2b63f35 100644 --- a/doc/source/user_guide/troubleshoot.rst +++ b/doc/source/user_guide/troubleshoot.rst @@ -8,6 +8,7 @@ Troubleshooting PyMAPDL To help you resolve any problems that you might have when using PyMAPDL, some of the most common problems and frequently asked questions are posted here. +..
_ref_debug_pymapdl: Debug in PyMAPDL ---------------- diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py index 047404f64c..c6b6096a96 100644 --- a/src/ansys/mapdl/core/launcher.py +++ b/src/ansys/mapdl/core/launcher.py @@ -106,7 +106,7 @@ "add_env_vars", "replace_env_vars", "version", - "detect_slurm_config", + "detect_HPC", "set_no_abort", "force_intel" # Non documented args @@ -972,8 +972,8 @@ def launch_mapdl( add_env_vars: Optional[Dict[str, str]] = None, replace_env_vars: Optional[Dict[str, str]] = None, version: Optional[Union[int, str]] = None, - detect_slurm_config: bool = True, - **kwargs, + detect_HPC: bool = True, + **kwargs: Dict[str, Any], ) -> Union[MapdlGrpc, "MapdlConsole"]: """Start MAPDL locally. @@ -1001,12 +1001,15 @@ def launch_mapdl( MAPDL jobname. Defaults to ``'file'``. nproc : int, optional - Number of processors. Defaults to 2. + Number of processors. Defaults to 2. If running on an HPC cluster, + this value is adjusted to the number of CPUs allocated to the job, + unless ``detect_HPC`` is set to "false". ram : float, optional - Total size in megabytes of the workspace (memory) used for the initial allocation. - The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size - throughout the run, specify a negative number. + Total size in megabytes of the workspace (memory) used for the initial + allocation. The default is ``None``, in which case 2 GB (2048 MB) is + used. To force a fixed size throughout the run, specify a negative + number. mode : str, optional Mode to launch MAPDL. Must be one of the following: @@ -1139,9 +1142,16 @@ def launch_mapdl( export PYMAPDL_MAPDL_VERSION=22.2 + detect_HPC: bool, optional + Whether to detect if PyMAPDL is running on an HPC cluster. Currently + only SLURM clusters are supported. By default, it is set to true. + This detection is skipped if the ``PYMAPDL_ON_SLURM`` + environment variable is set to "false".
For more information, see + :ref:`ref_hpc_slurm`. + kwargs : dict, optional - These keyword arguments are interface specific or for - development purposes. See Notes for more details. + These keyword arguments are interface-specific or for + development purposes. For more information, see Notes. set_no_abort : :class:`bool` *(Development use only)* @@ -1403,6 +1413,10 @@ def launch_mapdl( cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] ) + if args["ON_SLURM"]: + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + # Early exit for debugging. if args["_debug_no_launch"]: # Early exit, just for testing @@ -1742,7 +1756,7 @@ def get_value( # ntasks is for mpi SLURM_NTASKS = get_value("SLURM_NTASKS", kwargs) LOG.info(f"SLURM_NTASKS: {SLURM_NTASKS}") - # Sharing tasks acrros multiple nodes (DMP) + # Sharing tasks across multiple nodes (DMP) # the format of this envvar is a bit tricky. Avoiding it for the moment. # SLURM_TASKS_PER_NODE = int( # kwargs.pop( @@ -1891,7 +1905,7 @@ def is_on_slurm(args: Dict[str, Any]) -> bool: # Let's require the following env vars to exist to go into slurm mode. 
args["ON_SLURM"] = bool( - args["detect_slurm_config"] + args["detect_HPC"] and not is_flag_false # default is true and os.environ.get("SLURM_JOB_NAME") and os.environ.get("SLURM_JOB_ID") diff --git a/tests/test_launcher.py b/tests/test_launcher.py index e7dc8b4843..63ea33d1d3 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -495,13 +495,6 @@ def test_launching_on_busy_port(mapdl, monkeypatch): launch_mapdl(port=mapdl.port) -@requires("local") -def test_cpu_checks(): - machine_cores = psutil.cpu_count(logical=False) - with pytest.raises(NotEnoughResources): - launch_mapdl(nproc=machine_cores + 2) - - def test_fail_channel_port(): with pytest.raises(ValueError): launch_mapdl(channel="something", port="something") @@ -610,7 +603,6 @@ def test_fail_channel_ip(): ), pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -619,12 +611,11 @@ def test_fail_channel_ip(): "SLURM_MEM_PER_NODE": None, "SLURM_NODELIST": None, }, - {"nproc": 5, "jobname": "myawesomejob"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob"}, + id="Testing SLURM_JOB_NAME", ), pytest.param( { - "PYMAPDL_NPROC": 5, "SLURM_JOB_NAME": "myawesomejob", "SLURM_NTASKS": 2, "SLURM_CPUS_PER_TASK": 2, @@ -634,8 +625,8 @@ def test_fail_channel_ip(): "SLURM_NODELIST": None, "PYMAPDL_MAPDL_EXEC": "asdf/qwer/poiu", }, - {"nproc": 5, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, - id="Testing PYMAPDL_NPROC and SLURM_JOB_NAME", + {"nproc": 4, "jobname": "myawesomejob", "exec_file": "asdf/qwer/poiu"}, + id="Testing PYMAPDL_MAPDL_EXEC and SLURM_JOB_NAME", ), ), indirect=["set_env_var_context"], @@ -705,17 +696,17 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_env_var", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) 
-@pytest.mark.parametrize("detect_slurm_config", [True, False, None]) +@pytest.mark.parametrize("detect_HPC", [True, False, None]) def test_is_on_slurm( - monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_slurm_config + monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_HPC ): monkeypatch.setenv("PYMAPDL_ON_SLURM", slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_on_slurm(args={"detect_slurm_config": detect_slurm_config}) + flag = is_on_slurm(args={"detect_HPC": detect_HPC}) - if detect_slurm_config is not True: + if detect_HPC is not True: assert not flag else: @@ -731,7 +722,7 @@ def test_is_on_slurm( if ON_LOCAL: assert ( launch_mapdl( - detect_slurm_config=detect_slurm_config, + detect_HPC=detect_HPC, _debug_no_launch=True, )["ON_SLURM"] == flag @@ -895,6 +886,26 @@ def mycpucount(**kwargs): return 10 # faking 10 cores +@patch("psutil.cpu_count", mycpucount) +def test_nproc_envvar(monkeypatch): + monkeypatch.setenv("PYMAPDL_NPROC", 10) + args = launch_mapdl(_debug_no_launch=True) + assert args["nproc"] == 10 + + +@pytest.mark.parametrize("nproc", [None, 5, 9, 15]) +@patch("psutil.cpu_count", mycpucount) +def test_nproc(monkeypatch, nproc): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + + if nproc and nproc > mycpucount(): + with pytest.raises(NotEnoughResources): + launch_mapdl(nproc=nproc, _debug_no_launch=True) + else: + args = launch_mapdl(nproc=nproc, _debug_no_launch=True) + assert args["nproc"] == (nproc or 2) + + @patch("os.name", "nt") @patch("psutil.cpu_count", mycpucount) def test_generate_mapdl_launch_command_windows():