Skip to content

Commit

Permalink
Merge branch 'improve_documentation' into 'main'
Browse files Browse the repository at this point in the history
Update documentation for 3.4 release

Closes #166 and #178

See merge request dlb/dlb!76
  • Loading branch information
vlopezh committed Dec 22, 2023
2 parents 1d2571f + 0f671a6 commit f6370bd
Show file tree
Hide file tree
Showing 18 changed files with 716 additions and 238 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ control through OMPT.
4. **Example 4:** Get a TALP summary report at the end of an execution

```bash
export DLB_ARGS="--talp --talp-summary=app"
export DLB_ARGS="--talp --talp-summary=pop-metrics"
PRELOAD=<DLB_PREFIX>/lib/libdlb_mpi.so
mpirun <opts> env LD_PRELOAD="$PRELOAD" ./app
```
Expand Down
9 changes: 8 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,11 @@ AX_VAR_POPVALUE([CFLAGS])

##### Fortran Compiler checks #####
AC_PROG_FC
AC_FC_FREEFORM

# use Fortran free-form by default
# NOTE: we must override action-if-failure to no-op to avoid erroring if FC is
# not found, because testing FC doesn't seem to work
AC_FC_FREEFORM([:], [:])

# Disable Fortran tests if FC is not found
AS_IF([test "x$FC" = x], [
Expand Down Expand Up @@ -397,6 +401,9 @@ AC_CONFIG_LINKS([
doc/user_guide/source/faq.rst:doc/user_guide/source/faq.rst
doc/user_guide/source/how_to_install.rst:doc/user_guide/source/how_to_install.rst
doc/user_guide/source/how_to_run.rst:doc/user_guide/source/how_to_run.rst
doc/user_guide/source/how_to_run_lewi.rst:doc/user_guide/source/how_to_run_lewi.rst
doc/user_guide/source/how_to_run_drom.rst:doc/user_guide/source/how_to_run_drom.rst
doc/user_guide/source/how_to_run_talp.rst:doc/user_guide/source/how_to_run_talp.rst
doc/user_guide/source/intro.rst:doc/user_guide/source/intro.rst
doc/user_guide/source/man_pages_appendix.rst:doc/user_guide/source/man_pages_appendix.rst
doc/user_guide/source/technical_requirements.rst:doc/user_guide/source/technical_requirements.rst
Expand Down
12 changes: 7 additions & 5 deletions doc/scripts/lewi_omp.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ export DLB_ARGS="$DLB_ARGS --ompt"
#export DLB_ARGS="$DLB_ARGS --verbose=ompt"

# OMPT policy flags for LeWI. Select when DLB will invoke LeWI functions.
# Possible values: {mpi, borrow, lend}, default: none
export DLB_ARGS="$DLB_ARGS --lewi-ompt=mpi:borrow"
# Possible values: [none, {borrow:lend}], default: borrow
#export DLB_ARGS="$DLB_ARGS --lewi-ompt=borrow"

# OMPT Thread Manager. Select which Thread Manager will be used to manage the
# OpenMP library. Possible values:
Expand Down Expand Up @@ -90,9 +90,11 @@ if [[ -z $DLB_HOME ]] ; then
fi

# If the application is not linked with DLB, select which DLB version to preload.
# Choose between the non-MPI version, or the MPI versions for C/C++ or Fortran
preload="$DLB_HOME/lib/libdlb.so"
#preload="$DLB_HOME/lib/libdlb_mpi.so"
# Choose between the non-MPI version, the standard MPI version, or the specific
# C/C++ or Fortran MPI.
#preload="$DLB_HOME/lib/libdlb.so"
preload="$DLB_HOME/lib/libdlb_mpi.so"
#preload="$DLB_HOME/lib/libdlb_mpic.so"
#preload="$DLB_HOME/lib/libdlb_mpif.so"

$DLB_HOME/bin/dlb_run env LD_PRELOAD="$LD_PRELOAD:$preload" "$@"
8 changes: 5 additions & 3 deletions doc/scripts/lewi_omp_trace.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ export DLB_ARGS="$DLB_ARGS --ompt"
#export DLB_ARGS="$DLB_ARGS --verbose=ompt"

# OMPT policy flags for LeWI. Select when DLB will invoke LeWI functions.
# Possible values: {mpi, borrow, lend}, default: none
export DLB_ARGS="$DLB_ARGS --lewi-ompt=mpi:borrow:lend"
# Possible values: [none, {borrow:lend}], default: borrow
#export DLB_ARGS="$DLB_ARGS --lewi-ompt=borrow"

# Select events to be instrumented
# Possible values: {none:all:mpi:lewi:drom:talp:barrier:ompt:cpus:callbacks},
Expand Down Expand Up @@ -77,9 +77,11 @@ if [[ -z $DLB_HOME ]] ; then
fi

# If the application is not linked with DLB, select which DLB version to preload.
# Choose between the non-MPI version, or the MPI versions for C/C++ or Fortran
# Choose between the non-MPI version, the standard MPI version, or the specific
# C/C++ or Fortran MPI.
#preload="$DLB_HOME/lib/libdlb_instr.so"
preload="$DLB_HOME/lib/libdlb_mpi_instr.so"
#preload="$DLB_HOME/lib/libdlb_mpic_instr.so"
#preload="$DLB_HOME/lib/libdlb_mpif_instr.so"

if [[ -z $EXTRAE_HOME ]] ; then
Expand Down
4 changes: 3 additions & 1 deletion doc/scripts/lewi_ompss_trace.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,11 @@ if [[ -z $DLB_HOME ]] ; then
fi

# If the application is not linked with DLB, select which DLB version to preload.
# Choose between the non-MPI version, or the MPI versions for C/C++ or Fortran
# Choose between the non-MPI version, the standard MPI version, or the specific
# C/C++ or Fortran MPI.
#preload="$DLB_HOME/lib/libdlb_instr.so"
preload="$DLB_HOME/lib/libdlb_mpi_instr.so"
#preload="$DLB_HOME/lib/libdlb_mpic_instr.so"
#preload="$DLB_HOME/lib/libdlb_mpif_instr.so"

if [[ -z $EXTRAE_HOME ]] ; then
Expand Down
3 changes: 2 additions & 1 deletion doc/scripts/talp.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
export DLB_ARGS="$DLB_ARGS --talp"

# Select which TALP summary to show
# [app:node:process:iteration:omp:regions], default: pop-metrics
# {none:all:pop-metrics:pop-raw:node:process}, default: pop-metrics
#export DLB_ARGS="$DLB_ARGS --talp-summary=pop-metrics:process:regions"

#################################################################################
Expand All @@ -34,6 +34,7 @@ fi
# both C/C++ and Fortran MPI.
#preload="$DLB_HOME/lib/libdlb.so"
preload="$DLB_HOME/lib/libdlb_mpi.so"
#preload="$DLB_HOME/lib/libdlb_mpic.so"
#preload="$DLB_HOME/lib/libdlb_mpif.so"

env LD_PRELOAD="$LD_PRELOAD:$preload" "$@"
2 changes: 1 addition & 1 deletion doc/user_guide/source/advance_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Usage
To enable OMPT support, DLB needs the option ``DLB_ARGS+=" --ompt"`` and the
OpenMP runtime linked to the application must support this feature. If you are
unsure of whether the OpenMP runtime you are using supports OMPT, you can run
the example located in ``$DLB_HOME/share/doc/dlb/examples/OMPT``.
the example located in ``$DLB_PREFIX/share/doc/dlb/examples/OMPT``.

We recommend explicitly setting the environment variable
``OMP_WAIT_POLICY="passive"``, since even if *passive* may be the default
Expand Down
132 changes: 93 additions & 39 deletions doc/user_guide/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,77 @@ Public API

DLB offers a public interface for C, C++ and Fortran. The DLB API can be divided into:

**Basic set**
The basic set contains the general purpose functions that are common to other
modules. The different functions are explained in detail in section :ref:`basic-api`.
.. glossary::

**LeWI: Lend When Idle**
The LeWI API is oriented to be used by runtimes to manage the CPU sharing between
other processes but can also be used on applications to use the ``LeWI``
algorithm. These functions are explained in detail in section
:ref:`lewi-api`.
Basic set
The basic set contains the general purpose functions that are common to other
modules. The different functions are explained in detail in section :ref:`basic-api`.

**DROM: Dynamic Resource Ownership Manager**
The DROM API manages the CPU ownership of each DLB running process. For a more
detailed description see :ref:`drom`. These functions are described in section
:ref:`drom-api`.
LeWI: Lend When Idle
The LeWI API is oriented to be used by runtimes to manage the CPU sharing between
other processes but can also be used on applications to use the ``LeWI``
algorithm. These functions are explained in detail in section
:ref:`lewi-api`.

**TALP: Tracking Application Live Performance**
The TALP API is used to obtain measured metrics from other processes as well as
to define custom monitoring regions, see :ref:`talp`. These functions are
described in section :ref:`talp-api`.
DROM: Dynamic Resource Ownership Manager
The DROM API manages the CPU ownership of each DLB running process. For a more
detailed description see :ref:`drom`. These functions are described in section
:ref:`drom-api`.

**MPI API**
This is a specific API for MPI. We offer an MPI interface that will be called by
Extrae if we are tracing the application or internally in the MPI intercept API.
All the calls of this API are of the form shown below, and thus not documented.
TALP: Tracking Application Live Performance
The TALP API is used to obtain measured metrics from other processes as well as
to define custom monitoring regions, see :ref:`talp`. These functions are
described in section :ref:`talp-api`.

- DLB_<mpi_call_name>_enter(...)
- DLB_<mpi_call_name>_leave(...)
MPI API
This is a specific API for MPI. We offer an MPI interface that will be called by
Extrae if we are tracing the application or internally in the MPI intercept API.
All the calls of this API are of the form shown below, and thus not documented.

- DLB_<mpi_call_name>_enter(...)
- DLB_<mpi_call_name>_leave(...)


.. TALP: Tracking Application Live Performance
.. To be done
=========
DLB Types
=========

The following types may be used in the DLB interface:

.. glossary::

dlb_cpu_set_t
Opaque type that corresponds to ``cpu_set_t *``. See ``<sched.h>``.

const_dlb_cpu_set_t
Opaque type that corresponds to ``const cpu_set_t *``.

dlb_callbacks_t
Enum to identify the type of callback. See ``"dlb_types.h"``.

dlb_callback_t
Opaque type for callback function.

dlb_printshmem_flags_t
Print shared memory flags. See ``"dlb_types.h"``.

dlb_drom_flags_t
DROM flags. See ``"dlb_types.h"``.

dlb_monitor_t
Monitoring region. See ``"dlb_talp.h"`` and :ref:`talp-custom-regions`.

dlb_node_metrics_t
Output struct where POP node metrics are stored. See ``"dlb_talp.h"``.

dlb_pop_metrics_t
Output struct where POP metrics are stored. See ``"dlb_talp.h"``.


.. _basic-api:

=============
Expand All @@ -45,7 +83,7 @@ DLB Basic API

These functions make up the basic API, which can be used regardless of which DLB mode is enabled.

.. function:: int DLB_Init(int ncpus, const_dlb_cpu_set_t mask, const char \*dlb_args)
.. function:: int DLB_Init(int ncpus, const_dlb_cpu_set_t mask, const char *dlb_args)

Initialize DLB library and all its internal data structures. Must be called once and only
once by each process in the DLB system.
Expand Down Expand Up @@ -75,18 +113,18 @@ These functions make the basic API to be used independently from which DLB mode
time, subsequent calls to borrow CPUs will be ignored until some of them are returned.


.. function:: int DLB_CallbackSet(dlb_callbacks_t which, dlb_callback_t callback, void \*arg)
int DLB_CallbackGet(dlb_callbacks_t which, dlb_callback_t \*callback, void \*\*arg)
.. function:: int DLB_CallbackSet(dlb_callbacks_t which, dlb_callback_t callback, void *arg)
int DLB_CallbackGet(dlb_callbacks_t which, dlb_callback_t *callback, void **arg)
Setter and Getter for DLB callbacks. See section :ref:`callbacks`.

.. function:: int DLB_PollDROM(int \*ncpus, dlb_cpu_set_t mask)
.. function:: int DLB_PollDROM(int *ncpus, dlb_cpu_set_t mask)
int DLB_PollDROM_Update(void)

Poll DROM module to check if the process needs to adapt to a new mask or number of CPUs.

.. function:: int DLB_SetVariable(const char \*variable, const char \*value)
int DLB_GetVariable(const char \*variable, char \*value)
.. function:: int DLB_SetVariable(const char *variable, const char *value)
int DLB_GetVariable(const char *variable, char *value)
Set or get a DLB internal variable. These variables are the same ones specified in ``DLB_ARGS``,
although not all of them can be modified at runtime. If the variable is readonly the setter
Expand Down Expand Up @@ -176,11 +214,11 @@ process mask of each DLB process.

Detach process from DLB

.. function:: int DLB_DROM_GetNumCpus(int \*ncpus)
.. function:: int DLB_DROM_GetNumCpus(int *ncpus)

Get the total number of available CPUs in the node

.. function:: void DLB_DROM_GetPidList(int \*pidlist, int \*nelems, int max_len)
.. function:: void DLB_DROM_GetPidList(int *pidlist, int *nelems, int max_len)

Get the PIDs attached to this module

Expand Down Expand Up @@ -211,18 +249,22 @@ and later it can obtain some data from the other DLB running processes.

Detach process from DLB

.. function:: int DLB_TALP_GetNumCpus(int \*ncpus)
.. function:: int DLB_TALP_GetNumCpus(int *ncpus)

Get the total number of available CPUs in the node

.. function:: void DLB_TALP_GetPidList(int \*pidlist, int \*nelems, int max_len)
.. function:: void DLB_TALP_GetPidList(int *pidlist, int *nelems, int max_len)

Get the PIDs attached to this module

.. function:: int DLB_TALP_GetTimes(int pid, double \*mpi_time, double \*useful_time)
.. function:: int DLB_TALP_GetTimes(int pid, double *mpi_time, double *useful_time)

Get the CPU time on MPI and useful computation for the given process

.. function:: int DLB_TALP_QueryPOPNodeMetrics(const char *name, dlb_node_metrics_t *node_metrics)

Compute POP Node Metrics for one region


The second set of services is designed to be called from within the DLB running processes.
With these functions, the process can obtain live metrics from TALP, as well as define
Expand All @@ -232,22 +274,34 @@ new custom Monitoring Regions to delimit a specific part of the code.

Get the pointer of the implicit MPI Monitoring Region

.. function:: dlb_monitor_t* DLB_MonitoringRegionRegister(const char \*name)
.. function:: dlb_monitor_t* DLB_MonitoringRegionRegister(const char *name)

Register a new Monitoring Region

.. function:: int DLB_MonitoringRegionReset(dlb_monitor_t \*handle)
.. function:: int DLB_MonitoringRegionReset(dlb_monitor_t *handle)

Reset monitoring region

.. function:: int DLB_MonitoringRegionStart(dlb_monitor_t \*handle)
.. function:: int DLB_MonitoringRegionStart(dlb_monitor_t *handle)

Start (or unpause) monitoring region
Start (or resume) monitoring region

.. function:: int DLB_MonitoringRegionStop(dlb_monitor_t \*handle)
.. function:: int DLB_MonitoringRegionStop(dlb_monitor_t *handle)

Stop (or pause) monitoring region

.. function:: int DLB_MonitoringRegionReport(const dlb_monitor_t \*handle)
.. function:: int DLB_MonitoringRegionReport(const dlb_monitor_t *handle)

Print a report of the monitoring region to stdout

.. function:: int DLB_MonitoringRegionsUpdate(void)

Explicitly update all monitoring regions

.. function:: int DLB_TALP_CollectPOPMetrics(dlb_monitor_t *monitor, dlb_pop_metrics_t *pop_metrics)

Perform an MPI collective communication to collect POP metrics

.. function:: int DLB_TALP_CollectPOPNodeMetrics(dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics)

Perform a node collective communication to collect TALP node metrics
2 changes: 1 addition & 1 deletion doc/user_guide/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

# General information about the project.
project = u'DLB'
copyright = u'2015-2021, Barcelona Supercomputing Center'
copyright = u'2015-2023, Barcelona Supercomputing Center'
author = u'Marta Garcia and Victor Lopez'

# The version info for the project you're documenting, acts as replacement for
Expand Down
Loading

0 comments on commit f6370bd

Please sign in to comment.