Skip to content

Commit

Permalink
DAOS-14650 test: replace mpirun bind-to and map-by with args (#13392)
Browse files Browse the repository at this point in the history
Skip-test: true
Skip-unit-test: true
Skip-unit-tests: true

- Replace --bind-to and --map-by with generic args to support flexible
  options.
- Update mpirun to set both np and ppn, for compatibility with more MPI
  implementations.

Required-githooks: true

Signed-off-by: Dalton Bohning <dalton.bohning@intel.com>
  • Loading branch information
daltonbohning committed Dec 1, 2023
1 parent 158bc30 commit 9be1a2a
Show file tree
Hide file tree
Showing 12 changed files with 41 additions and 46 deletions.
3 changes: 1 addition & 2 deletions src/tests/ftest/deployment/agent_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ def run_ior_collect_error(self, results, job_num, file_name, clients):
mpi_type="mpich")
manager.assign_hosts(clients_nodeset, self.workdir, self.hostfile_clients_slots)
ppn = self.params.get("ppn", '/run/ior/client_processes/*')
manager.ppn.update(ppn, 'mpirun.ppn')
manager.processes.update(None, 'mpirun.np')
manager.assign_processes(ppn=ppn)

try:
ior_output = manager.run()
Expand Down
3 changes: 1 addition & 2 deletions src/tests/ftest/deployment/target_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def run_ior_report_error(self, results, job_num, file_name, pool, container, nam
manager.assign_hosts(
self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
ppn = self.params.get("ppn", '/run/ior/client_processes/*')
manager.ppn.update(ppn, 'mpirun.ppn')
manager.processes.update(None, 'mpirun.np')
manager.assign_processes(ppn=ppn)

# Run the command.
try:
Expand Down
3 changes: 1 addition & 2 deletions src/tests/ftest/performance/ior_easy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,4 @@ client:
env_vars:
- D_LOG_MASK=INFO
mpirun:
bind_to: hwthread
map_by: socket
args: "--bind-to hwthread --map-by socket"
3 changes: 1 addition & 2 deletions src/tests/ftest/performance/ior_hard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,4 @@ client:
env_vars:
- D_LOG_MASK=INFO
mpirun:
bind_to: hwthread
map_by: socket
args: "--bind-to hwthread --map-by socket"
3 changes: 1 addition & 2 deletions src/tests/ftest/performance/mdtest_easy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,4 @@ client:
env_vars:
- D_LOG_MASK=INFO
mpirun:
bind_to: hwthread
map_by: socket
args: "--bind-to hwthread --map-by socket"
3 changes: 1 addition & 2 deletions src/tests/ftest/performance/mdtest_hard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,4 @@ client:
env_vars:
- D_LOG_MASK=INFO
mpirun:
bind_to: hwthread
map_by: socket
args: "--bind-to hwthread --map-by socket"
2 changes: 1 addition & 1 deletion src/tests/ftest/telemetry/pool_space_metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ ior:
env_vars:
- D_LOG_MASK=INFO
mpirun:
bind_to: socket
args: "--bind-to socket"

scm_metric_thresholds:
# Maximal metadata size is empirically adjusted to 8MiB
Expand Down
6 changes: 1 addition & 5 deletions src/tests/ftest/util/data_mover_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,7 @@ def run(self, processes, job_manager, ppn=None, env=None):
# Get job manager cmd
job_manager = Mpirun(self, mpi_type="mpich")
job_manager.assign_hosts(self.hosts, self.tmp)
if ppn is None:
job_manager.assign_processes(processes)
else:
job_manager.ppn.update(ppn, 'mpirun.ppn')
job_manager.processes.update(None, 'mpirun.np')
job_manager.assign_processes(processes, ppn)
job_manager.exit_status_exception = self.exit_status_exception
job_manager.assign_environment(env or {}, True)

Expand Down
8 changes: 4 additions & 4 deletions src/tests/ftest/util/ior_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,11 @@ def run_ior(self, manager, processes, intercept=None, display_space=True,
manager.working_dir.value = self.dfuse.mount_dir.value
manager.assign_hosts(
self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
if self.ppn is None:
manager.assign_processes(processes)
# Pass only processes or ppn to be compatible with previous behavior
if self.ppn is not None:
manager.assign_processes(ppn=self.ppn)
else:
manager.ppn.update(self.ppn, 'mpirun.ppn')
manager.processes.update(None, 'mpirun.np')
manager.assign_processes(processes=processes)

manager.assign_environment(env)

Expand Down
8 changes: 4 additions & 4 deletions src/tests/ftest/util/ior_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,11 +571,11 @@ def run(self, group, pool, container, processes, ppn=None, intercept=None, plugi
self.manager.job.test_file.update(
os.path.join(os.sep, self.label_generator.get_label("testfile")))

if ppn is None:
self.manager.assign_processes(processes)
# Pass only processes or ppn to be compatible with previous behavior
if ppn is not None:
self.manager.assign_processes(ppn=ppn)
else:
self.manager.ppn.update(ppn, ".".join([self.manager.command, "ppn"]))
self.manager.processes.update(None, ".".join([self.manager.command, "np"]))
self.manager.assign_processes(processes=processes)

self.manager.assign_environment(self.env)

Expand Down
37 changes: 21 additions & 16 deletions src/tests/ftest/util/job_manager_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
# pylint: disable=too-many-lines
from distutils.spawn import find_executable # pylint: disable=deprecated-module
import os
import re
import time
# pylint: disable=too-many-lines
from distutils.spawn import \
find_executable # pylint: disable=deprecated-module

from ClusterShell.NodeSet import NodeSet

from command_utils import ExecutableCommand, SystemctlCommand
from command_utils_base import FormattedParameter, EnvironmentVariables
from exception_utils import CommandFailure, MPILoadError
from command_utils_base import (BasicParameter, EnvironmentVariables,
FormattedParameter)
from env_modules import load_mpi
from general_utils import pcmd, run_pcmd, get_job_manager_class, get_journalctl_command, \
journalctl_time
from exception_utils import CommandFailure, MPILoadError
from general_utils import (get_job_manager_class, get_journalctl_command,
journalctl_time, pcmd, run_pcmd)
from run_utils import run_remote, stop_processes
from write_host_file import write_host_file

Expand Down Expand Up @@ -163,12 +164,12 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
"""

def assign_processes(self, processes):
"""Assign the number of processes per node.
"""Assign the number of processes.
Set the appropriate command line parameter with the specified value.
Args:
processes (int): number of processes per node
processes (int): number of processes
"""

def assign_environment(self, env_vars, append=False):
Expand Down Expand Up @@ -337,10 +338,10 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
self.hostfile.value = write_host_file(**kwargs)

def assign_processes(self, processes):
"""Assign the number of processes per node (-np).
"""Assign the number of processes (-np).
Args:
processes (int): number of processes per node
processes (int): number of processes
"""
self.processes.value = processes

Expand Down Expand Up @@ -433,8 +434,7 @@ def __init__(self, job, subprocess=False, mpi_type="openmpi"):
self.mca = FormattedParameter("--mca {}", mca_default)
self.working_dir = FormattedParameter("-wdir {}", None)
self.tmpdir_base = FormattedParameter("--mca orte_tmpdir_base {}", None)
self.bind_to = FormattedParameter("--bind-to {}", None)
self.map_by = FormattedParameter("--map-by {}", None)
self.args = BasicParameter(None, None)
self.mpi_type = mpi_type

def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
Expand All @@ -456,13 +456,18 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
kwargs["path"] = path
self.hostfile.value = write_host_file(**kwargs)

def assign_processes(self, processes):
"""Assign the number of processes per node (-np).
def assign_processes(self, processes=None, ppn=None):
"""Assign the number of processes (-np) and processes per node (-ppn).
Args:
processes (int): number of processes per node
processes (int, optional): number of processes. Defaults to None.
If not specified and ppn is given, calculated as ppn * number of hosts.
ppn (int, optional): number of processes per node. Defaults to None.
"""
if ppn is not None and processes is None:
processes = ppn * len(self._hosts)
self.processes.update(processes, "mpirun.np")
self.ppn.update(ppn, "mpirun.ppn")

def assign_environment(self, env_vars, append=False):
"""Assign or add environment variables to the command.
Expand Down
8 changes: 4 additions & 4 deletions src/tests/ftest/util/mdtest_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ def run_mdtest(self, manager, processes, display_space=True, pool=None, out_queu
"""
env = self.mdtest_cmd.get_default_env(str(manager), self.client_log)
manager.assign_hosts(self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
if self.ppn is None:
manager.assign_processes(processes)
# Pass only processes or ppn to be compatible with previous behavior
if self.ppn is not None:
manager.assign_processes(ppn=self.ppn)
else:
manager.ppn.update(self.ppn, 'mpirun.ppn')
manager.processes.update(None, 'mpirun.np')
manager.assign_processes(processes=processes)

manager.assign_environment(env)

Expand Down

0 comments on commit 9be1a2a

Please sign in to comment.