Skip to content

Commit

Permalink
Merge branch 'master' into kjacque/control-cart-fabric
Browse files Browse the repository at this point in the history
Features: control
  • Loading branch information
kjacque committed Apr 17, 2024
2 parents 7b683d6 + a3fa6c5 commit f462e2b
Show file tree
Hide file tree
Showing 15 changed files with 213 additions and 71 deletions.
1 change: 1 addition & 0 deletions src/tests/ftest/container/per_server_fault_domain.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ timeout: 300
server_config:
name: daos_server
engines_per_host: 2
crt_timeout: 10
engines:
0:
pinned_numa_node: 0
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/cell_size_property.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ timeout: 900
server_config:
name: daos_server
engines_per_host: 2
crt_timeout: 10
engines:
0:
pinned_numa_node: 0
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/offline_rebuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ setup:
server_config:
name: daos_server
engines_per_host: 2
crt_timeout: 10
engines:
0:
pinned_numa_node: 0
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/erasurecode/rebuild_disabled_single.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pool:
pool_query_timeout: 30
container:
type: POSIX
control_method: API
control_method: daos
single_data_set:
# [object_qty, record_qty, dkey, akey, data_size]
- [1, 1, 1, 1, 4194304]
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/rebuild/container_create_race.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ timeout: 360
server_config:
name: daos_server
engines_per_host: 2
crt_timeout: 10
engines:
0:
targets: 2
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/faults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ ior_faults:
dfs_oclass:
- ["EC_2P1GX", "RP_2GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/ior/"
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
Expand Down
19 changes: 10 additions & 9 deletions src/tests/ftest/soak/harassers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ hosts:
orterun:
allow_run_as_root: true
# This timeout must be longer than the test_timeout param (by at least 15 minutes)
# 12 hour test
timeout: 12H15M
timeout: 24H30M
setup:
start_servers: true
start_agents: true
Expand Down Expand Up @@ -56,7 +55,7 @@ container:
daos_timeout: 30
container_reserved:
type: POSIX
properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1
properties: cksum:crc16,cksum_size:16384,srv_cksum:on
file_oclass: SX
dir_oclass: SX
control_method: daos
Expand All @@ -78,8 +77,8 @@ soak_harassers:
# harasser test timeout in hours
single_test_pool: false
test_timeout:
test_soak_online_harassers: 12
test_soak_offline_harassers: 12
test_soak_online_harassers: 24
test_soak_offline_harassers: 24
# maximum timeout for a single job in test in minutes
joblist:
- ior_harasser
Expand All @@ -93,12 +92,14 @@ soak_harassers:
- server-stop_server-reintegrate-offline
- extend-pool-offline
# - vmd-identify-check-offline
- reboot_reboot-reintegrate-offline
test_soak_online_harassers:
- exclude_reintegrate
- server-stop_server-start
- server-stop_server-reintegrate
- extend-pool
# - vmd-identify-check
- reboot_reboot-reintegrate
harasser_to: 1200
# drain rank from all pools before stopping server
enable_drain: true
Expand Down Expand Up @@ -140,7 +141,7 @@ ior_harasser:
- ["EC_2P1GX", "RP_2GX"]
- ["EC_4P2GX", "RP_3GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/ior"
mount_dir: "/tmp/soak_dfuse_ior"
disable_caching: true
fio_harasser:
api:
Expand All @@ -150,7 +151,7 @@ fio_harasser:
- global
- test
global:
directory: "/tmp/daos_dfuse/fio/"
directory: "/tmp/soak_dfuse_fio/"
ioengine: 'libaio'
thread: 1
group_reporting: 1
Expand All @@ -172,7 +173,7 @@ fio_harasser:
- ["EC_2P1GX", "RP_2GX"]
- ["EC_4P2GX", "RP_3GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/fio/"
mount_dir: "/tmp/soak_dfuse_fio/"
disable_caching: true
mdtest_harasser:
# maximum timeout for a single job in test in minutes
Expand Down Expand Up @@ -204,7 +205,7 @@ mdtest_harasser:
- ["EC_4P2G1", "RP_3G1"]
dfs_destroy: false
dfuse:
mount_dir: "/tmp/daos_dfuse/mdtest/"
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
Expand Down
18 changes: 9 additions & 9 deletions src/tests/ftest/soak/smoke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ ior_smoke:
dfs_oclass:
- ["EC_2P1GX", "RP_2GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/ior/"
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -134,7 +134,7 @@ fio_smoke:
- global
- test
global:
directory: "/tmp/daos_dfuse/fio/"
directory: "/tmp/soak_dfuse_fio/"
ioengine: 'libaio'
thread: 1
group_reporting: 1
Expand All @@ -155,7 +155,7 @@ fio_smoke:
oclass:
- ["EC_2P1GX", "RP_2GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/fio/"
mount_dir: "/tmp/soak_dfuse_fio/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -172,9 +172,9 @@ vpic_smoke:
- POSIX
- POSIX-LIBIOIL
- POSIX-LIBPIL4DFS
workdir: "/tmp/daos_dfuse/vpic/"
workdir: "/tmp/soak_dfuse_vpic/"
dfuse:
mount_dir: "/tmp/daos_dfuse/vpic/"
mount_dir: "/tmp/soak_dfuse_vpic/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -191,9 +191,9 @@ lammps_smoke:
- POSIX
- POSIX-LIBIOIL
- POSIX-LIBPIL4DFS
workdir: "/tmp/daos_dfuse/lammps/"
workdir: "/tmp/soak_dfuse_lammps/"
dfuse:
mount_dir: "/tmp/daos_dfuse/lammps/"
mount_dir: "/tmp/soak_dfuse_lammps/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand Down Expand Up @@ -226,7 +226,7 @@ mdtest_smoke:
- ["EC_2P1G1", "RP_2G1"]
dfs_destroy: false
dfuse:
mount_dir: "/tmp/daos_dfuse/mdtest/"
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -250,7 +250,7 @@ macsio_smoke:
oclass:
- ["EC_2P1GX", "RP_2GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/macsio/"
mount_dir: "/tmp/soak_dfuse_macsio/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand Down
21 changes: 9 additions & 12 deletions src/tests/ftest/soak/stress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ ior_stress:
# - ["EC_8P2G1", "RP_3GX"]
# - ["EC_16P2GX", "RP_3GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/ior/"
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -149,7 +149,7 @@ fio_stress:
- test
global:
create_serialize: 0
directory: "/tmp/daos_dfuse/fio/"
directory: "/tmp/soak_dfuse_fio/"
ioengine: 'libaio'
thread: 1
group_reporting: 1
Expand All @@ -176,7 +176,7 @@ fio_stress:
# - ["EC_8P2G1", "RP_3GX"]
# - ["EC_16P2GX", "RP_3GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/fio/"
mount_dir: "/tmp/soak_dfuse_fio/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand All @@ -194,9 +194,9 @@ vpic_stress:
- POSIX
- POSIX-LIBIOIL
- POSIX-LIBPIL4DFS
workdir: "/tmp/daos_dfuse/vpic/"
workdir: "/tmp/soak_dfuse_vpic/"
dfuse:
mount_dir: "/tmp/daos_dfuse/vpic/"
mount_dir: "/tmp/soak_dfuse_vpic/"
disable_caching: true
oclass:
- ["EC_2P1GX", "RP_2GX"]
Expand All @@ -211,9 +211,9 @@ lammps_stress:
- POSIX
- POSIX-LIBIOIL
- POSIX-LIBPIL4DFS
workdir: "/tmp/daos_dfuse/lammps/"
workdir: "/tmp/soak_dfuse_lammps/"
dfuse:
mount_dir: "/tmp/daos_dfuse/lammps/"
mount_dir: "/tmp/soak_dfuse_lammps/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand Down Expand Up @@ -254,7 +254,7 @@ mdtest_stress:
# - ["EC_16P2G1", "RP_3G1"]
dfs_destroy: false
dfuse:
mount_dir: "/tmp/daos_dfuse/mdtest/"
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
thread_count: 8
cores: '0-7'
Expand Down Expand Up @@ -285,18 +285,15 @@ macsio_stress:
# - ["EC_8P2G1", "RP_3GX"]
# - ["EC_16P2GX", "RP_3GX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/macsio/"
mount_dir: "/tmp/soak_dfuse_macsio/"
disable_caching: true
thread_count: 8
cores: '0-7'
datamover_stress:
job_timeout: 10
nodesperjob:
- 1
- 4
- 8
taskspernode:
- 16
- 32
oclass:
- ["SX","SX"]
Expand Down
12 changes: 6 additions & 6 deletions src/tests/ftest/soak/stress_2h.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ ior_stress:
dfs_oclass:
- ["SX","SX"]
dfuse:
mount_dir: "/tmp/daos_dfuse/ior/"
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
mdtest_stress:
# maximum timeout for a single job in test in minutes
Expand All @@ -124,7 +124,7 @@ mdtest_stress:
- ["S1","S1"]
dfs_destroy: false
dfuse:
mount_dir: "/tmp/daos_dfuse/mdtest/"
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
vpic_stress:
job_timeout: 20
Expand All @@ -134,9 +134,9 @@ vpic_stress:
- 16
cmdline: "/var/hit/daos/builds/vpic-install/bin/harris.Linux"
posix: true
workdir: "/tmp/daos_dfuse/vpic/"
workdir: "/tmp/soak_dfuse_vpic/"
dfuse:
mount_dir: "/tmp/daos_dfuse/vpic/"
mount_dir: "/tmp/soak_dfuse_vpic/"
disable_caching: true
oclass:
- ["SX","SX"]
Expand All @@ -148,9 +148,9 @@ lammps_stress:
- 16
cmdline: "/var/hit/daos/builds/lammps/src/lmp_mpi -i /var/hit/daos/builds/lammps/bench/in.lj"
posix: true
workdir: "/tmp/daos_dfuse/lammps/"
workdir: "/tmp/soak_dfuse_lammps/"
dfuse:
mount_dir: "/tmp/daos_dfuse/lammps/"
mount_dir: "/tmp/soak_dfuse_lammps/"
disable_caching: true
oclass:
- ["SX","SX"]
6 changes: 5 additions & 1 deletion src/tests/ftest/util/ec_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2020-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -280,6 +280,10 @@ def ec_container_create(self, oclass):
oclass (str): object class for creating the container.
"""
self.container.append(self.get_container(self.pool, create=False, oclass=oclass))
if self.container[-1].control_method.value == \
self.container[-1].USE_DAOS and self.container[-1].oclass.value:
self.container[-1].oclass.update(self.container[-1].oclass.value.replace("OC_", ""),
"container.oclass")

# Get the Parity count for setting the container RF property.
ec_object = get_data_parity_number(self.log, oclass)
Expand Down
5 changes: 4 additions & 1 deletion src/tests/ftest/util/server_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def display_memory_info(self):
"""Display server hosts memory info."""
self.log.debug("#" * 80)
self.log.debug("<SERVER> Collection debug memory info")
run_remote(self.log, self._hosts, "free -m")
run_remote(self.log, self._hosts, "free -m && df -h --type=tmpfs")
run_remote(self.log, self._hosts, "ps -eo size,pid,user,command --sort -size | head -n 6")
self.log.debug("#" * 80)

Expand Down Expand Up @@ -720,6 +720,9 @@ def stop(self):
# Make sure the mount directory belongs to non-root user
self.set_scm_mount_ownership()

# Collect memory usage after stop.
self.display_memory_info()

# Report any errors after all stop actions have been attempted
if messages:
raise ServerFailed("Failed to stop servers:\n {}".format("\n ".join(messages)))
Expand Down
Loading

0 comments on commit f462e2b

Please sign in to comment.