diff --git a/src/tests/ftest/control/dmg_scale.py b/src/tests/ftest/control/dmg_scale.py new file mode 100644 index 00000000000..5a268a798c7 --- /dev/null +++ b/src/tests/ftest/control/dmg_scale.py @@ -0,0 +1,182 @@ +""" + (C) Copyright 2024 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +from apricot import TestWithServers +from telemetry_utils import TelemetryUtils +from test_utils_pool import time_pool_create + +ENGINE_POOL_METRICS_SHORT = [ + "engine_pool_entries_dtx_batched_degree", + "engine_pool_entries_dtx_batched_total", + "engine_pool_ops_akey_enum", + "engine_pool_ops_akey_punch", + "engine_pool_ops_compound", + "engine_pool_ops_dkey_enum", + "engine_pool_ops_dkey_punch", + "engine_pool_ops_dtx_abort", + "engine_pool_ops_dtx_check", + "engine_pool_ops_dtx_commit", + "engine_pool_ops_dtx_refresh", + "engine_pool_ops_ec_agg", + "engine_pool_ops_ec_rep", + "engine_pool_ops_fetch", + "engine_pool_ops_key_query", + "engine_pool_ops_migrate", + "engine_pool_ops_obj_enum", + "engine_pool_ops_obj_punch", + "engine_pool_ops_obj_sync", + "engine_pool_ops_recx_enum", + "engine_pool_ops_tgt_akey_punch", + "engine_pool_ops_tgt_dkey_punch", + "engine_pool_ops_tgt_punch", + "engine_pool_ops_tgt_update", + "engine_pool_ops_update", + "engine_pool_ops_pool_connect", + "engine_pool_ops_pool_disconnect", + "engine_pool_ops_pool_evict", + "engine_pool_ops_pool_query", + "engine_pool_ops_pool_query_space", + "engine_pool_resent", + "engine_pool_restarted", + "engine_pool_retry", + "engine_pool_scrubber_busy_time", + "engine_pool_scrubber_bytes_scrubbed_current", + "engine_pool_scrubber_bytes_scrubbed_prev", + "engine_pool_scrubber_bytes_scrubbed_total", + "engine_pool_scrubber_corruption_current", + "engine_pool_scrubber_corruption_total", + "engine_pool_scrubber_csums_current", + "engine_pool_scrubber_csums_prev", + "engine_pool_scrubber_csums_total", + "engine_pool_scrubber_next_csum_scrub", + "engine_pool_scrubber_next_tree_scrub", + "engine_pool_scrubber_prev_duration", + "engine_pool_scrubber_prev_duration_max", + "engine_pool_scrubber_prev_duration_mean", + "engine_pool_scrubber_prev_duration_min", + "engine_pool_scrubber_prev_duration_stddev", + "engine_pool_scrubber_scrubber_started", + "engine_pool_scrubber_scrubs_completed", + "engine_pool_started_at", + "engine_pool_vos_aggregation_akey_deleted", + "engine_pool_vos_aggregation_akey_scanned", + "engine_pool_vos_aggregation_akey_skipped", + "engine_pool_vos_aggregation_csum_errors", + "engine_pool_vos_aggregation_deleted_ev", + "engine_pool_vos_aggregation_deleted_sv", + "engine_pool_vos_aggregation_dkey_deleted", + "engine_pool_vos_aggregation_dkey_scanned", + "engine_pool_vos_aggregation_dkey_skipped", + "engine_pool_vos_aggregation_epr_duration", + "engine_pool_vos_aggregation_epr_duration_max", + "engine_pool_vos_aggregation_epr_duration_mean", + "engine_pool_vos_aggregation_epr_duration_min", + "engine_pool_vos_aggregation_epr_duration_stddev", + "engine_pool_vos_aggregation_merged_recs", + "engine_pool_vos_aggregation_merged_size", + "engine_pool_vos_aggregation_obj_deleted", + "engine_pool_vos_aggregation_obj_scanned", + "engine_pool_vos_aggregation_obj_skipped", + "engine_pool_vos_aggregation_uncommitted", + "engine_pool_vos_space_nvme_used", + "engine_pool_vos_space_scm_used", + "engine_pool_xferred_fetch", + "engine_pool_xferred_update", + "engine_pool_EC_update_full_stripe", + "engine_pool_EC_update_partial", + "engine_pool_block_allocator_alloc_hint", + "engine_pool_block_allocator_alloc_large", + 
"engine_pool_block_allocator_alloc_small", + "engine_pool_block_allocator_frags_aging", + "engine_pool_block_allocator_frags_large", + "engine_pool_block_allocator_frags_small", + "engine_pool_block_allocator_free_blks", + "engine_pool_ops_key2anchor" +] + + +class DmgScale(TestWithServers): + """Verify dmg commands works as expected in a large scale system. + + :avocado: recursive + """ + + def test_dmg_scale(self): + """Run the following steps and manually collect duration for each step. + + 0. Format storage + 1. System query + 2. Create a 100% pool that spans all engines + 3. Pool query + 4. Pool destroy + 5. Create 49 pools spanning all the engines with each pool using a 1/50th of the capacity + 6. Pool list + 7. Query around 80 pool metrics + 8. Destroy all 49 pools + 9. System stop + 10. System start + + Jira ID: DAOS-10508. + + :avocado: tags=all,manual + :avocado: tags=deployment + :avocado: tags=DmgScale,test_dmg_scale + """ + # This is a manual test and we need to find the durations from job.log, so add "##" to make + # it easy to search. The log is usually over 1 million lines. + self.log_step("## System query") + dmg_command = self.get_dmg_command() + dmg_command.system_query() + + self.log_step("## Create a 100% pool that spans all engines") + pool = self.get_pool(namespace="/run/pool_100/*", create=False) + duration = time_pool_create(log=self.log, number=1, pool=pool) + self.log.info("## Single pool create duration = %.1f", duration) + + self.log_step("## Pool query") + pool.query() + + self.log_step("## Pool destroy") + pool.destroy() + + quantity = self.params.get("quantity", "/run/pool_small/*", 1) + msg = (f"## Create {quantity} small pools spanning all the engines where the pools fill up " + f"the capacity") + self.log_step(msg) + pool_0 = self.get_pool(namespace="/run/pool_small/*", create=False) + duration_0 = time_pool_create(log=self.log, number=0, pool=pool_0) + pools = [pool_0] + durations = [duration_0] + for count in range(1, quantity): + pools.append(self.get_pool(create=False)) + # Use the SCM and NVMe size of the first pool for the rest of the (quantity - 1) pools. + pools[-1].scm_size.update(pool_0.scm_per_rank) + pools[-1].nvme_size.update(pool_0.nvme_per_rank) + durations.append(time_pool_create(log=self.log, number=count, pool=pools[-1])) + msg = (f"Pool {count} created. SCM = {pools[-1].scm_per_rank}; " + f"NVMe = {pools[-1].nvme_per_rank}") + self.log.info(msg) + self.log.info("## durations = %s", durations) + total_duration = sum(durations) + self.log.info("## %d pools create duration = %.1f", quantity, total_duration) + + self.log_step("## Pool list") + dmg_command.pool_list() + + self.log_step("## Query around 80 pool metrics") + # To save time and logs, call telemetry on the first host only. With the 80 pool metrics + # above, ~100K lines are printed per host. 
+ telemetry_utils = TelemetryUtils( + dmg=dmg_command, servers=[self.server_managers[0].hosts[0]]) + telemetry_utils.get_metrics(name=",".join(ENGINE_POOL_METRICS_SHORT)) + + self.log_step(f"## Destroy all {quantity} pools") + self.destroy_pools(pools=pools) + + self.log_step("## System stop") + self.server_managers[0].system_stop() + + self.log_step("## System start") + self.server_managers[0].system_start() diff --git a/src/tests/ftest/control/dmg_scale.yaml b/src/tests/ftest/control/dmg_scale.yaml new file mode 100644 index 00000000000..84f4e35bc4d --- /dev/null +++ b/src/tests/ftest/control/dmg_scale.yaml @@ -0,0 +1,37 @@ +# Note: We usually use the extra yaml in aurora-tools, but that extra yaml defines test_clients and +# this test doesn't need any clients, so either update the extra yaml or pass a dummy client to -tc. +hosts: + test_servers: 256 + +timeout: 900 + +daos_server: + pattern_timeout: 60 + +server_config: + name: daos_server + engines_per_host: 2 + engines: + 0: + pinned_numa_node: 0 + nr_xs_helpers: 1 + fabric_iface: ib0 + fabric_iface_port: 31317 + log_file: daos_server0.log + storage: auto + targets: 8 + 1: + pinned_numa_node: 1 + nr_xs_helpers: 1 + fabric_iface: ib1 + fabric_iface_port: 31417 + log_file: daos_server1.log + storage: auto + targets: 8 + +pool_100: + size: 100% +pool_small: + size: 2% + # If we use --size=2% during pool create, we can only create up to 49 pools. + quantity: 49 diff --git a/src/tests/ftest/deployment/agent_failure.py b/src/tests/ftest/deployment/agent_failure.py index d1964df3c27..22cbbbdaa79 100644 --- a/src/tests/ftest/deployment/agent_failure.py +++ b/src/tests/ftest/deployment/agent_failure.py @@ -122,7 +122,7 @@ def test_agent_failure(self): # 5. Verify journalctl shows the log that the agent is stopped. results = get_journalctl( hosts=self.hostlist_clients, since=since, until=until, - journalctl_type="daos_agent") + journalctl_type="daos_agent", run_user=self.test_env.agent_user) self.log.info("journalctl results = %s", results) if "shutting down" not in results[0]["data"]: msg = "Agent shut down message not found in journalctl! Output = {}".format( @@ -240,7 +240,7 @@ def test_agent_failure_isolation(self): # stopped. results = get_journalctl( hosts=[agent_host_kill], since=since, until=until, - journalctl_type="daos_agent") + journalctl_type="daos_agent", run_user=self.test_env.agent_user) self.log.info("journalctl results (kill) = %s", results) if "shutting down" not in results[0]["data"]: msg = ("Agent shut down message not found in journalctl on killed client! " @@ -251,7 +251,7 @@ def test_agent_failure_isolation(self): # in the previous step doesn't show that the agent is stopped. results = get_journalctl( hosts=[agent_host_keep], since=since, until=until, - journalctl_type="daos_agent") + journalctl_type="daos_agent", run_user=self.test_env.agent_user) self.log.info("journalctl results (keep) = %s", results) if "shutting down" in results[0]["data"]: msg = ("Agent shut down message found in journalctl on keep client!
" diff --git a/src/tests/ftest/deployment/basic_checkout.yaml b/src/tests/ftest/deployment/basic_checkout.yaml index 03d420ab82b..7ce9515bae8 100644 --- a/src/tests/ftest/deployment/basic_checkout.yaml +++ b/src/tests/ftest/deployment/basic_checkout.yaml @@ -70,7 +70,7 @@ mdtest_easy: &mdtest_easy_base write_bytes: 0 num_of_files_dirs: 100000000 stonewall_timer: 30 - stonewall_statusfile: "/var/tmp/daos_testing/stoneWallingStatusFile" + stonewall_statusfile: stoneWallingStatusFile dfs_destroy: false mdtest_dfs_s1: <<: *mdtest_easy_base diff --git a/src/tests/ftest/deployment/io_sys_admin.py b/src/tests/ftest/deployment/io_sys_admin.py index bca8373ba5c..265c1ad42f3 100644 --- a/src/tests/ftest/deployment/io_sys_admin.py +++ b/src/tests/ftest/deployment/io_sys_admin.py @@ -40,66 +40,88 @@ def test_io_sys_admin(self): new_cont_user = self.params.get("user", "/run/container_set_owner/*") new_cont_group = self.params.get("group", "/run/container_set_owner/*") + # Toggle independent steps + steps_to_run = { + "pool_create_ownership": True, + "storage_system_query": True, + "io": True, + "snapshot": True, + "datamover": True + } + for step in steps_to_run: + run = self.params.get(step, "/run/io_sys_admin/steps_to_run/*", None) + if run is not None: + steps_to_run[step] = run + dmg = self.get_dmg_command() daos = self.get_daos_command() - for idx in range(1, 4): - pool = self.get_pool(namespace=f"/run/pool_{idx}/", create=False) - check_pool_creation(self, [pool], 60) - containers = [] - for cont_idx in range(1, 4): - containers.append( - self.get_container(pool, namespace=f"/run/container_{cont_idx}/")) - containers[-1].set_owner(f"{new_cont_user}@", f"{new_cont_group}@") - - daos.container_list(pool.identifier) - self.destroy_containers(containers) - pool.destroy() - - # dmg storage scan - dmg.storage_scan() - dmg.system_query() - dmg.system_leader_query() - - # write large data sets - self.run_file_count() - # create snapshot - self.container[-1].create_snap() - # overwrite the last ior file - self.ior_cmd.signature.update('456') - self.processes = self.ior_np - self.ppn = self.ior_ppn - self.run_ior_with_pool(create_pool=False, create_cont=False) - - nvme_free_space_before_snap_destroy = self.get_free_space()[1] - # delete snapshot - self.container[-1].destroy_snap(epc=self.container[-1].epoch) - # Now check if the space is returned back. - counter = 1 - returned_space = self.get_free_space()[1] - nvme_free_space_before_snap_destroy - - data_written = (int(self.ppn) * human_to_bytes(self.ior_cmd.block_size.value)) - while returned_space < int(data_written): - # try to wait for 4 x 60 secs for aggregation to be completed or - # else exit the test with a failure. 
- if counter > 4: - self.log.info("Free space before snapshot destroy: %s", - nvme_free_space_before_snap_destroy) - self.log.info("Free space when test terminated: %s", - self.get_free_space()[1]) - self.fail("Aggregation did not complete as expected") - - time.sleep(60) + if steps_to_run["pool_create_ownership"]: + self.log_step("Verify pool creation time and container set-owner") + for idx in range(1, 4): + pool = self.get_pool(namespace=f"/run/pool_{idx}/", create=False) + check_pool_creation(self, [pool], 60) + containers = [] + for cont_idx in range(1, 4): + containers.append( + self.get_container(pool, namespace=f"/run/container_{cont_idx}/")) + containers[-1].set_owner(f"{new_cont_user}@", f"{new_cont_group}@") + + daos.container_list(pool.identifier) + self.destroy_containers(containers) + pool.destroy() + + if steps_to_run["storage_system_query"]: + self.log_step("Verify storage scan and system query") + dmg.storage_scan() + dmg.system_query() + dmg.system_leader_query() + + if steps_to_run["io"]: + self.log_step("Verifying large dataset IO") + self.run_file_count() + + if steps_to_run["snapshot"]: + self.log_step("Verifying snapshot creation and aggregation") + self.container[-1].create_snap() + # overwrite the last ior file + self.ior_cmd.signature.update('456') + self.processes = self.ior_np + self.ppn = self.ior_ppn + self.run_ior_with_pool(create_pool=False, create_cont=False) + + nvme_free_space_before_snap_destroy = self.get_free_space()[1] + # delete snapshot + self.container[-1].destroy_snap(epc=self.container[-1].epoch) + # Now check if the space is returned back. + counter = 1 returned_space = self.get_free_space()[1] - nvme_free_space_before_snap_destroy - counter += 1 - - self.log.info("#####Starting FS_COPY Test") - self.run_dm_activities_with_ior("FS_COPY", self.pool, self.container[-1]) - self.log.info("#####Starting DCP Test") - self.run_dm_activities_with_ior("DCP", self.pool, self.container[-1]) - self.log.info("#####Starting DSERIAL Test") - self.run_dm_activities_with_ior("DSERIAL", self.pool, self.container[-1]) - self.log.info("#####Starting CONT_CLONE Test") - self.run_dm_activities_with_ior("CONT_CLONE", self.pool, self.container[-1]) - self.log.info("#####Completed all Datamover tests") - self.container.pop(0) + + data_written = (int(self.ppn) * human_to_bytes(self.ior_cmd.block_size.value)) + while returned_space < int(data_written): + # try to wait for 4 x 60 secs for aggregation to be completed or + # else exit the test with a failure. 
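+ # Each iteration below sleeps 60 sec and re-reads the NVMe free space, so aggregation + # has up to 4 minutes to reclaim the ppn * block_size bytes written by the overwrite.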
+ if counter > 4: + self.log.info( + "Free space before snapshot destroy: %s", + nvme_free_space_before_snap_destroy) + self.log.info( + "Free space when test terminated: %s", self.get_free_space()[1]) + self.fail("Aggregation did not complete as expected") + + time.sleep(60) + returned_space = self.get_free_space()[1] - nvme_free_space_before_snap_destroy + counter += 1 + + if steps_to_run["datamover"]: + self.log_step("Verifying datamover") + self.log.info("#####Starting FS_COPY Test") + self.run_dm_activities_with_ior("FS_COPY", self.pool, self.container[-1]) + self.log.info("#####Starting DCP Test") + self.run_dm_activities_with_ior("DCP", self.pool, self.container[-1]) + self.log.info("#####Starting DSERIAL Test") + self.run_dm_activities_with_ior("DSERIAL", self.pool, self.container[-1]) + self.log.info("#####Starting CONT_CLONE Test") + self.run_dm_activities_with_ior("CONT_CLONE", self.pool, self.container[-1]) + self.log.info("#####Completed all Datamover tests") + self.container.pop(0) diff --git a/src/tests/ftest/deployment/io_sys_admin.yaml b/src/tests/ftest/deployment/io_sys_admin.yaml index 6c3edab15b3..f2a238ad4b5 100644 --- a/src/tests/ftest/deployment/io_sys_admin.yaml +++ b/src/tests/ftest/deployment/io_sys_admin.yaml @@ -104,3 +104,11 @@ dcp: np: 16 hdf5_vol: plugin_path: /usr/lib64/mpich/lib + +io_sys_admin: + steps_to_run: + pool_create_ownership: True + storage_system_query: True + io: True + snapshot: True + datamover: True diff --git a/src/tests/ftest/io/large_file_count.yaml b/src/tests/ftest/io/large_file_count.yaml index 619143a83e8..6ff375cf3a9 100644 --- a/src/tests/ftest/io/large_file_count.yaml +++ b/src/tests/ftest/io/large_file_count.yaml @@ -44,6 +44,8 @@ ior: repetitions: 1 transfer_size: '1M' block_size: '7G' + env_vars: + - D_IL_REPORT=1 dfuse: disable_caching: true @@ -59,3 +61,5 @@ mdtest: write_bytes: 4096 read_bytes: 4096 depth: 0 + env_vars: + - D_IL_REPORT=1 diff --git a/src/tests/ftest/io/small_file_count.yaml b/src/tests/ftest/io/small_file_count.yaml index b9bf23cd126..79e02c3d787 100644 --- a/src/tests/ftest/io/small_file_count.yaml +++ b/src/tests/ftest/io/small_file_count.yaml @@ -45,6 +45,8 @@ ior: repetitions: 1 transfer_size: '1M' block_size: '2G' + env_vars: + - D_IL_REPORT=1 dfuse: disable_caching: true @@ -60,3 +62,5 @@ mdtest: write_bytes: 4096 read_bytes: 4096 depth: 0 + env_vars: + - D_IL_REPORT=1 diff --git a/src/tests/ftest/ior/small.yaml b/src/tests/ftest/ior/small.yaml index b0b21620a93..b638a396580 100644 --- a/src/tests/ftest/ior/small.yaml +++ b/src/tests/ftest/ior/small.yaml @@ -38,6 +38,8 @@ container: properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos ior: + env_vars: + - D_IL_REPORT=1 ior_timeout: 75 client_processes: ppn: 32 diff --git a/src/tests/ftest/launch.py b/src/tests/ftest/launch.py index f3daa8464d7..749993ef637 100755 --- a/src/tests/ftest/launch.py +++ b/src/tests/ftest/launch.py @@ -282,7 +282,8 @@ def _run(self, args): else: set_test_environment( logger, test_env, args.test_servers, args.test_clients, args.provider, - args.insecure_mode, self.details) + args.insecure_mode, self.details, args.agent_user, args.test_log_dir, + args.server_ld_lib) except TestEnvironmentException as error: message = f"Error setting up test environment: {str(error)}" return self.get_exit_status(1, message, "Setup", sys.exc_info()) @@ -320,12 +321,13 @@ def _run(self, args): return self.get_exit_status(0, "Listing tests complete") # Setup the fuse configuration - try: - setup_fuse_config(logger, 
args.test_servers | args.test_clients) - except LaunchException: - # Warn but don't fail - message = "Issue detected setting up the fuse configuration" - setup_result.warn_test(logger, "Setup", message, sys.exc_info()) + if args.fuse_setup: + try: + setup_fuse_config(logger, args.test_servers | args.test_clients) + except LaunchException: + # Warn but don't fail + message = "Issue detected setting up the fuse configuration" + setup_result.warn_test(logger, "Setup", message, sys.exc_info()) # Setup override systemctl files try: @@ -351,15 +353,15 @@ def _run(self, args): # Determine if bullseye code coverage collection is enabled code_coverage = CodeCoverage(test_env) # pylint: disable=unsupported-binary-operation - code_coverage.check(logger, args.test_servers | self.local_host) + #code_coverage.check(logger, args.test_servers | self.local_host) # Update the test yaml files for the tests in this test group try: group.update_test_yaml( logger, args.scm_size, args.scm_mount, args.extra_yaml, args.timeout_multiplier, args.override, args.verbose, args.include_localhost) - except (RunException, YamlException) as e: - message = "Error modifying the test yaml files: {}".format(e) + except (RunException, YamlException) as error: + message = f"Error modifying the test yaml files: {str(error)}" status |= self.get_exit_status(1, message, "Setup", sys.exc_info()) except StorageException: message = "Error detecting storage information for test yaml files" @@ -540,6 +542,12 @@ def main(): "-a", "--archive", action="store_true", help="archive host log files in the avocado job-results directory") + parser.add_argument( + "-au", "--agent_user", + action="store", + default=None, + type=str, + help="user account to use when running the daos_agent") parser.add_argument( "-c", "--clear_mounts", action="append", @@ -562,6 +570,10 @@ def main(): "--failfast", action="store_true", help="stop the test suite after the first failure") + parser.add_argument( + "-fs", "--fuse_setup", + action="store_true", + help="enable setting up fuse configuration files") parser.add_argument( "-i", "--include_localhost", action="store_true", @@ -584,7 +596,7 @@ def main(): help="modify the test yaml files but do not run the tests") parser.add_argument( "-mo", "--mode", - choices=['normal', 'manual', 'ci'], + choices=['normal', 'manual', 'ci', 'custom_a'], default='normal', help="provide the mode of test to be run under. 
Default is normal, " "in which the final return code of launch.py is still zero if " @@ -649,6 +661,12 @@ def main(): "-si", "--slurm_install", action="store_true", help="enable installing slurm RPMs if required by the tests") + parser.add_argument( + "-sl", "--server_ld_lib", + action="store", + default=None, + type=str, + help="LD_LIBRARY_PATH environment variable to use in the daos_server config file") parser.add_argument( "--scm_mount", action="store", @@ -681,6 +699,12 @@ def main(): default=NodeSet(), help="comma-separated list of hosts to use as replacement values for " "client placeholders in each test's yaml file") + parser.add_argument( + "-tld", "--test_log_dir", + action="store", + default=None, + type=str, + help="test log directory base path") parser.add_argument( "-th", "--logs_threshold", action="store", @@ -744,10 +768,38 @@ def main(): args.slurm_install = True args.slurm_setup = True args.user_create = True + args.fuse_setup = True args.clear_mounts.append("/mnt/daos") args.clear_mounts.append("/mnt/daos0") args.clear_mounts.append("/mnt/daos1") + elif args.mode == "custom_a": + if args.agent_user is None: + # Run the agent with the current user by default + args.agent_user = getpass.getuser() + if os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir) is None: + # Use a user-specific test log dir by default + args.test_log_dir = os.path.join( + os.sep, "var", "tmp", f"daos_testing_{args.agent_user}") + if os.environ.get("DAOS_TEST_CONTROL_CONFIG") is None: + os.environ["DAOS_TEST_CONTROL_CONFIG"] = os.path.join( + os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir), + "daos_control.yml") + if os.environ.get("DAOS_TEST_AGENT_CONFIG") is None: + os.environ["DAOS_TEST_AGENT_CONFIG"] = os.path.join( + os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir), + "daos_agent.yml") + if os.environ.get("DAOS_TEST_SERVER_CONFIG") is None: + os.environ["DAOS_TEST_SERVER_CONFIG"] = os.path.join( + os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir), + "daos_server.yml") + args.process_cores = False + args.logs_threshold = None + args.slurm_install = False + args.slurm_setup = False + args.user_create = False + args.fuse_setup = False + # Setup the Launch object launch = Launch(args.name, args.mode, args.slurm_install, args.slurm_setup) diff --git a/src/tests/ftest/network/cart_self_test.py b/src/tests/ftest/network/cart_self_test.py index 9600531c5f2..a1e78684c8e 100644 --- a/src/tests/ftest/network/cart_self_test.py +++ b/src/tests/ftest/network/cart_self_test.py @@ -3,6 +3,8 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ +import os + from apricot import TestWithServers from command_utils import ExecutableCommand from command_utils_base import EnvironmentVariables, FormattedParameter @@ -70,7 +72,8 @@ def setUp(self): self.server_managers[0].get_config_value("provider") self.cart_env["D_INTERFACE"] = \ self.server_managers[0].get_config_value("fabric_iface") - self.cart_env["DAOS_AGENT_DRPC_DIR"] = "/var/run/daos_agent/" + self.cart_env["DAOS_AGENT_DRPC_DIR"] = os.environ.get( + "DAOS_AGENT_DRPC_DIR", "/var/run/daos_agent/") self.server_managers[0].manager.assign_environment(self.cart_env, True) self.server_managers[0].detect_start_via_dmg = True diff --git a/src/tests/ftest/performance/ior_easy.yaml b/src/tests/ftest/performance/ior_easy.yaml index fca6fa3ba70..b846b179277 100644 --- a/src/tests/ftest/performance/ior_easy.yaml +++ b/src/tests/ftest/performance/ior_easy.yaml @@ -28,7 +28,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:128KiB + properties: 
ec_cell_sz:1MiB container: type: POSIX diff --git a/src/tests/ftest/performance/mdtest_easy.py b/src/tests/ftest/performance/mdtest_easy.py index 86db9f0c49d..c1a768694af 100644 --- a/src/tests/ftest/performance/mdtest_easy.py +++ b/src/tests/ftest/performance/mdtest_easy.py @@ -33,6 +33,15 @@ def test_performance_mdtest_easy_dfs_ec_16p2g1(self): """ self.run_performance_mdtest(namespace="/run/mdtest_dfs_ec_16p2g1/*") + def test_performance_mdtest_easy_dfs_rp_3g1(self): + """Test Description: Run MDTest Easy, DFS, RP_3G1. + + :avocado: tags=all,manual + :avocado: tags=performance + :avocado: tags=MdtestEasy,test_performance_mdtest_easy_dfs_rp_3g1 + """ + self.run_performance_mdtest(namespace="/run/mdtest_dfs_rp_3g1/*") + def test_performance_mdtest_easy_pil4dfs_s1(self): """Test Description: Run MDTest Easy, dfuse + pil4dfs, S1. @@ -51,3 +60,12 @@ def test_performance_mdtest_easy_pil4dfs_ec_16p2g1(self): :avocado: tags=MdtestEasy,test_performance_mdtest_easy_pil4dfs_ec_16p2g1 """ self.run_performance_mdtest(namespace="/run/mdtest_pil4dfs_ec_16p2g1/*") + + def test_performance_mdtest_easy_pil4dfs_rp_3g1(self): + """Test Description: Run MDTest Easy, dfuse + pil4dfs, RP_3G1. + + :avocado: tags=all,manual + :avocado: tags=performance + :avocado: tags=MdtestEasy,test_performance_mdtest_easy_pil4dfs_rp_3g1 + """ + self.run_performance_mdtest(namespace="/run/mdtest_pil4dfs_rp_3g1/*") diff --git a/src/tests/ftest/performance/mdtest_easy.yaml b/src/tests/ftest/performance/mdtest_easy.yaml index 8fdd27031c2..d2925536b79 100644 --- a/src/tests/ftest/performance/mdtest_easy.yaml +++ b/src/tests/ftest/performance/mdtest_easy.yaml @@ -28,7 +28,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:128KiB + properties: ec_cell_sz:1MiB container: type: POSIX @@ -46,7 +46,7 @@ mdtest: &mdtest_base write_bytes: 0 num_of_files_dirs: 100000000 stonewall_timer: 30 - stonewall_statusfile: "/var/tmp/daos_testing/stoneWallingStatusFile" + stonewall_statusfile: stoneWallingStatusFile dfs_destroy: false mdtest_s1: &mdtest_s1 @@ -61,6 +61,12 @@ mdtest_ec_16p2g1: &mdtest_ec_16p2g1 dfs_dir_oclass: RP_3GX dfs_chunk: 16MiB +mdtest_rp_3g1: &mdtest_rp_3g1 + <<: *mdtest_base + dfs_oclass: RP_3G1 + dfs_dir_oclass: RP_3GX + dfs_chunk: 1MiB + mdtest_dfs_s1: api: DFS <<: *mdtest_s1 @@ -69,6 +75,10 @@ mdtest_dfs_ec_16p2g1: api: DFS <<: *mdtest_ec_16p2g1 +mdtest_dfs_rp_3g1: + api: DFS + <<: *mdtest_rp_3g1 + mdtest_pil4dfs_s1: api: POSIX+PIL4DFS # handled by ftest <<: *mdtest_s1 @@ -77,6 +87,10 @@ mdtest_pil4dfs_ec_16p2g1: api: POSIX+PIL4DFS # handled by ftest <<: *mdtest_ec_16p2g1 +mdtest_pil4dfs_rp_3g1: + api: POSIX+PIL4DFS # handled by ftest + <<: *mdtest_rp_3g1 + dfuse: disable_caching: true diff --git a/src/tests/ftest/performance/mdtest_hard.py b/src/tests/ftest/performance/mdtest_hard.py index 2eebc5738a8..a1bf2ec3076 100644 --- a/src/tests/ftest/performance/mdtest_hard.py +++ b/src/tests/ftest/performance/mdtest_hard.py @@ -33,6 +33,15 @@ def test_performance_mdtest_hard_dfs_ec_16p2g1(self): """ self.run_performance_mdtest(namespace="/run/mdtest_dfs_ec_16p2g1/*") + def test_performance_mdtest_hard_dfs_rp_3g1(self): + """Test Description: Run MDTest Hard, DFS, RP_3G1. + + :avocado: tags=all,manual + :avocado: tags=performance + :avocado: tags=MdtestHard,test_performance_mdtest_hard_dfs_rp_3g1 + """ + self.run_performance_mdtest(namespace="/run/mdtest_dfs_rp_3g1/*") + def test_performance_mdtest_hard_pil4dfs_s1(self): """Test Description: Run MDTest Hard, dfuse + pil4dfs, S1.
@@ -51,3 +60,12 @@ def test_performance_mdtest_hard_pil4dfs_ec_16p2g1(self): :avocado: tags=MdtestHard,test_performance_mdtest_hard_pil4dfs_ec_16p2g1 """ self.run_performance_mdtest(namespace="/run/mdtest_pil4dfs_ec_16p2g1/*") + + def test_performance_mdtest_hard_pil4dfs_rp_3g1(self): + """Test Description: Run MDTest Hard, dfuse + pil4dfs, RP_3G1. + + :avocado: tags=all,manual + :avocado: tags=performance + :avocado: tags=MdtestHard,test_performance_mdtest_hard_pil4dfs_rp_3g1 + """ + self.run_performance_mdtest(namespace="/run/mdtest_pil4dfs_rp_3g1/*") diff --git a/src/tests/ftest/performance/mdtest_hard.yaml b/src/tests/ftest/performance/mdtest_hard.yaml index ae3fcebaf5c..0599ea61319 100644 --- a/src/tests/ftest/performance/mdtest_hard.yaml +++ b/src/tests/ftest/performance/mdtest_hard.yaml @@ -28,8 +28,7 @@ server_config: pool: size: 95% - control_method: dmg - properties: ec_cell_sz:128KiB + properties: ec_cell_sz:1MiB container: type: POSIX @@ -47,7 +46,7 @@ mdtest: &mdtest_base write_bytes: 3901 num_of_files_dirs: 100000000 stonewall_timer: 30 - stonewall_statusfile: "/var/tmp/daos_testing/stoneWallingStatusFile" + stonewall_statusfile: stoneWallingStatusFile dfs_destroy: false mdtest_s1: &mdtest_s1 @@ -62,6 +61,12 @@ mdtest_ec_16p2g1: &mdtest_ec_16p2g1 dfs_dir_oclass: RP_3GX dfs_chunk: 16MiB +mdtest_rp_3g1: &mdtest_rp_3g1 + <<: *mdtest_base + dfs_oclass: RP_3G1 + dfs_dir_oclass: RP_3GX + dfs_chunk: 1MiB + mdtest_dfs_s1: api: DFS <<: *mdtest_s1 @@ -70,6 +75,10 @@ mdtest_dfs_ec_16p2g1: api: DFS <<: *mdtest_ec_16p2g1 +mdtest_dfs_rp_3g1: + api: DFS + <<: *mdtest_rp_3g1 + mdtest_pil4dfs_s1: api: POSIX+PIL4DFS # handled by ftest <<: *mdtest_s1 @@ -78,6 +87,13 @@ mdtest_pil4dfs_ec_16p2g1: api: POSIX+PIL4DFS # handled by ftest <<: *mdtest_ec_16p2g1 +mdtest_pil4dfs_rp_3g1: + api: POSIX+PIL4DFS # handled by ftest + <<: *mdtest_rp_3g1 + +dfuse: + disable_caching: true + client: env_vars: - D_LOG_MASK=INFO diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 74b79fb9796..d3e32997343 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -229,9 +229,8 @@ def __init__(self, group, bin_dir, cert_dir, config_file, run_user, config_temp= the hosts using the config_file specification. Defaults to None. manager (str, optional): the name of the JobManager class used to manage the YamlCommand defined through the "job" attribute. - Defaults to "Orterun". - outputdir (str, optional): path to avocado test outputdir. Defaults - to None. + Defaults to "Systemctl". + outputdir (str, optional): path to avocado test outputdir. Defaults to None. 
""" agent_command = get_agent_command( group, cert_dir, bin_dir, config_file, run_user, config_temp) @@ -283,8 +282,7 @@ def start(self): self._hosts, self.manager.command) # Copy certificates - self.manager.job.copy_certificates( - get_log_file("daosCA/certs"), self._hosts) + self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts) # Verify the socket directory exists when using a non-systemctl manager if self.verify_socket_dir: diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py index 42e05937f37..a70973659c9 100644 --- a/src/tests/ftest/util/apricot/apricot/test.py +++ b/src/tests/ftest/util/apricot/apricot/test.py @@ -10,6 +10,7 @@ import re import sys from ast import literal_eval +from getpass import getuser from time import time from agent_utils import DaosAgentManager, include_local_host @@ -736,7 +737,7 @@ def setUp(self): # Toggle whether to dump server ULT stacks on failure self.__dump_engine_ult_on_failure = self.params.get( - "dump_engine_ult_on_failure", "/run/setup/*", True) + "dump_engine_ult_on_failure", "/run/setup/*", self.__dump_engine_ult_on_failure) # # Find a configuration that meets the test requirements # self.config = Configuration( @@ -1065,12 +1066,10 @@ def add_agent_manager(self, group=None, config_file=None, config_temp=None): """ if group is None: group = self.server_group - if config_file is None and self.agent_manager_class == "Systemctl": + + if config_file is None: config_file = self.test_env.agent_config config_temp = self.get_config_file(group, "agent", self.test_dir) - elif config_file is None: - config_file = self.get_config_file(group, "agent") - config_temp = None # Verify the correct configuration files have been provided if self.agent_manager_class == "Systemctl" and config_temp is None: @@ -1079,10 +1078,12 @@ def add_agent_manager(self, group=None, config_file=None, config_temp=None): "file provided for the Systemctl manager class!") # Define the location of the certificates - if self.agent_manager_class == "Systemctl": + if self.agent_manager_class == "Systemctl" and self.test_env.agent_user != getuser(): + # Default directory requiring privileged access cert_dir = os.path.join(os.sep, "etc", "daos", "certs") else: - cert_dir = self.workdir + # Test-specific directory not requiring privileged access + cert_dir = os.path.join(self.test_dir, "certs") self.agent_managers.append( DaosAgentManager( @@ -1115,6 +1116,8 @@ def add_server_manager(self, group=None, svr_config_file=None, """ if group is None: group = self.server_group + + # Set default server config files if svr_config_file is None and self.server_manager_class == "Systemctl": svr_config_file = self.test_env.server_config svr_config_temp = self.get_config_file( @@ -1122,12 +1125,6 @@ def add_server_manager(self, group=None, svr_config_file=None, elif svr_config_file is None: svr_config_file = self.get_config_file(group, "server") svr_config_temp = None - if dmg_config_file is None and self.server_manager_class == "Systemctl": - dmg_config_file = self.test_env.control_config - dmg_config_temp = self.get_config_file(group, "dmg", self.test_dir) - elif dmg_config_file is None: - dmg_config_file = self.get_config_file(group, "dmg") - dmg_config_temp = None # Verify the correct configuration files have been provided if self.server_manager_class == "Systemctl" and svr_config_temp is None: @@ -1135,13 +1132,25 @@ def add_server_manager(self, group=None, svr_config_file=None, "Error adding a DaosServerManager: no temporary 
configuration " "file provided for the Systemctl manager class!") - # Define the location of the certificates + # Set default dmg config files + if dmg_config_file is None: + if self.server_manager_class == "Systemctl": + dmg_config_file = self.test_env.control_config + dmg_config_temp = self.get_config_file(group, "dmg", self.test_dir) + else: + dmg_config_file = os.path.join(self.test_dir, "daos_control.yml") + + # Define server certificate directory if self.server_manager_class == "Systemctl": svr_cert_dir = os.path.join(os.sep, "etc", "daos", "certs") - dmg_cert_dir = os.path.join(os.sep, "etc", "daos", "certs") else: svr_cert_dir = self.workdir - dmg_cert_dir = self.workdir + + # Define dmg certificate directory + if self.server_manager_class == "Systemctl" and self.test_env.agent_user != getuser(): + dmg_cert_dir = os.path.join(os.sep, "etc", "daos", "certs") + else: + dmg_cert_dir = os.path.join(self.test_dir, "certs") self.server_managers.append( DaosServerManager( @@ -1681,7 +1690,7 @@ def get_dmg_command(self, index=0): dmg_cmd = get_dmg_command( self.server_group, dmg_cert_dir, self.bin, dmg_config_file, - dmg_config_temp, self.access_points_suffix) + dmg_config_temp, self.access_points_suffix, getuser()) dmg_cmd.hostlist = self.access_points return dmg_cmd diff --git a/src/tests/ftest/util/collection_utils.py b/src/tests/ftest/util/collection_utils.py index a900769f4d6..f651c603f29 100644 --- a/src/tests/ftest/util/collection_utils.py +++ b/src/tests/ftest/util/collection_utils.py @@ -191,23 +191,23 @@ def archive_files(logger, summary, hosts, source, pattern, destination, depth, t logger.debug("No %s files found on %s", os.path.join(source, pattern), hosts) return return_code - if "log" in pattern: - # Remove any empty files - return_code |= remove_empty_files(logger, file_hosts, source, pattern, depth, test_result) + # if "log" in pattern: + # # Remove any empty files + # return_code |= remove_empty_files(logger, file_hosts, source, pattern, depth, test_result) - # Report an error if any files sizes exceed the threshold - if threshold is not None: - return_code |= check_log_size( - logger, file_hosts, source, pattern, depth, threshold, test_result) + # # Report an error if any files sizes exceed the threshold + # if threshold is not None: + # return_code |= check_log_size( + # logger, file_hosts, source, pattern, depth, threshold, test_result) - # Run cart_logtest on log files - return_code |= cart_log_test(logger, file_hosts, source, pattern, depth, test_result) + # # Run cart_logtest on log files + # return_code |= cart_log_test(logger, file_hosts, source, pattern, depth, test_result) # Remove any empty files return_code |= remove_empty_files(logger, file_hosts, source, pattern, depth, test_result) # Compress any files larger than 1 MB - return_code |= compress_files(logger, file_hosts, source, pattern, depth, test_result) + #return_code |= compress_files(logger, file_hosts, source, pattern, depth, test_result) # Move the test files to the test-results directory on this host return_code |= move_files( @@ -445,21 +445,36 @@ def move_files(logger, hosts, source, pattern, destination, depth, timeout, test tmp_copy_dir = os.path.join(source, tmp_copy_dir) sudo_command = "" - # Create a temporary remote directory - should already exist, see _setup_test_directory() - command = f"mkdir -p '{tmp_copy_dir}'" - result = run_remote(logger, hosts, command) - if not result.passed: - message = (f"Error creating temporary remote copy directory '{tmp_copy_dir}' on " - 
f"{result.failed_hosts}") - test_result.fail_test(logger, "Process", message) - return_code = 16 - hosts = result.passed_hosts.copy() + compute_hosts = NodeSet.fromlist(_host for _host in hosts if _host.startswith('x')) + if destination.startswith("/lus/") and compute_hosts: + # Optimize collection to run a single command and not use clush --rcopy + hosts = hosts - compute_hosts + commands = [] + + # Create the host-specific directory + commands.append(f"mkdir -p '{destination}'.$(hostname)") + + # Move all the source files matching the pattern into the host-specific directory + other = f"-print0 | xargs -0 -r0 -I '{{}}' {sudo_command}mv '{{}}' " \ + f"'{destination}'.$(hostname)/" + commands.append(find_command(source, pattern, depth, other)) + + result = run_remote(logger, compute_hosts, " && ".join(commands), timeout=timeout) + if not result.passed: + message = f"Error moving files from {source} to {destination}" + test_result.fail_test(logger, "Process", message) + return_code = 16 + + # Continue clush --rcopy archiving if there are remaining hosts if not hosts: return return_code + # Create a temporary remote directory - should already exist, see _setup_test_directory() # Move all the source files matching the pattern into the temporary remote directory + mkdir_command = f"mkdir -p '{tmp_copy_dir}'" other = f"-print0 | xargs -0 -r0 -I '{{}}' {sudo_command}mv '{{}}' '{tmp_copy_dir}'/" - result = run_remote(logger, hosts, find_command(source, pattern, depth, other)) + command = f"{mkdir_command} && {find_command(source, pattern, depth, other)}" + result = run_remote(logger, hosts, command) if not result.passed: message = (f"Error moving files to temporary remote copy directory '{tmp_copy_dir}' on " f"{result.failed_hosts}") @@ -924,14 +939,14 @@ def collect_test_result(logger, test, test_result, job_results_dir, stop_daos, a "depth": 1, "timeout": 900, } - remote_files["valgrind log files"] = { - "source": test_env.shared_dir, - "destination": os.path.join(job_results_dir, "latest", TEST_RESULTS_DIRS[4]), - "pattern": "valgrind*", - "hosts": test.host_info.servers.hosts, - "depth": 1, - "timeout": 900, - } + # remote_files["valgrind log files"] = { + # "source": test_env.shared_dir, + # "destination": os.path.join(job_results_dir, "latest", TEST_RESULTS_DIRS[4]), + # "pattern": "valgrind*", + # "hosts": test.host_info.servers.hosts, + # "depth": 1, + # "timeout": 900, + # } for index, hosts in enumerate(core_files): remote_files[f"core files {index + 1}/{len(core_files)}"] = { "source": core_files[hosts]["path"], diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index 06da3b27868..2a533e66e9e 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -59,7 +59,13 @@ def __init__(self, namespace, command, path="", subprocess=False, check_results= self.exit_status_exception = True self.output_check = "both" self.verbose = True + # TODO proper. Really just need this for all "client" commands self.env = EnvironmentVariables() + _env_from_os = ("DAOS_AGENT_DRPC_DIR",) + for key in _env_from_os: + val = os.environ.get(key) + if val is not None: + self.env[key] = val # User to run the command as. 
"root" is equivalent to sudo self.run_user = run_user @@ -1022,12 +1028,20 @@ def copy_certificates(self, source, hosts): src_file = os.path.join(source, file_name) dst_file = os.path.join(name, file_name) self.log.debug(" %s -> %s", src_file, dst_file) + # Don't use sudo if running as the current user + # TODO proper + _sudo = self.run_user != getuser() or \ + self.certificate_owner != getuser() or dst_file.startswith('/etc') + _owner = self.certificate_owner if _sudo else None result = distribute_files( self.log, hosts, src_file, dst_file, mkdir=False, - verbose=False, sudo=True, owner=self.certificate_owner) + verbose=True, sudo=_sudo, owner=_owner) if not result.passed: + # TODO warns on copying dmg certs because it is done multiple times + # to the same destination self.log.info( - " WARNING: %s copy failed on %s", dst_file, result.failed_hosts) + " WARNING: %s copy failed on %s:\n%s", + dst_file, result.failed_hosts, result) names.add(name) yaml = yaml.other_params @@ -1055,9 +1069,14 @@ def copy_configuration(self, hosts): self.log.info( "Copying %s yaml configuration file to %s on %s", self.temporary_file, self.yaml.filename, hosts) + # Don't use sudo if running as the current user + # TODO proper + _sudo = self.run_user != getuser() or self.certificate_owner != getuser() or \ + self.yaml.filename.startswith('/etc') + _owner = self.certificate_owner if _sudo else None result = distribute_files( - self.log, hosts, self.temporary_file, self.yaml.filename, verbose=False, - sudo=True) + self.log, hosts, self.temporary_file, self.yaml.filename, + verbose=True, sudo=_sudo, owner=_owner) if not result.passed: raise CommandFailure( f"ERROR: Copying yaml configuration file to {result.failed_hosts}") @@ -1083,17 +1102,16 @@ def verify_socket_directory(self, user, hosts): self.log.info( "%s: creating socket directory %s for user %s on %s", self.command, directory, user, nodes) - result = create_directory(self.log, nodes, directory, user="root") + if user == getuser(): + result = create_directory(self.log, nodes, directory) + else: + result = create_directory(self.log, nodes, directory, user="root") + change_file_owner( + self.log, nodes, directory, user, get_primary_group(user), user="root") if not result.passed: raise CommandFailure( f"{self.command}: error creating socket directory {directory} for user " f"{user} on {result.failed_hosts}") - result = change_file_owner( - self.log, nodes, directory, user, get_primary_group(user), user="root") - if not result.passed: - raise CommandFailure( - f"{self.command}: error setting socket directory {directory} owner for " - f"user {user} on {result.failed_hosts}") def get_socket_dir(self): """Get the socket directory. @@ -1119,14 +1137,14 @@ def _get_new(self): class SubprocessManager(ObjectWithParameters): """Defines an object that manages a sub process launched with orterun.""" - def __init__(self, command, manager="Orterun", namespace=None): + def __init__(self, command, manager="Systemctl", namespace=None): """Create a SubprocessManager object. Args: command (YamlCommand): command to manage as a subprocess manager (str, optional): the name of the JobManager class used to manage the YamlCommand defined through the "job" attribute. - Defaults to "OpenMpi" + Defaults to "Systemctl" namespace (str): yaml namespace (path to parameters) """ super().__init__(namespace) @@ -1290,7 +1308,7 @@ def get_config_value(self, name): return value def get_current_state(self): - """Get the current state of the daos_server ranks. 
+ """Get the current state of the service. Returns: dict: dictionary of server rank keys, each referencing a dictionary diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 34e21f66d0a..a12646af389 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -19,7 +19,8 @@ class DmgJsonCommandFailure(CommandFailure): """Exception raised when a dmg --json command fails.""" -def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hostlist_suffix=None): +def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hostlist_suffix=None, + run_user=None): """Get a dmg command object. Args: @@ -33,6 +34,8 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos utilizes the file specified by config_file. hostlist_suffix (str, optional): Suffix to append to each host name. Defaults to None. + run_user (str, optional): user to run as. Defaults to None, which will run commands as + the current user. Returns: DmgCommand: the dmg command object @@ -40,7 +43,7 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) config = DmgYamlParameters(config_file, group, transport_config) - command = DmgCommand(bin_dir, config, hostlist_suffix) + command = DmgCommand(bin_dir, config, hostlist_suffix, run_user) if config_temp: # Setup the DaosServerCommand to write the config file data to the # temporary file and then copy the file to all the hosts using the diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index a601b590033..5bff504241a 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -15,7 +15,7 @@ class DmgCommandBase(YamlCommand): """Defines a base object representing a dmg command.""" - def __init__(self, path, yaml_cfg=None, hostlist_suffix=None): + def __init__(self, path, yaml_cfg=None, hostlist_suffix=None, run_user=None): """Create a dmg Command object. Args: @@ -24,8 +24,10 @@ def __init__(self, path, yaml_cfg=None, hostlist_suffix=None): settings. Defaults to None, in which case settings must be supplied as command-line parameters. hostlist_suffix (str, optional): Suffix to append to each host name. Defaults to None. + run_user (str, optional): user to run as. Defaults to None, which will run commands as + the current user. """ - super().__init__("/run/dmg/*", "dmg", path, yaml_cfg) + super().__init__("/run/dmg/*", "dmg", path, yaml_cfg, run_user=run_user) # If running dmg on remote hosts, this list needs to include those hosts self.temporary_file_hosts = NodeSet(gethostname().split(".")[0]) diff --git a/src/tests/ftest/util/file_count_test_base.py b/src/tests/ftest/util/file_count_test_base.py index be21183c97a..f95e22bd4bd 100644 --- a/src/tests/ftest/util/file_count_test_base.py +++ b/src/tests/ftest/util/file_count_test_base.py @@ -17,15 +17,15 @@ class FileCountTestBase(IorTestBase, MdtestBase): :avocado: recursive """ - def add_containers(self, file_oclass=None, dir_oclass=None): - """Create a list of containers that the various jobs use for storage. + def get_file_write_container(self, file_oclass=None, dir_oclass=None): + """Create a container, set oclass, dir_oclass, and add rd_fac property based on oclass. Args: - file_oclass (str, optional): file object class of container. - Defaults to None. - dir_oclass (str, optional): dir object class of container. - Defaults to None. 
+ file_oclass (str, optional): file object class of container. Defaults to None. + dir_oclass (str, optional): dir object class of container. Defaults to None. + Returns: + TestContainer: Created container with oclass, dir_oclass, and rd_fac set. """ # Create a container and add it to the overall list of containers @@ -92,12 +92,12 @@ def run_file_count(self): rd_fac = extract_redundancy_factor(oclass) dir_oclass = self.get_diroclass(rd_fac) self.mdtest_cmd.dfs_dir_oclass.update(dir_oclass) - self.container = self.add_containers(oclass, dir_oclass) + self.container = self.get_file_write_container(oclass, dir_oclass) try: self.processes = mdtest_np self.ppn = mdtest_ppn if self.mdtest_cmd.api.value == 'POSIX': - self.mdtest_cmd.env.update(LD_PRELOAD=intercept, D_IL_REPORT='1') + self.mdtest_cmd.env.update(LD_PRELOAD=intercept) self.execute_mdtest() else: self.execute_mdtest() @@ -111,14 +111,27 @@ def run_file_count(self): # run ior self.log.info("=======>>>Starting IOR with %s and %s", api, oclass) self.ior_cmd.dfs_oclass.update(oclass) - self.container = self.add_containers(oclass) + self.container = self.get_file_write_container(oclass) self.update_ior_cmd_with_pool(False) try: self.processes = ior_np self.ppn = ior_ppn if api == 'HDF5-VOL': + # Format the container properties so that it works with HDF5-VOL env var. + # Each entry:value pair needs to be separated by a semicolon. Since we're + # using this in the mpirun command, semicolon would indicate the end of the + # command, so quote the whole thing. + cont_props = self.container.properties.value + cont_props_hdf5_vol = '"' + cont_props.replace(",", ";") + '"' + self.log.info("cont_props_hdf5_vol = %s", cont_props_hdf5_vol) + env = self.ior_cmd.env.copy() + env.update({ + "HDF5_DAOS_OBJ_CLASS": oclass, + "HDF5_DAOS_FILE_PROP": cont_props_hdf5_vol + }) self.ior_cmd.api.update('HDF5') - self.run_ior_with_pool(create_pool=False, plugin_path=hdf5_plugin_path) + self.run_ior_with_pool( + create_pool=False, plugin_path=hdf5_plugin_path, env=env) elif self.ior_cmd.api.value == 'POSIX': self.run_ior_with_pool(create_pool=False, intercept=intercept) else: diff --git a/src/tests/ftest/util/general_utils.py b/src/tests/ftest/util/general_utils.py index 84e55601ff2..09f4004aecb 100644 --- a/src/tests/ftest/util/general_utils.py +++ b/src/tests/ftest/util/general_utils.py @@ -137,6 +137,10 @@ def run_command(command, timeout=60, verbose=True, raise_exception=True, """ log = getLogger() msg = None + if env is not None and "DAOS_AGENT_DRPC_DIR" not in env: + daos_agent_drpc_dir = os.environ.get("DAOS_AGENT_DRPC_DIR") + if daos_agent_drpc_dir: + env["DAOS_AGENT_DRPC_DIR"] = daos_agent_drpc_dir kwargs = { "cmd": command, "timeout": timeout, @@ -148,6 +152,7 @@ def run_command(command, timeout=60, verbose=True, raise_exception=True, } if verbose: log.info("Command environment vars:\n %s", env) + try: # Block until the command is complete or times out return process.run(**kwargs) @@ -1024,7 +1029,8 @@ def percent_change(val1, val2): return math.nan -def get_journalctl_command(since, until=None, system=False, units=None, identifiers=None): +def get_journalctl_command(since, until=None, system=False, units=None, identifiers=None, + run_user="root"): """Get the journalctl command to capture all unit/identifier activity from since to until. Args: @@ -1036,24 +1042,27 @@ def get_journalctl_command(since, until=None, system=False, units=None, identifi None. identifiers (str/list, optional): show messages for the specified syslog identifier(s). 
Defaults to None. + run_user (str, optional): user to run as. Defaults to root. Returns: str: journalctl command to capture all unit activity """ - command = ["sudo", os.path.join(os.sep, "usr", "bin", "journalctl")] + command = [os.path.join(os.sep, "usr", "bin", "journalctl")] if system: command.append("--system") + if run_user != "root": + command.append("--user") for key, values in {"unit": units or [], "identifier": identifiers or []}.items(): for item in values if isinstance(values, (list, tuple)) else [values]: - command.append("--{}={}".format(key, item)) - command.append("--since=\"{}\"".format(since)) + command.append(f"--{key}={item}") + command.append(f'--since="{since}"') if until: - command.append("--until=\"{}\"".format(until)) + command.append(f'--until="{until}"') - return " ".join(command) + return command_as_user(" ".join(command), run_user) -def get_journalctl(hosts, since, until, journalctl_type): +def get_journalctl(hosts, since, until, journalctl_type, run_user="root"): """Run the journalctl on the hosts. Args: hosts (NodeSet): hosts on which to run journalctl. since (str): Start time to search the log. until (str): End time to search the log. journalctl_type (str): String to search in the log. -t param for journalctl. + run_user (str, optional): user to run as. Defaults to root. Returns: list: a list of dictionaries containing the following key/value pairs: "data": data requested for the group of hosts """ - command = get_journalctl_command(since, until, True, identifiers=journalctl_type) + system = run_user != getuser() + command = get_journalctl_command( + since, until, system, identifiers=journalctl_type, run_user=run_user) err = "Error gathering system log events" return get_host_data(hosts=hosts, command=command, text="journalctl", error=err) diff --git a/src/tests/ftest/util/ior_test_base.py b/src/tests/ftest/util/ior_test_base.py index 625a283593e..8f056c34002 100644 --- a/src/tests/ftest/util/ior_test_base.py +++ b/src/tests/ftest/util/ior_test_base.py @@ -225,8 +225,6 @@ def run_ior(self, manager, processes, intercept=None, display_space=True, env = self.ior_cmd.get_default_env(str(manager), self.client_log) if intercept: env['LD_PRELOAD'] = intercept - if 'D_IL_REPORT' not in env: - env['D_IL_REPORT'] = '1' if plugin_path: env["HDF5_VOL_CONNECTOR"] = "daos" env["HDF5_PLUGIN_PATH"] = str(plugin_path) diff --git a/src/tests/ftest/util/ior_utils.py b/src/tests/ftest/util/ior_utils.py index 7851e4587d7..ffde4454fcb 100644 --- a/src/tests/ftest/util/ior_utils.py +++ b/src/tests/ftest/util/ior_utils.py @@ -588,7 +588,7 @@ def get_unique_log(self, container): return '.'.join(['_'.join(parts), 'log']) def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path=None, - dfuse=None, display_space=True, fail_on_warning=False, unique_log=True, il_report=1): + dfuse=None, display_space=True, fail_on_warning=False, unique_log=True, il_report=None): # pylint: disable=too-many-arguments """Run ior. @@ -609,7 +609,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path= unique_log (bool, optional): whether or not to update the log file with a new unique log file name. Defaults to True. il_report (int, optional): D_IL_REPORT value to use when 'intercept' is specified and a - value does not already exist in the environment. Defaults to 1. + value does not already exist in the environment. Defaults to None.
Raises: CommandFailure: if there is an error running the ior command @@ -627,7 +627,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path= self.env["LD_PRELOAD"] = intercept if "D_LOG_MASK" not in self.env: self.env["D_LOG_MASK"] = "INFO" - if "D_IL_REPORT" not in self.env: + if "D_IL_REPORT" not in self.env and il_report is not None: self.env["D_IL_REPORT"] = str(il_report) if plugin_path: diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py index 2b5f2cd6c26..23c6bb7c358 100644 --- a/src/tests/ftest/util/job_manager_utils.py +++ b/src/tests/ftest/util/job_manager_utils.py @@ -5,8 +5,10 @@ """ import os import re +import tempfile import time # pylint: disable=too-many-lines +from getpass import getuser from shutil import which from ClusterShell.NodeSet import NodeSet @@ -14,6 +16,7 @@ from command_utils_base import BasicParameter, EnvironmentVariables, FormattedParameter from env_modules import load_mpi from exception_utils import CommandFailure, MPILoadError +from file_utils import distribute_files from general_utils import (get_job_manager_class, get_journalctl_command, journalctl_time, pcmd, run_pcmd) from run_utils import run_remote, stop_processes @@ -651,7 +654,7 @@ def __init__(self, job): """ super().__init__("/run/systemctl/*", "systemd", job) self.job = job - self._systemctl = SystemctlCommand() + self._systemctl = SystemctlCommand(run_user=job.run_user) self._systemctl.service.value = self.job.service_name self.timestamps = { @@ -816,25 +819,16 @@ def _run_unit_command(self, command): CommandFailure: if there is an issue running the command Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + RemoteCommandResult: a grouping of the command results from the same hosts with the + same return status """ self._systemctl.unit_command.value = command self.timestamps[command] = journalctl_time() - result = pcmd(self._hosts, str(self), self.verbose, self.timeout) - if 255 in result: - raise CommandFailure( - "Timeout detected running '{}' with a {}s timeout on {}".format( - str(self), self.timeout, NodeSet.fromlist(result[255]))) - - if 0 not in result or len(result) > 1: - failed = [] - for item, value in list(result.items()): - if item != 0: - failed.extend(value) + result = run_remote(self.log, self._hosts, str(self), self.verbose, self.timeout) + if not result.passed: raise CommandFailure( - "Error occurred running '{}' on {}".format(str(self), NodeSet.fromlist(failed))) + "Error occurred running '{}' on {}".format(str(self), result.failed_hosts)) return result def _report_unit_command(self, command): @@ -847,8 +841,8 @@ def _report_unit_command(self, command): CommandFailure: if there is an issue running the command Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. 
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         try:
@@ -856,7 +850,8 @@ def _report_unit_command(self, command):
         except CommandFailure as error:
             self.log.info(error)
             command = get_journalctl_command(
-                self.timestamps[command], units=self._systemctl.service.value)
+                self.timestamps[command], units=self._systemctl.service.value,
+                run_user=self.job.run_user)
             self.display_log_data(self.get_log_data(self._hosts, command))
             raise CommandFailure(error) from error

@@ -867,8 +862,8 @@ def service_enable(self):
             CommandFailure: if unable to enable

         Returns:
-            dict: a dictionary of return codes keys and accompanying NodeSet
-                values indicating which hosts yielded the return code.
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         return self._report_unit_command("enable")
@@ -880,8 +875,8 @@ def service_disable(self):
             CommandFailure: if unable to disable

         Returns:
-            dict: a dictionary of return codes keys and accompanying NodeSet
-                values indicating which hosts yielded the return code.
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         return self._report_unit_command("disable")
@@ -893,8 +888,8 @@ def service_start(self):
             CommandFailure: if unable to start

         Returns:
-            dict: a dictionary of return codes keys and accompanying NodeSet
-                values indicating which hosts yielded the return code.
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         return self._report_unit_command("start")
@@ -906,8 +901,8 @@ def service_stop(self):
             CommandFailure: if unable to stop

         Returns:
-            dict: a dictionary of return codes keys and accompanying NodeSet
-                values indicating which hosts yielded the return code.
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         return self._report_unit_command("stop")
@@ -919,8 +914,8 @@ def service_status(self):
             CommandFailure: if unable to get the status

         Returns:
-            dict: a dictionary of return codes keys and accompanying NodeSet
-                values indicating which hosts yielded the return code.
+            RemoteCommandResult: a grouping of the command results from the same hosts with the
+                same return status

         """
         return self._report_unit_command("status")
@@ -1084,7 +1079,8 @@ def search_logs(self, pattern, since, until, quantity=1, timeout=60, verbose=Fal
                 (str) - string indicating the number of patterns found in what duration

         """
-        command = get_journalctl_command(since, until, units=self._systemctl.service.value)
+        command = get_journalctl_command(
+            since, until, units=self._systemctl.service.value, run_user=self.job.run_user)
         self.log.info("Searching for '%s' in '%s' output on %s", pattern, command, self._hosts)

         log_data = None
@@ -1171,7 +1167,8 @@ def dump_logs(self, hosts=None, timestamp=None):
         if timestamp:
             if hosts is None:
                 hosts = self._hosts
-            command = get_journalctl_command(timestamp, units=self._systemctl.service.value)
+            command = get_journalctl_command(
+                timestamp, units=self._systemctl.service.value, run_user=self.job.run_user)
             self.display_log_data(self.get_log_data(hosts, command))

     def log_additional_debug_data(self, hosts, since, until):
@@ -1184,7 +1181,8 @@ def log_additional_debug_data(self, hosts, since, until):
                 to None, in which case it is not utilized.
""" command = get_journalctl_command( - since, until, True, identifiers=["kernel", self._systemctl.service.value]) + since, until, True, identifiers=["kernel", self._systemctl.service.value], + run_user=self.job.run_user) details = self.str_log_data(self.get_log_data(hosts, command)) self.log.info("Additional '%s' output:\n%s", command, details) diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 0f7284c50ef..4df840cd86b 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -129,12 +129,14 @@ def setup_systemctl(logger, servers, clients, test_env): __add_systemctl_override( logger, servers, "daos_server.service", "root", os.path.join(test_env.daos_prefix, "bin", "daos_server"), test_env.server_config, - None, None)) + os.environ.get("DAOS_TEST_SYSTEMD_PATH"), + os.environ.get("DAOS_TEST_SYSTEMD_LIBRARY_PATH"))) systemctl_configs.update( __add_systemctl_override( logger, clients, "daos_agent.service", test_env.agent_user, os.path.join(test_env.daos_prefix, "bin", "daos_agent"), test_env.agent_config, - None, None)) + os.environ.get("DAOS_TEST_SYSTEMD_PATH"), + os.environ.get("DAOS_TEST_SYSTEMD_LIBRARY_PATH"))) return systemctl_configs @@ -602,7 +604,7 @@ def _setup_test_directory(self, logger, test): hosts.add(self.local_host) logger.debug("Setting up '%s' on %s:", test_env.log_dir, hosts) commands = [ - f"sudo -n rm -fr {test_env.log_dir}", + f"rm -fr {test_env.log_dir}", f"mkdir -p {test_env.log_dir}", f"chmod a+wrx {test_env.log_dir}", ] @@ -612,11 +614,11 @@ def _setup_test_directory(self, logger, test): directories.append(os.path.join(test_env.log_dir, directory)) commands.append(f"mkdir -p {' '.join(directories)}") commands.append(f"ls -al {test_env.log_dir}") - for command in commands: - if not run_remote(logger, hosts, command).passed: - message = "Error setting up the common test directory on all hosts" - self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) - return 128 + command = " && ".join(commands) + if not run_remote(logger, hosts, command).passed: + message = "Error setting up the common test directory on all hosts" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 return 0 def _user_setup(self, logger, test, create=False): diff --git a/src/tests/ftest/util/network_utils.py b/src/tests/ftest/util/network_utils.py index e3802364d8f..d2ee78ea269 100644 --- a/src/tests/ftest/util/network_utils.py +++ b/src/tests/ftest/util/network_utils.py @@ -10,6 +10,7 @@ from ClusterShell.NodeSet import NodeSet # pylint: disable=import-error,no-name-in-module +from util.host_utils import get_local_host from util.run_utils import run_remote # Order here is used to select default provider in environment_utils @@ -398,6 +399,7 @@ def get_fastest_interface(logger, hosts, verbose=True): Returns: str: the fastest active interface common to all hosts specified """ + hosts = NodeSet(hosts) | NodeSet(get_local_host()) common_interfaces = get_common_interfaces(logger, hosts, verbose) # Find the speed of each common active interface in order to be able to choose the fastest diff --git a/src/tests/ftest/util/run_utils.py b/src/tests/ftest/util/run_utils.py index 2f9d33b07c5..a57e5d80c85 100644 --- a/src/tests/ftest/util/run_utils.py +++ b/src/tests/ftest/util/run_utils.py @@ -345,7 +345,8 @@ def log_result_data(log, data): log.debug("%s%s", " " * indent, line) -def get_clush_command(hosts, args=None, command="", command_env=None, command_sudo=False): +def 
+                      timeout=None, fanout=None):
     """Get the clush command with optional sudo arguments.

     Args:
@@ -355,11 +356,21 @@ def get_clush_command(hosts, args=None, command="", command_env=None, command_su
         command_env (EnvironmentVariables, optional): environment variables to export with the
             command. Defaults to None.
         sudo (bool, optional): whether to run the command with sudo privileges. Defaults to False.
+        timeout (int, optional): number of seconds to wait for the command to complete.
+            Defaults to None.
+        fanout (int, optional): fanout to use. Defaults to the max of the
+            clush default (64) and the number of available cores.

     Returns:
         str: the clush command

     """
+    if fanout is None:
+        fanout = max(64, len(os.sched_getaffinity(0)))
     cmd_list = ["clush"]
+    if timeout is not None:
+        cmd_list.extend(["-u", str(timeout)])
+    if fanout is not None:
+        cmd_list.extend(["-f", str(fanout)])
     if args:
         cmd_list.append(args)
     cmd_list.extend(["-w", str(hosts)])
@@ -427,6 +438,35 @@ def run_local(log, command, verbose=True, timeout=None, stderr=False, capture_ou
     return results


+def daos_env_str(env):
+    """Return an export string of only the daos-relevant variables in an env.
+
+    TODO: ideally this should live under EnvironmentVariables, but the imports are broken.
+
+    Args:
+        env (dict): the original env
+
+    Returns:
+        str: a copy of env including only daos-relevant variables,
+            converted to an export str
+
+    """
+    def _include(key):
+        return key.startswith('FI_') or \
+            key.startswith('OFI_') or \
+            key.startswith('DAOS_') or \
+            key.startswith('D_') or \
+            key.startswith('CRT_') or \
+            key.startswith('DD_') or \
+            key.startswith('MPI') or \
+            key in ('PATH', 'LD_LIBRARY_PATH')
+    export_str = ';'.join(
+        f'export {key}' if value is None else f"export {key}='{value}'"
+        for key, value in env.items() if _include(key))
+    if export_str:
+        export_str = "".join([export_str, ';'])
+    return export_str
+
+
 def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False, stderr=False,
                fanout=None):
     """Run the command on the remote hosts.

     Args:
@@ -453,13 +493,16 @@ def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False,
     if fanout is None:
         fanout = max(task.info('fanout'), len(os.sched_getaffinity(0)))
     task.set_info('fanout', fanout)
-    # Enable forwarding of the ssh authentication agent connection
-    task.set_info("ssh_options", "-oForwardAgent=yes")
+    # Enable forwarding of the ssh authentication agent connection.
+    # Force pseudo-terminal allocation so timed-out commands are killed remotely.
+    task.set_info("ssh_options", "-oForwardAgent=yes -q -t -t")
     if verbose:
         if timeout is None:
             log.debug("Running on %s without a timeout: %s", hosts, command)
         else:
             log.debug("Running on %s with a %s second timeout: %s", hosts, timeout, command)
+    env_str = daos_env_str(os.environ)
+    command = f'{env_str}{command}'
     task.run(command=command, nodes=hosts, timeout=timeout)
     results = CommandResult(command, task)
     if verbose:
@@ -571,7 +614,7 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
         log.debug(
             "Killing%s any processes on %s that match %s and then waiting %s seconds",
             step[0], result.passed_hosts, pattern_match, step[1])
-        kill_command = f"sudo /usr/bin/pkill{step[0]} {pattern}"
+        kill_command = f"sudo -n /usr/bin/pkill{step[0]} {pattern}"
         run_remote(log, result.passed_hosts, kill_command, verbose, timeout)
         time.sleep(step[1])
     result = run_remote(log, result.passed_hosts, command, verbose, timeout)
diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py
index 752473021a3..649885ac974 100644
--- a/src/tests/ftest/util/server_utils.py
+++ b/src/tests/ftest/util/server_utils.py
@@ -5,7 +5,6 @@
 """
 # pylint: disable=too-many-lines
-import os
 import random
 import time
 from getpass import getuser
@@ -46,6 +45,7 @@ def get_server_command(group, cert_dir, bin_dir, config_file, config_temp=None):
     common_config = CommonConfig(group, transport_config)
     config = DaosServerYamlParameters(config_file, common_config)
     command = DaosServerCommand(bin_dir, config, None)
+    command.run_user = "root"
     if config_temp:
         # Setup the DaosServerCommand to write the config file data to the
         # temporary file and then copy the file to all the hosts using the
@@ -104,7 +104,8 @@ def __init__(self, group, bin_dir,
         # Dmg command to access this group of servers which will be configured
         # to access the daos_servers when they are started
         self.dmg = get_dmg_command(
-            group, dmg_cert_dir, bin_dir, dmg_config_file, dmg_config_temp, access_points_suffix)
+            group, dmg_cert_dir, bin_dir, dmg_config_file, dmg_config_temp, access_points_suffix,
+            getuser())

         # Set the correct certificate file ownership
         if manager == "Systemctl":
@@ -262,7 +263,7 @@ def prepare(self, storage=True):
             self.manager.mca.update({"plm_rsh_args": "-l root"}, "orterun.mca", True)

         # Verify the socket directory exists when using a non-systemctl manager
-        self.verify_socket_directory(getuser())
+        self.verify_socket_directory(self.manager.job.certificate_owner)

     def clean_files(self, verbose=True):
         """Clean up the daos server files.
@@ -1156,10 +1157,9 @@ def get_daos_metrics(self, verbose=False, timeout=60):
         """
         engines_per_host = self.get_config_value("engines_per_host") or 1
         engines = []
-        daos_metrics_exe = os.path.join(self.manager.job.command_path, "daos_metrics")
         for engine in range(engines_per_host):
             results = run_pcmd(
                 hosts=self._hosts, verbose=verbose, timeout=timeout,
-                command="sudo {} -S {} --csv".format(daos_metrics_exe, engine))
+                command=f"sudo daos_metrics -S {engine} --csv")
             engines.append(results)
         return engines
diff --git a/src/tests/ftest/util/server_utils_base.py b/src/tests/ftest/util/server_utils_base.py
index bbda608faad..8e211d2bc00 100644
--- a/src/tests/ftest/util/server_utils_base.py
+++ b/src/tests/ftest/util/server_utils_base.py
@@ -34,7 +34,7 @@ class DaosServerCommand(YamlCommand):

     DEFAULT_CONFIG_FILE = os.path.join(os.sep, "etc", "daos", "daos_server.yml")

-    def __init__(self, path="", yaml_cfg=None, timeout=45):
+    def __init__(self, path="", yaml_cfg=None, timeout=45, run_user=None):
         """Create a daos_server command object.

         Args:
@@ -43,9 +43,11 @@ def __init__(self, path="", yaml_cfg=None, timeout=45):
                 Defaults to None.
             timeout (int, optional): number of seconds to wait for patterns to appear in the
                 subprocess output. Defaults to 45 seconds.
+            run_user (str, optional): user to run as. Defaults to None, which will run commands as
+                the current user.
         """
         super().__init__(
-            "/run/daos_server/*", "daos_server", path, yaml_cfg, timeout)
+            "/run/daos_server/*", "daos_server", path, yaml_cfg, timeout, run_user)
         self.pattern = self.NORMAL_PATTERN

         # Command line parameters:
diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py
index 440ffe68f82..b80201a6fdf 100644
--- a/src/tests/ftest/util/server_utils_params.py
+++ b/src/tests/ftest/util/server_utils_params.py
@@ -502,6 +502,7 @@ def __init__(self, base_namespace, index, provider=None, max_storage_tiers=MAX_S
             if name in self.REQUIRED_ENV_VARS:
                 default_env_vars.extend(self.REQUIRED_ENV_VARS[name])
         self.env_vars = BasicParameter(None, default_env_vars)
+        self.env_pass_through = BasicParameter(None, None)

         # the storage configuration for this engine
         self.storage = StorageYamlParameters(self.namespace, max_storage_tiers)
diff --git a/src/tests/ftest/util/soak_test_base.py b/src/tests/ftest/util/soak_test_base.py
index f32e068cb16..5716a9c5ee1 100644
--- a/src/tests/ftest/util/soak_test_base.py
+++ b/src/tests/ftest/util/soak_test_base.py
@@ -570,7 +570,7 @@ def run_soak(self, test_param):
         resv_bytes = self.params.get("resv_bytes", test_param + "*", 500000000)
         ignore_soak_errors = self.params.get("ignore_soak_errors", test_param + "*", False)
         self.enable_il = self.params.get("enable_intercept_lib", test_param + "*", False)
-        self.sudo_cmd = "sudo" if enable_sudo else ""
+        self.sudo_cmd = "sudo -n" if enable_sudo else ""
         self.enable_remote_logging = self.params.get(
             "enable_remote_logging", os.path.join(test_param, "*"), False)
         self.enable_scrubber = self.params.get(
diff --git a/src/tests/ftest/util/soak_utils.py b/src/tests/ftest/util/soak_utils.py
index 39178e0e0d9..e2abe4ca0ad 100644
--- a/src/tests/ftest/util/soak_utils.py
+++ b/src/tests/ftest/util/soak_utils.py
@@ -15,6 +15,7 @@
 import slurm_utils
 from avocado.core.exceptions import TestFail
 from avocado.utils.distro import detect
+from command_utils import command_as_user
 from command_utils_base import EnvironmentVariables
 from daos_racer_utils import DaosRacerCommand
 from data_mover_utils import DcpCommand, FsCopy
@@ -32,7 +33,7 @@
 from mdtest_utils import MdtestCommand
 from oclass_utils import extract_redundancy_factor
 from pydaos.raw import DaosApiError, DaosSnapshot
-from run_utils import run_remote
+from run_utils import daos_env_str, run_remote
 from test_utils_container import add_container

 H_LOCK = threading.Lock()
@@ -497,7 +498,7 @@ def launch_reboot(self, pools, name, results, args):
     self.log.info(
         "<<<Loop %s: %s - rebooting ranks %s at %s>>>\n", self.loop, name, ranks, time.ctime())
     # reboot host in 1 min
-    result = run_remote(self.log, reboot_host, "sudo shutdown -r +1")
+    result = run_remote(self.log, reboot_host, command_as_user("shutdown -r +1", "root"))
     if result.passed:
         status = True
     else:
@@ -536,7 +537,8 @@ def launch_reboot(self, pools, name, results, args):
         # issue a restart
         self.log.info(
             "<<<Loop %s: %s - restarting daos_server on %s at %s>>>\n",
             self.loop, name, reboot_host, time.ctime())
-        cmd_results = run_remote(self.log, reboot_host, "sudo systemctl restart daos_server")
+        cmd_results = run_remote(
+            self.log, reboot_host, command_as_user("systemctl restart daos_server", "root"))
         if cmd_results.passed:
             self.dmg_command.system_query()
             for pool in pools:
@@ -855,7 +857,8 @@ def start_dfuse(self, pool, container, name=None, job_spec=None):
         self.soak_log_dir,
         self.test_name + "_" + name + "_`hostname -s`_"
         "" + "${SLURM_JOB_ID}_" + "daos_dfuse.log")
-    dfuse_env = f"export D_LOG_FILE_APPEND_PID=1;export D_LOG_MASK=ERR;export D_LOG_FILE={dfuselog}"
+    dfuse_env = daos_env_str(os.environ) + \
+        f"export D_LOG_FILE_APPEND_PID=1;export D_LOG_MASK=ERR;export D_LOG_FILE={dfuselog}"
     module_load = f"module use {self.mpi_module_use};module load {self.mpi_module}"

     dfuse_start_cmds = [
@@ -884,9 +887,10 @@ def stop_dfuse(dfuse, vol=False):
             "do daos container destroy --path={0}/\"$file\" ; done".format(
                 dfuse.mount_dir.value)])

+    dfuse_env = daos_env_str(os.environ)
     dfuse_stop_cmds.extend([
-        "clush -S -w $SLURM_JOB_NODELIST \"fusermount3 -uz {0}\"".format(dfuse.mount_dir.value),
-        "clush -S -w $SLURM_JOB_NODELIST \"rm -rf {0}\"".format(dfuse.mount_dir.value)])
+        f'clush -S -w $SLURM_JOB_NODELIST "{dfuse_env}fusermount3 -uz {dfuse.mount_dir.value}"',
+        f'clush -S -w $SLURM_JOB_NODELIST "rm -rf {dfuse.mount_dir.value}"'])

     return dfuse_stop_cmds
@@ -997,10 +1001,8 @@ def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob, oclass_list=None,
             mpirun_cmd.get_params(self)
             if api == "POSIX-LIBPIL4DFS":
                 env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so')
-                env["D_IL_REPORT"] = "1"
             if api == "POSIX-LIBIOIL":
                 env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so')
-                env["D_IL_REPORT"] = "1"
             # add envs if api is HDF5-VOL
             if api == "HDF5-VOL":
                 vol = True
@@ -1166,10 +1168,8 @@ def create_mdtest_cmdline(self, job_spec, pool, ppn, nodesperjob):
         if self.enable_il and api == "POSIX-LIBPIL4DFS":
             env["LD_PRELOAD"] = os.path.join(
                 self.prefix, 'lib64', 'libpil4dfs.so')
-            env["D_IL_REPORT"] = "1"
         if self.enable_il and api == "POSIX-LIBIOIL":
             env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so')
-            env["D_IL_REPORT"] = "1"
         mpirun_cmd = Mpirun(mdtest_cmd, mpi_type=self.mpi_module)
         mpirun_cmd.get_params(self)
         mpirun_cmd.assign_processes(nodesperjob * ppn)
@@ -1297,10 +1297,8 @@ def create_fio_cmdline(self, job_spec, pool):
             cmds.append(f"cd {dfuse.mount_dir.value};")
         if self.enable_il and api == "POSIX-LIBPIL4DFS":
             cmds.append(f"export LD_PRELOAD={os.path.join(self.prefix, 'lib64', 'libpil4dfs.so')}")
-            cmds.append("export D_IL_REPORT=1")
         if self.enable_il and api == "POSIX-LIBIOIL":
             cmds.append(f"export LD_PRELOAD={os.path.join(self.prefix, 'lib64', 'libioil.so')}")
-            cmds.append("export D_IL_REPORT=1")
cmds.append("export D_IL_REPORT=1") cmds.append(str(fio_cmd)) cmds.append("status=$?") cmds.append("cd -") @@ -1372,10 +1370,8 @@ def create_app_cmdline(self, job_spec, pool, ppn, nodesperjob): env["DAOS_UNS_PREFIX"] = format_path(pool, self.container[-1]) if self.enable_il and api == "POSIX-LIBPIL4DFS": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') - env["D_IL_REPORT"] = "1" if self.enable_il and api == "POSIX-LIBIOIL": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so') - env["D_IL_REPORT"] = "1" mpirun_cmd.assign_environment(env, True) mpirun_cmd.assign_processes(nodesperjob * ppn) mpirun_cmd.ppn.update(ppn) diff --git a/src/tests/ftest/util/systemctl_utils.py b/src/tests/ftest/util/systemctl_utils.py index 9ee809a1143..32b5ec9f992 100644 --- a/src/tests/ftest/util/systemctl_utils.py +++ b/src/tests/ftest/util/systemctl_utils.py @@ -125,7 +125,7 @@ def get_systemctl_command(unit_command, service, user="root"): """ command = ["systemctl"] if user != "root": - command.append(f"--user {user}") + command.append("--user") if unit_command: command.append(unit_command) if service: @@ -190,6 +190,10 @@ def create_override_config(logger, hosts, service, user, service_command, servic Returns: str: the systemctl override config file path """ + # TODO proper + # Reload since teardown removes the files + daemon_reload(logger, hosts, user, verbose, timeout) + # Get the existing service file service_file = get_service_file(logger, hosts, service, user, verbose, timeout)