diff --git a/src/cart/README.env b/src/cart/README.env
index 656f2ab73e47..ad84d8c1b31b 100644
--- a/src/cart/README.env
+++ b/src/cart/README.env
@@ -167,3 +167,15 @@ This file lists the environment variables used in CaRT.
  . CRT_TEST_CONT
    When set to 1, orterun does not automatically shut down other servers when
    one server is shutdown. Used in cart internal testing.
+
+ . D_CLIENT_METRICS_ENABLE
+   When set to 1, client side metrics will be collected on each daos client, which
+   can be retrieved by daos_metrics -j job_id on each client.
+
+ . D_CLIENT_METRICS_RETAIN
+   When set to 1, client side metrics will be retained even after the job exits, i.e.
+   those metrics can be retrieved by daos_metrics even after job exits.
+
+ . D_CLIENT_METRICS_DUMP_PATH
+   Set client side metrics dump path(file) for each client, so these metrics will be
+   dumped to the specified file when the job exits.
diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c
index 3766753c0596..df243b1dce98 100644
--- a/src/cart/crt_init.c
+++ b/src/cart/crt_init.c
@@ -18,6 +18,50 @@ static volatile int	gdata_init_flag;
 struct crt_plugin_gdata crt_plugin_gdata;
 static bool		g_prov_settings_applied[CRT_PROV_COUNT];
 
+/*
+ * List of the environment variables used in CaRT.
+ *
+ * dump_envariables() walks this table and logs every variable that is set;
+ * the value of D_PROVIDER_AUTH_KEY is masked in that output.
+ */
+static const char      *crt_env_names[] = {
+    "D_PROVIDER",
+    "D_INTERFACE",
+    "D_DOMAIN",
+    "D_PORT",
+    "CRT_PHY_ADDR_STR",
+    "D_LOG_STDERR_IN_LOG",
+    "D_LOG_SIZE",
+    "D_LOG_FILE",
+    "D_LOG_FILE_APPEND_PID",
+    "D_LOG_MASK",
+    "DD_MASK",
+    "DD_STDERR",
+    "DD_SUBSYS",
+    "CRT_TIMEOUT",
+    "CRT_ATTACH_INFO_PATH",
+    "OFI_PORT",
+    "OFI_INTERFACE",
+    "OFI_DOMAIN",
+    "CRT_CREDIT_EP_CTX",
+    "CRT_CTX_SHARE_ADDR",
+    "CRT_CTX_NUM",
+    "D_FI_CONFIG",
+    "FI_UNIVERSE_SIZE",
+    "CRT_ENABLE_MEM_PIN",
+    "FI_OFI_RXM_USE_SRX",
+    "D_LOG_FLUSH",
+    "CRT_MRC_ENABLE",
+    "CRT_SECONDARY_PROVIDER",
+    "D_PROVIDER_AUTH_KEY",
+    "D_PORT_AUTO_ADJUST",
+    "D_POLL_TIMEOUT",
+    "D_LOG_FILE_APPEND_RANK",
+    "D_QUOTA_RPCS",
+    "D_POST_INIT",
+    "D_POST_INCR",
+    "DAOS_SIGNAL_REGISTER",
+    /* client side metrics (see README.env) */
+    "D_CLIENT_METRICS_ENABLE",
+    "D_CLIENT_METRICS_RETAIN",
+    "D_CLIENT_METRICS_DUMP_PATH",
+
+};
+
 static void
 crt_lib_init(void) __attribute__((__constructor__));
 
@@ -62,53 +106,19 @@ crt_lib_fini(void)
 static void
 dump_envariables(void)
 {
-	int                i;
-	char              *val;
-	static const char *var_names[] = {"D_PROVIDER",
-					  "D_INTERFACE",
-					  "D_DOMAIN",
-					  "D_PORT",
-					  "CRT_PHY_ADDR_STR",
-					  "D_LOG_STDERR_IN_LOG",
-					  "D_LOG_SIZE",
-					  "D_LOG_FILE",
-					  "D_LOG_FILE_APPEND_PID",
-					  "D_LOG_MASK",
-					  "DD_MASK",
-					  "DD_STDERR",
-					  "DD_SUBSYS",
-					  "CRT_TIMEOUT",
-					  "CRT_ATTACH_INFO_PATH",
-					  "OFI_PORT",
-					  "OFI_INTERFACE",
-					  "OFI_DOMAIN",
-					  "CRT_CREDIT_EP_CTX",
-					  "CRT_CTX_SHARE_ADDR",
-					  "CRT_CTX_NUM",
-					  "D_FI_CONFIG",
-					  "FI_UNIVERSE_SIZE",
-					  "CRT_ENABLE_MEM_PIN",
-					  "FI_OFI_RXM_USE_SRX",
-					  "D_LOG_FLUSH",
-					  "CRT_MRC_ENABLE",
-					  "CRT_SECONDARY_PROVIDER",
-					  "D_PROVIDER_AUTH_KEY",
-					  "D_PORT_AUTO_ADJUST",
-					  "D_POLL_TIMEOUT",
-					  "D_LOG_FILE_APPEND_RANK",
-					  "D_QUOTA_RPCS",
-					  "D_POST_INIT",
-					  "D_POST_INCR"};
+	int i;
 
 	D_INFO("-- ENVARS: --\n");
-	for (i = 0; i < ARRAY_SIZE(var_names); i++) {
-		d_agetenv_str(&val, var_names[i]);
+	for (i = 0; i < ARRAY_SIZE(crt_env_names); i++) {
+		char *val = NULL;
+
+		d_agetenv_str(&val, crt_env_names[i]);
 		if (val == NULL)
 			continue;
-		if (strcmp(var_names[i], "D_PROVIDER_AUTH_KEY") == 0)
-			D_INFO("%s = %s\n", var_names[i], "********");
+		if (strcmp(crt_env_names[i], "D_PROVIDER_AUTH_KEY") == 0)
+			D_INFO("%s = %s\n", crt_env_names[i], "********");
 		else
-			D_INFO("%s = %s\n", var_names[i], val);
+			D_INFO("%s = %s\n", crt_env_names[i], val);
 		d_freeenv_str(&val);
 	}
 }
diff --git a/src/client/api/SConscript b/src/client/api/SConscript
index e12aa93eaa92..b7e93f516d53 100644
--- a/src/client/api/SConscript
+++ b/src/client/api/SConscript
@@ -1,7 +1,7 @@
 """Build DAOS client"""
 
 LIBDAOS_SRC = ['agent.c', 'array.c', 'container.c', 'event.c', 'init.c', 'job.c', 'kv.c', 'mgmt.c',
-               'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c']
+               'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c', 'metrics.c']
 
 
 def scons():
diff --git a/src/client/api/init.c b/src/client/api/init.c
index c93fd6393216..f574169d8c7b 100644
--- a/src/client/api/init.c
+++ b/src/client/api/init.c
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2016-2023 Intel Corporation.
+ * (C) Copyright 2016-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -23,6 +23,7 @@
 #include <daos/btree_class.h>
 #include <daos/placement.h>
 #include <daos/job.h>
+#include <daos/metrics.h>
 #include "task_internal.h"
 #include <pthread.h>
 
@@ -219,6 +220,13 @@ daos_init(void)
 	if (rc != 0)
 		D_GOTO(out_pl, rc);
 
+	/** set up client telemetry */
+	rc = dc_tm_init();
+	if (rc != 0) {
+		/* should not be fatal */
+		DL_WARN(rc, "failed to initialize client telemetry");
+	}
+
 	/** set up pool */
 	rc = dc_pool_init();
 	if (rc != 0)
@@ -242,6 +250,7 @@ daos_init(void)
 out_pool:
 	dc_pool_fini();
 out_mgmt:
+	dc_tm_fini();
 	dc_mgmt_fini();
 out_pl:
 	pl_fini();
@@ -291,6 +300,8 @@ daos_fini(void)
 		D_GOTO(unlock, rc);
 	}
 
+	/** clean up all registered per-module metrics */
+	daos_metrics_fini();
 	dc_obj_fini();
 	dc_cont_fini();
 	dc_pool_fini();
@@ -301,6 +312,7 @@ daos_fini(void)
 		D_ERROR("failed to disconnect some resources may leak, "
 			DF_RC"\n", DP_RC(rc));
 
+	dc_tm_fini();
 	dc_agent_fini();
 	dc_job_fini();
 
diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c
new file mode 100644
index 000000000000..2395d9b40f5e
--- /dev/null
+++ b/src/client/api/metrics.c
@@ -0,0 +1,216 @@
+/*
+ * (C) Copyright 2020-2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#define D_LOGFAC DD_FAC(client)
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/utsname.h>
+#include <sys/shm.h>
+#include <daos/common.h>
+#include <daos/job.h>
+#include <daos/tls.h>
+#include <daos/metrics.h>
+#include <daos/mgmt.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_consumer.h>
+#include <gurt/telemetry_producer.h>
+
+#define INIT_JOB_NUM 1024
+bool daos_client_metric;
+bool daos_client_metric_retain;
+
+#define MAX_IDS_SIZE(num) (num * D_TM_METRIC_SIZE)
+/* The client side metrics structure looks like
+ * root/job_id/pid/....
+ */
+
+/**
+ * Change the owner of an existing System V shared memory segment.
+ *
+ * \param[in] key        IPC key identifying the segment
+ * \param[in] new_owner  uid that should own the segment
+ *
+ * \return 0 on success, otherwise the DER code translated from the errno of
+ *         the failing shmget()/shmctl() call
+ */
+static int
+shm_chown(key_t key, uid_t new_owner)
+{
+	struct shmid_ds shmid_ds;
+	int             shmid;
+	int             err;
+
+	shmid = shmget(key, 0, 0);
+	if (shmid < 0) {
+		/* save errno right away: the logging call below may overwrite it */
+		err = errno;
+		D_ERROR("shmget(0x%x) failed: %s (%d)\n", key, strerror(err), err);
+		return daos_errno2der(err);
+	}
+
+	if (shmctl(shmid, IPC_STAT, &shmid_ds) < 0) {
+		err = errno;
+		D_ERROR("shmctl(0x%x, IPC_STAT) failed: %s (%d)\n", shmid, strerror(err), err);
+		return daos_errno2der(err);
+	}
+
+	/* only the owner uid changes; every other field keeps the value just read */
+	shmid_ds.shm_perm.uid = new_owner;
+	if (shmctl(shmid, IPC_SET, &shmid_ds) < 0) {
+		err = errno;
+		D_ERROR("shmctl(0x%x, IPC_SET) failed: %s (%d)\n", shmid, strerror(err), err);
+		return daos_errno2der(err);
+	}
+
+	return 0;
+}
+
+/*
+ * Create the telemetry root for this process, ask the agent to register it
+ * and then hand ownership of the backing shm segment to the agent's uid.
+ *
+ * name  - root name passed to d_tm_init_with_name()
+ * pid   - pid of this client; used to derive the telemetry id, sent to the
+ *         agent, and used as the shm key for the ownership change
+ * flags - flags passed through to d_tm_init_with_name()
+ *
+ * Returns 0 on success, otherwise the return code of the first call that
+ * failed. Nothing is torn down here on failure.
+ */
+static int
+init_managed_root(const char *name, pid_t pid, int flags)
+{
+	uid_t agent_uid;
+	key_t key;
+	int   rc;
+
+	/* Set the key based on our pid so that it can be easily found. */
+	key = pid - D_TM_SHARED_MEMORY_KEY;
+	rc  = d_tm_init_with_name(key, MAX_IDS_SIZE(INIT_JOB_NUM), flags, name);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to initialize root for %s.", name);
+		return rc;
+	}
+
+	/* Request that the agent adds our segment into the tree. */
+	rc = dc_mgmt_tm_register(NULL, dc_jobid, pid, &agent_uid);
+	if (rc != 0) {
+		DL_ERROR(rc, "client telemetry setup failed.");
+		return rc;
+	}
+
+	/* Change ownership of the segment so that the agent can manage it. */
+	/*
+	 * NOTE(review): the segment is looked up by pid here, not by the key
+	 * computed above. Presumably d_tm_init_with_name() adds
+	 * D_TM_SHARED_MEMORY_KEY back to its id argument so the real shm key
+	 * equals pid -- confirm against the telemetry implementation.
+	 */
+	D_INFO("setting shm segment 0x%x to be owned by uid %d\n", pid, agent_uid);
+	rc = shm_chown(pid, agent_uid);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to chown shm segment.");
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Set up client side telemetry for this process.
+ *
+ * Telemetry is enabled when DAOS_CLIENT_METRICS_ENABLE is true, or when
+ * DAOS_CLIENT_METRICS_DUMP_PATH is defined. When it is not enabled this is a
+ * no-op that returns 0.
+ *
+ * On failure daos_client_metric is cleared, the client TLS key (if it was
+ * created here) is deleted and d_tm_fini() is called, so a later dc_tm_fini()
+ * has nothing left to release.
+ *
+ * \return 0 on success or when telemetry is disabled, a DER error code
+ *         otherwise
+ */
+int
+dc_tm_init(void)
+{
+	struct d_tm_node_t *started_at;
+	pid_t               pid = getpid();
+	int                 metrics_tag;
+	char                root_name[D_TM_MAX_NAME_LEN];
+	bool                tls_key_created = false;
+	int                 rc;
+
+	d_getenv_bool(DAOS_CLIENT_METRICS_ENABLE, &daos_client_metric);
+	if (!daos_client_metric && d_isenv_def(DAOS_CLIENT_METRICS_DUMP_PATH))
+		daos_client_metric = true;
+
+	if (!daos_client_metric)
+		return 0;
+
+	D_INFO("Setting up client telemetry for %s/%d\n", dc_jobid, pid);
+
+	rc = dc_tls_key_create();
+	if (rc != 0) {
+		/* pthread_key_create() reports a positive errno; callers expect a DER code */
+		rc = daos_errno2der(rc);
+		D_GOTO(out, rc);
+	}
+	tls_key_created = true;
+
+	metrics_tag = D_TM_OPEN_OR_CREATE | D_TM_MULTIPLE_WRITER_LOCK;
+	d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN, &daos_client_metric_retain);
+	if (daos_client_metric_retain)
+		metrics_tag |= D_TM_RETAIN_SHMEM;
+
+	snprintf(root_name, sizeof(root_name), "%d", pid);
+	rc = init_managed_root(root_name, pid, metrics_tag);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to initialize client telemetry");
+		D_GOTO(out, rc);
+	}
+
+	rc = d_tm_add_metric(&started_at, D_TM_TIMESTAMP, "Timestamp of client startup", NULL,
+			     "started_at");
+	if (rc != 0) {
+		DL_ERROR(rc, "add metric started_at failed.");
+		D_GOTO(out, rc);
+	}
+
+	d_tm_record_timestamp(started_at);
+out:
+	if (rc != 0) {
+		daos_client_metric = false;
+		/*
+		 * dc_tm_fini() returns early once daos_client_metric is false,
+		 * so the key must be released here or it is leaked.
+		 */
+		if (tls_key_created)
+			dc_tls_key_delete();
+		d_tm_fini();
+	}
+
+	return rc;
+}
+
+/*
+ * Iteration callback handed to d_tm_iterate(): prints a single node to the
+ * stdio stream that was supplied as the opaque argument.
+ */
+static void
+iter_dump(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format,
+	  int opt_fields, void *arg)
+{
+	FILE *stream = (FILE *)arg;
+
+	d_tm_print_node(ctx, node, level, path, format, opt_fields, stream);
+}
+
+/**
+ * Dump the metrics of this process, found under "<jobid>/<pid>" in the job
+ * telemetry root, to a file in CSV format.
+ *
+ * \param[in] dump_path  file to write; opened with "w+"
+ *
+ * \return 0 on success, -DER_INVAL if the file cannot be opened, -DER_NOMEM
+ *         if the telemetry root cannot be opened, -DER_NONEXIST if no metrics
+ *         exist for this process, or the DER code translated from errno if
+ *         closing the file fails after an otherwise successful dump
+ */
+static int
+dump_tm_file(const char *dump_path)
+{
+	struct d_tm_context *ctx;
+	struct d_tm_node_t  *root;
+	char                 dirname[D_TM_MAX_NAME_LEN] = {0};
+	uint32_t             filter;
+	FILE                *dump_file;
+	int                  err;
+	int                  rc = 0;
+
+	dump_file = fopen(dump_path, "w+");
+	if (dump_file == NULL) {
+		/* save errno before logging can overwrite it */
+		err = errno;
+		D_ERROR("cannot open %s: %s (%d)\n", dump_path, strerror(err), err);
+		return -DER_INVAL;
+	}
+
+	filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO |
+		 D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE;
+
+	ctx = d_tm_open(DC_TM_JOB_ROOT_ID);
+	if (ctx == NULL)
+		D_GOTO(close, rc = -DER_NOMEM);
+
+	/* pid_t is signed; dc_tm_init() also formats the pid with %d */
+	snprintf(dirname, sizeof(dirname), "%s/%d", dc_jobid, getpid());
+	root = d_tm_find_metric(ctx, dirname);
+	if (root == NULL) {
+		/* report through the log, a library must not write to stdout */
+		D_ERROR("No metrics found at: '%s'\n", dirname);
+		D_GOTO(close_ctx, rc = -DER_NONEXIST);
+	}
+
+	d_tm_print_field_descriptors(0, dump_file);
+
+	d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_CSV, 0, iter_dump, dump_file);
+
+close_ctx:
+	d_tm_close(&ctx);
+close:
+	/* fclose() flushes buffered output, so a failure means the dump is incomplete */
+	if (fclose(dump_file) != 0) {
+		err = errno;
+		D_ERROR("failed to close %s: %s (%d)\n", dump_path, strerror(err), err);
+		if (rc == 0)
+			rc = daos_errno2der(err);
+	}
+	return rc;
+}
+
+/**
+ * Tear down client side telemetry.
+ *
+ * Does nothing unless telemetry was enabled by dc_tm_init(). When
+ * DAOS_CLIENT_METRICS_DUMP_PATH is set, the metrics are first dumped to that
+ * file; a failed dump is logged and does not prevent the teardown. Then the
+ * calling thread's client TLS is released, the client TLS key is deleted and
+ * d_tm_fini() is called.
+ */
+void
+dc_tm_fini(void)
+{
+	char *dump_path = NULL;
+	int   rc;
+
+	if (!daos_client_metric)
+		return;
+
+	rc = d_agetenv_str(&dump_path, DAOS_CLIENT_METRICS_DUMP_PATH);
+	if (rc != 0)
+		D_GOTO(out, rc);
+	if (dump_path != NULL) {
+		D_INFO("dump path is %s\n", dump_path);
+		rc = dump_tm_file(dump_path);
+		if (rc != 0)
+			DL_ERROR(rc, "failed to dump client metrics to %s", dump_path);
+	}
+	d_freeenv_str(&dump_path);
+
+out:
+	dc_tls_fini();
+	dc_tls_key_delete();
+
+	d_tm_fini();
+}
diff --git a/src/common/SConscript b/src/common/SConscript
index c61ecdeebe3a..38bd221793e2 100644
--- a/src/common/SConscript
+++ b/src/common/SConscript
@@ -9,7 +9,7 @@ COMMON_FILES = ['debug.c', 'mem.c', 'fail_loc.c', 'lru.c',
                 'dedup.c', 'profile.c', 'compression.c', 'compression_isal.c',
                 'compression_qat.c', 'multihash.c', 'multihash_isal.c',
                 'cipher.c', 'cipher_isal.c', 'qat.c', 'fault_domain.c',
-                'policy.c']
+                'policy.c', 'tls.c', 'metrics.c']
 
 
 def build_daos_common(denv, client):
diff --git a/src/common/metrics.c b/src/common/metrics.c
new file mode 100644
index 000000000000..b6c88a3ea0d0
--- /dev/null
+++ b/src/common/metrics.c
@@ -0,0 +1,131 @@
+/**
+ * (C) Copyright 2016-2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
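+/**
+ * It implements registration of per-module metrics callbacks for DAOS, and
+ * the helpers that run those callbacks to create and release module metrics.
+ */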
+#include <pthread.h>
+#include <daos/tls.h>
+#include <daos/metrics.h>
+
+/* One registered set of module metrics callbacks, linked into metrics_mod_list. */
+struct metrics_list {
+	/* callbacks stored at registration; may be NULL, the iterators skip such entries */
+	struct daos_module_metrics *mm_metrics;
+	/* link in metrics_mod_list */
+	d_list_t                    mm_list;
+	/* id stored at registration; used as the index into the callers' metrics arrays */
+	uint32_t                    mm_id;
+};
+
+/* Track list of loaded modules */
+D_LIST_HEAD(metrics_mod_list);
+/* taken around additions to and removals from metrics_mod_list */
+pthread_mutex_t metrics_mod_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/**
+ * Register the metrics callbacks of a module by appending a new entry to
+ * metrics_mod_list.
+ *
+ * \param[in] tag      not used by this function
+ * \param[in] id       id stored in the entry
+ * \param[in] metrics  callbacks stored in the entry (the pointer is kept, not copied)
+ *
+ * \return 0 on success, -DER_NOMEM if the entry cannot be allocated
+ */
+int
+daos_metrics_init(enum daos_module_tag tag, uint32_t id, struct daos_module_metrics *metrics)
+{
+	struct metrics_list *ml;
+
+	D_ALLOC_PTR(ml);
+	if (ml == NULL)
+		return -DER_NOMEM;
+
+	/* fill in the entry before it becomes visible on the list */
+	ml->mm_id      = id;
+	ml->mm_metrics = metrics;
+
+	D_MUTEX_LOCK(&metrics_mod_list_lock);
+	d_list_add_tail(&ml->mm_list, &metrics_mod_list);
+	D_MUTEX_UNLOCK(&metrics_mod_list_lock);
+
+	return 0;
+}
+
+/**
+ * Drop every entry from metrics_mod_list and free it. The callbacks the
+ * entries point to are not touched.
+ */
+void
+daos_metrics_fini(void)
+{
+	struct metrics_list *ml;
+
+	D_MUTEX_LOCK(&metrics_mod_list_lock);
+	/* keep unlinking the head entry until the list is drained */
+	while (!d_list_empty(&metrics_mod_list)) {
+		ml = d_list_entry(metrics_mod_list.next, struct metrics_list, mm_list);
+		d_list_del_init(&ml->mm_list);
+		D_FREE(ml);
+	}
+	D_MUTEX_UNLOCK(&metrics_mod_list_lock);
+}
+
+/**
+ * Run the dmm_fini callback of every registered module whose tags match.
+ *
+ * \param[in] tag      only modules with at least one of these tag bits are visited
+ * \param[in] metrics  array indexed by module id; NULL slots are skipped and
+ *                     non-NULL slots are passed to the module's dmm_fini
+ */
+void
+daos_module_fini_metrics(enum dss_module_tag tag, void **metrics)
+{
+	struct metrics_list *ml;
+
+	D_MUTEX_LOCK(&metrics_mod_list_lock);
+	d_list_for_each_entry(ml, &metrics_mod_list, mm_list) {
+		struct daos_module_metrics *met = ml->mm_metrics;
+
+		/* nothing to release for this entry */
+		if (met == NULL || (met->dmm_tags & tag) == 0 || met->dmm_fini == NULL ||
+		    metrics[ml->mm_id] == NULL)
+			continue;
+
+		met->dmm_fini(metrics[ml->mm_id]);
+	}
+	D_MUTEX_UNLOCK(&metrics_mod_list_lock);
+}
+
+/**
+ * Run the dmm_init callback of every registered module whose tags match and
+ * store each result in the metrics array at the module's id.
+ *
+ * \param[in]  tag      only modules with at least one of these tag bits are visited
+ * \param[out] metrics  array indexed by module id that receives the results
+ * \param[in]  path     passed through to dmm_init
+ * \param[in]  tgt_id   passed through to dmm_init
+ *
+ * \return 0 on success; -DER_NOMEM if a dmm_init returns NULL, in which case
+ *         daos_module_fini_metrics() has been run on the array
+ */
+int
+daos_module_init_metrics(enum dss_module_tag tag, void **metrics, const char *path, int tgt_id)
+{
+	struct metrics_list *ml;
+	int                  rc = 0;
+
+	D_MUTEX_LOCK(&metrics_mod_list_lock);
+	d_list_for_each_entry(ml, &metrics_mod_list, mm_list) {
+		struct daos_module_metrics *met = ml->mm_metrics;
+
+		if (met == NULL || (met->dmm_tags & tag) == 0 || met->dmm_init == NULL)
+			continue;
+
+		metrics[ml->mm_id] = met->dmm_init(path, tgt_id);
+		if (metrics[ml->mm_id] != NULL)
+			continue;
+
+		D_ERROR("failed to allocate per-pool metrics for module %u\n", ml->mm_id);
+		rc = -DER_NOMEM;
+		break;
+	}
+	D_MUTEX_UNLOCK(&metrics_mod_list_lock);
+
+	/* the fini helper takes the list lock itself, so call it only after unlocking */
+	if (rc != 0)
+		daos_module_fini_metrics(tag, metrics);
+
+	return rc;
+}
+
+/**
+ * Query all modules for the number of per-pool metrics they create.
+ *
+ * Only modules tagged with DAOS_CLI_TAG that provide a dmm_nr_metrics
+ * callback are counted. The list is walked under metrics_mod_list_lock, like
+ * every other traversal in this file, so it cannot race with registration or
+ * teardown of the list.
+ *
+ * \return Total number of metrics for all modules
+ */
+int
+daos_module_nr_pool_metrics(void)
+{
+	struct metrics_list *ml;
+	int                  total = 0;
+
+	D_MUTEX_LOCK(&metrics_mod_list_lock);
+	d_list_for_each_entry(ml, &metrics_mod_list, mm_list) {
+		struct daos_module_metrics *met = ml->mm_metrics;
+
+		if (met == NULL)
+			continue;
+		if (met->dmm_nr_metrics == NULL)
+			continue;
+		if (!(met->dmm_tags & DAOS_CLI_TAG))
+			continue;
+
+		total += met->dmm_nr_metrics();
+	}
+	D_MUTEX_UNLOCK(&metrics_mod_list_lock);
+
+	return total;
+}
diff --git a/src/common/tls.c b/src/common/tls.c
new file mode 100644
index 000000000000..89b9baf13e87
--- /dev/null
+++ b/src/common/tls.c
@@ -0,0 +1,227 @@
+/**
+ * (C) Copyright 2016-2023 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+/**
+ * It implements thread-local storage (TLS) for DAOS.
+ */
+#include <pthread.h>
+#include <daos/tls.h>
+
+/* This array records every module key registered on this node. */
+static struct daos_module_key *daos_module_keys[DAOS_MODULE_KEYS_NR] = {NULL};
+pthread_mutex_t                daos_module_keys_lock                 = PTHREAD_MUTEX_INITIALIZER;
+
+static __thread bool           dc_tls_thread_init;
+
+static pthread_key_t           dss_tls_key;
+static pthread_key_t           dc_tls_key;
+
+/**
+ * Store a module key in the first free slot of daos_module_keys and record
+ * that slot in key->dmk_index. Asserts when every slot is already in use.
+ */
+void
+daos_register_key(struct daos_module_key *key)
+{
+	int i = 0;
+
+	D_MUTEX_LOCK(&daos_module_keys_lock);
+	/* advance to the first empty slot, if there is one */
+	while (i < DAOS_MODULE_KEYS_NR && daos_module_keys[i] != NULL)
+		i++;
+	if (i < DAOS_MODULE_KEYS_NR) {
+		daos_module_keys[i] = key;
+		key->dmk_index      = i;
+	}
+	D_MUTEX_UNLOCK(&daos_module_keys_lock);
+	D_ASSERT(i < DAOS_MODULE_KEYS_NR);
+}
+
+/**
+ * Clear the slot of daos_module_keys that a module key occupies. A NULL key
+ * is ignored; the key's dmk_index must be a valid slot number.
+ */
+void
+daos_unregister_key(struct daos_module_key *key)
+{
+	if (key != NULL) {
+		D_ASSERT(key->dmk_index >= 0);
+		D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR);
+
+		D_MUTEX_LOCK(&daos_module_keys_lock);
+		daos_module_keys[key->dmk_index] = NULL;
+		D_MUTEX_UNLOCK(&daos_module_keys_lock);
+	}
+}
+
+/**
+ * Return the module key stored in the given slot of daos_module_keys (NULL
+ * when the slot is empty). The index must be within [0, DAOS_MODULE_KEYS_NR).
+ * The slot is read without taking daos_module_keys_lock.
+ */
+struct daos_module_key *
+daos_get_module_key(int index)
+{
+	struct daos_module_key *dmk;
+
+	D_ASSERT(index < DAOS_MODULE_KEYS_NR);
+	D_ASSERT(index >= 0);
+
+	dmk = daos_module_keys[index];
+	return dmk;
+}
+
+/**
+ * Populate the per-module values of a thread local storage descriptor.
+ *
+ * The value array is allocated when dtls->dtls_values is NULL. For every
+ * registered module key whose tags intersect dtls->dtls_tag, the key's
+ * dmk_init callback is run and its result stored at the key's slot.
+ *
+ * If a dmk_init callback returns NULL, the values created by this call are
+ * released again in reverse order, and the array is freed if this call
+ * allocated it, so nothing is leaked when the caller only frees dtls.
+ *
+ * \param[in,out] dtls    descriptor to fill in; dtls_tag must already be set
+ * \param[in]     xs_id   passed through to dmk_init
+ * \param[in]     tgt_id  passed through to dmk_init
+ *
+ * \return 0 on success, -DER_NOMEM on allocation or dmk_init failure
+ */
+static int
+daos_thread_local_storage_init(struct daos_thread_local_storage *dtls, int xs_id, int tgt_id)
+{
+	bool allocated = false;
+	int  i;
+
+	if (dtls->dtls_values == NULL) {
+		D_ALLOC_ARRAY(dtls->dtls_values, DAOS_MODULE_KEYS_NR);
+		if (dtls->dtls_values == NULL)
+			return -DER_NOMEM;
+		allocated = true;
+	}
+
+	for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) {
+		struct daos_module_key *dmk = daos_module_keys[i];
+
+		if (dmk == NULL || !(dtls->dtls_tag & dmk->dmk_tags))
+			continue;
+
+		D_ASSERT(dmk->dmk_init != NULL);
+		dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id);
+		if (dtls->dtls_values[i] == NULL)
+			goto unwind;
+	}
+
+	return 0;
+
+unwind:
+	/* slot i failed: release the slots before it, newest first */
+	while (--i >= 0) {
+		struct daos_module_key *dmk = daos_module_keys[i];
+
+		if (dmk == NULL || !(dtls->dtls_tag & dmk->dmk_tags))
+			continue;
+		if (dtls->dtls_values[i] == NULL)
+			continue;
+
+		D_ASSERT(dmk->dmk_fini != NULL);
+		dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]);
+		dtls->dtls_values[i] = NULL;
+	}
+	if (allocated)
+		D_FREE(dtls->dtls_values);
+
+	return -DER_NOMEM;
+}
+
+/**
+ * Release the per-module values of a thread local storage descriptor, from
+ * the last slot to the first, and then free the value array. Every module key
+ * whose tags intersect dtls->dtls_tag must have a non-NULL value.
+ */
+static void
+daos_thread_local_storage_fini(struct daos_thread_local_storage *dtls)
+{
+	int i;
+
+	if (dtls->dtls_values != NULL) {
+		for (i = DAOS_MODULE_KEYS_NR; i-- > 0;) {
+			struct daos_module_key *dmk = daos_module_keys[i];
+
+			if (dmk == NULL || !(dtls->dtls_tag & dmk->dmk_tags))
+				continue;
+
+			D_ASSERT(dtls->dtls_values[i] != NULL);
+			D_ASSERT(dmk->dmk_fini != NULL);
+			dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]);
+		}
+	}
+
+	D_FREE(dtls->dtls_values);
+}
+
+/*
+ * Allocate a daos_thread_local_storage for the calling thread, initialize its
+ * per-module values and store the pointer as the thread-specific value of the
+ * server key (server == true) or of the client key (server == false).
+ *
+ * Returns the new storage, or NULL if allocation, module initialization or
+ * pthread_setspecific() fails.
+ */
+static struct daos_thread_local_storage *
+daos_tls_init(int tag, int xs_id, int tgt_id, bool server)
+{
+	struct daos_thread_local_storage *dtls;
+	int                               rc;
+
+	D_ALLOC_PTR(dtls);
+	if (dtls == NULL)
+		return NULL;
+
+	dtls->dtls_tag = tag;
+	if (daos_thread_local_storage_init(dtls, xs_id, tgt_id) != 0) {
+		D_FREE(dtls);
+		return NULL;
+	}
+
+	rc = pthread_setspecific(server ? dss_tls_key : dc_tls_key, dtls);
+	if (rc != 0) {
+		D_ERROR("failed to initialize tls: %d\n", rc);
+		daos_thread_local_storage_fini(dtls);
+		D_FREE(dtls);
+		return NULL;
+	}
+
+	/* only the client side tracks per-thread initialization with a flag */
+	if (!server)
+		dc_tls_thread_init = true;
+
+	return dtls;
+}
+
+/*
+ * Create the pthread key used for server side thread local storage. No
+ * destructor is registered. Returns the pthread_key_create() result.
+ */
+int
+ds_tls_key_create(void)
+{
+	int rc = pthread_key_create(&dss_tls_key, NULL);
+
+	return rc;
+}
+
+/*
+ * Create the pthread key used for client side thread local storage. No
+ * destructor is registered. Returns the pthread_key_create() result.
+ */
+int
+dc_tls_key_create(void)
+{
+	int rc = pthread_key_create(&dc_tls_key, NULL);
+
+	return rc;
+}
+
+/* Delete the server side TLS key; the pthread_key_delete() result is ignored. */
+void
+ds_tls_key_delete()
+{
+	(void)pthread_key_delete(dss_tls_key);
+}
+
+/* Delete the client side TLS key; the pthread_key_delete() result is ignored. */
+void
+dc_tls_key_delete(void)
+{
+	(void)pthread_key_delete(dc_tls_key);
+}
+
+/*
+ * Release the thread local storage of the calling thread: run the per-module
+ * fini callbacks, free the storage and clear the thread-specific value of the
+ * server key (server == true) or the client key (server == false).
+ */
+static void
+daos_tls_fini(struct daos_thread_local_storage *dtls, bool server)
+{
+	pthread_key_t key = server ? dss_tls_key : dc_tls_key;
+
+	daos_thread_local_storage_fini(dtls);
+	D_FREE(dtls);
+	pthread_setspecific(key, NULL);
+}
+
+/*
+ * Allocate client side thread local storage for the calling thread. The
+ * xstream id is passed as -1 and pid is passed in the target id position.
+ * Returns the storage, or NULL on failure.
+ */
+struct daos_thread_local_storage *
+dc_tls_init(int tag, uint32_t pid)
+{
+	const int  xs_id  = -1;
+	const bool server = false;
+
+	return daos_tls_init(tag, xs_id, pid, server);
+}
+
+/*
+ * Free the client side thread local storage of the calling thread, if any,
+ * and mark the thread as uninitialized again.
+ *
+ * The per-thread flag must be cleared here: dc_tls_get() only creates storage
+ * while the flag is false, so leaving it set would make every later
+ * dc_tls_get() on this thread return NULL (for example after the library is
+ * finalized and initialized again).
+ */
+void
+dc_tls_fini(void)
+{
+	struct daos_thread_local_storage *dtls;
+
+	dtls = (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key);
+	if (dtls != NULL)
+		daos_tls_fini(dtls, false);
+
+	dc_tls_thread_init = false;
+}
+
+/*
+ * Return the client side thread local storage of the calling thread. The
+ * first call on a thread creates the storage with the given tag and the
+ * process id; later calls return the value stored under the client TLS key.
+ */
+struct daos_thread_local_storage *
+dc_tls_get(unsigned int tag)
+{
+	if (dc_tls_thread_init)
+		return (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key);
+
+	return dc_tls_init(tag, getpid());
+}
+
+/* Return the value stored under the server TLS key for the calling thread. */
+struct daos_thread_local_storage *
+dss_tls_get()
+{
+	void *value = pthread_getspecific(dss_tls_key);
+
+	return (struct daos_thread_local_storage *)value;
+}
+
+/*
+ * Allocate server side thread local storage for the calling thread and store
+ * it under the server TLS key. Returns the storage, or NULL on failure.
+ */
+struct daos_thread_local_storage *
+dss_tls_init(int tag, int xs_id, int tgt_id)
+{
+	const bool server = true;
+
+	return daos_tls_init(tag, xs_id, tgt_id, server);
+}
+
+/*
+ * Free the given server side thread local storage and clear the calling
+ * thread's value under the server TLS key.
+ */
+void
+dss_tls_fini(struct daos_thread_local_storage *dtls)
+{
+	const bool server = true;
+
+	daos_tls_fini(dtls, server);
+}
diff --git a/src/container/srv.c b/src/container/srv.c
index 80650f7c16ca..05760d9439e6 100644
--- a/src/container/srv.c
+++ b/src/container/srv.c
@@ -12,6 +12,7 @@
 #define D_LOGFAC	DD_FAC(container)
 
 #include <daos_srv/daos_engine.h>
+#include <daos/metrics.h>
 #include <daos/rpc.h>
 #include "rpc.h"
 #include "srv_internal.h"
@@ -142,11 +143,11 @@ struct dss_module_key cont_module_key = {
 	.dmk_fini = dsm_tls_fini,
 };
 
-struct dss_module_metrics cont_metrics = {
-	.dmm_tags = DAOS_SYS_TAG,
-	.dmm_init = ds_cont_metrics_alloc,
-	.dmm_fini = ds_cont_metrics_free,
-	.dmm_nr_metrics = ds_cont_metrics_count,
+struct daos_module_metrics cont_metrics = {
+    .dmm_tags       = DAOS_SYS_TAG,
+    .dmm_init       = ds_cont_metrics_alloc,
+    .dmm_fini       = ds_cont_metrics_free,
+    .dmm_nr_metrics = ds_cont_metrics_count,
 };
 
 struct dss_module cont_module =  {
diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go
index 3a6f7a14368c..c9d08d197448 100644
--- a/src/control/cmd/daos_agent/config.go
+++ b/src/control/cmd/daos_agent/config.go
@@ -55,6 +55,14 @@ type Config struct {
 	DisableAutoEvict    bool                      `yaml:"disable_auto_evict,omitempty"`
 	ExcludeFabricIfaces common.StringSet          `yaml:"exclude_fabric_ifaces,omitempty"`
 	FabricInterfaces    []*NUMAFabricConfig       `yaml:"fabric_ifaces,omitempty"`
+	TelemetryPort       int                       `yaml:"telemetry_port,omitempty"`
+	TelemetryEnabled    bool                      `yaml:"telemetry_enabled,omitempty"`
+	TelemetryRetain     time.Duration             `yaml:"telemetry_retain,omitempty"`
+}
+
+// TelemetryExportEnabled reports whether client telemetry export is enabled,
+// which is the case when a telemetry port greater than zero is configured.
+func (c *Config) TelemetryExportEnabled() bool {
+	port := c.TelemetryPort
+	return port > 0
 }
 
 // NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA
@@ -89,6 +97,14 @@ func LoadConfig(cfgPath string) (*Config, error) {
 		return nil, fmt.Errorf("invalid system name: %q", cfg.SystemName)
 	}
 
+	if cfg.TelemetryRetain > 0 && cfg.TelemetryPort == 0 {
+		return nil, errors.New("telemetry_retain requires telemetry_port")
+	}
+
+	if cfg.TelemetryEnabled && cfg.TelemetryPort == 0 {
+		return nil, errors.New("telemetry_enabled requires telemetry_port")
+	}
+
 	return cfg, nil
 }
 
diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go
index 0dbdf4fc645d..cb777396ff1b 100644
--- a/src/control/cmd/daos_agent/infocache.go
+++ b/src/control/cmd/daos_agent/infocache.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2020-2023 Intel Corporation.
+// (C) Copyright 2020-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -8,6 +8,7 @@ package main
 
 import (
 	"context"
+	"fmt"
 	"net"
 	"strings"
 	"sync"
@@ -22,6 +23,7 @@ import (
 	"github.com/daos-stack/daos/src/control/lib/control"
 	"github.com/daos-stack/daos/src/control/lib/hardware"
 	"github.com/daos-stack/daos/src/control/lib/hardware/hwprov"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
 	"github.com/daos-stack/daos/src/control/logging"
 )
 
@@ -36,17 +38,20 @@ type fabricScanFn func(ctx context.Context, providers ...string) (*NUMAFabric, e
 // NewInfoCache creates a new InfoCache with appropriate parameters set.
 func NewInfoCache(ctx context.Context, log logging.Logger, client control.UnaryInvoker, cfg *Config) *InfoCache {
 	ic := &InfoCache{
-		log:            log,
-		ignoreIfaces:   cfg.ExcludeFabricIfaces,
-		client:         client,
-		cache:          cache.NewItemCache(log),
-		getAttachInfo:  control.GetAttachInfo,
-		fabricScan:     getFabricScanFn(log, cfg, hwprov.DefaultFabricScanner(log)),
-		netIfaces:      net.Interfaces,
-		devClassGetter: hwprov.DefaultNetDevClassProvider(log),
-		devStateGetter: hwprov.DefaultNetDevStateProvider(log),
+		log:             log,
+		ignoreIfaces:    cfg.ExcludeFabricIfaces,
+		client:          client,
+		cache:           cache.NewItemCache(log),
+		getAttachInfoCb: control.GetAttachInfo,
+		fabricScan:      getFabricScanFn(log, cfg, hwprov.DefaultFabricScanner(log)),
+		netIfaces:       net.Interfaces,
+		devClassGetter:  hwprov.DefaultNetDevClassProvider(log),
+		devStateGetter:  hwprov.DefaultNetDevStateProvider(log),
 	}
 
+	ic.clientTelemetryEnabled.Store(cfg.TelemetryEnabled)
+	ic.clientTelemetryRetain.Store(cfg.TelemetryRetain > 0)
+
 	if cfg.DisableCache {
 		ic.DisableAttachInfoCache()
 		ic.DisableFabricCache()
@@ -198,12 +203,14 @@ type InfoCache struct {
 	cache                   *cache.ItemCache
 	fabricCacheDisabled     atm.Bool
 	attachInfoCacheDisabled atm.Bool
+	clientTelemetryEnabled  atm.Bool
+	clientTelemetryRetain   atm.Bool
 
-	getAttachInfo  getAttachInfoFn
-	fabricScan     fabricScanFn
-	netIfaces      func() ([]net.Interface, error)
-	devClassGetter hardware.NetDevClassProvider
-	devStateGetter hardware.NetDevStateProvider
+	getAttachInfoCb getAttachInfoFn
+	fabricScan      fabricScanFn
+	netIfaces       func() ([]net.Interface, error)
+	devClassGetter  hardware.NetDevClassProvider
+	devStateGetter  hardware.NetDevStateProvider
 
 	client            control.UnaryInvoker
 	attachInfoRefresh time.Duration
@@ -292,6 +299,41 @@ func (c *InfoCache) EnableStaticFabricCache(ctx context.Context, nf *NUMAFabric)
 	c.EnableFabricCache()
 }
 
+// getAttachInfo fetches the attach info through the configured callback and,
+// on success, adds the client telemetry settings to the response before
+// returning it. It fails if the cache or its callback is nil.
+func (c *InfoCache) getAttachInfo(ctx context.Context, rpcClient control.UnaryInvoker, req *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) {
+	switch {
+	case c == nil:
+		return nil, errors.New("InfoCache is nil")
+	case c.getAttachInfoCb == nil:
+		return nil, errors.New("getAttachInfoFn is nil")
+	}
+
+	resp, err := c.getAttachInfoCb(ctx, rpcClient, req)
+	if err == nil {
+		c.addTelemetrySettings(resp)
+		return resp, nil
+	}
+	return nil, err
+}
+
+// addTelemetrySettings appends the client telemetry environment variables to
+// the response's client network hint. Nothing is added unless client
+// telemetry is enabled; the retain variable is added only when retention is
+// enabled as well. A nil cache or nil response is left untouched.
+func (c *InfoCache) addTelemetrySettings(resp *control.GetAttachInfoResp) {
+	if c == nil || resp == nil || !c.clientTelemetryEnabled.IsTrue() {
+		return
+	}
+
+	telemVars := []string{fmt.Sprintf("%s=1", telemetry.ClientMetricsEnabledEnv)}
+	if c.clientTelemetryRetain.IsTrue() {
+		telemVars = append(telemVars, fmt.Sprintf("%s=1", telemetry.ClientMetricsRetainEnv))
+	}
+
+	resp.ClientNetHint.EnvVars = append(resp.ClientNetHint.EnvVars, telemVars...)
+}
+
 // GetAttachInfo fetches the attach info from the cache, and refreshes if necessary.
 func (c *InfoCache) GetAttachInfo(ctx context.Context, sys string) (*control.GetAttachInfoResp, error) {
 	if c == nil {
@@ -308,7 +350,8 @@ func (c *InfoCache) GetAttachInfo(ctx context.Context, sys string) (*control.Get
 	}
 	createItem := func() (cache.Item, error) {
 		c.log.Debugf("cache miss for %s", sysAttachInfoKey(sys))
-		return newCachedAttachInfo(c.attachInfoRefresh, sys, c.client, c.getAttachInfo), nil
+		cai := newCachedAttachInfo(c.attachInfoRefresh, sys, c.client, c.getAttachInfo)
+		return cai, nil
 	}
 
 	item, release, err := c.cache.GetOrCreate(ctx, sysAttachInfoKey(sys), createItem)
diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go
index 54571d006a7b..e86c44bfc0ce 100644
--- a/src/control/cmd/daos_agent/infocache_test.go
+++ b/src/control/cmd/daos_agent/infocache_test.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2020-2023 Intel Corporation.
+// (C) Copyright 2020-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -8,20 +8,23 @@ package main
 
 import (
 	"context"
+	"fmt"
 	"net"
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/pkg/errors"
+
 	"github.com/daos-stack/daos/src/control/build"
 	"github.com/daos-stack/daos/src/control/common"
 	"github.com/daos-stack/daos/src/control/common/test"
 	"github.com/daos-stack/daos/src/control/lib/cache"
 	"github.com/daos-stack/daos/src/control/lib/control"
 	"github.com/daos-stack/daos/src/control/lib/hardware"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
 	"github.com/daos-stack/daos/src/control/logging"
-	"github.com/google/go-cmp/cmp"
-	"github.com/google/go-cmp/cmp/cmpopts"
-	"github.com/pkg/errors"
 )
 
 type testInfoCacheParams struct {
@@ -32,6 +35,8 @@ type testInfoCacheParams struct {
 	mockNetDevStateGetter  hardware.NetDevStateProvider
 	disableFabricCache     bool
 	disableAttachInfoCache bool
+	enableClientTelemetry  bool
+	retainClientTelemetry  bool
 	ctlInvoker             control.Invoker
 	cachedItems            []cache.Item
 }
@@ -43,16 +48,19 @@ func newTestInfoCache(t *testing.T, log logging.Logger, params testInfoCachePara
 	}
 
 	ic := &InfoCache{
-		log:            log,
-		getAttachInfo:  params.mockGetAttachInfo,
-		fabricScan:     params.mockScanFabric,
-		devClassGetter: params.mockNetDevClassGetter,
-		devStateGetter: params.mockNetDevStateGetter,
-		netIfaces:      params.mockNetIfaces,
-		client:         params.ctlInvoker,
-		cache:          c,
+		log:             log,
+		getAttachInfoCb: params.mockGetAttachInfo,
+		fabricScan:      params.mockScanFabric,
+		devClassGetter:  params.mockNetDevClassGetter,
+		devStateGetter:  params.mockNetDevStateGetter,
+		netIfaces:       params.mockNetIfaces,
+		client:          params.ctlInvoker,
+		cache:           c,
 	}
 
+	ic.clientTelemetryEnabled.Store(params.enableClientTelemetry)
+	ic.clientTelemetryRetain.Store(params.retainClientTelemetry)
+
 	if ic.netIfaces == nil {
 		ic.netIfaces = func() ([]net.Interface, error) {
 			return []net.Interface{
@@ -714,6 +722,14 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 			NetDevClass: uint32(hardware.Ether),
 		},
 	}
+	telemEnabledResp := copyGetAttachInfoResp(ctlResp)
+	telemEnabledResp.ClientNetHint.EnvVars = append(telemEnabledResp.ClientNetHint.EnvVars,
+		fmt.Sprintf("%s=1", telemetry.ClientMetricsEnabledEnv),
+	)
+	telemRetainedResp := copyGetAttachInfoResp(telemEnabledResp)
+	telemRetainedResp.ClientNetHint.EnvVars = append(telemRetainedResp.ClientNetHint.EnvVars,
+		fmt.Sprintf("%s=1", telemetry.ClientMetricsRetainEnv),
+	)
 
 	for name, tc := range map[string]struct {
 		getInfoCache func(logging.Logger) *InfoCache
@@ -734,7 +750,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 					disableAttachInfoCache: true,
 				})
 			},
-			remoteResp: ctlResp,
+			remoteResp: copyGetAttachInfoResp(ctlResp),
 			expResp:    ctlResp,
 			expRemote:  true,
 		},
@@ -748,11 +764,45 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 			expErr:    errors.New("mock remote"),
 			expRemote: true,
 		},
+		"cache disabled; client telemetry enabled": {
+			getInfoCache: func(l logging.Logger) *InfoCache {
+				return newTestInfoCache(t, l, testInfoCacheParams{
+					disableAttachInfoCache: true,
+					enableClientTelemetry:  true,
+				})
+			},
+			remoteResp: copyGetAttachInfoResp(ctlResp),
+			expResp:    telemEnabledResp,
+			expRemote:  true,
+		},
+		"cache enabled; client telemetry enabled": {
+			getInfoCache: func(l logging.Logger) *InfoCache {
+				return newTestInfoCache(t, l, testInfoCacheParams{
+					enableClientTelemetry: true,
+				})
+			},
+			remoteResp: copyGetAttachInfoResp(ctlResp),
+			expResp:    telemEnabledResp,
+			expRemote:  true,
+			expCached:  true,
+		},
+		"cache enabled; client telemetry enabled; client telemetry retained": {
+			getInfoCache: func(l logging.Logger) *InfoCache {
+				return newTestInfoCache(t, l, testInfoCacheParams{
+					enableClientTelemetry: true,
+					retainClientTelemetry: true,
+				})
+			},
+			remoteResp: copyGetAttachInfoResp(ctlResp),
+			expResp:    telemRetainedResp,
+			expRemote:  true,
+			expCached:  true,
+		},
 		"enabled but empty": {
 			getInfoCache: func(l logging.Logger) *InfoCache {
 				return newTestInfoCache(t, l, testInfoCacheParams{})
 			},
-			remoteResp: ctlResp,
+			remoteResp: copyGetAttachInfoResp(ctlResp),
 			expResp:    ctlResp,
 			expRemote:  true,
 			expCached:  true,
@@ -772,7 +822,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 					fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) {
 						return nil, errors.New("shouldn't call cached remote")
 					},
-					lastResponse: ctlResp,
+					lastResponse: copyGetAttachInfoResp(ctlResp),
 					cacheItem:    cacheItem{lastCached: time.Now()},
 					system:       "test",
 				})
@@ -790,7 +840,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 					fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) {
 						return nil, errors.New("shouldn't call cached remote")
 					},
-					lastResponse: ctlResp,
+					lastResponse: copyGetAttachInfoResp(ctlResp),
 					cacheItem:    cacheItem{lastCached: time.Now()},
 					system:       build.DefaultSystemName,
 				})
@@ -814,7 +864,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 				return ic
 			},
 			system:     "somethingelse",
-			remoteResp: ctlResp,
+			remoteResp: copyGetAttachInfoResp(ctlResp),
 			expResp:    ctlResp,
 			expCached:  true,
 			expRemote:  true,
@@ -831,7 +881,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) {
 
 			calledRemote := false
 			if ic != nil {
-				ic.getAttachInfo = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) {
+				ic.getAttachInfoCb = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) {
 					calledRemote = true
 					return tc.remoteResp, tc.remoteErr
 				}
diff --git a/src/control/cmd/daos_agent/main.go b/src/control/cmd/daos_agent/main.go
index 73788a7cb496..1518207a3cbb 100644
--- a/src/control/cmd/daos_agent/main.go
+++ b/src/control/cmd/daos_agent/main.go
@@ -20,6 +20,7 @@ import (
 	"github.com/daos-stack/daos/src/control/common/cmdutil"
 	"github.com/daos-stack/daos/src/control/lib/atm"
 	"github.com/daos-stack/daos/src/control/lib/control"
+	"github.com/daos-stack/daos/src/control/lib/daos"
 	"github.com/daos-stack/daos/src/control/lib/hardware/hwprov"
 	"github.com/daos-stack/daos/src/control/logging"
 )
@@ -112,6 +113,17 @@ func parseOpts(args []string, opts *cliOptions, invoker control.Invoker, log *lo
 			logCmd.SetLog(log)
 		}
 
+		daosLogMask := daos.DefaultErrorMask
+		if opts.Debug {
+			log.SetLevel(logging.LogLevelTrace)
+			daosLogMask = daos.DefaultDebugMask
+		}
+		fini, err := daos.InitLogging(daosLogMask)
+		if err != nil {
+			return err
+		}
+		defer fini()
+
 		if jsonCmd, ok := cmd.(cmdutil.JSONOutputter); ok && opts.JSON {
 			jsonCmd.EnableJSONOutput(os.Stdout, &wroteJSON)
 			// disable output on stdout other than JSON
@@ -194,7 +206,6 @@ func parseOpts(args []string, opts *cliOptions, invoker control.Invoker, log *lo
 			return errors.Wrap(err, "Unable to load Certificate Data")
 		}
 
-		var err error
 		if cfg.AccessPoints, err = common.ParseHostList(cfg.AccessPoints, cfg.ControlPort); err != nil {
 			return errors.Wrap(err, "Failed to parse config access_points")
 		}
diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go
index 17c07b4a2f62..75dc337e3138 100644
--- a/src/control/cmd/daos_agent/mgmt_rpc.go
+++ b/src/control/cmd/daos_agent/mgmt_rpc.go
@@ -25,6 +25,8 @@ import (
 	"github.com/daos-stack/daos/src/control/lib/control"
 	"github.com/daos-stack/daos/src/control/lib/daos"
 	"github.com/daos-stack/daos/src/control/lib/hardware"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+	"github.com/daos-stack/daos/src/control/lib/telemetry/promexp"
 	"github.com/daos-stack/daos/src/control/logging"
 )
 
@@ -40,6 +42,7 @@ type mgmtModule struct {
 	ctlInvoker     control.Invoker
 	cache          *InfoCache
 	monitor        *procMon
+	cliMetricsSrc  *promexp.ClientSource
 	useDefaultNUMA bool
 
 	numaGetter hardware.ProcessNUMAProvider
@@ -71,6 +74,8 @@ func (mod *mgmtModule) HandleCall(ctx context.Context, session *drpc.Session, me
 	switch method {
 	case drpc.MethodGetAttachInfo:
 		return mod.handleGetAttachInfo(ctx, req, cred.Pid)
+	case drpc.MethodSetupClientTelemetry:
+		return mod.handleSetupClientTelemetry(ctx, req, cred)
 	case drpc.MethodNotifyPoolConnect:
 		return nil, mod.handleNotifyPoolConnect(ctx, req, cred.Pid)
 	case drpc.MethodNotifyPoolDisconnect:
@@ -214,6 +219,33 @@ func (mod *mgmtModule) getFabricInterface(ctx context.Context, numaNode int, net
 	return mod.cache.GetFabricDevice(ctx, numaNode, netDevClass, provider)
 }
 
+// handleSetupClientTelemetry validates the request and the caller's credentials,
+// calls telemetry.SetupClientRoot for the caller's PID, and replies with this process's UID.
+func (mod *mgmtModule) handleSetupClientTelemetry(ctx context.Context, reqb []byte, cred *unix.Ucred) ([]byte, error) {
+	if len(reqb) == 0 {
+		return nil, errors.New("empty request")
+	}
+	req := new(mgmtpb.ClientTelemetryReq)
+	if err := proto.Unmarshal(reqb, req); err != nil {
+		return nil, drpc.UnmarshalingPayloadFailure()
+	}
+
+	switch {
+	case req.Jobid == "":
+		return nil, errors.New("empty jobid")
+	case req.ShmKey == 0:
+		return nil, errors.New("unset shm key")
+	case cred == nil:
+		return nil, errors.New("nil user credentials")
+	}
+	if err := telemetry.SetupClientRoot(ctx, req.Jobid, int(cred.Pid), int(req.ShmKey)); err != nil {
+		return nil, err
+	}
+	reply := &mgmtpb.ClientTelemetryResp{AgentUid: int32(unix.Getuid())}
+	mod.log.Tracef("%d: %s", cred.Pid, pblog.Debug(reply))
+	return proto.Marshal(reply)
+}
+
 func (mod *mgmtModule) handleNotifyPoolConnect(ctx context.Context, reqb []byte, pid int32) error {
 	pbReq := new(mgmtpb.PoolMonitorReq)
 	if err := proto.Unmarshal(reqb, pbReq); err != nil {
diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go
index 9bd85decf08d..59fcb507a810 100644
--- a/src/control/cmd/daos_agent/mgmt_rpc_test.go
+++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2021-2023 Intel Corporation.
+// (C) Copyright 2021-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -15,18 +15,22 @@ import (
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
 	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/testing/protocmp"
 
 	"github.com/daos-stack/daos/src/control/build"
 	"github.com/daos-stack/daos/src/control/common"
 	"github.com/daos-stack/daos/src/control/common/proto/convert"
 	mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt"
 	"github.com/daos-stack/daos/src/control/common/test"
+	"github.com/daos-stack/daos/src/control/drpc"
 	"github.com/daos-stack/daos/src/control/fault"
 	"github.com/daos-stack/daos/src/control/fault/code"
 	"github.com/daos-stack/daos/src/control/lib/control"
 	"github.com/daos-stack/daos/src/control/lib/daos"
 	"github.com/daos-stack/daos/src/control/lib/hardware"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
 	"github.com/daos-stack/daos/src/control/logging"
 )
 
@@ -388,3 +392,116 @@ func TestAgent_mgmtModule_RefreshCache(t *testing.T) {
 		})
 	}
 }
+
+func TestAgent_handleSetupClientTelemetry(t *testing.T) {
+	testCreds := &unix.Ucred{
+		Uid: 123,
+		Gid: 456,
+	}
+	testSysName := "test-sys"
+	testJobID := "test-job"
+	testShmKey := int32(42)
+	// Each case supplies either raw request bytes or a request to marshal, plus the caller credentials.
+	for name, tc := range map[string]struct {
+		clientBytes []byte
+		clientReq   *mgmtpb.ClientTelemetryReq
+		clientCred  *unix.Ucred
+		expResp     *mgmtpb.ClientTelemetryResp
+		expErr      error
+	}{
+		"nil client request": {
+			clientReq:  nil,
+			clientCred: testCreds,
+			expErr:     errors.New("empty request"),
+		},
+		"garbage client request": {
+			clientBytes: []byte("invalid"),
+			clientCred:  testCreds,
+			expErr:      drpc.UnmarshalingPayloadFailure(),
+		},
+		"unset jobid": {
+			clientReq: &mgmtpb.ClientTelemetryReq{
+				Sys:    testSysName,
+				Jobid:  "",
+				ShmKey: testShmKey,
+			},
+			clientCred: testCreds,
+			expErr:     errors.New("empty jobid"),
+		},
+		"unset shm key": {
+			clientReq: &mgmtpb.ClientTelemetryReq{
+				Sys:    testSysName,
+				Jobid:  testJobID,
+				ShmKey: 0,
+			},
+			clientCred: testCreds,
+			expErr:     errors.New("unset shm key"),
+		},
+		"nil user creds": {
+			clientReq: &mgmtpb.ClientTelemetryReq{
+				Sys:    testSysName,
+				Jobid:  testJobID,
+				ShmKey: testShmKey,
+			},
+			clientCred: nil,
+			expErr:     errors.New("nil user credentials"),
+		},
+		"success": {
+			clientReq: &mgmtpb.ClientTelemetryReq{
+				Sys:    testSysName,
+				Jobid:  testJobID,
+				ShmKey: testShmKey,
+			},
+			clientCred: testCreds,
+			expResp: &mgmtpb.ClientTelemetryResp{
+				AgentUid: int32(unix.Getuid()),
+			},
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			log, buf := logging.NewTestLogger(t.Name())
+			defer test.ShowBufferOnFailure(t, buf)
+
+			mod := &mgmtModule{
+				log: log,
+			}
+			// Raw clientBytes, when given, take precedence over the marshaled clientReq.
+			var reqBytes []byte
+			if len(tc.clientBytes) > 0 {
+				reqBytes = tc.clientBytes
+			} else {
+				var err error
+				reqBytes, err = proto.Marshal(tc.clientReq)
+				if err != nil {
+					t.Fatal(err)
+				}
+			}
+			// Bring up test telemetry; the handler's context is derived from telemetry.Init below.
+			testID := uint32(telemetry.NextTestID(telemetry.AgentIDBase))
+			telemetry.InitTestMetricsProducer(t, int(testID), 2048)
+			defer telemetry.CleanupTestMetricsProducer(t)
+
+			parent := test.MustLogContext(t, log)
+			ctx, err := telemetry.Init(parent, testID)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer telemetry.Fini()
+
+			gotResp, gotErr := mod.handleSetupClientTelemetry(ctx, reqBytes, tc.clientCred)
+			test.CmpErr(t, tc.expErr, gotErr)
+			if tc.expErr != nil {
+				return
+			}
+			// The handler returns marshaled bytes, so compare against the marshaled expected response.
+			expRespBytes, err := proto.Marshal(tc.expResp)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if diff := cmp.Diff(expRespBytes, gotResp, protocmp.Transform()); diff != "" {
+				t.Fatalf("-want, +got:\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/src/control/cmd/daos_agent/start.go b/src/control/cmd/daos_agent/start.go
index cb5505234d52..e5416ee874bd 100644
--- a/src/control/cmd/daos_agent/start.go
+++ b/src/control/cmd/daos_agent/start.go
@@ -23,6 +23,7 @@ import (
 	"github.com/daos-stack/daos/src/control/lib/hardware/hwloc"
 	"github.com/daos-stack/daos/src/control/lib/hardware/hwprov"
 	"github.com/daos-stack/daos/src/control/lib/systemd"
+	"github.com/daos-stack/daos/src/control/lib/telemetry/promexp"
 )
 
 type ctxKey string
@@ -98,15 +99,30 @@ func (cmd *startCmd) Execute(_ []string) error {
 	procmon.startMonitoring(ctx)
 	cmd.Debugf("started process monitor: %s", time.Since(procmonStart))
 
+	var clientMetricSource *promexp.ClientSource
+	if cmd.cfg.TelemetryExportEnabled() {
+		if clientMetricSource, err = promexp.NewClientSource(ctx); err != nil {
+			return errors.Wrap(err, "unable to create client metrics source")
+		}
+		telemetryStart := time.Now()
+		shutdown, err := startPrometheusExporter(ctx, cmd, clientMetricSource, cmd.cfg)
+		if err != nil {
+			return errors.Wrap(err, "unable to start prometheus exporter")
+		}
+		defer shutdown()
+		cmd.Debugf("telemetry exporter started: %s", time.Since(telemetryStart))
+	}
+
 	drpcRegStart := time.Now()
 	drpcServer.RegisterRPCModule(NewSecurityModule(cmd.Logger, cmd.cfg.TransportConfig))
 	mgmtMod := &mgmtModule{
-		log:        cmd.Logger,
-		sys:        cmd.cfg.SystemName,
-		ctlInvoker: cmd.ctlInvoker,
-		cache:      cache,
-		numaGetter: hwprov.DefaultProcessNUMAProvider(cmd.Logger),
-		monitor:    procmon,
+		log:           cmd.Logger,
+		sys:           cmd.cfg.SystemName,
+		ctlInvoker:    cmd.ctlInvoker,
+		cache:         cache,
+		numaGetter:    hwprov.DefaultProcessNUMAProvider(cmd.Logger),
+		monitor:       procmon,
+		cliMetricsSrc: clientMetricSource,
 	}
 	drpcServer.RegisterRPCModule(mgmtMod)
 	cmd.Debugf("registered dRPC modules: %s", time.Since(drpcRegStart))
diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go
new file mode 100644
index 000000000000..4c0e2d35b4c7
--- /dev/null
+++ b/src/control/cmd/daos_agent/telemetry.go
@@ -0,0 +1,36 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package main
+
+import (
+	"context"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/lib/telemetry/promexp"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+// startPrometheusExporter starts a Prometheus exporter on cfg.TelemetryPort serving
+// client metrics collected from cs, and returns the result of promexp.StartExporter.
+func startPrometheusExporter(ctx context.Context, log logging.Logger, cs *promexp.ClientSource, cfg *Config) (func(), error) {
+	expCfg := &promexp.ExporterConfig{
+		Port:  cfg.TelemetryPort,
+		Title: "DAOS Client Telemetry",
+		Register: func(ctx context.Context, log logging.Logger) error {
+			c, err := promexp.NewClientCollector(ctx, log, cs, &promexp.CollectorOpts{
+				RetainDuration: cfg.TelemetryRetain,
+			})
+			if err != nil {
+				return err
+			}
+			// Use Register, not MustRegister, so a registration failure is returned instead of panicking.
+			return prometheus.Register(c)
+		},
+	}
+	return promexp.StartExporter(ctx, log, expCfg)
+}
diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go
index 444f64c57693..86c11e72f08d 100644
--- a/src/control/common/proto/mgmt/svc.pb.go
+++ b/src/control/common/proto/mgmt/svc.pb.go
@@ -1,13 +1,13 @@
 //
-// (C) Copyright 2018-2023 Intel Corporation.
+// (C) Copyright 2018-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
 
 // Code generated by protoc-gen-go. DO NOT EDIT.
 // versions:
-// 	protoc-gen-go v1.28.1
-// 	protoc        v3.11.4
+// 	protoc-gen-go v1.31.0
+// 	protoc        v3.21.12
 // source: mgmt/svc.proto
 
 package mgmt
@@ -990,6 +990,124 @@ func (x *PoolMonitorReq) GetJobid() string {
 	return ""
 }
 
+type ClientTelemetryReq struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	Sys    string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"`                      // DAOS system identifier
+	Jobid  string `protobuf:"bytes,2,opt,name=jobid,proto3" json:"jobid,omitempty"`                  // Job ID used for client telemetry
+	ShmKey int32  `protobuf:"varint,3,opt,name=shm_key,json=shmKey,proto3" json:"shm_key,omitempty"` // Client's shared memory segment key
+}
+
+func (x *ClientTelemetryReq) Reset() {
+	*x = ClientTelemetryReq{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_mgmt_svc_proto_msgTypes[14]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *ClientTelemetryReq) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*ClientTelemetryReq) ProtoMessage() {}
+
+func (x *ClientTelemetryReq) ProtoReflect() protoreflect.Message {
+	mi := &file_mgmt_svc_proto_msgTypes[14]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use ClientTelemetryReq.ProtoReflect.Descriptor instead.
+func (*ClientTelemetryReq) Descriptor() ([]byte, []int) {
+	return file_mgmt_svc_proto_rawDescGZIP(), []int{14}
+}
+
+func (x *ClientTelemetryReq) GetSys() string {
+	if x != nil {
+		return x.Sys
+	}
+	return ""
+}
+
+func (x *ClientTelemetryReq) GetJobid() string {
+	if x != nil {
+		return x.Jobid
+	}
+	return ""
+}
+
+func (x *ClientTelemetryReq) GetShmKey() int32 {
+	if x != nil {
+		return x.ShmKey
+	}
+	return 0
+}
+
+type ClientTelemetryResp struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	Status   int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"`                     // DAOS status code
+	AgentUid int32 `protobuf:"varint,2,opt,name=agent_uid,json=agentUid,proto3" json:"agent_uid,omitempty"` // UID of agent process
+}
+
+func (x *ClientTelemetryResp) Reset() {
+	*x = ClientTelemetryResp{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_mgmt_svc_proto_msgTypes[15]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *ClientTelemetryResp) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*ClientTelemetryResp) ProtoMessage() {}
+
+func (x *ClientTelemetryResp) ProtoReflect() protoreflect.Message {
+	mi := &file_mgmt_svc_proto_msgTypes[15]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use ClientTelemetryResp.ProtoReflect.Descriptor instead.
+func (*ClientTelemetryResp) Descriptor() ([]byte, []int) {
+	return file_mgmt_svc_proto_rawDescGZIP(), []int{15}
+}
+
+func (x *ClientTelemetryResp) GetStatus() int32 {
+	if x != nil {
+		return x.Status
+	}
+	return 0
+}
+
+func (x *ClientTelemetryResp) GetAgentUid() int32 {
+	if x != nil {
+		return x.AgentUid
+	}
+	return 0
+}
+
 type GroupUpdateReq_Engine struct {
 	state         protoimpl.MessageState
 	sizeCache     protoimpl.SizeCache
@@ -1003,7 +1121,7 @@ type GroupUpdateReq_Engine struct {
 func (x *GroupUpdateReq_Engine) Reset() {
 	*x = GroupUpdateReq_Engine{}
 	if protoimpl.UnsafeEnabled {
-		mi := &file_mgmt_svc_proto_msgTypes[14]
+		mi := &file_mgmt_svc_proto_msgTypes[16]
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		ms.StoreMessageInfo(mi)
 	}
@@ -1016,7 +1134,7 @@ func (x *GroupUpdateReq_Engine) String() string {
 func (*GroupUpdateReq_Engine) ProtoMessage() {}
 
 func (x *GroupUpdateReq_Engine) ProtoReflect() protoreflect.Message {
-	mi := &file_mgmt_svc_proto_msgTypes[14]
+	mi := &file_mgmt_svc_proto_msgTypes[16]
 	if protoimpl.UnsafeEnabled && x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -1065,7 +1183,7 @@ type GetAttachInfoResp_RankUri struct {
 func (x *GetAttachInfoResp_RankUri) Reset() {
 	*x = GetAttachInfoResp_RankUri{}
 	if protoimpl.UnsafeEnabled {
-		mi := &file_mgmt_svc_proto_msgTypes[15]
+		mi := &file_mgmt_svc_proto_msgTypes[17]
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		ms.StoreMessageInfo(mi)
 	}
@@ -1078,7 +1196,7 @@ func (x *GetAttachInfoResp_RankUri) String() string {
 func (*GetAttachInfoResp_RankUri) ProtoMessage() {}
 
 func (x *GetAttachInfoResp_RankUri) ProtoReflect() protoreflect.Message {
-	mi := &file_mgmt_svc_proto_msgTypes[15]
+	mi := &file_mgmt_svc_proto_msgTypes[17]
 	if protoimpl.UnsafeEnabled && x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -1221,11 +1339,21 @@ var file_mgmt_svc_proto_rawDesc = []byte{
 	0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f,
 	0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05,
 	0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62,
-	0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d,
-	0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73,
-	0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d,
-	0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06,
-	0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
+	0x69, 0x64, 0x22, 0x55, 0x0a, 0x12, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x54, 0x65, 0x6c, 0x65,
+	0x6d, 0x65, 0x74, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18,
+	0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f,
+	0x62, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64,
+	0x12, 0x17, 0x0a, 0x07, 0x73, 0x68, 0x6d, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x03, 0x20, 0x01, 0x28,
+	0x05, 0x52, 0x06, 0x73, 0x68, 0x6d, 0x4b, 0x65, 0x79, 0x22, 0x4a, 0x0a, 0x13, 0x43, 0x6c, 0x69,
+	0x65, 0x6e, 0x74, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70,
+	0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05,
+	0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x67, 0x65, 0x6e,
+	0x74, 0x5f, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x61, 0x67, 0x65,
+	0x6e, 0x74, 0x55, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e,
+	0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64,
+	0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f,
+	0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d,
+	0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
 }
 
 var (
@@ -1241,7 +1369,7 @@ func file_mgmt_svc_proto_rawDescGZIP() []byte {
 }
 
 var file_mgmt_svc_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_mgmt_svc_proto_msgTypes = make([]protoimpl.MessageInfo, 16)
+var file_mgmt_svc_proto_msgTypes = make([]protoimpl.MessageInfo, 18)
 var file_mgmt_svc_proto_goTypes = []interface{}{
 	(JoinResp_State)(0),               // 0: mgmt.JoinResp.State
 	(*DaosResp)(nil),                  // 1: mgmt.DaosResp
@@ -1258,13 +1386,15 @@ var file_mgmt_svc_proto_goTypes = []interface{}{
 	(*PingRankReq)(nil),               // 12: mgmt.PingRankReq
 	(*SetRankReq)(nil),                // 13: mgmt.SetRankReq
 	(*PoolMonitorReq)(nil),            // 14: mgmt.PoolMonitorReq
-	(*GroupUpdateReq_Engine)(nil),     // 15: mgmt.GroupUpdateReq.Engine
-	(*GetAttachInfoResp_RankUri)(nil), // 16: mgmt.GetAttachInfoResp.RankUri
+	(*ClientTelemetryReq)(nil),        // 15: mgmt.ClientTelemetryReq
+	(*ClientTelemetryResp)(nil),       // 16: mgmt.ClientTelemetryResp
+	(*GroupUpdateReq_Engine)(nil),     // 17: mgmt.GroupUpdateReq.Engine
+	(*GetAttachInfoResp_RankUri)(nil), // 18: mgmt.GetAttachInfoResp.RankUri
 }
 var file_mgmt_svc_proto_depIdxs = []int32{
-	15, // 0: mgmt.GroupUpdateReq.engines:type_name -> mgmt.GroupUpdateReq.Engine
+	17, // 0: mgmt.GroupUpdateReq.engines:type_name -> mgmt.GroupUpdateReq.Engine
 	0,  // 1: mgmt.JoinResp.state:type_name -> mgmt.JoinResp.State
-	16, // 2: mgmt.GetAttachInfoResp.rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri
+	18, // 2: mgmt.GetAttachInfoResp.rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri
 	9,  // 3: mgmt.GetAttachInfoResp.client_net_hint:type_name -> mgmt.ClientNetHint
 	4,  // [4:4] is the sub-list for method output_type
 	4,  // [4:4] is the sub-list for method input_type
@@ -1448,7 +1578,7 @@ func file_mgmt_svc_proto_init() {
 			}
 		}
 		file_mgmt_svc_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*GroupUpdateReq_Engine); i {
+			switch v := v.(*ClientTelemetryReq); i {
 			case 0:
 				return &v.state
 			case 1:
@@ -1460,6 +1590,30 @@ func file_mgmt_svc_proto_init() {
 			}
 		}
 		file_mgmt_svc_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*ClientTelemetryResp); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
+		file_mgmt_svc_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*GroupUpdateReq_Engine); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
+		file_mgmt_svc_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} {
 			switch v := v.(*GetAttachInfoResp_RankUri); i {
 			case 0:
 				return &v.state
@@ -1478,7 +1632,7 @@ func file_mgmt_svc_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: file_mgmt_svc_proto_rawDesc,
 			NumEnums:      1,
-			NumMessages:   16,
+			NumMessages:   18,
 			NumExtensions: 0,
 			NumServices:   0,
 		},
diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go
index cd88b5acf25c..4d27fb78b2a2 100644
--- a/src/control/common/test/utils.go
+++ b/src/control/common/test/utils.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2018-2022 Intel Corporation.
+// (C) Copyright 2018-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -26,6 +26,8 @@ import (
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"golang.org/x/sys/unix"
 	"google.golang.org/protobuf/testing/protocmp"
+
+	"github.com/daos-stack/daos/src/control/logging"
 )
 
 // AssertTrue asserts b is true
@@ -408,3 +410,14 @@ func Context(t *testing.T) context.Context {
 	t.Cleanup(cancel)
 	return ctx
 }
+
+// MustLogContext returns a test context (canceled at test cleanup) that carries
+// the supplied logger; it fails the test if the logger cannot be attached.
+func MustLogContext(t *testing.T, log logging.Logger) context.Context {
+	t.Helper()
+	logCtx, attachErr := logging.ToContext(Context(t), log)
+	if attachErr != nil {
+		t.Fatal(attachErr)
+	}
+	return logCtx
+}
diff --git a/src/control/drpc/modules.go b/src/control/drpc/modules.go
index 1a51bc2f67c3..0aacbae1d4a3 100644
--- a/src/control/drpc/modules.go
+++ b/src/control/drpc/modules.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2019-2022 Intel Corporation.
+// (C) Copyright 2019-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -157,6 +157,7 @@ func (m MgmtMethod) String() string {
 		MethodPoolGetProp:          "PoolGetProp",
 		MethodPoolUpgrade:          "PoolUpgrade",
 		MethodLedManage:            "LedManage",
+		MethodSetupClientTelemetry: "SetupClientTelemetry",
 	}[m]; ok {
 		return s
 	}
@@ -244,6 +245,8 @@ const (
 	MethodPoolUpgrade MgmtMethod = C.DRPC_METHOD_MGMT_POOL_UPGRADE
 	// MethodLedManage defines a method to manage a VMD device LED state
 	MethodLedManage MgmtMethod = C.DRPC_METHOD_MGMT_LED_MANAGE
+	// MethodSetupClientTelemetry defines a method to setup client telemetry
+	MethodSetupClientTelemetry MgmtMethod = C.DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM
 )
 
 type srvMethod int32
diff --git a/src/control/lib/daos/logging.go b/src/control/lib/daos/logging.go
new file mode 100644
index 000000000000..9891adba0bed
--- /dev/null
+++ b/src/control/lib/daos/logging.go
@@ -0,0 +1,47 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package daos
+
+import (
+	"os"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+/*
+#cgo LDFLAGS: -lgurt
+
+#include <daos/debug.h>
+*/
+import "C"
+
+const (
+	// DefaultDebugMask defines the basic debug mask.
+	DefaultDebugMask = "DEBUG,MEM=ERR,OBJECT=ERR,PLACEMENT=ERR"
+	// DefaultInfoMask defines the basic info mask; InitLogging uses it when no mask is supplied.
+	DefaultInfoMask = "INFO"
+	// DefaultErrorMask defines the basic error mask.
+	DefaultErrorMask = "ERROR"
+)
+
+// InitLogging initializes the DAOS logging system. The supplied masks are
+// joined with "," and exported as D_LOG_MASK (DefaultInfoMask if empty) before
+// daos_debug_init() is called. On success the returned func calls daos_debug_fini().
+func InitLogging(masks ...string) (func(), error) {
+	mask := strings.Join(masks, ",")
+	if mask == "" {
+		mask = DefaultInfoMask
+	}
+	if err := os.Setenv("D_LOG_MASK", mask); err != nil {
+		return func() {}, errors.Wrap(err, "failed to set D_LOG_MASK")
+	}
+	if rc := C.daos_debug_init(nil); rc != 0 {
+		return func() {}, errors.Wrap(Status(rc), "daos_debug_init() failed")
+	}
+	return func() { C.daos_debug_fini() }, nil
+}
diff --git a/src/control/lib/telemetry/promexp/client.go b/src/control/lib/telemetry/promexp/client.go
new file mode 100644
index 000000000000..e6eefeaf3968
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/client.go
@@ -0,0 +1,176 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build linux && (amd64 || arm64)
+// +build linux
+// +build amd64 arm64
+
+package promexp
+
+import (
+	"context"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/lib/atm"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+const (
+	// defaultCleanupInterval is the interval for pruning unused shared memory
+	// segments when CollectorOpts.RetainDuration is zero.
+	defaultCleanupInterval = 1 * time.Minute
+)
+
+type (
+	// ClientCollector is a metrics collector for DAOS client metrics.
+	ClientCollector struct {
+		metricsCollector
+	}
+
+	// ClientSource is a metrics source for DAOS client metrics.
+	ClientSource struct {
+		MetricSource
+		cleanup func() // NOTE(review): not assigned by NewClientSource in this file; confirm it is set/used elsewhere
+	}
+)
+// extractClientLabels derives labels (job, pid, tid, plus those from extractLabels) and a name from a metric path.
+func extractClientLabels(log logging.Logger, in string) (labels labelMap, name string) {
+	log.Tracef("in: %q", in)
+
+	labels = make(labelMap)
+	compsIdx := 0
+	comps := strings.Split(in, string(telemetry.PathSep))
+	if len(comps) == 0 {
+		return labels, ""
+	}
+	// Skip a leading "ID..." component; a path consisting only of that component yields no name.
+	if strings.HasPrefix(comps[compsIdx], "ID") {
+		if len(comps) == 1 {
+			return labels, ""
+		}
+		compsIdx++
+	}
+	// Consume up to three leading components as the job, pid and tid labels.
+	for i, label := range []string{"job", "pid", "tid"} {
+		if i > 0 {
+			// After jobid, we should have a pid and/or tid, and
+			// then move on to the engine labels.
+			_, err := strconv.Atoi(comps[compsIdx])
+			if err != nil {
+				break
+			}
+		}
+
+		if len(comps) == compsIdx+1 {
+			// If we have a weird path ending on a pid or tid, treat it
+			// as empty of labels.
+			if _, err := strconv.Atoi(comps[compsIdx]); err == nil && i > 0 {
+				return labelMap{}, ""
+			}
+			return labels, comps[compsIdx]
+		}
+		labels[label] = comps[compsIdx]
+		compsIdx++
+	}
+	// The remaining components are re-joined and handed to extractLabels; its labels are merged in.
+	var engLabels labelMap
+	engLabels, name = extractLabels(log, strings.Join(comps[compsIdx:], string(telemetry.PathSep)))
+	for k, v := range engLabels {
+		labels[k] = v
+	}
+
+	return
+}
+
+// newClientMetric wraps m as a sourceMetric named "client_<name>", with labels taken from its full path.
+func newClientMetric(log logging.Logger, m telemetry.Metric) *sourceMetric {
+	pathLabels, metricName := extractClientLabels(log, m.FullPath())
+
+	return newSourceMetric(log, m, "client_"+metricName, pathLabels)
+}
+
+// NewClientSource initializes the client telemetry root under parent and
+// returns an enabled ClientSource; telemetry is detached once parent is done.
+func NewClientSource(parent context.Context) (*ClientSource, error) {
+	tmCtx, err := telemetry.InitClientRoot(parent)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to init telemetry")
+	}
+
+	// Detach from telemetry when the caller's context ends.
+	go func() {
+		<-parent.Done()
+		telemetry.Detach(tmCtx)
+	}()
+
+	src := &ClientSource{}
+	src.ctx = tmCtx
+	src.enabled = atm.NewBool(true)
+	src.tmSchema = telemetry.NewSchema()
+	src.smSchema = newSourceMetricSchema(newClientMetric)
+	return src, nil
+}
+
+// NewClientCollector creates a new ClientCollector backed by source, and starts
+// a goroutine that prunes source's unused segments until ctx is canceled.
+func NewClientCollector(ctx context.Context, log logging.Logger, source *ClientSource, opts *CollectorOpts) (*ClientCollector, error) {
+	if opts == nil {
+		opts = defaultCollectorOpts()
+	}
+	if opts.RetainDuration == 0 {
+		// Clients will clean up after themselves, but we still need to
+		// periodically remove the top-level jobid segments.
+		opts.RetainDuration = defaultCleanupInterval
+	}
+
+	log.Debugf("pruning unused client metric segments every %s", opts.RetainDuration)
+	go func() {
+		pruneTicker := time.NewTicker(opts.RetainDuration)
+		defer pruneTicker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				// Without this return the closed Done channel stays ready and the loop spins forever.
+				return
+			case <-pruneTicker.C:
+				source.PruneSegments(log, opts.RetainDuration)
+			}
+		}
+	}()
+
+	c := &ClientCollector{
+		metricsCollector: metricsCollector{
+			log: log,
+			summary: prometheus.NewSummaryVec(
+				prometheus.SummaryOpts{
+					Namespace: "client",
+					Subsystem: "exporter",
+					Name:      "scrape_duration_seconds",
+					Help:      "daos_client_exporter: Duration of a scrape job.",
+				},
+				[]string{"source", "result"},
+			),
+			collectFn: func(ch chan *sourceMetric) {
+				source.Collect(log, ch)
+			},
+		},
+	}
+
+	for _, pat := range opts.Ignores {
+		re, err := regexp.Compile(pat)
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed to compile %q", pat)
+		}
+		c.ignoredMetrics = append(c.ignoredMetrics, re)
+	}
+	return c, nil
+}
diff --git a/src/control/lib/telemetry/promexp/client_test.go b/src/control/lib/telemetry/promexp/client_test.go
new file mode 100644
index 000000000000..d0274f157b50
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/client_test.go
@@ -0,0 +1,163 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package promexp
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/common/test"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+// TestPromExp_extractClientLabels checks the name/label split of client metric paths.
+func TestPromExp_extractClientLabels(t *testing.T) {
+	shmID := 256
+	jobID := "testJob"
+	pid := "12345"
+	tid := "67890"
+
+	testPath := func(suffix string) string {
+		return fmt.Sprintf("ID: %d/%s/%s/%s/%s", shmID, jobID, pid, tid, suffix)
+	}
+
+	for name, tc := range map[string]struct {
+		input     string
+		expName   string
+		expLabels labelMap
+	}{
+		"empty": {
+			expLabels: labelMap{},
+		},
+		"ID stripped": {
+			input:     "ID: 123",
+			expLabels: labelMap{},
+		},
+		"weird truncation": {
+			input:     "ID: 123/jobbo/6783/90",
+			expLabels: labelMap{},
+		},
+		"active update ops": {
+			input:   testPath("io/ops/update/active"),
+			expName: "io_ops_update_active",
+			expLabels: labelMap{
+				"job": jobID,
+				"pid": pid,
+				"tid": tid,
+			},
+		},
+		"fetch latency 1MB": {
+			input:   testPath("io/latency/fetch/1MB"),
+			expName: "io_latency_fetch",
+			expLabels: labelMap{
+				"job":  jobID,
+				"pid":  pid,
+				"tid":  tid,
+				"size": "1MB",
+			},
+		},
+		"started_at": {
+			input:   fmt.Sprintf("ID: %d/%s/%s/started_at", shmID, jobID, pid),
+			expName: "started_at",
+			expLabels: labelMap{
+				"job": jobID,
+				"pid": pid,
+			},
+		},
+		"pool ops": {
+			input:   fmt.Sprintf("ID: %d/%s/%s/pool/%s/ops/foo", shmID, jobID, pid, test.MockPoolUUID(1)),
+			expName: "pool_ops_foo",
+			expLabels: labelMap{
+				"job":  jobID,
+				"pid":  pid,
+				"pool": test.MockPoolUUID(1).String(),
+			},
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			log, buf := logging.NewTestLogger(t.Name())
+			defer test.ShowBufferOnFailure(t, buf)
+
+			// Expected values go first so failure output matches the "-want +got" legend.
+			gotLabels, gotName := extractClientLabels(log, tc.input)
+			test.AssertEqual(t, tc.expName, gotName, "unexpected metric name")
+			if diff := cmp.Diff(tc.expLabels, gotLabels); diff != "" {
+				t.Errorf("labels mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestPromExp_NewClientCollector verifies the collector built for nil and ignore-list options.
+func TestPromExp_NewClientCollector(t *testing.T) {
+	for name, tc := range map[string]struct {
+		opts      *CollectorOpts
+		expErr    error
+		expResult *ClientCollector
+	}{
+		"defaults": {
+			expResult: &ClientCollector{
+				metricsCollector: metricsCollector{
+					summary: &prometheus.SummaryVec{
+						MetricVec: &prometheus.MetricVec{},
+					},
+				},
+			},
+		},
+		"opts with ignores": {
+			opts: &CollectorOpts{Ignores: []string{"one", "two"}},
+			expResult: &ClientCollector{
+				metricsCollector: metricsCollector{
+					summary: &prometheus.SummaryVec{
+						MetricVec: &prometheus.MetricVec{},
+					},
+					ignoredMetrics: []*regexp.Regexp{
+						regexp.MustCompile("one"),
+						regexp.MustCompile("two"),
+					},
+				},
+			},
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			log, buf := logging.NewTestLogger(t.Name())
+			defer test.ShowBufferOnFailure(t, buf)
+			ctx := test.MustLogContext(t, log)
+			src, err := NewClientSource(ctx)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			got, err := NewClientCollector(ctx, log, src, tc.opts)
+			test.CmpErr(t, tc.expErr, err)
+			cmpOpts := []cmp.Option{
+				cmpopts.IgnoreUnexported(MetricSource{}),
+				cmpopts.IgnoreUnexported(prometheus.SummaryVec{}),
+				cmpopts.IgnoreUnexported(prometheus.MetricVec{}),
+				cmpopts.IgnoreUnexported(regexp.Regexp{}),
+				cmp.AllowUnexported(ClientCollector{}),
+				cmp.AllowUnexported(metricsCollector{}),
+				cmp.FilterPath(func(p cmp.Path) bool {
+					for _, suffix := range []string{"log", "sourceMutex", "cleanupSource", "collectFn"} {
+						if strings.HasSuffix(p.String(), suffix) {
+							return true
+						}
+					}
+					return false
+				}, cmp.Ignore()),
+			}
+			if diff := cmp.Diff(tc.expResult, got, cmpOpts...); diff != "" {
+				t.Fatalf("(-want, +got)\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/src/control/lib/telemetry/promexp/collector.go b/src/control/lib/telemetry/promexp/collector.go
index 03e6fa40dd50..ec70c0e8fbdb 100644
--- a/src/control/lib/telemetry/promexp/collector.go
+++ b/src/control/lib/telemetry/promexp/collector.go
@@ -7,445 +7,34 @@
 // +build linux
 // +build amd64 arm64
 
-//
-
 package promexp
 
 import (
-	"context"
-	"fmt"
 	"regexp"
-	"strings"
-	"sync"
-	"unicode"
+	"time"
 
-	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
 
-	"github.com/daos-stack/daos/src/control/lib/atm"
 	"github.com/daos-stack/daos/src/control/lib/telemetry"
 	"github.com/daos-stack/daos/src/control/logging"
 )
 
 type (
-	Collector struct {
-		log            logging.Logger
-		summary        *prometheus.SummaryVec
-		ignoredMetrics []*regexp.Regexp
-		sources        []*EngineSource
-		cleanupSource  map[uint32]func()
-		sourceMutex    sync.RWMutex // To protect sources
-	}
-
+	// CollectorOpts contains options for the metrics collector.
 	CollectorOpts struct {
-		Ignores []string
-	}
-
-	EngineSource struct {
-		ctx      context.Context
-		tmMutex  sync.RWMutex // To protect telemetry collection
-		Index    uint32
-		Rank     uint32
-		enabled  atm.Bool
-		tmSchema *telemetry.Schema
-		rmSchema rankMetricSchema
+		Ignores        []string
+		RetainDuration time.Duration
 	}
 
-	rankMetricSchema struct {
-		mu          sync.Mutex
-		rankMetrics map[string]*rankMetric
-		seen        map[string]struct{}
+	metricsCollector struct {
+		log            logging.Logger
+		summary        *prometheus.SummaryVec
+		ignoredMetrics []*regexp.Regexp
+		collectFn      func(ch chan *sourceMetric)
 	}
 )
 
-func (s *rankMetricSchema) Prune() {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	for id := range s.rankMetrics {
-		if _, found := s.seen[id]; !found {
-			delete(s.rankMetrics, id)
-		}
-	}
-	s.seen = make(map[string]struct{})
-}
-
-func (s *rankMetricSchema) add(log logging.Logger, rank uint32, metric telemetry.Metric) (rm *rankMetric) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	id := metric.FullPath()
-	s.seen[id] = struct{}{}
-
-	var found bool
-	if rm, found = s.rankMetrics[id]; !found {
-		rm = newRankMetric(log, rank, metric)
-		s.rankMetrics[id] = rm
-	} else {
-		rm.resetVecs()
-	}
-
-	return
-}
-
-func NewEngineSource(parent context.Context, idx uint32, rank uint32) (*EngineSource, func(), error) {
-	ctx, err := telemetry.Init(parent, idx)
-	if err != nil {
-		return nil, nil, errors.Wrap(err, "failed to init telemetry")
-	}
-
-	cleanupFn := func() {
-		telemetry.Detach(ctx)
-	}
-
-	return &EngineSource{
-		ctx:      ctx,
-		Index:    idx,
-		Rank:     rank,
-		enabled:  atm.NewBool(true),
-		tmSchema: telemetry.NewSchema(),
-		rmSchema: rankMetricSchema{
-			rankMetrics: make(map[string]*rankMetric),
-			seen:        make(map[string]struct{}),
-		},
-	}, cleanupFn, nil
-}
-
-func defaultCollectorOpts() *CollectorOpts {
-	return &CollectorOpts{}
-}
-
-func NewCollector(log logging.Logger, opts *CollectorOpts, sources ...*EngineSource) (*Collector, error) {
-	if opts == nil {
-		opts = defaultCollectorOpts()
-	}
-
-	c := &Collector{
-		log:           log,
-		sources:       sources,
-		cleanupSource: make(map[uint32]func()),
-		summary: prometheus.NewSummaryVec(
-			prometheus.SummaryOpts{
-				Namespace: "engine",
-				Subsystem: "exporter",
-				Name:      "scrape_duration_seconds",
-				Help:      "daos_exporter: Duration of a scrape job.",
-			},
-			[]string{"source", "result"},
-		),
-	}
-
-	for _, pat := range opts.Ignores {
-		re, err := regexp.Compile(pat)
-		if err != nil {
-			return nil, errors.Wrapf(err, "failed to compile %q", pat)
-		}
-		c.ignoredMetrics = append(c.ignoredMetrics, re)
-	}
-
-	return c, nil
-}
-
-type labelMap map[string]string
-
-func (lm labelMap) keys() (keys []string) {
-	for label := range lm {
-		keys = append(keys, label)
-	}
-
-	return
-}
-
-func sanitizeMetricName(in string) string {
-	return strings.Map(func(r rune) rune {
-		switch {
-		// Valid names for Prometheus are limited to:
-		case r >= 'a' && r <= 'z': // lowercase letters
-		case r >= 'A' && r <= 'Z': // uppercase letters
-		case unicode.IsDigit(r): // digits
-		default: // sanitize any other character
-			return '_'
-		}
-
-		return r
-	}, strings.TrimLeft(in, "/"))
-}
-
-func matchLabel(labels labelMap, input, match, label string) bool {
-	if !strings.HasPrefix(input, match) {
-		return false
-	}
-
-	splitStr := strings.SplitN(input, "_", 2)
-	if len(splitStr) == 2 {
-		labels[label] = splitStr[1]
-		return true
-	}
-	return false
-}
-
-func appendName(cur, name string) string {
-	if cur == "" {
-		return name
-	}
-	return cur + "_" + name
-}
-
-// extractLabels takes a "/"-separated DAOS metric name in order to
-// create a normalized Prometheus name and label map.
-//
-// NB: Prometheus metric names should follow best practices as
-// outlined at https://prometheus.io/docs/practices/naming/
-//
-// In particular, a metric name should describe the measurement,
-// not the entity the measurement is about. In other words, if 4
-// different entities share the same measurement, then there should
-// be a single metric with a label that distinguishes between
-// individual measurement values.
-//
-// Good: pool_started_at {pool="00000000-1111-2222-3333-4444444444"}
-// Bad: pool_00000000_1111_2222_3333_4444444444_started_at
-func extractLabels(in string) (labels labelMap, name string) {
-	labels = make(labelMap)
-	compsIdx := 0
-	comps := strings.Split(in, string(telemetry.PathSep))
-	if len(comps) == 0 {
-		return labels, in
-	}
-
-	if strings.HasPrefix(comps[compsIdx], "ID") {
-		if len(comps) == 1 {
-			return labels, ""
-		}
-		compsIdx++
-	}
-
-	switch comps[compsIdx] {
-	case "pool":
-		name = "pool"
-		compsIdx++
-		labels["pool"] = comps[compsIdx]
-		compsIdx++
-		switch comps[compsIdx] {
-		case "ops":
-			compsIdx++
-			name += "_ops_" + comps[compsIdx]
-			compsIdx++
-		}
-	case "io":
-		name = "io"
-		compsIdx++
-		switch comps[compsIdx] {
-		case "latency":
-			compsIdx++
-			name += "_latency_" + comps[compsIdx]
-			compsIdx++
-			labels["size"] = comps[compsIdx]
-			compsIdx++
-		case "ops":
-			compsIdx++
-			name += "_ops_" + comps[compsIdx]
-			compsIdx++
-		default:
-			name += "_" + comps[compsIdx]
-			compsIdx++
-		}
-	case "net":
-		compsIdx++
-		if comps[compsIdx] == "uri" {
-			compsIdx++
-			name = "net_uri_" + comps[compsIdx]
-			compsIdx++
-			break
-		}
-
-		name = "net"
-		labels["provider"] = comps[compsIdx]
-		compsIdx++
-	case "nvme":
-		name = "nvme"
-		compsIdx++
-		labels["device"] = comps[compsIdx]
-		compsIdx++
-	}
-
-	for {
-		if len(comps) == compsIdx {
-			break
-		}
-
-		switch {
-		case matchLabel(labels, comps[compsIdx], "tgt_", "target"):
-			compsIdx++
-		case matchLabel(labels, comps[compsIdx], "xs_", "xstream"):
-			compsIdx++
-		case matchLabel(labels, comps[compsIdx], "ctx_", "context"):
-			compsIdx++
-		default:
-			name = appendName(name, comps[compsIdx])
-			compsIdx++
-		}
-	}
-
-	name = sanitizeMetricName(name)
-	return
-}
-
-func (es *EngineSource) Collect(log logging.Logger, ch chan<- *rankMetric) {
-	if es == nil {
-		log.Error("nil engine source")
-		return
-	}
-	if !es.IsEnabled() {
-		return
-	}
-	if ch == nil {
-		log.Error("nil channel")
-		return
-	}
-
-	es.tmMutex.RLock()
-	defer es.tmMutex.RUnlock()
-
-	metrics := make(chan telemetry.Metric)
-	go func() {
-		if err := telemetry.CollectMetrics(es.ctx, es.tmSchema, metrics); err != nil {
-			log.Errorf("failed to collect metrics for engine rank %d: %s", es.Rank, err)
-			return
-		}
-		es.tmSchema.Prune()
-	}()
-
-	for metric := range metrics {
-		ch <- es.rmSchema.add(log, es.Rank, metric)
-	}
-	es.rmSchema.Prune()
-}
-
-// IsEnabled checks if the engine source is enabled.
-func (es *EngineSource) IsEnabled() bool {
-	return es.enabled.IsTrue()
-}
-
-// Enable enables the engine source.
-func (es *EngineSource) Enable() {
-	es.enabled.SetTrue()
-}
-
-// Disable disables the engine source.
-func (es *EngineSource) Disable() {
-	es.enabled.SetFalse()
-}
-
-type gvMap map[string]*prometheus.GaugeVec
-
-func (m gvMap) add(name, help string, labels labelMap) {
-	if _, found := m[name]; !found {
-		gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: name,
-			Help: help,
-		}, labels.keys())
-		m[name] = gv
-	}
-}
-
-func (m gvMap) set(name string, value float64, labels labelMap) error {
-	gv, found := m[name]
-	if !found {
-		return errors.Errorf("gauge vector %s not found", name)
-	}
-	gv.With(prometheus.Labels(labels)).Set(value)
-
-	return nil
-}
-
-type cvMap map[string]*prometheus.CounterVec
-
-func (m cvMap) add(name, help string, labels labelMap) {
-	if _, found := m[name]; !found {
-		cv := prometheus.NewCounterVec(prometheus.CounterOpts{
-			Name: name,
-			Help: help,
-		}, labels.keys())
-		m[name] = cv
-	}
-}
-
-func (m cvMap) set(name string, value float64, labels labelMap) error {
-	cv, found := m[name]
-	if !found {
-		return errors.Errorf("counter vector %s not found", name)
-	}
-	cv.With(prometheus.Labels(labels)).Add(value)
-
-	return nil
-}
-
-type rankMetric struct {
-	rank     uint32
-	metric   telemetry.Metric
-	baseName string
-	labels   labelMap
-	gvm      gvMap
-	cvm      cvMap
-}
-
-func (rm *rankMetric) collect(ch chan<- prometheus.Metric) {
-	for _, gv := range rm.gvm {
-		gv.Collect(ch)
-	}
-	for _, cv := range rm.cvm {
-		cv.Collect(ch)
-	}
-}
-
-func (rm *rankMetric) resetVecs() {
-	for _, gv := range rm.gvm {
-		gv.Reset()
-	}
-	for _, cv := range rm.cvm {
-		cv.Reset()
-	}
-}
-
-func newRankMetric(log logging.Logger, rank uint32, m telemetry.Metric) *rankMetric {
-	rm := &rankMetric{
-		metric: m,
-		rank:   rank,
-		gvm:    make(gvMap),
-		cvm:    make(cvMap),
-	}
-
-	var name string
-	rm.labels, name = extractLabels(m.FullPath())
-	rm.labels["rank"] = fmt.Sprintf("%d", rm.rank)
-	rm.baseName = "engine_" + name
-
-	desc := m.Desc()
-
-	switch rm.metric.Type() {
-	case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp,
-		telemetry.MetricTypeSnapshot:
-		rm.gvm.add(rm.baseName, desc, rm.labels)
-	case telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration:
-		rm.gvm.add(rm.baseName, desc, rm.labels)
-		for _, ms := range getMetricStats(rm.baseName, rm.metric) {
-			if ms.isCounter {
-				rm.cvm.add(ms.name, ms.desc, rm.labels)
-			} else {
-				rm.gvm.add(ms.name, ms.desc, rm.labels)
-			}
-		}
-	case telemetry.MetricTypeCounter:
-		rm.cvm.add(rm.baseName, desc, rm.labels)
-	default:
-		log.Errorf("[%s]: metric type %d not supported", name, rm.metric.Type())
-	}
-
-	return rm
-}
-
-func (c *Collector) isIgnored(name string) bool {
+func (c *metricsCollector) isIgnored(name string) bool {
 	for _, re := range c.ignoredMetrics {
 		// TODO: We may want to look into removing the use of regexp here
 		// in favor of a less-flexible but more efficient approach.
@@ -458,121 +47,7 @@ func (c *Collector) isIgnored(name string) bool {
 	return false
 }
 
-type metricStat struct {
-	name      string
-	desc      string
-	value     float64
-	isCounter bool
-}
-
-func getMetricStats(baseName string, m telemetry.Metric) (stats []*metricStat) {
-	ms, ok := m.(telemetry.StatsMetric)
-	if !ok {
-		return
-	}
-
-	for name, s := range map[string]struct {
-		fn        func() float64
-		desc      string
-		isCounter bool
-	}{
-		"min": {
-			fn:   func() float64 { return float64(ms.Min()) },
-			desc: " (min value)",
-		},
-		"max": {
-			fn:   func() float64 { return float64(ms.Max()) },
-			desc: " (max value)",
-		},
-		"mean": {
-			fn:   ms.Mean,
-			desc: " (mean)",
-		},
-		"sum": {
-			fn:   func() float64 { return float64(ms.Sum()) },
-			desc: " (sum)",
-		},
-		"stddev": {
-			fn:   ms.StdDev,
-			desc: " (std dev)",
-		},
-		"sumsquares": {
-			fn:   ms.SumSquares,
-			desc: " (sum of squares)",
-		},
-		"samples": {
-			fn:        func() float64 { return float64(ms.SampleSize()) },
-			desc:      " (samples)",
-			isCounter: true,
-		},
-	} {
-		stats = append(stats, &metricStat{
-			name:      baseName + "_" + name,
-			desc:      m.Desc() + s.desc,
-			value:     s.fn(),
-			isCounter: s.isCounter,
-		})
-	}
-
-	return
-}
-
-// AddSource adds an EngineSource to the Collector.
-func (c *Collector) AddSource(es *EngineSource, cleanup func()) {
-	if es == nil {
-		c.log.Error("attempted to add nil EngineSource")
-		return
-	}
-
-	c.sourceMutex.Lock()
-	defer c.sourceMutex.Unlock()
-
-	// If we attempt to add a duplicate, remove the old one.
-	c.removeSourceNoLock(es.Index)
-
-	c.sources = append(c.sources, es)
-	if cleanup != nil {
-		c.cleanupSource[es.Index] = cleanup
-	}
-}
-
-// RemoveSource removes an EngineSource with a given index from the Collector.
-func (c *Collector) RemoveSource(engineIdx uint32) {
-	c.sourceMutex.Lock()
-	defer c.sourceMutex.Unlock()
-
-	c.removeSourceNoLock(engineIdx)
-}
-
-func (c *Collector) removeSourceNoLock(engineIdx uint32) {
-	for i, es := range c.sources {
-		if es.Index == engineIdx {
-			es.Disable()
-			c.sources = append(c.sources[:i], c.sources[i+1:]...)
-
-			// Ensure that EngineSource isn't collecting during cleanup
-			es.tmMutex.Lock()
-			if cleanup, found := c.cleanupSource[engineIdx]; found && cleanup != nil {
-				cleanup()
-			}
-			es.tmMutex.Unlock()
-			delete(c.cleanupSource, engineIdx)
-			break
-		}
-	}
-}
-
-func (c *Collector) getSources() []*EngineSource {
-	c.sourceMutex.RLock()
-	defer c.sourceMutex.RUnlock()
-
-	sourceCopy := make([]*EngineSource, len(c.sources))
-	_ = copy(sourceCopy, c.sources)
-	return sourceCopy
-}
-
-// Collect collects metrics from all EngineSources.
-func (c *Collector) Collect(ch chan<- prometheus.Metric) {
+func (c *metricsCollector) Collect(ch chan<- prometheus.Metric) {
 	if c == nil {
 		return
 	}
@@ -580,55 +55,57 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) {
 		c.log.Error("passed a nil channel")
 		return
 	}
+	if c.collectFn == nil {
+		c.log.Error("collectFn is nil")
+		return
+	}
 
-	rankMetrics := make(chan *rankMetric)
-	go func(sources []*EngineSource) {
-		for _, source := range sources {
-			source.Collect(c.log, rankMetrics)
-		}
-		close(rankMetrics)
-	}(c.getSources())
+	sourceMetrics := make(chan *sourceMetric)
+	go func() {
+		c.collectFn(sourceMetrics)
+		close(sourceMetrics)
+	}()
 
-	for rm := range rankMetrics {
-		if c.isIgnored(rm.baseName) {
+	for sm := range sourceMetrics {
+		if c.isIgnored(sm.baseName) {
 			continue
 		}
 
 		var err error
-		switch rm.metric.Type() {
+		switch sm.metric.Type() {
 		case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp,
 			telemetry.MetricTypeSnapshot:
-			err = rm.gvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels)
+			err = sm.gvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels)
 		case telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration:
-			if err = rm.gvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels); err != nil {
+			if err = sm.gvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels); err != nil {
 				break
 			}
-			for _, ms := range getMetricStats(rm.baseName, rm.metric) {
+			for _, ms := range getMetricStats(sm.baseName, sm.metric) {
 				if ms.isCounter {
-					if err = rm.cvm.set(ms.name, ms.value, rm.labels); err != nil {
+					if err = sm.cvm.set(ms.name, ms.value, sm.labels); err != nil {
 						break
 					}
 				} else {
-					if err = rm.gvm.set(ms.name, ms.value, rm.labels); err != nil {
+					if err = sm.gvm.set(ms.name, ms.value, sm.labels); err != nil {
 						break
 					}
 				}
 			}
 		case telemetry.MetricTypeCounter:
-			err = rm.cvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels)
+			err = sm.cvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels)
 		default:
-			c.log.Errorf("[%s]: metric type %d not supported", rm.baseName, rm.metric.Type())
+			c.log.Errorf("[%s]: metric type %d not supported", sm.baseName, sm.metric.Type())
 		}
 
 		if err != nil {
-			c.log.Errorf("[%s]: %s", rm.baseName, err)
+			c.log.Errorf("[%s]: %s", sm.baseName, err)
 			continue
 		}
 
-		rm.collect(ch)
+		sm.collect(ch)
 	}
 }
 
-func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
+func (c *metricsCollector) Describe(ch chan<- *prometheus.Desc) {
 	c.summary.Describe(ch)
 }
diff --git a/src/control/lib/telemetry/promexp/engine.go b/src/control/lib/telemetry/promexp/engine.go
new file mode 100644
index 000000000000..bb0481f12a9a
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/engine.go
@@ -0,0 +1,271 @@
+//
+// (C) Copyright 2021-2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build linux && (amd64 || arm64)
+// +build linux
+// +build amd64 arm64
+
+package promexp
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"strings"
+	"sync"
+
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/lib/atm"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+// EngineCollector collects metrics from DAOS Engine sources. It embeds the shared
+// metricsCollector and tracks the registered sources and their cleanup callbacks.
+type EngineCollector struct {
+	metricsCollector
+	sources       []*EngineSource
+	cleanupSource map[uint32]func() // cleanup callbacks, keyed by EngineSource.Index
+	sourceMutex   sync.RWMutex      // To protect sources
+}
+
+// EngineSource provides metrics for a single DAOS Engine. It embeds the generic
+// MetricSource and adds the identifiers of the engine it reads from.
+type EngineSource struct {
+	MetricSource
+	Index uint32 // engine index handed to telemetry.Init; also the collector's lookup key
+	Rank  uint32 // engine rank; attached to each metric as the "rank" label
+}
+
+// NewEngineSource initializes telemetry for the DAOS Engine with index idx and returns a
+// metrics source for it, plus a cleanup function that calls telemetry.Detach.
+func NewEngineSource(parent context.Context, idx uint32, rank uint32) (*EngineSource, func(), error) {
+	tmCtx, err := telemetry.Init(parent, idx)
+	if err != nil {
+		return nil, nil, errors.Wrap(err, "failed to init telemetry")
+	}
+
+	// The schema is handed a constructor that builds its metrics via newRankMetric
+	// with this engine's rank.
+	rankMetricFn := func(l logging.Logger, m telemetry.Metric) *sourceMetric {
+		return newRankMetric(l, rank, m)
+	}
+
+	es := &EngineSource{Index: idx, Rank: rank}
+	es.ctx = tmCtx
+	es.enabled = atm.NewBool(true)
+	es.tmSchema = telemetry.NewSchema()
+	es.smSchema = newSourceMetricSchema(rankMetricFn)
+
+	detach := func() { telemetry.Detach(tmCtx) }
+
+	return es, detach, nil
+}
+
+// NewEngineCollector initializes a new collector for DAOS Engine sources. A nil opts
+// selects defaultCollectorOpts(); an invalid opts.Ignores pattern is returned as an error.
+func NewEngineCollector(log logging.Logger, opts *CollectorOpts, sources ...*EngineSource) (*EngineCollector, error) {
+	if opts == nil {
+		opts = defaultCollectorOpts()
+	}
+
+	scrapeSummary := prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Namespace: "engine",
+			Subsystem: "exporter",
+			Name:      "scrape_duration_seconds",
+			Help:      "daos_exporter: Duration of a scrape job.",
+		},
+		[]string{"source", "result"},
+	)
+
+	ec := &EngineCollector{
+		metricsCollector: metricsCollector{log: log, summary: scrapeSummary},
+		sources:          sources,
+		cleanupSource:    make(map[uint32]func()),
+	}
+	// Each scrape asks getSources() again, so sources added or removed later are honored.
+	ec.collectFn = func(metrics chan *sourceMetric) {
+		for _, src := range ec.getSources() {
+			src.Collect(ec.log, metrics)
+		}
+	}
+
+	for _, pattern := range opts.Ignores {
+		compiled, err := regexp.Compile(pattern)
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed to compile %q", pattern)
+		}
+		ec.ignoredMetrics = append(ec.ignoredMetrics, compiled)
+	}
+
+	return ec, nil
+}
+
+// extractLabels takes a "/"-separated DAOS metric name in order to
+// create a normalized Prometheus name and label map.
+//
+// NB: Prometheus metric names should follow best practices as
+// outlined at https://prometheus.io/docs/practices/naming/
+//
+// In particular, a metric name should describe the measurement,
+// not the entity the measurement is about. In other words, if 4
+// different entities share the same measurement, then there should
+// be a single metric with a label that distinguishes between
+// individual measurement values.
+//
+// Good: pool_started_at {pool="00000000-1111-2222-3333-4444444444"}
+// Bad: pool_00000000_1111_2222_3333_4444444444_started_at
+//
+// Truncated paths (e.g. a bare "pool") are tolerated: missing components are skipped
+// instead of being indexed, so malformed input yields a partial result, not a panic.
+func extractLabels(log logging.Logger, in string) (labels labelMap, name string) {
+	log.Tracef("in: %q", in)
+
+	labels = make(labelMap)
+	comps := strings.Split(in, string(telemetry.PathSep))
+	if len(comps) > 0 && strings.HasPrefix(comps[0], "ID") {
+		comps = comps[1:]
+	}
+
+	// Cursor helpers: each works on the front of comps and is a no-op once comps is empty.
+	peek := func() string {
+		if len(comps) == 0 {
+			return ""
+		}
+		return comps[0]
+	}
+	take := func() (comp string, ok bool) {
+		if len(comps) == 0 {
+			return "", false
+		}
+		comp, comps = comps[0], comps[1:]
+		return comp, true
+	}
+	takeLabel := func(key string) {
+		if comp, ok := take(); ok {
+			labels[key] = comp
+		}
+	}
+	takeName := func(sep string) {
+		if comp, ok := take(); ok {
+			name += sep + comp
+		}
+	}
+
+	switch peek() {
+	case "pool":
+		take()
+		name = "pool"
+		takeLabel("pool")
+		if peek() == "ops" {
+			take()
+			takeName("_ops_")
+		}
+	case "io":
+		take()
+		name = "io"
+		switch peek() {
+		case "latency":
+			take()
+			takeName("_latency_")
+			takeLabel("size")
+		case "ops":
+			take()
+			takeName("_ops_")
+		default:
+			takeName("_")
+		}
+	case "net":
+		take()
+		if peek() == "uri" {
+			take()
+			name = "net_uri"
+			takeName("_")
+			break
+		}
+		name = "net"
+		takeLabel("provider")
+	case "nvme":
+		take()
+		name = "nvme"
+		takeLabel("device")
+	}
+
+	for _, comp := range comps {
+		switch {
+		case matchLabel(labels, comp, "tgt_", "target"):
+		case matchLabel(labels, comp, "xs_", "xstream"):
+		case matchLabel(labels, comp, "ctx_", "context"):
+		default:
+			name = appendName(name, comp)
+		}
+	}
+
+	return labels, sanitizeMetricName(name)
+}
+
+// newRankMetric wraps m as an "engine_"-prefixed sourceMetric carrying a "rank" label.
+func newRankMetric(log logging.Logger, rank uint32, m telemetry.Metric) *sourceMetric {
+	rankLabels, metricName := extractLabels(log, m.FullPath())
+	rankLabels["rank"] = fmt.Sprintf("%d", rank)
+
+	return newSourceMetric(log, m, "engine_"+metricName, rankLabels)
+}
+
+// AddSource registers es (replacing any source with the same Index) and records its optional cleanup callback.
+func (c *EngineCollector) AddSource(es *EngineSource, cleanup func()) {
+	if es == nil {
+		c.log.Error("attempted to add nil EngineSource")
+		return
+	}
+
+	c.sourceMutex.Lock()
+	defer c.sourceMutex.Unlock()
+
+	// Drop any source already registered under this engine index first.
+	c.removeSourceNoLock(es.Index)
+	c.sources = append(c.sources, es)
+	if cleanup == nil {
+		return
+	}
+	c.cleanupSource[es.Index] = cleanup
+}
+
+// RemoveSource takes the source lock and removes the EngineSource with the given index, if one is registered.
+func (c *EngineCollector) RemoveSource(engineIdx uint32) {
+	c.sourceMutex.Lock()
+	defer c.sourceMutex.Unlock()
+
+	c.removeSourceNoLock(engineIdx)
+}
+
+// removeSourceNoLock disables, unregisters and cleans up the first source matching engineIdx.
+func (c *EngineCollector) removeSourceNoLock(engineIdx uint32) {
+	for pos, src := range c.sources {
+		if src.Index != engineIdx {
+			continue
+		}
+		src.Disable()
+		c.sources = append(c.sources[:pos], c.sources[pos+1:]...)
+		src.tmMutex.Lock() // Ensure that EngineSource isn't collecting during cleanup
+		if cleanupFn := c.cleanupSource[engineIdx]; cleanupFn != nil {
+			cleanupFn()
+		}
+		src.tmMutex.Unlock()
+		delete(c.cleanupSource, engineIdx)
+		return
+	}
+}
+
+// getSources returns a copy of the registered sources, taken under the read lock.
+func (c *EngineCollector) getSources() []*EngineSource {
+	c.sourceMutex.RLock()
+	defer c.sourceMutex.RUnlock()
+
+	snapshot := append(make([]*EngineSource, 0, len(c.sources)), c.sources...)
+	return snapshot
+}
diff --git a/src/control/lib/telemetry/promexp/collector_test.go b/src/control/lib/telemetry/promexp/engine_test.go
similarity index 88%
rename from src/control/lib/telemetry/promexp/collector_test.go
rename to src/control/lib/telemetry/promexp/engine_test.go
index e50605a033a0..b21839b7ba02 100644
--- a/src/control/lib/telemetry/promexp/collector_test.go
+++ b/src/control/lib/telemetry/promexp/engine_test.go
@@ -2,11 +2,6 @@
 // (C) Copyright 2021-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
-//
-//go:build linux && (amd64 || arm64)
-// +build linux
-// +build amd64 arm64
-
 //
 
 package promexp
@@ -62,7 +57,10 @@ func TestPromexp_NewEngineSource(t *testing.T) {
 
 			test.CmpErr(t, tc.expErr, err)
 
-			if diff := cmp.Diff(tc.expResult, result, cmpopts.IgnoreUnexported(EngineSource{})); diff != "" {
+			cmpOpts := cmp.Options{
+				cmpopts.IgnoreUnexported(MetricSource{}),
+			}
+			if diff := cmp.Diff(tc.expResult, result, cmpOpts...); diff != "" {
 				t.Fatalf("(-want, +got)\n%s", diff)
 			}
 
@@ -155,31 +153,20 @@ func TestPromExp_EngineSource_Collect(t *testing.T) {
 
 	for name, tc := range map[string]struct {
 		es         *EngineSource
-		resultChan chan *rankMetric
+		resultChan chan *sourceMetric
 		expMetrics telemetry.TestMetricsMap
 	}{
-		"nil source": {
-			resultChan: make(chan *rankMetric),
-		},
 		"nil channel": {
 			es: validSrc,
 		},
-		"bad source": {
-			es: &EngineSource{
-				ctx:   test.Context(t),
-				Rank:  123,
-				Index: testIdx + 1,
-			},
-			resultChan: make(chan *rankMetric),
-		},
 		"success": {
 			es:         validSrc,
-			resultChan: make(chan *rankMetric),
+			resultChan: make(chan *sourceMetric),
 			expMetrics: realMetrics,
 		},
 		"disabled": {
 			es:         disabledSrc,
-			resultChan: make(chan *rankMetric),
+			resultChan: make(chan *sourceMetric),
 			expMetrics: telemetry.TestMetricsMap{},
 		},
 	} {
@@ -189,7 +176,7 @@ func TestPromExp_EngineSource_Collect(t *testing.T) {
 
 			go tc.es.Collect(log, tc.resultChan)
 
-			gotMetrics := []*rankMetric{}
+			gotMetrics := []*sourceMetric{}
 			for {
 				done := false
 				select {
@@ -206,7 +193,7 @@ func TestPromExp_EngineSource_Collect(t *testing.T) {
 
 			test.AssertEqual(t, len(tc.expMetrics), len(gotMetrics), "wrong number of metrics returned")
 			for _, got := range gotMetrics {
-				test.AssertEqual(t, testRank, got.rank, "wrong rank")
+				test.AssertEqual(t, fmt.Sprintf("%d", testRank), got.labels["rank"], "wrong rank")
 				expM, ok := tc.expMetrics[got.metric.Type()]
 				if !ok {
 					t.Fatalf("metric type %d not expected", got.metric.Type())
@@ -220,7 +207,7 @@ func TestPromExp_EngineSource_Collect(t *testing.T) {
 	}
 }
 
-func TestPromExp_NewCollector(t *testing.T) {
+func TestPromExp_NewEngineCollector(t *testing.T) {
 	testSrc := []*EngineSource{
 		{
 			Rank: 1,
@@ -234,20 +221,24 @@ func TestPromExp_NewCollector(t *testing.T) {
 		sources   []*EngineSource
 		opts      *CollectorOpts
 		expErr    error
-		expResult *Collector
+		expResult *EngineCollector
 	}{
 		"no sources": {
-			expResult: &Collector{
-				summary: &prometheus.SummaryVec{
-					MetricVec: &prometheus.MetricVec{},
+			expResult: &EngineCollector{
+				metricsCollector: metricsCollector{
+					summary: &prometheus.SummaryVec{
+						MetricVec: &prometheus.MetricVec{},
+					},
 				},
 			},
 		},
 		"defaults": {
 			sources: testSrc,
-			expResult: &Collector{
-				summary: &prometheus.SummaryVec{
-					MetricVec: &prometheus.MetricVec{},
+			expResult: &EngineCollector{
+				metricsCollector: metricsCollector{
+					summary: &prometheus.SummaryVec{
+						MetricVec: &prometheus.MetricVec{},
+					},
 				},
 				sources: testSrc,
 			},
@@ -255,15 +246,17 @@ func TestPromExp_NewCollector(t *testing.T) {
 		"opts with ignores": {
 			sources: testSrc,
 			opts:    &CollectorOpts{Ignores: []string{"one", "two"}},
-			expResult: &Collector{
-				summary: &prometheus.SummaryVec{
-					MetricVec: &prometheus.MetricVec{},
+			expResult: &EngineCollector{
+				metricsCollector: metricsCollector{
+					summary: &prometheus.SummaryVec{
+						MetricVec: &prometheus.MetricVec{},
+					},
+					ignoredMetrics: []*regexp.Regexp{
+						regexp.MustCompile("one"),
+						regexp.MustCompile("two"),
+					},
 				},
 				sources: testSrc,
-				ignoredMetrics: []*regexp.Regexp{
-					regexp.MustCompile("one"),
-					regexp.MustCompile("two"),
-				},
 			},
 		},
 		"bad regexp in ignores": {
@@ -276,21 +269,23 @@ func TestPromExp_NewCollector(t *testing.T) {
 			log, buf := logging.NewTestLogger(t.Name())
 			defer test.ShowBufferOnFailure(t, buf)
 
-			result, err := NewCollector(log, tc.opts, tc.sources...)
+			result, err := NewEngineCollector(log, tc.opts, tc.sources...)
 
 			test.CmpErr(t, tc.expErr, err)
 
 			cmpOpts := []cmp.Option{
-				cmpopts.IgnoreUnexported(EngineSource{}),
+				cmpopts.IgnoreUnexported(MetricSource{}),
 				cmpopts.IgnoreUnexported(prometheus.SummaryVec{}),
 				cmpopts.IgnoreUnexported(prometheus.MetricVec{}),
 				cmpopts.IgnoreUnexported(regexp.Regexp{}),
-				cmp.AllowUnexported(Collector{}),
+				cmp.AllowUnexported(EngineCollector{}),
+				cmp.AllowUnexported(metricsCollector{}),
 				cmp.FilterPath(func(p cmp.Path) bool {
 					// Ignore a few specific fields
 					return (strings.HasSuffix(p.String(), "log") ||
 						strings.HasSuffix(p.String(), "sourceMutex") ||
-						strings.HasSuffix(p.String(), "cleanupSource"))
+						strings.HasSuffix(p.String(), "cleanupSource") ||
+						strings.HasSuffix(p.String(), "collectFn"))
 				}, cmp.Ignore()),
 			}
 			if diff := cmp.Diff(tc.expResult, result, cmpOpts...); diff != "" {
@@ -338,7 +333,7 @@ func TestPromExp_Collector_Prune(t *testing.T) {
 	}
 	defer cleanup()
 
-	defaultCollector, err := NewCollector(log, nil, engSrc)
+	defaultCollector, err := NewEngineCollector(log, nil, engSrc)
 	if err != nil {
 		t.Fatalf("failed to create collector: %s", err.Error())
 	}
@@ -357,12 +352,12 @@ func TestPromExp_Collector_Prune(t *testing.T) {
 			}
 		}
 
-		engSrc.rmSchema.mu.Lock()
-		for m := range engSrc.rmSchema.rankMetrics {
-			_, name := extractLabels(m)
+		engSrc.smSchema.mu.Lock()
+		for m := range engSrc.smSchema.sourceMetrics {
+			_, name := extractLabels(log, m)
 			names = append(names, name)
 		}
-		engSrc.rmSchema.mu.Unlock()
+		engSrc.smSchema.mu.Unlock()
 
 		sort.Strings(names)
 		return
@@ -373,7 +368,7 @@ func TestPromExp_Collector_Prune(t *testing.T) {
 		for _, m := range maps {
 			for t, m := range m {
 				if t != telemetry.MetricTypeDirectory && t != telemetry.MetricTypeLink {
-					_, name := extractLabels(m.FullPath())
+					_, name := extractLabels(log, m.FullPath())
 					unique[name] = struct{}{}
 				}
 			}
@@ -422,7 +417,7 @@ func TestPromExp_Collector_Collect(t *testing.T) {
 	}
 	defer cleanup()
 
-	defaultCollector, err := NewCollector(log, nil, engSrc)
+	defaultCollector, err := NewEngineCollector(log, nil, engSrc)
 	if err != nil {
 		t.Fatalf("failed to create collector: %s", err.Error())
 	}
@@ -433,7 +428,7 @@ func TestPromExp_Collector_Collect(t *testing.T) {
 		"engine_stats_gauge2",
 		"engine_timer_duration",
 	}
-	ignoreCollector, err := NewCollector(log, &CollectorOpts{
+	ignoreCollector, err := NewEngineCollector(log, &CollectorOpts{
 		Ignores: ignores,
 	}, engSrc)
 	if err != nil {
@@ -441,13 +436,10 @@ func TestPromExp_Collector_Collect(t *testing.T) {
 	}
 
 	for name, tc := range map[string]struct {
-		collector      *Collector
+		collector      *EngineCollector
 		resultChan     chan prometheus.Metric
 		expMetricNames []string
 	}{
-		"nil collector": {
-			resultChan: make(chan prometheus.Metric),
-		},
 		"nil channel": {
 			collector: defaultCollector,
 		},
@@ -518,7 +510,7 @@ func TestPromExp_Collector_Collect(t *testing.T) {
 	}
 }
 
-func TestPromExp_extractLabels(t *testing.T) {
+func TestPromExp_extractEngineLabels(t *testing.T) {
 	for name, tc := range map[string]struct {
 		input     string
 		expName   string
@@ -632,7 +624,10 @@ func TestPromExp_extractLabels(t *testing.T) {
 		},
 	} {
 		t.Run(name, func(t *testing.T) {
-			labels, name := extractLabels(tc.input)
+			log, buf := logging.NewTestLogger(t.Name())
+			defer test.ShowBufferOnFailure(t, buf)
+
+			labels, name := extractLabels(log, tc.input)
 
 			test.AssertEqual(t, name, tc.expName, "")
 			if diff := cmp.Diff(labels, tc.expLabels); diff != "" {
@@ -692,7 +687,7 @@ func TestPromExp_Collector_AddSource(t *testing.T) {
 			log, buf := logging.NewTestLogger(t.Name())
 			defer test.ShowBufferOnFailure(t, buf)
 
-			collector, err := NewCollector(log, nil, tc.startSrc...)
+			collector, err := NewEngineCollector(log, nil, tc.startSrc...)
 			if err != nil {
 				t.Fatalf("failed to set up collector: %s", err)
 			}
@@ -795,7 +790,7 @@ func TestPromExp_Collector_RemoveSource(t *testing.T) {
 			log, buf := logging.NewTestLogger(t.Name())
 			defer test.ShowBufferOnFailure(t, buf)
 
-			collector, err := NewCollector(log, nil, tc.startSrc...)
+			collector, err := NewEngineCollector(log, nil, tc.startSrc...)
 			if err != nil {
 				t.Fatalf("failed to set up collector: %s", err)
 			}
@@ -805,7 +800,10 @@ func TestPromExp_Collector_RemoveSource(t *testing.T) {
 
 			collector.RemoveSource(tc.idx)
 
-			if diff := cmp.Diff(tc.expSrc, collector.sources, cmpopts.IgnoreUnexported(EngineSource{})); diff != "" {
+			cmpOpts := cmp.Options{
+				cmpopts.IgnoreUnexported(MetricSource{}),
+			}
+			if diff := cmp.Diff(tc.expSrc, collector.sources, cmpOpts...); diff != "" {
 				t.Fatalf("(-want, +got)\n%s", diff)
 			}
 
diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go
new file mode 100644
index 000000000000..2f4c86d485dc
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/httpd.go
@@ -0,0 +1,100 @@
+//
+// (C) Copyright 2021-2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build linux && (amd64 || arm64)
+// +build linux
+// +build amd64 arm64
+
+package promexp
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+type (
+	// RegMonFn defines a function signature for registering a Prometheus
+	// monitor.
+	RegMonFn func(context.Context, logging.Logger) error
+
+	// ExporterConfig defines the configuration for the Prometheus exporter.
+	ExporterConfig struct {
+		Port     int      // TCP port to listen on; StartExporter rejects values <= 0
+		Title    string   // text rendered as the title and heading of the index page
+		Register RegMonFn // called by StartExporter before serving; must not be nil
+	}
+)
+
+const (
+	// EngineTelemetryPort specifies the default port for engine telemetry.
+	EngineTelemetryPort = 9191
+	// ClientTelemetryPort specifies the default port for client telemetry.
+	ClientTelemetryPort = 9192
+)
+
+// StartExporter runs cfg.Register, serves "/" and "/metrics" on cfg.Port, and returns a shutdown func.
+func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) (func(), error) {
+	if cfg == nil {
+		return nil, errors.New("invalid exporter config: nil config")
+	}
+
+	if cfg.Port <= 0 {
+		return nil, errors.New("invalid exporter config: bad port")
+	}
+
+	if cfg.Register == nil {
+		return nil, errors.New("invalid exporter config: nil register function")
+	}
+
+	if err := cfg.Register(ctx, log); err != nil {
+		return nil, errors.Wrap(err, "failed to register client monitor")
+	}
+
+	listenAddress := fmt.Sprintf("0.0.0.0:%d", cfg.Port)
+
+	// Private mux: registering on http.DefaultServeMux panics if StartExporter runs twice.
+	mux := http.NewServeMux()
+	srv := http.Server{Addr: listenAddress, Handler: mux}
+	mux.Handle("/metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{}))
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		num, err := w.Write([]byte(fmt.Sprintf(`<html>
+				<head><title>%s</title></head>
+				<body>
+				<h1>%s</h1>
+				<p><a href="/metrics">Metrics</a></p>
+				</body>
+				</html>`, cfg.Title, cfg.Title)))
+		if err != nil {
+			log.Errorf("%d: %s", num, err)
+		}
+	})
+
+	// ListenAndServe blocks until the server stops, so run it in the background.
+	go func() {
+		log.Infof("Listening on %s", listenAddress)
+		err := srv.ListenAndServe()
+		log.Infof("Prometheus web exporter stopped: %s", err.Error())
+	}()
+
+	return func() {
+		log.Debug("Shutting down Prometheus web exporter")
+
+		// When this cleanup function is called, the original context
+		// will probably have already been canceled.
+		timedCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
+		defer cancel()
+		if err := srv.Shutdown(timedCtx); err != nil {
+			log.Noticef("HTTP server didn't shut down within timeout: %s", err.Error())
+		}
+	}, nil
+}
diff --git a/src/control/lib/telemetry/promexp/httpd_test.go b/src/control/lib/telemetry/promexp/httpd_test.go
new file mode 100644
index 000000000000..db69e122b714
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/httpd_test.go
@@ -0,0 +1,118 @@
+//
+// (C) Copyright 2021-2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package promexp_test
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/pkg/errors"
+
+	"github.com/daos-stack/daos/src/control/common/test"
+	"github.com/daos-stack/daos/src/control/lib/telemetry/promexp"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+func TestPromExp_StartExporter(t *testing.T) {
+	for name, tc := range map[string]struct {
+		cfg    *promexp.ExporterConfig
+		expErr error
+	}{
+		"nil cfg": {
+			expErr: errors.New("invalid exporter config"),
+		},
+		"empty cfg invalid": {
+			cfg:    &promexp.ExporterConfig{},
+			expErr: errors.New("invalid exporter config"),
+		},
+		"negative port": {
+			cfg: &promexp.ExporterConfig{
+				Port: -1,
+			},
+			expErr: errors.New("invalid exporter config"),
+		},
+		"nil register fn": {
+			cfg: &promexp.ExporterConfig{
+				Port: 1234,
+			},
+			expErr: errors.New("invalid exporter config"),
+		},
+		"register fn fails": {
+			cfg: &promexp.ExporterConfig{
+				Port: 1234,
+				Register: func(context.Context, logging.Logger) error {
+					return errors.New("whoops")
+				},
+			},
+			expErr: errors.New("failed to register"),
+		},
+		"success": {
+			cfg: &promexp.ExporterConfig{
+				Port: promexp.ClientTelemetryPort,
+				Register: func(ctx context.Context, log logging.Logger) error {
+					return nil
+				},
+			},
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			log, buf := logging.NewTestLogger(t.Name())
+			defer test.ShowBufferOnFailure(t, buf)
+
+			if tc.cfg != nil {
+				tc.cfg.Title = t.Name()
+			}
+			cleanup, err := promexp.StartExporter(test.Context(t), log, tc.cfg)
+			test.CmpErr(t, tc.expErr, err)
+			if tc.expErr != nil {
+				return
+			}
+
+			// Poll (bounded, roughly 5s) until the exporter is listening, then
+			// check that our handlers are invoked.
+			var resp *http.Response
+			for tries := 0; ; tries++ {
+				if resp, err = http.Get(fmt.Sprintf("http://localhost:%d/", tc.cfg.Port)); err == nil {
+					break
+				}
+				if tries >= 50 {
+					t.Fatalf("exporter not reachable after %d attempts: %+v", tries+1, err)
+				}
+				time.Sleep(100 * time.Millisecond)
+			}
+
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !strings.Contains(string(body), tc.cfg.Title) {
+				t.Fatalf("expected %q to contain %q", string(body), tc.cfg.Title)
+			}
+			resp.Body.Close()
+
+			resp, err = http.Get(fmt.Sprintf("http://localhost:%d/metrics", tc.cfg.Port))
+			if err != nil {
+				t.Fatal(err)
+			}
+			resp.Body.Close()
+
+			cleanup()
+			time.Sleep(1 * time.Second)
+
+			// Make sure the exporter is no longer listening.
+			_, err = http.Get(fmt.Sprintf("http://localhost:%d/", tc.cfg.Port))
+			if err == nil {
+				t.Fatal("expected http Get to fail on closed port")
+			}
+		})
+	}
+}
diff --git a/src/control/lib/telemetry/promexp/source.go b/src/control/lib/telemetry/promexp/source.go
new file mode 100644
index 000000000000..2212b319ff7e
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/source.go
@@ -0,0 +1,214 @@
+//
+// (C) Copyright 2021-2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build linux && (amd64 || arm64)
+// +build linux
+// +build amd64 arm64
+
+package promexp
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/lib/atm"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+	"github.com/daos-stack/daos/src/control/logging"
+)
+
+type (
+	sourceMetricSchema struct {
+		mu            sync.Mutex               // guards sourceMetrics and seen
+		sourceMetrics map[string]*sourceMetric // cached wrappers, keyed by the metric's FullPath()
+		seen          map[string]struct{}      // IDs passed to add() since the last Prune()
+		addFn         func(logging.Logger, telemetry.Metric) *sourceMetric
+	}
+
+	// MetricSource encapsulates the logic and data for collecting telemetry
+	// from a DAOS metrics source.
+	MetricSource struct {
+		ctx      context.Context     // passed to the telemetry package's collect/prune calls
+		tmMutex  sync.RWMutex        // To protect telemetry collection
+		enabled  atm.Bool            // Collect and PruneSegments return early unless set
+		tmSchema *telemetry.Schema   // schema handed to telemetry.CollectMetrics
+		smSchema *sourceMetricSchema // per-source cache of sourceMetric wrappers
+	}
+)
+
+func newSourceMetricSchema(addFn func(logging.Logger, telemetry.Metric) *sourceMetric) *sourceMetricSchema {
+	return &sourceMetricSchema{
+		sourceMetrics: make(map[string]*sourceMetric),
+		seen:          make(map[string]struct{}),
+		addFn:         addFn,
+	}
+}
+
+// Prune removes any metrics that have not been seen since the last call to Prune.
+func (s *sourceMetricSchema) Prune() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	for id := range s.sourceMetrics {
+		if _, found := s.seen[id]; !found {
+			delete(s.sourceMetrics, id)
+		}
+	}
+	s.seen = make(map[string]struct{})
+}
+
+func (s *sourceMetricSchema) add(log logging.Logger, metric telemetry.Metric) (sm *sourceMetric) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	id := metric.FullPath()
+	s.seen[id] = struct{}{} // mark as seen so the next Prune() keeps this entry
+
+	var found bool
+	if sm, found = s.sourceMetrics[id]; !found {
+		sm = s.addFn(log, metric) // first sighting: build and cache a wrapper
+		s.sourceMetrics[id] = sm
+	} else {
+		sm.resetVecs() // known metric: reuse the cached wrapper with its vectors reset
+	}
+
+	return
+}
+
+func defaultCollectorOpts() *CollectorOpts {
+	return &CollectorOpts{}
+}
+
+// sourceMetric wraps a telemetry.Metric together with the Prometheus vectors that export it.
+type sourceMetric struct {
+	metric   telemetry.Metric // underlying telemetry metric
+	baseName string           // name given to the metric's primary vector
+	labels   labelMap         // label set used when creating the vectors
+	gvm      gvMap            // gauge vectors, keyed by vector name
+	cvm      cvMap            // counter vectors, keyed by vector name
+}
+
+// collect forwards every gauge and counter vector held by this sourceMetric to ch.
+func (sm *sourceMetric) collect(ch chan<- prometheus.Metric) {
+	for _, gauge := range sm.gvm {
+		gauge.Collect(ch)
+	}
+	for _, counter := range sm.cvm {
+		counter.Collect(ch)
+	}
+}
+
+// resetVecs calls Reset on each gauge and counter vector owned by this sourceMetric.
+func (sm *sourceMetric) resetVecs() {
+	for _, gauge := range sm.gvm {
+		gauge.Reset()
+	}
+	for _, counter := range sm.cvm {
+		counter.Reset()
+	}
+}
+
+// newSourceMetric builds a sourceMetric for m and creates the gauge/counter vectors its type needs.
+func newSourceMetric(log logging.Logger, m telemetry.Metric, baseName string, labels labelMap) *sourceMetric {
+	sm := &sourceMetric{
+		metric:   m,
+		baseName: baseName,
+		labels:   labels,
+		gvm:      make(gvMap),
+		cvm:      make(cvMap),
+	}
+
+	desc := m.Desc()
+
+	switch sm.metric.Type() {
+	case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp,
+		telemetry.MetricTypeSnapshot:
+		sm.gvm.add(sm.baseName, desc, sm.labels)
+	case telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration:
+		sm.gvm.add(sm.baseName, desc, sm.labels) // vector for the metric's own value
+		for _, ms := range getMetricStats(sm.baseName, sm.metric) { // plus one vector per derived statistic
+			if ms.isCounter {
+				sm.cvm.add(ms.name, ms.desc, sm.labels)
+			} else {
+				sm.gvm.add(ms.name, ms.desc, sm.labels)
+			}
+		}
+	case telemetry.MetricTypeCounter:
+		sm.cvm.add(sm.baseName, desc, sm.labels)
+	default:
+		log.Errorf("[%s]: metric type %d not supported", baseName, sm.metric.Type()) // sm is still returned, with no vectors
+	}
+
+	return sm
+}
+
+// IsEnabled checks if the source is enabled.
+func (s *MetricSource) IsEnabled() bool {
+	return s.enabled.IsTrue()
+}
+
+// Enable enables the source.
+func (s *MetricSource) Enable() {
+	s.enabled.SetTrue()
+}
+
+// Disable disables the source.
+func (s *MetricSource) Disable() {
+	s.enabled.SetFalse()
+}
+
+// Collect invokes telemetry.CollectMetrics() for the metrics context managed by this source and
+// sends a sourceMetric for each collected metric to ch. Nil or disabled sources send nothing.
+func (s *MetricSource) Collect(log logging.Logger, ch chan<- *sourceMetric) {
+	if s == nil {
+		log.Error("nil source")
+		return
+	}
+	if !s.IsEnabled() {
+		return
+	}
+	if ch == nil {
+		log.Error("nil channel")
+		return
+	}
+
+	s.tmMutex.RLock()
+	defer s.tmMutex.RUnlock()
+
+	metrics := make(chan telemetry.Metric) // unbuffered; drained by the range loop below
+	go func() {
+		if err := telemetry.CollectMetrics(s.ctx, s.tmSchema, metrics); err != nil {
+			log.Errorf("failed to collect metrics: %s", err)
+			return
+		}
+		s.tmSchema.Prune()
+	}()
+
+	for metric := range metrics { // NOTE(review): ends only once metrics is closed; presumably CollectMetrics closes it, even on error — confirm
+		ch <- s.smSchema.add(log, metric)
+	}
+	s.smSchema.Prune() // drop cached wrappers that were not seen during this pass
+}
+
+// PruneSegments prunes unused telemetry segments.
+func (s *MetricSource) PruneSegments(log logging.Logger, maxSegAge time.Duration) {
+	if s == nil {
+		log.Error("nil source")
+		return
+	}
+	if !s.IsEnabled() {
+		return
+	}
+
+	if err := telemetry.PruneUnusedSegments(s.ctx, maxSegAge); err != nil {
+		log.Errorf("failed to prune segments: %s", err)
+		return
+	}
+
+	s.tmSchema.Prune()
+	s.smSchema.Prune()
+}
diff --git a/src/control/lib/telemetry/promexp/util.go b/src/control/lib/telemetry/promexp/util.go
new file mode 100644
index 000000000000..6ddc46623d30
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/util.go
@@ -0,0 +1,170 @@
+//
+// (C) Copyright 2021-2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build linux && (amd64 || arm64)
+// +build linux
+// +build amd64 arm64
+
+package promexp
+
+import (
+	"sort"
+	"strings"
+	"unicode"
+
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+)
+
+type labelMap map[string]string
+
+func (lm labelMap) keys() (keys []string) {
+	for label := range lm {
+		keys = append(keys, label)
+	}
+	sort.Strings(keys)
+
+	return
+}
+
+func sanitizeMetricName(in string) string {
+	return strings.Map(func(r rune) rune {
+		switch {
+		// Characters kept as-is in a Prometheus name (anything else becomes '_'):
+		case r >= 'a' && r <= 'z': // lowercase letters
+		case r >= 'A' && r <= 'Z': // uppercase letters
+		case unicode.IsDigit(r): // digits (NOTE(review): unicode.IsDigit also accepts non-ASCII decimal digits)
+		default: // sanitize any other character
+			return '_'
+		}
+
+		return r
+	}, strings.TrimLeft(in, "/")) // leading '/' characters are stripped before mapping
+}
+
+func matchLabel(labels labelMap, input, match, label string) bool {
+	if !strings.HasPrefix(input, match) {
+		return false
+	}
+
+	splitStr := strings.SplitN(input, "_", 2) // the label value is whatever follows the first "_" in input
+	if len(splitStr) == 2 {
+		labels[label] = splitStr[1]
+		return true
+	}
+	return false // prefix matched, but input contains no "_" separator
+}
+
+func appendName(cur, name string) string {
+	if cur == "" {
+		return name
+	}
+	return cur + "_" + name
+}
+
+type gvMap map[string]*prometheus.GaugeVec
+
+func (m gvMap) add(name, help string, labels labelMap) {
+	if _, found := m[name]; !found {
+		gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: name,
+			Help: help,
+		}, labels.keys())
+		m[name] = gv
+	}
+}
+
+func (m gvMap) set(name string, value float64, labels labelMap) error {
+	gv, found := m[name]
+	if !found {
+		return errors.Errorf("gauge vector %s not found", name)
+	}
+	gv.With(prometheus.Labels(labels)).Set(value)
+
+	return nil
+}
+
+type cvMap map[string]*prometheus.CounterVec
+
+func (m cvMap) add(name, help string, labels labelMap) {
+	if _, found := m[name]; !found {
+		cv := prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: name,
+			Help: help,
+		}, labels.keys())
+		m[name] = cv
+	}
+}
+
+func (m cvMap) set(name string, value float64, labels labelMap) error {
+	cv, found := m[name]
+	if !found {
+		return errors.Errorf("counter vector %s not found", name)
+	}
+	cv.With(prometheus.Labels(labels)).Add(value) // despite the name, value is added to the counter's current total
+
+	return nil
+}
+
+type metricStat struct {
+	name      string
+	desc      string
+	value     float64
+	isCounter bool
+}
+
+func getMetricStats(baseName string, m telemetry.Metric) (stats []*metricStat) {
+	ms, ok := m.(telemetry.StatsMetric)
+	if !ok {
+		return []*metricStat{} // m does not implement StatsMetric: no derived statistics
+	}
+
+	for name, s := range map[string]struct {
+		fn        func() float64
+		desc      string
+		isCounter bool
+	}{
+		"min": {
+			fn:   func() float64 { return float64(ms.Min()) },
+			desc: " (min value)",
+		},
+		"max": {
+			fn:   func() float64 { return float64(ms.Max()) },
+			desc: " (max value)",
+		},
+		"mean": {
+			fn:   ms.Mean,
+			desc: " (mean)",
+		},
+		"sum": {
+			fn:   func() float64 { return float64(ms.Sum()) },
+			desc: " (sum)",
+		},
+		"stddev": {
+			fn:   ms.StdDev,
+			desc: " (std dev)",
+		},
+		"sumsquares": {
+			fn:   ms.SumSquares,
+			desc: " (sum of squares)",
+		},
+		"samples": {
+			fn:        func() float64 { return float64(ms.SampleSize()) },
+			desc:      " (samples)",
+			isCounter: true,
+		},
+	} { // map iteration: the order of the returned stats is not deterministic
+		stats = append(stats, &metricStat{
+			name:      baseName + "_" + name, // e.g. "<baseName>_min"
+			desc:      m.Desc() + s.desc,
+			value:     s.fn(), // sampled now, while the slice is being built
+			isCounter: s.isCounter,
+		})
+	}
+
+	return
+}
diff --git a/src/control/lib/telemetry/promexp/util_test.go b/src/control/lib/telemetry/promexp/util_test.go
new file mode 100644
index 000000000000..104da9ec3836
--- /dev/null
+++ b/src/control/lib/telemetry/promexp/util_test.go
@@ -0,0 +1,135 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package promexp
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+
+	"github.com/daos-stack/daos/src/control/common/test"
+	"github.com/daos-stack/daos/src/control/lib/telemetry"
+)
+
+func TestPromExp_sanitizeMetricName(t *testing.T) {
+	for input, tc := range map[string]struct {
+		expOutput string
+	}{
+		"": {
+			expOutput: "",
+		},
+		"azAZ09": {
+			expOutput: "azAZ09",
+		},
+		"/a-z A-Z 0-9/": {
+			expOutput: "a_z_A_Z_0_9_",
+		},
+	} {
+		t.Run(input, func(t *testing.T) {
+			got := sanitizeMetricName(input)
+			if got != tc.expOutput {
+				t.Errorf("sanitizeMetricName(%q) = %q, want %q", input, got, tc.expOutput)
+			}
+		})
+	}
+}
+
+func TestPromExp_getMetricStats(t *testing.T) {
+	segID := telemetry.NextTestID(telemetry.PromexpIDBase)
+	telemetry.InitTestMetricsProducer(t, segID, 4096)
+	defer telemetry.CleanupTestMetricsProducer(t)
+	testValues := []uint64{1, 2, 3, 4, 5}
+
+	ctx, err := telemetry.Init(test.Context(t), uint32(segID))
+	if err != nil {
+		t.Fatalf("Init: %v", err)
+	}
+
+	for name, tc := range map[string]struct {
+		baseName string
+		metric   *telemetry.TestMetric
+		expStats []*metricStat
+	}{
+		"non-stats gauge": {
+			baseName: "gauge",
+			metric: &telemetry.TestMetric{
+				Name: "gauge",
+				Type: telemetry.MetricTypeGauge,
+				Cur:  1.0,
+			},
+			expStats: []*metricStat{},
+		},
+		"stats gauge": {
+			baseName: "stats_gauge",
+			metric: &telemetry.TestMetric{
+				Name:   "stats_gauge",
+				Type:   telemetry.MetricTypeStatsGauge,
+				Values: testValues,
+			},
+			expStats: []*metricStat{
+				{
+					name:  "stats_gauge_min",
+					desc:  " (min value)",
+					value: 1.0,
+				},
+				{
+					name:  "stats_gauge_max",
+					desc:  " (max value)",
+					value: 5.0,
+				},
+				{
+					name:  "stats_gauge_mean",
+					desc:  " (mean)",
+					value: 3.0,
+				},
+				{
+					name:  "stats_gauge_sum",
+					desc:  " (sum)",
+					value: 15.0,
+				},
+				{
+					name:      "stats_gauge_samples",
+					desc:      " (samples)",
+					value:     5,
+					isCounter: true,
+				},
+				{
+					name:  "stats_gauge_stddev",
+					desc:  " (std dev)",
+					value: 1.58113883,
+				},
+				{
+					name:  "stats_gauge_sumsquares",
+					desc:  " (sum of squares)",
+					value: 55,
+				},
+			},
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			telemetry.AddTestMetric(t, tc.metric)
+
+			m, err := tc.metric.GetMetric(ctx)
+			if err != nil {
+				t.Fatalf("GetMetric: %v", err)
+			}
+
+			got := getMetricStats(tc.baseName, m)
+			cmpOpts := cmp.Options{
+				cmp.AllowUnexported(metricStat{}),
+				cmpopts.EquateApprox(0.000000001, 0.0),
+				cmpopts.SortSlices(func(a, b *metricStat) bool { // result order is not deterministic
+					return a.name < b.name
+				}),
+			}
+			if diff := cmp.Diff(tc.expStats, got, cmpOpts...); diff != "" { // want first, to match the legend
+				t.Fatalf("(-want, +got)\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/src/control/lib/telemetry/shm.go b/src/control/lib/telemetry/shm.go
new file mode 100644
index 000000000000..99fd95aaa3a1
--- /dev/null
+++ b/src/control/lib/telemetry/shm.go
@@ -0,0 +1,103 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package telemetry
+
+/*
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+*/
+import "C"
+
+import (
+	"time"
+
+	"github.com/pkg/errors"
+)
+
+type shmidStat struct {
+	id C.int
+	ds C.struct_shmid_ds
+}
+
+// Size returns the size of segment in bytes.
+func (s *shmidStat) Size() int {
+	return int(s.ds.shm_segsz)
+}
+
+// Atime returns the time of last shmat(2).
+func (s *shmidStat) Atime() time.Time {
+	return time.Unix(int64(s.ds.shm_atime), 0)
+}
+
+// Dtime returns the time of last shmdt(2).
+func (s *shmidStat) Dtime() time.Time {
+	return time.Unix(int64(s.ds.shm_dtime), 0)
+}
+
+// Ctime returns the time of last shmctl(2) or creation time.
+func (s *shmidStat) Ctime() time.Time {
+	return time.Unix(int64(s.ds.shm_ctime), 0)
+}
+
+// Cpid returns the creator pid.
+func (s *shmidStat) Cpid() int {
+	return int(s.ds.shm_cpid)
+}
+
+// Lpid returns the last shmat(2)/shmdt(2) pid.
+func (s *shmidStat) Lpid() int {
+	return int(s.ds.shm_lpid)
+}
+
+// Nattach returns the number of attached processes.
+func (s *shmidStat) Nattach() int {
+	return int(s.ds.shm_nattch)
+}
+
+// C returns the C struct.
+func (s *shmidStat) C() *C.struct_shmid_ds {
+	return &s.ds
+}
+
+func shmStat(id C.int) (*shmidStat, error) {
+	st := shmidStat{
+		id: id,
+	}
+	rc, err := C.shmctl(id, C.IPC_STAT, &st.ds)
+	if rc != 0 {
+		return nil, errors.Wrapf(err, "shmctl(IPC_STAT, %d)", id)
+	}
+
+	return &st, nil
+}
+
+func shmStatKey(key C.key_t) (*shmidStat, error) {
+	id, err := C.shmget(key, 0, 0)
+	if id < 0 { // shmget(2) returns -1 on failure; a non-zero errno alone does not mean the call failed
+		return nil, errors.Wrapf(err, "shmget(%d, 0, 0)", key)
+	}
+
+	return shmStat(id)
+}
+
+func shmChown(key C.key_t, uid C.uid_t, gid C.gid_t) error {
+	st, err := shmStatKey(key)
+	if err != nil {
+		return err
+	}
+
+	st.ds.shm_perm.gid = gid
+	st.ds.shm_perm.uid = uid
+
+	rc, err := C.shmctl(st.id, C.IPC_SET, st.C())
+	if rc != 0 {
+		return errors.Wrapf(err, "shmctl(IPC_SET, %d)", st.id)
+	}
+
+	return nil
+}
diff --git a/src/control/lib/telemetry/telemetry.go b/src/control/lib/telemetry/telemetry.go
index da93ffa55a40..9a626c85634b 100644
--- a/src/control/lib/telemetry/telemetry.go
+++ b/src/control/lib/telemetry/telemetry.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2021-2022 Intel Corporation.
+// (C) Copyright 2021-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -14,8 +14,28 @@ package telemetry
 /*
 #cgo LDFLAGS: -lgurt
 
-#include "gurt/telemetry_common.h"
-#include "gurt/telemetry_consumer.h"
+#include <daos/metrics.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_consumer.h>
+#include <gurt/telemetry_producer.h>
+
+static int
+rm_ephemeral_dir(const char *path)
+{
+	return d_tm_del_ephemeral_dir(path);
+}
+
+static int
+add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, char *path)
+{
+	return d_tm_add_ephemeral_dir(node, size_bytes, path);
+}
+
+static int
+attach_segment_path(key_t key, char *path)
+{
+	return d_tm_attach_path_segment(key, path);
+}
 */
 import "C"
 
@@ -25,12 +45,19 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
 	"unsafe"
 
 	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+
+	"github.com/daos-stack/daos/src/control/common"
+	"github.com/daos-stack/daos/src/control/lib/daos"
+	"github.com/daos-stack/daos/src/control/logging"
 )
 
 type MetricType int
@@ -46,6 +73,11 @@ const (
 	MetricTypeDirectory  MetricType = C.D_TM_DIRECTORY
 	MetricTypeLink       MetricType = C.D_TM_LINK
 
+	ClientJobRootID         = C.DC_TM_JOB_ROOT_ID
+	ClientJobMax            = 1024
+	ClientMetricsEnabledEnv = C.DAOS_CLIENT_METRICS_ENABLE
+	ClientMetricsRetainEnv  = C.DAOS_CLIENT_METRICS_RETAIN
+
 	BadUintVal  = ^uint64(0)
 	BadFloatVal = float64(BadUintVal)
 	BadIntVal   = int64(BadUintVal >> 1)
@@ -81,7 +113,7 @@ type (
 type (
 	handle struct {
 		sync.RWMutex
-		idx  uint32
+		id   uint32
 		rank *uint32
 		ctx  *C.struct_d_tm_context
 		root *C.struct_d_tm_node_t
@@ -109,6 +141,34 @@ const (
 	handleKey telemetryKey = "handle"
 )
 
+func (mt MetricType) String() string {
+	strFmt := func(name string) string {
+		numStr := strconv.Itoa(int(mt))
+		return name + " (" + numStr + ")"
+	}
+
+	switch mt {
+	case MetricTypeDirectory:
+		return strFmt("directory")
+	case MetricTypeCounter:
+		return strFmt("counter")
+	case MetricTypeTimestamp:
+		return strFmt("timestamp")
+	case MetricTypeSnapshot:
+		return strFmt("snapshot")
+	case MetricTypeDuration:
+		return strFmt("duration")
+	case MetricTypeGauge:
+		return strFmt("gauge")
+	case MetricTypeStatsGauge:
+		return strFmt("gauge (stats)")
+	case MetricTypeLink:
+		return strFmt("link")
+	default:
+		return strFmt("unknown")
+	}
+}
+
 func (h *handle) isValid() bool {
 	return h != nil && h.ctx != nil && h.root != nil
 }
@@ -295,24 +355,43 @@ func collectGarbageLoop(ctx context.Context, ticker *time.Ticker) {
 	}
 }
 
+func initClientRoot(parent context.Context, shmID uint32) (context.Context, error) {
+	if parent == nil {
+		return nil, errors.New("nil parent context")
+	}
+
+	shmSize := C.ulong(ClientJobMax * C.D_TM_METRIC_SIZE)
+
+	rc := C.d_tm_init(C.int(shmID), shmSize, C.D_TM_OPEN_OR_CREATE)
+	if rc != 0 {
+		return nil, errors.Errorf("failed to init client root: %s", daos.Status(rc))
+	}
+
+	return Init(parent, shmID)
+}
+
+func InitClientRoot(ctx context.Context) (context.Context, error) {
+	return initClientRoot(ctx, ClientJobRootID)
+}
+
 // Init initializes the telemetry bindings
-func Init(parent context.Context, idx uint32) (context.Context, error) {
+func Init(parent context.Context, id uint32) (context.Context, error) {
 	if parent == nil {
 		return nil, errors.New("nil parent context")
 	}
 
-	tmCtx := C.d_tm_open(C.int(idx))
+	tmCtx := C.d_tm_open(C.int(id))
 	if tmCtx == nil {
-		return nil, errors.Errorf("no shared memory segment found for idx: %d", idx)
+		return nil, errors.Errorf("no shared memory segment found for key: %d", id)
 	}
 
 	root := C.d_tm_get_root(tmCtx)
 	if root == nil {
-		return nil, errors.Errorf("no root node found in shared memory segment for idx: %d", idx)
+		return nil, errors.Errorf("no root node found in shared memory segment for key: %d", id)
 	}
 
 	handle := &handle{
-		idx:  idx,
+		id:   id,
 		ctx:  tmCtx,
 		root: root,
 	}
@@ -323,6 +402,11 @@ func Init(parent context.Context, idx uint32) (context.Context, error) {
 	return newCtx, nil
 }
 
+// Fini releases resources claimed by Init().
+func Fini() {
+	C.d_tm_fini()
+}
+
 // Detach detaches from the telemetry handle
 func Detach(ctx context.Context) {
 	if hdl, err := getHandle(ctx); err == nil {
@@ -333,6 +417,38 @@ func Detach(ctx context.Context) {
 	}
 }
 
+func addEphemeralDir(path string, shmSize uint64) error {
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+	if rc := C.add_ephemeral_dir(nil, C.ulong(shmSize), cPath); rc != 0 {
+		return daos.Status(rc)
+	}
+
+	return nil
+}
+
+// SetupClientRoot performs the necessary actions to get the client telemetry
+// segment linked into the agent-managed tree.
+func SetupClientRoot(ctx context.Context, jobid string, pid, shm_key int) error {
+	log := logging.FromContext(ctx)
+
+	if err := addEphemeralDir(jobid, ClientJobMax*C.D_TM_METRIC_SIZE); err != nil {
+		if err != daos.Exists {
+			return errors.Wrapf(err, "failed to add client job path %q", jobid)
+		}
+	}
+
+	pidPath := filepath.Join(jobid, string(PathSep), strconv.Itoa(pid))
+	cPidPath := C.CString(pidPath)
+	defer C.free(unsafe.Pointer(cPidPath))
+	if rc := C.attach_segment_path(C.key_t(shm_key), cPidPath); rc != 0 {
+		return errors.Wrapf(daos.Status(rc), "failed to attach client segment 0x%x at %q", shm_key, pidPath)
+	}
+
+	log.Tracef("attached client segment @ %q (key: 0x%x)", pidPath, shm_key)
+	return nil
+}
+
 type Schema struct {
 	mu      sync.RWMutex
 	metrics map[string]Metric
@@ -413,10 +529,12 @@ func NewSchema() *Schema {
 
 }
 
-func visit(hdl *handle, s *Schema, node *C.struct_d_tm_node_t, pathComps string, out chan<- Metric) {
+type procNodeFn func(hdl *handle, id string, node *C.struct_d_tm_node_t)
+
+func visit(hdl *handle, node *C.struct_d_tm_node_t, pathComps string, procLinks bool, procNode procNodeFn) {
 	var next *C.struct_d_tm_node_t
 
-	if node == nil {
+	if node == nil || procNode == nil {
 		return
 	}
 	name := C.GoString(C.d_tm_get_name(hdl.ctx, node))
@@ -425,29 +543,30 @@ func visit(hdl *handle, s *Schema, node *C.struct_d_tm_node_t, pathComps string,
 		id = name
 	}
 
-	cType := node.dtn_type
-	switch cType {
+	switch node.dtn_type {
 	case C.D_TM_DIRECTORY:
 		next = C.d_tm_get_child(hdl.ctx, node)
 		if next != nil {
-			visit(hdl, s, next, id, out)
+			visit(hdl, next, id, procLinks, procNode)
 		}
 	case C.D_TM_LINK:
 		next = C.d_tm_follow_link(hdl.ctx, node)
 		if next != nil {
+			if procLinks {
+				// Use next to get the linked shm key
+				procNode(hdl, id, next)
+			}
+
 			// link leads to a directory with the same name
-			visit(hdl, s, next, pathComps, out)
+			visit(hdl, next, pathComps, procLinks, procNode)
 		}
 	default:
-		m := s.Add(hdl, id, cType, node)
-		if m != nil {
-			out <- m
-		}
+		procNode(hdl, id, node)
 	}
 
 	next = C.d_tm_get_sibling(hdl.ctx, node)
 	if next != nil && next != node {
-		visit(hdl, s, next, pathComps, out)
+		visit(hdl, next, pathComps, procLinks, procNode)
 	}
 }
 
@@ -465,8 +584,98 @@ func CollectMetrics(ctx context.Context, s *Schema, out chan<- Metric) error {
 		return errors.New("invalid handle")
 	}
 
-	node := hdl.root
-	visit(hdl, s, node, "", out)
+	procNode := func(hdl *handle, id string, node *C.struct_d_tm_node_t) {
+		m := s.Add(hdl, id, node.dtn_type, node)
+		if m != nil {
+			out <- m
+		}
+	}
+
+	visit(hdl, hdl.root, "", false, procNode)
+
+	return nil
+}
+
+// PruneUnusedSegments removes shared memory segments associated with
+// unused ephemeral subdirectories.
+func PruneUnusedSegments(ctx context.Context, maxSegAge time.Duration) error {
+	log := logging.FromContext(ctx)
+
+	hdl, err := getHandle(ctx)
+	if err != nil {
+		return err
+	}
+	hdl.Lock()
+	defer hdl.Unlock()
+
+	if !hdl.isValid() {
+		return errors.New("invalid handle")
+	}
+
+	var toPrune []string
+	procNode := func(hdl *handle, id string, node *C.struct_d_tm_node_t) {
+		if node == nil || node.dtn_type != C.D_TM_DIRECTORY {
+			return
+		}
+
+		path := id
+		comps := strings.SplitN(path, string(PathSep), 2)
+		if strings.HasPrefix(comps[0], "ID:") && len(comps) > 1 {
+			path = comps[1]
+		}
+
+		st, err := shmStatKey(node.dtn_shmem_key)
+		if err != nil {
+			log.Errorf("failed to shmStat(%s): %s", path, err)
+			return
+		}
+
+		log.Tracef("path:%s shmid:%d spid:%d cpid:%d lpid:%d age:%s",
+			path, st.id, os.Getpid(), st.Cpid(), st.Lpid(), time.Since(st.Ctime()))
+
+		// If the creator process was someone other than us, and it's still
+		// around, don't mess with the segment.
+		if _, err := common.GetProcName(st.Cpid()); err == nil && st.Cpid() != unix.Getpid() {
+			return
+		}
+
+		if time.Since(st.Ctime()) <= maxSegAge {
+			return
+		}
+
+		log.Tracef("adding %s to prune list", path)
+		toPrune = append(toPrune, path)
+	}
+
+	visit(hdl, hdl.root, "", true, procNode)
+
+	sort.Sort(sort.Reverse(sort.StringSlice(toPrune)))
+	for _, path := range toPrune {
+		log.Tracef("pruning %s", path)
+		if err := removeLink(hdl, path); err != nil {
+			log.Errorf("failed to prune %s: %s", path, err)
+		}
+	}
+
+	return nil
+}
+
+func removeLink(hdl *handle, path string) error {
+	_, err := findNode(hdl, path)
+	if err != nil {
+		return err
+	}
+
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+	rc := C.rm_ephemeral_dir(cPath)
+	if rc != 0 {
+		return errors.Wrapf(daos.Status(rc), "failed to remove link %q", path)
+	}
+
+	if _, err := findNode(hdl, path); err == nil {
+		return errors.Errorf("failed to remove %s", path)
+	}
 
 	return nil
 }
diff --git a/src/control/lib/telemetry/telemetry_test.go b/src/control/lib/telemetry/telemetry_test.go
index a645f0e60e4d..bc63cc813998 100644
--- a/src/control/lib/telemetry/telemetry_test.go
+++ b/src/control/lib/telemetry/telemetry_test.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2021-2022 Intel Corporation.
+// (C) Copyright 2021-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -9,6 +9,9 @@ package telemetry
 import (
 	"context"
 	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
 	"sync"
 	"testing"
 	"time"
@@ -16,6 +19,7 @@ import (
 	"github.com/pkg/errors"
 
 	"github.com/daos-stack/daos/src/control/common/test"
+	"github.com/daos-stack/daos/src/control/logging"
 )
 
 func TestTelemetry_Init(t *testing.T) {
@@ -50,7 +54,7 @@ func TestTelemetry_Init(t *testing.T) {
 					t.Fatalf("can't get handle from result ctx: %v", err)
 				}
 
-				test.AssertEqual(t, uint32(producerID), hdl.idx, "handle.idx doesn't match shmem ID")
+				test.AssertEqual(t, uint32(producerID), hdl.id, "handle.id doesn't match shmem ID")
 
 				hdl.RLock()
 				defer hdl.RUnlock()
@@ -179,6 +183,106 @@ func TestTelemetry_GetRank(t *testing.T) {
 	}
 }
 
+func childErrExit(err error) {
+	if err == nil {
+		err = errors.New("unknown error")
+	}
+	fmt.Fprintf(os.Stderr, "CHILD ERROR: %s\n", err)
+	os.Exit(1)
+}
+
+const (
+	childModeEnvVar   = "TEST_CHILD_MODE"
+	childModeLinkTest = "CHILD_MODE_LINK_TEST"
+	childShmIDEnvVar  = "TEST_CHILD_SHM_ID"
+)
+
+func TestMain(m *testing.M) {
+	mode := os.Getenv(childModeEnvVar)
+	switch mode {
+	case "":
+		// default; run the test binary
+		os.Exit(m.Run())
+	case childModeLinkTest:
+		runChildTelemProc()
+	default:
+		childErrExit(errors.Errorf("Unknown child mode: %q", mode))
+	}
+}
+
+func runChildTelemProc() {
+	pid := os.Getpid()
+	shmID, err := strconv.Atoi(os.Getenv(childShmIDEnvVar))
+	if err != nil {
+		childErrExit(err)
+	}
+
+	jobDir := TestMetricsMap{
+		MetricTypeDirectory: &TestMetric{
+			Name: "job",
+		},
+	}
+	pidLink := TestMetricsMap{
+		MetricTypeLink: &TestMetric{
+			Name: fmt.Sprintf("job/%d", pid),
+		},
+	}
+	startedAt := TestMetricsMap{
+		MetricTypeTimestamp: &TestMetric{
+			Name: fmt.Sprintf("job/%d/started_at", pid),
+		},
+	}
+
+	t := &testing.T{}
+
+	InitTestMetricsProducer(t, shmID, 1024)
+
+	AddTestMetrics(t, jobDir)
+	AddTestMetrics(t, pidLink)
+	AddTestMetrics(t, startedAt)
+
+	if t.Failed() {
+		childErrExit(errors.New("test failed"))
+	}
+}
+
+func TestTelemetry_PruneSegments(t *testing.T) {
+	shmID := uint32(NextTestID())
+
+	cmd := exec.Command(os.Args[0])
+	cmd.Env = append(os.Environ(),
+		fmt.Sprintf("%s=%s", childModeEnvVar, childModeLinkTest),
+		fmt.Sprintf("%s=%d", childShmIDEnvVar, shmID),
+	)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Errorf("child failed: %s", out)
+		t.Fatal(err)
+	}
+
+	log, buf := logging.NewTestLogger(t.Name())
+	defer test.ShowBufferOnFailure(t, buf)
+
+	ctx, err := initClientRoot(test.MustLogContext(t, log), shmID)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		Fini()
+	}()
+
+	path := fmt.Sprintf("job/%d/started_at", cmd.Process.Pid)
+	_, err = GetTimestamp(ctx, path)
+	test.CmpErr(t, nil, err)
+
+	err = PruneUnusedSegments(ctx, time.Nanosecond)
+	test.CmpErr(t, nil, err)
+
+	_, err = GetTimestamp(ctx, path)
+	if err == nil {
+		t.Fatal("expected GetTimestamp() to fail after prune")
+	}
+}
+
 func TestTelemetry_CollectMetrics(t *testing.T) {
 	testMetrics := TestMetricsMap{
 		MetricTypeCounter: &TestMetric{
diff --git a/src/control/lib/telemetry/test_helpers.go b/src/control/lib/telemetry/test_helpers.go
index c0cbdda72ef1..bc014eb2502c 100644
--- a/src/control/lib/telemetry/test_helpers.go
+++ b/src/control/lib/telemetry/test_helpers.go
@@ -19,6 +19,8 @@ import (
 	"testing"
 	"time"
 
+	"github.com/pkg/errors"
+
 	"github.com/daos-stack/daos/src/control/common/test"
 	"github.com/daos-stack/daos/src/control/lib/daos"
 )
@@ -60,6 +62,7 @@ var nextIDMutex sync.Mutex
 const (
 	telemetryIDBase = 100
 	PromexpIDBase   = 200
+	AgentIDBase     = 300
 )
 
 // NextTestID gets the next available ID for a shmem segment. This helps avoid
@@ -80,6 +83,7 @@ func NextTestID(base ...int) int {
 
 type (
 	TestMetric struct {
+		Type   MetricType
 		Name   string
 		path   string
 		desc   string
@@ -87,6 +91,7 @@ type (
 		min    uint64
 		max    uint64
 		Cur    float64 // value - may be exact or approximate
+		Values []uint64
 		sum    uint64
 		mean   float64
 		stddev float64
@@ -106,6 +111,25 @@ func (tm *TestMetric) FullPath() string {
 	return fullName
 }
 
+func (tm *TestMetric) GetMetric(ctx context.Context) (Metric, error) {
+	switch tm.Type {
+	case MetricTypeCounter:
+		return GetCounter(ctx, tm.FullPath())
+	case MetricTypeTimestamp:
+		return GetTimestamp(ctx, tm.FullPath())
+	case MetricTypeSnapshot:
+		return GetSnapshot(ctx, tm.FullPath())
+	case MetricTypeDuration:
+		return GetDuration(ctx, tm.FullPath())
+	case MetricTypeGauge:
+		return GetGauge(ctx, tm.FullPath())
+	case MetricTypeStatsGauge:
+		return GetStatsGauge(ctx, tm.FullPath())
+	default:
+		return nil, errors.Errorf("unsupported metric type %s", tm.Type)
+	}
+}
+
 func InitTestMetricsProducer(t *testing.T, id int, size uint64) {
 	t.Helper()
 
@@ -115,65 +139,82 @@ func InitTestMetricsProducer(t *testing.T, id int, size uint64) {
 	}
 }
 
+func AddTestMetric(t *testing.T, tm *TestMetric) {
+	t.Helper()
+
+	fullName := tm.FullPath()
+	switch tm.Type {
+	case MetricTypeGauge:
+		rc := C.add_metric(&tm.node, C.D_TM_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+		C.d_tm_set_gauge(tm.node, C.uint64_t(tm.Cur))
+	case MetricTypeStatsGauge:
+		rc := C.add_metric(&tm.node, C.D_TM_STATS_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", tm.Name, daos.Status(rc))
+		}
+
+		vals := make([]uint64, len(tm.Values))
+		if len(tm.Values) > 0 {
+			copy(vals, tm.Values)
+		} else {
+			vals = []uint64{tm.min, tm.max, uint64(tm.Cur)}
+		}
+		t.Logf("setting values for %s: %+v\n", tm.FullPath(), vals)
+
+		for _, val := range vals {
+			C.d_tm_set_gauge(tm.node, C.uint64_t(val))
+			t.Logf("set %s to %d\n", tm.FullPath(), val)
+		}
+	case MetricTypeCounter:
+		rc := C.add_metric(&tm.node, C.D_TM_COUNTER, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+		C.d_tm_inc_counter(tm.node, C.ulong(tm.Cur))
+	case MetricTypeDuration:
+		rc := C.add_metric(&tm.node, C.D_TM_DURATION|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+		C.d_tm_mark_duration_start(tm.node, C.D_TM_CLOCK_REALTIME)
+		time.Sleep(time.Duration(tm.Cur))
+		C.d_tm_mark_duration_end(tm.node)
+	case MetricTypeTimestamp:
+		rc := C.add_metric(&tm.node, C.D_TM_TIMESTAMP, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+		C.d_tm_record_timestamp(tm.node)
+	case MetricTypeSnapshot:
+		rc := C.add_metric(&tm.node, C.D_TM_TIMER_SNAPSHOT|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+		C.d_tm_take_timer_snapshot(tm.node, C.D_TM_CLOCK_REALTIME)
+	case MetricTypeDirectory:
+		rc := C.add_metric(&tm.node, C.D_TM_DIRECTORY, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+	case MetricTypeLink:
+		rc := C.add_eph_dir(&tm.node, 1024, C.CString(fullName))
+		if rc != 0 {
+			t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
+		}
+	default:
+		t.Fatalf("metric type %s not supported", tm.Type)
+	}
+}
+
 func AddTestMetrics(t *testing.T, testMetrics TestMetricsMap) {
 	t.Helper()
 
 	for mt, tm := range testMetrics {
-		fullName := tm.FullPath()
-		switch mt {
-		case MetricTypeGauge:
-			rc := C.add_metric(&tm.node, C.D_TM_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-			C.d_tm_set_gauge(tm.node, C.uint64_t(tm.Cur))
-		case MetricTypeStatsGauge:
-			rc := C.add_metric(&tm.node, C.D_TM_STATS_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", tm.Name, daos.Status(rc))
-			}
-			for _, val := range []uint64{tm.min, tm.max, uint64(tm.Cur)} {
-				C.d_tm_set_gauge(tm.node, C.uint64_t(val))
-			}
-		case MetricTypeCounter:
-			rc := C.add_metric(&tm.node, C.D_TM_COUNTER, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-			C.d_tm_inc_counter(tm.node, C.ulong(tm.Cur))
-		case MetricTypeDuration:
-			rc := C.add_metric(&tm.node, C.D_TM_DURATION|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-			C.d_tm_mark_duration_start(tm.node, C.D_TM_CLOCK_REALTIME)
-			time.Sleep(time.Duration(tm.Cur))
-			C.d_tm_mark_duration_end(tm.node)
-		case MetricTypeTimestamp:
-			rc := C.add_metric(&tm.node, C.D_TM_TIMESTAMP, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-			C.d_tm_record_timestamp(tm.node)
-		case MetricTypeSnapshot:
-			rc := C.add_metric(&tm.node, C.D_TM_TIMER_SNAPSHOT|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-			C.d_tm_take_timer_snapshot(tm.node, C.D_TM_CLOCK_REALTIME)
-		case MetricTypeDirectory:
-			rc := C.add_metric(&tm.node, C.D_TM_DIRECTORY, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-		case MetricTypeLink:
-			rc := C.add_eph_dir(&tm.node, 1024, C.CString(fullName))
-			if rc != 0 {
-				t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc))
-			}
-		default:
-			t.Fatalf("metric type %d not supported", mt)
-		}
+		tm.Type = mt
+		AddTestMetric(t, tm)
 	}
 }
 
diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go
index f7f094ffe7e9..4b2f624aff2a 100644
--- a/src/control/server/telemetry.go
+++ b/src/control/server/telemetry.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2018-2022 Intel Corporation.
+// (C) Copyright 2018-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -8,13 +8,9 @@ package server
 
 import (
 	"context"
-	"fmt"
-	"net/http"
-	"time"
 
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
 
 	"github.com/daos-stack/daos/src/control/lib/ranklist"
 	"github.com/daos-stack/daos/src/control/lib/telemetry/promexp"
@@ -27,7 +23,7 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng
 		return nil
 	}
 
-	c, err := promexp.NewCollector(log, &promexp.CollectorOpts{})
+	c, err := promexp.NewEngineCollector(log, &promexp.CollectorOpts{})
 	if err != nil {
 		return err
 	}
@@ -73,45 +69,13 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng
 }
 
 func startPrometheusExporter(ctx context.Context, log logging.Logger, port int, engines []Engine) (func(), error) {
-	if err := regPromEngineSources(ctx, log, engines); err != nil {
-		return nil, err
+	expCfg := &promexp.ExporterConfig{
+		Port:  port,
+		Title: "DAOS Engine Telemetry",
+		Register: func(ctx context.Context, log logging.Logger) error {
+			return regPromEngineSources(ctx, log, engines)
+		},
 	}
 
-	listenAddress := fmt.Sprintf("0.0.0.0:%d", port)
-
-	srv := http.Server{Addr: listenAddress}
-	http.Handle("/metrics", promhttp.HandlerFor(
-		prometheus.DefaultGatherer, promhttp.HandlerOpts{},
-	))
-	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-		num, err := w.Write([]byte(`<html>
-				<head><title>DAOS Exporter</title></head>
-				<body>
-				<h1>DAOS Exporter</h1>
-				<p><a href="/metrics">Metrics</a></p>
-				</body>
-				</html>`))
-		if err != nil {
-			log.Errorf("%d: %s", num, err)
-		}
-	})
-
-	// http listener is a blocking call
-	go func() {
-		log.Infof("Listening on %s", listenAddress)
-		err := srv.ListenAndServe()
-		log.Infof("Prometheus web exporter stopped: %s", err.Error())
-	}()
-
-	return func() {
-		log.Debug("Shutting down Prometheus web exporter")
-
-		// When this cleanup function is called, the original context
-		// will probably have already been canceled.
-		timedCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-		defer cancel()
-		if err := srv.Shutdown(timedCtx); err != nil {
-			log.Noticef("HTTP server didn't shut down within timeout: %s", err.Error())
-		}
-	}, nil
+	return promexp.StartExporter(ctx, log, expCfg)
 }
diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c
index 2936acfcbeaf..18e32463bb0b 100644
--- a/src/dtx/dtx_srv.c
+++ b/src/dtx/dtx_srv.c
@@ -9,6 +9,7 @@
 #define D_LOGFAC	DD_FAC(dtx)
 
 #include <daos/rpc.h>
+#include <daos/metrics.h>
 #include <daos/btree_class.h>
 #include <daos_srv/daos_engine.h>
 #include <daos_srv/container.h>
@@ -132,11 +133,11 @@ dtx_metrics_count(void)
 	return (sizeof(struct dtx_pool_metrics) / sizeof(struct d_tm_node_t *));
 }
 
-struct dss_module_metrics dtx_metrics = {
-	.dmm_tags = DAOS_TGT_TAG,
-	.dmm_init = dtx_metrics_alloc,
-	.dmm_fini = dtx_metrics_free,
-	.dmm_nr_metrics = dtx_metrics_count,
+struct daos_module_metrics dtx_metrics = {
+    .dmm_tags       = DAOS_TGT_TAG,
+    .dmm_init       = dtx_metrics_alloc,
+    .dmm_fini       = dtx_metrics_free,
+    .dmm_nr_metrics = dtx_metrics_count,
 };
 
 static void
diff --git a/src/engine/SConscript b/src/engine/SConscript
index ceb00a409d09..e94b6a83dd61 100644
--- a/src/engine/SConscript
+++ b/src/engine/SConscript
@@ -29,7 +29,7 @@ def scons():
                'drpc_handler.c', 'drpc_listener.c',
                'drpc_progress.c', 'init.c', 'module.c',
                'srv_cli.c', 'profile.c', 'rpc.c',
-               'server_iv.c', 'srv.c', 'srv.pb-c.c', 'tls.c',
+               'server_iv.c', 'srv.c', 'srv.pb-c.c',
                'sched.c', 'ult.c', 'event.pb-c.c',
                'srv_metrics.c'] + libdaos_tgts
 
diff --git a/src/engine/init.c b/src/engine/init.c
index c4dfb6e19970..d639456eeb15 100644
--- a/src/engine/init.c
+++ b/src/engine/init.c
@@ -22,6 +22,7 @@
 #include <daos/btree_class.h>
 #include <daos/common.h>
 #include <daos/placement.h>
+#include <daos/tls.h>
 #include "srv_internal.h"
 #include "drpc_internal.h"
 #include <gurt/telemetry_common.h>
@@ -628,14 +629,14 @@ server_id_cb(uint32_t *tid, uint64_t *uid)
 	}
 
 	if (tid != NULL) {
-		struct dss_thread_local_storage *dtc;
-		struct dss_module_info *dmi;
+		struct daos_thread_local_storage *dtc;
+		struct daos_module_info          *dmi;
 		int index = daos_srv_modkey.dmk_index;
 
-		/* Avoid assertion in dss_module_key_get() */
+		/* Avoid assertion in daos_module_key_get() */
 		dtc = dss_tls_get();
 		if (dtc != NULL && index >= 0 && index < DAOS_MODULE_KEYS_NR &&
-		    dss_module_keys[index] == &daos_srv_modkey) {
+		    daos_get_module_key(index) == &daos_srv_modkey) {
 			dmi = dss_get_module_info();
 			if (dmi != NULL)
 				*tid = dmi->dmi_xs_id;
diff --git a/src/engine/module.c b/src/engine/module.c
index ce33609aeba3..4ee74235ff52 100644
--- a/src/engine/module.c
+++ b/src/engine/module.c
@@ -14,6 +14,7 @@
 
 #include <daos_errno.h>
 #include <daos/common.h>
+#include <daos/metrics.h>
 #include <gurt/list.h>
 #include <daos/rpc.h>
 #include "drpc_handler.h"
@@ -387,7 +388,7 @@ dss_module_init_metrics(enum dss_module_tag tag, void **metrics,
 	struct loaded_mod *mod;
 
 	d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) {
-		struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics;
+		struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics;
 
 		if (met == NULL)
 			continue;
@@ -415,7 +416,7 @@ dss_module_fini_metrics(enum dss_module_tag tag, void **metrics)
 	struct loaded_mod *mod;
 
 	d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) {
-		struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics;
+		struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics;
 
 		if (met == NULL)
 			continue;
@@ -442,7 +443,7 @@ dss_module_nr_pool_metrics(void)
 	int			 total = 0, nr;
 
 	d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) {
-		struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics;
+		struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics;
 
 		if (met == NULL)
 			continue;
diff --git a/src/engine/srv.c b/src/engine/srv.c
index 986d8ed04c4d..e0c985c38f63 100644
--- a/src/engine/srv.c
+++ b/src/engine/srv.c
@@ -364,9 +364,9 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi)
 static void
 dss_srv_handler(void *arg)
 {
-	struct dss_xstream		*dx = (struct dss_xstream *)arg;
-	struct dss_thread_local_storage	*dtc;
-	struct dss_module_info		*dmi;
+	struct dss_xstream               *dx = (struct dss_xstream *)arg;
+	struct daos_thread_local_storage *dtc;
+	struct dss_module_info           *dmi;
 	int				 rc;
 	bool                             track_mem     = false;
 	bool				 signal_caller = true;
@@ -1300,7 +1300,7 @@ dss_srv_fini(bool force)
 		vos_standalone_tls_fini();
 		/* fall through */
 	case XD_INIT_TLS_REG:
-		pthread_key_delete(dss_tls_key);
+		ds_tls_key_delete();
 		/* fall through */
 	case XD_INIT_ULT_BARRIER:
 		ABT_cond_free(&xstream_data.xd_ult_barrier);
@@ -1402,7 +1402,7 @@ dss_srv_init(void)
 	xstream_data.xd_init_step = XD_INIT_ULT_BARRIER;
 
 	/* register xstream-local storage key */
-	rc = pthread_key_create(&dss_tls_key, NULL);
+	rc = ds_tls_key_create();
 	if (rc) {
 		rc = dss_abterr2der(rc);
 		D_ERROR("Failed to register storage key: "DF_RC"\n", DP_RC(rc));
diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h
index 8621175b44fd..1d4278a98cfa 100644
--- a/src/engine/srv_internal.h
+++ b/src/engine/srv_internal.h
@@ -319,10 +319,6 @@ sched_create_thread(struct dss_xstream *dx, void (*func)(void *), void *arg,
 	return dss_abterr2der(rc);
 }
 
-/* tls.c */
-void dss_tls_fini(struct dss_thread_local_storage *dtls);
-struct dss_thread_local_storage *dss_tls_init(int tag, int xs_id, int tgt_id);
-
 /* server_iv.c */
 void ds_iv_init(void);
 void ds_iv_fini(void);
diff --git a/src/engine/tls.c b/src/engine/tls.c
deleted file mode 100644
index 90ea6cce7c58..000000000000
--- a/src/engine/tls.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/**
- * (C) Copyright 2016-2021 Intel Corporation.
- *
- * SPDX-License-Identifier: BSD-2-Clause-Patent
- */
-/**
- * This file is part of the DAOS server. It implements thread-local storage
- * (TLS) for DAOS service threads.
- */
-#define D_LOGFAC       DD_FAC(server)
-
-#include <pthread.h>
-#include "srv_internal.h"
-
-/* The array remember all of registered module keys on one node. */
-struct dss_module_key *dss_module_keys[DAOS_MODULE_KEYS_NR] = { NULL };
-
-pthread_mutex_t dss_module_keys_lock = PTHREAD_MUTEX_INITIALIZER;
-
-void
-dss_register_key(struct dss_module_key *key)
-{
-	int i;
-
-	D_MUTEX_LOCK(&dss_module_keys_lock);
-	for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) {
-		if (dss_module_keys[i] == NULL) {
-			dss_module_keys[i] = key;
-			key->dmk_index = i;
-			break;
-		}
-	}
-	D_MUTEX_UNLOCK(&dss_module_keys_lock);
-	D_ASSERT(i < DAOS_MODULE_KEYS_NR);
-}
-
-void
-dss_unregister_key(struct dss_module_key *key)
-{
-	if (key == NULL)
-		return;
-	D_ASSERT(key->dmk_index >= 0);
-	D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR);
-	D_MUTEX_LOCK(&dss_module_keys_lock);
-	dss_module_keys[key->dmk_index] = NULL;
-	D_MUTEX_UNLOCK(&dss_module_keys_lock);
-}
-
-/**
- * Init thread context
- *
- * \param[in]dtls	Init the thread context to allocate the
- *                      local thread variable for each module.
- *
- * \retval		0 if initialization succeeds
- * \retval		negative errno if initialization fails
- */
-static int
-dss_thread_local_storage_init(struct dss_thread_local_storage *dtls,
-			      int xs_id, int tgt_id)
-{
-	int rc = 0;
-	int i;
-
-	if (dtls->dtls_values == NULL) {
-		D_ALLOC_ARRAY(dtls->dtls_values,
-			      (int)ARRAY_SIZE(dss_module_keys));
-		if (dtls->dtls_values == NULL)
-			return -DER_NOMEM;
-	}
-
-	for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) {
-		struct dss_module_key *dmk = dss_module_keys[i];
-
-		if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) {
-			D_ASSERT(dmk->dmk_init != NULL);
-			dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id);
-			if (dtls->dtls_values[i] == NULL) {
-				rc = -DER_NOMEM;
-				break;
-			}
-		}
-	}
-	return rc;
-}
-
-/**
- * Finish module context
- *
- * \param[in]dtls	Finish the thread context to free the
- *                      local thread variable for each module.
- */
-static void
-dss_thread_local_storage_fini(struct dss_thread_local_storage *dtls)
-{
-	int i;
-
-	if (dtls->dtls_values != NULL) {
-		for (i = DAOS_MODULE_KEYS_NR - 1; i >= 0; i--) {
-			struct dss_module_key *dmk = dss_module_keys[i];
-
-			if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) {
-				D_ASSERT(dtls->dtls_values[i] != NULL);
-				D_ASSERT(dmk->dmk_fini != NULL);
-				dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]);
-			}
-		}
-	}
-
-	D_FREE(dtls->dtls_values);
-}
-
-pthread_key_t dss_tls_key;
-
-/*
- * Allocate dss_thread_local_storage for a particular thread and
- * store the pointer in a thread-specific value which can be
- * fetched at any time with dss_tls_get().
- */
-struct dss_thread_local_storage *
-dss_tls_init(int tag, int xs_id, int tgt_id)
-{
-	struct dss_thread_local_storage *dtls;
-	int		 rc;
-
-	D_ALLOC_PTR(dtls);
-	if (dtls == NULL)
-		return NULL;
-
-	dtls->dtls_tag = tag;
-	rc = dss_thread_local_storage_init(dtls, xs_id, tgt_id);
-	if (rc != 0) {
-		D_FREE(dtls);
-		return NULL;
-	}
-
-	rc = pthread_setspecific(dss_tls_key, dtls);
-	if (rc) {
-		D_ERROR("failed to initialize tls: %d\n", rc);
-		dss_thread_local_storage_fini(dtls);
-		D_FREE(dtls);
-		return NULL;
-	}
-
-	return dtls;
-}
-
-/* Free DTC for a particular thread. */
-void
-dss_tls_fini(struct dss_thread_local_storage *dtls)
-{
-	dss_thread_local_storage_fini(dtls);
-	D_FREE(dtls);
-	pthread_setspecific(dss_tls_key, NULL);
-}
diff --git a/src/gurt/examples/telem_consumer_example.c b/src/gurt/examples/telem_consumer_example.c
index 6b7b1653a163..cac33fc7077f 100644
--- a/src/gurt/examples/telem_consumer_example.c
+++ b/src/gurt/examples/telem_consumer_example.c
@@ -147,6 +147,13 @@ void read_metrics(struct d_tm_context *ctx, struct d_tm_node_t *root,
 	d_tm_list_free(head);
 }
 
+static void
+iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format,
+	   int opt_fields, void *arg)
+{
+	d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -177,8 +184,8 @@ main(int argc, char **argv)
 	filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT |
 		  D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY);
 	show_meta = true;
-	d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_STANDARD,
-		     D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout);
+	d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_STANDARD, D_TM_INCLUDE_METADATA, iter_print,
+		     stdout);
 
 	sprintf(dirname, "manually added");
 	filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT |
diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c
index f91d1e72919f..6bd3a4952465 100644
--- a/src/gurt/telemetry.c
+++ b/src/gurt/telemetry.c
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2020-2023 Intel Corporation.
+ * (C) Copyright 2020-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -16,9 +16,11 @@
 #include <gurt/common.h>
 #include <gurt/list.h>
 #include <sys/shm.h>
-#include "gurt/telemetry_common.h"
-#include "gurt/telemetry_producer.h"
-#include "gurt/telemetry_consumer.h"
+#include <sys/types.h>
+#include <daos/common.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_producer.h>
+#include <gurt/telemetry_consumer.h>
 
 /** minimal list of shared memory regions with a global ID */
 struct shmem_region_list {
@@ -31,12 +33,17 @@ struct shmem_region_list {
 struct d_tm_shmem_hdr {
 	uint64_t		 sh_base_addr;	/** address of this struct */
 	key_t			 sh_key;	/** key to access region */
-	bool			 sh_deleted;	/** marked for deletion */
+	uint32_t                 sh_deleted : 1, /** marked for deletion */
+	    sh_multiple_writer              : 1; /** require lock to protect */
 	uint8_t			 sh_reserved[3]; /** for alignment */
 	uint64_t		 sh_bytes_total; /** total size of region */
 	uint64_t		 sh_bytes_free; /** free bytes in this region */
 	void			*sh_free_addr;	/** start of free space */
 	struct d_tm_node_t	*sh_root;	/** root of metric tree */
+
+	/* lock to protect updates, mostly creation and removal of ephemeral dirs */
+	pthread_mutex_t          sh_multiple_writer_lock;
+
 	/**
 	 * List of all ephemeral regions attached to this shmem region.
 	 */
@@ -69,8 +76,10 @@ static struct d_tm_shmem {
 	struct d_tm_context	*ctx; /** context for the producer */
 	struct d_tm_node_t	*root; /** root node of shmem */
 	pthread_mutex_t		 add_lock; /** for synchronized access */
-	bool			 sync_access; /** whether to sync access */
-	bool			 retain; /** retain shmem region on exit */
+	uint32_t                 retain : 1, /** retain shmem region during exit */
+	    sync_access                 : 1, /** enable sync access to shmem */
+	    retain_non_empty            : 1, /** retain shmem region if it is not empty */
+	    multiple_writer_lock        : 1; /** also take sh_multiple_writer_lock when locking */
 	int			 id; /** Instance ID */
 } tm_shmem;
 
@@ -168,13 +177,49 @@ d_tm_get_name(struct d_tm_context *ctx, struct d_tm_node_t *node)
 static int
 d_tm_lock_shmem(void)
 {
-	return D_MUTEX_LOCK(&tm_shmem.add_lock);
+	struct d_tm_context *ctx = tm_shmem.ctx;
+	int                  rc;
+
+	if (tm_shmem.multiple_writer_lock) {
+		rc = D_MUTEX_LOCK(&ctx->shmem_root->sh_multiple_writer_lock);
+		if (unlikely(rc != 0)) {
+			DL_ERROR(rc, "failed to take multiple writer lock");
+			return rc;
+		}
+	}
+
+	rc = D_MUTEX_LOCK(&tm_shmem.add_lock);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to take shared memory lock");
+		if (tm_shmem.multiple_writer_lock)
+			D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock);
+		return rc;
+	}
+
+	return 0;
 }
 
 static int
 d_tm_unlock_shmem(void)
 {
-	return D_MUTEX_UNLOCK(&tm_shmem.add_lock);
+	struct d_tm_context *ctx = tm_shmem.ctx;
+	int                  rc;
+
+	rc = D_MUTEX_UNLOCK(&tm_shmem.add_lock);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to release shared memory lock");
+		return rc;
+	}
+
+	if (tm_shmem.multiple_writer_lock) {
+		rc = D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock);
+		if (unlikely(rc != 0)) {
+			DL_ERROR(rc, "failed to release multiple writer lock");
+			return rc;
+		}
+	}
+
+	return 0;
 }
 
 /*
@@ -200,6 +245,8 @@ attach_shmem(key_t key, size_t size, int flags, struct d_tm_shmem_hdr **shmem)
 		return -DER_SHMEM_PERMS;
 	}
 
+	D_INFO("%s shmid %d key 0x%x addr %p\n", size > 0 ? "allocated" : "attached", shmid, key,
+	       addr);
 	*shmem = addr;
 	return shmid;
 }
@@ -208,7 +255,6 @@ static int
 new_shmem(key_t key, size_t size, struct d_tm_shmem_hdr **shmem)
 {
 	int rc;
-
 	D_INFO("creating new shared memory segment, key=0x%x, size=%lu\n",
 	       key, size);
 	rc = attach_shmem(key, size, IPC_CREAT | 0660, shmem);
@@ -331,7 +377,7 @@ close_local_shmem_entry(struct local_shmem_list *entry, bool destroy)
 {
 	d_list_del(&entry->link);
 	if (destroy)
-		entry->region->sh_deleted = true;
+		entry->region->sh_deleted = 1;
 	close_shmem(entry->region);
 
 	if (destroy)
@@ -529,7 +575,7 @@ init_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t *node,
 		D_ERROR("cannot allocate node name [%s]\n", name);
 		return -DER_NO_SHMEM;
 	}
-	strncpy(node->dtn_name, name, buff_len);
+	strncpy(conv_ptr(shmem, node->dtn_name), name, buff_len);
 	node->dtn_shmem_key = shmem->sh_key;
 	node->dtn_child = NULL;
 	/* may be reinitializing an existing node, in which case we shouldn't
@@ -557,6 +603,7 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode,
 	   const char *name)
 {
 	struct d_tm_node_t	*node = NULL;
+	struct d_tm_node_t      *tmp;
 	int			rc = DER_SUCCESS;
 
 	if (shmem == NULL || newnode == NULL || name == NULL) {
@@ -569,14 +616,19 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode,
 		rc = -DER_NO_SHMEM;
 		goto out;
 	}
-	rc = init_node(shmem, node, name);
+
+	tmp = conv_ptr(shmem, node);
+
+	rc = init_node(shmem, tmp, name);
 	if (rc != 0)
 		goto out;
-	node->dtn_metric = NULL;
-	node->dtn_sibling = NULL;
-	*newnode = node;
+	tmp->dtn_metric  = NULL;
+	tmp->dtn_sibling = NULL;
 
+	*newnode = node;
 out:
+	if (rc != 0)
+		DL_ERROR(rc, "failed to alloc node for %s", name);
 	return rc;
 }
 
@@ -624,10 +676,10 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent,
 	 * 1) a previously-cleared link node that can be reused, or
 	 * 2) the right place to attach a newly allocated node.
 	 */
-	child = parent->dtn_child;
+	child = conv_ptr(shmem, parent->dtn_child);
 	while (child != NULL && !is_cleared_link(tm_shmem.ctx, child)) {
 		sibling = child;
-		child = child->dtn_sibling;
+		child   = conv_ptr(shmem, child->dtn_sibling);
 	}
 
 	if (is_cleared_link(tm_shmem.ctx, child)) {
@@ -657,6 +709,7 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent,
 	else
 		sibling->dtn_sibling = *newnode;
 
+	*newnode = conv_ptr(shmem, *newnode);
 	return 0;
 
 failure:
@@ -751,7 +804,7 @@ destroy_shmem_with_key(key_t key)
 
 /**
  * Initialize an instance of the telemetry and metrics API for the producer
- * process.
+ * process with the root set to the provided name.
  *
  * \param[in]	id		Identifies the producer process amongst others
  *				on the same machine.
@@ -763,6 +816,7 @@ destroy_shmem_with_key(key_t key)
  *				Use D_TM_RETAIN_SHMEM to retain the shared
  *				memory segment created for these metrics after
  *				this process exits.
+ * \param[in]   root_name       The name of this node in the telemetry tree.
  *
  * \return		DER_SUCCESS		Success
  *			-DER_NO_SHMEM		Out of shared memory
@@ -770,41 +824,70 @@ destroy_shmem_with_key(key_t key)
  *			-DER_INVAL		Invalid \a flag(s)
  */
 int
-d_tm_init(int id, uint64_t mem_size, int flags)
+d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name)
 {
-	struct d_tm_shmem_hdr	*new_shmem;
+	struct d_tm_shmem_hdr   *new_shmem = NULL;
 	key_t			 key;
-	int			 shmid;
-	char			 tmp[D_TM_MAX_NAME_LEN];
+	int                      shmid;
 	int			 rc = DER_SUCCESS;
 
+	if (root_name == NULL || strnlen(root_name, D_TM_MAX_NAME_LEN) == 0) {
+		D_ERROR("root name cannot be empty\n");
+		return -DER_INVAL;
+	}
+
+	if (strnlen(root_name, D_TM_MAX_NAME_LEN) == D_TM_MAX_NAME_LEN) {
+		D_ERROR("root name too long (max=%d)\n", D_TM_MAX_NAME_LEN);
+		return -DER_EXCEEDS_PATH_LEN;
+	}
+
 	memset(&tm_shmem, 0, sizeof(tm_shmem));
 
-	if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM)) != 0) {
-		D_ERROR("Invalid flags\n");
+	if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM | D_TM_RETAIN_SHMEM_IF_NON_EMPTY |
+		       D_TM_OPEN_OR_CREATE | D_TM_MULTIPLE_WRITER_LOCK)) != 0) {
+		D_ERROR("Invalid flags 0x%x\n", flags);
 		rc = -DER_INVAL;
 		goto failure;
 	}
 
 	if (flags & D_TM_SERIALIZATION) {
-		tm_shmem.sync_access = true;
+		tm_shmem.sync_access = 1;
 		D_INFO("Serialization enabled for id %d\n", id);
 	}
 
 	if (flags & D_TM_RETAIN_SHMEM) {
-		tm_shmem.retain = true;
+		tm_shmem.retain = 1;
 		D_INFO("Retaining shared memory for id %d\n", id);
 	}
 
+	if (flags & D_TM_RETAIN_SHMEM_IF_NON_EMPTY) {
+		tm_shmem.retain_non_empty = 1;
+		D_INFO("Retaining shared memory for id %d if not empty\n", id);
+	}
+
+	if (flags & D_TM_MULTIPLE_WRITER_LOCK) {
+		tm_shmem.multiple_writer_lock = 1;
+		D_INFO("Require multiple write protection for id %d\n", id);
+	}
+
 	tm_shmem.id = id;
-	snprintf(tmp, sizeof(tmp), "ID: %d", id);
 	key = d_tm_get_srv_key(id);
-	rc = destroy_shmem_with_key(key);
-	if (rc != 0)
-		goto failure;
-	rc = create_shmem(tmp, key, mem_size, &shmid, &new_shmem);
-	if (rc != 0)
-		goto failure;
+	if (flags & D_TM_OPEN_OR_CREATE) {
+		rc = open_shmem(key, &new_shmem);
+		if (rc > 0) {
+			D_ASSERT(new_shmem != NULL);
+			shmid = rc;
+		}
+	}
+
+	if (new_shmem == NULL) {
+		rc = destroy_shmem_with_key(key);
+		if (rc != 0)
+			goto failure;
+		rc = create_shmem(root_name, key, mem_size, &shmid, &new_shmem);
+		if (rc != 0)
+			goto failure;
+	}
 
 	rc = alloc_ctx(&tm_shmem.ctx, new_shmem, shmid);
 	if (rc != 0)
@@ -831,19 +914,76 @@ d_tm_init(int id, uint64_t mem_size, int flags)
 	return rc;
 }
 
+/**
+ * Initialize an instance of the telemetry and metrics API for the producer
+ * process.
+ *
+ * \param[in]	id		Identifies the producer process amongst others
+ *				on the same machine.
+ * \param[in]	mem_size	Size in bytes of the shared memory segment that
+ *				is allocated.
+ * \param[in]	flags		Optional flags to control initialization.
+ *				Use D_TM_SERIALIZATION to enable read/write
+ *				synchronization of individual nodes.
+ *				Use D_TM_RETAIN_SHMEM to retain the shared
+ *				memory segment created for these metrics after
+ *				this process exits.
+ *
+ * \return		DER_SUCCESS		Success
+ *			-DER_NO_SHMEM		Out of shared memory
+ *			-DER_EXCEEDS_PATH_LEN	Root node name exceeds path len
+ *			-DER_INVAL		Invalid \a flag(s)
+ */
+int
+d_tm_init(int id, uint64_t mem_size, int flags)
+{
+	char tmp[D_TM_MAX_NAME_LEN];
+
+	snprintf(tmp, sizeof(tmp), "ID: %d", id);
+
+	return d_tm_init_with_name(id, mem_size, flags, tmp);
+}
+
+/* Check if all children are invalid, i.e. no child of \a node still has a name (NULL is empty) */
+static bool
+is_node_empty(struct d_tm_node_t *node)
+{
+	struct d_tm_context   *ctx = tm_shmem.ctx;
+	struct d_tm_shmem_hdr *shmem;
+	struct d_tm_node_t    *child;
+
+	if (node == NULL)
+		return true;
+	shmem = get_shmem_for_key(ctx, node->dtn_shmem_key);
+	/* check a child before moving to its sibling; the loop ends at the NULL tail */
+	for (child = conv_ptr(shmem, node->dtn_child); child != NULL;
+	     child = conv_ptr(shmem, child->dtn_sibling))
+		if (child->dtn_name != NULL)
+			return false;
+	return true;
+}
+
 /**
  * Releases resources claimed by init
  */
 void
 d_tm_fini(void)
 {
-	bool	destroy_shmem = false;
+	bool destroy_shmem = true;
 
 	if (tm_shmem.ctx == NULL)
 		goto out;
 
-	if (!tm_shmem.retain)
-		destroy_shmem = true;
+	if (tm_shmem.retain)
+		destroy_shmem = false;
+
+	if (tm_shmem.retain_non_empty) {
+		struct d_tm_node_t *root;
+
+		root = d_tm_get_root(tm_shmem.ctx);
+		if (!is_node_empty(root))
+			destroy_shmem = false;
+	}
 
 	/* close with the option to destroy the shmem region if needed */
 	close_all_shmem(tm_shmem.ctx, destroy_shmem);
@@ -1451,9 +1591,9 @@ _reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node)
 	return DER_SUCCESS;
 }
 
-static void
-reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level,
-	   char *path, int format, int opt_fields, FILE *stream)
+void
+d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path,
+		int format, int opt_fields, FILE *stream)
 {
 	char	*name = NULL;
 
@@ -1467,7 +1607,7 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level,
 	switch (node->dtn_type) {
 	case D_TM_LINK:
 		node = d_tm_follow_link(ctx, node);
-		reset_node(ctx, node, level, path, format, opt_fields, stream);
+		d_tm_reset_node(ctx, node, level, path, format, opt_fields, stream);
 		break;
 	case D_TM_DIRECTORY:
 	case D_TM_COUNTER:
@@ -1507,20 +1647,18 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level,
  *				Choose D_TM_CSV for comma separated values.
  * \param[in]	opt_fields	A bitmask.  Set D_TM_INCLUDE_* as desired for
  *				the optional output fields.
- * \param[in]	show_timestamp	Set to true to print the timestamp the metric
- *				was read by the consumer.
- * \param[in]	stream		Direct output to this stream (stdout, stderr)
+ * \param[in]	iter_cb		iterate callback.
+ * \param[in]	cb_arg		argument for iterate callback.
  */
 void
-d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node,
-	     int level, int filter, char *path, int format,
-	     int opt_fields, uint32_t ops, FILE *stream)
+d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, int filter, char *path,
+	     int format, int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg)
 {
 	struct d_tm_shmem_hdr	*shmem = NULL;
 	char			*fullpath = NULL;
 	char			*parent_name = NULL;
 
-	if ((node == NULL) || (stream == NULL))
+	if (node == NULL)
 		return;
 
 	if (node->dtn_type == D_TM_LINK) {
@@ -1533,14 +1671,8 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node,
 	if (shmem == NULL)
 		return;
 
-	if (node->dtn_type & filter) {
-		if (ops & D_TM_ITER_READ)
-			d_tm_print_node(ctx, node, level, path, format,
-					opt_fields, stream);
-		if (ops & D_TM_ITER_RESET)
-			reset_node(ctx, node, level, path, format,
-				   opt_fields, stream);
-	}
+	if (node->dtn_type & filter)
+		iter_cb(ctx, node, level, path, format, opt_fields, cb_arg);
 
 	parent_name = conv_ptr(shmem, node->dtn_name);
 	node = node->dtn_child;
@@ -1555,8 +1687,8 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node,
 		else
 			D_ASPRINTF(fullpath, "%s/%s", path, parent_name);
 
-		d_tm_iterate(ctx, node, level + 1, filter, fullpath, format,
-			     opt_fields, ops, stream);
+		d_tm_iterate(ctx, node, level + 1, filter, fullpath, format, opt_fields, iter_cb,
+			     cb_arg);
 		D_FREE(fullpath);
 		node = node->dtn_sibling;
 		node = conv_ptr(shmem, node);
@@ -2105,6 +2237,29 @@ is_initialized(void)
 	       tm_shmem.ctx->shmem_root != NULL;
 }
 
+/*
+ * Get a pointer to the last token in the path without modifying the original
+ * string.
+ */
+static const char *
+get_last_token(const char *path)
+{
+	const char *substr = path;
+	const char *ch;
+	bool        next_token = false;
+
+	for (ch = path; *ch != '\0'; ch++) {
+		if (*ch == '/') {
+			next_token = true;
+		} else if (next_token) {
+			substr     = ch;
+			next_token = false;
+		}
+	}
+
+	return substr;
+}
+
 static int
 add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type,
 	   char *desc, char *units, char *path)
@@ -2113,6 +2268,7 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type,
 	struct d_tm_node_t	*parent_node;
 	struct d_tm_node_t	*temp = NULL;
 	struct d_tm_shmem_hdr	*shmem;
+	struct d_tm_metric_t    *metric;
 	char			*token;
 	char			*rest;
 	char			*unit_string;
@@ -2154,11 +2310,11 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type,
 		}
 	}
 
-	temp->dtn_metric->dtm_stats = NULL;
+	metric            = conv_ptr(shmem, temp->dtn_metric);
+	metric->dtm_stats = NULL;
 	if (has_stats(temp)) {
-		temp->dtn_metric->dtm_stats =
-			shmalloc(shmem, sizeof(struct d_tm_stats_t));
-		if (temp->dtn_metric->dtm_stats == NULL) {
+		metric->dtm_stats = shmalloc(shmem, sizeof(struct d_tm_stats_t));
+		if (metric->dtm_stats == NULL) {
 			rc = -DER_NO_SHMEM;
 			goto out;
 		}
@@ -2175,14 +2331,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type,
 
 	if (buff_len > 0) {
 		buff_len += 1; /** make room for the trailing null */
-		temp->dtn_metric->dtm_desc = shmalloc(shmem, buff_len);
-		if (temp->dtn_metric->dtm_desc == NULL) {
+		metric->dtm_desc = shmalloc(shmem, buff_len);
+		if (metric->dtm_desc == NULL) {
 			rc = -DER_NO_SHMEM;
 			goto out;
 		}
-		strncpy(temp->dtn_metric->dtm_desc, desc, buff_len);
+		strncpy(conv_ptr(shmem, metric->dtm_desc), desc, buff_len);
 	} else {
-		temp->dtn_metric->dtm_desc = NULL;
+		metric->dtm_desc = NULL;
 	}
 
 	unit_string = units;
@@ -2216,14 +2372,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type,
 
 	if (buff_len > 0) {
 		buff_len += 1; /** make room for the trailing null */
-		temp->dtn_metric->dtm_units = shmalloc(shmem, buff_len);
-		if (temp->dtn_metric->dtm_units == NULL) {
+		metric->dtm_units = shmalloc(shmem, buff_len);
+		if (metric->dtm_units == NULL) {
 			rc = -DER_NO_SHMEM;
 			goto out;
 		}
-		strncpy(temp->dtn_metric->dtm_units, unit_string, buff_len);
+		strncpy(conv_ptr(shmem, metric->dtm_units), unit_string, buff_len);
 	} else {
-		temp->dtn_metric->dtm_units = NULL;
+		metric->dtm_units = NULL;
 	}
 
 	temp->dtn_protect = false;
@@ -2344,26 +2500,35 @@ int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc,
 }
 
 static void
-invalidate_link_node(struct d_tm_node_t *node)
+invalidate_link_node(struct d_tm_shmem_hdr *parent, struct d_tm_node_t *node)
 {
 	if (node == NULL || node->dtn_type != D_TM_LINK)
 		return;
 
 	node->dtn_name = NULL;
-	if (node->dtn_metric != NULL)
-		node->dtn_metric->dtm_data.value = 0;
+	if (node->dtn_metric != NULL) {
+		struct d_tm_metric_t *link_metric;
+
+		link_metric                 = conv_ptr(parent, node->dtn_metric);
+		link_metric->dtm_data.value = 0;
+	}
 }
 
 static int
 get_free_region_entry(struct d_tm_shmem_hdr *shmem,
 		      struct shmem_region_list **entry)
 {
+	d_list_t                        *cur;
+	d_list_t                        *head;
+	d_list_t                        *next;
 	struct shmem_region_list	*tmp;
 
 	D_ASSERT(shmem != NULL);
 	D_ASSERT(entry != NULL);
 
-	d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) {
+	head = &shmem->sh_subregions;
+	for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) {
+		tmp = d_list_entry(cur, __typeof__(*tmp), rl_link);
 		if (tmp->rl_link_node == NULL) {
 			*entry = tmp;
 			return 0;
@@ -2376,7 +2541,23 @@ get_free_region_entry(struct d_tm_shmem_hdr *shmem,
 			shmem->sh_key);
 		return -DER_NO_SHMEM;
 	}
-	d_list_add(&tmp->rl_link, &shmem->sh_subregions);
+
+	next = conv_ptr(shmem, head->next);
+	/* NB: sh_subregions is initialized by D_INIT_LIST_HEAD(), so it is not a shmem address */
+	if (d_list_empty(&shmem->sh_subregions))
+		cur = (d_list_t *)(shmem->sh_base_addr +
+				   (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions));
+	else
+		cur = head->next;
+
+	head->next = &tmp->rl_link;
+	next->prev = &tmp->rl_link;
+
+	tmp               = conv_ptr(shmem, tmp);
+	tmp->rl_link.next = cur;
+	tmp->rl_link.prev =
+	    (d_list_t *)(shmem->sh_base_addr +
+			 (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions));
 
 	*entry = tmp;
 	return 0;
@@ -2413,27 +2594,199 @@ get_unique_shmem_key(const char *path, int id)
 	return (key_t)d_hash_string_u32(salted, sizeof(salted));
 }
 
+static int
+shm_stat_key(key_t key, struct shmid_ds *shminfo, int *shmid_ptr)
+{
+	int shmid;
+	int rc;
+
+	if (unlikely(shminfo == NULL)) {
+		D_ERROR("NULL shminfo\n");
+		return -DER_INVAL;
+	}
+
+	rc = shmget(key, 0, 0);
+	if (rc < 0) {
+		D_ERROR("shmget(0x%x) failed: %s (%d)\n", key, strerror(errno), errno);
+		return daos_errno2der(errno);
+	}
+	shmid = rc;
+
+	rc = shmctl(shmid, IPC_STAT, shminfo);
+	if (rc < 0) {
+		D_ERROR("shmctl(%d, IPC_STAT) failed: %s (%d)\n", shmid, strerror(errno), errno);
+		return daos_errno2der(errno);
+	}
+
+	if (shmid_ptr != NULL)
+		*shmid_ptr = shmid;
+
+	return 0;
+}
+
 /*
- * Get a pointer to the last token in the path without modifying the original
- * string.
+ * Set the child segment's ownership to match the parent segment.
+ * Needed in the client telemetry case where the client is allowing
+ * the agent to manage its telemetry segments.
  */
-static const char *
-get_last_token(const char *path)
+static int
+sync_attached_segment_uid(char *path, key_t child_key)
 {
-	const char	*substr = path;
-	const char	*ch;
-	bool		 next_token = false;
+	struct d_tm_node_t  *link_node;
+	struct d_tm_context *ctx     = tm_shmem.ctx;
+	struct shmid_ds      shminfo = {0};
+	uid_t                o_uid;
+	int                  child_shmid;
+	int                  rc;
+
+	if (unlikely(path == NULL)) {
+		D_ERROR("NULL inputs\n");
+		return -DER_INVAL;
+	}
 
-	for (ch = path; *ch != '\0'; ch++) {
-		if (*ch == '/') {
-			next_token = true;
-		} else if (next_token) {
-			substr = ch;
-			next_token = false;
-		}
+	link_node = d_tm_find_metric(ctx, path);
+	if (link_node == NULL) {
+		D_ERROR("nonexistent metric: %s\n", path);
+		D_GOTO(out, rc = -DER_NONEXIST);
 	}
 
-	return substr;
+	rc = shm_stat_key(link_node->dtn_shmem_key, &shminfo, NULL);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to stat parent segment");
+		goto out;
+	}
+	o_uid = shminfo.shm_perm.uid;
+
+	rc = shm_stat_key(child_key, &shminfo, &child_shmid);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to stat child segment");
+		goto out;
+	}
+
+	if (o_uid == shminfo.shm_perm.uid)
+		D_GOTO(out, rc = 0);
+
+	shminfo.shm_perm.uid = o_uid;
+	if (shmctl(child_shmid, IPC_SET, &shminfo) != 0) {
+		rc = daos_errno2der(errno); /* shmctl() reports -1/errno, not a DER code */
+		DL_ERROR(rc, "failed to set child segment ownership");
+	}
+
+out:
+	return rc;
+}
+
+/* Add a link node at \a path that refers to the segment \a key, and record the segment in the
+ * parent's subregion list. Both callers in this file hold the lock from d_tm_lock_shmem(). */
+static int
+attach_path_segment(key_t key, char *path)
+{
+	struct d_tm_node_t       *link_node;
+	struct d_tm_context      *ctx = tm_shmem.ctx;
+	struct d_tm_shmem_hdr    *parent_shmem;
+	struct d_tm_metric_t     *link_metric;
+	struct shmem_region_list *region_entry;
+	int                       rc;
+
+	if (unlikely(path == NULL)) {
+		D_ERROR("NULL inputs\n");
+		D_GOTO(fail, rc = -DER_INVAL);
+	}
+
+	/* Add a link to the new region */
+	rc = add_metric(ctx, &link_node, D_TM_LINK, NULL, NULL, path);
+	if (unlikely(rc != 0)) {
+		D_ERROR("can't set up the link node, " DF_RC "\n", DP_RC(rc));
+		D_GOTO(fail, rc);
+	}
+
+	/* track attached regions within the parent shmem */
+	parent_shmem = get_shmem_for_key(ctx, link_node->dtn_shmem_key);
+	if (unlikely(parent_shmem == NULL)) {
+		D_ERROR("failed to get parent shmem pointer\n");
+		D_GOTO(fail_link, rc = -DER_NO_SHMEM);
+	}
+
+	D_ASSERT(link_node->dtn_type == D_TM_LINK);
+	link_metric                 = conv_ptr(parent_shmem, link_node->dtn_metric);
+	link_metric->dtm_data.value = key;
+
+	rc = get_free_region_entry(parent_shmem, &region_entry);
+	if (unlikely(rc != 0))
+		D_GOTO(fail_link, rc);
+	region_entry->rl_key       = key;
+	region_entry->rl_link_node = link_node;
+
+	/* no unlock here: the caller's d_tm_unlock_shmem() releases the writer lock exactly once */
+	return 0;
+fail_link:
+	invalidate_link_node(parent_shmem, link_node);
+fail:
+	return rc;
+}
+
+/**
+ * Attach an existing telemetry segment into the tree at the path designated
+ * by fmt. This segment will be treated the same as an ephemeral directory
+ * that can be deleted later along with its children.
+ *
+ * \param[in]	key		Key to the shared memory segment
+ * \param[in]	fmt		Path constructed via variadic arguments
+ *
+ * \return	0		Success
+ *		-DER_INVAL	Invalid input
+ *		-DER_EXIST	Requested path already exists
+ */
+int
+d_tm_attach_path_segment(key_t key, const char *fmt, ...)
+{
+	struct d_tm_node_t  *link_node;
+	struct d_tm_context *ctx = tm_shmem.ctx;
+	va_list              args;
+	char                 path[D_TM_MAX_NAME_LEN] = {0};
+	int                  rc;
+
+	if (!is_initialized())
+		D_GOTO(fail, rc = -DER_UNINIT);
+
+	if (unlikely(fmt == NULL)) {
+		D_ERROR("NULL inputs\n");
+		D_GOTO(fail, rc = -DER_INVAL);
+	}
+
+	if (strnlen(fmt, D_TM_MAX_NAME_LEN) == 0) {
+		D_ERROR("cannot attach segment at root\n");
+		D_GOTO(fail, rc = -DER_INVAL);
+	}
+
+	va_start(args, fmt);
+	rc = parse_path_fmt(path, sizeof(path), fmt, args);
+	va_end(args);
+	if (unlikely(rc != 0))
+		D_GOTO(fail, rc);
+
+	rc = d_tm_lock_shmem();
+	if (rc != 0)
+		D_GOTO(fail, rc);
+
+	link_node = d_tm_find_metric(ctx, path);
+	if (link_node != NULL) {
+		D_INFO("metric [%s] already exists\n", path);
+		D_GOTO(fail_unlock, rc = -DER_EXIST);
+	}
+
+	rc = attach_path_segment(key, path);
+	if (unlikely(rc != 0))
+		D_GOTO(fail_unlock, rc);
+
+	d_tm_unlock_shmem();
+	return 0;
+fail_unlock:
+	d_tm_unlock_shmem();
+fail:
+	if (rc != -DER_EXIST)
+		DL_ERROR(rc, "Failed to add path segment [%s] for key %d", path, key);
+	return rc;
 }
 
 /**
@@ -2455,12 +2808,9 @@ int
 d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes,
 		       const char *fmt, ...)
 {
-	struct d_tm_node_t		*new_node;
-	struct d_tm_node_t		*link_node;
-	struct d_tm_context		*ctx = tm_shmem.ctx;
-	struct d_tm_shmem_hdr		*parent_shmem;
-	struct d_tm_shmem_hdr		*new_shmem;
-	struct shmem_region_list	*region_entry;
+	struct d_tm_node_t              *new_node;
+	struct d_tm_context             *ctx = tm_shmem.ctx;
+	struct d_tm_shmem_hdr           *new_shmem;
 	va_list				 args;
 	key_t				 key;
 	char				 path[D_TM_MAX_NAME_LEN] = {0};
@@ -2495,57 +2845,52 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes,
 	rc = d_tm_lock_shmem();
 	if (unlikely(rc != 0)) {
 		D_ERROR("failed to get producer mutex\n");
-		D_GOTO(fail, rc);
+		D_GOTO(fail_unlock, rc);
 	}
 
 	new_node = d_tm_find_metric(ctx, path);
 	if (new_node != NULL) {
-		D_ERROR("metric [%s] already exists\n", path);
+		D_INFO("metric [%s] already exists\n", path);
 		D_GOTO(fail_unlock, rc = -DER_EXIST);
 	}
 
 	key = get_unique_shmem_key(path, tm_shmem.id);
 	rc = create_shmem(get_last_token(path), key, size_bytes, &new_shmid,
 			  &new_shmem);
-	if (rc != 0)
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to create shmem for %s", path);
 		D_GOTO(fail_unlock, rc);
+	}
 	new_node = new_shmem->sh_root;
 
 	/* track at the process level */
 	rc = track_open_shmem(ctx, new_shmem, new_shmid, key);
-	if (rc != 0)
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to track shmem for %s", path);
 		D_GOTO(fail_shmem, rc);
+	}
 
-	/* Add a link to the new region */
-	rc = add_metric(ctx, &link_node, D_TM_LINK, NULL, NULL, path);
-	if (rc != 0) {
-		D_ERROR("can't set up the link node, " DF_RC "\n", DP_RC(rc));
-		D_GOTO(fail_tracking, rc);
+	rc = attach_path_segment(key, path);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to attach 0x%x at %s", key, path);
+		D_GOTO(fail_attach, rc);
 	}
-	D_ASSERT(link_node->dtn_type == D_TM_LINK);
-	link_node->dtn_metric->dtm_data.value = key;
 
-	/* track attached regions within the parent shmem */
-	parent_shmem = get_shmem_for_key(ctx, link_node->dtn_shmem_key);
-	if (parent_shmem == NULL) {
-		D_ERROR("failed to get parent shmem pointer\n");
-		D_GOTO(fail_link, rc = -DER_NO_SHMEM);
+	rc = sync_attached_segment_uid(path, key);
+	if (unlikely(rc != 0)) {
+		DL_ERROR(rc, "failed to sync %s permissions", path);
+		D_GOTO(fail_sync, rc);
 	}
-	rc = get_free_region_entry(parent_shmem, &region_entry);
-	if (rc != 0)
-		D_GOTO(fail_link, rc);
-	region_entry->rl_key = key;
-	region_entry->rl_link_node = link_node;
 
 	if (node != NULL)
 		*node = new_node;
 
 	d_tm_unlock_shmem();
 	return 0;
-
-fail_link:
-	invalidate_link_node(link_node);
-fail_tracking:
+fail_sync:
+	d_tm_del_ephemeral_dir(path);
+	goto fail_unlock; /* shmem will be closed/destroyed already */
+fail_attach:
 	close_shmem_for_key(ctx, key, true);
 	goto fail_unlock; /* shmem will be closed/destroyed already */
 fail_shmem:
@@ -2554,17 +2899,21 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes,
 fail_unlock:
 	d_tm_unlock_shmem();
 fail:
-	D_ERROR("Failed to add ephemeral dir [%s]: " DF_RC "\n", path,
-		DP_RC(rc));
+	if (rc != -DER_EXIST)
+		DL_ERROR(rc, "Failed to add ephemeral dir [%s]", path);
 	return rc;
 }
 
 static void
 clear_region_entry_for_key(struct d_tm_shmem_hdr *shmem, key_t key)
 {
+	d_list_t                 *cur;
+	d_list_t                 *head;
 	struct shmem_region_list *tmp;
 
-	d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) {
+	head = &shmem->sh_subregions;
+	for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) {
+		tmp = d_list_entry(cur, __typeof__(*tmp), rl_link);
 		if (tmp->rl_key == key) {
 			D_DEBUG(DB_TRACE,
 				"cleared shmem metadata for key 0x%x\n", key);
@@ -2583,6 +2932,8 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link)
 	struct d_tm_shmem_hdr		*parent_shmem;
 	struct d_tm_shmem_hdr		*shmem;
 	struct d_tm_node_t		*node;
+	d_list_t                        *cur;
+	d_list_t                        *head;
 	struct shmem_region_list	*curr;
 	key_t				 key;
 	int				 rc = 0;
@@ -2616,8 +2967,10 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link)
 	}
 
 	/* delete sub-regions recursively */
-	d_list_for_each_entry(curr, &shmem->sh_subregions, rl_link) {
-		rc = rm_ephemeral_dir(ctx, curr->rl_link_node);
+	head = &shmem->sh_subregions;
+	for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) {
+		curr = d_list_entry(cur, __typeof__(*curr), rl_link);
+		rc   = rm_ephemeral_dir(ctx, conv_ptr(shmem, curr->rl_link_node));
 		if (rc != 0) /* nothing much we can do to recover here */
 			D_ERROR("error removing tmp dir [%s]: "DF_RC"\n",
 				link->dtn_name, DP_RC(rc));
@@ -2629,11 +2982,35 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link)
 
 out_link:
 	/* invalidate since the link node can't be deleted from parent */
-	invalidate_link_node(link);
+	invalidate_link_node(parent_shmem, link);
 out:
 	return rc;
 }
 
+/* Remove the ephemeral dir at \a path; without \a force, refuse non-empty dirs (-DER_BUSY) */
+static int
+try_del_ephemeral_dir(char *path, bool force)
+{
+	struct d_tm_context *ctx = tm_shmem.ctx;
+	struct d_tm_node_t  *link;
+	int                  rc = 0;
+
+	rc = d_tm_lock_shmem();
+	if (unlikely(rc != 0)) {
+		D_ERROR("failed to get producer mutex\n");
+		return rc; /* the lock was not taken, so it must not be released */
+	}
+
+	link = get_node(ctx, path);
+	if (!force && link != NULL && !is_node_empty(link))
+		D_GOTO(unlock, rc = -DER_BUSY);
+
+	rc = rm_ephemeral_dir(ctx, link);
+
+unlock:
+	d_tm_unlock_shmem();
+	return rc;
+}
 /**
  * Deletes an ephemeral metrics directory from the metric tree.
  *
@@ -2645,11 +3022,9 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link)
 int
 d_tm_del_ephemeral_dir(const char *fmt, ...)
 {
-	struct d_tm_context	*ctx = tm_shmem.ctx;
-	struct d_tm_node_t	*link;
-	va_list			 args;
-	char			 path[D_TM_MAX_NAME_LEN] = {0};
-	int			 rc = 0;
+	va_list args;
+	char    path[D_TM_MAX_NAME_LEN] = {0};
+	int     rc                      = 0;
 
 	if (!is_initialized())
 		D_GOTO(out, rc = -DER_UNINIT);
@@ -2665,16 +3040,45 @@ d_tm_del_ephemeral_dir(const char *fmt, ...)
 	if (rc != 0)
 		D_GOTO(out, rc);
 
-	rc = d_tm_lock_shmem();
-	if (unlikely(rc != 0)) {
-		D_ERROR("failed to get producer mutex\n");
-		D_GOTO(out, rc);
+	rc = try_del_ephemeral_dir(path, true);
+out:
+	if (rc != 0)
+		D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n", DP_RC(rc));
+	else
+		D_INFO("Removed ephemeral directory [%s]\n", path);
+	return rc;
+}
+
+/**
+ * Deletes an ephemeral metrics directory from the metric tree, only if it is empty.
+ *
+ * \param[in]	fmt		Used to construct the path to be removed
+ *
+ * \return	0		Success
+ *		-DER_INVAL	Invalid input
+ */
+int
+d_tm_try_del_ephemeral_dir(const char *fmt, ...)
+{
+	va_list args;
+	char    path[D_TM_MAX_NAME_LEN] = {0};
+	int     rc                      = 0;
+
+	if (!is_initialized())
+		D_GOTO(out, rc = -DER_UNINIT);
+
+	if (fmt == NULL || strnlen(fmt, D_TM_MAX_NAME_LEN) == 0) {
+		D_ERROR("telemetry root cannot be deleted\n");
+		D_GOTO(out, rc = -DER_INVAL);
 	}
 
-	link = get_node(ctx, path);
-	rc = rm_ephemeral_dir(ctx, link);
+	va_start(args, fmt);
+	rc = parse_path_fmt(path, sizeof(path), fmt, args);
+	va_end(args);
+	if (rc != 0)
+		D_GOTO(out, rc);
 
-	d_tm_unlock_shmem();
+	rc = try_del_ephemeral_dir(path, false);
 out:
 	if (rc != 0)
 		D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n",
@@ -3538,6 +3942,7 @@ allocate_shared_memory(key_t key, size_t mem_size,
 {
 	int			 shmid;
 	struct d_tm_shmem_hdr	*header;
+	int                      rc;
 
 	D_ASSERT(shmem != NULL);
 
@@ -3559,8 +3964,17 @@ allocate_shared_memory(key_t key, size_t mem_size,
 
 	D_INIT_LIST_HEAD(&header->sh_subregions);
 
-	D_DEBUG(DB_MEM, "Created shared memory region for key 0x%x, size=%lu\n",
-		key, mem_size);
+	if (tm_shmem.multiple_writer_lock) {
+		rc = D_MUTEX_INIT(&header->sh_multiple_writer_lock, NULL);
+		if (rc) {
+			DL_ERROR(rc, "multiple writer lock failed");
+			return -DER_NO_SHMEM;
+		}
+	}
+
+	D_DEBUG(DB_MEM,
+		"Created shared memory region for key 0x%x, size=%lu header %p base %p free %p\n",
+		key, mem_size, header, (void *)header->sh_base_addr, (void *)header->sh_free_addr);
 
 	*shmem = header;
 
@@ -3664,10 +4078,9 @@ shmalloc(struct d_tm_shmem_hdr *shmem, int length)
 
 	shmem->sh_bytes_free -= length;
 	shmem->sh_free_addr += length;
-	D_DEBUG(DB_TRACE,
-		"Allocated %d bytes.  Now %" PRIu64 " remain\n",
-		length, shmem->sh_bytes_free);
-	memset(new_mem, 0, length);
+	D_DEBUG(DB_TRACE, "Allocated %d bytes.  Now %" PRIu64 " remain %p/%p\n", length,
+		shmem->sh_bytes_free, shmem, new_mem);
+	memset(conv_ptr(shmem, new_mem), 0, length);
 	return new_mem;
 }
 
diff --git a/src/gurt/tests/test_gurt_telem_producer.c b/src/gurt/tests/test_gurt_telem_producer.c
index bf3db9d19c95..32d4c4f7b893 100644
--- a/src/gurt/tests/test_gurt_telem_producer.c
+++ b/src/gurt/tests/test_gurt_telem_producer.c
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright 2020-2022 Intel Corporation.
+ * (C) Copyright 2020-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -1226,6 +1226,13 @@ test_verify_object_count(void **state)
 	assert_int_equal(num, exp_total);
 }
 
+static void
+iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format,
+	   int opt_fields, void *arg)
+{
+	d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg);
+}
+
 static void
 test_print_metrics(void **state)
 {
@@ -1238,15 +1245,15 @@ test_print_metrics(void **state)
 	filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT |
 		  D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY);
 
-	d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_STANDARD,
-		     D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout);
+	d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_STANDARD, D_TM_INCLUDE_METADATA,
+		     iter_print, stdout);
 
 	d_tm_print_field_descriptors(D_TM_INCLUDE_TIMESTAMP |
 				     D_TM_INCLUDE_METADATA, stdout);
 
 	filter &= ~D_TM_DIRECTORY;
-	d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_CSV,
-		     D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout);
+	d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_CSV, D_TM_INCLUDE_METADATA, iter_print,
+		     stdout);
 }
 
 static void
diff --git a/src/include/daos/drpc_modules.h b/src/include/daos/drpc_modules.h
index 69aaf568673c..a8821d9f079f 100644
--- a/src/include/daos/drpc_modules.h
+++ b/src/include/daos/drpc_modules.h
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright 2019-2022 Intel Corporation.
+ * (C) Copyright 2019-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -33,43 +33,44 @@ enum drpc_sec_agent_method {
 };
 
 enum drpc_mgmt_method {
-	DRPC_METHOD_MGMT_KILL_RANK		= 201,
-	DRPC_METHOD_MGMT_SET_RANK		= 202,
-	DRPC_METHOD_MGMT_GET_ATTACH_INFO	= 206,
-	DRPC_METHOD_MGMT_POOL_CREATE		= 207,
-	DRPC_METHOD_MGMT_POOL_DESTROY		= 208,
-	DRPC_METHOD_MGMT_SET_UP			= 209,
-	DRPC_METHOD_MGMT_BIO_HEALTH_QUERY	= 210,
-	DRPC_METHOD_MGMT_SMD_LIST_DEVS		= 211,
-	DRPC_METHOD_MGMT_SMD_LIST_POOLS		= 212,
-	DRPC_METHOD_MGMT_POOL_GET_ACL		= 213,
-	DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL	= 215,
-	DRPC_METHOD_MGMT_POOL_UPDATE_ACL	= 216,
-	DRPC_METHOD_MGMT_POOL_DELETE_ACL	= 217,
-	DRPC_METHOD_MGMT_PREP_SHUTDOWN		= 218,
-	DRPC_METHOD_MGMT_DEV_SET_FAULTY		= 220,
-	DRPC_METHOD_MGMT_DEV_REPLACE		= 221,
-	DRPC_METHOD_MGMT_LIST_CONTAINERS	= 222,
-	DRPC_METHOD_MGMT_POOL_QUERY		= 223,
-	DRPC_METHOD_MGMT_POOL_SET_PROP		= 224,
-	DRPC_METHOD_MGMT_PING_RANK		= 225,
-	DRPC_METHOD_MGMT_REINTEGRATE		= 226,
-	DRPC_METHOD_MGMT_CONT_SET_OWNER		= 227,
-	DRPC_METHOD_MGMT_EXCLUDE		= 228,
-	DRPC_METHOD_MGMT_EXTEND			= 229,
-	DRPC_METHOD_MGMT_POOL_EVICT		= 230,
-	DRPC_METHOD_MGMT_DRAIN			= 231,
-	DRPC_METHOD_MGMT_GROUP_UPDATE		= 232,
-	DRPC_METHOD_MGMT_NOTIFY_EXIT		= 233,
-	DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT	= 235,
-	DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT	= 236,
-	DRPC_METHOD_MGMT_POOL_GET_PROP		= 237,
-	DRPC_METHOD_MGMT_SET_LOG_MASKS		= 238,
-	DRPC_METHOD_MGMT_POOL_UPGRADE		= 239,
-	DRPC_METHOD_MGMT_POOL_QUERY_TARGETS	= 240,
-	DRPC_METHOD_MGMT_LED_MANAGE		= 241,
+	DRPC_METHOD_MGMT_KILL_RANK              = 201,
+	DRPC_METHOD_MGMT_SET_RANK               = 202,
+	DRPC_METHOD_MGMT_GET_ATTACH_INFO        = 206,
+	DRPC_METHOD_MGMT_POOL_CREATE            = 207,
+	DRPC_METHOD_MGMT_POOL_DESTROY           = 208,
+	DRPC_METHOD_MGMT_SET_UP                 = 209,
+	DRPC_METHOD_MGMT_BIO_HEALTH_QUERY       = 210,
+	DRPC_METHOD_MGMT_SMD_LIST_DEVS          = 211,
+	DRPC_METHOD_MGMT_SMD_LIST_POOLS         = 212,
+	DRPC_METHOD_MGMT_POOL_GET_ACL           = 213,
+	DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL     = 215,
+	DRPC_METHOD_MGMT_POOL_UPDATE_ACL        = 216,
+	DRPC_METHOD_MGMT_POOL_DELETE_ACL        = 217,
+	DRPC_METHOD_MGMT_PREP_SHUTDOWN          = 218,
+	DRPC_METHOD_MGMT_DEV_SET_FAULTY         = 220,
+	DRPC_METHOD_MGMT_DEV_REPLACE            = 221,
+	DRPC_METHOD_MGMT_LIST_CONTAINERS        = 222,
+	DRPC_METHOD_MGMT_POOL_QUERY             = 223,
+	DRPC_METHOD_MGMT_POOL_SET_PROP          = 224,
+	DRPC_METHOD_MGMT_PING_RANK              = 225,
+	DRPC_METHOD_MGMT_REINTEGRATE            = 226,
+	DRPC_METHOD_MGMT_CONT_SET_OWNER         = 227,
+	DRPC_METHOD_MGMT_EXCLUDE                = 228,
+	DRPC_METHOD_MGMT_EXTEND                 = 229,
+	DRPC_METHOD_MGMT_POOL_EVICT             = 230,
+	DRPC_METHOD_MGMT_DRAIN                  = 231,
+	DRPC_METHOD_MGMT_GROUP_UPDATE           = 232,
+	DRPC_METHOD_MGMT_NOTIFY_EXIT            = 233,
+	DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT    = 235,
+	DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT = 236,
+	DRPC_METHOD_MGMT_POOL_GET_PROP          = 237,
+	DRPC_METHOD_MGMT_SET_LOG_MASKS          = 238,
+	DRPC_METHOD_MGMT_POOL_UPGRADE           = 239,
+	DRPC_METHOD_MGMT_POOL_QUERY_TARGETS     = 240,
+	DRPC_METHOD_MGMT_LED_MANAGE             = 241,
+	DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM     = 242,
 
-	NUM_DRPC_MGMT_METHODS			/* Must be last */
+	NUM_DRPC_MGMT_METHODS /* Must be last */
 };
 
 enum drpc_srv_method {
diff --git a/src/include/daos/metrics.h b/src/include/daos/metrics.h
new file mode 100644
index 000000000000..a0b6f16f144f
--- /dev/null
+++ b/src/include/daos/metrics.h
@@ -0,0 +1,82 @@
+/**
+ * (C) Copyright 2016-2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+/**
+ * This file is part of daos
+ *
+ * src/include/daos/metrics.h
+ */
+
+#ifndef __DAOS_METRICS_H__
+#define __DAOS_METRICS_H__
+
+#include <daos/common.h>
+#include <daos/tls.h>
+#include <daos_types.h>
+#include <gurt/telemetry_common.h>
+
+#define DC_TM_JOB_ROOT_ID             256
+/* For now TLS is only enabled if metrics are enabled */
+#define DAOS_CLIENT_METRICS_DUMP_PATH "D_CLIENT_METRICS_DUMP_PATH"
+#define DAOS_CLIENT_METRICS_ENABLE    "D_CLIENT_METRICS_ENABLE"
+#define DAOS_CLIENT_METRICS_RETAIN    "D_CLIENT_METRICS_RETAIN"
+extern bool daos_client_metric;
+extern bool daos_client_metric_retain;
+
+struct daos_module_metrics {
+	/* Indicate where the keys should be instantiated */
+	enum daos_module_tag dmm_tags;
+
+	/**
+	 * Allocate metrics under the given path in ephemeral shmem for the
+	 * newly-created pool.
+	 */
+	void *(*dmm_init)(const char *path, int tgt_id);
+	void (*dmm_fini)(void *data);
+
+	/**
+	 * Get the number of metrics allocated by this module in total (including all targets).
+	 */
+	int (*dmm_nr_metrics)(void);
+};
+
+/* Estimate of bytes per typical metric node */
+#define NODE_BYTES                                                                                 \
+	(sizeof(struct d_tm_node_t) + sizeof(struct d_tm_metric_t) + 64 /* buffer for metadata */)
+/* Estimate of bytes per histogram bucket */
+#define BUCKET_BYTES (sizeof(struct d_tm_bucket_t) + NODE_BYTES)
+/*
+   Estimate of bytes per metric.
+   This is a generous high-water mark assuming most metrics are not using
+   histograms. May need adjustment if the balance of metrics changes.
+*/
+#define PER_METRIC_BYTES                                                                           \
+	(NODE_BYTES + sizeof(struct d_tm_stats_t) + sizeof(struct d_tm_histogram_t) + BUCKET_BYTES)
+
+int
+daos_metrics_init(enum daos_module_tag tag, uint32_t id, struct daos_module_metrics *metrics);
+void
+daos_metrics_fini(void);
+int
+daos_module_init_metrics(enum dss_module_tag tag, void **metrics, const char *path, int tgt_id);
+void
+daos_module_fini_metrics(enum dss_module_tag tag, void **metrics);
+
+int
+daos_module_nr_pool_metrics(void);
+
+/**
+ *  Called during library initialization to init metrics.
+ */
+int
+dc_tm_init(void);
+
+/**
+ *  Called during library finalization to free metrics resources
+ */
+void
+dc_tm_fini(void);
+
+#endif /*__DAOS_METRICS_H__*/
diff --git a/src/include/daos/mgmt.h b/src/include/daos/mgmt.h
index 4d999428c8c7..eee326c761bc 100644
--- a/src/include/daos/mgmt.h
+++ b/src/include/daos/mgmt.h
@@ -10,6 +10,7 @@
 #ifndef __DC_MGMT_H__
 #define __DC_MGMT_H__
 
+#include <sys/types.h>
 #include <daos/common.h>
 #include <daos/tse.h>
 #include <daos_types.h>
@@ -71,6 +72,9 @@ int dc_mgmt_net_get_num_srv_ranks(void);
 int dc_mgmt_get_sys_info(const char *sys, struct daos_sys_info **info);
 void dc_mgmt_put_sys_info(struct daos_sys_info *info);
 
+int
+     dc_mgmt_tm_register(const char *sys, const char *jobid, key_t shm_key, uid_t *owner_uid);
+
 int dc_get_attach_info(const char *name, bool all_ranks,
 		       struct dc_mgmt_sys_info *info,
 		       Mgmt__GetAttachInfoResp **respp);
diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h
index 5764e9d4002c..0807dcfcf0d8 100644
--- a/src/include/daos/pool.h
+++ b/src/include/daos/pool.h
@@ -14,6 +14,7 @@
 
 #include <daos/common.h>
 #include <gurt/hash.h>
+#include <gurt/telemetry_common.h>
 #include <daos/pool_map.h>
 #include <daos/rsvc.h>
 #include <daos/tse.h>
@@ -91,6 +92,7 @@ struct dc_pool {
 	pthread_rwlock_t	dp_map_lock;
 	struct pool_map	       *dp_map;
 	tse_task_t	       *dp_map_task;
+	void                  **dp_metrics;
 	/* highest known pool map version */
 	uint32_t		dp_map_version_known;
 	uint32_t		dp_disconnecting:1,
diff --git a/src/include/daos/tls.h b/src/include/daos/tls.h
new file mode 100644
index 000000000000..8e9628b39daa
--- /dev/null
+++ b/src/include/daos/tls.h
@@ -0,0 +1,121 @@
+/**
+ * (C) Copyright 2016-2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+/**
+ * This file is part of daos
+ *
+ * src/include/daos/tls.h
+ */
+
+#ifndef __DAOS_TLS_H__
+#define __DAOS_TLS_H__
+
+#include <daos/common.h>
+#include <daos_types.h>
+
+/**
+ * Stackable Module API
+ * Provides a modular interface to load and register server-side code on
+ * demand. A module is composed of:
+ * - a set of request handlers which are registered when the module is loaded.
+ * - a server-side API (see header files suffixed by "_srv") used for
+ *   inter-module direct calls.
+ *
+ * For now, all loaded modules are assumed to be trustful, but sandboxes can be
+ * implemented in the future.
+ */
+/*
+ * Thread-local storage
+ */
+struct daos_thread_local_storage {
+	uint32_t dtls_tag;
+	void   **dtls_values;
+};
+
+enum daos_module_tag {
+	DAOS_SYS_TAG    = 1 << 0, /** only run on system xstream */
+	DAOS_TGT_TAG    = 1 << 1, /** only run on target xstream */
+	DAOS_RDB_TAG    = 1 << 2, /** only run on rdb xstream */
+	DAOS_OFF_TAG    = 1 << 3, /** only run on offload/helper xstream */
+	DAOS_CLI_TAG    = 1 << 4, /** only run on client stack */
+	DAOS_SERVER_TAG = 0xff,   /** run on all xstream; NOTE: mask also covers DAOS_CLI_TAG */
+};
+
+/* The module key descriptor for each xstream */
+struct daos_module_key {
+	/* Indicate where the keys should be instantiated */
+	enum daos_module_tag dmk_tags;
+
+	/* Index of this key's value in dtls_values; must be < DAOS_MODULE_KEYS_NR */
+	int                  dmk_index;
+	/* init keys for context */
+	void *(*dmk_init)(int tags, int xs_id, int tgt_id);
+
+	/* fini keys for context */
+	void (*dmk_fini)(int tags, void *data);
+};
+
+#define DAOS_MODULE_KEYS_NR 10
+struct daos_thread_local_storage *
+dss_tls_get(void);
+struct daos_thread_local_storage *
+dc_tls_get(unsigned int tag);
+
+int
+ds_tls_key_create(void);
+int
+dc_tls_key_create(void);
+void
+ds_tls_key_delete(void);
+void
+dc_tls_key_delete(void);
+
+struct daos_module_key *
+daos_get_module_key(int index);
+
+/**
+ * Get value from context by the key
+ *
+ * Get value inside dtls by key. So each module will use this API to
+ * retrieve their own value in the thread context.
+ *
+ * \param[in] dtls	the thread context.
+ * \param[in] key	key used to retrieve the dtls_value.
+ *
+ * \retval		the dtls_value retrieved by key.
+ */
+static inline void *
+daos_module_key_get(struct daos_thread_local_storage *dtls, struct daos_module_key *key)
+{
+	D_ASSERT(key->dmk_index >= 0);
+	D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR);
+	D_ASSERT(daos_get_module_key(key->dmk_index) == key);
+	D_ASSERT(dtls != NULL);
+
+	return dtls->dtls_values[key->dmk_index];
+}
+
+#define dss_module_key_get       daos_module_key_get
+#define dss_register_key         daos_register_key
+#define dss_unregister_key       daos_unregister_key
+#define dss_module_info          daos_module_info
+#define dss_module_tag           daos_module_tag
+#define dss_module_key           daos_module_key
+#define dss_thread_local_storage daos_thread_local_storage
+
+void
+daos_register_key(struct daos_module_key *key);
+void
+daos_unregister_key(struct daos_module_key *key);
+struct daos_thread_local_storage *
+dc_tls_init(int tag, uint32_t pid);
+void
+dc_tls_fini(void);
+struct daos_thread_local_storage *
+dss_tls_init(int tag, int xs_id, int tgt_id);
+void
+dss_tls_fini(struct daos_thread_local_storage *dtls);
+
+#endif /*__DAOS_TLS_H__*/
diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h
index 06a927b8d3f0..116c486e9439 100644
--- a/src/include/daos_srv/daos_engine.h
+++ b/src/include/daos_srv/daos_engine.h
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2016-2023 Intel Corporation.
+ * (C) Copyright 2016-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -15,6 +15,7 @@
 #include <daos/drpc.h>
 #include <daos/rpc.h>
 #include <daos/cont_props.h>
+#include <daos/tls.h>
 #include <daos_srv/iv.h>
 #include <daos_srv/vos_types.h>
 #include <daos_srv/pool.h>
@@ -54,84 +55,6 @@ extern unsigned int	 dss_instance_idx;
 /** Bypass for the nvme health check */
 extern bool		 dss_nvme_bypass_health_check;
 
-/**
- * Stackable Module API
- * Provides a modular interface to load and register server-side code on
- * demand. A module is composed of:
- * - a set of request handlers which are registered when the module is loaded.
- * - a server-side API (see header files suffixed by "_srv") used for
- *   inter-module direct calls.
- *
- * For now, all loaded modules are assumed to be trustful, but sandboxes can be
- * implemented in the future.
- */
-/*
- * Thead-local storage
- */
-struct dss_thread_local_storage {
-	uint32_t	dtls_tag;
-	void		**dtls_values;
-};
-
-enum dss_module_tag {
-	DAOS_SYS_TAG    = 1 << 0, /** only run on system xstream */
-	DAOS_TGT_TAG    = 1 << 1, /** only run on target xstream */
-	DAOS_RDB_TAG    = 1 << 2, /** only run on rdb xstream */
-	DAOS_OFF_TAG    = 1 << 3, /** only run on offload/helper xstream */
-	DAOS_SERVER_TAG = 0xff,   /** run on all xstream */
-};
-
-/* The module key descriptor for each xstream */
-struct dss_module_key {
-	/* Indicate where the keys should be instantiated */
-	enum dss_module_tag dmk_tags;
-
-	/* The position inside the dss_module_keys */
-	int dmk_index;
-	/* init keys for context */
-	void *(*dmk_init)(int tags, int xs_id, int tgt_id);
-
-	/* fini keys for context */
-	void (*dmk_fini)(int tags, void *data);
-};
-
-extern pthread_key_t dss_tls_key;
-extern struct dss_module_key *dss_module_keys[];
-#define DAOS_MODULE_KEYS_NR 10
-
-static inline struct dss_thread_local_storage *
-dss_tls_get()
-{
-	return (struct dss_thread_local_storage *)
-		pthread_getspecific(dss_tls_key);
-}
-
-/**
- * Get value from context by the key
- *
- * Get value inside dtls by key. So each module will use this API to
- * retrieve their own value in the thread context.
- *
- * \param[in] dtls	the thread context.
- * \param[in] key	key used to retrieve the dtls_value.
- *
- * \retval		the dtls_value retrieved by key.
- */
-static inline void *
-dss_module_key_get(struct dss_thread_local_storage *dtls,
-		   struct dss_module_key *key)
-{
-	D_ASSERT(key->dmk_index >= 0);
-	D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR);
-	D_ASSERT(dss_module_keys[key->dmk_index] == key);
-	D_ASSERT(dtls != NULL);
-
-	return dtls->dtls_values[key->dmk_index];
-}
-
-void dss_register_key(struct dss_module_key *key);
-void dss_unregister_key(struct dss_module_key *key);
-
 /** pthread names are limited to 16 chars */
 #define DSS_XS_NAME_LEN		(32)
 
@@ -172,7 +95,7 @@ static inline struct dss_module_info *
 dss_get_module_info(void)
 {
 	struct dss_module_info *dmi;
-	struct dss_thread_local_storage *dtc;
+	struct daos_thread_local_storage *dtc;
 
 	dtc = dss_tls_get();
 	dmi = (struct dss_module_info *)
@@ -419,23 +342,6 @@ struct dss_module_ops {
 int srv_profile_stop();
 int srv_profile_start(char *path, int avg);
 
-struct dss_module_metrics {
-	/* Indicate where the keys should be instantiated */
-	enum dss_module_tag dmm_tags;
-
-	/**
-	 * allocate metrics with path to ephemeral shmem for to the
-	 * newly-created pool
-	 */
-	void	*(*dmm_init)(const char *path, int tgt_id);
-	void	 (*dmm_fini)(void *data);
-
-	/**
-	 * Get the number of metrics allocated by this module in total (including all targets).
-	 */
-	int	 (*dmm_nr_metrics)(void);
-};
-
 /**
  * Each module should provide a dss_module structure which defines the module
  * interface. The name of the allocated structure must be the library name
@@ -481,7 +387,7 @@ struct dss_module {
 	struct dss_module_ops		*sm_mod_ops;
 
 	/* Per-pool metrics (optional) */
-	struct dss_module_metrics	*sm_metrics;
+	struct daos_module_metrics      *sm_metrics;
 };
 
 /**
diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h
index 12039c24a731..efb838befaef 100644
--- a/src/include/gurt/telemetry_common.h
+++ b/src/include/gurt/telemetry_common.h
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2020-2023 Intel Corporation.
+ * (C) Copyright 2020-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -145,9 +145,12 @@ enum {
 };
 
 enum {
-	D_TM_SERVER_PROCESS		= 0x000,
-	D_TM_SERIALIZATION		= 0x001,
-	D_TM_RETAIN_SHMEM		= 0x002,
+	D_TM_SERVER_PROCESS            = 0x000,
+	D_TM_SERIALIZATION             = 0x001,
+	D_TM_RETAIN_SHMEM              = 0x002,
+	D_TM_RETAIN_SHMEM_IF_NON_EMPTY = 0x004,
+	D_TM_OPEN_OR_CREATE            = 0x008,
+	D_TM_MULTIPLE_WRITER_LOCK      = 0x010,
 };
 
 /** Output formats */
diff --git a/src/include/gurt/telemetry_consumer.h b/src/include/gurt/telemetry_consumer.h
index f0b1d706be71..138633ced918 100644
--- a/src/include/gurt/telemetry_consumer.h
+++ b/src/include/gurt/telemetry_consumer.h
@@ -49,12 +49,21 @@ int d_tm_list(struct d_tm_context *ctx, struct d_tm_nodeList_t **head,
 int d_tm_list_subdirs(struct d_tm_context *ctx, struct d_tm_nodeList_t **head,
 		      struct d_tm_node_t *node, uint64_t *node_count,
 		      int max_depth);
-void d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node,
-		  int level, int filter, char *path, int format,
-		  int opt_fields, uint32_t ops, FILE *stream);
+
+typedef void (*d_tm_iter_cb_t)(struct d_tm_context *ctx, struct d_tm_node_t *node, int level,
+			       char *path, int format, int opt_fields, void *cb_arg);
+
+void
+d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, int filter, char *path,
+	     int format, int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg);
 void d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node,
 		     int level, char *name, int format, int opt_fields,
 		     FILE *stream);
+
+void
+      d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path,
+		      int format, int opt_fields, FILE *stream);
+
 void d_tm_print_field_descriptors(int opt_fields, FILE *stream);
 void d_tm_print_counter(uint64_t val, char *name, int format, char *units,
 			int opt_fields, FILE *stream);
diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h
index 21f506fba383..0046acf12409 100644
--- a/src/include/gurt/telemetry_producer.h
+++ b/src/include/gurt/telemetry_producer.h
@@ -1,11 +1,12 @@
 /**
- * (C) Copyright 2020-2023 Intel Corporation.
+ * (C) Copyright 2020-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
 #ifndef __TELEMETRY_PRODUCER_H__
 #define __TELEMETRY_PRODUCER_H__
 
+#include <sys/types.h>
 #include <gurt/telemetry_common.h>
 
 /* Developer facing server API to write data */
@@ -23,12 +24,19 @@ void d_tm_dec_gauge(struct d_tm_node_t *metric, uint64_t value);
 
 /* Other server functions */
 int d_tm_init(int id, uint64_t mem_size, int flags);
+int
+    d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name);
 int d_tm_init_histogram(struct d_tm_node_t *node, char *path, int num_buckets,
 			int initial_width, int multiplier);
 int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc,
 		    char *units, const char *fmt, ...);
 int d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes,
 			   const char *fmt, ...);
+int
+    d_tm_attach_path_segment(key_t key, const char *fmt, ...);
 int d_tm_del_ephemeral_dir(const char *fmt, ...);
+int
+     d_tm_try_del_ephemeral_dir(const char *fmt, ...);
 void d_tm_fini(void);
+
 #endif /* __TELEMETRY_PRODUCER_H__ */
diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c
index aa640f4c99f0..2eff852eb9f1 100644
--- a/src/mgmt/cli_mgmt.c
+++ b/src/mgmt/cli_mgmt.c
@@ -24,6 +24,7 @@
 #include "rpc.h"
 #include <errno.h>
 #include <stdlib.h>
+#include <sys/ipc.h>
 
 int
 dc_cp(tse_task_t *task, void *data)
@@ -1180,6 +1181,110 @@ dc_mgmt_pool_find(struct dc_mgmt_sys *sys, const char *label, uuid_t puuid,
 	return rc;
 }
 
+/**
+ * Register the client's telemetry shared memory segment with daos_agent.
+ *
+ * Sends a SetupClientTelemetry dRPC request over dc_agent_sockpath and
+ * returns the agent UID carried in the response.
+ *
+ * \param[in]	sys		DAOS system identifier; NULL is sent as an empty name.
+ * \param[in]	jobid		Job ID used for client telemetry; NULL selects dc_jobid.
+ * \param[in]	shm_key		Key of the client's shared memory segment.
+ * \param[out]	owner_uid	Receives the agent UID; written only on success.
+ *
+ * \return			0 on success; otherwise the dRPC-layer error or the status
+ *				reported by the agent.
+ */
+int
+dc_mgmt_tm_register(const char *sys, const char *jobid, key_t shm_key, uid_t *owner_uid)
+{
+	struct drpc_alloc          alloc = PROTO_ALLOCATOR_INIT(alloc);
+	struct drpc               *ctx;
+	Mgmt__ClientTelemetryReq   req = MGMT__CLIENT_TELEMETRY_REQ__INIT;
+	Mgmt__ClientTelemetryResp *resp;
+	uint8_t                   *reqb;
+	size_t                     reqb_size;
+	Drpc__Call                *dreq;
+	Drpc__Response            *dresp;
+	int                        rc;
+
+	if (owner_uid == NULL)
+		return -DER_INVAL;
+
+	/* Connect to daos_agent. */
+	D_ASSERT(dc_agent_sockpath != NULL);
+	rc = drpc_connect(dc_agent_sockpath, &ctx);
+	if (rc != -DER_SUCCESS) {
+		DL_ERROR(rc, "failed to connect to %s ", dc_agent_sockpath);
+		D_GOTO(out, rc);
+	}
+
+	/*
+	 * Leave the initializer's empty string in place for a NULL system name so
+	 * that req.sys is always safe to print with %s in the error path below.
+	 */
+	if (sys != NULL)
+		req.sys = (char *)sys;
+	/* Honor the caller's job ID; fall back to the process-wide one if absent. */
+	req.jobid   = (jobid != NULL) ? (char *)jobid : dc_jobid;
+	req.shm_key = shm_key;
+
+	reqb_size = mgmt__client_telemetry_req__get_packed_size(&req);
+	D_ALLOC(reqb, reqb_size);
+	if (reqb == NULL) {
+		D_GOTO(out_ctx, rc = -DER_NOMEM);
+	}
+	mgmt__client_telemetry_req__pack(&req, reqb);
+
+	rc = drpc_call_create(ctx, DRPC_MODULE_MGMT, DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM, &dreq);
+	if (rc != 0) {
+		D_FREE(reqb);
+		goto out_ctx;
+	}
+	dreq->body.len  = reqb_size;
+	dreq->body.data = reqb;
+
+	/* Make the call and get the response. */
+	rc = drpc_call(ctx, R_SYNC, dreq, &dresp);
+	if (rc != 0) {
+		DL_ERROR(rc, "Sending client telemetry setup request failed");
+		goto out_dreq;
+	}
+	if (dresp->status != DRPC__STATUS__SUCCESS) {
+		D_ERROR("Client telemetry setup request unsuccessful: %d\n", dresp->status);
+		rc = -DER_UNINIT;
+		goto out_dresp;
+	}
+
+	resp = mgmt__client_telemetry_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data);
+	if (alloc.oom)
+		D_GOTO(out_dresp, rc = -DER_NOMEM);
+	if (resp == NULL) {
+		D_ERROR("failed to unpack SetupClientTelemetry response\n");
+		rc = -DER_NOMEM;
+		goto out_dresp;
+	}
+	if (resp->status != 0) {
+		D_ERROR("SetupClientTelemetry(%s) failed: " DF_RC "\n", req.sys,
+			DP_RC(resp->status));
+		rc = resp->status;
+		goto out_resp;
+	}
+
+	*owner_uid = resp->agent_uid;
+
+out_resp:
+	mgmt__client_telemetry_resp__free_unpacked(resp, &alloc.alloc);
+out_dresp:
+	drpc_response_free(dresp);
+out_dreq:
+	drpc_call_free(dreq);
+out_ctx:
+	drpc_close(ctx);
+out:
+	return rc;
+}
+
 /**
  * Initialize management interface
  */
diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c
index c599d8f8aaf8..f8e4e7e52998 100644
--- a/src/mgmt/svc.pb-c.c
+++ b/src/mgmt/svc.pb-c.c
@@ -649,6 +649,86 @@ void   mgmt__pool_monitor_req__free_unpacked
   assert(message->base.descriptor == &mgmt__pool_monitor_req__descriptor);
   protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator);
 }
+void
+mgmt__client_telemetry_req__init(Mgmt__ClientTelemetryReq *message)
+{
+  static const Mgmt__ClientTelemetryReq init_value = MGMT__CLIENT_TELEMETRY_REQ__INIT;
+  *message                                         = init_value;
+}
+size_t
+mgmt__client_telemetry_req__get_packed_size(const Mgmt__ClientTelemetryReq *message)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor);
+  return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message));
+}
+size_t
+mgmt__client_telemetry_req__pack(const Mgmt__ClientTelemetryReq *message, uint8_t *out)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor);
+  return protobuf_c_message_pack((const ProtobufCMessage *)message, out);
+}
+size_t
+mgmt__client_telemetry_req__pack_to_buffer(const Mgmt__ClientTelemetryReq *message,
+					   ProtobufCBuffer                *buffer)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor);
+  return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer);
+}
+Mgmt__ClientTelemetryReq *
+mgmt__client_telemetry_req__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data)
+{
+  return (Mgmt__ClientTelemetryReq *)protobuf_c_message_unpack(
+      &mgmt__client_telemetry_req__descriptor, allocator, len, data);
+}
+void
+mgmt__client_telemetry_req__free_unpacked(Mgmt__ClientTelemetryReq *message,
+					  ProtobufCAllocator       *allocator)
+{
+  if (!message)
+    return;
+  assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor);
+  protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator);
+}
+void
+mgmt__client_telemetry_resp__init(Mgmt__ClientTelemetryResp *message)
+{
+  static const Mgmt__ClientTelemetryResp init_value = MGMT__CLIENT_TELEMETRY_RESP__INIT;
+  *message                                          = init_value;
+}
+size_t
+mgmt__client_telemetry_resp__get_packed_size(const Mgmt__ClientTelemetryResp *message)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor);
+  return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message));
+}
+size_t
+mgmt__client_telemetry_resp__pack(const Mgmt__ClientTelemetryResp *message, uint8_t *out)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor);
+  return protobuf_c_message_pack((const ProtobufCMessage *)message, out);
+}
+size_t
+mgmt__client_telemetry_resp__pack_to_buffer(const Mgmt__ClientTelemetryResp *message,
+					    ProtobufCBuffer                 *buffer)
+{
+  assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor);
+  return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer);
+}
+Mgmt__ClientTelemetryResp *
+mgmt__client_telemetry_resp__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data)
+{
+  return (Mgmt__ClientTelemetryResp *)protobuf_c_message_unpack(
+      &mgmt__client_telemetry_resp__descriptor, allocator, len, data);
+}
+void
+mgmt__client_telemetry_resp__free_unpacked(Mgmt__ClientTelemetryResp *message,
+					   ProtobufCAllocator        *allocator)
+{
+  if (!message)
+    return;
+  assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor);
+  protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator);
+}
 static const ProtobufCFieldDescriptor mgmt__daos_resp__field_descriptors[1] =
 {
   {
@@ -1740,3 +1820,77 @@ const ProtobufCMessageDescriptor mgmt__pool_monitor_req__descriptor =
   (ProtobufCMessageInit) mgmt__pool_monitor_req__init,
   NULL,NULL,NULL    /* reserved[123] */
 };
+static const ProtobufCFieldDescriptor mgmt__client_telemetry_req__field_descriptors[3] = {
+    {
+	"sys", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */
+	offsetof(Mgmt__ClientTelemetryReq, sys), NULL, &protobuf_c_empty_string, 0, /* flags */
+	0, NULL, NULL /* reserved1,reserved2, etc */
+    },
+    {
+	"jobid", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */
+	offsetof(Mgmt__ClientTelemetryReq, jobid), NULL, &protobuf_c_empty_string, 0, /* flags */
+	0, NULL, NULL /* reserved1,reserved2, etc */
+    },
+    {
+	"shm_key", 3, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */
+	offsetof(Mgmt__ClientTelemetryReq, shm_key), NULL, NULL, 0,    /* flags */
+	0, NULL, NULL /* reserved1,reserved2, etc */
+    },
+};
+static const unsigned mgmt__client_telemetry_req__field_indices_by_name[] = {
+    1, /* field[1] = jobid */
+    2, /* field[2] = shm_key */
+    0, /* field[0] = sys */
+};
+static const ProtobufCIntRange mgmt__client_telemetry_req__number_ranges[1 + 1] = {{1, 0}, {0, 3}};
+const ProtobufCMessageDescriptor mgmt__client_telemetry_req__descriptor         = {
+    PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
+    "mgmt.ClientTelemetryReq",
+    "ClientTelemetryReq",
+    "Mgmt__ClientTelemetryReq",
+    "mgmt",
+    sizeof(Mgmt__ClientTelemetryReq),
+    3,
+    mgmt__client_telemetry_req__field_descriptors,
+    mgmt__client_telemetry_req__field_indices_by_name,
+    1,
+    mgmt__client_telemetry_req__number_ranges,
+    (ProtobufCMessageInit)mgmt__client_telemetry_req__init,
+    NULL,
+    NULL,
+    NULL /* reserved[123] */
+};
+static const ProtobufCFieldDescriptor mgmt__client_telemetry_resp__field_descriptors[2] = {
+    {
+	"status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */
+	offsetof(Mgmt__ClientTelemetryResp, status), NULL, NULL, 0,   /* flags */
+	0, NULL, NULL                                                 /* reserved1,reserved2, etc */
+    },
+    {
+	"agent_uid", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */
+	offsetof(Mgmt__ClientTelemetryResp, agent_uid), NULL, NULL, 0,   /* flags */
+	0, NULL, NULL /* reserved1,reserved2, etc */
+    },
+};
+static const unsigned mgmt__client_telemetry_resp__field_indices_by_name[] = {
+    1, /* field[1] = agent_uid */
+    0, /* field[0] = status */
+};
+static const ProtobufCIntRange mgmt__client_telemetry_resp__number_ranges[1 + 1] = {{1, 0}, {0, 2}};
+const ProtobufCMessageDescriptor mgmt__client_telemetry_resp__descriptor         = {
+    PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
+    "mgmt.ClientTelemetryResp",
+    "ClientTelemetryResp",
+    "Mgmt__ClientTelemetryResp",
+    "mgmt",
+    sizeof(Mgmt__ClientTelemetryResp),
+    2,
+    mgmt__client_telemetry_resp__field_descriptors,
+    mgmt__client_telemetry_resp__field_indices_by_name,
+    1,
+    mgmt__client_telemetry_resp__number_ranges,
+    (ProtobufCMessageInit)mgmt__client_telemetry_resp__init,
+    NULL,
+    NULL,
+    NULL /* reserved[123] */
+};
diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h
index 381b45534f35..789a636509b4 100644
--- a/src/mgmt/svc.pb-c.h
+++ b/src/mgmt/svc.pb-c.h
@@ -31,7 +31,8 @@ typedef struct _Mgmt__PrepShutdownReq Mgmt__PrepShutdownReq;
 typedef struct _Mgmt__PingRankReq Mgmt__PingRankReq;
 typedef struct _Mgmt__SetRankReq Mgmt__SetRankReq;
 typedef struct _Mgmt__PoolMonitorReq Mgmt__PoolMonitorReq;
-
+typedef struct _Mgmt__ClientTelemetryReq         Mgmt__ClientTelemetryReq;
+typedef struct _Mgmt__ClientTelemetryResp        Mgmt__ClientTelemetryResp;
 
 /* --- enums --- */
 
@@ -223,7 +224,7 @@ struct  _Mgmt__ClientNetHint
 {
   ProtobufCMessage base;
   /*
-   * CaRT OFI provider
+   * CaRT provider
    */
   char *provider;
   /*
@@ -378,6 +379,43 @@ struct  _Mgmt__PoolMonitorReq
  { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_monitor_req__descriptor) \
     , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string }
 
+struct _Mgmt__ClientTelemetryReq {
+  ProtobufCMessage base;
+  /*
+   * DAOS system identifier
+   */
+  char            *sys;
+  /*
+   * Job ID used for client telemetry
+   */
+  char            *jobid;
+  /*
+   * Client's shared memory segment key
+   */
+  int32_t          shm_key;
+};
+#define MGMT__CLIENT_TELEMETRY_REQ__INIT                                                           \
+  {                                                                                                \
+	  PROTOBUF_C_MESSAGE_INIT(&mgmt__client_telemetry_req__descriptor)                         \
+	  , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0                    \
+  }
+
+struct _Mgmt__ClientTelemetryResp {
+  ProtobufCMessage base;
+  /*
+   * DAOS status code
+   */
+  int32_t          status;
+  /*
+   * UID of agent process
+   */
+  int32_t          agent_uid;
+};
+#define MGMT__CLIENT_TELEMETRY_RESP__INIT                                                          \
+  {                                                                                                \
+	  PROTOBUF_C_MESSAGE_INIT(&mgmt__client_telemetry_resp__descriptor)                        \
+	  , 0, 0                                                                                   \
+  }
 
 /* Mgmt__DaosResp methods */
 void   mgmt__daos_resp__init
@@ -651,6 +689,36 @@ Mgmt__PoolMonitorReq *
 void   mgmt__pool_monitor_req__free_unpacked
                      (Mgmt__PoolMonitorReq *message,
                       ProtobufCAllocator *allocator);
+/* Mgmt__ClientTelemetryReq methods */
+void
+mgmt__client_telemetry_req__init(Mgmt__ClientTelemetryReq *message);
+size_t
+mgmt__client_telemetry_req__get_packed_size(const Mgmt__ClientTelemetryReq *message);
+size_t
+mgmt__client_telemetry_req__pack(const Mgmt__ClientTelemetryReq *message, uint8_t *out);
+size_t
+mgmt__client_telemetry_req__pack_to_buffer(const Mgmt__ClientTelemetryReq *message,
+					   ProtobufCBuffer                *buffer);
+Mgmt__ClientTelemetryReq *
+mgmt__client_telemetry_req__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
+void
+mgmt__client_telemetry_req__free_unpacked(Mgmt__ClientTelemetryReq *message,
+					  ProtobufCAllocator       *allocator);
+/* Mgmt__ClientTelemetryResp methods */
+void
+mgmt__client_telemetry_resp__init(Mgmt__ClientTelemetryResp *message);
+size_t
+mgmt__client_telemetry_resp__get_packed_size(const Mgmt__ClientTelemetryResp *message);
+size_t
+mgmt__client_telemetry_resp__pack(const Mgmt__ClientTelemetryResp *message, uint8_t *out);
+size_t
+mgmt__client_telemetry_resp__pack_to_buffer(const Mgmt__ClientTelemetryResp *message,
+					    ProtobufCBuffer                 *buffer);
+Mgmt__ClientTelemetryResp *
+mgmt__client_telemetry_resp__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
+void
+mgmt__client_telemetry_resp__free_unpacked(Mgmt__ClientTelemetryResp *message,
+					   ProtobufCAllocator        *allocator);
 /* --- per-message closures --- */
 
 typedef void (*Mgmt__DaosResp_Closure)
@@ -701,6 +769,10 @@ typedef void (*Mgmt__SetRankReq_Closure)
 typedef void (*Mgmt__PoolMonitorReq_Closure)
                  (const Mgmt__PoolMonitorReq *message,
                   void *closure_data);
+typedef void (*Mgmt__ClientTelemetryReq_Closure)(const Mgmt__ClientTelemetryReq *message,
+						 void                           *closure_data);
+typedef void (*Mgmt__ClientTelemetryResp_Closure)(const Mgmt__ClientTelemetryResp *message,
+						  void                            *closure_data);
 
 /* --- services --- */
 
@@ -724,6 +796,8 @@ extern const ProtobufCMessageDescriptor mgmt__prep_shutdown_req__descriptor;
 extern const ProtobufCMessageDescriptor mgmt__ping_rank_req__descriptor;
 extern const ProtobufCMessageDescriptor mgmt__set_rank_req__descriptor;
 extern const ProtobufCMessageDescriptor mgmt__pool_monitor_req__descriptor;
+extern const ProtobufCMessageDescriptor mgmt__client_telemetry_req__descriptor;
+extern const ProtobufCMessageDescriptor mgmt__client_telemetry_resp__descriptor;
 
 PROTOBUF_C__END_DECLS
 
diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c
index 9bc4f14362c4..f39f95600f69 100644
--- a/src/object/cli_mod.c
+++ b/src/object/cli_mod.c
@@ -12,6 +12,11 @@
 #include <daos/common.h>
 #include <daos/rpc.h>
 #include <daos/mgmt.h>
+#include <daos/tls.h>
+#include <daos/metrics.h>
+#include <daos/job.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_producer.h>
 #include <daos_types.h>
 #include "obj_rpc.h"
 #include "obj_internal.h"
@@ -19,14 +24,121 @@
 unsigned int	srv_io_mode = DIM_DTX_FULL_ENABLED;
 int		dc_obj_proto_version;
 
+/**
+ * Module-key init hook: allocate one struct dc_obj_tls and register its gauges.
+ * Metric paths start with the calling thread's id. UPDATE, TGT_UPDATE and FETCH
+ * get no per-opcode latency gauge; UPDATE and FETCH get per-I/O-size ones instead.
+ * Returns the new TLS, or NULL if allocation or any registration fails.
+ */
+static void *
+dc_obj_tls_init(int tags, int xs_id, int pid)
+{
+	unsigned long      thread_id = pthread_self();
+	struct dc_obj_tls *obj_tls;
+	int                op;
+	int                rc;
+
+	D_ALLOC_PTR(obj_tls);
+	if (obj_tls == NULL)
+		return NULL;
+
+	for (op = 0; op < OBJ_PROTO_CLI_COUNT; op++) {
+		/** gauge counting the requests of this opcode that are in flight */
+		rc = d_tm_add_metric(&obj_tls->cot_op_active[op], D_TM_STATS_GAUGE,
+				     "number of active object RPCs", "ops", "%lu/io/ops/%s/active",
+				     thread_id, obj_opc_to_str(op));
+		if (rc != 0) {
+			D_WARN("Failed to create active counter: " DF_RC "\n", DP_RC(rc));
+			D_GOTO(fail, rc);
+		}
+
+		switch (op) {
+		case DAOS_OBJ_RPC_UPDATE:
+		case DAOS_OBJ_RPC_TGT_UPDATE:
+		case DAOS_OBJ_RPC_FETCH:
+			/** no per-opcode latency gauge for these three */
+			continue;
+		default:
+			break;
+		}
+
+		/** gauge holding the latency of this opcode */
+		rc = d_tm_add_metric(&obj_tls->cot_op_lat[op], D_TM_STATS_GAUGE,
+				     "object RPC processing time", "us", "%lu/io/ops/%s/latency",
+				     thread_id, obj_opc_to_str(op));
+		if (rc != 0) {
+			D_WARN("Failed to create latency sensor: " DF_RC "\n", DP_RC(rc));
+			D_GOTO(fail, rc);
+		}
+	}
+
+	/** per-I/O-size latency gauges, update first and then fetch */
+	rc = obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, pid, obj_tls->cot_update_lat,
+				 obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time",
+				 false);
+	if (rc == 0)
+		rc = obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, pid, obj_tls->cot_fetch_lat,
+					 obj_opc_to_str(DAOS_OBJ_RPC_FETCH),
+					 "fetch RPC processing time", false);
+	if (rc == 0)
+		return obj_tls;
+fail:
+	D_FREE(obj_tls);
+	return NULL;
+}
+
+/** Module-key fini hook (see dc_obj_module_key): free the per-thread data in \a data. */
+static void
+dc_obj_tls_fini(int tags, void *data)
+{
+	/* \a tags is not used */
+	D_FREE(data);
+}
+
+struct daos_module_key dc_obj_module_key = {
+    .dmk_tags  = DAOS_CLI_TAG,
+    .dmk_index = -1, /* NOTE(review): presumably filled in by daos_register_key() - confirm */
+    .dmk_init  = dc_obj_tls_init, /* allocates the struct dc_obj_tls */
+    .dmk_fini  = dc_obj_tls_fini, /* frees it again */
+};
+
+static void *
+dc_obj_metrics_alloc(const char *path, int tgt_id)
+{
+	return obj_metrics_alloc_internal(path, tgt_id, false); /* false: no "/tgt_N" suffix */
+}
+
+static void
+dc_obj_metrics_free(void *data)
+{
+	D_FREE(data); /* frees only the block itself, nothing it points to */
+}
+
+/* Per-pool client metrics hooks of the object module, registered in dc_obj_init() */
+struct daos_module_metrics dc_obj_metrics = {
+    .dmm_tags       = DAOS_CLI_TAG,
+    .dmm_init       = dc_obj_metrics_alloc,
+    .dmm_fini       = dc_obj_metrics_free,
+    .dmm_nr_metrics = obj_metrics_count,
+};
+
 /**
  * Initialize object interface
  */
 int
 dc_obj_init(void)
 {
-	uint32_t		ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION};
-	int			rc;
+	uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION};
+	int      rc;
+
+	if (daos_client_metric) {
+		daos_register_key(&dc_obj_module_key);
+		rc = daos_metrics_init(DAOS_CLI_TAG, DAOS_OBJ_MODULE, &dc_obj_metrics);
+		if (rc) {
+			DL_ERROR(rc, "register object failed");
+			return rc;
+		}
+	}
 
 	rc = obj_utils_init();
 	if (rc)
@@ -78,6 +190,7 @@ dc_obj_init(void)
 out_utils:
 	if (rc)
 		obj_utils_fini();
+
 	return rc;
 }
 
@@ -94,4 +207,6 @@ dc_obj_fini(void)
 	obj_ec_codec_fini();
 	obj_class_fini();
 	obj_utils_fini();
+	if (daos_client_metric)
+		daos_unregister_key(&dc_obj_module_key);
 }
diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c
index 6179e1deb25b..71b65cdf025a 100644
--- a/src/object/cli_shard.c
+++ b/src/object/cli_shard.c
@@ -14,7 +14,9 @@
 #include <daos/pool_map.h>
 #include <daos/rpc.h>
 #include <daos/checksum.h>
-#include "obj_rpc.h"
+#include <daos/metrics.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_producer.h>
 #include "obj_internal.h"
 
 static inline struct dc_obj_layout *
@@ -104,6 +106,7 @@ struct rw_cb_args {
 	daos_iom_t		*maps;
 	crt_endpoint_t		tgt_ep;
 	struct shard_rw_args	*shard_args;
+	uint64_t                 send_time;
 };
 
 static struct dcs_layout *
@@ -886,6 +889,99 @@ dc_shard_update_size(struct rw_cb_args *rw_args, int fetch_rc)
 	return rc;
 }
 
+/** Size of a fetch reply: packed inline sgls if present, else the sum of replied sizes. */
+daos_size_t
+obj_get_fetch_size(struct rw_cb_args *arg)
+{
+	struct obj_rw_out *orwo = crt_reply_get(arg->rpc);
+	daos_size_t       *replied_sizes;
+	daos_size_t        size = 0;
+	uint64_t           i; /* unsigned: no signed/unsigned compare against ca_count */
+
+	/* inline transfer: the reply carries the sgls itself */
+	if (orwo->orw_sgls.ca_count > 0)
+		return daos_sgls_packed_size(orwo->orw_sgls.ca_arrays, orwo->orw_sgls.ca_count,
+					     NULL);
+
+	/* no sgls recorded for this request: report 0 */
+	if (arg->rwaa_sgls == NULL)
+		return 0;
+	/* bulk transfer: add up the sizes listed in orw_data_sizes */
+	replied_sizes = orwo->orw_data_sizes.ca_arrays;
+	for (i = 0; i < orwo->orw_data_sizes.ca_count; i++)
+		size += replied_sizes[i];
+	return size;
+}
+
+/**
+ * Account \a rpc as in flight: bump the per-opcode "active" gauge kept in the
+ * caller's object TLS. Does nothing unless client metrics are enabled.
+ */
+static void
+obj_shard_update_metrics_begin(crt_rpc_t *rpc)
+{
+	struct dc_obj_tls *obj_tls;
+	if (daos_client_metric) {
+		obj_tls = dc_obj_tls_get();
+		D_ASSERT(obj_tls != NULL);
+		d_tm_inc_gauge(obj_tls->cot_op_active[opc_get(rpc->cr_opc)], 1);
+	}
+}
+
+/**
+ * Finish metrics for one RPC: always drop the active gauge taken at send time; if
+ * \a ret is 0 also record latency and, for UPDATE/FETCH, bytes (\a arg: rw_cb_args).
+ */
+static void
+obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int ret)
+{
+	struct dc_obj_tls       *tls;
+	struct rw_cb_args       *rw_args;
+	struct dc_pool          *pool;
+	struct obj_rw_in        *orw;
+	struct d_tm_node_t      *lat = NULL;
+	struct obj_pool_metrics *opm = NULL;
+	daos_size_t              size;
+	uint64_t                 time;
+	int                      opc;
+
+	if (!daos_client_metric)
+		return;
+	tls = dc_obj_tls_get();
+	D_ASSERT(tls != NULL);
+	opc = opc_get(rpc->cr_opc);
+	/* Undo the increment from _begin() even on failure, or the gauge never drains. */
+	d_tm_dec_gauge(tls->cot_op_active[opc], 1);
+	if (ret != 0)
+		return;
+	orw  = crt_req_get(rpc);
+	time = (daos_get_ntime() - send_time) >> 10; /* shift, not "/ 1000": fast but inexact */
+	switch (opc) {
+	case DAOS_OBJ_RPC_UPDATE:
+	case DAOS_OBJ_RPC_FETCH:
+		rw_args = arg;
+		pool    = rw_args->shard_args->auxi.obj_auxi->obj->cob_pool;
+		D_ASSERT(pool != NULL);
+		opm = pool->dp_metrics[DAOS_OBJ_MODULE];
+		D_ASSERTF(opm != NULL, "pool %p\n", pool);
+		if (opc == DAOS_OBJ_RPC_UPDATE) {
+			size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL);
+			d_tm_inc_counter(opm->opm_update_bytes, size);
+			lat = tls->cot_update_lat[lat_bucket(size)];
+		} else {
+			size = obj_get_fetch_size(rw_args);
+			lat  = tls->cot_fetch_lat[lat_bucket(size)];
+			d_tm_inc_counter(opm->opm_fetch_bytes, size);
+		}
+		break;
+	default:
+		lat = tls->cot_op_lat[opc];
+		break;
+	}
+	if (lat != NULL)
+		d_tm_set_gauge(lat, time);
+}
+
 static int
 dc_rw_cb(tse_task_t *task, void *arg)
 {
@@ -1191,10 +1287,15 @@ dc_rw_cb(tse_task_t *task, void *arg)
 out:
 	if (rc == -DER_CSUM && opc == DAOS_OBJ_RPC_FETCH)
 		dc_shard_csum_report(task, &rw_args->tgt_ep, rw_args->rpc);
+
+	obj_shard_update_metrics_end(rw_args->rpc, rw_args->send_time, rw_args,
+				     ret == 0 ? rc : ret);
+
 	crt_req_decref(rw_args->rpc);
 
 	if (ret == 0 || obj_retry_error(rc))
 		ret = rc;
+
 	return ret;
 }
 
@@ -1362,7 +1463,9 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc,
 	rw_args.co = shard->do_co;
 	rw_args.shard_args = args;
 	/* remember the sgl to copyout the data inline for fetch */
-	rw_args.rwaa_sgls = (opc == DAOS_OBJ_RPC_FETCH) ? sgls : NULL;
+	rw_args.rwaa_sgls = sgls;
+	rw_args.send_time = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 	if (args->reasb_req && args->reasb_req->orr_recov) {
 		rw_args.maps = NULL;
 		orw->orw_flags |= ORF_EC_RECOV;
@@ -1421,6 +1524,7 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc,
 struct obj_punch_cb_args {
 	crt_rpc_t	*rpc;
 	unsigned int	*map_ver;
+	uint64_t         send_time;
 };
 
 static int
@@ -1436,7 +1540,10 @@ obj_shard_punch_cb(tse_task_t *task, void *data)
 		*cb_args->map_ver = obj_reply_map_version_get(rpc);
 	}
 
+	obj_shard_update_metrics_end(cb_args->rpc, cb_args->send_time, cb_args, task->dt_result);
+
 	crt_req_decref(rpc);
+
 	return task->dt_result;
 }
 
@@ -1480,6 +1587,8 @@ dc_obj_shard_punch(struct dc_obj_shard *shard, enum obj_rpc_opc opc,
 	crt_req_addref(req);
 	cb_args.rpc = req;
 	cb_args.map_ver = &args->pa_auxi.map_ver;
+	cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 	rc = tse_task_register_comp_cb(task, obj_shard_punch_cb, &cb_args,
 				       sizeof(cb_args));
 	if (rc != 0)
@@ -1540,6 +1649,7 @@ struct obj_enum_args {
 	d_iov_t			*csum;
 	struct dtx_epoch	*epoch;
 	daos_handle_t		*th;
+	uint64_t                 send_time;
 };
 
 /**
@@ -1858,10 +1968,15 @@ dc_enumerate_cb(tse_task_t *task, void *arg)
 		crt_bulk_free(oei->oei_bulk);
 	if (oei->oei_kds_bulk != NULL)
 		crt_bulk_free(oei->oei_kds_bulk);
+
+	obj_shard_update_metrics_end(enum_args->rpc, enum_args->send_time, enum_args,
+				     ret == 0 ? rc : ret);
+
 	crt_req_decref(enum_args->rpc);
 
 	if (ret == 0 || obj_retry_error(rc))
 		ret = rc;
+
 	return ret;
 }
 
@@ -2007,6 +2122,8 @@ dc_obj_shard_list(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc,
 	enum_args.eaa_recxs = args->la_recxs;
 	enum_args.epoch = &args->la_auxi.epoch;
 	enum_args.th = &obj_args->th;
+	enum_args.send_time       = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 	rc = tse_task_register_comp_cb(task, dc_enumerate_cb, &enum_args,
 				       sizeof(enum_args));
 	if (rc != 0)
@@ -2038,6 +2155,7 @@ struct obj_query_key_cb_args {
 	struct dc_obj_shard	*shard;
 	struct dtx_epoch	epoch;
 	daos_handle_t		th;
+	uint64_t                 send_time;
 };
 
 static void
@@ -2235,6 +2353,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data)
 	D_SPIN_UNLOCK(&cb_args->obj->cob_spin);
 
 out:
+	obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc);
 	crt_req_decref(rpc);
 	if (ret == 0 || obj_retry_error(rc))
 		ret = rc;
@@ -2285,6 +2404,8 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint
 	cb_args.epoch		= *epoch;
 	cb_args.th		= th;
 	cb_args.max_epoch	= max_epoch;
+	cb_args.send_time       = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 
 	rc = tse_task_register_comp_cb(task, obj_shard_query_key_cb, &cb_args, sizeof(cb_args));
 	if (rc != 0)
@@ -2328,6 +2449,7 @@ struct obj_shard_sync_cb_args {
 	crt_rpc_t	*rpc;
 	daos_epoch_t	*epoch;
 	uint32_t	*map_ver;
+	uint64_t         send_time;
 };
 
 static int
@@ -2377,6 +2499,8 @@ obj_shard_sync_cb(tse_task_t *task, void *data)
 		oso->oso_epoch, oso->oso_map_version);
 
 out:
+	obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc);
+
 	crt_req_decref(rpc);
 	return rc;
 }
@@ -2418,10 +2542,11 @@ dc_obj_shard_sync(struct dc_obj_shard *shard, enum obj_rpc_opc opc,
 		D_GOTO(out, rc);
 
 	crt_req_addref(req);
-	cb_args.rpc	= req;
-	cb_args.epoch	= args->sa_epoch;
-	cb_args.map_ver = &args->sa_auxi.map_ver;
-
+	cb_args.rpc       = req;
+	cb_args.epoch     = args->sa_epoch;
+	cb_args.map_ver   = &args->sa_auxi.map_ver;
+	cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 	rc = tse_task_register_comp_cb(task, obj_shard_sync_cb, &cb_args,
 				       sizeof(cb_args));
 	if (rc != 0)
@@ -2455,8 +2580,9 @@ struct obj_k2a_args {
 	unsigned int		*eaa_map_ver;
 	struct dtx_epoch	*epoch;
 	daos_handle_t		*th;
-	daos_anchor_t		*anchor;
-	uint32_t		shard;
+	daos_anchor_t           *anchor;
+	uint64_t                 send_time;
+	uint32_t                 shard;
 };
 
 static int
@@ -2511,6 +2637,8 @@ dc_k2a_cb(tse_task_t *task, void *arg)
 	enum_anchor_copy(k2a_args->anchor, &oko->oko_anchor);
 	dc_obj_shard2anchor(k2a_args->anchor, k2a_args->shard);
 out:
+	obj_shard_update_metrics_end(k2a_args->rpc, k2a_args->send_time, k2a_args,
+				     ret == 0 ? rc : ret);
 	if (k2a_args->eaa_obj != NULL)
 		obj_shard_decref(k2a_args->eaa_obj);
 	crt_req_decref(k2a_args->rpc);
@@ -2584,6 +2712,8 @@ dc_obj_shard_key2anchor(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc,
 	cb_args.th = &obj_args->th;
 	cb_args.anchor = args->ka_anchor;
 	cb_args.shard = obj_shard->do_shard_idx;
+	cb_args.send_time   = daos_client_metric ? daos_get_ntime() : 0;
+	obj_shard_update_metrics_begin(req);
 	rc = tse_task_register_comp_cb(task, dc_k2a_cb, &cb_args, sizeof(cb_args));
 	if (rc != 0)
 		D_GOTO(out_eaa, rc);
diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h
index 149915c10fa7..4d750c873328 100644
--- a/src/object/obj_internal.h
+++ b/src/object/obj_internal.h
@@ -22,6 +22,7 @@
 #include <daos/object.h>
 #include <daos/cont_props.h>
 #include <daos/container.h>
+#include <daos/tls.h>
 
 #include "obj_rpc.h"
 #include "obj_ec.h"
@@ -535,6 +536,87 @@ struct dc_obj_verify_args {
 	struct dc_obj_verify_cursor	 cursor;
 };
 
+/*
+ * Report latency per I/O size.
+ * Buckets start at [0; 256B] and double in size at each step
+ * (i.e. ]256B; 512B], ]512B; 1KB]) up to ]4MB; infinity[.
+ * Since 4MB = 2^22 and 256B = 2^8, this means
+ * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so
+ * 16 buckets in total.
+ */
+#define NR_LATENCY_BUCKETS 16
+
+struct dc_obj_tls {
+	/** Update/fetch latency gauges, one per I/O size bucket (index from lat_bucket()) */
+	struct d_tm_node_t *cot_update_lat[NR_LATENCY_BUCKETS];
+	struct d_tm_node_t *cot_fetch_lat[NR_LATENCY_BUCKETS];
+
+	/** Per-opcode latency gauges in us; a slot may be NULL, users must check */
+	struct d_tm_node_t *cot_op_lat[OBJ_PROTO_CLI_COUNT];
+	/** Per-opcode gauges counting the requests currently active */
+	struct d_tm_node_t *cot_op_active[OBJ_PROTO_CLI_COUNT];
+};
+
+int
+obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc,
+		    bool server);
+extern struct daos_module_key dc_obj_module_key;
+
+/** Return the object-module TLS of the caller; asserts that client TLS is available. */
+static inline struct dc_obj_tls *
+dc_obj_tls_get(void)
+{
+	struct daos_thread_local_storage *dtls = dc_tls_get(dc_obj_module_key.dmk_tags);
+
+	D_ASSERT(dtls != NULL);
+	return daos_module_key_get(dtls, &dc_obj_module_key);
+}
+
+/** Per-pool object metrics; keep only d_tm_node_t pointers, obj_metrics_count() relies on it */
+struct obj_pool_metrics {
+	/** Count number of total per-opcode requests (type = counter) */
+	struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT];
+	/** Total number of bytes fetched (type = counter) */
+	struct d_tm_node_t *opm_fetch_bytes;
+	/** Total number of bytes updated (type = counter) */
+	struct d_tm_node_t *opm_update_bytes;
+	/** Total number of silently restarted updates (type = counter) */
+	struct d_tm_node_t *opm_update_restart;
+	/** Total number of resent update operations (type = counter) */
+	struct d_tm_node_t *opm_update_resent;
+	/** Total number of retry update operations (type = counter) */
+	struct d_tm_node_t *opm_update_retry;
+	/** Total number of EC full-stripe update operations (type = counter) */
+	struct d_tm_node_t *opm_update_ec_full;
+	/** Total number of EC partial update operations (type = counter) */
+	struct d_tm_node_t *opm_update_ec_partial;
+};
+
+void
+obj_metrics_free(void *data);
+int
+obj_metrics_count(void);
+void *
+obj_metrics_alloc_internal(const char *path, int tgt_id, bool server);
+
+/** Map an I/O size in bytes to its latency bucket index (0 .. NR_LATENCY_BUCKETS - 1). */
+static inline unsigned int
+lat_bucket(uint64_t size)
+{
+	int nr;
+
+	/** everything up to and including 256B shares the first bucket */
+	if (size <= 256)
+		return 0;
+	/** leading zero-bits of a 64-bit value; the "ll" builtin matches uint64_t */
+	nr = __builtin_clzll(size - 1);
+	/** >4MB, i.e. size - 1 >= 2^22, return last bucket */
+	if (nr < 42)
+		return NR_LATENCY_BUCKETS - 1;
+	/** a size in ]2^(k-1); 2^k] has nr = 64 - k and lands in bucket k - 8 */
+	return 56 - nr;
+}
+
 static inline int
 dc_cont2uuid(struct dc_cont *dc_cont, uuid_t *hdl_uuid, uuid_t *uuid)
 {
diff --git a/src/object/obj_utils.c b/src/object/obj_utils.c
index 8312c6719d89..f85409aee9b2 100644
--- a/src/object/obj_utils.c
+++ b/src/object/obj_utils.c
@@ -10,6 +10,10 @@
 #define DDSUBSYS	DDFAC(object)
 
 #include <daos_types.h>
+#include <daos/debug.h>
+#include <daos/job.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_producer.h>
 #include "obj_internal.h"
 
 static daos_size_t
@@ -86,6 +90,150 @@ daos_iods_free(daos_iod_t *iods, int nr, bool need_free)
 		D_FREE(iods);
 }
 
+/**
+ * Register NR_LATENCY_BUCKETS per-I/O-size latency gauges (unit "us") into \a tm.
+ * Labels double from 256B to 4MB, the last one is GT4MB. Server paths end with
+ * tgt_<tgt_id>; client paths start with the caller's thread id and ignore \a tgt_id.
+ * \a opc is unused. Registration failures are only warned about; returns the last rc.
+ */
+int
+obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc,
+		    bool server)
+{
+	unsigned long tid        = pthread_self();
+	unsigned int  bucket_max = 256;
+	int           i;
+	int           rc = 0;
+
+	for (i = 0; i < NR_LATENCY_BUCKETS; i++, bucket_max <<= 1) {
+		char  label[16];
+		char *path;
+
+		if (bucket_max < 1024) /** B */
+			snprintf(label, sizeof(label), "%uB", bucket_max);
+		else if (bucket_max < 1024 * 1024) /** KB */
+			snprintf(label, sizeof(label), "%uKB", bucket_max / 1024);
+		else if (bucket_max <= 1024 * 1024 * 4) /** MB */
+			snprintf(label, sizeof(label), "%uMB", bucket_max / (1024 * 1024));
+		else /** >4MB */
+			snprintf(label, sizeof(label), "GT4MB");
+
+		if (server)
+			D_ASPRINTF(path, "io/latency/%s/%s/tgt_%u", op, label, tgt_id);
+		else
+			D_ASPRINTF(path, "%lu/io/latency/%s/%s", tid, op, label);
+		if (path == NULL) {
+			/** out of memory: never pass a NULL path to d_tm_add_metric() */
+			rc = -DER_NOMEM;
+			continue;
+		}
+
+		/** pass the path as an argument so a '%' in \a op is never interpreted */
+		rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, desc, "us", "%s", path);
+		if (rc)
+			D_WARN("Failed to create per-I/O size latency "
+			       "sensor: " DF_RC "\n",
+			       DP_RC(rc));
+		D_FREE(path);
+	}
+
+	return rc;
+}
+
+void
+obj_metrics_free(void *data)
+{
+	D_FREE(data); /* frees only the block itself, nothing it points to */
+}
+
+int
+obj_metrics_count(void)
+{ /* number of pointer-sized slots in struct obj_pool_metrics */
+	return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t *));
+}
+
+/**
+ * Allocate a struct obj_pool_metrics and register its counters below \a path; only
+ * servers append "/tgt_<tgt_id>". Registration failures are warned about and ignored.
+ */
+void *
+obj_metrics_alloc_internal(const char *path, int tgt_id, bool server)
+{
+	struct obj_pool_metrics *metrics;
+	char                     tgt_path[32] = "";
+	uint32_t                 opc;
+	int                      rc;
+
+	D_ASSERT(tgt_id >= 0);
+	if (server)
+		snprintf(tgt_path, sizeof(tgt_path), "/tgt_%u", tgt_id);
+
+	D_ALLOC_PTR(metrics);
+	if (metrics == NULL) {
+		D_ERROR("failed to alloc object metrics\n");
+		return NULL;
+	}
+
+	/** Total number of requests per opcode, of type counter */
+	for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) {
+		rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER,
+				     "total number of processed object RPCs", "ops", "%s/ops/%s%s",
+				     path, obj_opc_to_str(opc), tgt_path);
+		if (rc)
+			D_WARN("Failed to create total counter: " DF_RC "\n", DP_RC(rc));
+	}
+
+	/** Total number of silently restarted updates, of type counter */
+	rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER,
+			     "total number of restarted update ops", "updates", "%s/restarted%s",
+			     path, tgt_path);
+	if (rc)
+		D_WARN("Failed to create restarted counter: " DF_RC "\n", DP_RC(rc));
+
+	/** Total number of resent updates, of type counter */
+	rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER,
+			     "total number of resent update RPCs", "updates", "%s/resent%s", path,
+			     tgt_path);
+	if (rc)
+		D_WARN("Failed to create resent counter: " DF_RC "\n", DP_RC(rc));
+
+	/** Total number of retry updates locally, of type counter */
+	rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER,
+			     "total number of retried update RPCs", "updates", "%s/retry%s", path,
+			     tgt_path);
+	if (rc)
+		D_WARN("Failed to create retry cnt sensor: " DF_RC "\n", DP_RC(rc));
+
+	/** Total bytes read */
+	rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER,
+			     "total number of bytes fetched/read", "bytes", "%s/xferred/fetch%s",
+			     path, tgt_path);
+	if (rc)
+		D_WARN("Failed to create bytes fetch counter: " DF_RC "\n", DP_RC(rc));
+
+	/** Total bytes written */
+	rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER,
+			     "total number of bytes updated/written", "bytes",
+			     "%s/xferred/update%s", path, tgt_path);
+	if (rc)
+		D_WARN("Failed to create bytes update counter: " DF_RC "\n", DP_RC(rc));
+
+	/** Total number of EC full-stripe update operations, of type counter */
+	rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER,
+			     "total number of EC full-stripe updates", "updates",
+			     "%s/EC_update/full_stripe%s", path, tgt_path);
+	if (rc)
+		D_WARN("Failed to create EC full stripe update counter: " DF_RC "\n", DP_RC(rc));
+
+	/** Total number of EC partial update operations, of type counter */
+	rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER,
+			     "total number of EC partial updates", "updates",
+			     "%s/EC_update/partial%s", path, tgt_path);
+	if (rc)
+		D_WARN("Failed to create EC partial update counter: " DF_RC "\n", DP_RC(rc));
+	return metrics;
+}
+
 struct recx_rec {
 	daos_recx_t	*rr_recx;
 };
diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h
index 368595bbfb46..885a966c55cb 100644
--- a/src/object/srv_internal.h
+++ b/src/object/srv_internal.h
@@ -114,36 +114,6 @@ struct migrate_cont_hdl {
 void
 migrate_pool_tls_destroy(struct migrate_pool_tls *tls);
 
-/*
- * Report latency on a per-I/O size.
- * Buckets starts at [0; 256B[ and are increased by power of 2
- * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[
- * Since 4MB = 2^22 and 256B = 2^8, this means
- * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so
- * 16 buckets in total.
- */
-#define NR_LATENCY_BUCKETS 16
-
-struct obj_pool_metrics {
-	/** Count number of total per-opcode requests (type = counter) */
-	struct d_tm_node_t	*opm_total[OBJ_PROTO_CLI_COUNT];
-	/** Total number of bytes fetched (type = counter) */
-	struct d_tm_node_t	*opm_fetch_bytes;
-	/** Total number of bytes updated (type = counter) */
-	struct d_tm_node_t	*opm_update_bytes;
-
-	/** Total number of silently restarted updates (type = counter) */
-	struct d_tm_node_t	*opm_update_restart;
-	/** Total number of resent update operations (type = counter) */
-	struct d_tm_node_t	*opm_update_resent;
-	/** Total number of retry update operations (type = counter) */
-	struct d_tm_node_t	*opm_update_retry;
-	/** Total number of EC full-stripe update operations (type = counter) */
-	struct d_tm_node_t	*opm_update_ec_full;
-	/** Total number of EC partial update operations (type = counter) */
-	struct d_tm_node_t	*opm_update_ec_partial;
-};
-
 struct obj_tls {
 	d_sg_list_t		ot_echo_sgl;
 	d_list_t		ot_pool_list;
@@ -175,24 +145,6 @@ obj_tls_get()
 	return dss_module_key_get(dss_tls_get(), &obj_module_key);
 }
 
-static inline unsigned int
-lat_bucket(uint64_t size)
-{
-	int nr;
-
-	if (size <= 256)
-		return 0;
-
-	/** return number of leading zero-bits */
-	nr =  __builtin_clzl(size - 1);
-
-	/** >4MB, return last bucket */
-	if (nr < 42)
-		return NR_LATENCY_BUCKETS - 1;
-
-	return 56 - nr;
-}
-
 enum latency_type {
 	BULK_LATENCY,
 	BIO_LATENCY,
diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c
index 4fd889bb7de0..ddb39b8e9fb7 100644
--- a/src/object/srv_mod.c
+++ b/src/object/srv_mod.c
@@ -12,6 +12,7 @@
 #include <daos_srv/vos.h>
 #include <daos_srv/pool.h>
 #include <daos/rpc.h>
+#include <daos/metrics.h>
 #include "obj_rpc.h"
 #include "srv_internal.h"
 
@@ -73,41 +74,6 @@ static struct daos_rpc_handler obj_handlers[] = {
 
 #undef X
 
-static int
-obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc)
-{
-	unsigned int	bucket_max = 256;
-	int		i;
-	int		rc = 0;
-
-	for (i = 0; i < NR_LATENCY_BUCKETS; i++) {
-		char *path;
-
-		if (bucket_max < 1024) /** B */
-			D_ASPRINTF(path, "io/latency/%s/%uB/tgt_%u",
-				   op, bucket_max, tgt_id);
-		else if (bucket_max < 1024 * 1024) /** KB */
-			D_ASPRINTF(path, "io/latency/%s/%uKB/tgt_%u",
-				   op, bucket_max / 1024, tgt_id);
-		else if (bucket_max <= 1024 * 1024 * 4) /** MB */
-			D_ASPRINTF(path, "io/latency/%s/%uMB/tgt_%u",
-				   op, bucket_max / (1024 * 1024), tgt_id);
-		else /** >4MB */
-			D_ASPRINTF(path, "io/latency/%s/GT4MB/tgt_%u",
-				   op, tgt_id);
-
-		rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, desc, "us", path);
-		if (rc)
-			D_WARN("Failed to create per-I/O size latency "
-			       "sensor: "DF_RC"\n", DP_RC(rc));
-		D_FREE(path);
-
-		bucket_max <<= 1;
-	}
-
-	return rc;
-}
-
 static void *
 obj_tls_init(int tags, int xs_id, int tgt_id)
 {
@@ -158,27 +124,28 @@ obj_tls_init(int tags, int xs_id, int tgt_id)
 	 */
 
 	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_lat,
-			    obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time");
+			    obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time",
+			    true);
 	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_lat,
-			    obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time");
+			    obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time", true);
 
 	obj_latency_tm_init(DAOS_OBJ_RPC_TGT_UPDATE, tgt_id, tls->ot_tgt_update_lat,
 			    obj_opc_to_str(DAOS_OBJ_RPC_TGT_UPDATE),
-			    "update tgt RPC processing time");
-	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bulk_lat,
-			    "bulk_update", "Bulk update processing time");
-	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bulk_lat,
-			    "bulk_fetch", "Bulk fetch processing time");
-
-	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_vos_lat,
-			    "vos_update", "VOS update processing time");
-	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_vos_lat,
-			    "vos_fetch", "VOS fetch processing time");
-
-	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bio_lat,
-			    "bio_update", "BIO update processing time");
-	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bio_lat,
-			    "bio_fetch", "BIO fetch processing time");
+			    "update tgt RPC processing time", true);
+	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bulk_lat, "bulk_update",
+			    "Bulk update processing time", true);
+	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bulk_lat, "bulk_fetch",
+			    "Bulk fetch processing time", true);
+
+	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_vos_lat, "vos_update",
+			    "VOS update processing time", true);
+	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_vos_lat, "vos_fetch",
+			    "VOS fetch processing time", true);
+
+	obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bio_lat, "bio_update",
+			    "BIO update processing time", true);
+	obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bio_lat, "bio_fetch",
+			    "BIO fetch processing time", true);
 
 	return tls;
 }
@@ -239,103 +206,14 @@ static struct dss_module_ops ds_obj_mod_ops = {
 static void *
 obj_metrics_alloc(const char *path, int tgt_id)
 {
-	struct obj_pool_metrics	*metrics;
-	uint32_t		opc;
-	int			rc;
-
-	D_ASSERT(tgt_id >= 0);
-
-	D_ALLOC_PTR(metrics);
-	if (metrics == NULL)
-		return NULL;
-
-	/** register different per-opcode counters */
-	for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) {
-		/** Then the total number of requests, of type counter */
-		rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER,
-				     "total number of processed object RPCs",
-				     "ops", "%s/ops/%s/tgt_%u", path,
-				     obj_opc_to_str(opc), tgt_id);
-		if (rc)
-			D_WARN("Failed to create total counter: "DF_RC"\n",
-			       DP_RC(rc));
-	}
-
-	/** Total number of silently restarted updates, of type counter */
-	rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER,
-			     "total number of restarted update ops", "updates",
-			     "%s/restarted/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create restarted counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	/** Total number of resent updates, of type counter */
-	rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER,
-			     "total number of resent update RPCs", "updates",
-			     "%s/resent/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create resent counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	/** Total number of retry updates locally, of type counter */
-	rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER,
-			     "total number of retried update RPCs", "updates",
-			     "%s/retry/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create retry cnt sensor: "DF_RC"\n", DP_RC(rc));
-
-	/** Total bytes read */
-	rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER,
-			     "total number of bytes fetched/read", "bytes",
-			     "%s/xferred/fetch/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create bytes fetch counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	/** Total bytes written */
-	rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER,
-			     "total number of bytes updated/written", "bytes",
-			     "%s/xferred/update/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create bytes update counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	/** Total number of EC full-stripe update operations, of type counter */
-	rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER,
-			     "total number of EC sull-stripe updates", "updates",
-			     "%s/EC_update/full_stripe/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create EC full stripe update counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	/** Total number of EC partial update operations, of type counter */
-	rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER,
-			     "total number of EC sull-partial updates", "updates",
-			     "%s/EC_update/partial/tgt_%u", path, tgt_id);
-	if (rc)
-		D_WARN("Failed to create EC partial update counter: "DF_RC"\n",
-		       DP_RC(rc));
-
-	return metrics;
-}
-
-static void
-obj_metrics_free(void *data)
-{
-	D_FREE(data);
-}
-
-static int
-obj_metrics_count(void)
-{
-	return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t *));
+	return obj_metrics_alloc_internal(path, tgt_id, true);
 }
 
-struct dss_module_metrics obj_metrics = {
-	.dmm_tags = DAOS_TGT_TAG,
-	.dmm_init = obj_metrics_alloc,
-	.dmm_fini = obj_metrics_free,
-	.dmm_nr_metrics = obj_metrics_count,
+struct daos_module_metrics obj_metrics = {
+    .dmm_tags       = DAOS_TGT_TAG,
+    .dmm_init       = obj_metrics_alloc,
+    .dmm_fini       = obj_metrics_free,
+    .dmm_nr_metrics = obj_metrics_count,
 };
 
 struct dss_module obj_module = {
diff --git a/src/pool/cli.c b/src/pool/cli.c
index e688cd9ecd33..89f7eb256a14 100644
--- a/src/pool/cli.c
+++ b/src/pool/cli.c
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright 2016-2022 Intel Corporation.
+ * (C) Copyright 2016-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -15,9 +15,13 @@
 #define D_LOGFAC	DD_FAC(pool)
 
 #include <daos/common.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_producer.h>
 #include <daos/event.h>
 #include <daos/mgmt.h>
 #include <daos/placement.h>
+#include <daos/metrics.h>
+#include <daos/job.h>
 #include <daos/pool.h>
 #include <daos/security.h>
 #include <daos_types.h>
@@ -32,6 +36,152 @@ struct rsvc_client_state {
 
 int	dc_pool_proto_version;
 
+struct dc_pool_metrics {
+	d_list_t dp_pool_list; /* link in dc_pool_tls::dpc_metrics_list (list on this thread) */
+	uuid_t   dp_uuid; /* pool UUID; the key compared by dc_pool_metrics_lookup() */
+	char     dp_path[D_TM_MAX_NAME_LEN]; /* telemetry dir path, built as "pool/<uuid>" */
+	void    *dp_metrics[DAOS_NR_MODULE]; /* slots handed to daos_module_init_metrics() */
+	int      dp_ref; /* bumped by dc_pool_metrics_start(), dropped by dc_pool_metrics_stop() */
+};
+
+/**
+ * Tear down a pool's client metrics; the structure itself is not freed here.
+ *
+ * \param[in]	metrics	per-pool client metrics whose module data (and dir) are released
+ */
+static void
+dc_pool_metrics_free(struct dc_pool_metrics *metrics)
+{
+	int rc;
+
+	if (!daos_client_metric)
+		return;
+
+	daos_module_fini_metrics(DAOS_CLI_TAG, metrics->dp_metrics);
+	if (!daos_client_metric_retain) { /* retained metrics keep their telemetry dir */
+		rc = d_tm_del_ephemeral_dir(metrics->dp_path);
+		if (rc != 0) {
+			D_WARN(DF_UUID ": failed to remove pool metrics dir for pool: " DF_RC "\n",
+			       DP_UUID(metrics->dp_uuid), DP_RC(rc));
+			return; /* skip the success log below */
+		}
+	}
+
+	D_INFO(DF_UUID ": destroyed ds_pool metrics: %s\n", DP_UUID(metrics->dp_uuid),
+	       metrics->dp_path);
+}
+
+/** Allocate per-pool client metrics into \a metrics_p; cleaned up and freed on failure. */
+static int
+dc_pool_metrics_alloc(uuid_t pool_uuid, struct dc_pool_metrics **metrics_p)
+{
+	struct dc_pool_metrics *metrics = NULL;
+	int                     pid     = getpid();
+	size_t                  size;
+	int                     rc;
+
+	if (!daos_client_metric)
+		return 0; /* metrics disabled: *metrics_p is left untouched */
+
+	D_ALLOC_PTR(metrics);
+	if (metrics == NULL)
+		return -DER_NOMEM;
+	uuid_copy(metrics->dp_uuid, pool_uuid);
+	snprintf(metrics->dp_path, sizeof(metrics->dp_path), "pool/" DF_UUIDF,
+		 DP_UUID(metrics->dp_uuid));
+
+	/** create new shmem space for per-pool metrics */
+	size = daos_module_nr_pool_metrics() * PER_METRIC_BYTES;
+	rc   = d_tm_add_ephemeral_dir(NULL, size, metrics->dp_path);
+	if (rc != 0) {
+		D_WARN(DF_UUID ": failed to create metrics dir for pool: " DF_RC "\n",
+		       DP_UUID(metrics->dp_uuid), DP_RC(rc));
+		D_FREE(metrics); /* never handed to the caller, so release it here */
+		return rc;
+	}
+
+	/* initialize the metrics of each client module, passing this process id */
+	rc = daos_module_init_metrics(DAOS_CLI_TAG, metrics->dp_metrics, metrics->dp_path, pid);
+	if (rc != 0) {
+		D_WARN(DF_UUID ": failed to initialize module metrics: " DF_RC "\n",
+		       DP_UUID(metrics->dp_uuid), DP_RC(rc));
+		dc_pool_metrics_free(metrics); /* module fini + dir removal (unless retained) */
+		D_FREE(metrics);               /* dc_pool_metrics_free() keeps the struct */
+		return rc;
+	}
+
+	D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(metrics->dp_uuid),
+	       metrics->dp_path);
+	*metrics_p = metrics;
+	return 0;
+}
+
+struct dc_pool_metrics *
+dc_pool_metrics_lookup(struct dc_pool_tls *tls, uuid_t pool_uuid)
+{
+	struct dc_pool_metrics *cur;
+	struct dc_pool_metrics *found = NULL;
+	/* Scan the TLS metrics list for pool_uuid; the list lock is held only while scanning. */
+	D_MUTEX_LOCK(&tls->dpc_metrics_list_lock);
+	d_list_for_each_entry(cur, &tls->dpc_metrics_list, dp_pool_list) {
+		if (uuid_compare(pool_uuid, cur->dp_uuid) != 0)
+			continue; /* not this pool, keep scanning */
+		found = cur;
+		break;
+	}
+	D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock);
+	return found; /* NULL when no entry on this list matches pool_uuid */
+}
+
+/* Allocate the pool TLS: an empty metrics list guarded by a mutex; NULL on any failure. */
+static void *
+dc_pool_tls_init(int tags, int xs_id, int pid)
+{
+	struct dc_pool_tls *pool_tls = NULL;
+
+	D_ALLOC_PTR(pool_tls);
+	if (pool_tls == NULL)
+		return NULL;
+
+	if (D_MUTEX_INIT(&pool_tls->dpc_metrics_list_lock, NULL) == 0) {
+		D_INIT_LIST_HEAD(&pool_tls->dpc_metrics_list);
+		return pool_tls;
+	}
+
+	/* the mutex could not be set up: give the allocation back */
+	D_FREE(pool_tls);
+	return NULL;
+}
+
+/* Release a pool TLS: tear down and free every metrics entry, then the lock and the TLS. */
+static void
+dc_pool_tls_fini(int tags, void *data)
+{
+	struct dc_pool_tls     *pool_tls = data;
+	struct dc_pool_metrics *entry, *next;
+
+	D_MUTEX_LOCK(&pool_tls->dpc_metrics_list_lock);
+	d_list_for_each_entry_safe(entry, next, &pool_tls->dpc_metrics_list, dp_pool_list) {
+		if (entry->dp_ref != 0)
+			D_WARN("still reference for pool " DF_UUID " metrics\n",
+			       DP_UUID(entry->dp_uuid));
+		d_list_del_init(&entry->dp_pool_list);
+		dc_pool_metrics_free(entry);
+		D_FREE(entry);
+	}
+	D_MUTEX_UNLOCK(&pool_tls->dpc_metrics_list_lock);
+
+	D_MUTEX_DESTROY(&pool_tls->dpc_metrics_list_lock);
+	D_FREE(pool_tls);
+}
+
+struct daos_module_key dc_pool_module_key = {
+    .dmk_tags  = DAOS_CLI_TAG, /* tags value dc_pool_tls_get() passes to dc_tls_get() */
+    .dmk_index = -1, /* NOTE(review): presumably assigned at registration - confirm */
+    .dmk_init  = dc_pool_tls_init, /* allocates this module's struct dc_pool_tls */
+    .dmk_fini  = dc_pool_tls_fini, /* frees the TLS and every metrics entry on its list */
+};
+
 /**
  * Initialize pool interface
  */
@@ -41,6 +191,9 @@ dc_pool_init(void)
 	uint32_t		ver_array[2] = {DAOS_POOL_VERSION - 1, DAOS_POOL_VERSION};
 	int			rc;
 
+	if (daos_client_metric)
+		daos_register_key(&dc_pool_module_key);
+
 	dc_pool_proto_version = 0;
 	rc = daos_rpc_proto_query(pool_proto_fmt_v4.cpf_base, ver_array, 2, &dc_pool_proto_version);
 	if (rc)
@@ -77,7 +230,68 @@ dc_pool_fini(void)
 	else
 		rc = daos_rpc_unregister(&pool_proto_fmt_v5);
 	if (rc != 0)
-		D_ERROR("failed to unregister pool RPCs: "DF_RC"\n", DP_RC(rc));
+		DL_ERROR(rc, "failed to unregister pool RPCs");
+
+	if (daos_client_metric)
+		daos_unregister_key(&dc_pool_module_key);
+}
+
+/* Attach client metrics to \a pool, creating the TLS entry for its UUID when missing. */
+static int
+dc_pool_metrics_start(struct dc_pool *pool)
+{
+	struct dc_pool_metrics *entry;
+	struct dc_pool_tls     *pool_tls;
+	int                     rc;
+
+	/* nothing to do when metrics are off or already attached to this pool */
+	if (!daos_client_metric || pool->dp_metrics != NULL)
+		return 0;
+
+	/* pool module state reached through dc_pool_module_key */
+	pool_tls = dc_pool_tls_get();
+	D_ASSERT(pool_tls != NULL);
+
+	entry = dc_pool_metrics_lookup(pool_tls, pool->dp_pool);
+	if (entry == NULL) {
+		/* no entry for this pool on the list yet: build one and publish it */
+		rc = dc_pool_metrics_alloc(pool->dp_pool, &entry);
+		if (rc != 0)
+			return rc;
+
+		/* the list is modified under its lock */
+		D_MUTEX_LOCK(&pool_tls->dpc_metrics_list_lock);
+		d_list_add(&entry->dp_pool_list, &pool_tls->dpc_metrics_list);
+		D_MUTEX_UNLOCK(&pool_tls->dpc_metrics_list_lock);
+	}
+
+	/* both paths take one reference and share the entry's module metrics array */
+	entry->dp_ref++;
+	pool->dp_metrics = entry->dp_metrics;
+
+	return 0;
+}
+
+/* Detach \a pool from its client metrics: drop one reference; the entry stays listed. */
+static void
+dc_pool_metrics_stop(struct dc_pool *pool)
+{
+	struct dc_pool_tls     *pool_tls;
+	struct dc_pool_metrics *entry;
+
+	/* metrics disabled, or this pool never attached any */
+	if (!daos_client_metric || pool->dp_metrics == NULL)
+		return;
+
+	pool_tls = dc_pool_tls_get();
+	D_ASSERT(pool_tls != NULL);
+
+	entry = dc_pool_metrics_lookup(pool_tls, pool->dp_pool);
+	if (entry != NULL)
+		entry->dp_ref--;
+
+	/* forget the borrowed array whether or not an entry was found */
+	pool->dp_metrics = NULL;
+ }
 
 static void
@@ -99,6 +313,8 @@ pool_free(struct d_hlink *hlink)
 	if (pool->dp_map != NULL)
 		pool_map_decref(pool->dp_map);
 
+	dc_pool_metrics_stop(pool);
+
 	rsvc_client_fini(&pool->dp_client);
 	if (pool->dp_sys != NULL)
 		dc_mgmt_sys_detach(pool->dp_sys);
@@ -609,6 +825,10 @@ dc_pool_connect_internal(tse_task_t *task, daos_pool_info_t *info,
 		goto out;
 	}
 
+	rc = dc_pool_metrics_start(pool);
+	if (rc != 0)
+		D_GOTO(out, rc);
+
 	/** Pool connect RPC by UUID (provided, or looked up by label above) */
 	rc = pool_req_create(daos_task2ctx(task), &ep, POOL_CONNECT, &rpc);
 	if (rc != 0) {
@@ -1090,6 +1310,10 @@ dc_pool_g2l(struct dc_pool_glob *pool_glob, size_t len, daos_handle_t *poh)
 	if (rc < 0)
 		goto out;
 
+	rc = dc_pool_metrics_start(pool);
+	if (rc != 0)
+		goto out;
+
 	rc = pool_map_create(map_buf, pool_glob->dpg_map_version, &map);
 	if (rc != 0) {
 		D_ERROR("failed to create local pool map: "DF_RC"\n",
diff --git a/src/pool/cli_internal.h b/src/pool/cli_internal.h
index f8f965b4469a..fd3b26539f8d 100644
--- a/src/pool/cli_internal.h
+++ b/src/pool/cli_internal.h
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2016-2022 Intel Corporation.
+ * (C) Copyright 2016-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -16,4 +16,20 @@ struct dc_pool *dc_pool_alloc(unsigned int nr);
 
 int dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect);
 
+struct dc_pool_tls {
+	pthread_mutex_t dpc_metrics_list_lock; /* held while the list is walked or modified */
+	d_list_t        dpc_metrics_list; /* struct dc_pool_metrics, linked by dp_pool_list */
+};
+
+extern struct daos_module_key dc_pool_module_key;
+/** Return the pool module's TLS for the calling thread (asserts the client TLS exists). */
+static inline struct dc_pool_tls *
+dc_pool_tls_get(void)
+{
+	struct daos_thread_local_storage *dtls;
+
+	dtls = dc_tls_get(dc_pool_module_key.dmk_tags);
+	D_ASSERT(dtls != NULL);
+	return daos_module_key_get(dtls, &dc_pool_module_key);
+}
 #endif /* __POOL_CLIENT_INTERNAL_H__ */
diff --git a/src/pool/srv.c b/src/pool/srv.c
index 40f1d7d18eb5..8a7ba7d14efd 100644
--- a/src/pool/srv.c
+++ b/src/pool/srv.c
@@ -13,6 +13,7 @@
 
 #include <daos_srv/pool.h>
 #include <daos/rpc.h>
+#include <daos/metrics.h>
 #include <daos_srv/daos_engine.h>
 #include <daos_srv/bio.h>
 #include "rpc.h"
@@ -174,11 +175,11 @@ struct dss_module_key pool_module_key = {
 	.dmk_fini = pool_tls_fini,
 };
 
-struct dss_module_metrics pool_metrics = {
-	.dmm_tags = DAOS_SYS_TAG,
-	.dmm_init = ds_pool_metrics_alloc,
-	.dmm_fini = ds_pool_metrics_free,
-	.dmm_nr_metrics = ds_pool_metrics_count,
+struct daos_module_metrics pool_metrics = {
+    .dmm_tags       = DAOS_SYS_TAG,
+    .dmm_init       = ds_pool_metrics_alloc,
+    .dmm_fini       = ds_pool_metrics_free,
+    .dmm_nr_metrics = ds_pool_metrics_count,
 };
 
 struct dss_module pool_module =  {
diff --git a/src/pool/srv_metrics.c b/src/pool/srv_metrics.c
index 0ca5b494df17..615af9deba1b 100644
--- a/src/pool/srv_metrics.c
+++ b/src/pool/srv_metrics.c
@@ -8,24 +8,9 @@
 
 #include "srv_internal.h"
 #include <abt.h>
+#include <daos/metrics.h>
 #include <gurt/telemetry_producer.h>
 
-
-/* Estimate of bytes per typical metric node */
-#define NODE_BYTES		(sizeof(struct d_tm_node_t) + \
-				 sizeof(struct d_tm_metric_t) + \
-				 64 /* buffer for metadata */)
-/* Estimate of bytes per histogram bucket */
-#define BUCKET_BYTES		(sizeof(struct d_tm_bucket_t) + NODE_BYTES)
-/*
-   Estimate of bytes per metric.
-   This is a generous high-water mark assuming most metrics are not using
-   histograms. May need adjustment if the balance of metrics changes.
-*/
-#define PER_METRIC_BYTES	(NODE_BYTES + sizeof(struct d_tm_stats_t) + \
-				 sizeof(struct d_tm_histogram_t) + \
-				 BUCKET_BYTES)
-
 /**
  * Initializes the pool metrics
  */
diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto
index a284d645106a..129fecd53707 100644
--- a/src/proto/mgmt/svc.proto
+++ b/src/proto/mgmt/svc.proto
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2018-2023 Intel Corporation.
+// (C) Copyright 2018-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -122,3 +122,16 @@ message PoolMonitorReq {
 	string poolHandleUUID = 3; // Pool Handle UUID for the connection
 	string jobid = 4;	// Job ID to associate instance with.
 }
+
+message ClientTelemetryReq
+{
+	string sys     = 1; // DAOS system identifier
+	string jobid   = 2; // Job ID used for client telemetry
+	int32  shm_key = 3; // Client's shared memory segment key
+}
+
+message ClientTelemetryResp
+{
+	int32 status    = 1; // DAOS status code
+	int32 agent_uid = 2; // UID of agent process
+}
diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.py b/src/tests/ftest/telemetry/basic_client_telemetry.py
new file mode 100644
index 000000000000..1d115b4c95e5
--- /dev/null
+++ b/src/tests/ftest/telemetry/basic_client_telemetry.py
@@ -0,0 +1,54 @@
+"""
+  (C) Copyright 2024 Intel Corporation.
+
+  SPDX-License-Identifier: BSD-2-Clause-Patent
+"""
+from ior_utils import read_data, write_data
+from telemetry_test_base import TestWithClientTelemetry
+
+
+class BasicClientTelemetry(TestWithClientTelemetry):
+    """Tests to verify basic client telemetry.
+
+    :avocado: recursive
+    """
+
+    def test_client_metrics_exist(self):
+        """JIRA ID: DAOS-8331.
+
+        Verify that the client-side telemetry captures some throughput metrics.
+        After performing some I/O, there should be some client telemetry data.
+
+        Test steps:
+        1) Create a pool and container
+        2) Perform some I/O with IOR
+        3) Verify that there is some client telemetry data
+
+        :avocado: tags=all,daily_regression
+        :avocado: tags=vm
+        :avocado: tags=telemetry
+        :avocado: tags=BasicClientTelemetry,test_client_metrics_exist
+        """
+        # Pool and container that IOR writes to and then reads back from
+        pool = self.get_pool(connect=True)
+        container = self.get_container(pool=pool)
+
+        self.log_step('Writing data to the pool (ior)')
+        ior = write_data(self, container)
+        self.log_step('Reading data from the pool (ior)')
+        read_data(self, ior, container)
+
+        metric_names = [
+            "client_pool_xferred_fetch",
+            "client_pool_xferred_update",
+        ]
+
+        self.log_step('Reading client telemetry (reads & writes should be > 0)')
+        collected = self.telemetry.collect_client_data(metric_names)
+        for name in metric_names:
+            # Add up the metric over every label combination that was reported
+            per_label = collected[name].values()
+            total = sum(per_label)
+            self.assertGreater(total, 0)
+
+        self.log_step('Test passed')
diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.yaml b/src/tests/ftest/telemetry/basic_client_telemetry.yaml
new file mode 100644
index 000000000000..d585dc81fda4
--- /dev/null
+++ b/src/tests/ftest/telemetry/basic_client_telemetry.yaml
@@ -0,0 +1,46 @@
+hosts:
+  test_servers: 1
+  test_clients: 1
+
+timeout: 180
+
+server_config:
+  name: daos_server
+  engines_per_host: 1
+  engines:
+    0:
+      targets: 4
+      nr_xs_helpers: 0
+      storage:
+        0:
+          class: ram
+          scm_mount: /mnt/daos
+  system_ram_reserved: 1
+
+agent_config:
+  telemetry_port: 9191
+  telemetry_retain: 30s
+  telemetry_enabled: true
+
+pool:
+  scm_size: 2G
+
+container:
+  type: POSIX
+  control_method: daos
+  dfs_oclass: SX
+
+ior: &ior_base
+  ppn: 4
+  api: DFS
+  transfer_size: 512K
+  block_size: 1M
+  dfs_oclass: SX
+
+ior_write:
+  <<: *ior_base
+  flags: "-k -v -w -W -G 1"
+
+ior_read:
+  <<: *ior_base
+  flags: "-v -r -R -G 1"
diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py
index 46b793f31ef8..7f92b9f479aa 100644
--- a/src/tests/ftest/util/agent_utils_params.py
+++ b/src/tests/ftest/util/agent_utils_params.py
@@ -1,5 +1,5 @@
 """
-  (C) Copyright 2020-2023 Intel Corporation.
+  (C) Copyright 2020-2024 Intel Corporation.
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -57,10 +57,19 @@ def __init__(self, filename, common_yaml):
         #       Specifies the log level for agent logs.
         #   - exclude_fabric_ifaces: <list>, Ignore a subset of fabric interfaces when selecting
         #       an interface for client applications.
+        #   - telemetry_port: <int>, e.g. 9192
+        #        Enable Prometheus endpoint for client telemetry.
+        #   - telemetry_enabled: <bool>, e.g. True
+        #        Enable client telemetry for all client processes.
+        #   - telemetry_retain: <str>, e.g. 5m
+        #        Time to retain per-client telemetry data.
         self.runtime_dir = BasicParameter(None, "/var/run/daos_agent")
         self.log_file = LogParameter(log_dir, None, "daos_agent.log")
         self.control_log_mask = BasicParameter(None, "debug")
         self.exclude_fabric_ifaces = BasicParameter(None)
+        self.telemetry_port = BasicParameter(None)
+        self.telemetry_enabled = BasicParameter(None)
+        self.telemetry_retain = BasicParameter(None)
 
     def update_log_file(self, name):
         """Update the log file name for the daos agent.
diff --git a/src/tests/ftest/util/telemetry_test_base.py b/src/tests/ftest/util/telemetry_test_base.py
index 7641fe8d5465..6a2389935f70 100644
--- a/src/tests/ftest/util/telemetry_test_base.py
+++ b/src/tests/ftest/util/telemetry_test_base.py
@@ -1,10 +1,10 @@
 """
-(C) Copyright 2021-2023 Intel Corporation.
+(C) Copyright 2021-2024 Intel Corporation.
 
 SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 from apricot import TestWithServers
-from telemetry_utils import TelemetryUtils
+from telemetry_utils import ClientTelemetryUtils, TelemetryUtils
 
 
 class TestWithTelemetry(TestWithServers):
@@ -263,3 +263,36 @@ def sum_values(metric_out):
                     total += value
 
         return total
+
+
+class TestWithClientTelemetry(TestWithTelemetry):
+    """Test client telemetry metrics.
+
+    :avocado: recursive
+    """
+    def setUp(self):
+        """Set up each test case."""
+        super().setUp()
+        self.telemetry = ClientTelemetryUtils(
+            self.get_dmg_command(), self.server_managers[0].hosts, self.hostlist_clients)
+
+    def verify_client_telemetry_list(self, with_pools=False):
+        """Verify the telemetry metrics names listed for each client host.
+
+        Pool metrics are only expected when with_pools is True.
+        """
+        expected = self.telemetry.get_all_client_metrics_names(with_pools=with_pools)
+
+        # List the metrics from the client hosts; list_metrics() would query the servers
+        result = self.telemetry.list_client_metrics()
+
+        # Verify the lists are detected for each agent
+        errors = self.compare_lists(
+            list(result), self.hostlist_clients, 0, "", "telemetry metrics list hosts")
+        for host, host_result in result.items():
+            errors.extend(
+                self.compare_lists(expected, host_result, 2, host, "telemetry metric names"))
+        if errors:
+            self.fail("\n".join(errors))
+
+        self.log.info("Test PASSED")
diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py
index d4d151af68fa..46fdd00c62fe 100644
--- a/src/tests/ftest/util/telemetry_utils.py
+++ b/src/tests/ftest/util/telemetry_utils.py
@@ -1,5 +1,5 @@
 """
-(C) Copyright 2021-2023 Intel Corporation.
+(C) Copyright 2021-2024 Intel Corporation.
 
 SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -30,7 +30,7 @@ def _gen_stats_metrics(basename):
 
 class TelemetryUtils():
     # pylint: disable=too-many-nested-blocks
-    """Defines a object used to verify telemetry information."""
+    """Defines an object used to verify server telemetry information."""
 
     # Define a set of patterns that shouldn't be used for comparisons.
     METRIC_EXCLUDE_PATTERNS = [
@@ -342,15 +342,13 @@ class TelemetryUtils():
     ENGINE_NET_METRICS = [
         "engine_net_glitch",
         "engine_net_failed_addr",
+        "engine_net_quota_exceeded",
         "engine_net_req_timeout",
-        "engine_net_swim_delay_stddev",
-        "engine_net_swim_delay_max",
-        "engine_net_swim_delay_mean",
-        "engine_net_swim_delay",
-        "engine_net_swim_delay_min",
+        *_gen_stats_metrics("engine_net_swim_delay"),
         "engine_net_uri_lookup_timeout",
         "engine_net_uri_lookup_other",
-        "engine_net_uri_lookup_self"]
+        "engine_net_uri_lookup_self",
+        "engine_net_waitq_depth"]
     ENGINE_RANK_METRICS = [
         "engine_rank"]
     ENGINE_NVME_HEALTH_METRICS = [
@@ -475,7 +473,7 @@ def is_excluded_metric(self, name):
                 return True
         return False
 
-    def list_metrics(self):
+    def list_metrics(self, hosts=None):
         """List the available metrics for each host.
 
         Returns:
@@ -483,8 +481,9 @@ def list_metrics(self):
 
         """
         info = {}
-        self.log.info("Listing telemetry metrics from %s", self.hosts)
-        for host in self.hosts:
+        host_list = hosts or self.hosts
+        self.log.info("Listing telemetry metrics from %s", host_list)
+        for host in host_list:
             data = self.dmg.telemetry_metrics_list(host=host)
             info[host] = []
             if "response" in data:
@@ -494,7 +493,7 @@ def list_metrics(self):
                             info[host].append(entry["name"])
         return info
 
-    def collect_data(self, names):
+    def collect_data(self, names, hosts=None):
         """Collect telemetry data for the specified metrics.
 
         Args:
@@ -510,7 +509,9 @@ def collect_data(self, names):
                     },
                     ...
         """
-        return self._data.collect(self.log, names, self.hosts, self.dmg)
+        host_list = hosts or self.hosts
+        self.log.info("Collecting telemetry data from %s", host_list)
+        return self._data.collect(self.log, names, host_list, self.dmg)
 
     def display_data(self):
         """Display the telemetry metric values."""
@@ -531,7 +532,7 @@ def verify_data(self, ranges):
         """
         return self._data.verify(self.log, ranges)
 
-    def get_metrics(self, name):
+    def get_metrics(self, name, hosts=None):
         """Obtain the specified metric information for each host.
 
         Args:
@@ -543,8 +544,9 @@ def get_metrics(self, name):
 
         """
         info = {}
-        self.log.info("Querying telemetry metric %s from %s", name, self.hosts)
-        for host in self.hosts:
+        host_list = hosts or self.hosts
+        self.log.info("Querying telemetry metric %s from %s", name, host_list)
+        for host in host_list:
             data = self.dmg.telemetry_metrics_query(host=host, metrics=name)
             info[host] = {}
             if "response" in data:
@@ -812,6 +814,246 @@ def verify_metric_value(self, metrics_data, min_value=None, max_value=None):
         return status
 
 
+class ClientTelemetryUtils(TelemetryUtils):
+    """Defines an object used to verify server and client telemetry information."""
+
+    CLIENT_EVENT_METRICS = [
+        "client_started_at"]
+    CLIENT_POOL_ACTION_METRICS = [
+        "client_pool_resent",
+        "client_pool_restarted",
+        "client_pool_retry",
+        "client_pool_xferred_fetch",
+        "client_pool_xferred_update"]
+    CLIENT_POOL_OPS_METRICS = [
+        "client_pool_ops_akey_enum",
+        "client_pool_ops_akey_punch",
+        "client_pool_ops_compound",
+        "client_pool_ops_dkey_enum",
+        "client_pool_ops_dkey_punch",
+        "client_pool_ops_ec_agg",
+        "client_pool_ops_ec_rep",
+        "client_pool_ops_fetch",
+        "client_pool_ops_key2anchor",
+        "client_pool_ops_key_query",
+        "client_pool_ops_migrate",
+        "client_pool_ops_obj_coll_punch",
+        "client_pool_ops_obj_coll_query",
+        "client_pool_ops_obj_enum",
+        "client_pool_ops_obj_punch",
+        "client_pool_ops_obj_sync",
+        "client_pool_ops_recx_enum",
+        "client_pool_ops_tgt_akey_punch",
+        "client_pool_ops_tgt_dkey_punch",
+        "client_pool_ops_tgt_punch",
+        "client_pool_ops_tgt_update",
+        "client_pool_ops_update"]
+    CLIENT_POOL_EC_UPDATE_METRICS = [
+        "client_pool_EC_update_full_stripe",
+        "client_pool_EC_update_partial"]
+    CLIENT_POOL_METRICS = CLIENT_POOL_ACTION_METRICS +\
+        CLIENT_POOL_OPS_METRICS +\
+        CLIENT_POOL_EC_UPDATE_METRICS
+    CLIENT_IO_LATENCY_FETCH_METRICS = \
+        _gen_stats_metrics("client_io_latency_fetch")
+    CLIENT_IO_LATENCY_UPDATE_METRICS = \
+        _gen_stats_metrics("client_io_latency_update")
+    CLIENT_IO_OPS_AKEY_ENUM_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_akey_enum_active")
+    CLIENT_IO_OPS_AKEY_ENUM_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_akey_enum_latency")
+    CLIENT_IO_OPS_AKEY_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_akey_punch_active")
+    CLIENT_IO_OPS_AKEY_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_akey_punch_latency")
+    CLIENT_IO_OPS_COMPOUND_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_compound_active")
+    CLIENT_IO_OPS_COMPOUND_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_compound_latency")
+    CLIENT_IO_OPS_DKEY_ENUM_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_dkey_enum_active")
+    CLIENT_IO_OPS_DKEY_ENUM_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_dkey_enum_latency")
+    CLIENT_IO_OPS_DKEY_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_dkey_punch_active")
+    CLIENT_IO_OPS_DKEY_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_dkey_punch_latency")
+    CLIENT_IO_OPS_EC_AGG_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_ec_agg_active")
+    CLIENT_IO_OPS_EC_AGG_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_ec_agg_latency")
+    CLIENT_IO_OPS_EC_REP_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_ec_rep_active")
+    CLIENT_IO_OPS_EC_REP_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_ec_rep_latency")
+    CLIENT_IO_OPS_FETCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_fetch_active")
+    CLIENT_IO_OPS_KEY2ANCHOR_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_key2anchor_active")
+    CLIENT_IO_OPS_KEY2ANCHOR_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_key2anchor_latency")
+    CLIENT_IO_OPS_KEY_QUERY_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_key_query_active")
+    CLIENT_IO_OPS_KEY_QUERY_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_key_query_latency")
+    CLIENT_IO_OPS_MIGRATE_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_migrate_active")
+    CLIENT_IO_OPS_MIGRATE_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_migrate_latency")
+    CLIENT_IO_OPS_OBJ_COLL_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_coll_punch_active")
+    CLIENT_IO_OPS_OBJ_COLL_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_coll_punch_latency")
+    CLIENT_IO_OPS_OBJ_COLL_QUERY_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_coll_query_active")
+    CLIENT_IO_OPS_OBJ_COLL_QUERY_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_coll_query_latency")
+    CLIENT_IO_OPS_OBJ_ENUM_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_enum_active")
+    CLIENT_IO_OPS_OBJ_ENUM_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_enum_latency")
+    CLIENT_IO_OPS_OBJ_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_punch_active")
+    CLIENT_IO_OPS_OBJ_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_punch_latency")
+    CLIENT_IO_OPS_OBJ_SYNC_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_sync_active")
+    CLIENT_IO_OPS_OBJ_SYNC_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_obj_sync_latency")
+    CLIENT_IO_OPS_RECX_ENUM_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_recx_enum_active")
+    CLIENT_IO_OPS_RECX_ENUM_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_recx_enum_latency")
+    CLIENT_IO_OPS_TGT_AKEY_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_akey_punch_active")
+    CLIENT_IO_OPS_TGT_AKEY_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_akey_punch_latency")
+    CLIENT_IO_OPS_TGT_DKEY_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_dkey_punch_active")
+    CLIENT_IO_OPS_TGT_DKEY_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_dkey_punch_latency")
+    CLIENT_IO_OPS_TGT_PUNCH_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_punch_active")
+    CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_punch_latency")
+    CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_tgt_update_active")
+    CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS = \
+        _gen_stats_metrics("client_io_ops_update_active")
+    CLIENT_IO_METRICS = CLIENT_IO_LATENCY_FETCH_METRICS +\
+        CLIENT_IO_LATENCY_UPDATE_METRICS +\
+        CLIENT_IO_OPS_AKEY_ENUM_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_AKEY_ENUM_LATENCY_METRICS +\
+        CLIENT_IO_OPS_AKEY_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_AKEY_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_COMPOUND_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_COMPOUND_LATENCY_METRICS +\
+        CLIENT_IO_OPS_DKEY_ENUM_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_DKEY_ENUM_LATENCY_METRICS +\
+        CLIENT_IO_OPS_DKEY_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_DKEY_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_EC_AGG_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_EC_AGG_LATENCY_METRICS +\
+        CLIENT_IO_OPS_EC_REP_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_EC_REP_LATENCY_METRICS +\
+        CLIENT_IO_OPS_FETCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_KEY2ANCHOR_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_KEY2ANCHOR_LATENCY_METRICS +\
+        CLIENT_IO_OPS_KEY_QUERY_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_KEY_QUERY_LATENCY_METRICS +\
+        CLIENT_IO_OPS_MIGRATE_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_MIGRATE_LATENCY_METRICS +\
+        CLIENT_IO_OPS_OBJ_COLL_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_OBJ_COLL_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_OBJ_COLL_QUERY_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_OBJ_COLL_QUERY_LATENCY_METRICS +\
+        CLIENT_IO_OPS_OBJ_ENUM_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_OBJ_ENUM_LATENCY_METRICS +\
+        CLIENT_IO_OPS_OBJ_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_OBJ_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_OBJ_SYNC_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_OBJ_SYNC_LATENCY_METRICS +\
+        CLIENT_IO_OPS_RECX_ENUM_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_RECX_ENUM_LATENCY_METRICS +\
+        CLIENT_IO_OPS_TGT_AKEY_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_TGT_AKEY_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_TGT_DKEY_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_TGT_DKEY_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_TGT_PUNCH_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS +\
+        CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS +\
+        CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS
+
+    def __init__(self, dmg, servers, clients):
+        """Create a ClientTelemetryUtils object.
+
+        Args:
+            dmg (DmgCommand): the DmgCommand object configured to communicate
+                with the servers
+            servers (list): a list of server host names
+            clients (list): a list of client host names
+        """
+        super().__init__(dmg, servers)
+        self.clients = NodeSet.fromlist(clients)
+
+    def get_all_client_metrics_names(self, with_pools=False):
+        """Get all the telemetry metrics names for this client.
+
+        Args:
+            with_pools (bool): if True, include pool metrics in the results
+
+        Returns:
+            list: all of the telemetry metrics names for this client
+
+        """
+        all_metrics_names = list(self.CLIENT_EVENT_METRICS)
+        all_metrics_names.extend(self.CLIENT_IO_METRICS)
+        if with_pools:
+            all_metrics_names.extend(self.CLIENT_POOL_METRICS)
+
+        return all_metrics_names
+
+    def list_client_metrics(self):
+        """List the available metrics for each host.
+
+        Returns:
+            dict: a dictionary of host keys linked to a list of metric names
+
+        """
+        return super().list_metrics(hosts=self.clients)
+
+    def collect_client_data(self, names):
+        """Collect telemetry data for the specified metrics.
+
+        Args:
+            names (list): list of metric names
+
+        Returns:
+            dict: dictionary of metric values keyed by the metric name and combination of metric
+                labels and values, e.g.
+                    <metric_name>: {
+                        <label_1:label_1_value,label_2:label_2_value,...>: <value_1>,
+                        <label_1:label_1_value,label_2:label_2_value,...>: <value_2>,
+                        ...
+                    },
+                    ...
+        """
+        return super().collect_data(names, hosts=self.clients)
+
+    def get_client_metrics(self, name):
+        """Obtain the specified metric information for each host.
+
+        Args:
+            name (str): Comma-separated list of metric names to query.
+
+        Returns:
+            dict: a dictionary of host keys linked to metric data for each
+                metric name specified
+
+        """
+        return super().get_metrics(name, hosts=self.clients)
+
+
 class MetricData():
     """Defines a object used to collect, display, and verify telemetry metric data."""
 
@@ -890,9 +1132,10 @@ def verify(self, log, ranges):
         log.info(format_str, *['-' * self._display['widths'][name] for name in columns])
         for metric in sorted(self._display['data']):
             for value, labels in self._display['data'][metric].items():
-                log.info(
-                    format_str, metric, *self._label_values(labels), value,
-                    *self._label_values(labels, ['check']))
+                for label in labels:
+                    log.info(
+                        format_str, metric, *self._label_values(label), value,
+                        *self._label_values(label, ['check']))
         return status
 
     def _get_metrics(self, log, names, hosts, dmg):
diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c
index 2b0e9af1b574..f2133237587b 100644
--- a/src/utils/daos_metrics/daos_metrics.c
+++ b/src/utils/daos_metrics/daos_metrics.c
@@ -10,8 +10,9 @@
 
 #include <getopt.h>
 #include <string.h>
-#include "gurt/telemetry_common.h"
-#include "gurt/telemetry_consumer.h"
+#include <daos/metrics.h>
+#include <gurt/telemetry_common.h>
+#include <gurt/telemetry_consumer.h>
 
 static void
 print_usage(const char *prog_name)
@@ -52,30 +53,90 @@ print_usage(const char *prog_name)
 	       "--gauge, -g\n"
 	       "\tInclude gauges\n"
 	       "--read, -r\n"
+	       "\tInclude timestamp of when metric was read\n"
 	       "--reset, -e\n"
-	       "\tInclude timestamp of when metric was read\n",
+	       "\tReset all metrics to zero\n"
+	       "--jobid, -j\n"
+	       "\tDisplay metrics of the specified job\n",
 	       prog_name);
 }
 
-int
-main(int argc, char **argv)
+static int
+process_metrics(int metric_id, char *dirname, int format, int filter, int extra_descriptors,
+		int delay, int num_iter, d_tm_iter_cb_t iter_cb, void *arg)
 {
 	struct d_tm_node_t	*root = NULL;
 	struct d_tm_node_t	*node = NULL;
 	struct d_tm_context	*ctx = NULL;
+	int                      iteration = 0;
+	int                      rc        = 0;
+
+	ctx = d_tm_open(metric_id);
+	if (!ctx) /* attach failed: return an error so main() can report it */
+		D_GOTO(out, rc = -DER_NONEXIST);
+
+	root = d_tm_get_root(ctx);
+	if (!root) /* region opened but no root node was returned */
+		D_GOTO(out, rc = -DER_NONEXIST);
+
+	if (strncmp(dirname, "/", D_TM_MAX_NAME_LEN) != 0) { /* "/" keeps the tree root */
+		node = d_tm_find_metric(ctx, dirname);
+		if (node != NULL) {
+			root = node;
+		} else {
+			printf("No metrics found at: '%s'\n", dirname);
+			D_GOTO(out, rc = 0); /* an empty result is not a failure */
+		}
+	}
+
+	if (format == D_TM_CSV)
+		d_tm_print_field_descriptors(extra_descriptors, (FILE *)arg);
+
+	while ((num_iter == 0) || (iteration < num_iter)) { /* num_iter == 0: never stop */
+		d_tm_iterate(ctx, root, 0, filter, NULL, format, extra_descriptors, iter_cb, arg);
+		iteration++;
+		sleep(delay);
+		if (format == D_TM_STANDARD)
+			printf("\n\n");
+	}
+
+out:
+	if (ctx != NULL)
+		d_tm_close(&ctx);
+	return rc; /* 0 on success, -DER_NONEXIST when the telemetry region is unusable */
+}
+
+static void /* iteration callback: hands one node to d_tm_print_node(), arg cast to FILE * */
+iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format,
+	   int opt_fields, void *arg)
+{
+	d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg);
+}
+
+static void /* iteration callback: hands one node to d_tm_reset_node(), arg cast to FILE * */
+iter_reset(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format,
+	   int opt_fields, void *arg)
+{
+	d_tm_reset_node(ctx, node, level, path, format, opt_fields, (FILE *)arg);
+}
+
+int
+main(int argc, char **argv)
+{
 	char			dirname[D_TM_MAX_NAME_LEN] = {0};
+	char                    jobid[D_TM_MAX_NAME_LEN]   = {0};
 	bool			show_meta = false;
 	bool			show_when_read = false;
 	bool			show_type = false;
-	int			srv_idx = 0;
-	int			iteration = 0;
+	int                     srv_idx                    = 0;
 	int			num_iter = 1;
 	int			filter = 0;
 	int			delay = 1;
 	int			format = D_TM_STANDARD;
 	int			opt;
 	int			extra_descriptors = 0;
-	uint32_t		ops = 0;
+	d_tm_iter_cb_t          iter_cb           = NULL;
+	int                     rc;
 
 	sprintf(dirname, "/");
 
@@ -96,10 +157,11 @@ main(int argc, char **argv)
 						       {"type", no_argument, NULL, 'T'},
 						       {"read", no_argument, NULL, 'r'},
 						       {"reset", no_argument, NULL, 'e'},
+						       {"jobid", required_argument, NULL, 'j'},
 						       {"help", no_argument, NULL, 'h'},
 						       {NULL, 0, NULL, 0}};
 
-		opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrhe", long_options, NULL);
+		opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrj:he", long_options, NULL);
 		if (opt == -1)
 			break;
 
@@ -147,7 +209,10 @@ main(int argc, char **argv)
 			delay = atoi(optarg);
 			break;
 		case 'e':
-			ops |= D_TM_ITER_RESET;
+			iter_cb = iter_reset;
+			break;
+		case 'j':
+			snprintf(jobid, sizeof(jobid), "%s", optarg);
 			break;
 		case 'h':
 		case '?':
@@ -157,37 +222,13 @@ main(int argc, char **argv)
 		}
 	}
 
-	if (ops == 0)
-		ops |= D_TM_ITER_READ;
+	if (iter_cb == NULL)
+		iter_cb = iter_print;
 
 	if (filter == 0)
 		filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO |
 			 D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE;
 
-	ctx = d_tm_open(srv_idx);
-	if (!ctx)
-		goto failure;
-
-	root = d_tm_get_root(ctx);
-	if (!root)
-		goto failure;
-
-	if (strncmp(dirname, "/", D_TM_MAX_NAME_LEN) != 0) {
-		node = d_tm_find_metric(ctx, dirname);
-		if (node != NULL) {
-			root = node;
-		} else {
-			printf("No metrics found at: '%s'\n", dirname);
-			exit(0);
-		}
-	}
-
-	if (format == D_TM_CSV)
-		filter &= ~D_TM_DIRECTORY;
-	else
-		filter |= D_TM_DIRECTORY;
-
-
 	if (show_when_read)
 		extra_descriptors |= D_TM_INCLUDE_TIMESTAMP;
 	if (show_meta)
@@ -196,27 +237,24 @@ main(int argc, char **argv)
 		extra_descriptors |= D_TM_INCLUDE_TYPE;
 
 	if (format == D_TM_CSV)
-		d_tm_print_field_descriptors(extra_descriptors, stdout);
+		filter &= ~D_TM_DIRECTORY;
+	else
+		filter |= D_TM_DIRECTORY;
 
-	while ((num_iter == 0) || (iteration < num_iter)) {
-		d_tm_iterate(ctx, root, 0, filter, NULL, format, extra_descriptors,
-			     ops, stdout);
-		iteration++;
-		sleep(delay);
-		if (format == D_TM_STANDARD)
-			printf("\n\n");
+	if (strlen(jobid) > 0) {
+		srv_idx = DC_TM_JOB_ROOT_ID;
+		snprintf(dirname, sizeof(dirname), "%s", jobid);
 	}
 
-	d_tm_close(&ctx);
-	return 0;
-
-failure:
-	printf("Unable to attach to the shared memory for the server index: %d"
-	       "\nMake sure to run the I/O Engine with the same index to "
-	       "initialize the shared memory and populate it with metrics.\n"
-	       "Verify user/group settings match those that started the I/O "
-	       "Engine.\n",
-	       srv_idx);
-	d_tm_close(&ctx);
-	return -1;
+	/* fetch metrics from server side */
+	rc = process_metrics(srv_idx, dirname, format, filter, extra_descriptors, delay, num_iter,
+			     iter_cb, stdout);
+	if (rc)
+		printf("Unable to attach to the shared memory for the server index: %d"
+		       "\nMake sure to run the I/O Engine with the same index to "
+		       "initialize the shared memory and populate it with metrics.\n"
+		       "Verify user/group settings match those that started the I/O "
+		       "Engine.\n",
+		       srv_idx);
+	return rc != 0 ? -1 : 0;
 }
diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c
index 4d62b45b6094..2a59197f709b 100644
--- a/src/vos/vos_common.c
+++ b/src/vos/vos_common.c
@@ -13,6 +13,7 @@
 
 #include <fcntl.h>
 #include <daos/common.h>
+#include <daos/metrics.h>
 #include <daos/rpc.h>
 #include <daos/lru.h>
 #include <daos/btree_class.h>
@@ -821,11 +822,11 @@ vos_metrics_alloc(const char *path, int tgt_id)
 	return vp_metrics;
 }
 
-struct dss_module_metrics vos_metrics = {
-	.dmm_tags = DAOS_TGT_TAG,
-	.dmm_init = vos_metrics_alloc,
-	.dmm_fini = vos_metrics_free,
-	.dmm_nr_metrics = vos_metrics_count,
+struct daos_module_metrics vos_metrics = { /* metrics hooks: alloc / free / count callbacks */
+    .dmm_tags       = DAOS_TGT_TAG,
+    .dmm_init       = vos_metrics_alloc,
+    .dmm_fini       = vos_metrics_free,
+    .dmm_nr_metrics = vos_metrics_count,
 };
 
 struct dss_module vos_srv_module =  {
diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml
index 3656d4862682..4a7f13b36546 100644
--- a/utils/config/daos_agent.yml
+++ b/utils/config/daos_agent.yml
@@ -26,6 +26,27 @@
 # default: 10001
 #port: 10001
 
+## Enable HTTP endpoint for remote telemetry collection.
+# Note that enabling the endpoint automatically enables
+# client telemetry collection.
+#
+## default endpoint state: disabled
+## default endpoint port: 9192
+#telemetry_port: 9192
+
+## Enable client telemetry for all DAOS clients.
+# If false, individual clients can still opt in to telemetry by setting
+# the D_CLIENT_METRICS_ENABLE environment variable to 1.
+#
+## default: false
+#telemetry_enabled: true
+
+## Retain client telemetry for a period of time after the client
+# process exits.
+#
+## default: 0 (do not retain telemetry after client exit)
+#telemetry_retain: 1m
+
 ## Transport Credentials Specifying certificates to secure communications
 #
 #transport_config: