From 5a027b1b5ecd9335cac02c65d1daf7a9dfb1dafb Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Mon, 29 Apr 2024 11:39:41 -0400 Subject: [PATCH] DAOS-8331 client: Add client side metrics (#14030) (#14204) This commit comprises two separate patches to enable optional collection and export of client-side telemetry. The daos_agent configuration file includes new parameters to control collection and export of per-client telemetry. If the telemetry_port option is set, then per-client telemetry will be published in Prometheus format for real-time sampling of client processes. By default, the client telemetry will be automatically cleaned up on client exit, but may be optionally retained for some amount of time after client exit in order to allow for a final sample to be read. Example daos_agent.yml updates: telemetry_port: 9192 # export on port 9192 telemetry_enable: true # enable client telemetry for all connected clients telemetry_retain: 1m # retain metrics for 1 minute after client exit If telemetry_enable is false (default), client telemetry may be enabled on a per-process basis by setting D_CLIENT_METRICS_ENABLE=1 in the environment for clients that should collect telemetry. Notes from the first patch by Di: Move TLS to common, so both client and server can have TLS, to which metrics can be attached. Add object metrics on the client side, enabled by export D_CLIENT_METRICS_ENABLE=1. Client metrics are organized as "/jobid/pid/xxxxx". During each daos thread initialization, another shmem segment (pid/xxx) is created, to which all metrics of the thread are attached. This metric tree is destroyed once the thread exits, though if D_CLIENT_METRICS_RETAIN is set, these client metrics are retained and can be retrieved by daos_metrics --jobid. Add D_CLIENT_METRICS_DUMP_PATH to dump metrics from the current thread once it exits. Some fixes in telemetry for conv_ptr handling when re-opening the shared memory. 
Add daos_metrics --jobid XXX options to retrieve all metrics of the job. Includes some useful ftest updates from the following commit: * DAOS-11626 test: Adding MD on SSD metrics tests (#13661) Adding tests for WAL commit, reply, and checkpoint metrics. Signed-off-by: Phil Henderson Signed-off-by: Michael MacDonald Signed-off-by: Di Wang Co-authored-by: Phil Henderson Co-authored-by: Di Wang --- src/cart/README.env | 12 + src/cart/crt_init.c | 94 +-- src/client/api/SConscript | 2 +- src/client/api/init.c | 14 +- src/client/api/metrics.c | 216 ++++++ src/common/SConscript | 2 +- src/common/metrics.c | 131 ++++ src/common/tls.c | 227 ++++++ src/container/srv.c | 11 +- src/control/cmd/daos_agent/config.go | 16 + src/control/cmd/daos_agent/infocache.go | 75 +- src/control/cmd/daos_agent/infocache_test.go | 86 ++- src/control/cmd/daos_agent/main.go | 13 +- src/control/cmd/daos_agent/mgmt_rpc.go | 32 + src/control/cmd/daos_agent/mgmt_rpc_test.go | 119 ++- src/control/cmd/daos_agent/start.go | 28 +- src/control/cmd/daos_agent/telemetry.go | 36 + src/control/common/proto/mgmt/svc.pb.go | 192 ++++- src/control/common/test/utils.go | 15 +- src/control/drpc/modules.go | 5 +- src/control/lib/daos/logging.go | 47 ++ src/control/lib/telemetry/promexp/client.go | 176 +++++ .../lib/telemetry/promexp/client_test.go | 163 ++++ .../lib/telemetry/promexp/collector.go | 589 +-------------- src/control/lib/telemetry/promexp/engine.go | 271 +++++++ .../{collector_test.go => engine_test.go} | 114 ++- src/control/lib/telemetry/promexp/httpd.go | 100 +++ .../lib/telemetry/promexp/httpd_test.go | 118 +++ src/control/lib/telemetry/promexp/source.go | 214 ++++++ src/control/lib/telemetry/promexp/util.go | 170 +++++ .../lib/telemetry/promexp/util_test.go | 135 ++++ src/control/lib/telemetry/shm.go | 103 +++ src/control/lib/telemetry/telemetry.go | 253 ++++++- src/control/lib/telemetry/telemetry_test.go | 108 ++- src/control/lib/telemetry/test_helpers.go | 151 ++-- 
src/control/server/telemetry.go | 54 +- src/dtx/dtx_srv.c | 11 +- src/engine/SConscript | 2 +- src/engine/init.c | 9 +- src/engine/module.c | 7 +- src/engine/srv.c | 10 +- src/engine/srv_internal.h | 4 - src/engine/tls.c | 155 ---- src/gurt/examples/telem_consumer_example.c | 11 +- src/gurt/telemetry.c | 703 ++++++++++++++---- src/gurt/tests/test_gurt_telem_producer.c | 17 +- src/include/daos/drpc_modules.h | 75 +- src/include/daos/metrics.h | 82 ++ src/include/daos/mgmt.h | 4 + src/include/daos/pool.h | 2 + src/include/daos/tls.h | 121 +++ src/include/daos_srv/daos_engine.h | 102 +-- src/include/gurt/telemetry_common.h | 11 +- src/include/gurt/telemetry_consumer.h | 15 +- src/include/gurt/telemetry_producer.h | 10 +- src/mgmt/cli_mgmt.c | 85 +++ src/mgmt/svc.pb-c.c | 154 ++++ src/mgmt/svc.pb-c.h | 78 +- src/object/cli_mod.c | 119 ++- src/object/cli_shard.c | 146 +++- src/object/obj_internal.h | 82 ++ src/object/obj_utils.c | 148 ++++ src/object/srv_internal.h | 48 -- src/object/srv_mod.c | 172 +---- src/pool/cli.c | 228 +++++- src/pool/cli_internal.h | 18 +- src/pool/srv.c | 11 +- src/pool/srv_metrics.c | 17 +- src/proto/mgmt/svc.proto | 15 +- src/tests/ftest/server/replay.py | 58 +- src/tests/ftest/server/replay.yaml | 3 +- .../ftest/telemetry/basic_client_telemetry.py | 54 ++ .../telemetry/basic_client_telemetry.yaml | 46 ++ .../ftest/telemetry/dkey_akey_enum_punch.py | 16 +- src/tests/ftest/util/agent_utils_params.py | 11 +- src/tests/ftest/util/ior_utils.py | 80 +- src/tests/ftest/util/telemetry_test_base.py | 37 +- src/tests/ftest/util/telemetry_utils.py | 637 +++++++++++++++- src/utils/daos_metrics/daos_metrics.c | 150 ++-- src/vos/vos_common.c | 11 +- utils/config/daos_agent.yml | 21 + 81 files changed, 6174 insertions(+), 1714 deletions(-) create mode 100644 src/client/api/metrics.c create mode 100644 src/common/metrics.c create mode 100644 src/common/tls.c create mode 100644 src/control/cmd/daos_agent/telemetry.go create mode 100644 
src/control/lib/daos/logging.go create mode 100644 src/control/lib/telemetry/promexp/client.go create mode 100644 src/control/lib/telemetry/promexp/client_test.go create mode 100644 src/control/lib/telemetry/promexp/engine.go rename src/control/lib/telemetry/promexp/{collector_test.go => engine_test.go} (88%) create mode 100644 src/control/lib/telemetry/promexp/httpd.go create mode 100644 src/control/lib/telemetry/promexp/httpd_test.go create mode 100644 src/control/lib/telemetry/promexp/source.go create mode 100644 src/control/lib/telemetry/promexp/util.go create mode 100644 src/control/lib/telemetry/promexp/util_test.go create mode 100644 src/control/lib/telemetry/shm.go delete mode 100644 src/engine/tls.c create mode 100644 src/include/daos/metrics.h create mode 100644 src/include/daos/tls.h create mode 100644 src/tests/ftest/telemetry/basic_client_telemetry.py create mode 100644 src/tests/ftest/telemetry/basic_client_telemetry.yaml diff --git a/src/cart/README.env b/src/cart/README.env index 656f2ab73e4..ad84d8c1b31 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -167,3 +167,15 @@ This file lists the environment variables used in CaRT. . CRT_TEST_CONT When set to 1, orterun does not automatically shut down other servers when one server is shutdown. Used in cart internal testing. + + . D_CLIENT_METRICS_ENABLE + When set to 1, client side metrics will be collected on each daos client, which + can by retrieved by daos_metrics -j job_id on each client. + + . D_CLIENT_METRICS_RETAIN + when set to 1, client side metrics will be retained even after the job exits, i.e. + those metrics can be retrieved by daos_metrics even after job exits. + + . D_CLIENT_METRICS_DUMP_PATH + Set client side metrics dump path(file) for each client, so these metrics will be + dumped to the specified file when the job exits. 
diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 3766753c059..df243b1dce9 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -18,6 +18,50 @@ static volatile int gdata_init_flag; struct crt_plugin_gdata crt_plugin_gdata; static bool g_prov_settings_applied[CRT_PROV_COUNT]; +/* List of the environment variables used in CaRT */ +static const char *crt_env_names[] = { + "D_PROVIDER", + "D_INTERFACE", + "D_DOMAIN", + "D_PORT", + "CRT_PHY_ADDR_STR", + "D_LOG_STDERR_IN_LOG", + "D_LOG_SIZE", + "D_LOG_FILE", + "D_LOG_FILE_APPEND_PID", + "D_LOG_MASK", + "DD_MASK", + "DD_STDERR", + "DD_SUBSYS", + "CRT_TIMEOUT", + "CRT_ATTACH_INFO_PATH", + "OFI_PORT", + "OFI_INTERFACE", + "OFI_DOMAIN", + "CRT_CREDIT_EP_CTX", + "CRT_CTX_SHARE_ADDR", + "CRT_CTX_NUM", + "D_FI_CONFIG", + "FI_UNIVERSE_SIZE", + "CRT_ENABLE_MEM_PIN", + "FI_OFI_RXM_USE_SRX", + "D_LOG_FLUSH", + "CRT_MRC_ENABLE", + "CRT_SECONDARY_PROVIDER", + "D_PROVIDER_AUTH_KEY", + "D_PORT_AUTO_ADJUST", + "D_POLL_TIMEOUT", + "D_LOG_FILE_APPEND_RANK", + "D_QUOTA_RPCS", + "D_POST_INIT", + "D_POST_INCR", + "DAOS_SIGNAL_REGISTER", + "D_CLIENT_METRICS_ENABLE", + "D_CLIENT_METRICS_RETAIN", + "D_CLIENT_METRICS_DUMP_PATH", + +}; + static void crt_lib_init(void) __attribute__((__constructor__)); @@ -62,53 +106,19 @@ crt_lib_fini(void) static void dump_envariables(void) { - int i; - char *val; - static const char *var_names[] = {"D_PROVIDER", - "D_INTERFACE", - "D_DOMAIN", - "D_PORT", - "CRT_PHY_ADDR_STR", - "D_LOG_STDERR_IN_LOG", - "D_LOG_SIZE", - "D_LOG_FILE", - "D_LOG_FILE_APPEND_PID", - "D_LOG_MASK", - "DD_MASK", - "DD_STDERR", - "DD_SUBSYS", - "CRT_TIMEOUT", - "CRT_ATTACH_INFO_PATH", - "OFI_PORT", - "OFI_INTERFACE", - "OFI_DOMAIN", - "CRT_CREDIT_EP_CTX", - "CRT_CTX_SHARE_ADDR", - "CRT_CTX_NUM", - "D_FI_CONFIG", - "FI_UNIVERSE_SIZE", - "CRT_ENABLE_MEM_PIN", - "FI_OFI_RXM_USE_SRX", - "D_LOG_FLUSH", - "CRT_MRC_ENABLE", - "CRT_SECONDARY_PROVIDER", - "D_PROVIDER_AUTH_KEY", - "D_PORT_AUTO_ADJUST", - "D_POLL_TIMEOUT", - 
"D_LOG_FILE_APPEND_RANK", - "D_QUOTA_RPCS", - "D_POST_INIT", - "D_POST_INCR"}; + int i; D_INFO("-- ENVARS: --\n"); - for (i = 0; i < ARRAY_SIZE(var_names); i++) { - d_agetenv_str(&val, var_names[i]); + for (i = 0; i < ARRAY_SIZE(crt_env_names); i++) { + char *val = NULL; + + d_agetenv_str(&val, crt_env_names[i]); if (val == NULL) continue; - if (strcmp(var_names[i], "D_PROVIDER_AUTH_KEY") == 0) - D_INFO("%s = %s\n", var_names[i], "********"); + if (strcmp(crt_env_names[i], "D_PROVIDER_AUTH_KEY") == 0) + D_INFO("%s = %s\n", crt_env_names[i], "********"); else - D_INFO("%s = %s\n", var_names[i], val); + D_INFO("%s = %s\n", crt_env_names[i], val); d_freeenv_str(&val); } } diff --git a/src/client/api/SConscript b/src/client/api/SConscript index e12aa93eaa9..b7e93f516d5 100644 --- a/src/client/api/SConscript +++ b/src/client/api/SConscript @@ -1,7 +1,7 @@ """Build DAOS client""" LIBDAOS_SRC = ['agent.c', 'array.c', 'container.c', 'event.c', 'init.c', 'job.c', 'kv.c', 'mgmt.c', - 'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c'] + 'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c', 'metrics.c'] def scons(): diff --git a/src/client/api/init.c b/src/client/api/init.c index c93fd639321..f574169d8c7 100644 --- a/src/client/api/init.c +++ b/src/client/api/init.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -23,6 +23,7 @@ #include #include #include +#include #include "task_internal.h" #include @@ -219,6 +220,13 @@ daos_init(void) if (rc != 0) D_GOTO(out_pl, rc); + /** set up client telemetry */ + rc = dc_tm_init(); + if (rc != 0) { + /* should not be fatal */ + DL_WARN(rc, "failed to initialize client telemetry"); + } + /** set up pool */ rc = dc_pool_init(); if (rc != 0) @@ -242,6 +250,7 @@ daos_init(void) out_pool: dc_pool_fini(); out_mgmt: + dc_tm_fini(); dc_mgmt_fini(); out_pl: pl_fini(); @@ -291,6 +300,8 @@ daos_fini(void) D_GOTO(unlock, rc); } + /** clean up all registered per-module metrics */ + daos_metrics_fini(); dc_obj_fini(); dc_cont_fini(); dc_pool_fini(); @@ -301,6 +312,7 @@ daos_fini(void) D_ERROR("failed to disconnect some resources may leak, " DF_RC"\n", DP_RC(rc)); + dc_tm_fini(); dc_agent_fini(); dc_job_fini(); diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c new file mode 100644 index 00000000000..2395d9b40f5 --- /dev/null +++ b/src/client/api/metrics.c @@ -0,0 +1,216 @@ +/* + * (C) Copyright 2020-2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(client) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INIT_JOB_NUM 1024 +bool daos_client_metric; +bool daos_client_metric_retain; + +#define MAX_IDS_SIZE(num) (num * D_TM_METRIC_SIZE) +/* The client side metrics structure looks like + * root/job_id/pid/.... 
+ */ + +static int +shm_chown(key_t key, uid_t new_owner) +{ + struct shmid_ds shmid_ds; + int shmid; + int rc; + + rc = shmget(key, 0, 0); + if (rc < 0) { + D_ERROR("shmget(0x%x) failed: %s (%d)\n", key, strerror(errno), errno); + return daos_errno2der(errno); + } + shmid = rc; + + rc = shmctl(shmid, IPC_STAT, &shmid_ds); + if (rc < 0) { + D_ERROR("shmctl(0x%x, IPC_STAT) failed: %s (%d)\n", shmid, strerror(errno), errno); + return daos_errno2der(errno); + } + + shmid_ds.shm_perm.uid = new_owner; + rc = shmctl(shmid, IPC_SET, &shmid_ds); + if (rc < 0) { + D_ERROR("shmctl(0x%x, IPC_SET) failed: %s (%d)\n", shmid, strerror(errno), errno); + return daos_errno2der(errno); + } + + return 0; +} + +static int +init_managed_root(const char *name, pid_t pid, int flags) +{ + uid_t agent_uid; + key_t key; + int rc; + + /* Set the key based on our pid so that it can be easily found. */ + key = pid - D_TM_SHARED_MEMORY_KEY; + rc = d_tm_init_with_name(key, MAX_IDS_SIZE(INIT_JOB_NUM), flags, name); + if (rc != 0) { + DL_ERROR(rc, "failed to initialize root for %s.", name); + return rc; + } + + /* Request that the agent adds our segment into the tree. */ + rc = dc_mgmt_tm_register(NULL, dc_jobid, pid, &agent_uid); + if (rc != 0) { + DL_ERROR(rc, "client telemetry setup failed."); + return rc; + } + + /* Change ownership of the segment so that the agent can manage it. 
*/ + D_INFO("setting shm segment 0x%x to be owned by uid %d\n", pid, agent_uid); + rc = shm_chown(pid, agent_uid); + if (rc != 0) { + DL_ERROR(rc, "failed to chown shm segment."); + return rc; + } + + return 0; +} + +int +dc_tm_init(void) +{ + struct d_tm_node_t *started_at; + pid_t pid = getpid(); + int metrics_tag; + char root_name[D_TM_MAX_NAME_LEN]; + int rc; + + d_getenv_bool(DAOS_CLIENT_METRICS_ENABLE, &daos_client_metric); + if (!daos_client_metric && d_isenv_def(DAOS_CLIENT_METRICS_DUMP_PATH)) + daos_client_metric = true; + + if (!daos_client_metric) + return 0; + + D_INFO("Setting up client telemetry for %s/%d\n", dc_jobid, pid); + + rc = dc_tls_key_create(); + if (rc) + D_GOTO(out, rc); + + metrics_tag = D_TM_OPEN_OR_CREATE | D_TM_MULTIPLE_WRITER_LOCK; + d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN, &daos_client_metric_retain); + if (daos_client_metric_retain) + metrics_tag |= D_TM_RETAIN_SHMEM; + + snprintf(root_name, sizeof(root_name), "%d", pid); + rc = init_managed_root(root_name, pid, metrics_tag); + if (rc != 0) { + DL_ERROR(rc, "failed to initialize client telemetry"); + D_GOTO(out, rc); + } + + rc = d_tm_add_metric(&started_at, D_TM_TIMESTAMP, "Timestamp of client startup", NULL, + "started_at"); + if (rc != 0) { + DL_ERROR(rc, "add metric started_at failed."); + D_GOTO(out, rc); + } + + d_tm_record_timestamp(started_at); +out: + if (rc != 0) { + daos_client_metric = false; + d_tm_fini(); + } + + return rc; +} + +static void +iter_dump(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format, + int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + +static int +dump_tm_file(const char *dump_path) +{ + struct d_tm_context *ctx; + struct d_tm_node_t *root; + char dirname[D_TM_MAX_NAME_LEN] = {0}; + uint32_t filter; + FILE *dump_file; + int rc = 0; + + dump_file = fopen(dump_path, "w+"); + if (dump_file == NULL) { + D_INFO("cannot open %s", dump_path); + return 
-DER_INVAL; + } + + filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | + D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; + + ctx = d_tm_open(DC_TM_JOB_ROOT_ID); + if (ctx == NULL) + D_GOTO(close, rc = -DER_NOMEM); + + snprintf(dirname, sizeof(dirname), "%s/%u", dc_jobid, getpid()); + root = d_tm_find_metric(ctx, dirname); + if (root == NULL) { + printf("No metrics found at: '%s'\n", dirname); + D_GOTO(close_ctx, rc = -DER_NONEXIST); + } + + d_tm_print_field_descriptors(0, dump_file); + + d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_CSV, 0, iter_dump, dump_file); + +close_ctx: + d_tm_close(&ctx); +close: + fclose(dump_file); + return rc; +} + +void +dc_tm_fini() +{ + char *dump_path; + int rc; + + if (!daos_client_metric) + return; + + rc = d_agetenv_str(&dump_path, DAOS_CLIENT_METRICS_DUMP_PATH); + if (rc != 0) + D_GOTO(out, rc); + if (dump_path != NULL) { + D_INFO("dump path is %s\n", dump_path); + dump_tm_file(dump_path); + } + d_freeenv_str(&dump_path); + +out: + dc_tls_fini(); + dc_tls_key_delete(); + + d_tm_fini(); +} diff --git a/src/common/SConscript b/src/common/SConscript index c61ecdeebe3..38bd221793e 100644 --- a/src/common/SConscript +++ b/src/common/SConscript @@ -9,7 +9,7 @@ COMMON_FILES = ['debug.c', 'mem.c', 'fail_loc.c', 'lru.c', 'dedup.c', 'profile.c', 'compression.c', 'compression_isal.c', 'compression_qat.c', 'multihash.c', 'multihash_isal.c', 'cipher.c', 'cipher_isal.c', 'qat.c', 'fault_domain.c', - 'policy.c'] + 'policy.c', 'tls.c', 'metrics.c'] def build_daos_common(denv, client): diff --git a/src/common/metrics.c b/src/common/metrics.c new file mode 100644 index 00000000000..b6c88a3ea0d --- /dev/null +++ b/src/common/metrics.c @@ -0,0 +1,131 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * It implements thread-local storage (TLS) for DAOS. 
+ */ +#include +#include +#include + +struct metrics_list { + struct daos_module_metrics *mm_metrics; + d_list_t mm_list; + uint32_t mm_id; +}; + +/* Track list of loaded modules */ +D_LIST_HEAD(metrics_mod_list); +pthread_mutex_t metrics_mod_list_lock = PTHREAD_MUTEX_INITIALIZER; + +int +daos_metrics_init(enum daos_module_tag tag, uint32_t id, struct daos_module_metrics *metrics) +{ + struct metrics_list *ml; + + D_ALLOC_PTR(ml); + if (ml == NULL) + return -DER_NOMEM; + ml->mm_metrics = metrics; + ml->mm_id = id; + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_add_tail(&ml->mm_list, &metrics_mod_list); + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + + return 0; +} + +void +daos_metrics_fini(void) +{ + struct metrics_list *ml; + struct metrics_list *tmp; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry_safe(ml, tmp, &metrics_mod_list, mm_list) { + d_list_del_init(&ml->mm_list); + D_FREE(ml); + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); +} + +void +daos_module_fini_metrics(enum dss_module_tag tag, void **metrics) +{ + struct metrics_list *ml; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if ((met->dmm_tags & tag) == 0) + continue; + if (met->dmm_fini == NULL) + continue; + if (metrics[ml->mm_id] == NULL) + continue; + + met->dmm_fini(metrics[ml->mm_id]); + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); +} + +int +daos_module_init_metrics(enum dss_module_tag tag, void **metrics, const char *path, int tgt_id) +{ + struct metrics_list *ml; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if ((met->dmm_tags & tag) == 0) + continue; + if (met->dmm_init == NULL) + continue; + + metrics[ml->mm_id] = met->dmm_init(path, tgt_id); + if (metrics[ml->mm_id] == NULL) { + D_ERROR("failed to 
allocate per-pool metrics for module %u\n", ml->mm_id); + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + daos_module_fini_metrics(tag, metrics); + return -DER_NOMEM; + } + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + + return 0; +} + +/** + * Query all modules for the number of per-pool metrics they create. + * + * \return Total number of metrics for all modules + */ +int +daos_module_nr_pool_metrics(void) +{ + struct metrics_list *ml; + int total = 0; + + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if (met->dmm_nr_metrics == NULL) + continue; + if (!(met->dmm_tags & DAOS_CLI_TAG)) + continue; + + total += met->dmm_nr_metrics(); + } + + return total; +} diff --git a/src/common/tls.c b/src/common/tls.c new file mode 100644 index 00000000000..89b9baf13e8 --- /dev/null +++ b/src/common/tls.c @@ -0,0 +1,227 @@ +/** + * (C) Copyright 2016-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * It implements thread-local storage (TLS) for DAOS. + */ +#include +#include + +/* The array remember all of registered module keys on one node. 
*/ +static struct daos_module_key *daos_module_keys[DAOS_MODULE_KEYS_NR] = {NULL}; +pthread_mutex_t daos_module_keys_lock = PTHREAD_MUTEX_INITIALIZER; + +static __thread bool dc_tls_thread_init; + +static pthread_key_t dss_tls_key; +static pthread_key_t dc_tls_key; + +void +daos_register_key(struct daos_module_key *key) +{ + int i; + + D_MUTEX_LOCK(&daos_module_keys_lock); + for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { + if (daos_module_keys[i] == NULL) { + daos_module_keys[i] = key; + key->dmk_index = i; + break; + } + } + D_MUTEX_UNLOCK(&daos_module_keys_lock); + D_ASSERT(i < DAOS_MODULE_KEYS_NR); +} + +void +daos_unregister_key(struct daos_module_key *key) +{ + if (key == NULL) + return; + D_ASSERT(key->dmk_index >= 0); + D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); + D_MUTEX_LOCK(&daos_module_keys_lock); + daos_module_keys[key->dmk_index] = NULL; + D_MUTEX_UNLOCK(&daos_module_keys_lock); +} + +struct daos_module_key * +daos_get_module_key(int index) +{ + D_ASSERT(index < DAOS_MODULE_KEYS_NR); + D_ASSERT(index >= 0); + + return daos_module_keys[index]; +} + +static int +daos_thread_local_storage_init(struct daos_thread_local_storage *dtls, int xs_id, int tgt_id) +{ + int rc = 0; + int i; + + if (dtls->dtls_values == NULL) { + D_ALLOC_ARRAY(dtls->dtls_values, DAOS_MODULE_KEYS_NR); + if (dtls->dtls_values == NULL) + return -DER_NOMEM; + } + + for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { + struct daos_module_key *dmk = daos_module_keys[i]; + + if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { + D_ASSERT(dmk->dmk_init != NULL); + dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id); + if (dtls->dtls_values[i] == NULL) { + rc = -DER_NOMEM; + break; + } + } + } + return rc; +} + +static void +daos_thread_local_storage_fini(struct daos_thread_local_storage *dtls) +{ + int i; + + if (dtls->dtls_values != NULL) { + for (i = DAOS_MODULE_KEYS_NR - 1; i >= 0; i--) { + struct daos_module_key *dmk = daos_module_keys[i]; + + if (dmk != NULL && dtls->dtls_tag 
& dmk->dmk_tags) { + D_ASSERT(dtls->dtls_values[i] != NULL); + D_ASSERT(dmk->dmk_fini != NULL); + dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]); + } + } + } + + D_FREE(dtls->dtls_values); +} + +/* + * Allocate daos_thread_local_storage for a particular thread on server and + * store the pointer in a thread-specific value which can be fetched at any + * time with daos_tls_get(). + */ +static struct daos_thread_local_storage * +daos_tls_init(int tag, int xs_id, int tgt_id, bool server) +{ + struct daos_thread_local_storage *dtls; + int rc; + + D_ALLOC_PTR(dtls); + if (dtls == NULL) + return NULL; + + dtls->dtls_tag = tag; + rc = daos_thread_local_storage_init(dtls, xs_id, tgt_id); + if (rc != 0) { + D_FREE(dtls); + return NULL; + } + + if (server) { + rc = pthread_setspecific(dss_tls_key, dtls); + } else { + rc = pthread_setspecific(dc_tls_key, dtls); + if (rc == 0) + dc_tls_thread_init = true; + } + + if (rc) { + D_ERROR("failed to initialize tls: %d\n", rc); + daos_thread_local_storage_fini(dtls); + D_FREE(dtls); + return NULL; + } + + return dtls; +} + +int +ds_tls_key_create(void) +{ + return pthread_key_create(&dss_tls_key, NULL); +} + +int +dc_tls_key_create(void) +{ + return pthread_key_create(&dc_tls_key, NULL); +} + +void +ds_tls_key_delete() +{ + pthread_key_delete(dss_tls_key); +} + +void +dc_tls_key_delete(void) +{ + pthread_key_delete(dc_tls_key); +} + +/* Free DTC for a particular thread. */ +static void +daos_tls_fini(struct daos_thread_local_storage *dtls, bool server) +{ + daos_thread_local_storage_fini(dtls); + D_FREE(dtls); + if (server) + pthread_setspecific(dss_tls_key, NULL); + else + pthread_setspecific(dc_tls_key, NULL); +} + +/* Allocate local per thread storage. */ +struct daos_thread_local_storage * +dc_tls_init(int tag, uint32_t pid) +{ + return daos_tls_init(tag, -1, pid, false); +} + +/* Free DTC for a particular thread. 
*/ +void +dc_tls_fini(void) +{ + struct daos_thread_local_storage *dtls; + + dtls = (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key); + if (dtls != NULL) + daos_tls_fini(dtls, false); +} + +struct daos_thread_local_storage * +dc_tls_get(unsigned int tag) +{ + if (!dc_tls_thread_init) + return dc_tls_init(tag, getpid()); + + return (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key); +} + +struct daos_thread_local_storage * +dss_tls_get() +{ + return (struct daos_thread_local_storage *)pthread_getspecific(dss_tls_key); +} + +/* Allocate local per thread storage. */ +struct daos_thread_local_storage * +dss_tls_init(int tag, int xs_id, int tgt_id) +{ + return daos_tls_init(tag, xs_id, tgt_id, true); +} + +/* Free DTC for a particular thread. */ +void +dss_tls_fini(struct daos_thread_local_storage *dtls) +{ + daos_tls_fini(dtls, true); +} diff --git a/src/container/srv.c b/src/container/srv.c index 80650f7c16c..05760d9439e 100644 --- a/src/container/srv.c +++ b/src/container/srv.c @@ -12,6 +12,7 @@ #define D_LOGFAC DD_FAC(container) #include +#include #include #include "rpc.h" #include "srv_internal.h" @@ -142,11 +143,11 @@ struct dss_module_key cont_module_key = { .dmk_fini = dsm_tls_fini, }; -struct dss_module_metrics cont_metrics = { - .dmm_tags = DAOS_SYS_TAG, - .dmm_init = ds_cont_metrics_alloc, - .dmm_fini = ds_cont_metrics_free, - .dmm_nr_metrics = ds_cont_metrics_count, +struct daos_module_metrics cont_metrics = { + .dmm_tags = DAOS_SYS_TAG, + .dmm_init = ds_cont_metrics_alloc, + .dmm_fini = ds_cont_metrics_free, + .dmm_nr_metrics = ds_cont_metrics_count, }; struct dss_module cont_module = { diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index 3a6f7a14368..c9d08d19744 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -55,6 +55,14 @@ type Config struct { DisableAutoEvict bool `yaml:"disable_auto_evict,omitempty"` ExcludeFabricIfaces 
common.StringSet `yaml:"exclude_fabric_ifaces,omitempty"` FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` + TelemetryPort int `yaml:"telemetry_port,omitempty"` + TelemetryEnabled bool `yaml:"telemetry_enabled,omitempty"` + TelemetryRetain time.Duration `yaml:"telemetry_retain,omitempty"` +} + +// TelemetryExportEnabled returns true if client telemetry export is enabled. +func (c *Config) TelemetryExportEnabled() bool { + return c.TelemetryPort > 0 } // NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA @@ -89,6 +97,14 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, fmt.Errorf("invalid system name: %q", cfg.SystemName) } + if cfg.TelemetryRetain > 0 && cfg.TelemetryPort == 0 { + return nil, errors.New("telemetry_retain requires telemetry_port") + } + + if cfg.TelemetryEnabled && cfg.TelemetryPort == 0 { + return nil, errors.New("telemetry_enabled requires telemetry_port") + } + return cfg, nil } diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index 0dbdf4fc645..cb777396ff1 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2023 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,6 +8,7 @@ package main import ( "context" + "fmt" "net" "strings" "sync" @@ -22,6 +23,7 @@ import ( "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/hardware" "github.com/daos-stack/daos/src/control/lib/hardware/hwprov" + "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" ) @@ -36,17 +38,20 @@ type fabricScanFn func(ctx context.Context, providers ...string) (*NUMAFabric, e // NewInfoCache creates a new InfoCache with appropriate parameters set. 
func NewInfoCache(ctx context.Context, log logging.Logger, client control.UnaryInvoker, cfg *Config) *InfoCache { ic := &InfoCache{ - log: log, - ignoreIfaces: cfg.ExcludeFabricIfaces, - client: client, - cache: cache.NewItemCache(log), - getAttachInfo: control.GetAttachInfo, - fabricScan: getFabricScanFn(log, cfg, hwprov.DefaultFabricScanner(log)), - netIfaces: net.Interfaces, - devClassGetter: hwprov.DefaultNetDevClassProvider(log), - devStateGetter: hwprov.DefaultNetDevStateProvider(log), + log: log, + ignoreIfaces: cfg.ExcludeFabricIfaces, + client: client, + cache: cache.NewItemCache(log), + getAttachInfoCb: control.GetAttachInfo, + fabricScan: getFabricScanFn(log, cfg, hwprov.DefaultFabricScanner(log)), + netIfaces: net.Interfaces, + devClassGetter: hwprov.DefaultNetDevClassProvider(log), + devStateGetter: hwprov.DefaultNetDevStateProvider(log), } + ic.clientTelemetryEnabled.Store(cfg.TelemetryEnabled) + ic.clientTelemetryRetain.Store(cfg.TelemetryRetain > 0) + if cfg.DisableCache { ic.DisableAttachInfoCache() ic.DisableFabricCache() @@ -198,12 +203,14 @@ type InfoCache struct { cache *cache.ItemCache fabricCacheDisabled atm.Bool attachInfoCacheDisabled atm.Bool + clientTelemetryEnabled atm.Bool + clientTelemetryRetain atm.Bool - getAttachInfo getAttachInfoFn - fabricScan fabricScanFn - netIfaces func() ([]net.Interface, error) - devClassGetter hardware.NetDevClassProvider - devStateGetter hardware.NetDevStateProvider + getAttachInfoCb getAttachInfoFn + fabricScan fabricScanFn + netIfaces func() ([]net.Interface, error) + devClassGetter hardware.NetDevClassProvider + devStateGetter hardware.NetDevStateProvider client control.UnaryInvoker attachInfoRefresh time.Duration @@ -292,6 +299,41 @@ func (c *InfoCache) EnableStaticFabricCache(ctx context.Context, nf *NUMAFabric) c.EnableFabricCache() } +func (c *InfoCache) getAttachInfo(ctx context.Context, rpcClient control.UnaryInvoker, req *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + if c == 
nil { + return nil, errors.New("InfoCache is nil") + } + if c.getAttachInfoCb == nil { + return nil, errors.New("getAttachInfoFn is nil") + } + + resp, err := c.getAttachInfoCb(ctx, rpcClient, req) + if err != nil { + return nil, err + } + c.addTelemetrySettings(resp) + return resp, nil +} + +// addTelemetrySettings modifies the response by adding telemetry settings +// before returning it. +func (c *InfoCache) addTelemetrySettings(resp *control.GetAttachInfoResp) { + if c == nil || resp == nil { + return + } + + if c.clientTelemetryEnabled.IsTrue() { + resp.ClientNetHint.EnvVars = append(resp.ClientNetHint.EnvVars, + fmt.Sprintf("%s=1", telemetry.ClientMetricsEnabledEnv), + ) + if c.clientTelemetryRetain.IsTrue() { + resp.ClientNetHint.EnvVars = append(resp.ClientNetHint.EnvVars, + fmt.Sprintf("%s=1", telemetry.ClientMetricsRetainEnv), + ) + } + } +} + // GetAttachInfo fetches the attach info from the cache, and refreshes if necessary. func (c *InfoCache) GetAttachInfo(ctx context.Context, sys string) (*control.GetAttachInfoResp, error) { if c == nil { @@ -308,7 +350,8 @@ func (c *InfoCache) GetAttachInfo(ctx context.Context, sys string) (*control.Get } createItem := func() (cache.Item, error) { c.log.Debugf("cache miss for %s", sysAttachInfoKey(sys)) - return newCachedAttachInfo(c.attachInfoRefresh, sys, c.client, c.getAttachInfo), nil + cai := newCachedAttachInfo(c.attachInfoRefresh, sys, c.client, c.getAttachInfo) + return cai, nil } item, release, err := c.cache.GetOrCreate(ctx, sysAttachInfoKey(sys), createItem) diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index 54571d006a7..e86c44bfc0c 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2023 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,20 +8,23 @@ package main import ( "context" + "fmt" "net" "testing" "time" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/cache" "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/pkg/errors" ) type testInfoCacheParams struct { @@ -32,6 +35,8 @@ type testInfoCacheParams struct { mockNetDevStateGetter hardware.NetDevStateProvider disableFabricCache bool disableAttachInfoCache bool + enableClientTelemetry bool + retainClientTelemetry bool ctlInvoker control.Invoker cachedItems []cache.Item } @@ -43,16 +48,19 @@ func newTestInfoCache(t *testing.T, log logging.Logger, params testInfoCachePara } ic := &InfoCache{ - log: log, - getAttachInfo: params.mockGetAttachInfo, - fabricScan: params.mockScanFabric, - devClassGetter: params.mockNetDevClassGetter, - devStateGetter: params.mockNetDevStateGetter, - netIfaces: params.mockNetIfaces, - client: params.ctlInvoker, - cache: c, + log: log, + getAttachInfoCb: params.mockGetAttachInfo, + fabricScan: params.mockScanFabric, + devClassGetter: params.mockNetDevClassGetter, + devStateGetter: params.mockNetDevStateGetter, + netIfaces: params.mockNetIfaces, + client: params.ctlInvoker, + cache: c, } + ic.clientTelemetryEnabled.Store(params.enableClientTelemetry) + ic.clientTelemetryRetain.Store(params.retainClientTelemetry) + if ic.netIfaces == nil { ic.netIfaces = func() ([]net.Interface, error) { return []net.Interface{ @@ -714,6 +722,14 @@ func TestAgent_InfoCache_GetAttachInfo(t 
*testing.T) { NetDevClass: uint32(hardware.Ether), }, } + telemEnabledResp := copyGetAttachInfoResp(ctlResp) + telemEnabledResp.ClientNetHint.EnvVars = append(telemEnabledResp.ClientNetHint.EnvVars, + fmt.Sprintf("%s=1", telemetry.ClientMetricsEnabledEnv), + ) + telemRetainedResp := copyGetAttachInfoResp(telemEnabledResp) + telemRetainedResp.ClientNetHint.EnvVars = append(telemRetainedResp.ClientNetHint.EnvVars, + fmt.Sprintf("%s=1", telemetry.ClientMetricsRetainEnv), + ) for name, tc := range map[string]struct { getInfoCache func(logging.Logger) *InfoCache @@ -734,7 +750,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { disableAttachInfoCache: true, }) }, - remoteResp: ctlResp, + remoteResp: copyGetAttachInfoResp(ctlResp), expResp: ctlResp, expRemote: true, }, @@ -748,11 +764,45 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { expErr: errors.New("mock remote"), expRemote: true, }, + "cache disabled; client telemetry enabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + disableAttachInfoCache: true, + enableClientTelemetry: true, + }) + }, + remoteResp: copyGetAttachInfoResp(ctlResp), + expResp: telemEnabledResp, + expRemote: true, + }, + "cache enabled; client telemetry enabled": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + enableClientTelemetry: true, + }) + }, + remoteResp: copyGetAttachInfoResp(ctlResp), + expResp: telemEnabledResp, + expRemote: true, + expCached: true, + }, + "cache enabled; client telemetry enabled; client telemetry retained": { + getInfoCache: func(l logging.Logger) *InfoCache { + return newTestInfoCache(t, l, testInfoCacheParams{ + enableClientTelemetry: true, + retainClientTelemetry: true, + }) + }, + remoteResp: copyGetAttachInfoResp(ctlResp), + expResp: telemRetainedResp, + expRemote: true, + expCached: true, + }, "enabled but empty": { getInfoCache: func(l logging.Logger) *InfoCache { return 
newTestInfoCache(t, l, testInfoCacheParams{}) }, - remoteResp: ctlResp, + remoteResp: copyGetAttachInfoResp(ctlResp), expResp: ctlResp, expRemote: true, expCached: true, @@ -772,7 +822,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { return nil, errors.New("shouldn't call cached remote") }, - lastResponse: ctlResp, + lastResponse: copyGetAttachInfoResp(ctlResp), cacheItem: cacheItem{lastCached: time.Now()}, system: "test", }) @@ -790,7 +840,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { fetch: func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { return nil, errors.New("shouldn't call cached remote") }, - lastResponse: ctlResp, + lastResponse: copyGetAttachInfoResp(ctlResp), cacheItem: cacheItem{lastCached: time.Now()}, system: build.DefaultSystemName, }) @@ -814,7 +864,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { return ic }, system: "somethingelse", - remoteResp: ctlResp, + remoteResp: copyGetAttachInfoResp(ctlResp), expResp: ctlResp, expCached: true, expRemote: true, @@ -831,7 +881,7 @@ func TestAgent_InfoCache_GetAttachInfo(t *testing.T) { calledRemote := false if ic != nil { - ic.getAttachInfo = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { + ic.getAttachInfoCb = func(_ context.Context, _ control.UnaryInvoker, _ *control.GetAttachInfoReq) (*control.GetAttachInfoResp, error) { calledRemote = true return tc.remoteResp, tc.remoteErr } diff --git a/src/control/cmd/daos_agent/main.go b/src/control/cmd/daos_agent/main.go index 73788a7cb49..1518207a3cb 100644 --- a/src/control/cmd/daos_agent/main.go +++ b/src/control/cmd/daos_agent/main.go @@ -20,6 +20,7 @@ import ( "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/atm" 
"github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/hardware/hwprov" "github.com/daos-stack/daos/src/control/logging" ) @@ -112,6 +113,17 @@ func parseOpts(args []string, opts *cliOptions, invoker control.Invoker, log *lo logCmd.SetLog(log) } + daosLogMask := daos.DefaultErrorMask + if opts.Debug { + log.SetLevel(logging.LogLevelTrace) + daosLogMask = daos.DefaultDebugMask + } + fini, err := daos.InitLogging(daosLogMask) + if err != nil { + return err + } + defer fini() + if jsonCmd, ok := cmd.(cmdutil.JSONOutputter); ok && opts.JSON { jsonCmd.EnableJSONOutput(os.Stdout, &wroteJSON) // disable output on stdout other than JSON @@ -194,7 +206,6 @@ func parseOpts(args []string, opts *cliOptions, invoker control.Invoker, log *lo return errors.Wrap(err, "Unable to load Certificate Data") } - var err error if cfg.AccessPoints, err = common.ParseHostList(cfg.AccessPoints, cfg.ControlPort); err != nil { return errors.Wrap(err, "Failed to parse config access_points") } diff --git a/src/control/cmd/daos_agent/mgmt_rpc.go b/src/control/cmd/daos_agent/mgmt_rpc.go index 17c07b4a2f6..75dc337e313 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc.go +++ b/src/control/cmd/daos_agent/mgmt_rpc.go @@ -25,6 +25,8 @@ import ( "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/telemetry" + "github.com/daos-stack/daos/src/control/lib/telemetry/promexp" "github.com/daos-stack/daos/src/control/logging" ) @@ -40,6 +42,7 @@ type mgmtModule struct { ctlInvoker control.Invoker cache *InfoCache monitor *procMon + cliMetricsSrc *promexp.ClientSource useDefaultNUMA bool numaGetter hardware.ProcessNUMAProvider @@ -71,6 +74,8 @@ func (mod *mgmtModule) HandleCall(ctx context.Context, session *drpc.Session, me switch method { case drpc.MethodGetAttachInfo: 
return mod.handleGetAttachInfo(ctx, req, cred.Pid) + case drpc.MethodSetupClientTelemetry: + return mod.handleSetupClientTelemetry(ctx, req, cred) case drpc.MethodNotifyPoolConnect: return nil, mod.handleNotifyPoolConnect(ctx, req, cred.Pid) case drpc.MethodNotifyPoolDisconnect: @@ -214,6 +219,33 @@ func (mod *mgmtModule) getFabricInterface(ctx context.Context, numaNode int, net return mod.cache.GetFabricDevice(ctx, numaNode, netDevClass, provider) } +func (mod *mgmtModule) handleSetupClientTelemetry(ctx context.Context, reqb []byte, cred *unix.Ucred) ([]byte, error) { + if len(reqb) == 0 { + return nil, errors.New("empty request") + } + + pbReq := new(mgmtpb.ClientTelemetryReq) + if err := proto.Unmarshal(reqb, pbReq); err != nil { + return nil, drpc.UnmarshalingPayloadFailure() + } + if pbReq.Jobid == "" { + return nil, errors.New("empty jobid") + } + if pbReq.ShmKey == 0 { + return nil, errors.New("unset shm key") + } + if cred == nil { + return nil, errors.New("nil user credentials") + } + + if err := telemetry.SetupClientRoot(ctx, pbReq.Jobid, int(cred.Pid), int(pbReq.ShmKey)); err != nil { + return nil, err + } + resp := &mgmtpb.ClientTelemetryResp{AgentUid: int32(unix.Getuid())} + mod.log.Tracef("%d: %s", cred.Pid, pblog.Debug(resp)) + return proto.Marshal(resp) +} + func (mod *mgmtModule) handleNotifyPoolConnect(ctx context.Context, reqb []byte, pid int32) error { pbReq := new(mgmtpb.PoolMonitorReq) if err := proto.Unmarshal(reqb, pbReq); err != nil { diff --git a/src/control/cmd/daos_agent/mgmt_rpc_test.go b/src/control/cmd/daos_agent/mgmt_rpc_test.go index 9bd85decf08..59fcb507a81 100644 --- a/src/control/cmd/daos_agent/mgmt_rpc_test.go +++ b/src/control/cmd/daos_agent/mgmt_rpc_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -15,18 +15,22 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" + "golang.org/x/sys/unix" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/testing/protocmp" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/drpc" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/fault/code" "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" ) @@ -388,3 +392,116 @@ func TestAgent_mgmtModule_RefreshCache(t *testing.T) { }) } } + +func TestAgent_handleSetupClientTelemetry(t *testing.T) { + testCreds := &unix.Ucred{ + Uid: 123, + Gid: 456, + } + testSysName := "test-sys" + testJobID := "test-job" + testShmKey := int32(42) + + for name, tc := range map[string]struct { + clientBytes []byte + clientReq *mgmtpb.ClientTelemetryReq + clientCred *unix.Ucred + expResp *mgmtpb.ClientTelemetryResp + expErr error + }{ + "nil client request": { + clientReq: nil, + clientCred: testCreds, + expErr: errors.New("empty request"), + }, + "garbage client request": { + clientBytes: []byte("invalid"), + clientCred: testCreds, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "unset jobid": { + clientReq: &mgmtpb.ClientTelemetryReq{ + Sys: testSysName, + Jobid: "", + ShmKey: testShmKey, + }, + clientCred: testCreds, + expErr: errors.New("empty jobid"), + }, + "unset shm key": { + clientReq: &mgmtpb.ClientTelemetryReq{ + Sys: testSysName, + Jobid: testJobID, + ShmKey: 0, + 
}, + clientCred: testCreds, + expErr: errors.New("unset shm key"), + }, + "nil user creds": { + clientReq: &mgmtpb.ClientTelemetryReq{ + Sys: testSysName, + Jobid: testJobID, + ShmKey: testShmKey, + }, + clientCred: nil, + expErr: errors.New("nil user credentials"), + }, + "success": { + clientReq: &mgmtpb.ClientTelemetryReq{ + Sys: testSysName, + Jobid: testJobID, + ShmKey: testShmKey, + }, + clientCred: testCreds, + expResp: &mgmtpb.ClientTelemetryResp{ + AgentUid: int32(unix.Getuid()), + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + mod := &mgmtModule{ + log: log, + } + + var reqBytes []byte + if len(tc.clientBytes) > 0 { + reqBytes = tc.clientBytes + } else { + var err error + reqBytes, err = proto.Marshal(tc.clientReq) + if err != nil { + t.Fatal(err) + } + } + + testID := uint32(telemetry.NextTestID(telemetry.AgentIDBase)) + telemetry.InitTestMetricsProducer(t, int(testID), 2048) + defer telemetry.CleanupTestMetricsProducer(t) + + parent := test.MustLogContext(t, log) + ctx, err := telemetry.Init(parent, testID) + if err != nil { + t.Fatal(err) + } + defer telemetry.Fini() + + gotResp, gotErr := mod.handleSetupClientTelemetry(ctx, reqBytes, tc.clientCred) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + expRespBytes, err := proto.Marshal(tc.expResp) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(expRespBytes, gotResp, protocmp.Transform()); diff != "" { + t.Fatalf("-want, +got:\n%s", diff) + } + }) + } +} diff --git a/src/control/cmd/daos_agent/start.go b/src/control/cmd/daos_agent/start.go index cb5505234d5..e5416ee874b 100644 --- a/src/control/cmd/daos_agent/start.go +++ b/src/control/cmd/daos_agent/start.go @@ -23,6 +23,7 @@ import ( "github.com/daos-stack/daos/src/control/lib/hardware/hwloc" "github.com/daos-stack/daos/src/control/lib/hardware/hwprov" "github.com/daos-stack/daos/src/control/lib/systemd" + 
"github.com/daos-stack/daos/src/control/lib/telemetry/promexp" ) type ctxKey string @@ -98,15 +99,30 @@ func (cmd *startCmd) Execute(_ []string) error { procmon.startMonitoring(ctx) cmd.Debugf("started process monitor: %s", time.Since(procmonStart)) + var clientMetricSource *promexp.ClientSource + if cmd.cfg.TelemetryExportEnabled() { + if clientMetricSource, err = promexp.NewClientSource(ctx); err != nil { + return errors.Wrap(err, "unable to create client metrics source") + } + telemetryStart := time.Now() + shutdown, err := startPrometheusExporter(ctx, cmd, clientMetricSource, cmd.cfg) + if err != nil { + return errors.Wrap(err, "unable to start prometheus exporter") + } + defer shutdown() + cmd.Debugf("telemetry exporter started: %s", time.Since(telemetryStart)) + } + drpcRegStart := time.Now() drpcServer.RegisterRPCModule(NewSecurityModule(cmd.Logger, cmd.cfg.TransportConfig)) mgmtMod := &mgmtModule{ - log: cmd.Logger, - sys: cmd.cfg.SystemName, - ctlInvoker: cmd.ctlInvoker, - cache: cache, - numaGetter: hwprov.DefaultProcessNUMAProvider(cmd.Logger), - monitor: procmon, + log: cmd.Logger, + sys: cmd.cfg.SystemName, + ctlInvoker: cmd.ctlInvoker, + cache: cache, + numaGetter: hwprov.DefaultProcessNUMAProvider(cmd.Logger), + monitor: procmon, + cliMetricsSrc: clientMetricSource, } drpcServer.RegisterRPCModule(mgmtMod) cmd.Debugf("registered dRPC modules: %s", time.Since(drpcRegStart)) diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go new file mode 100644 index 00000000000..4c0e2d35b4c --- /dev/null +++ b/src/control/cmd/daos_agent/telemetry.go @@ -0,0 +1,36 @@ +// +// (C) Copyright 2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/lib/telemetry/promexp" + "github.com/daos-stack/daos/src/control/logging" +) + +func startPrometheusExporter(ctx context.Context, log logging.Logger, cs *promexp.ClientSource, cfg *Config) (func(), error) { + expCfg := &promexp.ExporterConfig{ + Port: cfg.TelemetryPort, + Title: "DAOS Client Telemetry", + Register: func(ctx context.Context, log logging.Logger) error { + c, err := promexp.NewClientCollector(ctx, log, cs, &promexp.CollectorOpts{ + RetainDuration: cfg.TelemetryRetain, + }) + if err != nil { + return err + } + prometheus.MustRegister(c) + + return nil + }, + } + + return promexp.StartExporter(ctx, log, expCfg) +} diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 444f64c5769..86c11e72f08 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -1,13 +1,13 @@ // -// (C) Copyright 2018-2023 Intel Corporation. +// (C) Copyright 2018-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.28.1 -// protoc v3.11.4 +// protoc-gen-go v1.31.0 +// protoc v3.21.12 // source: mgmt/svc.proto package mgmt @@ -990,6 +990,124 @@ func (x *PoolMonitorReq) GetJobid() string { return "" } +type ClientTelemetryReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Sys string `protobuf:"bytes,1,opt,name=sys,proto3" json:"sys,omitempty"` // DAOS system identifier + Jobid string `protobuf:"bytes,2,opt,name=jobid,proto3" json:"jobid,omitempty"` // Job ID used for client telemetry + ShmKey int32 `protobuf:"varint,3,opt,name=shm_key,json=shmKey,proto3" json:"shm_key,omitempty"` // Client's shared memory segment key +} + +func (x *ClientTelemetryReq) Reset() { + *x = ClientTelemetryReq{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_svc_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ClientTelemetryReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ClientTelemetryReq) ProtoMessage() {} + +func (x *ClientTelemetryReq) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_svc_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ClientTelemetryReq.ProtoReflect.Descriptor instead. 
+func (*ClientTelemetryReq) Descriptor() ([]byte, []int) { + return file_mgmt_svc_proto_rawDescGZIP(), []int{14} +} + +func (x *ClientTelemetryReq) GetSys() string { + if x != nil { + return x.Sys + } + return "" +} + +func (x *ClientTelemetryReq) GetJobid() string { + if x != nil { + return x.Jobid + } + return "" +} + +func (x *ClientTelemetryReq) GetShmKey() int32 { + if x != nil { + return x.ShmKey + } + return 0 +} + +type ClientTelemetryResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS status code + AgentUid int32 `protobuf:"varint,2,opt,name=agent_uid,json=agentUid,proto3" json:"agent_uid,omitempty"` // UID of agent process +} + +func (x *ClientTelemetryResp) Reset() { + *x = ClientTelemetryResp{} + if protoimpl.UnsafeEnabled { + mi := &file_mgmt_svc_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ClientTelemetryResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ClientTelemetryResp) ProtoMessage() {} + +func (x *ClientTelemetryResp) ProtoReflect() protoreflect.Message { + mi := &file_mgmt_svc_proto_msgTypes[15] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ClientTelemetryResp.ProtoReflect.Descriptor instead. 
+func (*ClientTelemetryResp) Descriptor() ([]byte, []int) { + return file_mgmt_svc_proto_rawDescGZIP(), []int{15} +} + +func (x *ClientTelemetryResp) GetStatus() int32 { + if x != nil { + return x.Status + } + return 0 +} + +func (x *ClientTelemetryResp) GetAgentUid() int32 { + if x != nil { + return x.AgentUid + } + return 0 +} + type GroupUpdateReq_Engine struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1003,7 +1121,7 @@ type GroupUpdateReq_Engine struct { func (x *GroupUpdateReq_Engine) Reset() { *x = GroupUpdateReq_Engine{} if protoimpl.UnsafeEnabled { - mi := &file_mgmt_svc_proto_msgTypes[14] + mi := &file_mgmt_svc_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1016,7 +1134,7 @@ func (x *GroupUpdateReq_Engine) String() string { func (*GroupUpdateReq_Engine) ProtoMessage() {} func (x *GroupUpdateReq_Engine) ProtoReflect() protoreflect.Message { - mi := &file_mgmt_svc_proto_msgTypes[14] + mi := &file_mgmt_svc_proto_msgTypes[16] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1065,7 +1183,7 @@ type GetAttachInfoResp_RankUri struct { func (x *GetAttachInfoResp_RankUri) Reset() { *x = GetAttachInfoResp_RankUri{} if protoimpl.UnsafeEnabled { - mi := &file_mgmt_svc_proto_msgTypes[15] + mi := &file_mgmt_svc_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1078,7 +1196,7 @@ func (x *GetAttachInfoResp_RankUri) String() string { func (*GetAttachInfoResp_RankUri) ProtoMessage() {} func (x *GetAttachInfoResp_RankUri) ProtoReflect() protoreflect.Message { - mi := &file_mgmt_svc_proto_msgTypes[15] + mi := &file_mgmt_svc_proto_msgTypes[17] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1221,11 +1339,21 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x6c, 0x65, 0x55, 0x55, 0x49, 
0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, - 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, - 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x69, 0x64, 0x22, 0x55, 0x0a, 0x12, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x54, 0x65, 0x6c, 0x65, + 0x6d, 0x65, 0x74, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, + 0x62, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x12, 0x17, 0x0a, 0x07, 0x73, 0x68, 0x6d, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x68, 0x6d, 0x4b, 0x65, 0x79, 0x22, 0x4a, 0x0a, 0x13, 0x43, 0x6c, 0x69, + 0x65, 0x6e, 0x74, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x67, 0x65, 0x6e, + 0x74, 0x5f, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x61, 0x67, 0x65, + 0x6e, 0x74, 0x55, 0x69, 0x64, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 
0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, + 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1241,7 +1369,7 @@ func file_mgmt_svc_proto_rawDescGZIP() []byte { } var file_mgmt_svc_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_mgmt_svc_proto_msgTypes = make([]protoimpl.MessageInfo, 16) +var file_mgmt_svc_proto_msgTypes = make([]protoimpl.MessageInfo, 18) var file_mgmt_svc_proto_goTypes = []interface{}{ (JoinResp_State)(0), // 0: mgmt.JoinResp.State (*DaosResp)(nil), // 1: mgmt.DaosResp @@ -1258,13 +1386,15 @@ var file_mgmt_svc_proto_goTypes = []interface{}{ (*PingRankReq)(nil), // 12: mgmt.PingRankReq (*SetRankReq)(nil), // 13: mgmt.SetRankReq (*PoolMonitorReq)(nil), // 14: mgmt.PoolMonitorReq - (*GroupUpdateReq_Engine)(nil), // 15: mgmt.GroupUpdateReq.Engine - (*GetAttachInfoResp_RankUri)(nil), // 16: mgmt.GetAttachInfoResp.RankUri + (*ClientTelemetryReq)(nil), // 15: mgmt.ClientTelemetryReq + (*ClientTelemetryResp)(nil), // 16: mgmt.ClientTelemetryResp + (*GroupUpdateReq_Engine)(nil), // 17: mgmt.GroupUpdateReq.Engine + (*GetAttachInfoResp_RankUri)(nil), // 18: mgmt.GetAttachInfoResp.RankUri } var file_mgmt_svc_proto_depIdxs = []int32{ - 15, // 0: mgmt.GroupUpdateReq.engines:type_name -> mgmt.GroupUpdateReq.Engine + 17, // 0: mgmt.GroupUpdateReq.engines:type_name -> mgmt.GroupUpdateReq.Engine 0, // 1: mgmt.JoinResp.state:type_name -> mgmt.JoinResp.State - 16, // 2: mgmt.GetAttachInfoResp.rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri + 18, // 2: mgmt.GetAttachInfoResp.rank_uris:type_name -> mgmt.GetAttachInfoResp.RankUri 9, // 3: mgmt.GetAttachInfoResp.client_net_hint:type_name -> mgmt.ClientNetHint 4, // [4:4] is the sub-list for method output_type 4, // [4:4] is the sub-list for method input_type @@ -1448,7 +1578,7 @@ func file_mgmt_svc_proto_init() { } } file_mgmt_svc_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GroupUpdateReq_Engine); i { + switch v := 
v.(*ClientTelemetryReq); i { case 0: return &v.state case 1: @@ -1460,6 +1590,30 @@ func file_mgmt_svc_proto_init() { } } file_mgmt_svc_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ClientTelemetryResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_svc_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GroupUpdateReq_Engine); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mgmt_svc_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*GetAttachInfoResp_RankUri); i { case 0: return &v.state @@ -1478,7 +1632,7 @@ func file_mgmt_svc_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_mgmt_svc_proto_rawDesc, NumEnums: 1, - NumMessages: 16, + NumMessages: 18, NumExtensions: 0, NumServices: 0, }, diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index cd88b5acf25..4d27fb78b2a 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2022 Intel Corporation. +// (C) Copyright 2018-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -26,6 +26,8 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "golang.org/x/sys/unix" "google.golang.org/protobuf/testing/protocmp" + + "github.com/daos-stack/daos/src/control/logging" ) // AssertTrue asserts b is true @@ -408,3 +410,14 @@ func Context(t *testing.T) context.Context { t.Cleanup(cancel) return ctx } + +// MustLogContext returns a context containing the supplied logger. +// Canceled when the test is done. 
+func MustLogContext(t *testing.T, log logging.Logger) context.Context { + t.Helper() + ctx, err := logging.ToContext(Context(t), log) + if err != nil { + t.Fatal(err) + } + return ctx +} diff --git a/src/control/drpc/modules.go b/src/control/drpc/modules.go index 1a51bc2f67c..0aacbae1d4a 100644 --- a/src/control/drpc/modules.go +++ b/src/control/drpc/modules.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -157,6 +157,7 @@ func (m MgmtMethod) String() string { MethodPoolGetProp: "PoolGetProp", MethodPoolUpgrade: "PoolUpgrade", MethodLedManage: "LedManage", + MethodSetupClientTelemetry: "SetupClientTelemetry", }[m]; ok { return s } @@ -244,6 +245,8 @@ const ( MethodPoolUpgrade MgmtMethod = C.DRPC_METHOD_MGMT_POOL_UPGRADE // MethodLedManage defines a method to manage a VMD device LED state MethodLedManage MgmtMethod = C.DRPC_METHOD_MGMT_LED_MANAGE + // MethodSetupClientTelemetry defines a method to setup client telemetry + MethodSetupClientTelemetry MgmtMethod = C.DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM ) type srvMethod int32 diff --git a/src/control/lib/daos/logging.go b/src/control/lib/daos/logging.go new file mode 100644 index 00000000000..9891adba0be --- /dev/null +++ b/src/control/lib/daos/logging.go @@ -0,0 +1,47 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import ( + "os" + "strings" + + "github.com/pkg/errors" +) + +/* +#cgo LDFLAGS: -lgurt + +#include +*/ +import "C" + +const ( + // DefaultDebugMask defines the basic debug mask. + DefaultDebugMask = "DEBUG,MEM=ERR,OBJECT=ERR,PLACEMENT=ERR" + // DefaultInfoMask defines the basic info mask. + DefaultInfoMask = "INFO" + // DefaultErrorMask defines the basic error mask. + DefaultErrorMask = "ERROR" +) + +// InitLogging initializes the DAOS logging system. 
+func InitLogging(masks ...string) (func(), error) { + mask := strings.Join(masks, ",") + if mask == "" { + mask = DefaultInfoMask + } + os.Setenv("D_LOG_MASK", mask) + + if rc := C.daos_debug_init(nil); rc != 0 { + return func() {}, errors.Wrap(Status(rc), "daos_debug_init() failed") + } + + return func() { + C.daos_debug_fini() + }, nil +} diff --git a/src/control/lib/telemetry/promexp/client.go b/src/control/lib/telemetry/promexp/client.go new file mode 100644 index 00000000000..e6eefeaf396 --- /dev/null +++ b/src/control/lib/telemetry/promexp/client.go @@ -0,0 +1,176 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package promexp + +import ( + "context" + "regexp" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/telemetry" + "github.com/daos-stack/daos/src/control/logging" +) + +const ( + // defaultCleanupInterval is the default interval for pruning unused + // shared memory segments. + defaultCleanupInterval = 1 * time.Minute +) + +type ( + // ClientCollector is a metrics collector for DAOS client metrics. + ClientCollector struct { + metricsCollector + } + + // ClientSource is a metrics source for DAOS client metrics. 
+ ClientSource struct { + MetricSource + cleanup func() + } +) + +func extractClientLabels(log logging.Logger, in string) (labels labelMap, name string) { + log.Tracef("in: %q", in) + + labels = make(labelMap) + compsIdx := 0 + comps := strings.Split(in, string(telemetry.PathSep)) + if len(comps) == 0 { + return labels, "" + } + + if strings.HasPrefix(comps[compsIdx], "ID") { + if len(comps) == 1 { + return labels, "" + } + compsIdx++ + } + + for i, label := range []string{"job", "pid", "tid"} { + if i > 0 { + // After jobid, we should have a pid and/or tid, and + // then move on to the engine labels. + _, err := strconv.Atoi(comps[compsIdx]) + if err != nil { + break + } + } + + if len(comps) == compsIdx+1 { + // If we have a weird path ending on a pid or tid, treat it + // as empty of labels. + if _, err := strconv.Atoi(comps[compsIdx]); err == nil && i > 0 { + return labelMap{}, "" + } + return labels, comps[compsIdx] + } + labels[label] = comps[compsIdx] + compsIdx++ + } + + var engLabels labelMap + engLabels, name = extractLabels(log, strings.Join(comps[compsIdx:], string(telemetry.PathSep))) + for k, v := range engLabels { + labels[k] = v + } + + return +} + +func newClientMetric(log logging.Logger, m telemetry.Metric) *sourceMetric { + labels, name := extractClientLabels(log, m.FullPath()) + baseName := "client_" + name + + return newSourceMetric(log, m, baseName, labels) +} + +// NewClientSource creates a new ClientSource for client metrics. 
+func NewClientSource(parent context.Context) (*ClientSource, error) { + ctx, err := telemetry.InitClientRoot(parent) + if err != nil { + return nil, errors.Wrap(err, "failed to init telemetry") + } + + go func(outer, inner context.Context) { + <-outer.Done() + telemetry.Detach(inner) + }(parent, ctx) + + return &ClientSource{ + MetricSource: MetricSource{ + ctx: ctx, + enabled: atm.NewBool(true), + tmSchema: telemetry.NewSchema(), + smSchema: newSourceMetricSchema(newClientMetric), + }, + }, nil +} + +// NewClientCollector creates a new ClientCollector instance. +func NewClientCollector(ctx context.Context, log logging.Logger, source *ClientSource, opts *CollectorOpts) (*ClientCollector, error) { + if opts == nil { + opts = defaultCollectorOpts() + } + + if opts.RetainDuration == 0 { + // Clients will clean up after themselves, but we still need to + // periodically remove the top-level jobid segments. + opts.RetainDuration = defaultCleanupInterval + } + + log.Debugf("pruning unused client metric segments every %s", opts.RetainDuration) + go func() { + pruneTicker := time.NewTicker(opts.RetainDuration) + defer pruneTicker.Stop() + + for { + select { + case <-ctx.Done(): return + case <-pruneTicker.C: + source.PruneSegments(log, opts.RetainDuration) + } + } + }() + + c := &ClientCollector{ + metricsCollector: metricsCollector{ + log: log, + summary: prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: "client", + Subsystem: "exporter", + Name: "scrape_duration_seconds", + Help: "daos_client_exporter: Duration of a scrape job.", + }, + []string{"source", "result"}, + ), + collectFn: func(ch chan *sourceMetric) { + source.Collect(log, ch) + }, + }, + } + + for _, pat := range opts.Ignores { + re, err := regexp.Compile(pat) + if err != nil { + return nil, errors.Wrapf(err, "failed to compile %q", pat) + } + c.ignoredMetrics = append(c.ignoredMetrics, re) + } + + return c, nil +} diff --git a/src/control/lib/telemetry/promexp/client_test.go 
b/src/control/lib/telemetry/promexp/client_test.go new file mode 100644 index 00000000000..d0274f157b5 --- /dev/null +++ b/src/control/lib/telemetry/promexp/client_test.go @@ -0,0 +1,163 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package promexp + +import ( + "fmt" + "regexp" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/logging" +) + +func TestPromExp_extractClientLabels(t *testing.T) { + shmID := 256 + jobID := "testJob" + pid := "12345" + tid := "67890" + + testPath := func(suffix string) string { + return fmt.Sprintf("ID: %d/%s/%s/%s/%s", shmID, jobID, pid, tid, suffix) + } + + for name, tc := range map[string]struct { + input string + expName string + expLabels labelMap + }{ + "empty": { + expLabels: labelMap{}, + }, + "ID stripped": { + input: "ID: 123", + expLabels: labelMap{}, + }, + "weird truncation": { + input: "ID: 123/jobbo/6783/90", + expLabels: labelMap{}, + }, + "active update ops": { + input: testPath("io/ops/update/active"), + expName: "io_ops_update_active", + expLabels: labelMap{ + "job": jobID, + "pid": pid, + "tid": tid, + }, + }, + "fetch latency 1MB": { + input: testPath("io/latency/fetch/1MB"), + expName: "io_latency_fetch", + expLabels: labelMap{ + "job": jobID, + "pid": pid, + "tid": tid, + "size": "1MB", + }, + }, + "started_at": { + input: fmt.Sprintf("ID: %d/%s/%s/started_at", shmID, jobID, pid), + expName: "started_at", + expLabels: labelMap{ + "job": jobID, + "pid": pid, + }, + }, + "pool ops": { + input: fmt.Sprintf("ID: %d/%s/%s/pool/%s/ops/foo", shmID, jobID, pid, test.MockPoolUUID(1)), + expName: "pool_ops_foo", + expLabels: labelMap{ + "job": jobID, + "pid": pid, + "pool": test.MockPoolUUID(1).String(), + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf 
:= logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + labels, name := extractClientLabels(log, tc.input) + + test.AssertEqual(t, name, tc.expName, "") + if diff := cmp.Diff(labels, tc.expLabels); diff != "" { + t.Errorf("labels mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestPromExp_NewClientCollector(t *testing.T) { + for name, tc := range map[string]struct { + opts *CollectorOpts + expErr error + expResult *ClientCollector + }{ + "defaults": { + expResult: &ClientCollector{ + metricsCollector: metricsCollector{ + summary: &prometheus.SummaryVec{ + MetricVec: &prometheus.MetricVec{}, + }, + }, + }, + }, + "opts with ignores": { + opts: &CollectorOpts{Ignores: []string{"one", "two"}}, + expResult: &ClientCollector{ + metricsCollector: metricsCollector{ + summary: &prometheus.SummaryVec{ + MetricVec: &prometheus.MetricVec{}, + }, + ignoredMetrics: []*regexp.Regexp{ + regexp.MustCompile("one"), + regexp.MustCompile("two"), + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ctx := test.MustLogContext(t, log) + cs, err := NewClientSource(ctx) + if err != nil { + t.Fatal(err) + } + result, err := NewClientCollector(ctx, log, cs, tc.opts) + + test.CmpErr(t, tc.expErr, err) + + cmpOpts := []cmp.Option{ + cmpopts.IgnoreUnexported(MetricSource{}), + cmpopts.IgnoreUnexported(prometheus.SummaryVec{}), + cmpopts.IgnoreUnexported(prometheus.MetricVec{}), + cmpopts.IgnoreUnexported(regexp.Regexp{}), + cmp.AllowUnexported(ClientCollector{}), + cmp.AllowUnexported(metricsCollector{}), + cmp.FilterPath(func(p cmp.Path) bool { + // Ignore a few specific fields + return (strings.HasSuffix(p.String(), "log") || + strings.HasSuffix(p.String(), "sourceMutex") || + strings.HasSuffix(p.String(), "cleanupSource") || + strings.HasSuffix(p.String(), "collectFn")) + }, cmp.Ignore()), + } + if diff := cmp.Diff(tc.expResult, result, cmpOpts...); diff != 
"" { + t.Fatalf("(-want, +got)\n%s", diff) + } + }) + } +} diff --git a/src/control/lib/telemetry/promexp/collector.go b/src/control/lib/telemetry/promexp/collector.go index 03e6fa40dd5..ec70c0e8fbd 100644 --- a/src/control/lib/telemetry/promexp/collector.go +++ b/src/control/lib/telemetry/promexp/collector.go @@ -7,445 +7,34 @@ // +build linux // +build amd64 arm64 -// - package promexp import ( - "context" - "fmt" "regexp" - "strings" - "sync" - "unicode" + "time" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/daos-stack/daos/src/control/lib/atm" "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" ) type ( - Collector struct { - log logging.Logger - summary *prometheus.SummaryVec - ignoredMetrics []*regexp.Regexp - sources []*EngineSource - cleanupSource map[uint32]func() - sourceMutex sync.RWMutex // To protect sources - } - + // CollectorOpts contains options for the metrics collector. CollectorOpts struct { - Ignores []string - } - - EngineSource struct { - ctx context.Context - tmMutex sync.RWMutex // To protect telemetry collection - Index uint32 - Rank uint32 - enabled atm.Bool - tmSchema *telemetry.Schema - rmSchema rankMetricSchema + Ignores []string + RetainDuration time.Duration } - rankMetricSchema struct { - mu sync.Mutex - rankMetrics map[string]*rankMetric - seen map[string]struct{} + metricsCollector struct { + log logging.Logger + summary *prometheus.SummaryVec + ignoredMetrics []*regexp.Regexp + collectFn func(ch chan *sourceMetric) } ) -func (s *rankMetricSchema) Prune() { - s.mu.Lock() - defer s.mu.Unlock() - - for id := range s.rankMetrics { - if _, found := s.seen[id]; !found { - delete(s.rankMetrics, id) - } - } - s.seen = make(map[string]struct{}) -} - -func (s *rankMetricSchema) add(log logging.Logger, rank uint32, metric telemetry.Metric) (rm *rankMetric) { - s.mu.Lock() - defer s.mu.Unlock() - - id := metric.FullPath() - s.seen[id] = struct{}{} 
- - var found bool - if rm, found = s.rankMetrics[id]; !found { - rm = newRankMetric(log, rank, metric) - s.rankMetrics[id] = rm - } else { - rm.resetVecs() - } - - return -} - -func NewEngineSource(parent context.Context, idx uint32, rank uint32) (*EngineSource, func(), error) { - ctx, err := telemetry.Init(parent, idx) - if err != nil { - return nil, nil, errors.Wrap(err, "failed to init telemetry") - } - - cleanupFn := func() { - telemetry.Detach(ctx) - } - - return &EngineSource{ - ctx: ctx, - Index: idx, - Rank: rank, - enabled: atm.NewBool(true), - tmSchema: telemetry.NewSchema(), - rmSchema: rankMetricSchema{ - rankMetrics: make(map[string]*rankMetric), - seen: make(map[string]struct{}), - }, - }, cleanupFn, nil -} - -func defaultCollectorOpts() *CollectorOpts { - return &CollectorOpts{} -} - -func NewCollector(log logging.Logger, opts *CollectorOpts, sources ...*EngineSource) (*Collector, error) { - if opts == nil { - opts = defaultCollectorOpts() - } - - c := &Collector{ - log: log, - sources: sources, - cleanupSource: make(map[uint32]func()), - summary: prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: "engine", - Subsystem: "exporter", - Name: "scrape_duration_seconds", - Help: "daos_exporter: Duration of a scrape job.", - }, - []string{"source", "result"}, - ), - } - - for _, pat := range opts.Ignores { - re, err := regexp.Compile(pat) - if err != nil { - return nil, errors.Wrapf(err, "failed to compile %q", pat) - } - c.ignoredMetrics = append(c.ignoredMetrics, re) - } - - return c, nil -} - -type labelMap map[string]string - -func (lm labelMap) keys() (keys []string) { - for label := range lm { - keys = append(keys, label) - } - - return -} - -func sanitizeMetricName(in string) string { - return strings.Map(func(r rune) rune { - switch { - // Valid names for Prometheus are limited to: - case r >= 'a' && r <= 'z': // lowercase letters - case r >= 'A' && r <= 'Z': // uppercase letters - case unicode.IsDigit(r): // digits - default: // 
sanitize any other character - return '_' - } - - return r - }, strings.TrimLeft(in, "/")) -} - -func matchLabel(labels labelMap, input, match, label string) bool { - if !strings.HasPrefix(input, match) { - return false - } - - splitStr := strings.SplitN(input, "_", 2) - if len(splitStr) == 2 { - labels[label] = splitStr[1] - return true - } - return false -} - -func appendName(cur, name string) string { - if cur == "" { - return name - } - return cur + "_" + name -} - -// extractLabels takes a "/"-separated DAOS metric name in order to -// create a normalized Prometheus name and label map. -// -// NB: Prometheus metric names should follow best practices as -// outlined at https://prometheus.io/docs/practices/naming/ -// -// In particular, a metric name should describe the measurement, -// not the entity the measurement is about. In other words, if 4 -// different entities share the same measurement, then there should -// be a single metric with a label that distinguishes between -// individual measurement values. 
-// -// Good: pool_started_at {pool="00000000-1111-2222-3333-4444444444"} -// Bad: pool_00000000_1111_2222_3333_4444444444_started_at -func extractLabels(in string) (labels labelMap, name string) { - labels = make(labelMap) - compsIdx := 0 - comps := strings.Split(in, string(telemetry.PathSep)) - if len(comps) == 0 { - return labels, in - } - - if strings.HasPrefix(comps[compsIdx], "ID") { - if len(comps) == 1 { - return labels, "" - } - compsIdx++ - } - - switch comps[compsIdx] { - case "pool": - name = "pool" - compsIdx++ - labels["pool"] = comps[compsIdx] - compsIdx++ - switch comps[compsIdx] { - case "ops": - compsIdx++ - name += "_ops_" + comps[compsIdx] - compsIdx++ - } - case "io": - name = "io" - compsIdx++ - switch comps[compsIdx] { - case "latency": - compsIdx++ - name += "_latency_" + comps[compsIdx] - compsIdx++ - labels["size"] = comps[compsIdx] - compsIdx++ - case "ops": - compsIdx++ - name += "_ops_" + comps[compsIdx] - compsIdx++ - default: - name += "_" + comps[compsIdx] - compsIdx++ - } - case "net": - compsIdx++ - if comps[compsIdx] == "uri" { - compsIdx++ - name = "net_uri_" + comps[compsIdx] - compsIdx++ - break - } - - name = "net" - labels["provider"] = comps[compsIdx] - compsIdx++ - case "nvme": - name = "nvme" - compsIdx++ - labels["device"] = comps[compsIdx] - compsIdx++ - } - - for { - if len(comps) == compsIdx { - break - } - - switch { - case matchLabel(labels, comps[compsIdx], "tgt_", "target"): - compsIdx++ - case matchLabel(labels, comps[compsIdx], "xs_", "xstream"): - compsIdx++ - case matchLabel(labels, comps[compsIdx], "ctx_", "context"): - compsIdx++ - default: - name = appendName(name, comps[compsIdx]) - compsIdx++ - } - } - - name = sanitizeMetricName(name) - return -} - -func (es *EngineSource) Collect(log logging.Logger, ch chan<- *rankMetric) { - if es == nil { - log.Error("nil engine source") - return - } - if !es.IsEnabled() { - return - } - if ch == nil { - log.Error("nil channel") - return - } - - es.tmMutex.RLock() - 
defer es.tmMutex.RUnlock() - - metrics := make(chan telemetry.Metric) - go func() { - if err := telemetry.CollectMetrics(es.ctx, es.tmSchema, metrics); err != nil { - log.Errorf("failed to collect metrics for engine rank %d: %s", es.Rank, err) - return - } - es.tmSchema.Prune() - }() - - for metric := range metrics { - ch <- es.rmSchema.add(log, es.Rank, metric) - } - es.rmSchema.Prune() -} - -// IsEnabled checks if the engine source is enabled. -func (es *EngineSource) IsEnabled() bool { - return es.enabled.IsTrue() -} - -// Enable enables the engine source. -func (es *EngineSource) Enable() { - es.enabled.SetTrue() -} - -// Disable disables the engine source. -func (es *EngineSource) Disable() { - es.enabled.SetFalse() -} - -type gvMap map[string]*prometheus.GaugeVec - -func (m gvMap) add(name, help string, labels labelMap) { - if _, found := m[name]; !found { - gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: name, - Help: help, - }, labels.keys()) - m[name] = gv - } -} - -func (m gvMap) set(name string, value float64, labels labelMap) error { - gv, found := m[name] - if !found { - return errors.Errorf("gauge vector %s not found", name) - } - gv.With(prometheus.Labels(labels)).Set(value) - - return nil -} - -type cvMap map[string]*prometheus.CounterVec - -func (m cvMap) add(name, help string, labels labelMap) { - if _, found := m[name]; !found { - cv := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: name, - Help: help, - }, labels.keys()) - m[name] = cv - } -} - -func (m cvMap) set(name string, value float64, labels labelMap) error { - cv, found := m[name] - if !found { - return errors.Errorf("counter vector %s not found", name) - } - cv.With(prometheus.Labels(labels)).Add(value) - - return nil -} - -type rankMetric struct { - rank uint32 - metric telemetry.Metric - baseName string - labels labelMap - gvm gvMap - cvm cvMap -} - -func (rm *rankMetric) collect(ch chan<- prometheus.Metric) { - for _, gv := range rm.gvm { - gv.Collect(ch) - } - 
for _, cv := range rm.cvm { - cv.Collect(ch) - } -} - -func (rm *rankMetric) resetVecs() { - for _, gv := range rm.gvm { - gv.Reset() - } - for _, cv := range rm.cvm { - cv.Reset() - } -} - -func newRankMetric(log logging.Logger, rank uint32, m telemetry.Metric) *rankMetric { - rm := &rankMetric{ - metric: m, - rank: rank, - gvm: make(gvMap), - cvm: make(cvMap), - } - - var name string - rm.labels, name = extractLabels(m.FullPath()) - rm.labels["rank"] = fmt.Sprintf("%d", rm.rank) - rm.baseName = "engine_" + name - - desc := m.Desc() - - switch rm.metric.Type() { - case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp, - telemetry.MetricTypeSnapshot: - rm.gvm.add(rm.baseName, desc, rm.labels) - case telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration: - rm.gvm.add(rm.baseName, desc, rm.labels) - for _, ms := range getMetricStats(rm.baseName, rm.metric) { - if ms.isCounter { - rm.cvm.add(ms.name, ms.desc, rm.labels) - } else { - rm.gvm.add(ms.name, ms.desc, rm.labels) - } - } - case telemetry.MetricTypeCounter: - rm.cvm.add(rm.baseName, desc, rm.labels) - default: - log.Errorf("[%s]: metric type %d not supported", name, rm.metric.Type()) - } - - return rm -} - -func (c *Collector) isIgnored(name string) bool { +func (c *metricsCollector) isIgnored(name string) bool { for _, re := range c.ignoredMetrics { // TODO: We may want to look into removing the use of regexp here // in favor of a less-flexible but more efficient approach. 
@@ -458,121 +47,7 @@ func (c *Collector) isIgnored(name string) bool { return false } -type metricStat struct { - name string - desc string - value float64 - isCounter bool -} - -func getMetricStats(baseName string, m telemetry.Metric) (stats []*metricStat) { - ms, ok := m.(telemetry.StatsMetric) - if !ok { - return - } - - for name, s := range map[string]struct { - fn func() float64 - desc string - isCounter bool - }{ - "min": { - fn: func() float64 { return float64(ms.Min()) }, - desc: " (min value)", - }, - "max": { - fn: func() float64 { return float64(ms.Max()) }, - desc: " (max value)", - }, - "mean": { - fn: ms.Mean, - desc: " (mean)", - }, - "sum": { - fn: func() float64 { return float64(ms.Sum()) }, - desc: " (sum)", - }, - "stddev": { - fn: ms.StdDev, - desc: " (std dev)", - }, - "sumsquares": { - fn: ms.SumSquares, - desc: " (sum of squares)", - }, - "samples": { - fn: func() float64 { return float64(ms.SampleSize()) }, - desc: " (samples)", - isCounter: true, - }, - } { - stats = append(stats, &metricStat{ - name: baseName + "_" + name, - desc: m.Desc() + s.desc, - value: s.fn(), - isCounter: s.isCounter, - }) - } - - return -} - -// AddSource adds an EngineSource to the Collector. -func (c *Collector) AddSource(es *EngineSource, cleanup func()) { - if es == nil { - c.log.Error("attempted to add nil EngineSource") - return - } - - c.sourceMutex.Lock() - defer c.sourceMutex.Unlock() - - // If we attempt to add a duplicate, remove the old one. - c.removeSourceNoLock(es.Index) - - c.sources = append(c.sources, es) - if cleanup != nil { - c.cleanupSource[es.Index] = cleanup - } -} - -// RemoveSource removes an EngineSource with a given index from the Collector. 
-func (c *Collector) RemoveSource(engineIdx uint32) { - c.sourceMutex.Lock() - defer c.sourceMutex.Unlock() - - c.removeSourceNoLock(engineIdx) -} - -func (c *Collector) removeSourceNoLock(engineIdx uint32) { - for i, es := range c.sources { - if es.Index == engineIdx { - es.Disable() - c.sources = append(c.sources[:i], c.sources[i+1:]...) - - // Ensure that EngineSource isn't collecting during cleanup - es.tmMutex.Lock() - if cleanup, found := c.cleanupSource[engineIdx]; found && cleanup != nil { - cleanup() - } - es.tmMutex.Unlock() - delete(c.cleanupSource, engineIdx) - break - } - } -} - -func (c *Collector) getSources() []*EngineSource { - c.sourceMutex.RLock() - defer c.sourceMutex.RUnlock() - - sourceCopy := make([]*EngineSource, len(c.sources)) - _ = copy(sourceCopy, c.sources) - return sourceCopy -} - -// Collect collects metrics from all EngineSources. -func (c *Collector) Collect(ch chan<- prometheus.Metric) { +func (c *metricsCollector) Collect(ch chan<- prometheus.Metric) { if c == nil { return } @@ -580,55 +55,57 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { c.log.Error("passed a nil channel") return } + if c.collectFn == nil { + c.log.Error("collectFn is nil") + return + } - rankMetrics := make(chan *rankMetric) - go func(sources []*EngineSource) { - for _, source := range sources { - source.Collect(c.log, rankMetrics) - } - close(rankMetrics) - }(c.getSources()) + sourceMetrics := make(chan *sourceMetric) + go func() { + c.collectFn(sourceMetrics) + close(sourceMetrics) + }() - for rm := range rankMetrics { - if c.isIgnored(rm.baseName) { + for sm := range sourceMetrics { + if c.isIgnored(sm.baseName) { continue } var err error - switch rm.metric.Type() { + switch sm.metric.Type() { case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp, telemetry.MetricTypeSnapshot: - err = rm.gvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels) + err = sm.gvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels) case 
telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration: - if err = rm.gvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels); err != nil { + if err = sm.gvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels); err != nil { break } - for _, ms := range getMetricStats(rm.baseName, rm.metric) { + for _, ms := range getMetricStats(sm.baseName, sm.metric) { if ms.isCounter { - if err = rm.cvm.set(ms.name, ms.value, rm.labels); err != nil { + if err = sm.cvm.set(ms.name, ms.value, sm.labels); err != nil { break } } else { - if err = rm.gvm.set(ms.name, ms.value, rm.labels); err != nil { + if err = sm.gvm.set(ms.name, ms.value, sm.labels); err != nil { break } } } case telemetry.MetricTypeCounter: - err = rm.cvm.set(rm.baseName, rm.metric.FloatValue(), rm.labels) + err = sm.cvm.set(sm.baseName, sm.metric.FloatValue(), sm.labels) default: - c.log.Errorf("[%s]: metric type %d not supported", rm.baseName, rm.metric.Type()) + c.log.Errorf("[%s]: metric type %d not supported", sm.baseName, sm.metric.Type()) } if err != nil { - c.log.Errorf("[%s]: %s", rm.baseName, err) + c.log.Errorf("[%s]: %s", sm.baseName, err) continue } - rm.collect(ch) + sm.collect(ch) } } -func (c *Collector) Describe(ch chan<- *prometheus.Desc) { +func (c *metricsCollector) Describe(ch chan<- *prometheus.Desc) { c.summary.Describe(ch) } diff --git a/src/control/lib/telemetry/promexp/engine.go b/src/control/lib/telemetry/promexp/engine.go new file mode 100644 index 00000000000..bb0481f12a9 --- /dev/null +++ b/src/control/lib/telemetry/promexp/engine.go @@ -0,0 +1,271 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package promexp + +import ( + "context" + "fmt" + "regexp" + "strings" + "sync" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/telemetry" + "github.com/daos-stack/daos/src/control/logging" +) + +type ( + // EngineCollector collects metrics from DAOS Engine sources. + EngineCollector struct { + metricsCollector + sources []*EngineSource + cleanupSource map[uint32]func() + sourceMutex sync.RWMutex // To protect sources + } + + // EngineSource provides metrics for a single DAOS Engine. + EngineSource struct { + MetricSource + Index uint32 + Rank uint32 + } +) + +// NewEngineSource initializes a new metrics source for a DAOS Engine. +func NewEngineSource(parent context.Context, idx uint32, rank uint32) (*EngineSource, func(), error) { + ctx, err := telemetry.Init(parent, idx) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to init telemetry") + } + + cleanupFn := func() { + telemetry.Detach(ctx) + } + + return &EngineSource{ + MetricSource: MetricSource{ + ctx: ctx, + enabled: atm.NewBool(true), + tmSchema: telemetry.NewSchema(), + smSchema: newSourceMetricSchema(func(l logging.Logger, m telemetry.Metric) *sourceMetric { + return newRankMetric(l, rank, m) + }), + }, + Index: idx, + Rank: rank, + }, cleanupFn, nil +} + +// NewEngineCollector initializes a new collector for DAOS Engine sources. 
+func NewEngineCollector(log logging.Logger, opts *CollectorOpts, sources ...*EngineSource) (*EngineCollector, error) { + if opts == nil { + opts = defaultCollectorOpts() + } + + c := &EngineCollector{ + metricsCollector: metricsCollector{ + log: log, + summary: prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: "engine", + Subsystem: "exporter", + Name: "scrape_duration_seconds", + Help: "daos_exporter: Duration of a scrape job.", + }, + []string{"source", "result"}, + ), + }, + sources: sources, + cleanupSource: make(map[uint32]func()), + } + + c.collectFn = func(metrics chan *sourceMetric) { + for _, source := range c.getSources() { + source.Collect(c.log, metrics) + } + } + + for _, pat := range opts.Ignores { + re, err := regexp.Compile(pat) + if err != nil { + return nil, errors.Wrapf(err, "failed to compile %q", pat) + } + c.ignoredMetrics = append(c.ignoredMetrics, re) + } + + return c, nil +} + +// extractLabels takes a "/"-separated DAOS metric name in order to +// create a normalized Prometheus name and label map. +// +// NB: Prometheus metric names should follow best practices as +// outlined at https://prometheus.io/docs/practices/naming/ +// +// In particular, a metric name should describe the measurement, +// not the entity the measurement is about. In other words, if 4 +// different entities share the same measurement, then there should +// be a single metric with a label that distinguishes between +// individual measurement values. 
+// +// Good: pool_started_at {pool="00000000-1111-2222-3333-444444444444"} +// Bad: pool_00000000_1111_2222_3333_444444444444_started_at +func extractLabels(log logging.Logger, in string) (labels labelMap, name string) { + log.Tracef("in: %q", in) + + labels = make(labelMap) + compsIdx := 0 + comps := strings.Split(in, string(telemetry.PathSep)) + if len(comps) == 0 { + return labels, "" + } + + if strings.HasPrefix(comps[compsIdx], "ID") { + if len(comps) == 1 { + return labels, "" + } + compsIdx++ + } + + switch comps[compsIdx] { + case "pool": + name = "pool" + compsIdx++ + labels["pool"] = comps[compsIdx] + compsIdx++ + switch comps[compsIdx] { + case "ops": + compsIdx++ + name += "_ops_" + comps[compsIdx] + compsIdx++ + } + case "io": + name = "io" + compsIdx++ + switch comps[compsIdx] { + case "latency": + compsIdx++ + name += "_latency_" + comps[compsIdx] + compsIdx++ + labels["size"] = comps[compsIdx] + compsIdx++ + case "ops": + compsIdx++ + name += "_ops_" + comps[compsIdx] + compsIdx++ + default: + name += "_" + comps[compsIdx] + compsIdx++ + } + case "net": + compsIdx++ + if comps[compsIdx] == "uri" { + compsIdx++ + name = "net_uri_" + comps[compsIdx] + compsIdx++ + break + } + + name = "net" + labels["provider"] = comps[compsIdx] + compsIdx++ + case "nvme": + name = "nvme" + compsIdx++ + labels["device"] = comps[compsIdx] + compsIdx++ + } + + for { + if len(comps) == compsIdx { + break + } + + switch { + case matchLabel(labels, comps[compsIdx], "tgt_", "target"): + compsIdx++ + case matchLabel(labels, comps[compsIdx], "xs_", "xstream"): + compsIdx++ + case matchLabel(labels, comps[compsIdx], "ctx_", "context"): + compsIdx++ + default: + name = appendName(name, comps[compsIdx]) + compsIdx++ + } + } + + name = sanitizeMetricName(name) + return +} + +func newRankMetric(log logging.Logger, rank uint32, m telemetry.Metric) *sourceMetric { + labels, name := extractLabels(log, m.FullPath()) + baseName := "engine_" + name + labels["rank"] = fmt.Sprintf("%d", 
rank) + + return newSourceMetric(log, m, baseName, labels) +} + +// AddSource adds an EngineSource to the Collector. +func (c *EngineCollector) AddSource(es *EngineSource, cleanup func()) { + if es == nil { + c.log.Error("attempted to add nil EngineSource") + return + } + + c.sourceMutex.Lock() + defer c.sourceMutex.Unlock() + + // If we attempt to add a duplicate, remove the old one. + c.removeSourceNoLock(es.Index) + + c.sources = append(c.sources, es) + if cleanup != nil { + c.cleanupSource[es.Index] = cleanup + } +} + +// RemoveSource removes an EngineSource with a given index from the Collector. +func (c *EngineCollector) RemoveSource(engineIdx uint32) { + c.sourceMutex.Lock() + defer c.sourceMutex.Unlock() + + c.removeSourceNoLock(engineIdx) +} + +func (c *EngineCollector) removeSourceNoLock(engineIdx uint32) { + for i, es := range c.sources { + if es.Index == engineIdx { + es.Disable() + c.sources = append(c.sources[:i], c.sources[i+1:]...) + + // Ensure that EngineSource isn't collecting during cleanup + es.tmMutex.Lock() + if cleanup, found := c.cleanupSource[engineIdx]; found && cleanup != nil { + cleanup() + } + es.tmMutex.Unlock() + delete(c.cleanupSource, engineIdx) + break + } + } +} + +func (c *EngineCollector) getSources() []*EngineSource { + c.sourceMutex.RLock() + defer c.sourceMutex.RUnlock() + + sourceCopy := make([]*EngineSource, len(c.sources)) + _ = copy(sourceCopy, c.sources) + return sourceCopy +} diff --git a/src/control/lib/telemetry/promexp/collector_test.go b/src/control/lib/telemetry/promexp/engine_test.go similarity index 88% rename from src/control/lib/telemetry/promexp/collector_test.go rename to src/control/lib/telemetry/promexp/engine_test.go index e50605a033a..b21839b7ba0 100644 --- a/src/control/lib/telemetry/promexp/collector_test.go +++ b/src/control/lib/telemetry/promexp/engine_test.go @@ -2,11 +2,6 @@ // (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent -// -//go:build linux && (amd64 || arm64) -// +build linux -// +build amd64 arm64 - // package promexp @@ -62,7 +57,10 @@ func TestPromexp_NewEngineSource(t *testing.T) { test.CmpErr(t, tc.expErr, err) - if diff := cmp.Diff(tc.expResult, result, cmpopts.IgnoreUnexported(EngineSource{})); diff != "" { + cmpOpts := cmp.Options{ + cmpopts.IgnoreUnexported(MetricSource{}), + } + if diff := cmp.Diff(tc.expResult, result, cmpOpts...); diff != "" { t.Fatalf("(-want, +got)\n%s", diff) } @@ -155,31 +153,20 @@ func TestPromExp_EngineSource_Collect(t *testing.T) { for name, tc := range map[string]struct { es *EngineSource - resultChan chan *rankMetric + resultChan chan *sourceMetric expMetrics telemetry.TestMetricsMap }{ - "nil source": { - resultChan: make(chan *rankMetric), - }, "nil channel": { es: validSrc, }, - "bad source": { - es: &EngineSource{ - ctx: test.Context(t), - Rank: 123, - Index: testIdx + 1, - }, - resultChan: make(chan *rankMetric), - }, "success": { es: validSrc, - resultChan: make(chan *rankMetric), + resultChan: make(chan *sourceMetric), expMetrics: realMetrics, }, "disabled": { es: disabledSrc, - resultChan: make(chan *rankMetric), + resultChan: make(chan *sourceMetric), expMetrics: telemetry.TestMetricsMap{}, }, } { @@ -189,7 +176,7 @@ func TestPromExp_EngineSource_Collect(t *testing.T) { go tc.es.Collect(log, tc.resultChan) - gotMetrics := []*rankMetric{} + gotMetrics := []*sourceMetric{} for { done := false select { @@ -206,7 +193,7 @@ func TestPromExp_EngineSource_Collect(t *testing.T) { test.AssertEqual(t, len(tc.expMetrics), len(gotMetrics), "wrong number of metrics returned") for _, got := range gotMetrics { - test.AssertEqual(t, testRank, got.rank, "wrong rank") + test.AssertEqual(t, fmt.Sprintf("%d", testRank), got.labels["rank"], "wrong rank") expM, ok := tc.expMetrics[got.metric.Type()] if !ok { t.Fatalf("metric type %d not expected", got.metric.Type()) @@ -220,7 +207,7 @@ func 
TestPromExp_EngineSource_Collect(t *testing.T) { } } -func TestPromExp_NewCollector(t *testing.T) { +func TestPromExp_NewEngineCollector(t *testing.T) { testSrc := []*EngineSource{ { Rank: 1, @@ -234,20 +221,24 @@ func TestPromExp_NewCollector(t *testing.T) { sources []*EngineSource opts *CollectorOpts expErr error - expResult *Collector + expResult *EngineCollector }{ "no sources": { - expResult: &Collector{ - summary: &prometheus.SummaryVec{ - MetricVec: &prometheus.MetricVec{}, + expResult: &EngineCollector{ + metricsCollector: metricsCollector{ + summary: &prometheus.SummaryVec{ + MetricVec: &prometheus.MetricVec{}, + }, }, }, }, "defaults": { sources: testSrc, - expResult: &Collector{ - summary: &prometheus.SummaryVec{ - MetricVec: &prometheus.MetricVec{}, + expResult: &EngineCollector{ + metricsCollector: metricsCollector{ + summary: &prometheus.SummaryVec{ + MetricVec: &prometheus.MetricVec{}, + }, }, sources: testSrc, }, @@ -255,15 +246,17 @@ func TestPromExp_NewCollector(t *testing.T) { "opts with ignores": { sources: testSrc, opts: &CollectorOpts{Ignores: []string{"one", "two"}}, - expResult: &Collector{ - summary: &prometheus.SummaryVec{ - MetricVec: &prometheus.MetricVec{}, + expResult: &EngineCollector{ + metricsCollector: metricsCollector{ + summary: &prometheus.SummaryVec{ + MetricVec: &prometheus.MetricVec{}, + }, + ignoredMetrics: []*regexp.Regexp{ + regexp.MustCompile("one"), + regexp.MustCompile("two"), + }, }, sources: testSrc, - ignoredMetrics: []*regexp.Regexp{ - regexp.MustCompile("one"), - regexp.MustCompile("two"), - }, }, }, "bad regexp in ignores": { @@ -276,21 +269,23 @@ func TestPromExp_NewCollector(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - result, err := NewCollector(log, tc.opts, tc.sources...) + result, err := NewEngineCollector(log, tc.opts, tc.sources...) 
test.CmpErr(t, tc.expErr, err) cmpOpts := []cmp.Option{ - cmpopts.IgnoreUnexported(EngineSource{}), + cmpopts.IgnoreUnexported(MetricSource{}), cmpopts.IgnoreUnexported(prometheus.SummaryVec{}), cmpopts.IgnoreUnexported(prometheus.MetricVec{}), cmpopts.IgnoreUnexported(regexp.Regexp{}), - cmp.AllowUnexported(Collector{}), + cmp.AllowUnexported(EngineCollector{}), + cmp.AllowUnexported(metricsCollector{}), cmp.FilterPath(func(p cmp.Path) bool { // Ignore a few specific fields return (strings.HasSuffix(p.String(), "log") || strings.HasSuffix(p.String(), "sourceMutex") || - strings.HasSuffix(p.String(), "cleanupSource")) + strings.HasSuffix(p.String(), "cleanupSource") || + strings.HasSuffix(p.String(), "collectFn")) }, cmp.Ignore()), } if diff := cmp.Diff(tc.expResult, result, cmpOpts...); diff != "" { @@ -338,7 +333,7 @@ func TestPromExp_Collector_Prune(t *testing.T) { } defer cleanup() - defaultCollector, err := NewCollector(log, nil, engSrc) + defaultCollector, err := NewEngineCollector(log, nil, engSrc) if err != nil { t.Fatalf("failed to create collector: %s", err.Error()) } @@ -357,12 +352,12 @@ func TestPromExp_Collector_Prune(t *testing.T) { } } - engSrc.rmSchema.mu.Lock() - for m := range engSrc.rmSchema.rankMetrics { - _, name := extractLabels(m) + engSrc.smSchema.mu.Lock() + for m := range engSrc.smSchema.sourceMetrics { + _, name := extractLabels(log, m) names = append(names, name) } - engSrc.rmSchema.mu.Unlock() + engSrc.smSchema.mu.Unlock() sort.Strings(names) return @@ -373,7 +368,7 @@ func TestPromExp_Collector_Prune(t *testing.T) { for _, m := range maps { for t, m := range m { if t != telemetry.MetricTypeDirectory && t != telemetry.MetricTypeLink { - _, name := extractLabels(m.FullPath()) + _, name := extractLabels(log, m.FullPath()) unique[name] = struct{}{} } } @@ -422,7 +417,7 @@ func TestPromExp_Collector_Collect(t *testing.T) { } defer cleanup() - defaultCollector, err := NewCollector(log, nil, engSrc) + defaultCollector, err := 
NewEngineCollector(log, nil, engSrc) if err != nil { t.Fatalf("failed to create collector: %s", err.Error()) } @@ -433,7 +428,7 @@ func TestPromExp_Collector_Collect(t *testing.T) { "engine_stats_gauge2", "engine_timer_duration", } - ignoreCollector, err := NewCollector(log, &CollectorOpts{ + ignoreCollector, err := NewEngineCollector(log, &CollectorOpts{ Ignores: ignores, }, engSrc) if err != nil { @@ -441,13 +436,10 @@ func TestPromExp_Collector_Collect(t *testing.T) { } for name, tc := range map[string]struct { - collector *Collector + collector *EngineCollector resultChan chan prometheus.Metric expMetricNames []string }{ - "nil collector": { - resultChan: make(chan prometheus.Metric), - }, "nil channel": { collector: defaultCollector, }, @@ -518,7 +510,7 @@ func TestPromExp_Collector_Collect(t *testing.T) { } } -func TestPromExp_extractLabels(t *testing.T) { +func TestPromExp_extractEngineLabels(t *testing.T) { for name, tc := range map[string]struct { input string expName string @@ -632,7 +624,10 @@ func TestPromExp_extractLabels(t *testing.T) { }, } { t.Run(name, func(t *testing.T) { - labels, name := extractLabels(tc.input) + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + labels, name := extractLabels(log, tc.input) test.AssertEqual(t, name, tc.expName, "") if diff := cmp.Diff(labels, tc.expLabels); diff != "" { @@ -692,7 +687,7 @@ func TestPromExp_Collector_AddSource(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - collector, err := NewCollector(log, nil, tc.startSrc...) + collector, err := NewEngineCollector(log, nil, tc.startSrc...) if err != nil { t.Fatalf("failed to set up collector: %s", err) } @@ -795,7 +790,7 @@ func TestPromExp_Collector_RemoveSource(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - collector, err := NewCollector(log, nil, tc.startSrc...) 
+ collector, err := NewEngineCollector(log, nil, tc.startSrc...) if err != nil { t.Fatalf("failed to set up collector: %s", err) } @@ -805,7 +800,10 @@ func TestPromExp_Collector_RemoveSource(t *testing.T) { collector.RemoveSource(tc.idx) - if diff := cmp.Diff(tc.expSrc, collector.sources, cmpopts.IgnoreUnexported(EngineSource{})); diff != "" { + cmpOpts := cmp.Options{ + cmpopts.IgnoreUnexported(MetricSource{}), + } + if diff := cmp.Diff(tc.expSrc, collector.sources, cmpOpts...); diff != "" { t.Fatalf("(-want, +got)\n%s", diff) } diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go new file mode 100644 index 00000000000..2f4c86d485d --- /dev/null +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -0,0 +1,100 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package promexp + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/daos-stack/daos/src/control/logging" +) + +type ( + // RegMonFn defines a function signature for registering a Prometheus + // monitor. + RegMonFn func(context.Context, logging.Logger) error + + // ExporterConfig defines the configuration for the Prometheus exporter. + ExporterConfig struct { + Port int + Title string + Register RegMonFn + } +) + +const ( + // EngineTelemetryPort specifies the default port for engine telemetry. + EngineTelemetryPort = 9191 + // ClientTelemetryPort specifies the default port for client telemetry. + ClientTelemetryPort = 9192 +) + +// StartExporter starts the Prometheus exporter. 
+func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) (func(), error) { + if cfg == nil { + return nil, errors.New("invalid exporter config: nil config") + } + + if cfg.Port <= 0 { + return nil, errors.New("invalid exporter config: bad port") + } + + if cfg.Register == nil { + return nil, errors.New("invalid exporter config: nil register function") + } + + if err := cfg.Register(ctx, log); err != nil { + return nil, errors.Wrap(err, "failed to register client monitor") + } + + listenAddress := fmt.Sprintf("0.0.0.0:%d", cfg.Port) + + srv := http.Server{Addr: listenAddress} + http.Handle("/metrics", promhttp.HandlerFor( + prometheus.DefaultGatherer, promhttp.HandlerOpts{}, + )) + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + num, err := w.Write([]byte(fmt.Sprintf(` + %s + +

%s

+

Metrics

+ + `, cfg.Title, cfg.Title))) + if err != nil { + log.Errorf("%d: %s", num, err) + } + }) + + // http listener is a blocking call + go func() { + log.Infof("Listening on %s", listenAddress) + err := srv.ListenAndServe() + log.Infof("Prometheus web exporter stopped: %s", err.Error()) + }() + + return func() { + log.Debug("Shutting down Prometheus web exporter") + + // When this cleanup function is called, the original context + // will probably have already been canceled. + timedCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + if err := srv.Shutdown(timedCtx); err != nil { + log.Noticef("HTTP server didn't shut down within timeout: %s", err.Error()) + } + }, nil +} diff --git a/src/control/lib/telemetry/promexp/httpd_test.go b/src/control/lib/telemetry/promexp/httpd_test.go new file mode 100644 index 00000000000..db69e122b71 --- /dev/null +++ b/src/control/lib/telemetry/promexp/httpd_test.go @@ -0,0 +1,118 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package promexp_test + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/telemetry/promexp" + "github.com/daos-stack/daos/src/control/logging" +) + +func TestPromExp_StartExporter(t *testing.T) { + for name, tc := range map[string]struct { + cfg *promexp.ExporterConfig + expErr error + }{ + "nil cfg": { + expErr: errors.New("invalid exporter config"), + }, + "empty cfg invalid": { + cfg: &promexp.ExporterConfig{}, + expErr: errors.New("invalid exporter config"), + }, + "negative port": { + cfg: &promexp.ExporterConfig{ + Port: -1, + }, + expErr: errors.New("invalid exporter config"), + }, + "nil register fn": { + cfg: &promexp.ExporterConfig{ + Port: 1234, + }, + expErr: errors.New("invalid exporter config"), + }, + "register fn fails": { + cfg: &promexp.ExporterConfig{ + Port: 1234, + Register: func(context.Context, logging.Logger) error { + return errors.New("whoops") + }, + }, + expErr: errors.New("failed to register"), + }, + "success": { + cfg: &promexp.ExporterConfig{ + Port: promexp.ClientTelemetryPort, + Register: func(ctx context.Context, log logging.Logger) error { + return nil + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.cfg != nil { + tc.cfg.Title = t.Name() + } + cleanup, err := promexp.StartExporter(test.Context(t), log, tc.cfg) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + // Quick tests to make sure the exporter is listening and + // that our handlers are invoked. 
+ var resp *http.Response + for { + var err error + resp, err = http.Get(fmt.Sprintf("http://localhost:%d/", tc.cfg.Port)) + if err == nil { + break + } + log.Errorf("failed to connect to exporter: %+v", err) + time.Sleep(100 * time.Millisecond) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(body), tc.cfg.Title) { + t.Fatalf("expected %q to contain %q", string(body), tc.cfg.Title) + } + resp.Body.Close() + + resp, err = http.Get(fmt.Sprintf("http://localhost:%d/metrics", tc.cfg.Port)) + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + + cleanup() + time.Sleep(1 * time.Second) + + // Make sure the exporter is no longer listening. + _, err = http.Get(fmt.Sprintf("http://localhost:%d/", tc.cfg.Port)) + if err == nil { + t.Fatal("expected http Get to fail on closed port") + } + }) + } +} diff --git a/src/control/lib/telemetry/promexp/source.go b/src/control/lib/telemetry/promexp/source.go new file mode 100644 index 00000000000..2212b319ff7 --- /dev/null +++ b/src/control/lib/telemetry/promexp/source.go @@ -0,0 +1,214 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package promexp + +import ( + "context" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/telemetry" + "github.com/daos-stack/daos/src/control/logging" +) + +type ( + sourceMetricSchema struct { + mu sync.Mutex + sourceMetrics map[string]*sourceMetric + seen map[string]struct{} + addFn func(logging.Logger, telemetry.Metric) *sourceMetric + } + + // MetricSource encapsulates the logic and data for collecting telemetry + // from a DAOS metrics source. 
+ MetricSource struct { + ctx context.Context + tmMutex sync.RWMutex // To protect telemetry collection + enabled atm.Bool + tmSchema *telemetry.Schema + smSchema *sourceMetricSchema + } +) + +func newSourceMetricSchema(addFn func(logging.Logger, telemetry.Metric) *sourceMetric) *sourceMetricSchema { + return &sourceMetricSchema{ + sourceMetrics: make(map[string]*sourceMetric), + seen: make(map[string]struct{}), + addFn: addFn, + } +} + +// Prune removes any metrics that have not been seen since the last call to Prune. +func (s *sourceMetricSchema) Prune() { + s.mu.Lock() + defer s.mu.Unlock() + + for id := range s.sourceMetrics { + if _, found := s.seen[id]; !found { + delete(s.sourceMetrics, id) + } + } + s.seen = make(map[string]struct{}) +} + +func (s *sourceMetricSchema) add(log logging.Logger, metric telemetry.Metric) (sm *sourceMetric) { + s.mu.Lock() + defer s.mu.Unlock() + + id := metric.FullPath() + s.seen[id] = struct{}{} + + var found bool + if sm, found = s.sourceMetrics[id]; !found { + sm = s.addFn(log, metric) + s.sourceMetrics[id] = sm + } else { + sm.resetVecs() + } + + return +} + +func defaultCollectorOpts() *CollectorOpts { + return &CollectorOpts{} +} + +// sourceMetric defines a wrapper for the wrapped telemetry.Metric instance. +type sourceMetric struct { + metric telemetry.Metric + baseName string + labels labelMap + gvm gvMap + cvm cvMap +} + +// collect sends the metrics vectors in the sourceMetric struct to the provided channel. +func (bm *sourceMetric) collect(ch chan<- prometheus.Metric) { + for _, gv := range bm.gvm { + gv.Collect(ch) + } + for _, cv := range bm.cvm { + cv.Collect(ch) + } +} + +// resetVecs resets all the metrics vectors in the sourceMetric struct. +func (bm *sourceMetric) resetVecs() { + for _, gv := range bm.gvm { + gv.Reset() + } + for _, cv := range bm.cvm { + cv.Reset() + } +} + +// newSourceMetric initializes a new sourceMetric struct. 
+func newSourceMetric(log logging.Logger, m telemetry.Metric, baseName string, labels labelMap) *sourceMetric { + sm := &sourceMetric{ + metric: m, + baseName: baseName, + labels: labels, + gvm: make(gvMap), + cvm: make(cvMap), + } + + desc := m.Desc() + + switch sm.metric.Type() { + case telemetry.MetricTypeGauge, telemetry.MetricTypeTimestamp, + telemetry.MetricTypeSnapshot: + sm.gvm.add(sm.baseName, desc, sm.labels) + case telemetry.MetricTypeStatsGauge, telemetry.MetricTypeDuration: + sm.gvm.add(sm.baseName, desc, sm.labels) + for _, ms := range getMetricStats(sm.baseName, sm.metric) { + if ms.isCounter { + sm.cvm.add(ms.name, ms.desc, sm.labels) + } else { + sm.gvm.add(ms.name, ms.desc, sm.labels) + } + } + case telemetry.MetricTypeCounter: + sm.cvm.add(sm.baseName, desc, sm.labels) + default: + log.Errorf("[%s]: metric type %d not supported", baseName, sm.metric.Type()) + } + + return sm +} + +// IsEnabled checks if the source is enabled. +func (s *MetricSource) IsEnabled() bool { + return s.enabled.IsTrue() +} + +// Enable enables the source. +func (s *MetricSource) Enable() { + s.enabled.SetTrue() +} + +// Disable disables the source. +func (s *MetricSource) Disable() { + s.enabled.SetFalse() +} + +// Collect invokes telemetry.CollectMetrics() for the metrics context +// managed by this source. The collected metrics are sent to the provided channel. 
+func (s *MetricSource) Collect(log logging.Logger, ch chan<- *sourceMetric) { + if s == nil { + log.Error("nil source") + return + } + if !s.IsEnabled() { + return + } + if ch == nil { + log.Error("nil channel") + return + } + + s.tmMutex.RLock() + defer s.tmMutex.RUnlock() + + metrics := make(chan telemetry.Metric) + go func() { + if err := telemetry.CollectMetrics(s.ctx, s.tmSchema, metrics); err != nil { + log.Errorf("failed to collect metrics: %s", err) + return + } + s.tmSchema.Prune() + }() + + for metric := range metrics { + ch <- s.smSchema.add(log, metric) + } + s.smSchema.Prune() +} + +// PruneSegments prunes unused telemetry segments. +func (s *MetricSource) PruneSegments(log logging.Logger, maxSegAge time.Duration) { + if s == nil { + log.Error("nil source") + return + } + if !s.IsEnabled() { + return + } + + if err := telemetry.PruneUnusedSegments(s.ctx, maxSegAge); err != nil { + log.Errorf("failed to prune segments: %s", err) + return + } + + s.tmSchema.Prune() + s.smSchema.Prune() +} diff --git a/src/control/lib/telemetry/promexp/util.go b/src/control/lib/telemetry/promexp/util.go new file mode 100644 index 00000000000..6ddc46623d3 --- /dev/null +++ b/src/control/lib/telemetry/promexp/util.go @@ -0,0 +1,170 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build linux && (amd64 || arm64) +// +build linux +// +build amd64 arm64 + +package promexp + +import ( + "sort" + "strings" + "unicode" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + + "github.com/daos-stack/daos/src/control/lib/telemetry" +) + +type labelMap map[string]string + +func (lm labelMap) keys() (keys []string) { + for label := range lm { + keys = append(keys, label) + } + sort.Strings(keys) + + return +} + +func sanitizeMetricName(in string) string { + return strings.Map(func(r rune) rune { + switch { + // Valid names for Prometheus are limited to: + case r >= 'a' && r <= 'z': // lowercase letters + case r >= 'A' && r <= 'Z': // uppercase letters + case unicode.IsDigit(r): // digits + default: // sanitize any other character + return '_' + } + + return r + }, strings.TrimLeft(in, "/")) +} + +func matchLabel(labels labelMap, input, match, label string) bool { + if !strings.HasPrefix(input, match) { + return false + } + + splitStr := strings.SplitN(input, "_", 2) + if len(splitStr) == 2 { + labels[label] = splitStr[1] + return true + } + return false +} + +func appendName(cur, name string) string { + if cur == "" { + return name + } + return cur + "_" + name +} + +type gvMap map[string]*prometheus.GaugeVec + +func (m gvMap) add(name, help string, labels labelMap) { + if _, found := m[name]; !found { + gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: name, + Help: help, + }, labels.keys()) + m[name] = gv + } +} + +func (m gvMap) set(name string, value float64, labels labelMap) error { + gv, found := m[name] + if !found { + return errors.Errorf("gauge vector %s not found", name) + } + gv.With(prometheus.Labels(labels)).Set(value) + + return nil +} + +type cvMap map[string]*prometheus.CounterVec + +func (m cvMap) add(name, help string, labels labelMap) { + if _, found := m[name]; !found { + cv := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: 
name, + Help: help, + }, labels.keys()) + m[name] = cv + } +} + +func (m cvMap) set(name string, value float64, labels labelMap) error { + cv, found := m[name] + if !found { + return errors.Errorf("counter vector %s not found", name) + } + cv.With(prometheus.Labels(labels)).Add(value) + + return nil +} + +type metricStat struct { + name string + desc string + value float64 + isCounter bool +} + +func getMetricStats(baseName string, m telemetry.Metric) (stats []*metricStat) { + ms, ok := m.(telemetry.StatsMetric) + if !ok { + return []*metricStat{} + } + + for name, s := range map[string]struct { + fn func() float64 + desc string + isCounter bool + }{ + "min": { + fn: func() float64 { return float64(ms.Min()) }, + desc: " (min value)", + }, + "max": { + fn: func() float64 { return float64(ms.Max()) }, + desc: " (max value)", + }, + "mean": { + fn: ms.Mean, + desc: " (mean)", + }, + "sum": { + fn: func() float64 { return float64(ms.Sum()) }, + desc: " (sum)", + }, + "stddev": { + fn: ms.StdDev, + desc: " (std dev)", + }, + "sumsquares": { + fn: ms.SumSquares, + desc: " (sum of squares)", + }, + "samples": { + fn: func() float64 { return float64(ms.SampleSize()) }, + desc: " (samples)", + isCounter: true, + }, + } { + stats = append(stats, &metricStat{ + name: baseName + "_" + name, + desc: m.Desc() + s.desc, + value: s.fn(), + isCounter: s.isCounter, + }) + } + + return +} diff --git a/src/control/lib/telemetry/promexp/util_test.go b/src/control/lib/telemetry/promexp/util_test.go new file mode 100644 index 00000000000..104da9ec383 --- /dev/null +++ b/src/control/lib/telemetry/promexp/util_test.go @@ -0,0 +1,135 @@ +// +// (C) Copyright 2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package promexp + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/telemetry" +) + +func TestPromExp_sanitizeMetricName(t *testing.T) { + for input, tc := range map[string]struct { + expOutput string + }{ + "": { + expOutput: "", + }, + "azAZ09": { + expOutput: "azAZ09", + }, + "/a-z A-Z 0-9/": { + expOutput: "a_z_A_Z_0_9_", + }, + } { + t.Run(input, func(t *testing.T) { + got := sanitizeMetricName(input) + if got != tc.expOutput { + t.Errorf("sanitizeMetricName(%q) = %q, want %q", input, got, tc.expOutput) + } + }) + } +} + +func TestPromExp_getMetricStats(t *testing.T) { + segID := telemetry.NextTestID(telemetry.PromexpIDBase) + telemetry.InitTestMetricsProducer(t, segID, 4096) + defer telemetry.CleanupTestMetricsProducer(t) + testValues := []uint64{1, 2, 3, 4, 5} + + ctx, err := telemetry.Init(test.Context(t), uint32(segID)) + if err != nil { + t.Fatalf("Init: %v", err) + } + + for name, tc := range map[string]struct { + baseName string + metric *telemetry.TestMetric + expStats []*metricStat + }{ + "non-stats gauge": { + baseName: "gauge", + metric: &telemetry.TestMetric{ + Name: "gauge", + Type: telemetry.MetricTypeGauge, + Cur: 1.0, + }, + expStats: []*metricStat{}, + }, + "stats gauge": { + baseName: "stats_gauge", + metric: &telemetry.TestMetric{ + Name: "stats_gauge", + Type: telemetry.MetricTypeStatsGauge, + Values: testValues, + }, + expStats: []*metricStat{ + { + name: "stats_gauge_min", + desc: " (min value)", + value: 1.0, + }, + { + name: "stats_gauge_max", + desc: " (max value)", + value: 5.0, + }, + { + name: "stats_gauge_mean", + desc: " (mean)", + value: 3.0, + }, + { + name: "stats_gauge_sum", + desc: " (sum)", + value: 15.0, + }, + { + name: "stats_gauge_samples", + desc: " (samples)", + value: 5, + isCounter: true, + }, + { + name: 
// shmidStat wraps the kernel's per-segment shmid_ds bookkeeping for one
// SysV shared memory segment, as returned by shmctl(IPC_STAT).
type shmidStat struct {
	id C.int             // segment identifier passed to shmctl
	ds C.struct_shmid_ds // kernel-reported segment state
}

// Size returns the size of segment in bytes.
func (s *shmidStat) Size() int {
	return int(s.ds.shm_segsz)
}

// Atime returns the time of last shmat(2).
func (s *shmidStat) Atime() time.Time {
	return time.Unix(int64(s.ds.shm_atime), 0)
}

// Dtime returns the time of last shmdt(2).
func (s *shmidStat) Dtime() time.Time {
	return time.Unix(int64(s.ds.shm_dtime), 0)
}

// Ctime returns the time of last shmctl(2) or creation time.
func (s *shmidStat) Ctime() time.Time {
	return time.Unix(int64(s.ds.shm_ctime), 0)
}

// Cpid returns the creator pid.
func (s *shmidStat) Cpid() int {
	return int(s.ds.shm_cpid)
}
// Lpid returns the last shmat(2)/shmdt(2) pid.
func (s *shmidStat) Lpid() int {
	return int(s.ds.shm_lpid)
}

// Nattach returns the number of attached processes.
func (s *shmidStat) Nattach() int {
	return int(s.ds.shm_nattch)
}

// C returns the C struct.
func (s *shmidStat) C() *C.struct_shmid_ds {
	return &s.ds
}

// shmStat fetches IPC_STAT information for the segment identified by id.
func shmStat(id C.int) (*shmidStat, error) {
	st := shmidStat{
		id: id,
	}
	// cgo's two-value call form surfaces errno in err; rc != 0 means the
	// shmctl call failed.
	rc, err := C.shmctl(id, C.IPC_STAT, &st.ds)
	if rc != 0 {
		return nil, errors.Wrapf(err, "shmctl(IPC_STAT, %d)", id)
	}

	return &st, nil
}

// shmStatKey resolves key to a segment id via shmget(2) and stats it.
func shmStatKey(key C.key_t) (*shmidStat, error) {
	id, err := C.shmget(key, 0, 0)
	if err != nil {
		return nil, errors.Wrapf(err, "shmget(%d, 0, 0)", key)
	}

	return shmStat(id)
}

// shmChown changes the owning uid/gid of the segment identified by key,
// by re-submitting the stat'd shmid_ds with updated perms via IPC_SET.
func shmChown(key C.key_t, uid C.uid_t, gid C.gid_t) error {
	st, err := shmStatKey(key)
	if err != nil {
		return err
	}

	st.ds.shm_perm.gid = gid
	st.ds.shm_perm.uid = uid

	rc, err := C.shmctl(st.id, C.IPC_SET, st.C())
	if rc != 0 {
		return errors.Wrapf(err, "shmctl(IPC_SET, %d)", st.id)
	}

	return nil
}
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -14,8 +14,28 @@ package telemetry /* #cgo LDFLAGS: -lgurt -#include "gurt/telemetry_common.h" -#include "gurt/telemetry_consumer.h" +#include +#include +#include +#include + +static int +rm_ephemeral_dir(const char *path) +{ + return d_tm_del_ephemeral_dir(path); +} + +static int +add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, char *path) +{ + return d_tm_add_ephemeral_dir(node, size_bytes, path); +} + +static int +attach_segment_path(key_t key, char *path) +{ + return d_tm_attach_path_segment(key, path); +} */ import "C" @@ -25,12 +45,19 @@ import ( "io" "os" "path/filepath" + "sort" + "strconv" "strings" "sync" "time" "unsafe" "github.com/pkg/errors" + "golang.org/x/sys/unix" + + "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" ) type MetricType int @@ -46,6 +73,11 @@ const ( MetricTypeDirectory MetricType = C.D_TM_DIRECTORY MetricTypeLink MetricType = C.D_TM_LINK + ClientJobRootID = C.DC_TM_JOB_ROOT_ID + ClientJobMax = 1024 + ClientMetricsEnabledEnv = C.DAOS_CLIENT_METRICS_ENABLE + ClientMetricsRetainEnv = C.DAOS_CLIENT_METRICS_RETAIN + BadUintVal = ^uint64(0) BadFloatVal = float64(BadUintVal) BadIntVal = int64(BadUintVal >> 1) @@ -81,7 +113,7 @@ type ( type ( handle struct { sync.RWMutex - idx uint32 + id uint32 rank *uint32 ctx *C.struct_d_tm_context root *C.struct_d_tm_node_t @@ -109,6 +141,34 @@ const ( handleKey telemetryKey = "handle" ) +func (mt MetricType) String() string { + strFmt := func(name string) string { + numStr := strconv.Itoa(int(mt)) + return name + " (" + numStr + ")" + } + + switch mt { + case MetricTypeDirectory: + return strFmt("directory") + case MetricTypeCounter: + return strFmt("counter") + case MetricTypeTimestamp: + return strFmt("timestamp") + case MetricTypeSnapshot: + return strFmt("snapshot") + case MetricTypeDuration: + return strFmt("duration") + case 
MetricTypeGauge: + return strFmt("gauge") + case MetricTypeStatsGauge: + return strFmt("gauge (stats)") + case MetricTypeLink: + return strFmt("link") + default: + return strFmt("unknown") + } +} + func (h *handle) isValid() bool { return h != nil && h.ctx != nil && h.root != nil } @@ -295,24 +355,43 @@ func collectGarbageLoop(ctx context.Context, ticker *time.Ticker) { } } +func initClientRoot(parent context.Context, shmID uint32) (context.Context, error) { + if parent == nil { + return nil, errors.New("nil parent context") + } + + shmSize := C.ulong(ClientJobMax * C.D_TM_METRIC_SIZE) + + rc := C.d_tm_init(C.int(shmID), shmSize, C.D_TM_OPEN_OR_CREATE) + if rc != 0 { + return nil, errors.Errorf("failed to init client root: %s", daos.Status(rc)) + } + + return Init(parent, shmID) +} + +func InitClientRoot(ctx context.Context) (context.Context, error) { + return initClientRoot(ctx, ClientJobRootID) +} + // Init initializes the telemetry bindings -func Init(parent context.Context, idx uint32) (context.Context, error) { +func Init(parent context.Context, id uint32) (context.Context, error) { if parent == nil { return nil, errors.New("nil parent context") } - tmCtx := C.d_tm_open(C.int(idx)) + tmCtx := C.d_tm_open(C.int(id)) if tmCtx == nil { - return nil, errors.Errorf("no shared memory segment found for idx: %d", idx) + return nil, errors.Errorf("no shared memory segment found for key: %d", id) } root := C.d_tm_get_root(tmCtx) if root == nil { - return nil, errors.Errorf("no root node found in shared memory segment for idx: %d", idx) + return nil, errors.Errorf("no root node found in shared memory segment for key: %d", id) } handle := &handle{ - idx: idx, + id: id, ctx: tmCtx, root: root, } @@ -323,6 +402,11 @@ func Init(parent context.Context, idx uint32) (context.Context, error) { return newCtx, nil } +// Fini releases resources claimed by Init(). 
+func Fini() { + C.d_tm_fini() +} + // Detach detaches from the telemetry handle func Detach(ctx context.Context) { if hdl, err := getHandle(ctx); err == nil { @@ -333,6 +417,38 @@ func Detach(ctx context.Context) { } } +func addEphemeralDir(path string, shmSize uint64) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + if rc := C.add_ephemeral_dir(nil, C.ulong(shmSize), cPath); rc != 0 { + return daos.Status(rc) + } + + return nil +} + +// SetupClientRoot performs the necessary actions to get the client telemetry +// segment linked into the agent-managed tree. +func SetupClientRoot(ctx context.Context, jobid string, pid, shm_key int) error { + log := logging.FromContext(ctx) + + if err := addEphemeralDir(jobid, ClientJobMax*C.D_TM_METRIC_SIZE); err != nil { + if err != daos.Exists { + return errors.Wrapf(err, "failed to add client job path %q", jobid) + } + } + + pidPath := filepath.Join(jobid, string(PathSep), strconv.Itoa(pid)) + cPidPath := C.CString(pidPath) + defer C.free(unsafe.Pointer(cPidPath)) + if rc := C.attach_segment_path(C.key_t(shm_key), cPidPath); rc != 0 { + return errors.Wrapf(daos.Status(rc), "failed to attach client segment 0x%x at %q", shm_key, pidPath) + } + + log.Tracef("attached client segment @ %q (key: 0x%x)", pidPath, shm_key) + return nil +} + type Schema struct { mu sync.RWMutex metrics map[string]Metric @@ -413,10 +529,12 @@ func NewSchema() *Schema { } -func visit(hdl *handle, s *Schema, node *C.struct_d_tm_node_t, pathComps string, out chan<- Metric) { +type procNodeFn func(hdl *handle, id string, node *C.struct_d_tm_node_t) + +func visit(hdl *handle, node *C.struct_d_tm_node_t, pathComps string, procLinks bool, procNode procNodeFn) { var next *C.struct_d_tm_node_t - if node == nil { + if node == nil || procNode == nil { return } name := C.GoString(C.d_tm_get_name(hdl.ctx, node)) @@ -425,29 +543,30 @@ func visit(hdl *handle, s *Schema, node *C.struct_d_tm_node_t, pathComps string, id = name } - cType := 
node.dtn_type - switch cType { + switch node.dtn_type { case C.D_TM_DIRECTORY: next = C.d_tm_get_child(hdl.ctx, node) if next != nil { - visit(hdl, s, next, id, out) + visit(hdl, next, id, procLinks, procNode) } case C.D_TM_LINK: next = C.d_tm_follow_link(hdl.ctx, node) if next != nil { + if procLinks { + // Use next to get the linked shm key + procNode(hdl, id, next) + } + // link leads to a directory with the same name - visit(hdl, s, next, pathComps, out) + visit(hdl, next, pathComps, procLinks, procNode) } default: - m := s.Add(hdl, id, cType, node) - if m != nil { - out <- m - } + procNode(hdl, id, node) } next = C.d_tm_get_sibling(hdl.ctx, node) if next != nil && next != node { - visit(hdl, s, next, pathComps, out) + visit(hdl, next, pathComps, procLinks, procNode) } } @@ -465,8 +584,98 @@ func CollectMetrics(ctx context.Context, s *Schema, out chan<- Metric) error { return errors.New("invalid handle") } - node := hdl.root - visit(hdl, s, node, "", out) + procNode := func(hdl *handle, id string, node *C.struct_d_tm_node_t) { + m := s.Add(hdl, id, node.dtn_type, node) + if m != nil { + out <- m + } + } + + visit(hdl, hdl.root, "", false, procNode) + + return nil +} + +// PruneUnusedSegments removes shared memory segments associated with +// unused ephemeral subdirectories. 
+func PruneUnusedSegments(ctx context.Context, maxSegAge time.Duration) error { + log := logging.FromContext(ctx) + + hdl, err := getHandle(ctx) + if err != nil { + return err + } + hdl.Lock() + defer hdl.Unlock() + + if !hdl.isValid() { + return errors.New("invalid handle") + } + + var toPrune []string + procNode := func(hdl *handle, id string, node *C.struct_d_tm_node_t) { + if node == nil || node.dtn_type != C.D_TM_DIRECTORY { + return + } + + path := id + comps := strings.SplitN(path, string(PathSep), 2) + if strings.HasPrefix(comps[0], "ID:") && len(comps) > 1 { + path = comps[1] + } + + st, err := shmStatKey(node.dtn_shmem_key) + if err != nil { + log.Errorf("failed to shmStat(%s): %s", path, err) + return + } + + log.Tracef("path:%s shmid:%d spid:%d cpid:%d lpid:%d age:%s", + path, st.id, os.Getpid(), st.Cpid(), st.Lpid(), time.Since(st.Ctime())) + + // If the creator process was someone other than us, and it's still + // around, don't mess with the segment. + if _, err := common.GetProcName(st.Cpid()); err == nil && st.Cpid() != unix.Getpid() { + return + } + + if time.Since(st.Ctime()) <= maxSegAge { + return + } + + log.Tracef("adding %s to prune list", path) + toPrune = append(toPrune, path) + } + + visit(hdl, hdl.root, "", true, procNode) + + sort.Sort(sort.Reverse(sort.StringSlice(toPrune))) + for _, path := range toPrune { + log.Tracef("pruning %s", path) + if err := removeLink(hdl, path); err != nil { + log.Errorf("failed to prune %s: %s", path, err) + } + } + + return nil +} + +func removeLink(hdl *handle, path string) error { + _, err := findNode(hdl, path) + if err != nil { + return err + } + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + rc := C.rm_ephemeral_dir(cPath) + if rc != 0 { + return errors.Wrapf(daos.Status(rc), "failed to remove link %q", path) + } + + if _, err := findNode(hdl, path); err == nil { + return errors.Errorf("failed to remove %s", path) + } return nil } diff --git 
a/src/control/lib/telemetry/telemetry_test.go b/src/control/lib/telemetry/telemetry_test.go index a645f0e60e4..bc63cc81399 100644 --- a/src/control/lib/telemetry/telemetry_test.go +++ b/src/control/lib/telemetry/telemetry_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,6 +9,9 @@ package telemetry import ( "context" "fmt" + "os" + "os/exec" + "strconv" "sync" "testing" "time" @@ -16,6 +19,7 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/logging" ) func TestTelemetry_Init(t *testing.T) { @@ -50,7 +54,7 @@ func TestTelemetry_Init(t *testing.T) { t.Fatalf("can't get handle from result ctx: %v", err) } - test.AssertEqual(t, uint32(producerID), hdl.idx, "handle.idx doesn't match shmem ID") + test.AssertEqual(t, uint32(producerID), hdl.id, "handle.idx doesn't match shmem ID") hdl.RLock() defer hdl.RUnlock() @@ -179,6 +183,106 @@ func TestTelemetry_GetRank(t *testing.T) { } } +func childErrExit(err error) { + if err == nil { + err = errors.New("unknown error") + } + fmt.Fprintf(os.Stderr, "CHILD ERROR: %s\n", err) + os.Exit(1) +} + +const ( + childModeEnvVar = "TEST_CHILD_MODE" + childModeLinkTest = "CHILD_MODE_LINK_TEST" + childShmIDEnvVar = "TEST_CHILD_SHM_ID" +) + +func TestMain(m *testing.M) { + mode := os.Getenv(childModeEnvVar) + switch mode { + case "": + // default; run the test binary + os.Exit(m.Run()) + case childModeLinkTest: + runChildTelemProc() + default: + childErrExit(errors.Errorf("Unknown child mode: %q", mode)) + } +} + +func runChildTelemProc() { + pid := os.Getpid() + shmID, err := strconv.Atoi(os.Getenv(childShmIDEnvVar)) + if err != nil { + childErrExit(err) + } + + jobDir := TestMetricsMap{ + MetricTypeDirectory: &TestMetric{ + Name: "job", + }, + } + pidLink := TestMetricsMap{ + MetricTypeLink: &TestMetric{ + Name: 
fmt.Sprintf("job/%d", pid), + }, + } + startedAt := TestMetricsMap{ + MetricTypeTimestamp: &TestMetric{ + Name: fmt.Sprintf("job/%d/started_at", pid), + }, + } + + t := &testing.T{} + + InitTestMetricsProducer(t, shmID, 1024) + + AddTestMetrics(t, jobDir) + AddTestMetrics(t, pidLink) + AddTestMetrics(t, startedAt) + + if t.Failed() { + childErrExit(errors.New("test failed")) + } +} + +func TestTelemetry_PruneSegments(t *testing.T) { + shmID := uint32(NextTestID()) + + cmd := exec.Command(os.Args[0]) + cmd.Env = append(os.Environ(), + fmt.Sprintf("%s=%s", childModeEnvVar, childModeLinkTest), + fmt.Sprintf("%s=%d", childShmIDEnvVar, shmID), + ) + if out, err := cmd.CombinedOutput(); err != nil { + t.Errorf("child failed: %s", out) + t.Fatal(err) + } + + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ctx, err := initClientRoot(test.MustLogContext(t, log), shmID) + if err != nil { + t.Fatal(err) + } + defer func() { + Fini() + }() + + path := fmt.Sprintf("job/%d/started_at", cmd.Process.Pid) + _, err = GetTimestamp(ctx, path) + test.CmpErr(t, nil, err) + + err = PruneUnusedSegments(ctx, time.Nanosecond) + test.CmpErr(t, nil, err) + + _, err = GetTimestamp(ctx, path) + if err == nil { + t.Fatal("expected GetTimestamp() to fail after prune") + } +} + func TestTelemetry_CollectMetrics(t *testing.T) { testMetrics := TestMetricsMap{ MetricTypeCounter: &TestMetric{ diff --git a/src/control/lib/telemetry/test_helpers.go b/src/control/lib/telemetry/test_helpers.go index c0cbdda72ef..bc014eb2502 100644 --- a/src/control/lib/telemetry/test_helpers.go +++ b/src/control/lib/telemetry/test_helpers.go @@ -19,6 +19,8 @@ import ( "testing" "time" + "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" ) @@ -60,6 +62,7 @@ var nextIDMutex sync.Mutex const ( telemetryIDBase = 100 PromexpIDBase = 200 + AgentIDBase = 300 ) // NextTestID gets the next available ID for a shmem 
segment. This helps avoid @@ -80,6 +83,7 @@ func NextTestID(base ...int) int { type ( TestMetric struct { + Type MetricType Name string path string desc string @@ -87,6 +91,7 @@ type ( min uint64 max uint64 Cur float64 // value - may be exact or approximate + Values []uint64 sum uint64 mean float64 stddev float64 @@ -106,6 +111,25 @@ func (tm *TestMetric) FullPath() string { return fullName } +func (tm *TestMetric) GetMetric(ctx context.Context) (Metric, error) { + switch tm.Type { + case MetricTypeCounter: + return GetCounter(ctx, tm.FullPath()) + case MetricTypeTimestamp: + return GetTimestamp(ctx, tm.FullPath()) + case MetricTypeSnapshot: + return GetSnapshot(ctx, tm.FullPath()) + case MetricTypeDuration: + return GetDuration(ctx, tm.FullPath()) + case MetricTypeGauge: + return GetGauge(ctx, tm.FullPath()) + case MetricTypeStatsGauge: + return GetStatsGauge(ctx, tm.FullPath()) + default: + return nil, errors.Errorf("unsupported metric type %s", tm.Type) + } +} + func InitTestMetricsProducer(t *testing.T, id int, size uint64) { t.Helper() @@ -115,65 +139,82 @@ func InitTestMetricsProducer(t *testing.T, id int, size uint64) { } } +func AddTestMetric(t *testing.T, tm *TestMetric) { + t.Helper() + + fullName := tm.FullPath() + switch tm.Type { + case MetricTypeGauge: + rc := C.add_metric(&tm.node, C.D_TM_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + C.d_tm_set_gauge(tm.node, C.uint64_t(tm.Cur)) + case MetricTypeStatsGauge: + rc := C.add_metric(&tm.node, C.D_TM_STATS_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", tm.Name, daos.Status(rc)) + } + + vals := make([]uint64, len(tm.Values)) + if len(tm.Values) > 0 { + copy(vals, tm.Values) + } else { + vals = []uint64{tm.min, tm.max, uint64(tm.Cur)} + } + t.Logf("setting values for %s: %+v\n", tm.FullPath(), vals) + + for _, val := range vals { + 
C.d_tm_set_gauge(tm.node, C.uint64_t(val)) + t.Logf("set %s to %d\n", tm.FullPath(), val) + } + case MetricTypeCounter: + rc := C.add_metric(&tm.node, C.D_TM_COUNTER, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + C.d_tm_inc_counter(tm.node, C.ulong(tm.Cur)) + case MetricTypeDuration: + rc := C.add_metric(&tm.node, C.D_TM_DURATION|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + C.d_tm_mark_duration_start(tm.node, C.D_TM_CLOCK_REALTIME) + time.Sleep(time.Duration(tm.Cur)) + C.d_tm_mark_duration_end(tm.node) + case MetricTypeTimestamp: + rc := C.add_metric(&tm.node, C.D_TM_TIMESTAMP, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + C.d_tm_record_timestamp(tm.node) + case MetricTypeSnapshot: + rc := C.add_metric(&tm.node, C.D_TM_TIMER_SNAPSHOT|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + C.d_tm_take_timer_snapshot(tm.node, C.D_TM_CLOCK_REALTIME) + case MetricTypeDirectory: + rc := C.add_metric(&tm.node, C.D_TM_DIRECTORY, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + case MetricTypeLink: + rc := C.add_eph_dir(&tm.node, 1024, C.CString(fullName)) + if rc != 0 { + t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) + } + default: + t.Fatalf("metric type %s not supported", tm.Type) + } +} + func AddTestMetrics(t *testing.T, testMetrics TestMetricsMap) { t.Helper() for mt, tm := range testMetrics { - fullName := tm.FullPath() - switch mt { - case MetricTypeGauge: - rc := C.add_metric(&tm.node, C.D_TM_GAUGE, C.CString(tm.desc), 
C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - C.d_tm_set_gauge(tm.node, C.uint64_t(tm.Cur)) - case MetricTypeStatsGauge: - rc := C.add_metric(&tm.node, C.D_TM_STATS_GAUGE, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", tm.Name, daos.Status(rc)) - } - for _, val := range []uint64{tm.min, tm.max, uint64(tm.Cur)} { - C.d_tm_set_gauge(tm.node, C.uint64_t(val)) - } - case MetricTypeCounter: - rc := C.add_metric(&tm.node, C.D_TM_COUNTER, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - C.d_tm_inc_counter(tm.node, C.ulong(tm.Cur)) - case MetricTypeDuration: - rc := C.add_metric(&tm.node, C.D_TM_DURATION|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - C.d_tm_mark_duration_start(tm.node, C.D_TM_CLOCK_REALTIME) - time.Sleep(time.Duration(tm.Cur)) - C.d_tm_mark_duration_end(tm.node) - case MetricTypeTimestamp: - rc := C.add_metric(&tm.node, C.D_TM_TIMESTAMP, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - C.d_tm_record_timestamp(tm.node) - case MetricTypeSnapshot: - rc := C.add_metric(&tm.node, C.D_TM_TIMER_SNAPSHOT|C.D_TM_CLOCK_REALTIME, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - C.d_tm_take_timer_snapshot(tm.node, C.D_TM_CLOCK_REALTIME) - case MetricTypeDirectory: - rc := C.add_metric(&tm.node, C.D_TM_DIRECTORY, C.CString(tm.desc), C.CString(tm.units), C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - case MetricTypeLink: - rc := C.add_eph_dir(&tm.node, 1024, 
C.CString(fullName)) - if rc != 0 { - t.Fatalf("failed to add %s: %s", fullName, daos.Status(rc)) - } - default: - t.Fatalf("metric type %d not supported", mt) - } + tm.Type = mt + AddTestMetric(t, tm) } } diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index f7f094ffe7e..4b2f624aff2 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2022 Intel Corporation. +// (C) Copyright 2018-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,13 +8,9 @@ package server import ( "context" - "fmt" - "net/http" - "time" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/lib/telemetry/promexp" @@ -27,7 +23,7 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng return nil } - c, err := promexp.NewCollector(log, &promexp.CollectorOpts{}) + c, err := promexp.NewEngineCollector(log, &promexp.CollectorOpts{}) if err != nil { return err } @@ -73,45 +69,13 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng } func startPrometheusExporter(ctx context.Context, log logging.Logger, port int, engines []Engine) (func(), error) { - if err := regPromEngineSources(ctx, log, engines); err != nil { - return nil, err + expCfg := &promexp.ExporterConfig{ + Port: port, + Title: "DAOS Engine Telemetry", + Register: func(ctx context.Context, log logging.Logger) error { + return regPromEngineSources(ctx, log, engines) + }, } - listenAddress := fmt.Sprintf("0.0.0.0:%d", port) - - srv := http.Server{Addr: listenAddress} - http.Handle("/metrics", promhttp.HandlerFor( - prometheus.DefaultGatherer, promhttp.HandlerOpts{}, - )) - http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - num, err := w.Write([]byte(` - DAOS Exporter 
- -

DAOS Exporter

-

Metrics

- - `)) - if err != nil { - log.Errorf("%d: %s", num, err) - } - }) - - // http listener is a blocking call - go func() { - log.Infof("Listening on %s", listenAddress) - err := srv.ListenAndServe() - log.Infof("Prometheus web exporter stopped: %s", err.Error()) - }() - - return func() { - log.Debug("Shutting down Prometheus web exporter") - - // When this cleanup function is called, the original context - // will probably have already been canceled. - timedCtx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() - if err := srv.Shutdown(timedCtx); err != nil { - log.Noticef("HTTP server didn't shut down within timeout: %s", err.Error()) - } - }, nil + return promexp.StartExporter(ctx, log, expCfg) } diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index 2936acfcbea..18e32463bb0 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -9,6 +9,7 @@ #define D_LOGFAC DD_FAC(dtx) #include +#include #include #include #include @@ -132,11 +133,11 @@ dtx_metrics_count(void) return (sizeof(struct dtx_pool_metrics) / sizeof(struct d_tm_node_t *)); } -struct dss_module_metrics dtx_metrics = { - .dmm_tags = DAOS_TGT_TAG, - .dmm_init = dtx_metrics_alloc, - .dmm_fini = dtx_metrics_free, - .dmm_nr_metrics = dtx_metrics_count, +struct daos_module_metrics dtx_metrics = { + .dmm_tags = DAOS_TGT_TAG, + .dmm_init = dtx_metrics_alloc, + .dmm_fini = dtx_metrics_free, + .dmm_nr_metrics = dtx_metrics_count, }; static void diff --git a/src/engine/SConscript b/src/engine/SConscript index ceb00a409d0..e94b6a83dd6 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -29,7 +29,7 @@ def scons(): 'drpc_handler.c', 'drpc_listener.c', 'drpc_progress.c', 'init.c', 'module.c', 'srv_cli.c', 'profile.c', 'rpc.c', - 'server_iv.c', 'srv.c', 'srv.pb-c.c', 'tls.c', + 'server_iv.c', 'srv.c', 'srv.pb-c.c', 'sched.c', 'ult.c', 'event.pb-c.c', 'srv_metrics.c'] + libdaos_tgts diff --git a/src/engine/init.c b/src/engine/init.c index c4dfb6e1997..d639456eeb1 100644 
--- a/src/engine/init.c +++ b/src/engine/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "srv_internal.h" #include "drpc_internal.h" #include @@ -628,14 +629,14 @@ server_id_cb(uint32_t *tid, uint64_t *uid) } if (tid != NULL) { - struct dss_thread_local_storage *dtc; - struct dss_module_info *dmi; + struct daos_thread_local_storage *dtc; + struct daos_module_info *dmi; int index = daos_srv_modkey.dmk_index; - /* Avoid assertion in dss_module_key_get() */ + /* Avoid assertion in daos_module_key_get() */ dtc = dss_tls_get(); if (dtc != NULL && index >= 0 && index < DAOS_MODULE_KEYS_NR && - dss_module_keys[index] == &daos_srv_modkey) { + daos_get_module_key(index) == &daos_srv_modkey) { dmi = dss_get_module_info(); if (dmi != NULL) *tid = dmi->dmi_xs_id; diff --git a/src/engine/module.c b/src/engine/module.c index ce33609aeba..4ee74235ff5 100644 --- a/src/engine/module.c +++ b/src/engine/module.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include "drpc_handler.h" @@ -387,7 +388,7 @@ dss_module_init_metrics(enum dss_module_tag tag, void **metrics, struct loaded_mod *mod; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; @@ -415,7 +416,7 @@ dss_module_fini_metrics(enum dss_module_tag tag, void **metrics) struct loaded_mod *mod; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; @@ -442,7 +443,7 @@ dss_module_nr_pool_metrics(void) int total = 0, nr; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; diff --git a/src/engine/srv.c b/src/engine/srv.c index 
986d8ed04c4..e0c985c38f6 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -364,9 +364,9 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi) static void dss_srv_handler(void *arg) { - struct dss_xstream *dx = (struct dss_xstream *)arg; - struct dss_thread_local_storage *dtc; - struct dss_module_info *dmi; + struct dss_xstream *dx = (struct dss_xstream *)arg; + struct daos_thread_local_storage *dtc; + struct dss_module_info *dmi; int rc; bool track_mem = false; bool signal_caller = true; @@ -1300,7 +1300,7 @@ dss_srv_fini(bool force) vos_standalone_tls_fini(); /* fall through */ case XD_INIT_TLS_REG: - pthread_key_delete(dss_tls_key); + ds_tls_key_delete(); /* fall through */ case XD_INIT_ULT_BARRIER: ABT_cond_free(&xstream_data.xd_ult_barrier); @@ -1402,7 +1402,7 @@ dss_srv_init(void) xstream_data.xd_init_step = XD_INIT_ULT_BARRIER; /* register xstream-local storage key */ - rc = pthread_key_create(&dss_tls_key, NULL); + rc = ds_tls_key_create(); if (rc) { rc = dss_abterr2der(rc); D_ERROR("Failed to register storage key: "DF_RC"\n", DP_RC(rc)); diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index 8621175b44f..1d4278a98cf 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -319,10 +319,6 @@ sched_create_thread(struct dss_xstream *dx, void (*func)(void *), void *arg, return dss_abterr2der(rc); } -/* tls.c */ -void dss_tls_fini(struct dss_thread_local_storage *dtls); -struct dss_thread_local_storage *dss_tls_init(int tag, int xs_id, int tgt_id); - /* server_iv.c */ void ds_iv_init(void); void ds_iv_fini(void); diff --git a/src/engine/tls.c b/src/engine/tls.c deleted file mode 100644 index 90ea6cce7c5..00000000000 --- a/src/engine/tls.c +++ /dev/null @@ -1,155 +0,0 @@ -/** - * (C) Copyright 2016-2021 Intel Corporation. - * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ -/** - * This file is part of the DAOS server. It implements thread-local storage - * (TLS) for DAOS service threads. 
- */ -#define D_LOGFAC DD_FAC(server) - -#include -#include "srv_internal.h" - -/* The array remember all of registered module keys on one node. */ -struct dss_module_key *dss_module_keys[DAOS_MODULE_KEYS_NR] = { NULL }; - -pthread_mutex_t dss_module_keys_lock = PTHREAD_MUTEX_INITIALIZER; - -void -dss_register_key(struct dss_module_key *key) -{ - int i; - - D_MUTEX_LOCK(&dss_module_keys_lock); - for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { - if (dss_module_keys[i] == NULL) { - dss_module_keys[i] = key; - key->dmk_index = i; - break; - } - } - D_MUTEX_UNLOCK(&dss_module_keys_lock); - D_ASSERT(i < DAOS_MODULE_KEYS_NR); -} - -void -dss_unregister_key(struct dss_module_key *key) -{ - if (key == NULL) - return; - D_ASSERT(key->dmk_index >= 0); - D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); - D_MUTEX_LOCK(&dss_module_keys_lock); - dss_module_keys[key->dmk_index] = NULL; - D_MUTEX_UNLOCK(&dss_module_keys_lock); -} - -/** - * Init thread context - * - * \param[in]dtls Init the thread context to allocate the - * local thread variable for each module. - * - * \retval 0 if initialization succeeds - * \retval negative errno if initialization fails - */ -static int -dss_thread_local_storage_init(struct dss_thread_local_storage *dtls, - int xs_id, int tgt_id) -{ - int rc = 0; - int i; - - if (dtls->dtls_values == NULL) { - D_ALLOC_ARRAY(dtls->dtls_values, - (int)ARRAY_SIZE(dss_module_keys)); - if (dtls->dtls_values == NULL) - return -DER_NOMEM; - } - - for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { - struct dss_module_key *dmk = dss_module_keys[i]; - - if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { - D_ASSERT(dmk->dmk_init != NULL); - dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id); - if (dtls->dtls_values[i] == NULL) { - rc = -DER_NOMEM; - break; - } - } - } - return rc; -} - -/** - * Finish module context - * - * \param[in]dtls Finish the thread context to free the - * local thread variable for each module. 
- */ -static void -dss_thread_local_storage_fini(struct dss_thread_local_storage *dtls) -{ - int i; - - if (dtls->dtls_values != NULL) { - for (i = DAOS_MODULE_KEYS_NR - 1; i >= 0; i--) { - struct dss_module_key *dmk = dss_module_keys[i]; - - if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { - D_ASSERT(dtls->dtls_values[i] != NULL); - D_ASSERT(dmk->dmk_fini != NULL); - dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]); - } - } - } - - D_FREE(dtls->dtls_values); -} - -pthread_key_t dss_tls_key; - -/* - * Allocate dss_thread_local_storage for a particular thread and - * store the pointer in a thread-specific value which can be - * fetched at any time with dss_tls_get(). - */ -struct dss_thread_local_storage * -dss_tls_init(int tag, int xs_id, int tgt_id) -{ - struct dss_thread_local_storage *dtls; - int rc; - - D_ALLOC_PTR(dtls); - if (dtls == NULL) - return NULL; - - dtls->dtls_tag = tag; - rc = dss_thread_local_storage_init(dtls, xs_id, tgt_id); - if (rc != 0) { - D_FREE(dtls); - return NULL; - } - - rc = pthread_setspecific(dss_tls_key, dtls); - if (rc) { - D_ERROR("failed to initialize tls: %d\n", rc); - dss_thread_local_storage_fini(dtls); - D_FREE(dtls); - return NULL; - } - - return dtls; -} - -/* Free DTC for a particular thread. 
*/ -void -dss_tls_fini(struct dss_thread_local_storage *dtls) -{ - dss_thread_local_storage_fini(dtls); - D_FREE(dtls); - pthread_setspecific(dss_tls_key, NULL); -} diff --git a/src/gurt/examples/telem_consumer_example.c b/src/gurt/examples/telem_consumer_example.c index 6b7b1653a16..cac33fc7077 100644 --- a/src/gurt/examples/telem_consumer_example.c +++ b/src/gurt/examples/telem_consumer_example.c @@ -147,6 +147,13 @@ void read_metrics(struct d_tm_context *ctx, struct d_tm_node_t *root, d_tm_list_free(head); } +static void +iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format, + int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + int main(int argc, char **argv) { @@ -177,8 +184,8 @@ main(int argc, char **argv) filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT | D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY); show_meta = true; - d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_STANDARD, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_STANDARD, D_TM_INCLUDE_METADATA, iter_print, + stdout); sprintf(dirname, "manually added"); filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT | diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index f91d1e72919..6bd3a495246 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -16,9 +16,11 @@ #include #include #include -#include "gurt/telemetry_common.h" -#include "gurt/telemetry_producer.h" -#include "gurt/telemetry_consumer.h" +#include +#include +#include +#include +#include /** minimal list of shared memory regions with a global ID */ struct shmem_region_list { @@ -31,12 +33,17 @@ struct shmem_region_list { struct d_tm_shmem_hdr { uint64_t sh_base_addr; /** address of this struct */ key_t sh_key; /** key to access region */ - bool sh_deleted; /** marked for deletion */ + uint32_t sh_deleted : 1, /** marked for deletion */ + sh_multiple_writer : 1; /** require lock to protect */ uint8_t sh_reserved[3]; /** for alignment */ uint64_t sh_bytes_total; /** total size of region */ uint64_t sh_bytes_free; /** free bytes in this region */ void *sh_free_addr; /** start of free space */ struct d_tm_node_t *sh_root; /** root of metric tree */ + + /* lock to protect update, mostly for create and remove ephemeral dir */ + pthread_mutex_t sh_multiple_writer_lock; + /** * List of all ephemeral regions attached to this shmem region. 
*/ @@ -69,8 +76,10 @@ static struct d_tm_shmem { struct d_tm_context *ctx; /** context for the producer */ struct d_tm_node_t *root; /** root node of shmem */ pthread_mutex_t add_lock; /** for synchronized access */ - bool sync_access; /** whether to sync access */ - bool retain; /** retain shmem region on exit */ + uint32_t retain : 1, /** retain shmem region during exit */ + sync_access : 1, /** enable sync access to shmem */ + retain_non_empty : 1, /** retain shmem region if it is not empty */ + multiple_writer_lock : 1; /** lock for multiple writer */ int id; /** Instance ID */ } tm_shmem; @@ -168,13 +177,49 @@ d_tm_get_name(struct d_tm_context *ctx, struct d_tm_node_t *node) static int d_tm_lock_shmem(void) { - return D_MUTEX_LOCK(&tm_shmem.add_lock); + struct d_tm_context *ctx = tm_shmem.ctx; + int rc; + + if (tm_shmem.multiple_writer_lock) { + rc = D_MUTEX_LOCK(&ctx->shmem_root->sh_multiple_writer_lock); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to take multiple writer lock"); + return rc; + } + } + + rc = D_MUTEX_LOCK(&tm_shmem.add_lock); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to take shared memory lock"); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); + return rc; + } + + return 0; } static int d_tm_unlock_shmem(void) { - return D_MUTEX_UNLOCK(&tm_shmem.add_lock); + struct d_tm_context *ctx = tm_shmem.ctx; + int rc; + + rc = D_MUTEX_UNLOCK(&tm_shmem.add_lock); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to release shared memory lock"); + return rc; + } + + if (tm_shmem.multiple_writer_lock) { + rc = D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to release multiple writer lock"); + return rc; + } + } + + return 0; } /* @@ -200,6 +245,8 @@ attach_shmem(key_t key, size_t size, int flags, struct d_tm_shmem_hdr **shmem) return -DER_SHMEM_PERMS; } + D_INFO("%s shmid %d key 0x%x addr %p\n", size > 0 ? 
"allocated" : "attached", shmid, key, + addr); *shmem = addr; return shmid; } @@ -208,7 +255,6 @@ static int new_shmem(key_t key, size_t size, struct d_tm_shmem_hdr **shmem) { int rc; - D_INFO("creating new shared memory segment, key=0x%x, size=%lu\n", key, size); rc = attach_shmem(key, size, IPC_CREAT | 0660, shmem); @@ -331,7 +377,7 @@ close_local_shmem_entry(struct local_shmem_list *entry, bool destroy) { d_list_del(&entry->link); if (destroy) - entry->region->sh_deleted = true; + entry->region->sh_deleted = 1; close_shmem(entry->region); if (destroy) @@ -529,7 +575,7 @@ init_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t *node, D_ERROR("cannot allocate node name [%s]\n", name); return -DER_NO_SHMEM; } - strncpy(node->dtn_name, name, buff_len); + strncpy(conv_ptr(shmem, node->dtn_name), name, buff_len); node->dtn_shmem_key = shmem->sh_key; node->dtn_child = NULL; /* may be reinitializing an existing node, in which case we shouldn't @@ -557,6 +603,7 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode, const char *name) { struct d_tm_node_t *node = NULL; + struct d_tm_node_t *tmp; int rc = DER_SUCCESS; if (shmem == NULL || newnode == NULL || name == NULL) { @@ -569,14 +616,19 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode, rc = -DER_NO_SHMEM; goto out; } - rc = init_node(shmem, node, name); + + tmp = conv_ptr(shmem, node); + + rc = init_node(shmem, tmp, name); if (rc != 0) goto out; - node->dtn_metric = NULL; - node->dtn_sibling = NULL; - *newnode = node; + tmp->dtn_metric = NULL; + tmp->dtn_sibling = NULL; + *newnode = node; out: + if (rc != 0) + DL_ERROR(rc, "failed to alloc node for %s", name); return rc; } @@ -624,10 +676,10 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent, * 1) a previously-cleared link node that can be reused, or * 2) the right place to attach a newly allocated node. 
*/ - child = parent->dtn_child; + child = conv_ptr(shmem, parent->dtn_child); while (child != NULL && !is_cleared_link(tm_shmem.ctx, child)) { sibling = child; - child = child->dtn_sibling; + child = conv_ptr(shmem, child->dtn_sibling); } if (is_cleared_link(tm_shmem.ctx, child)) { @@ -657,6 +709,7 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent, else sibling->dtn_sibling = *newnode; + *newnode = conv_ptr(shmem, *newnode); return 0; failure: @@ -751,7 +804,7 @@ destroy_shmem_with_key(key_t key) /** * Initialize an instance of the telemetry and metrics API for the producer - * process. + * process with the root set to the provided name. * * \param[in] id Identifies the producer process amongst others * on the same machine. @@ -763,6 +816,7 @@ destroy_shmem_with_key(key_t key) * Use D_TM_RETAIN_SHMEM to retain the shared * memory segment created for these metrics after * this process exits. + * \param[in] root_name The name of this node in the telemetry tree. * * \return DER_SUCCESS Success * -DER_NO_SHMEM Out of shared memory @@ -770,41 +824,70 @@ destroy_shmem_with_key(key_t key) * -DER_INVAL Invalid \a flag(s) */ int -d_tm_init(int id, uint64_t mem_size, int flags) +d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name) { - struct d_tm_shmem_hdr *new_shmem; + struct d_tm_shmem_hdr *new_shmem = NULL; key_t key; - int shmid; - char tmp[D_TM_MAX_NAME_LEN]; + int shmid; int rc = DER_SUCCESS; + if (root_name == NULL || strnlen(root_name, D_TM_MAX_NAME_LEN) == 0) { + D_ERROR("root name cannot be empty\n"); + return -DER_INVAL; + } + + if (strnlen(root_name, D_TM_MAX_NAME_LEN) == D_TM_MAX_NAME_LEN) { + D_ERROR("root name too long (max=%d)\n", D_TM_MAX_NAME_LEN); + return -DER_EXCEEDS_PATH_LEN; + } + memset(&tm_shmem, 0, sizeof(tm_shmem)); - if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM)) != 0) { - D_ERROR("Invalid flags\n"); + if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM | D_TM_RETAIN_SHMEM_IF_NON_EMPTY | + 
D_TM_OPEN_OR_CREATE | D_TM_MULTIPLE_WRITER_LOCK)) != 0) { + D_ERROR("Invalid flags 0x%x\n", flags); rc = -DER_INVAL; goto failure; } if (flags & D_TM_SERIALIZATION) { - tm_shmem.sync_access = true; + tm_shmem.sync_access = 1; D_INFO("Serialization enabled for id %d\n", id); } if (flags & D_TM_RETAIN_SHMEM) { - tm_shmem.retain = true; + tm_shmem.retain = 1; D_INFO("Retaining shared memory for id %d\n", id); } + if (flags & D_TM_RETAIN_SHMEM_IF_NON_EMPTY) { + tm_shmem.retain_non_empty = 1; + D_INFO("Retaining shared memory for id %d if not empty\n", id); + } + + if (flags & D_TM_MULTIPLE_WRITER_LOCK) { + tm_shmem.multiple_writer_lock = 1; + D_INFO("Require multiple write protection for id %d\n", id); + } + tm_shmem.id = id; - snprintf(tmp, sizeof(tmp), "ID: %d", id); key = d_tm_get_srv_key(id); - rc = destroy_shmem_with_key(key); - if (rc != 0) - goto failure; - rc = create_shmem(tmp, key, mem_size, &shmid, &new_shmem); - if (rc != 0) - goto failure; + if (flags & D_TM_OPEN_OR_CREATE) { + rc = open_shmem(key, &new_shmem); + if (rc > 0) { + D_ASSERT(new_shmem != NULL); + shmid = rc; + } + } + + if (new_shmem == NULL) { + rc = destroy_shmem_with_key(key); + if (rc != 0) + goto failure; + rc = create_shmem(root_name, key, mem_size, &shmid, &new_shmem); + if (rc != 0) + goto failure; + } rc = alloc_ctx(&tm_shmem.ctx, new_shmem, shmid); if (rc != 0) @@ -831,19 +914,76 @@ d_tm_init(int id, uint64_t mem_size, int flags) return rc; } +/** + * Initialize an instance of the telemetry and metrics API for the producer + * process. + * + * \param[in] id Identifies the producer process amongst others + * on the same machine. + * \param[in] mem_size Size in bytes of the shared memory segment that + * is allocated. + * \param[in] flags Optional flags to control initialization. + * Use D_TM_SERIALIZATION to enable read/write + * synchronization of individual nodes. 
+ * Use D_TM_RETAIN_SHMEM to retain the shared + * memory segment created for these metrics after + * this process exits. + * + * \return DER_SUCCESS Success + * -DER_NO_SHMEM Out of shared memory + * -DER_EXCEEDS_PATH_LEN Root node name exceeds path len + * -DER_INVAL Invalid \a flag(s) + */ +int +d_tm_init(int id, uint64_t mem_size, int flags) +{ + char tmp[D_TM_MAX_NAME_LEN]; + + snprintf(tmp, sizeof(tmp), "ID: %d", id); + + return d_tm_init_with_name(id, mem_size, flags, tmp); +} + +/* Check if all children are invalid */ +static bool +is_node_empty(struct d_tm_node_t *node) +{ + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_shmem_hdr *shmem; + struct d_tm_node_t *child; + + shmem = get_shmem_for_key(ctx, node->dtn_shmem_key); + child = conv_ptr(shmem, node->dtn_child); + while (child != NULL && !is_cleared_link(ctx, child)) { + child = conv_ptr(shmem, child->dtn_sibling); + if (child->dtn_name != NULL) + return false; + } + + return true; +} + /** * Releases resources claimed by init */ void d_tm_fini(void) { - bool destroy_shmem = false; + bool destroy_shmem = true; if (tm_shmem.ctx == NULL) goto out; - if (!tm_shmem.retain) - destroy_shmem = true; + if (tm_shmem.retain) + destroy_shmem = false; + + if (tm_shmem.retain_non_empty) { + struct d_tm_node_t *root; + + root = d_tm_get_root(tm_shmem.ctx); + if (!is_node_empty(root)) + destroy_shmem = false; + } /* close with the option to destroy the shmem region if needed */ close_all_shmem(tm_shmem.ctx, destroy_shmem); @@ -1451,9 +1591,9 @@ _reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node) return DER_SUCCESS; } -static void -reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, - char *path, int format, int opt_fields, FILE *stream) +void +d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, + int format, int opt_fields, FILE *stream) { char *name = NULL; @@ -1467,7 +1607,7 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t 
*node, int level, switch (node->dtn_type) { case D_TM_LINK: node = d_tm_follow_link(ctx, node); - reset_node(ctx, node, level, path, format, opt_fields, stream); + d_tm_reset_node(ctx, node, level, path, format, opt_fields, stream); break; case D_TM_DIRECTORY: case D_TM_COUNTER: @@ -1507,20 +1647,18 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, * Choose D_TM_CSV for comma separated values. * \param[in] opt_fields A bitmask. Set D_TM_INCLUDE_* as desired for * the optional output fields. - * \param[in] show_timestamp Set to true to print the timestamp the metric - * was read by the consumer. - * \param[in] stream Direct output to this stream (stdout, stderr) + * \param[in] iter_cb iterate callback. + * \param[in] cb_arg argument for iterate callback. */ void -d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, - int level, int filter, char *path, int format, - int opt_fields, uint32_t ops, FILE *stream) +d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, int filter, char *path, + int format, int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg) { struct d_tm_shmem_hdr *shmem = NULL; char *fullpath = NULL; char *parent_name = NULL; - if ((node == NULL) || (stream == NULL)) + if (node == NULL) return; if (node->dtn_type == D_TM_LINK) { @@ -1533,14 +1671,8 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, if (shmem == NULL) return; - if (node->dtn_type & filter) { - if (ops & D_TM_ITER_READ) - d_tm_print_node(ctx, node, level, path, format, - opt_fields, stream); - if (ops & D_TM_ITER_RESET) - reset_node(ctx, node, level, path, format, - opt_fields, stream); - } + if (node->dtn_type & filter) + iter_cb(ctx, node, level, path, format, opt_fields, cb_arg); parent_name = conv_ptr(shmem, node->dtn_name); node = node->dtn_child; @@ -1555,8 +1687,8 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, else D_ASPRINTF(fullpath, "%s/%s", path, parent_name); - d_tm_iterate(ctx, 
node, level + 1, filter, fullpath, format, - opt_fields, ops, stream); + d_tm_iterate(ctx, node, level + 1, filter, fullpath, format, opt_fields, iter_cb, + cb_arg); D_FREE(fullpath); node = node->dtn_sibling; node = conv_ptr(shmem, node); @@ -2105,6 +2237,29 @@ is_initialized(void) tm_shmem.ctx->shmem_root != NULL; } +/* + * Get a pointer to the last token in the path without modifying the original + * string. + */ +static const char * +get_last_token(const char *path) +{ + const char *substr = path; + const char *ch; + bool next_token = false; + + for (ch = path; *ch != '\0'; ch++) { + if (*ch == '/') { + next_token = true; + } else if (next_token) { + substr = ch; + next_token = false; + } + } + + return substr; +} + static int add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, char *desc, char *units, char *path) @@ -2113,6 +2268,7 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, struct d_tm_node_t *parent_node; struct d_tm_node_t *temp = NULL; struct d_tm_shmem_hdr *shmem; + struct d_tm_metric_t *metric; char *token; char *rest; char *unit_string; @@ -2154,11 +2310,11 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, } } - temp->dtn_metric->dtm_stats = NULL; + metric = conv_ptr(shmem, temp->dtn_metric); + metric->dtm_stats = NULL; if (has_stats(temp)) { - temp->dtn_metric->dtm_stats = - shmalloc(shmem, sizeof(struct d_tm_stats_t)); - if (temp->dtn_metric->dtm_stats == NULL) { + metric->dtm_stats = shmalloc(shmem, sizeof(struct d_tm_stats_t)); + if (metric->dtm_stats == NULL) { rc = -DER_NO_SHMEM; goto out; } @@ -2175,14 +2331,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, if (buff_len > 0) { buff_len += 1; /** make room for the trailing null */ - temp->dtn_metric->dtm_desc = shmalloc(shmem, buff_len); - if (temp->dtn_metric->dtm_desc == NULL) { + metric->dtm_desc = shmalloc(shmem, buff_len); + if (metric->dtm_desc == NULL) 
{ rc = -DER_NO_SHMEM; goto out; } - strncpy(temp->dtn_metric->dtm_desc, desc, buff_len); + strncpy(conv_ptr(shmem, metric->dtm_desc), desc, buff_len); } else { - temp->dtn_metric->dtm_desc = NULL; + metric->dtm_desc = NULL; } unit_string = units; @@ -2216,14 +2372,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, if (buff_len > 0) { buff_len += 1; /** make room for the trailing null */ - temp->dtn_metric->dtm_units = shmalloc(shmem, buff_len); - if (temp->dtn_metric->dtm_units == NULL) { + metric->dtm_units = shmalloc(shmem, buff_len); + if (metric->dtm_units == NULL) { rc = -DER_NO_SHMEM; goto out; } - strncpy(temp->dtn_metric->dtm_units, unit_string, buff_len); + strncpy(conv_ptr(shmem, metric->dtm_units), unit_string, buff_len); } else { - temp->dtn_metric->dtm_units = NULL; + metric->dtm_units = NULL; } temp->dtn_protect = false; @@ -2344,26 +2500,35 @@ int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc, } static void -invalidate_link_node(struct d_tm_node_t *node) +invalidate_link_node(struct d_tm_shmem_hdr *parent, struct d_tm_node_t *node) { if (node == NULL || node->dtn_type != D_TM_LINK) return; node->dtn_name = NULL; - if (node->dtn_metric != NULL) - node->dtn_metric->dtm_data.value = 0; + if (node->dtn_metric != NULL) { + struct d_tm_metric_t *link_metric; + + link_metric = conv_ptr(parent, node->dtn_metric); + link_metric->dtm_data.value = 0; + } } static int get_free_region_entry(struct d_tm_shmem_hdr *shmem, struct shmem_region_list **entry) { + d_list_t *cur; + d_list_t *head; + d_list_t *next; struct shmem_region_list *tmp; D_ASSERT(shmem != NULL); D_ASSERT(entry != NULL); - d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) { + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + tmp = d_list_entry(cur, __typeof__(*tmp), rl_link); if (tmp->rl_link_node == NULL) { *entry = tmp; return 0; @@ -2376,7 +2541,23 @@ 
get_free_region_entry(struct d_tm_shmem_hdr *shmem, shmem->sh_key); return -DER_NO_SHMEM; } - d_list_add(&tmp->rl_link, &shmem->sh_subregions); + + next = conv_ptr(shmem, head->next); + /* NB: sh_subregions is initialized by D_INIT_LIST_HEAD(), so it is not shmem address */ + if (d_list_empty(&shmem->sh_subregions)) + cur = (d_list_t *)(shmem->sh_base_addr + + (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions)); + else + cur = head->next; + + head->next = &tmp->rl_link; + next->prev = &tmp->rl_link; + + tmp = conv_ptr(shmem, tmp); + tmp->rl_link.next = cur; + tmp->rl_link.prev = + (d_list_t *)(shmem->sh_base_addr + + (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions)); *entry = tmp; return 0; @@ -2413,27 +2594,199 @@ get_unique_shmem_key(const char *path, int id) return (key_t)d_hash_string_u32(salted, sizeof(salted)); } +static int +shm_stat_key(key_t key, struct shmid_ds *shminfo, int *shmid_ptr) +{ + int shmid; + int rc; + + if (unlikely(shminfo == NULL)) { + D_ERROR("NULL shminfo\n"); + return -DER_INVAL; + } + + rc = shmget(key, 0, 0); + if (rc < 0) { + D_ERROR("shmget(0x%x) failed: %s (%d)\n", key, strerror(errno), errno); + return daos_errno2der(errno); + } + shmid = rc; + + rc = shmctl(shmid, IPC_STAT, shminfo); + if (rc < 0) { + D_ERROR("shmctl(%d, IPC_STAT) failed: %s (%d)\n", shmid, strerror(errno), errno); + return daos_errno2der(errno); + } + + if (shmid_ptr != NULL) + *shmid_ptr = shmid; + + return 0; +} + /* - * Get a pointer to the last token in the path without modifying the original - * string. + * Set the child segment's ownership to match the parent segment. + * Needed in the client telemetry case where the client is allowing + * the agent to manage its telemetry segments. 
*/ -static const char * -get_last_token(const char *path) +static int +sync_attached_segment_uid(char *path, key_t child_key) { - const char *substr = path; - const char *ch; - bool next_token = false; + struct d_tm_node_t *link_node; + struct d_tm_context *ctx = tm_shmem.ctx; + struct shmid_ds shminfo = {0}; + uid_t o_uid; + int child_shmid; + int rc; + + if (unlikely(path == NULL)) { + D_ERROR("NULL inputs\n"); + return -DER_INVAL; + } - for (ch = path; *ch != '\0'; ch++) { - if (*ch == '/') { - next_token = true; - } else if (next_token) { - substr = ch; - next_token = false; - } + link_node = d_tm_find_metric(ctx, path); + if (link_node == NULL) { + D_ERROR("nonexistent metric: %s", path); + D_GOTO(out, rc = -DER_NONEXIST); } - return substr; + rc = shm_stat_key(link_node->dtn_shmem_key, &shminfo, NULL); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to stat parent segment"); + goto out; + } + o_uid = shminfo.shm_perm.uid; + + rc = shm_stat_key(child_key, &shminfo, &child_shmid); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to stat child segment"); + goto out; + } + + if (o_uid == shminfo.shm_perm.uid) + D_GOTO(out, rc = 0); + + shminfo.shm_perm.uid = o_uid; + rc = shmctl(child_shmid, IPC_SET, &shminfo); + if (rc != 0) { + DL_ERROR(rc, "failed to set child segment ownership"); + } + +out: + return rc; +} + +static int +attach_path_segment(key_t key, char *path) +{ + struct d_tm_node_t *link_node; + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_shmem_hdr *parent_shmem; + struct d_tm_metric_t *link_metric; + struct shmem_region_list *region_entry; + int rc; + + if (unlikely(path == NULL)) { + D_ERROR("NULL inputs\n"); + D_GOTO(fail, rc = -DER_INVAL); + } + + /* Add a link to the new region */ + rc = add_metric(ctx, &link_node, D_TM_LINK, NULL, NULL, path); + if (unlikely(rc != 0)) { + D_ERROR("can't set up the link node, " DF_RC "\n", DP_RC(rc)); + D_GOTO(fail, rc); + } + + /* track attached regions within the parent shmem */ + parent_shmem = 
get_shmem_for_key(ctx, link_node->dtn_shmem_key); + if (unlikely(parent_shmem == NULL)) { + D_ERROR("failed to get parent shmem pointer\n"); + D_GOTO(fail_link, rc = -DER_NO_SHMEM); + } + + D_ASSERT(link_node->dtn_type == D_TM_LINK); + link_metric = conv_ptr(parent_shmem, link_node->dtn_metric); + link_metric->dtm_data.value = key; + + rc = get_free_region_entry(parent_shmem, ®ion_entry); + if (unlikely(rc != 0)) + D_GOTO(fail_link, rc); + region_entry->rl_key = key; + region_entry->rl_link_node = link_node; + + if (tm_shmem.multiple_writer_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); + + return 0; +fail_link: + invalidate_link_node(parent_shmem, link_node); +fail: + return rc; +} + +/** + * Attach an existing telemetry segment into the tree at the path designated + * by fmt. This segment will be treated the same as an ephemeral directory + * that can be deleted later along with its children. + * + * \param[in] key Key to the shared memory segment + * \param[in] fmt Path constructed via variadic arguments + * + * \return 0 Success + * -DER_INVAL Invalid input + * -DER_EXIST Requested path already exists + */ +int +d_tm_attach_path_segment(key_t key, const char *fmt, ...) 
+{ + struct d_tm_node_t *link_node; + struct d_tm_context *ctx = tm_shmem.ctx; + va_list args; + char path[D_TM_MAX_NAME_LEN] = {0}; + int rc; + + if (!is_initialized()) + D_GOTO(fail, rc = -DER_UNINIT); + + if (unlikely(fmt == NULL)) { + D_ERROR("NULL inputs\n"); + D_GOTO(fail, rc = -DER_INVAL); + } + + if (strnlen(fmt, D_TM_MAX_NAME_LEN) == 0) { + D_ERROR("cannot attach segment at root\n"); + D_GOTO(fail, rc = -DER_INVAL); + } + + va_start(args, fmt); + rc = parse_path_fmt(path, sizeof(path), fmt, args); + va_end(args); + if (unlikely(rc != 0)) + D_GOTO(fail, rc); + + rc = d_tm_lock_shmem(); + if (rc != 0) + D_GOTO(fail, rc); + + link_node = d_tm_find_metric(ctx, path); + if (link_node != NULL) { + D_INFO("metric [%s] already exists\n", path); + D_GOTO(fail_unlock, rc = -DER_EXIST); + } + + rc = attach_path_segment(key, path); + if (unlikely(rc != 0)) + D_GOTO(fail_unlock, rc); + + d_tm_unlock_shmem(); + return 0; +fail_unlock: + d_tm_unlock_shmem(); +fail: + if (rc != -DER_EXIST) + DL_ERROR(rc, "Failed to add path segment [%s] for key %d", path, key); + return rc; } /** @@ -2455,12 +2808,9 @@ int d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, const char *fmt, ...) 
{ - struct d_tm_node_t *new_node; - struct d_tm_node_t *link_node; - struct d_tm_context *ctx = tm_shmem.ctx; - struct d_tm_shmem_hdr *parent_shmem; - struct d_tm_shmem_hdr *new_shmem; - struct shmem_region_list *region_entry; + struct d_tm_node_t *new_node; + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_shmem_hdr *new_shmem; va_list args; key_t key; char path[D_TM_MAX_NAME_LEN] = {0}; @@ -2495,57 +2845,52 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, rc = d_tm_lock_shmem(); if (unlikely(rc != 0)) { D_ERROR("failed to get producer mutex\n"); - D_GOTO(fail, rc); + D_GOTO(fail_unlock, rc); } new_node = d_tm_find_metric(ctx, path); if (new_node != NULL) { - D_ERROR("metric [%s] already exists\n", path); + D_INFO("metric [%s] already exists\n", path); D_GOTO(fail_unlock, rc = -DER_EXIST); } key = get_unique_shmem_key(path, tm_shmem.id); rc = create_shmem(get_last_token(path), key, size_bytes, &new_shmid, &new_shmem); - if (rc != 0) + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to create shmem for %s", path); D_GOTO(fail_unlock, rc); + } new_node = new_shmem->sh_root; /* track at the process level */ rc = track_open_shmem(ctx, new_shmem, new_shmid, key); - if (rc != 0) + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to track shmem for %s", path); D_GOTO(fail_shmem, rc); + } - /* Add a link to the new region */ - rc = add_metric(ctx, &link_node, D_TM_LINK, NULL, NULL, path); - if (rc != 0) { - D_ERROR("can't set up the link node, " DF_RC "\n", DP_RC(rc)); - D_GOTO(fail_tracking, rc); + rc = attach_path_segment(key, path); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to attach 0x%x at %s", key, path); + D_GOTO(fail_attach, rc); } - D_ASSERT(link_node->dtn_type == D_TM_LINK); - link_node->dtn_metric->dtm_data.value = key; - /* track attached regions within the parent shmem */ - parent_shmem = get_shmem_for_key(ctx, link_node->dtn_shmem_key); - if (parent_shmem == NULL) { - D_ERROR("failed to get parent shmem pointer\n"); - 
D_GOTO(fail_link, rc = -DER_NO_SHMEM); + rc = sync_attached_segment_uid(path, key); + if (unlikely(rc != 0)) { + DL_ERROR(rc, "failed to sync %s permissions", path); + D_GOTO(fail_sync, rc); } - rc = get_free_region_entry(parent_shmem, ®ion_entry); - if (rc != 0) - D_GOTO(fail_link, rc); - region_entry->rl_key = key; - region_entry->rl_link_node = link_node; if (node != NULL) *node = new_node; d_tm_unlock_shmem(); return 0; - -fail_link: - invalidate_link_node(link_node); -fail_tracking: +fail_sync: + d_tm_del_ephemeral_dir(path); + goto fail_unlock; /* shmem will be closed/destroyed already */ +fail_attach: close_shmem_for_key(ctx, key, true); goto fail_unlock; /* shmem will be closed/destroyed already */ fail_shmem: @@ -2554,17 +2899,21 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, fail_unlock: d_tm_unlock_shmem(); fail: - D_ERROR("Failed to add ephemeral dir [%s]: " DF_RC "\n", path, - DP_RC(rc)); + if (rc != -DER_EXIST) + DL_ERROR(rc, "Failed to add ephemeral dir [%s]", path); return rc; } static void clear_region_entry_for_key(struct d_tm_shmem_hdr *shmem, key_t key) { + d_list_t *cur; + d_list_t *head; struct shmem_region_list *tmp; - d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) { + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + tmp = d_list_entry(cur, __typeof__(*tmp), rl_link); if (tmp->rl_key == key) { D_DEBUG(DB_TRACE, "cleared shmem metadata for key 0x%x\n", key); @@ -2583,6 +2932,8 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) struct d_tm_shmem_hdr *parent_shmem; struct d_tm_shmem_hdr *shmem; struct d_tm_node_t *node; + d_list_t *cur; + d_list_t *head; struct shmem_region_list *curr; key_t key; int rc = 0; @@ -2616,8 +2967,10 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) } /* delete sub-regions recursively */ - d_list_for_each_entry(curr, &shmem->sh_subregions, rl_link) { - rc = 
rm_ephemeral_dir(ctx, curr->rl_link_node); + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + curr = d_list_entry(cur, __typeof__(*curr), rl_link); + rc = rm_ephemeral_dir(ctx, conv_ptr(shmem, curr->rl_link_node)); if (rc != 0) /* nothing much we can do to recover here */ D_ERROR("error removing tmp dir [%s]: "DF_RC"\n", link->dtn_name, DP_RC(rc)); @@ -2629,11 +2982,35 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) out_link: /* invalidate since the link node can't be deleted from parent */ - invalidate_link_node(link); + invalidate_link_node(parent_shmem, link); out: return rc; } +static int +try_del_ephemeral_dir(char *path, bool force) +{ + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_node_t *link; + int rc = 0; + + rc = d_tm_lock_shmem(); + if (unlikely(rc != 0)) { + D_ERROR("failed to get producer mutex\n"); + D_GOTO(unlock, rc); + } + + link = get_node(ctx, path); + if (!force && !is_node_empty(link)) + D_GOTO(unlock, rc == -DER_BUSY); + + rc = rm_ephemeral_dir(ctx, link); + +unlock: + d_tm_unlock_shmem(); + + return rc; +} /** * Deletes an ephemeral metrics directory from the metric tree. * @@ -2645,11 +3022,9 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) int d_tm_del_ephemeral_dir(const char *fmt, ...) { - struct d_tm_context *ctx = tm_shmem.ctx; - struct d_tm_node_t *link; - va_list args; - char path[D_TM_MAX_NAME_LEN] = {0}; - int rc = 0; + va_list args; + char path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; if (!is_initialized()) D_GOTO(out, rc = -DER_UNINIT); @@ -2665,16 +3040,45 @@ d_tm_del_ephemeral_dir(const char *fmt, ...) 
if (rc != 0) D_GOTO(out, rc); - rc = d_tm_lock_shmem(); - if (unlikely(rc != 0)) { - D_ERROR("failed to get producer mutex\n"); - D_GOTO(out, rc); + rc = try_del_ephemeral_dir(path, true); +out: + if (rc != 0) + D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n", DP_RC(rc)); + else + D_INFO("Removed ephemeral directory [%s]\n", path); + return rc; +} + +/** + * Deletes an ephemeral metrics directory from the metric tree, only if it is empty. + * + * \param[in] fmt Used to construct the path to be removed + * + * \return 0 Success + * -DER_INVAL Invalid input + */ +int +d_tm_try_del_ephemeral_dir(const char *fmt, ...) +{ + va_list args; + char path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; + + if (!is_initialized()) + D_GOTO(out, rc = -DER_UNINIT); + + if (fmt == NULL || strnlen(fmt, D_TM_MAX_NAME_LEN) == 0) { + D_ERROR("telemetry root cannot be deleted\n"); + D_GOTO(out, rc = -DER_INVAL); } - link = get_node(ctx, path); - rc = rm_ephemeral_dir(ctx, link); + va_start(args, fmt); + rc = parse_path_fmt(path, sizeof(path), fmt, args); + va_end(args); + if (rc != 0) + D_GOTO(out, rc); - d_tm_unlock_shmem(); + rc = try_del_ephemeral_dir(path, false); out: if (rc != 0) D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n", @@ -3538,6 +3942,7 @@ allocate_shared_memory(key_t key, size_t mem_size, { int shmid; struct d_tm_shmem_hdr *header; + int rc; D_ASSERT(shmem != NULL); @@ -3559,8 +3964,17 @@ allocate_shared_memory(key_t key, size_t mem_size, D_INIT_LIST_HEAD(&header->sh_subregions); - D_DEBUG(DB_MEM, "Created shared memory region for key 0x%x, size=%lu\n", - key, mem_size); + if (tm_shmem.multiple_writer_lock) { + rc = D_MUTEX_INIT(&header->sh_multiple_writer_lock, NULL); + if (rc) { + DL_ERROR(rc, "multiple writer lock failed"); + return -DER_NO_SHMEM; + } + } + + D_DEBUG(DB_MEM, + "Created shared memory region for key 0x%x, size=%lu header %p base %p free %p\n", + key, mem_size, header, (void *)header->sh_base_addr, (void *)header->sh_free_addr); *shmem = header; @@ 
-3664,10 +4078,9 @@ shmalloc(struct d_tm_shmem_hdr *shmem, int length) shmem->sh_bytes_free -= length; shmem->sh_free_addr += length; - D_DEBUG(DB_TRACE, - "Allocated %d bytes. Now %" PRIu64 " remain\n", - length, shmem->sh_bytes_free); - memset(new_mem, 0, length); + D_DEBUG(DB_TRACE, "Allocated %d bytes. Now %" PRIu64 " remain %p/%p\n", length, + shmem->sh_bytes_free, shmem, new_mem); + memset(conv_ptr(shmem, new_mem), 0, length); return new_mem; } diff --git a/src/gurt/tests/test_gurt_telem_producer.c b/src/gurt/tests/test_gurt_telem_producer.c index bf3db9d19c9..32d4c4f7b89 100644 --- a/src/gurt/tests/test_gurt_telem_producer.c +++ b/src/gurt/tests/test_gurt_telem_producer.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2020-2022 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1226,6 +1226,13 @@ test_verify_object_count(void **state) assert_int_equal(num, exp_total); } +static void +iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, int format, + int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + static void test_print_metrics(void **state) { @@ -1238,15 +1245,15 @@ test_print_metrics(void **state) filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT | D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY); - d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_STANDARD, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_STANDARD, D_TM_INCLUDE_METADATA, + iter_print, stdout); d_tm_print_field_descriptors(D_TM_INCLUDE_TIMESTAMP | D_TM_INCLUDE_METADATA, stdout); filter &= ~D_TM_DIRECTORY; - d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_CSV, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_CSV, D_TM_INCLUDE_METADATA, iter_print, + stdout); } static void diff --git a/src/include/daos/drpc_modules.h 
b/src/include/daos/drpc_modules.h index 69aaf568673..a8821d9f079 100644 --- a/src/include/daos/drpc_modules.h +++ b/src/include/daos/drpc_modules.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -33,43 +33,44 @@ enum drpc_sec_agent_method { }; enum drpc_mgmt_method { - DRPC_METHOD_MGMT_KILL_RANK = 201, - DRPC_METHOD_MGMT_SET_RANK = 202, - DRPC_METHOD_MGMT_GET_ATTACH_INFO = 206, - DRPC_METHOD_MGMT_POOL_CREATE = 207, - DRPC_METHOD_MGMT_POOL_DESTROY = 208, - DRPC_METHOD_MGMT_SET_UP = 209, - DRPC_METHOD_MGMT_BIO_HEALTH_QUERY = 210, - DRPC_METHOD_MGMT_SMD_LIST_DEVS = 211, - DRPC_METHOD_MGMT_SMD_LIST_POOLS = 212, - DRPC_METHOD_MGMT_POOL_GET_ACL = 213, - DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL = 215, - DRPC_METHOD_MGMT_POOL_UPDATE_ACL = 216, - DRPC_METHOD_MGMT_POOL_DELETE_ACL = 217, - DRPC_METHOD_MGMT_PREP_SHUTDOWN = 218, - DRPC_METHOD_MGMT_DEV_SET_FAULTY = 220, - DRPC_METHOD_MGMT_DEV_REPLACE = 221, - DRPC_METHOD_MGMT_LIST_CONTAINERS = 222, - DRPC_METHOD_MGMT_POOL_QUERY = 223, - DRPC_METHOD_MGMT_POOL_SET_PROP = 224, - DRPC_METHOD_MGMT_PING_RANK = 225, - DRPC_METHOD_MGMT_REINTEGRATE = 226, - DRPC_METHOD_MGMT_CONT_SET_OWNER = 227, - DRPC_METHOD_MGMT_EXCLUDE = 228, - DRPC_METHOD_MGMT_EXTEND = 229, - DRPC_METHOD_MGMT_POOL_EVICT = 230, - DRPC_METHOD_MGMT_DRAIN = 231, - DRPC_METHOD_MGMT_GROUP_UPDATE = 232, - DRPC_METHOD_MGMT_NOTIFY_EXIT = 233, - DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT = 235, - DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT = 236, - DRPC_METHOD_MGMT_POOL_GET_PROP = 237, - DRPC_METHOD_MGMT_SET_LOG_MASKS = 238, - DRPC_METHOD_MGMT_POOL_UPGRADE = 239, - DRPC_METHOD_MGMT_POOL_QUERY_TARGETS = 240, - DRPC_METHOD_MGMT_LED_MANAGE = 241, + DRPC_METHOD_MGMT_KILL_RANK = 201, + DRPC_METHOD_MGMT_SET_RANK = 202, + DRPC_METHOD_MGMT_GET_ATTACH_INFO = 206, + DRPC_METHOD_MGMT_POOL_CREATE = 207, + DRPC_METHOD_MGMT_POOL_DESTROY = 208, + DRPC_METHOD_MGMT_SET_UP = 209, + 
DRPC_METHOD_MGMT_BIO_HEALTH_QUERY = 210, + DRPC_METHOD_MGMT_SMD_LIST_DEVS = 211, + DRPC_METHOD_MGMT_SMD_LIST_POOLS = 212, + DRPC_METHOD_MGMT_POOL_GET_ACL = 213, + DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL = 215, + DRPC_METHOD_MGMT_POOL_UPDATE_ACL = 216, + DRPC_METHOD_MGMT_POOL_DELETE_ACL = 217, + DRPC_METHOD_MGMT_PREP_SHUTDOWN = 218, + DRPC_METHOD_MGMT_DEV_SET_FAULTY = 220, + DRPC_METHOD_MGMT_DEV_REPLACE = 221, + DRPC_METHOD_MGMT_LIST_CONTAINERS = 222, + DRPC_METHOD_MGMT_POOL_QUERY = 223, + DRPC_METHOD_MGMT_POOL_SET_PROP = 224, + DRPC_METHOD_MGMT_PING_RANK = 225, + DRPC_METHOD_MGMT_REINTEGRATE = 226, + DRPC_METHOD_MGMT_CONT_SET_OWNER = 227, + DRPC_METHOD_MGMT_EXCLUDE = 228, + DRPC_METHOD_MGMT_EXTEND = 229, + DRPC_METHOD_MGMT_POOL_EVICT = 230, + DRPC_METHOD_MGMT_DRAIN = 231, + DRPC_METHOD_MGMT_GROUP_UPDATE = 232, + DRPC_METHOD_MGMT_NOTIFY_EXIT = 233, + DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT = 235, + DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT = 236, + DRPC_METHOD_MGMT_POOL_GET_PROP = 237, + DRPC_METHOD_MGMT_SET_LOG_MASKS = 238, + DRPC_METHOD_MGMT_POOL_UPGRADE = 239, + DRPC_METHOD_MGMT_POOL_QUERY_TARGETS = 240, + DRPC_METHOD_MGMT_LED_MANAGE = 241, + DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM = 242, - NUM_DRPC_MGMT_METHODS /* Must be last */ + NUM_DRPC_MGMT_METHODS /* Must be last */ }; enum drpc_srv_method { diff --git a/src/include/daos/metrics.h b/src/include/daos/metrics.h new file mode 100644 index 00000000000..a0b6f16f144 --- /dev/null +++ b/src/include/daos/metrics.h @@ -0,0 +1,82 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of daos + * + * src/include/daos/metrics.h + */ + +#ifndef __DAOS_METRICS_H__ +#define __DAOS_METRICS_H__ + +#include +#include +#include +#include + +#define DC_TM_JOB_ROOT_ID 256 +/* For now TLS is only enabled if metrics are enabled */ +#define DAOS_CLIENT_METRICS_DUMP_PATH "D_CLIENT_METRICS_DUMP_PATH" +#define DAOS_CLIENT_METRICS_ENABLE "D_CLIENT_METRICS_ENABLE" +#define DAOS_CLIENT_METRICS_RETAIN "D_CLIENT_METRICS_RETAIN" +extern bool daos_client_metric; +extern bool daos_client_metric_retain; + +struct daos_module_metrics { + /* Indicate where the keys should be instantiated */ + enum daos_module_tag dmm_tags; + + /** + * allocate metrics with path to ephemeral shmem for the + * newly-created pool + */ + void *(*dmm_init)(const char *path, int tgt_id); + void (*dmm_fini)(void *data); + + /** + * Get the number of metrics allocated by this module in total (including all targets). + */ + int (*dmm_nr_metrics)(void); +}; + +/* Estimate of bytes per typical metric node */ +#define NODE_BYTES \ + (sizeof(struct d_tm_node_t) + sizeof(struct d_tm_metric_t) + 64 /* buffer for metadata */) +/* Estimate of bytes per histogram bucket */ +#define BUCKET_BYTES (sizeof(struct d_tm_bucket_t) + NODE_BYTES) +/* + Estimate of bytes per metric. + This is a generous high-water mark assuming most metrics are not using + histograms. May need adjustment if the balance of metrics changes. 
+*/ +#define PER_METRIC_BYTES \ + (NODE_BYTES + sizeof(struct d_tm_stats_t) + sizeof(struct d_tm_histogram_t) + BUCKET_BYTES) + +int +daos_metrics_init(enum daos_module_tag tag, uint32_t id, struct daos_module_metrics *metrics); +void +daos_metrics_fini(void); +int +daos_module_init_metrics(enum dss_module_tag tag, void **metrics, const char *path, int tgt_id); +void +daos_module_fini_metrics(enum dss_module_tag tag, void **metrics); + +int +daos_module_nr_pool_metrics(void); + +/** + * Called during library initialization to init metrics. + */ +int +dc_tm_init(void); + +/** + * Called during library finalization to free metrics resources + */ +void +dc_tm_fini(void); + +#endif /*__DAOS_METRICS_H__*/ diff --git a/src/include/daos/mgmt.h b/src/include/daos/mgmt.h index 4d999428c8c..eee326c761b 100644 --- a/src/include/daos/mgmt.h +++ b/src/include/daos/mgmt.h @@ -10,6 +10,7 @@ #ifndef __DC_MGMT_H__ #define __DC_MGMT_H__ +#include #include #include #include @@ -71,6 +72,9 @@ int dc_mgmt_net_get_num_srv_ranks(void); int dc_mgmt_get_sys_info(const char *sys, struct daos_sys_info **info); void dc_mgmt_put_sys_info(struct daos_sys_info *info); +int + dc_mgmt_tm_register(const char *sys, const char *jobid, key_t shm_key, uid_t *owner_uid); + int dc_get_attach_info(const char *name, bool all_ranks, struct dc_mgmt_sys_info *info, Mgmt__GetAttachInfoResp **respp); diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 5764e9d4002..0807dcfcf0d 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -91,6 +92,7 @@ struct dc_pool { pthread_rwlock_t dp_map_lock; struct pool_map *dp_map; tse_task_t *dp_map_task; + void **dp_metrics; /* highest known pool map version */ uint32_t dp_map_version_known; uint32_t dp_disconnecting:1, diff --git a/src/include/daos/tls.h b/src/include/daos/tls.h new file mode 100644 index 00000000000..8e9628b39da --- /dev/null +++ b/src/include/daos/tls.h 
@@ -0,0 +1,121 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of daos + * + * src/include/daos/tls.h + */ + +#ifndef __DAOS_TLS_H__ +#define __DAOS_TLS_H__ + +#include +#include + +/** + * Stackable Module API + * Provides a modular interface to load and register server-side code on + * demand. A module is composed of: + * - a set of request handlers which are registered when the module is loaded. + * - a server-side API (see header files suffixed by "_srv") used for + * inter-module direct calls. + * + * For now, all loaded modules are assumed to be trustful, but sandboxes can be + * implemented in the future. + */ +/* + * Thread-local storage + */ +struct daos_thread_local_storage { + uint32_t dtls_tag; + void **dtls_values; +}; + +enum daos_module_tag { + DAOS_SYS_TAG = 1 << 0, /** only run on system xstream */ + DAOS_TGT_TAG = 1 << 1, /** only run on target xstream */ + DAOS_RDB_TAG = 1 << 2, /** only run on rdb xstream */ + DAOS_OFF_TAG = 1 << 3, /** only run on offload/helper xstream */ + DAOS_CLI_TAG = 1 << 4, /** only run on client stack */ + DAOS_SERVER_TAG = 0xff, /** run on all xstream */ +}; + +/* The module key descriptor for each xstream */ +struct daos_module_key { + /* Indicate where the keys should be instantiated */ + enum daos_module_tag dmk_tags; + + /* The position inside the daos_module_keys */ + int dmk_index; + /* init keys for context */ + void *(*dmk_init)(int tags, int xs_id, int tgt_id); + + /* fini keys for context */ + void (*dmk_fini)(int tags, void *data); +}; + +#define DAOS_MODULE_KEYS_NR 10 +struct daos_thread_local_storage * +dss_tls_get(void); +struct daos_thread_local_storage * +dc_tls_get(unsigned int tag); + +int +ds_tls_key_create(void); +int +dc_tls_key_create(void); +void +ds_tls_key_delete(void); +void +dc_tls_key_delete(void); + +struct daos_module_key * +daos_get_module_key(int index); + +/** + * Get value from context by the key 
+ * + * Get value inside dtls by key. So each module will use this API to + * retrieve their own value in the thread context. + * + * \param[in] dtls the thread context. + * \param[in] key key used to retrieve the dtls_value. + * + * \retval the dtls_value retrieved by key. + */ +static inline void * +daos_module_key_get(struct daos_thread_local_storage *dtls, struct daos_module_key *key) +{ + D_ASSERT(key->dmk_index >= 0); + D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); + D_ASSERT(daos_get_module_key(key->dmk_index) == key); + D_ASSERT(dtls != NULL); + + return dtls->dtls_values[key->dmk_index]; +} + +#define dss_module_key_get daos_module_key_get +#define dss_register_key daos_register_key +#define dss_unregister_key daos_unregister_key +#define dss_module_info daos_module_info +#define dss_module_tag daos_module_tag +#define dss_module_key daos_module_key +#define dss_thread_local_storage daos_thread_local_storage + +void +daos_register_key(struct daos_module_key *key); +void +daos_unregister_key(struct daos_module_key *key); +struct daos_thread_local_storage * +dc_tls_init(int tag, uint32_t pid); +void +dc_tls_fini(void); +struct daos_thread_local_storage * +dss_tls_init(int tag, int xs_id, int tgt_id); +void +dss_tls_fini(struct daos_thread_local_storage *dtls); + +#endif /*__DAOS_TLS_H__*/ diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index 06a927b8d3f..116c486e943 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -54,84 +55,6 @@ extern unsigned int dss_instance_idx; /** Bypass for the nvme health check */ extern bool dss_nvme_bypass_health_check; -/** - * Stackable Module API - * Provides a modular interface to load and register server-side code on - * demand. A module is composed of: - * - a set of request handlers which are registered when the module is loaded. - * - a server-side API (see header files suffixed by "_srv") used for - * inter-module direct calls. - * - * For now, all loaded modules are assumed to be trustful, but sandboxes can be - * implemented in the future. - */ -/* - * Thead-local storage - */ -struct dss_thread_local_storage { - uint32_t dtls_tag; - void **dtls_values; -}; - -enum dss_module_tag { - DAOS_SYS_TAG = 1 << 0, /** only run on system xstream */ - DAOS_TGT_TAG = 1 << 1, /** only run on target xstream */ - DAOS_RDB_TAG = 1 << 2, /** only run on rdb xstream */ - DAOS_OFF_TAG = 1 << 3, /** only run on offload/helper xstream */ - DAOS_SERVER_TAG = 0xff, /** run on all xstream */ -}; - -/* The module key descriptor for each xstream */ -struct dss_module_key { - /* Indicate where the keys should be instantiated */ - enum dss_module_tag dmk_tags; - - /* The position inside the dss_module_keys */ - int dmk_index; - /* init keys for context */ - void *(*dmk_init)(int tags, int xs_id, int tgt_id); - - /* fini keys for context */ - void (*dmk_fini)(int tags, void *data); -}; - -extern pthread_key_t dss_tls_key; -extern struct dss_module_key *dss_module_keys[]; -#define DAOS_MODULE_KEYS_NR 10 - -static inline struct dss_thread_local_storage * -dss_tls_get() -{ - return (struct dss_thread_local_storage *) - pthread_getspecific(dss_tls_key); -} - -/** - * Get value from context by the key - * - * Get value inside dtls by key. So each module will use this API to - * retrieve their own value in the thread context. 
- * - * \param[in] dtls the thread context. - * \param[in] key key used to retrieve the dtls_value. - * - * \retval the dtls_value retrieved by key. - */ -static inline void * -dss_module_key_get(struct dss_thread_local_storage *dtls, - struct dss_module_key *key) -{ - D_ASSERT(key->dmk_index >= 0); - D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); - D_ASSERT(dss_module_keys[key->dmk_index] == key); - D_ASSERT(dtls != NULL); - - return dtls->dtls_values[key->dmk_index]; -} - -void dss_register_key(struct dss_module_key *key); -void dss_unregister_key(struct dss_module_key *key); - /** pthread names are limited to 16 chars */ #define DSS_XS_NAME_LEN (32) @@ -172,7 +95,7 @@ static inline struct dss_module_info * dss_get_module_info(void) { struct dss_module_info *dmi; - struct dss_thread_local_storage *dtc; + struct daos_thread_local_storage *dtc; dtc = dss_tls_get(); dmi = (struct dss_module_info *) @@ -419,23 +342,6 @@ struct dss_module_ops { int srv_profile_stop(); int srv_profile_start(char *path, int avg); -struct dss_module_metrics { - /* Indicate where the keys should be instantiated */ - enum dss_module_tag dmm_tags; - - /** - * allocate metrics with path to ephemeral shmem for to the - * newly-created pool - */ - void *(*dmm_init)(const char *path, int tgt_id); - void (*dmm_fini)(void *data); - - /** - * Get the number of metrics allocated by this module in total (including all targets). - */ - int (*dmm_nr_metrics)(void); -}; - /** * Each module should provide a dss_module structure which defines the module * interface. 
The name of the allocated structure must be the library name @@ -481,7 +387,7 @@ struct dss_module { struct dss_module_ops *sm_mod_ops; /* Per-pool metrics (optional) */ - struct dss_module_metrics *sm_metrics; + struct daos_module_metrics *sm_metrics; }; /** diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 12039c24a73..efb838befae 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -145,9 +145,12 @@ enum { }; enum { - D_TM_SERVER_PROCESS = 0x000, - D_TM_SERIALIZATION = 0x001, - D_TM_RETAIN_SHMEM = 0x002, + D_TM_SERVER_PROCESS = 0x000, + D_TM_SERIALIZATION = 0x001, + D_TM_RETAIN_SHMEM = 0x002, + D_TM_RETAIN_SHMEM_IF_NON_EMPTY = 0x004, + D_TM_OPEN_OR_CREATE = 0x008, + D_TM_MULTIPLE_WRITER_LOCK = 0x010, }; /** Output formats */ diff --git a/src/include/gurt/telemetry_consumer.h b/src/include/gurt/telemetry_consumer.h index f0b1d706be7..138633ced91 100644 --- a/src/include/gurt/telemetry_consumer.h +++ b/src/include/gurt/telemetry_consumer.h @@ -49,12 +49,21 @@ int d_tm_list(struct d_tm_context *ctx, struct d_tm_nodeList_t **head, int d_tm_list_subdirs(struct d_tm_context *ctx, struct d_tm_nodeList_t **head, struct d_tm_node_t *node, uint64_t *node_count, int max_depth); -void d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, - int level, int filter, char *path, int format, - int opt_fields, uint32_t ops, FILE *stream); + +typedef void (*d_tm_iter_cb_t)(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *cb_arg); + +void +d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, int filter, char *path, + int format, int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg); void d_tm_print_node(struct d_tm_context *ctx, struct 
d_tm_node_t *node, int level, char *name, int format, int opt_fields, FILE *stream); + +void + d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *path, + int format, int opt_fields, FILE *stream); + void d_tm_print_field_descriptors(int opt_fields, FILE *stream); void d_tm_print_counter(uint64_t val, char *name, int format, char *units, int opt_fields, FILE *stream); diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h index 21f506fba38..0046acf1240 100644 --- a/src/include/gurt/telemetry_producer.h +++ b/src/include/gurt/telemetry_producer.h @@ -1,11 +1,12 @@ /** - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ #ifndef __TELEMETRY_PRODUCER_H__ #define __TELEMETRY_PRODUCER_H__ +#include #include /* Developer facing server API to write data */ @@ -23,12 +24,19 @@ void d_tm_dec_gauge(struct d_tm_node_t *metric, uint64_t value); /* Other server functions */ int d_tm_init(int id, uint64_t mem_size, int flags); +int + d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name); int d_tm_init_histogram(struct d_tm_node_t *node, char *path, int num_buckets, int initial_width, int multiplier); int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc, char *units, const char *fmt, ...); int d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, const char *fmt, ...); +int + d_tm_attach_path_segment(key_t key, const char *fmt, ...); int d_tm_del_ephemeral_dir(const char *fmt, ...); +int + d_tm_try_del_ephemeral_dir(const char *fmt, ...); void d_tm_fini(void); + #endif /* __TELEMETRY_PRODUCER_H__ */ diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index aa640f4c99f..2eff852eb9f 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -24,6 +24,7 @@ #include "rpc.h" #include #include +#include int dc_cp(tse_task_t *task, void *data) @@ -1180,6 
+1181,90 @@ dc_mgmt_pool_find(struct dc_mgmt_sys *sys, const char *label, uuid_t puuid, return rc; } +int +dc_mgmt_tm_register(const char *sys, const char *jobid, key_t shm_key, uid_t *owner_uid) +{ + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + struct drpc *ctx; + Mgmt__ClientTelemetryReq req = MGMT__CLIENT_TELEMETRY_REQ__INIT; + Mgmt__ClientTelemetryResp *resp; + uint8_t *reqb; + size_t reqb_size; + Drpc__Call *dreq; + Drpc__Response *dresp; + int rc; + + if (owner_uid == NULL) + return -DER_INVAL; + + /* Connect to daos_agent. */ + D_ASSERT(dc_agent_sockpath != NULL); + rc = drpc_connect(dc_agent_sockpath, &ctx); + if (rc != -DER_SUCCESS) { + DL_ERROR(rc, "failed to connect to %s ", dc_agent_sockpath); + D_GOTO(out, 0); + } + + req.sys = (char *)sys; + req.jobid = dc_jobid; + req.shm_key = shm_key; + + reqb_size = mgmt__client_telemetry_req__get_packed_size(&req); + D_ALLOC(reqb, reqb_size); + if (reqb == NULL) { + D_GOTO(out_ctx, rc = -DER_NOMEM); + } + mgmt__client_telemetry_req__pack(&req, reqb); + + rc = drpc_call_create(ctx, DRPC_MODULE_MGMT, DRPC_METHOD_MGMT_SETUP_CLIENT_TELEM, &dreq); + if (rc != 0) { + D_FREE(reqb); + goto out_ctx; + } + dreq->body.len = reqb_size; + dreq->body.data = reqb; + + /* Make the call and get the response. 
*/ + rc = drpc_call(ctx, R_SYNC, dreq, &dresp); + if (rc != 0) { + DL_ERROR(rc, "Sending client telemetry setup request failed"); + goto out_dreq; + } + if (dresp->status != DRPC__STATUS__SUCCESS) { + D_ERROR("Client telemetry setup request unsuccessful: %d\n", dresp->status); + rc = -DER_UNINIT; + goto out_dresp; + } + + resp = mgmt__client_telemetry_resp__unpack(&alloc.alloc, dresp->body.len, dresp->body.data); + if (alloc.oom) + D_GOTO(out_dresp, rc = -DER_NOMEM); + if (resp == NULL) { + D_ERROR("failed to unpack SetupClientTelemetry response\n"); + rc = -DER_NOMEM; + goto out_dresp; + } + if (resp->status != 0) { + D_ERROR("SetupClientTelemetry(%s) failed: " DF_RC "\n", req.sys, + DP_RC(resp->status)); + rc = resp->status; + goto out_resp; + } + + *owner_uid = resp->agent_uid; + +out_resp: + mgmt__client_telemetry_resp__free_unpacked(resp, &alloc.alloc); +out_dresp: + drpc_response_free(dresp); +out_dreq: + drpc_call_free(dreq); +out_ctx: + drpc_close(ctx); +out: + return rc; +} + /** * Initialize management interface */ diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index c599d8f8aaf..f8e4e7e5299 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -649,6 +649,86 @@ void mgmt__pool_monitor_req__free_unpacked assert(message->base.descriptor == &mgmt__pool_monitor_req__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } +void +mgmt__client_telemetry_req__init(Mgmt__ClientTelemetryReq *message) +{ + static const Mgmt__ClientTelemetryReq init_value = MGMT__CLIENT_TELEMETRY_REQ__INIT; + *message = init_value; +} +size_t +mgmt__client_telemetry_req__get_packed_size(const Mgmt__ClientTelemetryReq *message) +{ + assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor); + return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message)); +} +size_t +mgmt__client_telemetry_req__pack(const Mgmt__ClientTelemetryReq *message, uint8_t *out) +{ + assert(message->base.descriptor == 
&mgmt__client_telemetry_req__descriptor); + return protobuf_c_message_pack((const ProtobufCMessage *)message, out); +} +size_t +mgmt__client_telemetry_req__pack_to_buffer(const Mgmt__ClientTelemetryReq *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor); + return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer); +} +Mgmt__ClientTelemetryReq * +mgmt__client_telemetry_req__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data) +{ + return (Mgmt__ClientTelemetryReq *)protobuf_c_message_unpack( + &mgmt__client_telemetry_req__descriptor, allocator, len, data); +} +void +mgmt__client_telemetry_req__free_unpacked(Mgmt__ClientTelemetryReq *message, + ProtobufCAllocator *allocator) +{ + if (!message) + return; + assert(message->base.descriptor == &mgmt__client_telemetry_req__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void +mgmt__client_telemetry_resp__init(Mgmt__ClientTelemetryResp *message) +{ + static const Mgmt__ClientTelemetryResp init_value = MGMT__CLIENT_TELEMETRY_RESP__INIT; + *message = init_value; +} +size_t +mgmt__client_telemetry_resp__get_packed_size(const Mgmt__ClientTelemetryResp *message) +{ + assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor); + return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message)); +} +size_t +mgmt__client_telemetry_resp__pack(const Mgmt__ClientTelemetryResp *message, uint8_t *out) +{ + assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor); + return protobuf_c_message_pack((const ProtobufCMessage *)message, out); +} +size_t +mgmt__client_telemetry_resp__pack_to_buffer(const Mgmt__ClientTelemetryResp *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor); + return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer); +} 
+Mgmt__ClientTelemetryResp * +mgmt__client_telemetry_resp__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data) +{ + return (Mgmt__ClientTelemetryResp *)protobuf_c_message_unpack( + &mgmt__client_telemetry_resp__descriptor, allocator, len, data); +} +void +mgmt__client_telemetry_resp__free_unpacked(Mgmt__ClientTelemetryResp *message, + ProtobufCAllocator *allocator) +{ + if (!message) + return; + assert(message->base.descriptor == &mgmt__client_telemetry_resp__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} static const ProtobufCFieldDescriptor mgmt__daos_resp__field_descriptors[1] = { { @@ -1740,3 +1820,77 @@ const ProtobufCMessageDescriptor mgmt__pool_monitor_req__descriptor = (ProtobufCMessageInit) mgmt__pool_monitor_req__init, NULL,NULL,NULL /* reserved[123] */ }; +static const ProtobufCFieldDescriptor mgmt__client_telemetry_req__field_descriptors[3] = { + { + "sys", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ + offsetof(Mgmt__ClientTelemetryReq, sys), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "jobid", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ + offsetof(Mgmt__ClientTelemetryReq, jobid), NULL, &protobuf_c_empty_string, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "shm_key", 3, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__ClientTelemetryReq, shm_key), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__client_telemetry_req__field_indices_by_name[] = { + 1, /* field[1] = jobid */ + 2, /* field[2] = shm_key */ + 0, /* field[0] = sys */ +}; +static const ProtobufCIntRange mgmt__client_telemetry_req__number_ranges[1 + 1] = {{1, 0}, {0, 3}}; +const ProtobufCMessageDescriptor mgmt__client_telemetry_req__descriptor = { + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.ClientTelemetryReq", + "ClientTelemetryReq", + "Mgmt__ClientTelemetryReq", + "mgmt", + sizeof(Mgmt__ClientTelemetryReq), + 3, + mgmt__client_telemetry_req__field_descriptors, + mgmt__client_telemetry_req__field_indices_by_name, + 1, + mgmt__client_telemetry_req__number_ranges, + (ProtobufCMessageInit)mgmt__client_telemetry_req__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor mgmt__client_telemetry_resp__field_descriptors[2] = { + { + "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__ClientTelemetryResp, status), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "agent_uid", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ + offsetof(Mgmt__ClientTelemetryResp, agent_uid), NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__client_telemetry_resp__field_indices_by_name[] = { + 1, /* field[1] = agent_uid */ + 0, /* field[0] = status */ +}; +static const ProtobufCIntRange mgmt__client_telemetry_resp__number_ranges[1 + 1] = {{1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor mgmt__client_telemetry_resp__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.ClientTelemetryResp", + "ClientTelemetryResp", + "Mgmt__ClientTelemetryResp", + "mgmt", + sizeof(Mgmt__ClientTelemetryResp), + 2, + mgmt__client_telemetry_resp__field_descriptors, + mgmt__client_telemetry_resp__field_indices_by_name, + 1, + mgmt__client_telemetry_resp__number_ranges, + (ProtobufCMessageInit)mgmt__client_telemetry_resp__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 381b45534f3..789a636509b 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -31,7 +31,8 @@ typedef struct _Mgmt__PrepShutdownReq Mgmt__PrepShutdownReq; typedef struct _Mgmt__PingRankReq 
Mgmt__PingRankReq; typedef struct _Mgmt__SetRankReq Mgmt__SetRankReq; typedef struct _Mgmt__PoolMonitorReq Mgmt__PoolMonitorReq; - +typedef struct _Mgmt__ClientTelemetryReq Mgmt__ClientTelemetryReq; +typedef struct _Mgmt__ClientTelemetryResp Mgmt__ClientTelemetryResp; /* --- enums --- */ @@ -223,7 +224,7 @@ struct _Mgmt__ClientNetHint { ProtobufCMessage base; /* - * CaRT OFI provider + * CaRT provider */ char *provider; /* @@ -378,6 +379,43 @@ struct _Mgmt__PoolMonitorReq { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_monitor_req__descriptor) \ , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string } +struct _Mgmt__ClientTelemetryReq { + ProtobufCMessage base; + /* + * DAOS system identifier + */ + char *sys; + /* + * Job ID used for client telemetry + */ + char *jobid; + /* + * Client's shared memory segment key + */ + int32_t shm_key; +}; +#define MGMT__CLIENT_TELEMETRY_REQ__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT(&mgmt__client_telemetry_req__descriptor) \ + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0 \ + } + +struct _Mgmt__ClientTelemetryResp { + ProtobufCMessage base; + /* + * DAOS status code + */ + int32_t status; + /* + * UID of agent process + */ + int32_t agent_uid; +}; +#define MGMT__CLIENT_TELEMETRY_RESP__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT(&mgmt__client_telemetry_resp__descriptor) \ + , 0, 0 \ + } /* Mgmt__DaosResp methods */ void mgmt__daos_resp__init @@ -651,6 +689,36 @@ Mgmt__PoolMonitorReq * void mgmt__pool_monitor_req__free_unpacked (Mgmt__PoolMonitorReq *message, ProtobufCAllocator *allocator); +/* Mgmt__ClientTelemetryReq methods */ +void +mgmt__client_telemetry_req__init(Mgmt__ClientTelemetryReq *message); +size_t +mgmt__client_telemetry_req__get_packed_size(const Mgmt__ClientTelemetryReq *message); +size_t +mgmt__client_telemetry_req__pack(const Mgmt__ClientTelemetryReq *message, uint8_t *out); +size_t 
+mgmt__client_telemetry_req__pack_to_buffer(const Mgmt__ClientTelemetryReq *message, + ProtobufCBuffer *buffer); +Mgmt__ClientTelemetryReq * +mgmt__client_telemetry_req__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data); +void +mgmt__client_telemetry_req__free_unpacked(Mgmt__ClientTelemetryReq *message, + ProtobufCAllocator *allocator); +/* Mgmt__ClientTelemetryResp methods */ +void +mgmt__client_telemetry_resp__init(Mgmt__ClientTelemetryResp *message); +size_t +mgmt__client_telemetry_resp__get_packed_size(const Mgmt__ClientTelemetryResp *message); +size_t +mgmt__client_telemetry_resp__pack(const Mgmt__ClientTelemetryResp *message, uint8_t *out); +size_t +mgmt__client_telemetry_resp__pack_to_buffer(const Mgmt__ClientTelemetryResp *message, + ProtobufCBuffer *buffer); +Mgmt__ClientTelemetryResp * +mgmt__client_telemetry_resp__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data); +void +mgmt__client_telemetry_resp__free_unpacked(Mgmt__ClientTelemetryResp *message, + ProtobufCAllocator *allocator); /* --- per-message closures --- */ typedef void (*Mgmt__DaosResp_Closure) @@ -701,6 +769,10 @@ typedef void (*Mgmt__SetRankReq_Closure) typedef void (*Mgmt__PoolMonitorReq_Closure) (const Mgmt__PoolMonitorReq *message, void *closure_data); +typedef void (*Mgmt__ClientTelemetryReq_Closure)(const Mgmt__ClientTelemetryReq *message, + void *closure_data); +typedef void (*Mgmt__ClientTelemetryResp_Closure)(const Mgmt__ClientTelemetryResp *message, + void *closure_data); /* --- services --- */ @@ -724,6 +796,8 @@ extern const ProtobufCMessageDescriptor mgmt__prep_shutdown_req__descriptor; extern const ProtobufCMessageDescriptor mgmt__ping_rank_req__descriptor; extern const ProtobufCMessageDescriptor mgmt__set_rank_req__descriptor; extern const ProtobufCMessageDescriptor mgmt__pool_monitor_req__descriptor; +extern const ProtobufCMessageDescriptor mgmt__client_telemetry_req__descriptor; +extern const ProtobufCMessageDescriptor 
mgmt__client_telemetry_resp__descriptor; PROTOBUF_C__END_DECLS diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 9bc4f14362c..f39f95600f6 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -12,6 +12,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include "obj_rpc.h" #include "obj_internal.h" @@ -19,14 +24,121 @@ unsigned int srv_io_mode = DIM_DTX_FULL_ENABLED; int dc_obj_proto_version; +static void * +dc_obj_tls_init(int tags, int xs_id, int pid) +{ + struct dc_obj_tls *tls; + int opc; + int rc; + unsigned long tid = pthread_self(); + + D_ALLOC_PTR(tls); + if (tls == NULL) + return NULL; + + /** register different per-opcode sensors */ + for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { + /** Start with number of active requests, of type gauge */ + rc = d_tm_add_metric(&tls->cot_op_active[opc], D_TM_STATS_GAUGE, + "number of active object RPCs", "ops", "%lu/io/ops/%s/active", + tid, obj_opc_to_str(opc)); + if (rc) { + D_WARN("Failed to create active counter: " DF_RC "\n", DP_RC(rc)); + D_GOTO(out, rc); + } + + if (opc == DAOS_OBJ_RPC_UPDATE || opc == DAOS_OBJ_RPC_TGT_UPDATE || + opc == DAOS_OBJ_RPC_FETCH) + /** See below, latency reported per size for those */ + continue; + + /** And finally the per-opcode latency, of type gauge */ + rc = d_tm_add_metric(&tls->cot_op_lat[opc], D_TM_STATS_GAUGE, + "object RPC processing time", "us", "%lu/io/ops/%s/latency", + tid, obj_opc_to_str(opc)); + if (rc) { + D_WARN("Failed to create latency sensor: " DF_RC "\n", DP_RC(rc)); + D_GOTO(out, rc); + } + } + + /** + * Maintain per-I/O size latency for update & fetch RPCs + * of type gauge + */ + rc = obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, pid, tls->cot_update_lat, + obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time", + false); + if (rc) + D_GOTO(out, rc); + + rc = obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, pid, tls->cot_fetch_lat, + obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time", 
+ false); + if (rc) + D_GOTO(out, rc); + +out: + if (rc) { + D_FREE(tls); + tls = NULL; + } + + return tls; +} + +static void +dc_obj_tls_fini(int tags, void *data) +{ + struct dc_obj_tls *tls = data; + + D_FREE(tls); +} + +struct daos_module_key dc_obj_module_key = { + .dmk_tags = DAOS_CLI_TAG, + .dmk_index = -1, + .dmk_init = dc_obj_tls_init, + .dmk_fini = dc_obj_tls_fini, +}; + +static void * +dc_obj_metrics_alloc(const char *path, int tgt_id) +{ + return obj_metrics_alloc_internal(path, tgt_id, false); +} + +static void +dc_obj_metrics_free(void *data) +{ + D_FREE(data); +} + +/* metrics per pool */ +struct daos_module_metrics dc_obj_metrics = { + .dmm_tags = DAOS_CLI_TAG, + .dmm_init = dc_obj_metrics_alloc, + .dmm_fini = dc_obj_metrics_free, + .dmm_nr_metrics = obj_metrics_count, +}; + /** * Initialize object interface */ int dc_obj_init(void) { - uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; - int rc; + uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; + int rc; + + if (daos_client_metric) { + daos_register_key(&dc_obj_module_key); + rc = daos_metrics_init(DAOS_CLI_TAG, DAOS_OBJ_MODULE, &dc_obj_metrics); + if (rc) { + DL_ERROR(rc, "register object failed"); + return rc; + } + } rc = obj_utils_init(); if (rc) @@ -78,6 +190,7 @@ dc_obj_init(void) out_utils: if (rc) obj_utils_fini(); + return rc; } @@ -94,4 +207,6 @@ dc_obj_fini(void) obj_ec_codec_fini(); obj_class_fini(); obj_utils_fini(); + if (daos_client_metric) + daos_unregister_key(&dc_obj_module_key); } diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 6179e1deb25..71b65cdf025 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -14,7 +14,9 @@ #include #include #include -#include "obj_rpc.h" +#include +#include +#include #include "obj_internal.h" static inline struct dc_obj_layout * @@ -104,6 +106,7 @@ struct rw_cb_args { daos_iom_t *maps; crt_endpoint_t tgt_ep; struct shard_rw_args *shard_args; + uint64_t send_time; }; static struct 
dcs_layout * @@ -886,6 +889,99 @@ dc_shard_update_size(struct rw_cb_args *rw_args, int fetch_rc) return rc; } +daos_size_t +obj_get_fetch_size(struct rw_cb_args *arg) +{ + struct obj_rw_out *orwo; + daos_size_t size = 0; + + orwo = crt_reply_get(arg->rpc); + + if (orwo->orw_sgls.ca_count > 0) { + /* inline transfer */ + size = + daos_sgls_packed_size(orwo->orw_sgls.ca_arrays, orwo->orw_sgls.ca_count, NULL); + } else if (arg->rwaa_sgls != NULL) { + /* bulk transfer */ + daos_size_t *replied_sizes = orwo->orw_data_sizes.ca_arrays; + int i; + + for (i = 0; i < orwo->orw_data_sizes.ca_count; i++) + size += replied_sizes[i]; + } + + return size; +} + +static void +obj_shard_update_metrics_begin(crt_rpc_t *rpc) +{ + struct dc_obj_tls *tls; + int opc; + + if (!daos_client_metric) + return; + + tls = dc_obj_tls_get(); + D_ASSERT(tls != NULL); + opc = opc_get(rpc->cr_opc); + d_tm_inc_gauge(tls->cot_op_active[opc], 1); +} + +static void +obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int ret) +{ + struct dc_obj_tls *tls; + struct rw_cb_args *rw_args; + struct dc_pool *pool; + struct obj_rw_in *orw; + struct d_tm_node_t *lat = NULL; + struct obj_pool_metrics *opm = NULL; + daos_size_t size; + uint64_t time; + int opc; + + if (!daos_client_metric || ret != 0) + return; + tls = dc_obj_tls_get(); + D_ASSERT(tls != NULL); + opc = opc_get(rpc->cr_opc); + orw = crt_req_get(rpc); + d_tm_dec_gauge(tls->cot_op_active[opc], 1); + /** + * Measure latency of successful I/O only. + * Use bit shift for performance and tolerate some inaccuracy. 
+ */ + time = daos_get_ntime() - send_time; + time >>= 10; + + switch (opc) { + case DAOS_OBJ_RPC_UPDATE: + case DAOS_OBJ_RPC_FETCH: + rw_args = arg; + pool = rw_args->shard_args->auxi.obj_auxi->obj->cob_pool; + D_ASSERT(pool != NULL); + opm = pool->dp_metrics[DAOS_OBJ_MODULE]; + D_ASSERTF(opm != NULL, "pool %p\n", pool); + if (opc == DAOS_OBJ_RPC_UPDATE) { + size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL); + d_tm_inc_counter(opm->opm_update_bytes, size); + lat = tls->cot_update_lat[lat_bucket(size)]; + } else { + size = obj_get_fetch_size(rw_args); + lat = tls->cot_fetch_lat[lat_bucket(size)]; + d_tm_inc_counter(opm->opm_fetch_bytes, size); + } + break; + default: + lat = tls->cot_op_lat[opc]; + break; + } + + if (lat != NULL) + d_tm_set_gauge(lat, time); +} + static int dc_rw_cb(tse_task_t *task, void *arg) { @@ -1191,10 +1287,15 @@ dc_rw_cb(tse_task_t *task, void *arg) out: if (rc == -DER_CSUM && opc == DAOS_OBJ_RPC_FETCH) dc_shard_csum_report(task, &rw_args->tgt_ep, rw_args->rpc); + + obj_shard_update_metrics_end(rw_args->rpc, rw_args->send_time, rw_args, + ret == 0 ? rc : ret); + crt_req_decref(rw_args->rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; + return ret; } @@ -1362,7 +1463,9 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc, rw_args.co = shard->do_co; rw_args.shard_args = args; /* remember the sgl to copyout the data inline for fetch */ - rw_args.rwaa_sgls = (opc == DAOS_OBJ_RPC_FETCH) ? sgls : NULL; + rw_args.rwaa_sgls = sgls; + rw_args.send_time = daos_client_metric ? 
daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); if (args->reasb_req && args->reasb_req->orr_recov) { rw_args.maps = NULL; orw->orw_flags |= ORF_EC_RECOV; @@ -1421,6 +1524,7 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc, struct obj_punch_cb_args { crt_rpc_t *rpc; unsigned int *map_ver; + uint64_t send_time; }; static int @@ -1436,7 +1540,10 @@ obj_shard_punch_cb(tse_task_t *task, void *data) *cb_args->map_ver = obj_reply_map_version_get(rpc); } + obj_shard_update_metrics_end(cb_args->rpc, cb_args->send_time, cb_args, task->dt_result); + crt_req_decref(rpc); + return task->dt_result; } @@ -1480,6 +1587,8 @@ dc_obj_shard_punch(struct dc_obj_shard *shard, enum obj_rpc_opc opc, crt_req_addref(req); cb_args.rpc = req; cb_args.map_ver = &args->pa_auxi.map_ver; + cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_punch_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -1540,6 +1649,7 @@ struct obj_enum_args { d_iov_t *csum; struct dtx_epoch *epoch; daos_handle_t *th; + uint64_t send_time; }; /** @@ -1858,10 +1968,15 @@ dc_enumerate_cb(tse_task_t *task, void *arg) crt_bulk_free(oei->oei_bulk); if (oei->oei_kds_bulk != NULL) crt_bulk_free(oei->oei_kds_bulk); + + obj_shard_update_metrics_end(enum_args->rpc, enum_args->send_time, enum_args, + ret == 0 ? rc : ret); + crt_req_decref(enum_args->rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; + return ret; } @@ -2007,6 +2122,8 @@ dc_obj_shard_list(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, enum_args.eaa_recxs = args->la_recxs; enum_args.epoch = &args->la_auxi.epoch; enum_args.th = &obj_args->th; + enum_args.send_time = daos_client_metric ? 
daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_enumerate_cb, &enum_args, sizeof(enum_args)); if (rc != 0) @@ -2038,6 +2155,7 @@ struct obj_query_key_cb_args { struct dc_obj_shard *shard; struct dtx_epoch epoch; daos_handle_t th; + uint64_t send_time; }; static void @@ -2235,6 +2353,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) D_SPIN_UNLOCK(&cb_args->obj->cob_spin); out: + obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc); crt_req_decref(rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; @@ -2285,6 +2404,8 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint cb_args.epoch = *epoch; cb_args.th = th; cb_args.max_epoch = max_epoch; + cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_query_key_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -2328,6 +2449,7 @@ struct obj_shard_sync_cb_args { crt_rpc_t *rpc; daos_epoch_t *epoch; uint32_t *map_ver; + uint64_t send_time; }; static int @@ -2377,6 +2499,8 @@ obj_shard_sync_cb(tse_task_t *task, void *data) oso->oso_epoch, oso->oso_map_version); out: + obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc); + crt_req_decref(rpc); return rc; } @@ -2418,10 +2542,11 @@ dc_obj_shard_sync(struct dc_obj_shard *shard, enum obj_rpc_opc opc, D_GOTO(out, rc); crt_req_addref(req); - cb_args.rpc = req; - cb_args.epoch = args->sa_epoch; - cb_args.map_ver = &args->sa_auxi.map_ver; - + cb_args.rpc = req; + cb_args.epoch = args->sa_epoch; + cb_args.map_ver = &args->sa_auxi.map_ver; + cb_args.send_time = daos_client_metric ? 
daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_sync_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -2455,8 +2580,9 @@ struct obj_k2a_args { unsigned int *eaa_map_ver; struct dtx_epoch *epoch; daos_handle_t *th; - daos_anchor_t *anchor; - uint32_t shard; + daos_anchor_t *anchor; + uint64_t send_time; + uint32_t shard; }; static int @@ -2511,6 +2637,8 @@ dc_k2a_cb(tse_task_t *task, void *arg) enum_anchor_copy(k2a_args->anchor, &oko->oko_anchor); dc_obj_shard2anchor(k2a_args->anchor, k2a_args->shard); out: + obj_shard_update_metrics_end(k2a_args->rpc, k2a_args->send_time, k2a_args, + ret == 0 ? rc : ret); if (k2a_args->eaa_obj != NULL) obj_shard_decref(k2a_args->eaa_obj); crt_req_decref(k2a_args->rpc); @@ -2584,6 +2712,8 @@ dc_obj_shard_key2anchor(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, cb_args.th = &obj_args->th; cb_args.anchor = args->ka_anchor; cb_args.shard = obj_shard->do_shard_idx; + cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0; + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_k2a_cb, &cb_args, sizeof(cb_args)); if (rc != 0) D_GOTO(out_eaa, rc); diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index 149915c10fa..4d750c87332 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "obj_rpc.h" #include "obj_ec.h" @@ -535,6 +536,87 @@ struct dc_obj_verify_args { struct dc_obj_verify_cursor cursor; }; +/* + * Report latency on a per-I/O size. + * Buckets starts at [0; 256B[ and are increased by power of 2 + * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[ + * Since 4MB = 2^22 and 256B = 2^8, this means + * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so + * 16 buckets in total. 
+ */ +#define NR_LATENCY_BUCKETS 16 + +struct dc_obj_tls { + /** Measure update/fetch latency based on I/O size (type = gauge) */ + struct d_tm_node_t *cot_update_lat[NR_LATENCY_BUCKETS]; + struct d_tm_node_t *cot_fetch_lat[NR_LATENCY_BUCKETS]; + + /** Measure per-operation latency in us (type = gauge) */ + struct d_tm_node_t *cot_op_lat[OBJ_PROTO_CLI_COUNT]; + /** Count number of per-opcode active requests (type = gauge) */ + struct d_tm_node_t *cot_op_active[OBJ_PROTO_CLI_COUNT]; +}; + +int +obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc, + bool server); +extern struct daos_module_key dc_obj_module_key; + +static inline struct dc_obj_tls * +dc_obj_tls_get() +{ + struct daos_thread_local_storage *dtls; + + dtls = dc_tls_get(dc_obj_module_key.dmk_tags); + D_ASSERT(dtls != NULL); + return daos_module_key_get(dtls, &dc_obj_module_key); +} + +struct obj_pool_metrics { + /** Count number of total per-opcode requests (type = counter) */ + struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT]; + /** Total number of bytes fetched (type = counter) */ + struct d_tm_node_t *opm_fetch_bytes; + /** Total number of bytes updated (type = counter) */ + struct d_tm_node_t *opm_update_bytes; + + /** Total number of silently restarted updates (type = counter) */ + struct d_tm_node_t *opm_update_restart; + /** Total number of resent update operations (type = counter) */ + struct d_tm_node_t *opm_update_resent; + /** Total number of retry update operations (type = counter) */ + struct d_tm_node_t *opm_update_retry; + /** Total number of EC full-stripe update operations (type = counter) */ + struct d_tm_node_t *opm_update_ec_full; + /** Total number of EC partial update operations (type = counter) */ + struct d_tm_node_t *opm_update_ec_partial; +}; + +void +obj_metrics_free(void *data); +int +obj_metrics_count(void); +void * +obj_metrics_alloc_internal(const char *path, int tgt_id, bool server); + +static inline unsigned int 
+lat_bucket(uint64_t size) +{ + int nr; + + if (size <= 256) + return 0; + + /** return number of leading zero-bits */ + nr = __builtin_clzl(size - 1); + + /** >4MB, return last bucket */ + if (nr < 42) + return NR_LATENCY_BUCKETS - 1; + + return 56 - nr; +} + static inline int dc_cont2uuid(struct dc_cont *dc_cont, uuid_t *hdl_uuid, uuid_t *uuid) { diff --git a/src/object/obj_utils.c b/src/object/obj_utils.c index 8312c6719d8..f85409aee9b 100644 --- a/src/object/obj_utils.c +++ b/src/object/obj_utils.c @@ -10,6 +10,10 @@ #define DDSUBSYS DDFAC(object) #include +#include +#include +#include +#include #include "obj_internal.h" static daos_size_t @@ -86,6 +90,150 @@ daos_iods_free(daos_iod_t *iods, int nr, bool need_free) D_FREE(iods); } +int +obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc, + bool server) +{ + unsigned int bucket_max = 256; + int i; + int rc = 0; + + for (i = 0; i < NR_LATENCY_BUCKETS; i++) { + char *path; + + if (server) { + if (bucket_max < 1024) /** B */ + D_ASPRINTF(path, "io/latency/%s/%uB/tgt_%u", op, bucket_max, + tgt_id); + else if (bucket_max < 1024 * 1024) /** KB */ + D_ASPRINTF(path, "io/latency/%s/%uKB/tgt_%u", op, bucket_max / 1024, + tgt_id); + else if (bucket_max <= 1024 * 1024 * 4) /** MB */ + D_ASPRINTF(path, "io/latency/%s/%uMB/tgt_%u", op, + bucket_max / (1024 * 1024), tgt_id); + else /** >4MB */ + D_ASPRINTF(path, "io/latency/%s/GT4MB/tgt_%u", op, tgt_id); + } else { + unsigned long tid = pthread_self(); + + if (bucket_max < 1024) /** B */ + D_ASPRINTF(path, "%lu/io/latency/%s/%uB", tid, op, bucket_max); + else if (bucket_max < 1024 * 1024) /** KB */ + D_ASPRINTF(path, "%lu/io/latency/%s/%uKB", tid, op, + bucket_max / 1024); + else if (bucket_max <= 1024 * 1024 * 4) /** MB */ + D_ASPRINTF(path, "%lu/io/latency/%s/%uMB", tid, op, + bucket_max / (1024 * 1024)); + else /** >4MB */ + D_ASPRINTF(path, "%lu/io/latency/%s/GT4MB", tid, op); + } + rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, 
desc, "us", path); + if (rc) + D_WARN("Failed to create per-I/O size latency " + "sensor: " DF_RC "\n", + DP_RC(rc)); + D_FREE(path); + + bucket_max <<= 1; + } + + return rc; +} + +void +obj_metrics_free(void *data) +{ + D_FREE(data); +} + +int +obj_metrics_count(void) +{ + return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t *)); +} + +void * +obj_metrics_alloc_internal(const char *path, int tgt_id, bool server) +{ + struct obj_pool_metrics *metrics; + char tgt_path[32]; + uint32_t opc; + int rc; + + D_ASSERT(tgt_id >= 0); + if (server) + snprintf(tgt_path, sizeof(tgt_path), "/tgt_%u", tgt_id); + else + tgt_path[0] = '\0'; + + D_ALLOC_PTR(metrics); + if (metrics == NULL) { + D_ERROR("failed to alloc object metrics"); + return NULL; + } + + /** register different per-opcode counters */ + for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { + /** Then the total number of requests, of type counter */ + rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER, + "total number of processed object RPCs", "ops", "%s/ops/%s%s", + path, obj_opc_to_str(opc), tgt_path); + if (rc) + D_WARN("Failed to create total counter: " DF_RC "\n", DP_RC(rc)); + } + + /** Total number of silently restarted updates, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER, + "total number of restarted update ops", "updates", "%s/restarted%s", + path, tgt_path); + if (rc) + D_WARN("Failed to create restarted counter: " DF_RC "\n", DP_RC(rc)); + + /** Total number of resent updates, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER, + "total number of resent update RPCs", "updates", "%s/resent%s", path, + tgt_path); + if (rc) + D_WARN("Failed to create resent counter: " DF_RC "\n", DP_RC(rc)); + + /** Total number of retry updates locally, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER, + "total number of retried update RPCs", "updates", "%s/retry%s", path, + tgt_path); + if (rc) 
+ D_WARN("Failed to create retry cnt sensor: " DF_RC "\n", DP_RC(rc)); + + /** Total bytes read */ + rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER, + "total number of bytes fetched/read", "bytes", "%s/xferred/fetch%s", + path, tgt_path); + if (rc) + D_WARN("Failed to create bytes fetch counter: " DF_RC "\n", DP_RC(rc)); + + /** Total bytes written */ + rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER, + "total number of bytes updated/written", "bytes", + "%s/xferred/update%s", path, tgt_path); + if (rc) + D_WARN("Failed to create bytes update counter: " DF_RC "\n", DP_RC(rc)); + + /** Total number of EC full-stripe update operations, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER, + "total number of EC full-stripe updates", "updates", + "%s/EC_update/full_stripe%s", path, tgt_path); + if (rc) + D_WARN("Failed to create EC full stripe update counter: " DF_RC "\n", DP_RC(rc)); + + /** Total number of EC partial update operations, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER, + "total number of EC partial updates", "updates", + "%s/EC_update/partial%s", path, tgt_path); + if (rc) + D_WARN("Failed to create EC partial update counter: " DF_RC "\n", DP_RC(rc)); + + return metrics; +} + struct recx_rec { daos_recx_t *rr_recx; }; diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 368595bbfb4..885a966c55c 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -114,36 +114,6 @@ struct migrate_cont_hdl { void migrate_pool_tls_destroy(struct migrate_pool_tls *tls); -/* - * Report latency on a per-I/O size. - * Buckets starts at [0; 256B[ and are increased by power of 2 - * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[ - * Since 4MB = 2^22 and 256B = 2^8, this means - * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so - * 16 buckets in total. 
- */ -#define NR_LATENCY_BUCKETS 16 - -struct obj_pool_metrics { - /** Count number of total per-opcode requests (type = counter) */ - struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT]; - /** Total number of bytes fetched (type = counter) */ - struct d_tm_node_t *opm_fetch_bytes; - /** Total number of bytes updated (type = counter) */ - struct d_tm_node_t *opm_update_bytes; - - /** Total number of silently restarted updates (type = counter) */ - struct d_tm_node_t *opm_update_restart; - /** Total number of resent update operations (type = counter) */ - struct d_tm_node_t *opm_update_resent; - /** Total number of retry update operations (type = counter) */ - struct d_tm_node_t *opm_update_retry; - /** Total number of EC full-stripe update operations (type = counter) */ - struct d_tm_node_t *opm_update_ec_full; - /** Total number of EC partial update operations (type = counter) */ - struct d_tm_node_t *opm_update_ec_partial; -}; - struct obj_tls { d_sg_list_t ot_echo_sgl; d_list_t ot_pool_list; @@ -175,24 +145,6 @@ obj_tls_get() return dss_module_key_get(dss_tls_get(), &obj_module_key); } -static inline unsigned int -lat_bucket(uint64_t size) -{ - int nr; - - if (size <= 256) - return 0; - - /** return number of leading zero-bits */ - nr = __builtin_clzl(size - 1); - - /** >4MB, return last bucket */ - if (nr < 42) - return NR_LATENCY_BUCKETS - 1; - - return 56 - nr; -} - enum latency_type { BULK_LATENCY, BIO_LATENCY, diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index 4fd889bb7de..ddb39b8e9fb 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "obj_rpc.h" #include "srv_internal.h" @@ -73,41 +74,6 @@ static struct daos_rpc_handler obj_handlers[] = { #undef X -static int -obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc) -{ - unsigned int bucket_max = 256; - int i; - int rc = 0; - - for (i = 0; i < NR_LATENCY_BUCKETS; i++) { - char *path; - - 
if (bucket_max < 1024) /** B */ - D_ASPRINTF(path, "io/latency/%s/%uB/tgt_%u", - op, bucket_max, tgt_id); - else if (bucket_max < 1024 * 1024) /** KB */ - D_ASPRINTF(path, "io/latency/%s/%uKB/tgt_%u", - op, bucket_max / 1024, tgt_id); - else if (bucket_max <= 1024 * 1024 * 4) /** MB */ - D_ASPRINTF(path, "io/latency/%s/%uMB/tgt_%u", - op, bucket_max / (1024 * 1024), tgt_id); - else /** >4MB */ - D_ASPRINTF(path, "io/latency/%s/GT4MB/tgt_%u", - op, tgt_id); - - rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, desc, "us", path); - if (rc) - D_WARN("Failed to create per-I/O size latency " - "sensor: "DF_RC"\n", DP_RC(rc)); - D_FREE(path); - - bucket_max <<= 1; - } - - return rc; -} - static void * obj_tls_init(int tags, int xs_id, int tgt_id) { @@ -158,27 +124,28 @@ obj_tls_init(int tags, int xs_id, int tgt_id) */ obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_lat, - obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time"); + obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_lat, - obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time"); + obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time", true); obj_latency_tm_init(DAOS_OBJ_RPC_TGT_UPDATE, tgt_id, tls->ot_tgt_update_lat, obj_opc_to_str(DAOS_OBJ_RPC_TGT_UPDATE), - "update tgt RPC processing time"); - obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bulk_lat, - "bulk_update", "Bulk update processing time"); - obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bulk_lat, - "bulk_fetch", "Bulk fetch processing time"); - - obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_vos_lat, - "vos_update", "VOS update processing time"); - obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_vos_lat, - "vos_fetch", "VOS fetch processing time"); - - obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bio_lat, - "bio_update", "BIO update 
processing time"); - obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bio_lat, - "bio_fetch", "BIO fetch processing time"); + "update tgt RPC processing time", true); + obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bulk_lat, "bulk_update", + "Bulk update processing time", true); + obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bulk_lat, "bulk_fetch", + "Bulk fetch processing time", true); + + obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_vos_lat, "vos_update", + "VOS update processing time", true); + obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_vos_lat, "vos_fetch", + "VOS fetch processing time", true); + + obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bio_lat, "bio_update", + "BIO update processing time", true); + obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bio_lat, "bio_fetch", + "BIO fetch processing time", true); return tls; } @@ -239,103 +206,14 @@ static struct dss_module_ops ds_obj_mod_ops = { static void * obj_metrics_alloc(const char *path, int tgt_id) { - struct obj_pool_metrics *metrics; - uint32_t opc; - int rc; - - D_ASSERT(tgt_id >= 0); - - D_ALLOC_PTR(metrics); - if (metrics == NULL) - return NULL; - - /** register different per-opcode counters */ - for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { - /** Then the total number of requests, of type counter */ - rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER, - "total number of processed object RPCs", - "ops", "%s/ops/%s/tgt_%u", path, - obj_opc_to_str(opc), tgt_id); - if (rc) - D_WARN("Failed to create total counter: "DF_RC"\n", - DP_RC(rc)); - } - - /** Total number of silently restarted updates, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER, - "total number of restarted update ops", "updates", - "%s/restarted/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create restarted counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number 
of resent updates, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER, - "total number of resent update RPCs", "updates", - "%s/resent/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create resent counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of retry updates locally, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER, - "total number of retried update RPCs", "updates", - "%s/retry/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create retry cnt sensor: "DF_RC"\n", DP_RC(rc)); - - /** Total bytes read */ - rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER, - "total number of bytes fetched/read", "bytes", - "%s/xferred/fetch/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create bytes fetch counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total bytes written */ - rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER, - "total number of bytes updated/written", "bytes", - "%s/xferred/update/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create bytes update counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of EC full-stripe update operations, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER, - "total number of EC sull-stripe updates", "updates", - "%s/EC_update/full_stripe/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create EC full stripe update counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of EC partial update operations, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER, - "total number of EC sull-partial updates", "updates", - "%s/EC_update/partial/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create EC partial update counter: "DF_RC"\n", - DP_RC(rc)); - - return metrics; -} - -static void -obj_metrics_free(void *data) -{ - D_FREE(data); -} - -static int -obj_metrics_count(void) -{ - return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t 
*)); + return obj_metrics_alloc_internal(path, tgt_id, true); } -struct dss_module_metrics obj_metrics = { - .dmm_tags = DAOS_TGT_TAG, - .dmm_init = obj_metrics_alloc, - .dmm_fini = obj_metrics_free, - .dmm_nr_metrics = obj_metrics_count, +struct daos_module_metrics obj_metrics = { + .dmm_tags = DAOS_TGT_TAG, + .dmm_init = obj_metrics_alloc, + .dmm_fini = obj_metrics_free, + .dmm_nr_metrics = obj_metrics_count, }; struct dss_module obj_module = { diff --git a/src/pool/cli.c b/src/pool/cli.c index e688cd9ecd3..89f7eb256a1 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,9 +15,13 @@ #define D_LOGFAC DD_FAC(pool) #include +#include +#include #include #include #include +#include +#include #include #include #include @@ -32,6 +36,152 @@ struct rsvc_client_state { int dc_pool_proto_version; +struct dc_pool_metrics { + d_list_t dp_pool_list; /* pool metrics list on this thread */ + uuid_t dp_uuid; + char dp_path[D_TM_MAX_NAME_LEN]; + void *dp_metrics[DAOS_NR_MODULE]; + int dp_ref; +}; + +/** + * Destroy metrics for a specific pool. 
+ * + * \param[in] pool pointer to ds_pool structure + */ +static void +dc_pool_metrics_free(struct dc_pool_metrics *metrics) +{ + int rc; + + if (!daos_client_metric) + return; + + daos_module_fini_metrics(DAOS_CLI_TAG, metrics->dp_metrics); + if (!daos_client_metric_retain) { + rc = d_tm_del_ephemeral_dir(metrics->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to remove pool metrics dir for pool: " DF_RC "\n", + DP_UUID(metrics->dp_uuid), DP_RC(rc)); + return; + } + } + + D_INFO(DF_UUID ": destroyed ds_pool metrics: %s\n", DP_UUID(metrics->dp_uuid), + metrics->dp_path); +} + +static int +dc_pool_metrics_alloc(uuid_t pool_uuid, struct dc_pool_metrics **metrics_p) +{ + struct dc_pool_metrics *metrics = NULL; + int pid; + size_t size; + int rc; + + if (!daos_client_metric) + return 0; + + D_ALLOC_PTR(metrics); + if (metrics == NULL) + return -DER_NOMEM; + + uuid_copy(metrics->dp_uuid, pool_uuid); + pid = getpid(); + snprintf(metrics->dp_path, sizeof(metrics->dp_path), "pool/" DF_UUIDF, + DP_UUID(metrics->dp_uuid)); + + /** create new shmem space for per-pool metrics */ + size = daos_module_nr_pool_metrics() * PER_METRIC_BYTES; + rc = d_tm_add_ephemeral_dir(NULL, size, metrics->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to create metrics dir for pool: " DF_RC "\n", + DP_UUID(metrics->dp_uuid), DP_RC(rc)); + return rc; + } + + /* initialize metrics on the system xstream for each module */ + rc = daos_module_init_metrics(DAOS_CLI_TAG, metrics->dp_metrics, metrics->dp_path, pid); + if (rc != 0) { + D_WARN(DF_UUID ": failed to initialize module metrics: " DF_RC "\n", + DP_UUID(metrics->dp_uuid), DP_RC(rc)); + dc_pool_metrics_free(metrics); + return rc; + } + + D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(metrics->dp_uuid), + metrics->dp_path); + *metrics_p = metrics; + + return 0; +} + +struct dc_pool_metrics * +dc_pool_metrics_lookup(struct dc_pool_tls *tls, uuid_t pool_uuid) +{ + struct dc_pool_metrics *metrics; + + 
D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_for_each_entry(metrics, &tls->dpc_metrics_list, dp_pool_list) { + if (uuid_compare(pool_uuid, metrics->dp_uuid) == 0) { + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + return metrics; + } + } + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + + return NULL; +} + +static void * +dc_pool_tls_init(int tags, int xs_id, int pid) +{ + struct dc_pool_tls *tls; + int rc; + + D_ALLOC_PTR(tls); + if (tls == NULL) + return NULL; + + rc = D_MUTEX_INIT(&tls->dpc_metrics_list_lock, NULL); + if (rc != 0) { + D_FREE(tls); + return NULL; + } + + D_INIT_LIST_HEAD(&tls->dpc_metrics_list); + return tls; +} + +static void +dc_pool_tls_fini(int tags, void *data) +{ + struct dc_pool_tls *tls = data; + struct dc_pool_metrics *dpm; + struct dc_pool_metrics *tmp; + + D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_for_each_entry_safe(dpm, tmp, &tls->dpc_metrics_list, dp_pool_list) { + if (dpm->dp_ref != 0) + D_WARN("still reference for pool " DF_UUID " metrics\n", + DP_UUID(dpm->dp_uuid)); + d_list_del_init(&dpm->dp_pool_list); + dc_pool_metrics_free(dpm); + D_FREE(dpm); + } + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + + D_MUTEX_DESTROY(&tls->dpc_metrics_list_lock); + D_FREE(tls); +} + +struct daos_module_key dc_pool_module_key = { + .dmk_tags = DAOS_CLI_TAG, + .dmk_index = -1, + .dmk_init = dc_pool_tls_init, + .dmk_fini = dc_pool_tls_fini, +}; + /** * Initialize pool interface */ @@ -41,6 +191,9 @@ dc_pool_init(void) uint32_t ver_array[2] = {DAOS_POOL_VERSION - 1, DAOS_POOL_VERSION}; int rc; + if (daos_client_metric) + daos_register_key(&dc_pool_module_key); + dc_pool_proto_version = 0; rc = daos_rpc_proto_query(pool_proto_fmt_v4.cpf_base, ver_array, 2, &dc_pool_proto_version); if (rc) @@ -77,7 +230,68 @@ dc_pool_fini(void) else rc = daos_rpc_unregister(&pool_proto_fmt_v5); if (rc != 0) - D_ERROR("failed to unregister pool RPCs: "DF_RC"\n", DP_RC(rc)); + DL_ERROR(rc, "failed to unregister pool RPCs"); + + if (daos_client_metric) + 
daos_unregister_key(&dc_pool_module_key); +} + +static int +dc_pool_metrics_start(struct dc_pool *pool) +{ + struct dc_pool_tls *tls; + struct dc_pool_metrics *metrics; + int rc; + + if (!daos_client_metric) + return 0; + + if (pool->dp_metrics != NULL) + return 0; + + tls = dc_pool_tls_get(); + D_ASSERT(tls != NULL); + + metrics = dc_pool_metrics_lookup(tls, pool->dp_pool); + if (metrics != NULL) { + metrics->dp_ref++; + pool->dp_metrics = metrics->dp_metrics; + return 0; + } + + rc = dc_pool_metrics_alloc(pool->dp_pool, &metrics); + if (rc != 0) + return rc; + + D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_add(&metrics->dp_pool_list, &tls->dpc_metrics_list); + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + metrics->dp_ref++; + pool->dp_metrics = metrics->dp_metrics; + + return 0; +} + +static void +dc_pool_metrics_stop(struct dc_pool *pool) +{ + struct dc_pool_metrics *metrics; + struct dc_pool_tls *tls; + + if (!daos_client_metric) + return; + + if (pool->dp_metrics == NULL) + return; + + tls = dc_pool_tls_get(); + D_ASSERT(tls != NULL); + + metrics = dc_pool_metrics_lookup(tls, pool->dp_pool); + if (metrics != NULL) + metrics->dp_ref--; + + pool->dp_metrics = NULL; } static void @@ -99,6 +313,8 @@ pool_free(struct d_hlink *hlink) if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); + dc_pool_metrics_stop(pool); + rsvc_client_fini(&pool->dp_client); if (pool->dp_sys != NULL) dc_mgmt_sys_detach(pool->dp_sys); @@ -609,6 +825,10 @@ dc_pool_connect_internal(tse_task_t *task, daos_pool_info_t *info, goto out; } + rc = dc_pool_metrics_start(pool); + if (rc != 0) + D_GOTO(out, rc); + /** Pool connect RPC by UUID (provided, or looked up by label above) */ rc = pool_req_create(daos_task2ctx(task), &ep, POOL_CONNECT, &rpc); if (rc != 0) { @@ -1090,6 +1310,10 @@ dc_pool_g2l(struct dc_pool_glob *pool_glob, size_t len, daos_handle_t *poh) if (rc < 0) goto out; + rc = dc_pool_metrics_start(pool); + if (rc != 0) + goto out; + rc = pool_map_create(map_buf, 
pool_glob->dpg_map_version, &map); if (rc != 0) { D_ERROR("failed to create local pool map: "DF_RC"\n", diff --git a/src/pool/cli_internal.h b/src/pool/cli_internal.h index f8f965b4469..fd3b26539f8 100644 --- a/src/pool/cli_internal.h +++ b/src/pool/cli_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -16,4 +16,20 @@ struct dc_pool *dc_pool_alloc(unsigned int nr); int dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect); +struct dc_pool_tls { + pthread_mutex_t dpc_metrics_list_lock; + d_list_t dpc_metrics_list; +}; + +extern struct daos_module_key dc_pool_module_key; + +static inline struct dc_pool_tls * +dc_pool_tls_get() +{ + struct daos_thread_local_storage *dtls; + + dtls = dc_tls_get(dc_pool_module_key.dmk_tags); + D_ASSERT(dtls != NULL); + return daos_module_key_get(dtls, &dc_pool_module_key); +} #endif /* __POOL_CLIENT_INTERNAL_H__ */ diff --git a/src/pool/srv.c b/src/pool/srv.c index 40f1d7d18eb..8a7ba7d14ef 100644 --- a/src/pool/srv.c +++ b/src/pool/srv.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include "rpc.h" @@ -174,11 +175,11 @@ struct dss_module_key pool_module_key = { .dmk_fini = pool_tls_fini, }; -struct dss_module_metrics pool_metrics = { - .dmm_tags = DAOS_SYS_TAG, - .dmm_init = ds_pool_metrics_alloc, - .dmm_fini = ds_pool_metrics_free, - .dmm_nr_metrics = ds_pool_metrics_count, +struct daos_module_metrics pool_metrics = { + .dmm_tags = DAOS_SYS_TAG, + .dmm_init = ds_pool_metrics_alloc, + .dmm_fini = ds_pool_metrics_free, + .dmm_nr_metrics = ds_pool_metrics_count, }; struct dss_module pool_module = { diff --git a/src/pool/srv_metrics.c b/src/pool/srv_metrics.c index 0ca5b494df1..615af9deba1 100644 --- a/src/pool/srv_metrics.c +++ b/src/pool/srv_metrics.c @@ -8,24 +8,9 @@ #include "srv_internal.h" #include +#include #include - -/* Estimate of bytes per typical metric node 
*/ -#define NODE_BYTES (sizeof(struct d_tm_node_t) + \ - sizeof(struct d_tm_metric_t) + \ - 64 /* buffer for metadata */) -/* Estimate of bytes per histogram bucket */ -#define BUCKET_BYTES (sizeof(struct d_tm_bucket_t) + NODE_BYTES) -/* - Estimate of bytes per metric. - This is a generous high-water mark assuming most metrics are not using - histograms. May need adjustment if the balance of metrics changes. -*/ -#define PER_METRIC_BYTES (NODE_BYTES + sizeof(struct d_tm_stats_t) + \ - sizeof(struct d_tm_histogram_t) + \ - BUCKET_BYTES) - /** * Initializes the pool metrics */ diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index a284d645106..129fecd5370 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -1,5 +1,5 @@ // -// (C) Copyright 2018-2023 Intel Corporation. +// (C) Copyright 2018-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -122,3 +122,16 @@ message PoolMonitorReq { string poolHandleUUID = 3; // Pool Handle UUID for the connection string jobid = 4; // Job ID to associate instance with. 
} + +message ClientTelemetryReq +{ + string sys = 1; // DAOS system identifier + string jobid = 2; // Job ID used for client telemetry + int32 shm_key = 3; // Client's shared memory segment key +} + +message ClientTelemetryResp +{ + int32 status = 1; // DAOS status code + int32 agent_uid = 2; // UID of agent process +} diff --git a/src/tests/ftest/server/replay.py b/src/tests/ftest/server/replay.py index 1b9f08b114c..61e8f50a6f4 100644 --- a/src/tests/ftest/server/replay.py +++ b/src/tests/ftest/server/replay.py @@ -9,8 +9,7 @@ from apricot import TestWithServers from dfuse_utils import get_dfuse, start_dfuse, stop_dfuse from general_utils import join -from ior_utils import get_ior -from job_manager_utils import get_job_manager +from ior_utils import read_data, write_data from test_utils_pool import add_pool @@ -38,24 +37,6 @@ def create_container(self, details=None, **pool_params): self.log_step(join(' ', 'Creating a container (daos container create)', '-', details)) return self.get_container(pool) - def write_data(self, container, ppn, dfuse=None): - """Write data to the container/dfuse using ior. - - Args: - container (TestContainer): the container to populate - ppn (int): processes per node to use with the ior command - dfuse (Dfuse, optional): dfuse object defining the dfuse mount point. Defaults to None. 
- - Returns: - Ior: the Ior object used to populate the container - """ - job_manager = get_job_manager(self, subprocess=False, timeout=60) - ior = get_ior( - self, job_manager, self.hostlist_clients, self.workdir, None, - namespace='/run/ior_write/*') - ior.run(self.server_group, container.pool, container, None, ppn, dfuse=dfuse) - return ior - def stop_engines(self): """Stop each server engine and verify they are not running.""" self.log_step('Shutting down the engines (dmg system stop)') @@ -80,18 +61,6 @@ def restart_engines(self): self.log.info('Ranks %s failed to start', rank_check) self.fail('Failed to start ranks cleanly') - def read_data(self, ior, container, ppn, dfuse=None): - """Verify the data used to populate the container. - - Args: - ior (Ior): the ior command used to populate the container - container (TestContainer): the container to verify - ppn (int): processes per node to use with the ior command - dfuse (Dfuse, optional): dfuse object defining the dfuse mount point. Defaults to None. - """ - ior.update('flags', self.params.get('flags', '/run/ior_read/*')) - ior.run(self.server_group, container.pool, container, None, ppn, dfuse=dfuse) - def verify_snapshots(self, container, expected): """Verify the snapshots listed for the container match the expected list of snapshots. 
@@ -126,17 +95,16 @@ def test_restart(self): :avocado: tags=server,replay :avocado: tags=ReplayTests,test_restart """ - ppn = self.params.get('ppn', '/run/ior_write/*', 1) container = self.create_container() self.log_step('Write data to the container (ior)') - ior = self.write_data(container, ppn) + ior = write_data(self, container) self.stop_engines() self.restart_engines() self.log_step('Verifying data previously written to the container (ior)') - self.read_data(ior, container, ppn) + read_data(self, ior, container) self.log_step('Test passed') def test_replay_posix(self): @@ -159,7 +127,6 @@ def test_replay_posix(self): :avocado: tags=server,replay :avocado: tags=ReplayTests,test_replay_posix """ - ppn = self.params.get('ppn', '/run/ior_write/*', 1) container = self.create_container() self.log_step('Start dfuse') @@ -167,7 +134,7 @@ def test_replay_posix(self): start_dfuse(self, dfuse, container.pool, container) self.log_step('Write data to the dfuse mount point (ior)') - ior = self.write_data(container, ppn, dfuse) + ior = write_data(self, container, dfuse=dfuse) self.log_step('After the read has completed, unmount dfuse') stop_dfuse(self, dfuse) @@ -179,10 +146,10 @@ def test_replay_posix(self): start_dfuse(self, dfuse) self.log_step('Verifying data previously written to the dfuse mount point (ior)') - self.read_data(ior, container, ppn, dfuse) + read_data(self, ior, container, dfuse=dfuse) self.log_step('Write additional data to the dfuse mount point (ior)') - ior = self.write_data(container, ppn, dfuse) + ior = write_data(self, container, dfuse=dfuse) self.log.info('Test passed') @@ -210,14 +177,13 @@ def test_replay_snapshots(self): :avocado: tags=server,replay :avocado: tags=ReplayTests,test_replay_snapshots """ - ppn = self.params.get('ppn', '/run/ior_write/*', 1) container = self.create_container() snapshots = [] for index in range(1, 4): step = join(' ', index, 'of', 3) self.log_step(join(' ', 'Write data to the container (ior)', '-', step)) - 
self.write_data(container, ppn) + write_data(self, container) self.log_step(join(' ', 'Creating a snapshot (daos container create-snap)', '-', step)) snapshots.append(container.create_snap()['response']['epoch']) @@ -348,7 +314,6 @@ def test_replay_no_check_pointing(self): :avocado: tags=server,replay :avocado: tags=ReplayTests,test_replay_no_check_pointing """ - ppn = self.params.get('ppn', '/run/ior_write/*', 1) container = self.create_container() self.log_step('Disabling check pointing on {}'.format(container.pool)) @@ -358,7 +323,7 @@ def test_replay_no_check_pointing(self): self.fail('Pool check pointing not disabled before engine restart') self.log_step('Write data to the container (ior)') - ior = self.write_data(container, ppn) + ior = write_data(self, container) self.stop_engines() self.restart_engines() @@ -371,7 +336,7 @@ def test_replay_no_check_pointing(self): self.fail('Pool check pointing not disabled after engine restart') self.log_step('Verifying data previously written to the container (ior)') - self.read_data(ior, container, ppn) + read_data(self, ior, container) self.log_step('Test passed') def test_replay_check_pointing(self): @@ -392,14 +357,13 @@ def test_replay_check_pointing(self): :avocado: tags=server,replay :avocado: tags=ReplayTests,test_replay_check_pointing """ - ppn = self.params.get('ppn', '/run/ior_write/*', 1) frequency = 5 container = self.create_container( properties=f'checkpoint:timed,checkpoint_freq:{frequency}') self.log.info('%s check point frequency: %s seconds', container.pool, frequency) self.log_step('Write data to the container (ior)') - ior = self.write_data(container, ppn) + ior = write_data(self, container) self.log_step('Waiting for check pointing to complete (sleep {})'.format(frequency * 2)) time.sleep(frequency * 2) @@ -408,5 +372,5 @@ def test_replay_check_pointing(self): self.restart_engines() self.log_step('Verifying data previously written to the container (ior)') - self.read_data(ior, container, ppn) + 
read_data(self, ior, container) self.log_step('Test passed') diff --git a/src/tests/ftest/server/replay.yaml b/src/tests/ftest/server/replay.yaml index 16158253641..1e4fb0b81e0 100644 --- a/src/tests/ftest/server/replay.yaml +++ b/src/tests/ftest/server/replay.yaml @@ -21,8 +21,7 @@ container: dfs_oclass: SX ior: &ior_base - client_processes: - ppn: 4 + ppn: 4 api: DFS transfer_size: 512K block_size: 1G diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.py b/src/tests/ftest/telemetry/basic_client_telemetry.py new file mode 100644 index 00000000000..1d115b4c95e --- /dev/null +++ b/src/tests/ftest/telemetry/basic_client_telemetry.py @@ -0,0 +1,54 @@ +""" + (C) Copyright 2024 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +from ior_utils import read_data, write_data +from telemetry_test_base import TestWithClientTelemetry + + +class BasicClientTelemetry(TestWithClientTelemetry): + """Tests to verify basic client telemetry. + + :avocado: recursive + """ + + def test_client_metrics_exist(self): + """JIRA ID: DAOS-8331. + + Verify that the client-side telemetry captures some throughput metrics. + After performing some I/O, there should be some client telemetry data. 
+ + Test steps: + 1) Create a pool and container + 2) Perform some I/O with IOR + 3) Verify that there is some client telemetry data + + :avocado: tags=all,daily_regression + :avocado: tags=vm + :avocado: tags=telemetry + :avocado: tags=BasicClientTelemetry,test_client_metrics_exist + """ + # create pool and container + pool = self.get_pool(connect=True) + container = self.get_container(pool=pool) + + self.log_step('Writing data to the pool (ior)') + ior = write_data(self, container) + self.log_step('Reading data from the pool (ior)') + read_data(self, ior, container) + + metric_names = [ + "client_pool_xferred_fetch", + "client_pool_xferred_update", + ] + + self.log_step('Reading client telemetry (reads & writes should be > 0)') + after_metrics = self.telemetry.collect_client_data(metric_names) + for metric in metric_names: + msum = 0 + for value in after_metrics[metric].values(): + msum += value + self.assertGreater(msum, 0) + + self.log_step('Test passed') diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.yaml b/src/tests/ftest/telemetry/basic_client_telemetry.yaml new file mode 100644 index 00000000000..d585dc81fda --- /dev/null +++ b/src/tests/ftest/telemetry/basic_client_telemetry.yaml @@ -0,0 +1,46 @@ +hosts: + test_servers: 1 + test_clients: 1 + +timeout: 180 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +agent_config: + telemetry_port: 9191 + telemetry_retain: 30s + telemetry_enabled: true + +pool: + scm_size: 2G + +container: + type: POSIX + control_method: daos + dfs_oclass: SX + +ior: &ior_base + ppn: 4 + api: DFS + transfer_size: 512K + block_size: 1M + dfs_oclass: SX + +ior_write: + <<: *ior_base + flags: "-k -v -w -W -G 1" + +ior_read: + <<: *ior_base + flags: "-v -r -R -G 1" diff --git a/src/tests/ftest/telemetry/dkey_akey_enum_punch.py b/src/tests/ftest/telemetry/dkey_akey_enum_punch.py index 
99481406d8e..b97d1526b2b 100644 --- a/src/tests/ftest/telemetry/dkey_akey_enum_punch.py +++ b/src/tests/ftest/telemetry/dkey_akey_enum_punch.py @@ -1,5 +1,5 @@ ''' - (C) Copyright 2018-2023 Intel Corporation. + (C) Copyright 2018-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -232,13 +232,13 @@ def test_dkey_akey_enum_punch(self): # Obtain and verify the io metrics 1 to 4. ### # engine_pool_ops_dkey_enum - pool_dkey_enum = self.telemetry.ENGINE_POOL_METRICS[5] + pool_dkey_enum = self.telemetry.ENGINE_POOL_OPS_DKEY_ENUM_METRICS # engine_pool_ops_akey_enum - pool_akey_enum = self.telemetry.ENGINE_POOL_METRICS[2] + pool_akey_enum = self.telemetry.ENGINE_POOL_OPS_AKEY_ENUM_METRICS # engine_pool_ops_dkey_punch - pool_dkey_punch = self.telemetry.ENGINE_POOL_METRICS[6] + pool_dkey_punch = self.telemetry.ENGINE_POOL_OPS_DKEY_PUNCH_METRICS # engine_pool_ops_akey_punch - pool_akey_punch = self.telemetry.ENGINE_POOL_METRICS[3] + pool_akey_punch = self.telemetry.ENGINE_POOL_OPS_AKEY_PUNCH_METRICS specific_metrics = [ pool_dkey_enum, pool_akey_enum, pool_dkey_punch, pool_akey_punch, @@ -357,9 +357,9 @@ def test_pool_tgt_dkey_akey_punch(self): self.telemetry.dmg.verbose = False - # Obtain and verify the pool metrics 1 and 2 ### - pool_tgt_dkey_punch = self.telemetry.ENGINE_POOL_METRICS[21] - pool_tgt_akey_punch = self.telemetry.ENGINE_POOL_METRICS[20] + # Obtain and verify the pool target punch metrics + pool_tgt_dkey_punch = self.telemetry.ENGINE_POOL_OPS_TGT_DKEY_PUNCH_METRICS + pool_tgt_akey_punch = self.telemetry.ENGINE_POOL_OPS_TGT_AKEY_PUNCH_METRICS specific_metrics = [pool_tgt_dkey_punch, pool_tgt_akey_punch] pool_out = self.telemetry.get_pool_metrics( specific_metrics=specific_metrics) diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 46b793f31ef..7f92b9f479a 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -1,5 +1,5 @@ """ - 
(C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -57,10 +57,19 @@ def __init__(self, filename, common_yaml): # Specifies the log level for agent logs. # - exclude_fabric_ifaces: , Ignore a subset of fabric interfaces when selecting # an interface for client applications. + # - telemetry_port: , e.g. 9192 + # Enable Prometheus endpoint for client telemetry. + # - telemetry_enabled: , e.g. True + # Enable client telemetry for all client processes. + # - telemetry_retain: , e.g. 5m + # Time to retain per-client telemetry data. self.runtime_dir = BasicParameter(None, "/var/run/daos_agent") self.log_file = LogParameter(log_dir, None, "daos_agent.log") self.control_log_mask = BasicParameter(None, "debug") self.exclude_fabric_ifaces = BasicParameter(None) + self.telemetry_port = BasicParameter(None) + self.telemetry_enabled = BasicParameter(None) + self.telemetry_retain = BasicParameter(None) def update_log_file(self, name): """Update the log file name for the daos agent. diff --git a/src/tests/ftest/util/ior_utils.py b/src/tests/ftest/util/ior_utils.py index cd54b0e19af..b729afc00ee 100644 --- a/src/tests/ftest/util/ior_utils.py +++ b/src/tests/ftest/util/ior_utils.py @@ -1,5 +1,5 @@ """ -(C) Copyright 2018-2023 Intel Corporation. +(C) Copyright 2018-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -13,6 +13,7 @@ from duns_utils import format_path from exception_utils import CommandFailure from general_utils import get_log_file +from job_manager_utils import get_job_manager def get_ior(test, manager, hosts, path, slots, namespace="/run/ior/*", ior_params=None): @@ -139,6 +140,83 @@ def thread_run_ior(thread_queue, job_id, test, manager, log, hosts, path, slots, thread_queue.put(thread_result) +def write_data(test, container, namespace='/run/ior_write/*', **ior_run_params): + """Write data to the container/dfuse using ior. 
+ + Simple method for test classes to use to write data with ior. While not required, this is setup + by default to pull in ior parameters from the test yaml using a format similar to: + + ior: &ior_base + api: DFS + transfer_size: 512K + block_size: 1G + ppn: 2 + + ior_write: + <<: *ior_base + flags: "-k -v -w -W -G 1" + + ior_read: + <<: *ior_base + flags: "-v -r -R -G 1" + + Args: + test (Test): avocado Test object + container (TestContainer): the container to populate + namespace (str, optional): path to ior yaml parameters. Defaults to '/run/ior_write/*'. + ior_run_params (dict): optional params for the Ior.run() command, like ppn, dfuse, etc. + + Returns: + Ior: the Ior object used to populate the container + """ + job_manager = get_job_manager(test, subprocess=False, timeout=60) + ior = get_ior(test, job_manager, test.hostlist_clients, test.workdir, None, namespace) + + if 'processes' not in ior_run_params: + ior_run_params['processes'] = test.params.get('processes', namespace, None) + elif 'ppn' not in ior_run_params: + ior_run_params['ppn'] = test.params.get('ppn', namespace, None) + + ior.run(test.server_group, container.pool, container, **ior_run_params) + return ior + + +def read_data(test, ior, container, namespace='/run/ior_read/*', **ior_run_params): + """Verify the data used to populate the container. + + Simple method for test classes to use to read data with ior designed to be used with the Ior + object returned by the write_data() method. 
While not required, this is setup by default to pull + in ior parameters from the test yaml using a format similar to: + + ior: &ior_base + api: DFS + transfer_size: 512K + block_size: 1G + ppn: 2 + + ior_write: + <<: *ior_base + flags: "-k -v -w -W -G 1" + + ior_read: + <<: *ior_base + flags: "-v -r -R -G 1" + + Args: + test (Test): avocado Test object + ior (Ior): the ior command used to populate the container + container (TestContainer): the container to verify + namespace (str, optional): path to ior yaml parameters. Defaults to '/run/ior_read/*'. + ior_run_params (dict): optional params for the Ior.run() command, like ppn, dfuse, etc. + """ + if 'processes' not in ior_run_params: + ior_run_params['processes'] = test.params.get('processes', namespace, None) + elif 'ppn' not in ior_run_params: + ior_run_params['ppn'] = test.params.get('ppn', namespace, 1) + ior.update('flags', test.params.get('flags', namespace)) + ior.run(test.server_group, container.pool, container, **ior_run_params) + + class IorCommand(SubProcessCommand): # pylint: disable=too-many-instance-attributes # pylint: disable=wrong-spelling-in-docstring diff --git a/src/tests/ftest/util/telemetry_test_base.py b/src/tests/ftest/util/telemetry_test_base.py index 7641fe8d546..6a2389935f7 100644 --- a/src/tests/ftest/util/telemetry_test_base.py +++ b/src/tests/ftest/util/telemetry_test_base.py @@ -1,10 +1,10 @@ """ -(C) Copyright 2021-2023 Intel Corporation. +(C) Copyright 2021-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ from apricot import TestWithServers -from telemetry_utils import TelemetryUtils +from telemetry_utils import ClientTelemetryUtils, TelemetryUtils class TestWithTelemetry(TestWithServers): @@ -263,3 +263,36 @@ def sum_values(metric_out): total += value return total + + +class TestWithClientTelemetry(TestWithTelemetry): + """Test client telemetry metrics. 
+ + :avocado: recursive + """ + def setUp(self): + """Set up each test case.""" + super().setUp() + self.telemetry = ClientTelemetryUtils( + self.get_dmg_command(), self.server_managers[0].hosts, self.hostlist_clients) + + def verify_client_telemetry_list(self, with_pools=False): + """Verify the dmg telemetry metrics list command output.""" + # Define a list of expected telemetry metrics names + expected = self.telemetry.get_all_client_metrics_names( + with_pools=with_pools) + + # List all of the telemetry metrics + result = self.telemetry.list_metrics() + + # Verify the lists are detected for each agent + errors = self.compare_lists( + list(result), self.hostlist_clients, 0, "", + "telemetry metrics list hosts") + for host, host_result in result.items(): + errors.extend( + self.compare_lists(expected, host_result, 2, host, "telemetry metric names")) + if errors: + self.fail("\n".join(errors)) + + self.log.info("Test PASSED") diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 1a51cf23848..091e8da68d0 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -1,8 +1,10 @@ """ -(C) Copyright 2021-2023 Intel Corporation. +(C) Copyright 2021-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ +# pylint: disable=too-many-lines +import copy import re from logging import getLogger @@ -28,7 +30,7 @@ def _gen_stats_metrics(basename): class TelemetryUtils(): # pylint: disable=too-many-nested-blocks - """Defines a object used to verify telemetry information.""" + """Defines an object used to verify server telemetry information.""" # Define a set of patterns that shouldn't be used for comparisons. 
METRIC_EXCLUDE_PATTERNS = [ @@ -41,14 +43,45 @@ class TelemetryUtils(): "engine_pool_ops_cont_create", "engine_pool_ops_cont_destroy", "engine_pool_ops_cont_query"] - ENGINE_POOL_METRICS = [ + ENGINE_POOL_ACTION_METRICS = [ + "engine_pool_resent", + "engine_pool_restarted", + "engine_pool_retry", + "engine_pool_started_at", + "engine_pool_xferred_fetch", + "engine_pool_xferred_update"] + ENGINE_POOL_BLOCK_ALLOCATOR_METRICS = [ + "engine_pool_block_allocator_alloc_hint", + "engine_pool_block_allocator_alloc_large", + "engine_pool_block_allocator_alloc_small", + "engine_pool_block_allocator_frags_aging", + "engine_pool_block_allocator_frags_large", + "engine_pool_block_allocator_frags_small", + "engine_pool_block_allocator_free_blks"] + ENGINE_POOL_CHECKPOINT_METRICS = [ + *_gen_stats_metrics("engine_pool_checkpoint_dirty_chunks"), + *_gen_stats_metrics("engine_pool_checkpoint_dirty_pages"), + *_gen_stats_metrics("engine_pool_checkpoint_duration"), + *_gen_stats_metrics("engine_pool_checkpoint_iovs_copied"), + *_gen_stats_metrics("engine_pool_checkpoint_wal_purged")] + ENGINE_POOL_EC_UPDATE_METRICS = [ + "engine_pool_EC_update_full_stripe", + "engine_pool_EC_update_partial"] + ENGINE_POOL_ENTRIES_METRICS = [ "engine_pool_entries_dtx_batched_degree", - "engine_pool_entries_dtx_batched_total", - "engine_pool_ops_akey_enum", - "engine_pool_ops_akey_punch", + "engine_pool_entries_dtx_batched_total"] + ENGINE_POOL_OPS_AKEY_ENUM_METRICS = "engine_pool_ops_akey_enum" + ENGINE_POOL_OPS_DKEY_ENUM_METRICS = "engine_pool_ops_dkey_enum" + ENGINE_POOL_OPS_AKEY_PUNCH_METRICS = "engine_pool_ops_akey_punch" + ENGINE_POOL_OPS_DKEY_PUNCH_METRICS = "engine_pool_ops_dkey_punch" + ENGINE_POOL_OPS_TGT_AKEY_PUNCH_METRICS = "engine_pool_ops_tgt_akey_punch" + ENGINE_POOL_OPS_TGT_DKEY_PUNCH_METRICS = "engine_pool_ops_tgt_dkey_punch" + ENGINE_POOL_OPS_METRICS = [ + ENGINE_POOL_OPS_AKEY_ENUM_METRICS, + ENGINE_POOL_OPS_DKEY_ENUM_METRICS, + ENGINE_POOL_OPS_AKEY_PUNCH_METRICS, + 
ENGINE_POOL_OPS_DKEY_PUNCH_METRICS, "engine_pool_ops_compound", - "engine_pool_ops_dkey_enum", - "engine_pool_ops_dkey_punch", "engine_pool_ops_dtx_abort", "engine_pool_ops_dtx_check", "engine_pool_ops_dtx_commit", @@ -57,13 +90,14 @@ class TelemetryUtils(): "engine_pool_ops_ec_rep", "engine_pool_ops_fetch", "engine_pool_ops_key_query", + "engine_pool_ops_key2anchor", "engine_pool_ops_migrate", "engine_pool_ops_obj_enum", "engine_pool_ops_obj_punch", "engine_pool_ops_obj_sync", "engine_pool_ops_recx_enum", - "engine_pool_ops_tgt_akey_punch", - "engine_pool_ops_tgt_dkey_punch", + ENGINE_POOL_OPS_TGT_AKEY_PUNCH_METRICS, + ENGINE_POOL_OPS_TGT_DKEY_PUNCH_METRICS, "engine_pool_ops_tgt_punch", "engine_pool_ops_tgt_update", "engine_pool_ops_update", @@ -71,10 +105,8 @@ class TelemetryUtils(): "engine_pool_ops_pool_disconnect", "engine_pool_ops_pool_evict", "engine_pool_ops_pool_query", - "engine_pool_ops_pool_query_space", - "engine_pool_resent", - "engine_pool_restarted", - "engine_pool_retry", + "engine_pool_ops_pool_query_space"] + ENGINE_POOL_SCRUBBER_METRICS = [ "engine_pool_scrubber_busy_time", "engine_pool_scrubber_bytes_scrubbed_current", "engine_pool_scrubber_bytes_scrubbed_prev", @@ -88,8 +120,8 @@ class TelemetryUtils(): "engine_pool_scrubber_next_tree_scrub", *_gen_stats_metrics("engine_pool_scrubber_prev_duration"), "engine_pool_scrubber_scrubber_started", - "engine_pool_scrubber_scrubs_completed", - "engine_pool_started_at", + "engine_pool_scrubber_scrubs_completed"] + ENGINE_POOL_VOS_AGGREGATION_METRICS = [ "engine_pool_vos_aggregation_akey_deleted", "engine_pool_vos_aggregation_akey_scanned", "engine_pool_vos_aggregation_akey_skipped", @@ -105,21 +137,19 @@ class TelemetryUtils(): "engine_pool_vos_aggregation_obj_deleted", "engine_pool_vos_aggregation_obj_scanned", "engine_pool_vos_aggregation_obj_skipped", - "engine_pool_vos_aggregation_uncommitted", + "engine_pool_vos_aggregation_uncommitted"] + ENGINE_POOL_VOS_SPACE_METRICS = [ 
"engine_pool_vos_space_nvme_used", - "engine_pool_vos_space_scm_used", - "engine_pool_xferred_fetch", - "engine_pool_xferred_update", - "engine_pool_EC_update_full_stripe", - "engine_pool_EC_update_partial", - "engine_pool_block_allocator_alloc_hint", - "engine_pool_block_allocator_alloc_large", - "engine_pool_block_allocator_alloc_small", - "engine_pool_block_allocator_frags_aging", - "engine_pool_block_allocator_frags_large", - "engine_pool_block_allocator_frags_small", - "engine_pool_block_allocator_free_blks", - "engine_pool_ops_key2anchor"] + "engine_pool_vos_space_scm_used"] + ENGINE_POOL_METRICS = ENGINE_POOL_ACTION_METRICS +\ + ENGINE_POOL_BLOCK_ALLOCATOR_METRICS +\ + ENGINE_POOL_CHECKPOINT_METRICS +\ + ENGINE_POOL_EC_UPDATE_METRICS +\ + ENGINE_POOL_ENTRIES_METRICS +\ + ENGINE_POOL_OPS_METRICS +\ + ENGINE_POOL_SCRUBBER_METRICS +\ + ENGINE_POOL_VOS_AGGREGATION_METRICS +\ + ENGINE_POOL_VOS_SPACE_METRICS ENGINE_EVENT_METRICS = [ "engine_events_dead_ranks", "engine_events_last_event_ts", @@ -383,6 +413,7 @@ def __init__(self, dmg, servers): self.log = getLogger(__name__) self.dmg = dmg self.hosts = NodeSet.fromlist(servers) + self._data = MetricData() def get_all_server_metrics_names(self, server, with_pools=False): """Get all the telemetry metrics names for this server. @@ -430,7 +461,7 @@ def is_excluded_metric(self, name): return True return False - def list_metrics(self): + def list_metrics(self, hosts=None): """List the available metrics for each host. 
Returns: @@ -438,8 +469,9 @@ def list_metrics(self): """ info = {} - self.log.info("Listing telemetry metrics from %s", self.hosts) - for host in self.hosts: + host_list = hosts or self.hosts + self.log.info("Listing telemetry metrics from %s", host_list) + for host in host_list: data = self.dmg.telemetry_metrics_list(host=host) info[host] = [] if "response" in data: @@ -449,7 +481,46 @@ def list_metrics(self): info[host].append(entry["name"]) return info - def get_metrics(self, name): + def collect_data(self, names, hosts=None): + """Collect telemetry data for the specified metrics. + + Args: + names (list): list of metric names + + Returns: + dict: dictionary of metric values keyed by the metric name and combination of metric + labels and values, e.g. + : { + : , + : , + ... + }, + ... + """ + host_list = hosts or self.hosts + self.log.info("Collecting telemetry data from %s", host_list) + return self._data.collect(self.log, names, host_list, self.dmg) + + def display_data(self): + """Display the telemetry metric values.""" + return self._data.display(self.log) + + def verify_data(self, ranges): + """Verify the telemetry metric values. + + Args: + ranges (dict): dictionary of min/max lists for each metric to be verified, e.g. + { + : [10], <--- will verify value of is at least 10 + : [0, 9] <--- will verify value of is between 0-9 + } + + Returns: + bool: True if all metric values are within the ranges specified; False otherwise + """ + return self._data.verify(self.log, ranges) + + def get_metrics(self, name, hosts=None): """Obtain the specified metric information for each host. 
Args: @@ -461,8 +532,9 @@ def get_metrics(self, name): """ info = {} - self.log.info("Querying telemetry metric %s from %s", name, self.hosts) - for host in self.hosts: + host_list = hosts or self.hosts + self.log.info("Querying telemetry metric %s from %s", name, host_list) + for host in host_list: data = self.dmg.telemetry_metrics_query(host=host, metrics=name) info[host] = {} if "response" in data: @@ -651,7 +723,7 @@ def get_nvme_metrics(self, specific_metrics=None): """Get the NVMe telemetry metrics. Args: - specific_metrics(list): list of specific NVMe metrics + specific_metrics (list): list of specific NVMe metrics Returns: dict: dictionary of dictionaries of NVMe metric names and @@ -728,3 +800,494 @@ def verify_metric_value(self, metrics_data, min_value=None, max_value=None): self.log.info(" %-12s %-4s %s %s", host, rank, value, invalid) return status + + +class ClientTelemetryUtils(TelemetryUtils): + """Defines an object used to verify server and client telemetry information.""" + + CLIENT_EVENT_METRICS = [ + "client_started_at"] + CLIENT_POOL_ACTION_METRICS = [ + "client_pool_resent", + "client_pool_restarted", + "client_pool_retry", + "client_pool_xferred_fetch", + "client_pool_xferred_update"] + CLIENT_POOL_OPS_METRICS = [ + "client_pool_ops_akey_enum", + "client_pool_ops_akey_punch", + "client_pool_ops_compound", + "client_pool_ops_dkey_enum", + "client_pool_ops_dkey_punch", + "client_pool_ops_ec_agg", + "client_pool_ops_ec_rep", + "client_pool_ops_fetch", + "client_pool_ops_key2anchor", + "client_pool_ops_key_query", + "client_pool_ops_migrate", + "client_pool_ops_obj_coll_punch", + "client_pool_ops_obj_coll_query", + "client_pool_ops_obj_enum", + "client_pool_ops_obj_punch", + "client_pool_ops_obj_sync", + "client_pool_ops_recx_enum", + "client_pool_ops_tgt_akey_punch", + "client_pool_ops_tgt_dkey_punch", + "client_pool_ops_tgt_punch", + "client_pool_ops_tgt_update", + "client_pool_ops_update"] + CLIENT_POOL_EC_UPDATE_METRICS = [ + 
"client_pool_EC_update_full_stripe", + "client_pool_EC_update_partial"] + CLIENT_POOL_METRICS = CLIENT_POOL_ACTION_METRICS +\ + CLIENT_POOL_OPS_METRICS +\ + CLIENT_POOL_EC_UPDATE_METRICS + CLIENT_IO_LATENCY_FETCH_METRICS = \ + _gen_stats_metrics("client_io_latency_fetch") + CLIENT_IO_LATENCY_UPDATE_METRICS = \ + _gen_stats_metrics("client_io_latency_update") + CLIENT_IO_OPS_AKEY_ENUM_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_akey_enum_active") + CLIENT_IO_OPS_AKEY_ENUM_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_akey_enum_latency") + CLIENT_IO_OPS_AKEY_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_akey_punch_active") + CLIENT_IO_OPS_AKEY_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_akey_punch_latency") + CLIENT_IO_OPS_COMPOUND_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_compound_active") + CLIENT_IO_OPS_COMPOUND_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_compound_latency") + CLIENT_IO_OPS_DKEY_ENUM_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_dkey_enum_active") + CLIENT_IO_OPS_DKEY_ENUM_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_dkey_enum_latency") + CLIENT_IO_OPS_DKEY_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_dkey_punch_active") + CLIENT_IO_OPS_DKEY_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_dkey_punch_latency") + CLIENT_IO_OPS_EC_AGG_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_ec_agg_active") + CLIENT_IO_OPS_EC_AGG_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_ec_agg_latency") + CLIENT_IO_OPS_EC_REP_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_ec_rep_active") + CLIENT_IO_OPS_EC_REP_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_ec_rep_latency") + CLIENT_IO_OPS_FETCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_fetch_active") + CLIENT_IO_OPS_KEY2ANCHOR_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_key2anchor_active") + CLIENT_IO_OPS_KEY2ANCHOR_LATENCY_METRICS = \ + 
_gen_stats_metrics("client_io_ops_key2anchor_latency") + CLIENT_IO_OPS_KEY_QUERY_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_key_query_active") + CLIENT_IO_OPS_KEY_QUERY_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_key_query_latency") + CLIENT_IO_OPS_MIGRATE_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_migrate_active") + CLIENT_IO_OPS_MIGRATE_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_migrate_latency") + CLIENT_IO_OPS_OBJ_COLL_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_coll_punch_active") + CLIENT_IO_OPS_OBJ_COLL_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_coll_punch_latency") + CLIENT_IO_OPS_OBJ_COLL_QUERY_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_coll_query_active") + CLIENT_IO_OPS_OBJ_COLL_QUERY_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_coll_query_latency") + CLIENT_IO_OPS_OBJ_ENUM_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_enum_active") + CLIENT_IO_OPS_OBJ_ENUM_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_enum_latency") + CLIENT_IO_OPS_OBJ_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_punch_active") + CLIENT_IO_OPS_OBJ_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_punch_latency") + CLIENT_IO_OPS_OBJ_SYNC_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_sync_active") + CLIENT_IO_OPS_OBJ_SYNC_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_obj_sync_latency") + CLIENT_IO_OPS_RECX_ENUM_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_recx_enum_active") + CLIENT_IO_OPS_RECX_ENUM_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_recx_enum_latency") + CLIENT_IO_OPS_TGT_AKEY_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_akey_punch_active") + CLIENT_IO_OPS_TGT_AKEY_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_akey_punch_latency") + CLIENT_IO_OPS_TGT_DKEY_PUNCH_ACTIVE_METRICS = \ + 
_gen_stats_metrics("client_io_ops_tgt_dkey_punch_active") + CLIENT_IO_OPS_TGT_DKEY_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_dkey_punch_latency") + CLIENT_IO_OPS_TGT_PUNCH_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_punch_active") + CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_punch_latency") + CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_tgt_update_active") + CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS = \ + _gen_stats_metrics("client_io_ops_update_active") + CLIENT_IO_METRICS = CLIENT_IO_LATENCY_FETCH_METRICS +\ + CLIENT_IO_LATENCY_UPDATE_METRICS +\ + CLIENT_IO_OPS_AKEY_ENUM_ACTIVE_METRICS +\ + CLIENT_IO_OPS_AKEY_ENUM_LATENCY_METRICS +\ + CLIENT_IO_OPS_AKEY_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_AKEY_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_COMPOUND_ACTIVE_METRICS +\ + CLIENT_IO_OPS_COMPOUND_LATENCY_METRICS +\ + CLIENT_IO_OPS_DKEY_ENUM_ACTIVE_METRICS +\ + CLIENT_IO_OPS_DKEY_ENUM_LATENCY_METRICS +\ + CLIENT_IO_OPS_DKEY_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_DKEY_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_EC_AGG_ACTIVE_METRICS +\ + CLIENT_IO_OPS_EC_AGG_LATENCY_METRICS +\ + CLIENT_IO_OPS_EC_REP_ACTIVE_METRICS +\ + CLIENT_IO_OPS_EC_REP_LATENCY_METRICS +\ + CLIENT_IO_OPS_FETCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_KEY2ANCHOR_ACTIVE_METRICS +\ + CLIENT_IO_OPS_KEY2ANCHOR_LATENCY_METRICS +\ + CLIENT_IO_OPS_KEY_QUERY_ACTIVE_METRICS +\ + CLIENT_IO_OPS_KEY_QUERY_LATENCY_METRICS +\ + CLIENT_IO_OPS_MIGRATE_ACTIVE_METRICS +\ + CLIENT_IO_OPS_MIGRATE_LATENCY_METRICS +\ + CLIENT_IO_OPS_OBJ_COLL_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_OBJ_COLL_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_OBJ_COLL_QUERY_ACTIVE_METRICS +\ + CLIENT_IO_OPS_OBJ_COLL_QUERY_LATENCY_METRICS +\ + CLIENT_IO_OPS_OBJ_ENUM_ACTIVE_METRICS +\ + CLIENT_IO_OPS_OBJ_ENUM_LATENCY_METRICS +\ + CLIENT_IO_OPS_OBJ_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_OBJ_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_OBJ_SYNC_ACTIVE_METRICS +\
+ CLIENT_IO_OPS_OBJ_SYNC_LATENCY_METRICS +\ + CLIENT_IO_OPS_RECX_ENUM_ACTIVE_METRICS +\ + CLIENT_IO_OPS_RECX_ENUM_LATENCY_METRICS +\ + CLIENT_IO_OPS_TGT_AKEY_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_TGT_AKEY_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_TGT_DKEY_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_TGT_DKEY_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_TGT_PUNCH_ACTIVE_METRICS +\ + CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS +\ + CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS +\ + CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS + + def __init__(self, dmg, servers, clients): + """Create a ClientTelemetryUtils object. + + Args: + dmg (DmgCommand): the DmgCommand object configured to communicate + with the servers + servers (list): a list of server host names + clients (list): a list of client host names + """ + super().__init__(dmg, servers) + self.clients = NodeSet.fromlist(clients) + + def get_all_client_metrics_names(self, with_pools=False): + """Get all the telemetry metrics names for this client. + + Args: + with_pools (bool): if True, include pool metrics in the results + + Returns: + list: all of the telemetry metrics names for this client + + """ + all_metrics_names = list(self.CLIENT_EVENT_METRICS) + all_metrics_names.extend(self.CLIENT_IO_METRICS) + if with_pools: + all_metrics_names.extend(self.CLIENT_POOL_METRICS) + + return all_metrics_names + + def list_client_metrics(self): + """List the available metrics for each host. + + Returns: + dict: a dictionary of host keys linked to a list of metric names + + """ + return super().list_metrics(hosts=self.clients) + + def collect_client_data(self, names): + """Collect telemetry data for the specified metrics. + + Args: + names (list): list of metric names + + Returns: + dict: dictionary of metric values keyed by the metric name and combination of metric + labels and values, e.g. + : { + : , + : , + ... + }, + ... 
+ """ + return super().collect_data(names, hosts=self.clients) + + def get_client_metrics(self, name): + """Obtain the specified metric information for each host. + + Args: + name (str): Comma-separated list of metric names to query. + + Returns: + dict: a dictionary of host keys linked to metric data for each + metric name specified + + """ + return super().get_metrics(name, hosts=self.clients) + + +class MetricData(): + """Defines a object used to collect, display, and verify telemetry metric data.""" + + def __init__(self): + """Initialize a MetricData object.""" + self._data = {} + self._display = {'data': {}, 'labels': set(), 'widths': {}} + + def collect(self, log, names, hosts, dmg): + """Collect telemetry data for the specified metrics. + + Args: + log (logger): logger for the messages produced by this method + names (list): list of metric names + hosts (NodeSet): set of servers from which to collect the telemetry metrics + dmg (DmgCommand): the DmgCommand object configured to communicate with the servers + + Returns: + dict: dictionary of metric values keyed by the metric name and combination of metric + labels and values, e.g. + : { + : , + : , + ... + }, + ... + """ + info = self._get_metrics(log, ','.join(names), hosts, dmg) + self._data = self._get_data(names, info) + return copy.deepcopy(self._data) + + def display(self, log): + """Display the telemetry metric values. 
+ + Args: + log (logger): logger for the messages produced by this method + """ + self._set_display() + columns = ['metric'] + self._display['labels'] + ['value'] + format_str = ' '.join([f"%-{self._display['widths'][name]}s" for name in columns]) + + log.info('-' * 80) + log.info('Telemetry Metric Information') + log.info(format_str, *[name.title() for name in columns]) + log.info(format_str, *['-' * self._display['widths'][name] for name in columns]) + for metric in sorted(self._display['data']): + for value, labels_list in self._display['data'][metric].items(): + for labels in labels_list: + log.info(format_str, metric, *self._label_values(labels), value) + + def verify(self, log, ranges): + """Verify the telemetry metric values. + + Args: + log (logger): logger for the messages produced by this method + ranges (dict): dictionary of expected metric value ranges with a minimum metric key and + optional label key to at least a minimum metric value and optional maximum metric + value, e.g. + {: } or + {: []} or + {: [, ]} or + {: {