From 7664f8aa7b05e3b0d8744babdfa6d16f46025a5e Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Tue, 25 Jul 2023 09:42:16 -0700 Subject: [PATCH] DAOS-13940 gurt: Fix log rotation error (#12660) (#12694) - When rotating a log that has stderr merged into it, the log was reopened but a new fd was not retrieved, resulting in the old fd sometimes being bad. Signed-off-by: Alexander A Oganezov --- src/cart/crt_init.c | 2 +- src/gurt/dlog.c | 159 +++++++++++++++++++++++++++----------------- 2 files changed, 100 insertions(+), 61 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 0c449173e0b..e6231f8509f 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -64,7 +64,7 @@ dump_envariables(void) int i; char *val; char *envars[] = {"D_PROVIDER", "D_INTERFACE", "D_DOMAIN", "D_PORT", - "CRT_PHY_ADDR_STR", "D_LOG_STDERR_IN_LOG", + "CRT_PHY_ADDR_STR", "D_LOG_STDERR_IN_LOG", "D_LOG_SIZE", "D_LOG_FILE", "D_LOG_FILE_APPEND_PID", "D_LOG_MASK", "DD_MASK", "DD_STDERR", "DD_SUBSYS", "CRT_TIMEOUT", "CRT_ATTACH_INFO_PATH", "OFI_PORT", "OFI_INTERFACE", "OFI_DOMAIN", "CRT_CREDIT_EP_CTX", diff --git a/src/gurt/dlog.c b/src/gurt/dlog.c index 430ddbfbec7..e9b7fa2f435 100644 --- a/src/gurt/dlog.c +++ b/src/gurt/dlog.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation.
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -68,6 +68,8 @@ struct d_log_state { int log_old_fd; /** current size of log file */ uint64_t log_size; + /** value of log_size as of the last fstat-based size check */ + uint64_t log_last_check_size; /** max size of log file */ uint64_t log_size_max; /** Callback to get thread id and ULT id */ @@ -358,6 +360,97 @@ static __thread uint64_t pre_err_time; #define LOG_BUF_SIZE (16 << 10) +static bool +log_exceed_threshold(void) +{ + struct stat st; + int rc; + + if (!merge_stderr) + goto out; + + /** + * When stderr is merged into the log file, whatever is written + * through stderr is not accounted for in log_size. To avoid + * overshooting the threshold by much, refresh log_size with fstat + * each time it has grown by 2% of log_size_max since the last check. + */ + if ((mst.log_size - mst.log_last_check_size) < (mst.log_size_max / 50)) + goto out; + + rc = fstat(mst.log_fd, &st); + if (!rc) /* on fstat failure the tracked log_size is kept as is */ + mst.log_size = st.st_size; + + mst.log_last_check_size = mst.log_size; +out: + return mst.log_size + mst.log_buf_nob >= mst.log_size_max; /* true once the buffered bytes would reach the limit */ +} + +/* exceeds the size threshold, rename the current log file + * as backup, create a new log file.
+ */ +static int +log_rotate(void) +{ + int rc = 0; + + if (!mst.log_old) { + rc = asprintf(&mst.log_old, "%s.old", mst.log_file); + if (rc < 0) { + dlog_print_err(errno, "failed to alloc name\n"); + return -1; + } + } + + if (mst.log_old_fd >= 0) { + close(mst.log_old_fd); + mst.log_old_fd = -1; + } + + /* rename the current log file as a backup */ + rc = rename(mst.log_file, mst.log_old); + if (rc) { + dlog_print_err(errno, "failed to rename log file\n"); + return -1; + } + mst.log_old_fd = mst.log_fd; + + /* create a new log file */ + if (merge_stderr) { + if (freopen(mst.log_file, "w", stderr) == NULL) { + fprintf(stderr, "d_log_write(): cannot open new %s: %s\n", + mst.log_file, strerror(errno)); + return -1; + } + + mst.log_fd = fileno(stderr); + } else { + mst.log_fd = open(mst.log_file, O_RDWR | O_CREAT, 0644); + if (mst.log_fd < 0) { + fprintf(stderr, "d_log_write(): failed to recreate log file %s: %s\n", + mst.log_file, strerror(errno)); + return -1; + } + rc = fcntl(mst.log_fd, F_DUPFD, 128); + if (rc < 0) { + fprintf(stderr, + "d_log_write(): failed to recreate log file %s: %s\n", + mst.log_file, strerror(errno)); + close(mst.log_fd); + return -1; + } + close(mst.log_fd); + mst.log_fd = rc; + } + + mst.log_size = 0; + mst.log_last_check_size = 0; + + return rc; +} + + /** * This function can do a few things: * - copy log message @msg to log buffer @@ -405,65 +498,11 @@ d_log_write(char *msg, int len, bool flush) if (mst.log_buf_nob == 0) return 0; /* nothing to write */ - if (mst.log_size + mst.log_buf_nob >= mst.log_size_max) { - /* exceeds the size threshold, rename the current log file - * as backup, create a new log file. 
- */ - if (!mst.log_old) { - rc = asprintf(&mst.log_old, "%s.old", mst.log_file); - if (rc < 0) { - dlog_print_err(errno, "failed to alloc name\n"); - return -1; - } - } - - if (mst.log_old_fd >= 0) { - close(mst.log_old_fd); - mst.log_old_fd = -1; - } - - /* remove the backup log file */ - rc = unlink(mst.log_old); - if (rc && errno != ENOENT) { - dlog_print_err(errno, "failed to unlink old file\n"); - return -1; - } - - /* rename the current log file as a backup */ - rc = rename(mst.log_file, mst.log_old); - if (rc) { - dlog_print_err(errno, "failed to rename log file\n"); - return -1; - } - mst.log_old_fd = mst.log_fd; - - /* create a new log file */ - if (merge_stderr) { - if (freopen(mst.log_file, "w", stderr) == NULL) { - fprintf(stderr, "d_log_write(): cannot open new %s: %s\n", - mst.log_file, strerror(errno)); - return -1; - } - } else { - mst.log_fd = open(mst.log_file, O_RDWR | O_CREAT, 0644); - if (mst.log_fd < 0) { - fprintf(stderr, "d_log_write(): failed to recreate log file %s: %s\n", - mst.log_file, strerror(errno)); - return -1; - } - rc = fcntl(mst.log_fd, F_DUPFD, 128); - if (rc < 0) { - fprintf(stderr, - "d_log_write(): failed to recreate log file %s: %s\n", - mst.log_file, strerror(errno)); - close(mst.log_fd); - return -1; - } - close(mst.log_fd); - mst.log_fd = rc; - } - - mst.log_size = 0; + /* rotate the log if it exceeds the threshold */ + if (log_exceed_threshold()) { + rc = log_rotate(); + if (rc != 0) + return rc; } /* flush the cached log messages */