diff --git a/src/test/Makefile b/src/test/Makefile index e7a9c78a889..5d39101d811 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -105,6 +105,7 @@ OBJ_TESTS = \ obj_tx_realloc\ obj_tx_strdup\ obj_tx_user_data\ + obj_ulog_advanced\ obj_ulog_size\ obj_zones diff --git a/src/test/obj_ulog_advanced/.gitignore b/src/test/obj_ulog_advanced/.gitignore new file mode 100644 index 00000000000..e74b2667e56 --- /dev/null +++ b/src/test/obj_ulog_advanced/.gitignore @@ -0,0 +1 @@ +obj_ulog_advanced \ No newline at end of file diff --git a/src/test/obj_ulog_advanced/Makefile b/src/test/obj_ulog_advanced/Makefile new file mode 100644 index 00000000000..ccf459e2cea --- /dev/null +++ b/src/test/obj_ulog_advanced/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2024, Intel Corporation + +# +# src/test/obj_ulog_advanced/Makefile -- build obj_ulog_advanced test +# +TARGET = obj_ulog_advanced +OBJS = obj_ulog_advanced.o + +BUILD_STATIC_DEBUG=n +BUILD_STATIC_NONDEBUG=n + +# required for proper mock integration +LIBPMEMOBJ=internal-debug + +include ../Makefile.inc +LDFLAGS += $(call extract_funcs, obj_ulog_advanced.c) diff --git a/src/test/obj_ulog_advanced/TEST0 b/src/test/obj_ulog_advanced/TEST0 new file mode 100755 index 00000000000..89cc115f99e --- /dev/null +++ b/src/test/obj_ulog_advanced/TEST0 @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2024, Intel Corporation + +# +# src/test/obj_ulog_advanced/TEST0 -- a kick-start test +# +# Since this directory contains both Bash-based and Python-based tests and match +# files are used by some of them both groups cannot have overlapping numbering. +# Hence the real Bash-based tests' numbering starts where the Python-based tests' +# numbering ends. However, the Bash-based test framework relies on the existence +# of this TEST0 file to keep looking for other Bash-based tests. +# + +. ../unittest/unittest.sh + +. 
./common.sh + +require_fs_type any +require_build_type $COMMON_BUILD_TYPE +require_test_type short + +setup + +pass diff --git a/src/test/obj_ulog_advanced/TEST5 b/src/test/obj_ulog_advanced/TEST5 new file mode 100755 index 00000000000..5dfa5e1d401 --- /dev/null +++ b/src/test/obj_ulog_advanced/TEST5 @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2024, Intel Corporation + +# +# src/test/obj_ulog_advanced/TEST5 -- a test employing pmreorder WITHOUT error +# injection +# +# Please see the source code for the details of the tested scenario. +# + +. ../unittest/unittest.sh + +. ./common.sh + +common_require + +setup + +ERROR_INJECT=0 # an error is NOT being injected +common_setup $ERROR_INJECT + +common_init +common_record +common_replay_and_check $ERROR_INJECT + +check + +pass diff --git a/src/test/obj_ulog_advanced/TEST6 b/src/test/obj_ulog_advanced/TEST6 new file mode 100755 index 00000000000..5262e430f13 --- /dev/null +++ b/src/test/obj_ulog_advanced/TEST6 @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2024, Intel Corporation + +# +# src/test/obj_ulog_advanced/TEST6 -- a test employing pmreorder WITH error +# injection +# +# Please see the source code for the details of the tested scenario. +# + +. ../unittest/unittest.sh + +. 
@g.require_granularity(g.ANY)
# Only the 'debug' build is requested. The choice is arbitrary; its sole
# purpose is to have these tests executed exactly once. No dynamic libraries
# are used nor .static_* builds are available.
@t.require_build('debug')
class OBJ_ULOG_ADVANCED(t.Test):
    # Common base of all the Python-based test cases. A derived class is
    # expected to provide 'slot_num' and may override 'error_inject'.
    test_type = t.Short
    test_case = 'test_init_publish_abort_and_verify'
    error_inject = False

    def run(self, ctx):
        num = self.testnum
        inject = bool(self.error_inject)

        # When the error is injected, the verify step discovers it and
        # aborts the process, hence the SIGABRT-related exit code.
        if inject:
            exit_code = SIGABRT_EXIT_CODE
        else:
            exit_code = 0

        args = (
            'obj_ulog_advanced',
            self.test_case,
            path.join(ctx.testdir, 'testfile{}'.format(num)),
            self.slot_num,
            int(inject),
        )
        ctx.exec(*args, expected_exitcode=exit_code,
                 stderr_file='err{}.log'.format(num))
#
# common_replay_and_check -- replay the recorded store log with pmreorder and
# check the outcome.
#
# $1 - ERROR_INJECT: 0 when the recorded publish was NOT corrupted (the replay
#      is expected to succeed), any other integer when an error was injected
#      (the replay is expected to fail).
#
# Sets the ERROR_INJECT, DEFAULT_ENGINE and EXTENDED_MACROS globals and reads
# PMREORDER_CMD prepared by common_setup.
#
function common_replay_and_check() {
	# Stop right away when the mandatory argument is missing. Without this
	# guard an empty value breaks the numeric test below and the function
	# silently falls into the 'expect failure' branch.
	ERROR_INJECT=${1:?common_replay_and_check: missing ERROR_INJECT argument, expected 0 or 1}

	# skip reordering and checking stores outside of the markers
	DEFAULT_ENGINE=NoReorderNoCheck
	# The accumulative reordering is sufficient considering the nature of
	# the scenario at hand where the key risk is that not all stores
	# will be executed. The order of these stores is irrelevant.
	# Please see the source code for the details of the tested scenario.
	# Note: ReorderFull is too time-consuming for this scenario.
	EXTENDED_MACROS="PMREORDER_PUBLISH=ReorderAccumulative"

	# Quoted so the test stays well-formed whatever the value is.
	if [ "$ERROR_INJECT" -eq 0 ]; then
		pmreorder_expect_success $DEFAULT_ENGINE "$EXTENDED_MACROS" "$PMREORDER_CMD"
	else
		pmreorder_expect_failure $DEFAULT_ENGINE "$EXTENDED_MACROS" "$PMREORDER_CMD"
	fi
}
+#include "unittest.h" + +#define LAYOUT_NAME "obj_ulog_advanced" + +/* + * The persistent shadow log is a DRAM buffer where initially all redo log + * entries are placed. This log's initial capacity is 1KiB (ULOG_BASE_SIZE) + * and can be reallocated to grow bigger as necessary. It grows when adding + * the next entry will reduce the available capacity below CACHELINE_SIZE (64B). + * + * When the persistent shadow log is ready, it will be copied to the persistent + * redo log before processing to ensure all the scheduled operations will + * eventually take place no matter the interruptions. + * + * The persistent redo log's maximum capacity is 640B (LANE_REDO_EXTERNAL_SIZE). + * So, when the persistent shadow log is bigger, additional redo logs + * have to be allocated and linked to the first one before the persistent + * shadow log will be copied. + * + * The header of the persistent shadow log is of exactly the same structure as + * the header of the persistent redo log and one of its fields stores + * the capacity. It turns out to be confusing since it is not obvious whether + * the persistent shadow log's capacity is the actual capacity of the underlying + * DRAM buffer (>=1024B) or the capacity of a single persistent redo log + * (<=640B). + * + * Moreover, the DAOS developers observed a real issue occurring in their BMEM + * allocator which is based on PMEMOBJ (daos-stack/daos#11593). The issue + * occurred when the entries fell above the LANE_REDO_EXTERNAL_SIZE offset but + * there were not enough entries to trigger the persistent shadow log's growth + * (<= ULOG_BASE_SIZE - CACHELINE_SIZE = 960B). + * + * This test aims at reproducing this scenario to ensure this issue is not + * present in PMEMOBJ. 
/*
 * It has to be big enough so the call counter won't reach this value naturally.
 */
#define BIG_ENOUGH_MAGIC_CALL_NUMBER 127

/*
 * The error injection is done for ulog_store().
 * The abort injection is done for ulog_process().
 *
 * Both of them are run one after another not only in case of processing
 * the user-built persistent shadow log but also whenever a reservation is
 * necessary e.g. when additional persistent redo log is needed to accommodate
 * the persistent shadow log. The persistent redo log reservation is done before
 * processing the persistent shadow log hence -1.
 *
 * ERROR_INJECT_CALL() and ABORTED_CALL() prime the respective call counter to
 * hit the dedicated magic value and trigger either an error injection or
 * an abort injection.
 *
 * The macro argument is parenthesized so any expression (e.g. a conditional
 * one) can be passed as slots_num without changing the meaning of
 * the comparison.
 */
#define _ERROR_INJECT_CALL BIG_ENOUGH_MAGIC_CALL_NUMBER
#define ERROR_INJECT_CALL(slots_num) \
	(((slots_num) > SLOTS_PER_REDO_LOG) ? \
		(_ERROR_INJECT_CALL - 1) : _ERROR_INJECT_CALL)

#define _ABORTED_CALL (BIG_ENOUGH_MAGIC_CALL_NUMBER * 2)
#define ABORTED_CALL(slots_num) \
	(((slots_num) > SLOTS_PER_REDO_LOG) ? \
		(_ABORTED_CALL - 1) : _ABORTED_CALL)
+ */ +static void +init(const char *path) +{ + PMEMobjpool *pop = pmemobj_create(path, LAYOUT_NAME, PMEMOBJ_MIN_POOL, + S_IWUSR | S_IRUSR); + if (pop == NULL) { + UT_FATAL("!pmemobj_create: %s", path); + } + + (void) get_root(pop); + + + /* The root object is initially zeroed so no need to touch it. */ + + pmemobj_close(pop); +} + +/* + * publish -- attempt to modify the values of the requested number of slots. + * The redo log of the operation might be intentionally corrupted (an error + * injection) and/or the process might be aborted just after writing + * the redo log and before starting processing the published set-operations + * (an abort injection). + */ +static void +publish(const char *path, int slots_num, bool error_inject, bool abort_inject) +{ + PMEMobjpool *pop = pmemobj_open(path, LAYOUT_NAME); + if (pop == NULL) { + UT_FATAL("!pmemobj_open: %s", path); + } + struct root *rootp = get_root(pop); + + struct pobj_action actions[SLOTS_NUM_MAX]; + unsigned actnum = 0; + for (unsigned i = 0; i < slots_num; ++i) { + pmemobj_set_value(pop, &actions[actnum++], &rootp->slots[i], 1); + } + + /* + * prime the call counters if requested so an error injection or + * an abort injection will take place + */ + if (error_inject) { + FUNC_MOCK_RCOUNTER_SET(ulog_store, + ERROR_INJECT_CALL(slots_num)); + } + if (abort_inject) { + FUNC_MOCK_RCOUNTER_SET(ulog_process, ABORTED_CALL(slots_num)); + } + /* + * The pmreorder markers help track down the operations belonging to + * the publish in question. Required for tests employing pmreorder. + */ + VALGRIND_PMC_EMIT_LOG("PMREORDER_PUBLISH.BEGIN"); + pmemobj_publish(pop, actions, actnum); + VALGRIND_PMC_EMIT_LOG("PMREORDER_PUBLISH.END"); + + pmemobj_close(pop); +} + +/* + * publish_abort_and_wait -- fork() the process and wait for the child to abort. + * The child process will attempt to modify a requested number of slots' values + * with or without error injection but it will abort just after writing + * the redo log. 
+ */ +static void +publish_abort_and_wait(const char *path, int slots_num, bool error_inject) +{ + int status; + pid_t pid, ret; + + pid = fork(); + if (pid < 0) { + UT_FATAL("!fork"); + } + + if (pid == 0) { + const bool abort_inject = true; + publish(path, slots_num, error_inject, abort_inject); + UT_FATAL( + "the child process should be aborted before this point"); + } else { + ret = waitpid(pid, &status, 0); + if (ret == -1) { + UT_FATAL("!waitpid"); + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != SIGABRT_EXITSTATUS) { + UT_FATAL( + "the child terminated with an unexpected status: %d", + WEXITSTATUS(status)); + } + } else { + UT_FATAL( + "something unexpected happened to the child process"); + } + } +} + +/* + * verify -- verify the requested number of slots are consistent. Either all + * modified or all not modified. + */ +static void +verify(const char *path, int slots_num) +{ + PMEMobjpool *pop = pmemobj_open(path, LAYOUT_NAME); + if (pop == NULL) { + UT_FATAL("!pmemobj_open: %s", path); + } + + struct root *rootp = get_root(pop); + /* + * The correct state is when all the requested slots have exactly + * the same value. 
+ */ + uint64_t exp = rootp->slots[0]; + + for (unsigned i = 1; i < slots_num; ++i) { + UT_ASSERTeq(rootp->slots[i], exp); + } + + pmemobj_close(pop); +} + +/* test entry points */ + +/* + * test_init_publish_abort_and_verify -- execute the whole sequence with or + * without error injection + */ +static int +test_init_publish_abort_and_verify(const struct test_case *tc, int argc, + char *argv[]) +{ + if (argc < 3) { + UT_FATAL("usage: %s filename slots_num error_inject", + __FUNCTION__); + } + + const char *path = argv[0]; + int slots_num = atoi(argv[1]); + bool error_inject = atoi(argv[2]) == ERROR_INJECTION_ON; + + init(path); + publish_abort_and_wait(path, slots_num, error_inject); + verify(path, slots_num); + + return 3; +} + +/* + * test_init -- just initialize the pool + */ +static int +test_init(const struct test_case *tc, int argc, char *argv[]) +{ + if (argc < 1) { + UT_FATAL("usage: %s filename", __FUNCTION__); + } + + const char *path = argv[0]; + init(path); + + return 1; +} + +/* + * test_publish -- having an initialized pool, change the values of + * the requested number of slots, with or without error injection. + */ +static int +test_publish(const struct test_case *tc, int argc, char *argv[]) +{ + if (argc < 3) { + UT_FATAL("usage: %s filename slots_num error_inject", + __FUNCTION__); + } + + const char *path = argv[0]; + int slots_num = atoi(argv[1]); + bool error_inject = atoi(argv[2]) == ERROR_INJECTION_ON; + + const bool abort_inject = false; + publish(path, slots_num, error_inject, abort_inject); + + return 3; +} + +/* + * test_verify -- verify the state of the requested number of slots. + */ +static int +test_verify(const struct test_case *tc, int argc, char *argv[]) +{ + /* + * Note: the file name has to be the last argument. It is forced by + * pmreorder. 
+ */ + if (argc < 2) { + UT_FATAL("usage: %s slots_num filename", __FUNCTION__); + } + + int slots_num = atoi(argv[0]); + const char *path = argv[1]; + + /* + * The setting preferred by the pmreorder's verify implementations. + */ + int y = 1; + pmemobj_ctl_set(NULL, "copy_on_write.at_open", &y); + + verify(path, slots_num); + + /* + * If the verify did not fail till now it has passed successfully. + * Return the result ASAP. + */ + END(0); +} + +static struct test_case test_cases[] = { + TEST_CASE(test_init_publish_abort_and_verify), + TEST_CASE(test_init), + TEST_CASE(test_publish), + TEST_CASE(test_verify), +}; + +int +main(int argc, char *argv[]) +{ + START(argc, argv, "obj_ulog_advanced"); + + /* + * Assert the assumptions. + * Please see the description at the beginning of this file. + */ + COMPILE_ERROR_ON(ULOG_BASE_SIZE != 1024); + COMPILE_ERROR_ON(CACHELINE_SIZE != 64); + COMPILE_ERROR_ON(LANE_REDO_EXTERNAL_SIZE != 640); + COMPILE_ERROR_ON(sizeof(struct ulog_entry_val) != 16); + + TEST_CASE_PROCESS(argc, argv, test_cases, ARRAY_SIZE(test_cases)); + + DONE(NULL); +}