From c5aec75483dd2cea55bf05beff5de824fc854d44 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 7 Sep 2023 12:38:13 -0400 Subject: [PATCH 01/29] DAOS-14301 test: Disable fault injection tests from release testing. (#13016) Adding 'faults' tag to scrubber/csum_fault.py. Signed-off-by: Phil Henderson --- src/tests/ftest/scrubber/csum_fault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/scrubber/csum_fault.py b/src/tests/ftest/scrubber/csum_fault.py index a038e1d2d37..6e1b9a968db 100644 --- a/src/tests/ftest/scrubber/csum_fault.py +++ b/src/tests/ftest/scrubber/csum_fault.py @@ -20,7 +20,7 @@ def test_scrubber_csum_fault(self): whether scrubber finds them. :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium - :avocado: tags=scrubber + :avocado: tags=scrubber,faults :avocado: tags=TestWithScrubberFault,test_scrubber_csum_fault """ From d6285840be53edc1dee711b547031c64cadafb15 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 8 Sep 2023 13:45:23 -0400 Subject: [PATCH 02/29] DAOS-14254 test: Improve test harness server file cleanup in startup (#12978) Add raising exceptions for failed server file cleanup steps during server startup and limiting mount cleanup to existing mount points. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 123 +++++++++++++++++---------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index cf8350f09ba..7b312457995 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -272,60 +272,89 @@ def clean_files(self, verbose=True): Args: verbose (bool, optional): display clean commands. Defaults to True. - """ - clean_commands = [] - for index, engine_params in enumerate(self.manager.job.yaml.engine_params): - scm_mount = engine_params.get_value("scm_mount") - self.log.info("Cleaning up the %s directory.", str(scm_mount)) - - # Remove the superblocks - cmd = "sudo rm -fr {}/*".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) - - # Remove the shared memory segment associated with this io server - cmd = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) - clean_commands.append(cmd) - - # Dismount the scm mount point - cmd = "while sudo umount {}; do continue; done".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) + Raises: + ServerFailed: if there was an error cleaning up the daos server files + """ + scm_mounts = [] + scm_lists = [] + for engine_params in self.manager.job.yaml.engine_params: + scm_mounts.append(engine_params.get_value("scm_mount")) if self.manager.job.using_dcpm: scm_list = engine_params.get_value("scm_list") if isinstance(scm_list, list): - self.log.info("Cleaning up the following device(s): %s.", ", ".join(scm_list)) - # Umount and wipefs the dcpm device - cmd_list = [ - "for dev in {}".format(" ".join(scm_list)), - "do mount=$(lsblk $dev -n -o MOUNTPOINT)", - "if [ ! 
-z $mount ]", - "then while sudo umount $mount", - "do continue", - "done", - "fi", - "sudo wipefs -a $dev", - "done" - ] - cmd = "; ".join(cmd_list) - if cmd not in clean_commands: - clean_commands.append(cmd) + scm_lists.append(scm_list) + + for index, scm_mount in enumerate(scm_mounts): + # Remove the superblocks and dismount the scm mount point + self.log.info("Cleaning up the %s scm mount.", str(scm_mount)) + self.clean_mount(self._hosts, scm_mount, verbose, index) + + for scm_list in scm_lists: + # Umount and wipefs the dcpm device + self.log.info("Cleaning up the %s dcpm devices", str(scm_list)) + command_list = [ + "for dev in {}".format(" ".join(scm_list)), + "do mount=$(lsblk $dev -n -o MOUNTPOINT)", + "if [ ! -z $mount ]", + "then while sudo umount $mount", + "do continue", + "done", + "fi", + "sudo wipefs -a $dev", + "done" + ] + command = "; ".join(command_list) + result = run_remote(self.log, self._hosts, command, verbose) + if not result.passed: + raise ServerFailed("Failed cleaning {} on {}".format(scm_list, result.failed_hosts)) if self.manager.job.using_control_metadata: # Remove the contents (superblocks) of the control plane metadata path - cmd = "sudo rm -fr {}/*".format(self.manager.job.control_metadata.path.value) - if cmd not in clean_commands: - clean_commands.append(cmd) - - if self.manager.job.control_metadata.device.value is not None: - # Dismount the control plane metadata mount point - cmd = "while sudo umount {}; do continue; done".format( - self.manager.job.control_metadata.device.value) - if cmd not in clean_commands: - clean_commands.append(cmd) - - pcmd(self._hosts, "; ".join(clean_commands), verbose) + self.log.info( + "Cleaning up the control metadata path %s", + self.manager.job.control_metadata.path.value) + self.clean_mount(self._hosts, self.manager.job.control_metadata.path.value, verbose) + + def clean_mount(self, hosts, mount, verbose=True, index=None): + """Clean the mount point by removing the superblocks and dismounting. + + Args: + hosts (NodeSet): the hosts on which to clean the mount point + mount (str): the mount point to clean + verbose (bool, optional): display clean commands. Defaults to True. + index (int, optional): Defaults to None. 
+ + Raises: + ServerFailed: if there is an error cleaning the mount point + """ + self.log.debug("Checking for the existence of the %s mount point", mount) + command = "test -d {}".format(mount) + result = run_remote(self.log, hosts, command, verbose) + if result.passed: + mounted_hosts = result.passed_hosts + + # Remove the superblocks + self.log.debug("Removing the %s superblocks", mount) + command = "sudo rm -fr {}/*".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to remove superblocks for {} on {}".format(mount, result.failed_hosts)) + + if index is not None: + # Remove the shared memory segment associated with this io server + self.log.debug("Removing the shared memory segment") + command = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) + run_remote(self.log, self._hosts, command, verbose) + + # Dismount the scm mount point + self.log.debug("Dismount the %s mount point", mount) + command = "while sudo umount {}; do continue; done".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to dismount {} on {}".format(mount, result.failed_hosts)) def prepare_storage(self, user, using_dcpm=None, using_nvme=None): """Prepare the server storage. From db6ac13c819d8053e5a94541be2d6df0fcd11a2b Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 8 Sep 2023 20:57:48 +0100 Subject: [PATCH 03/29] DAOS-623 build: Make scons quiet quiet. (#12892) Update build so there's less output when quiet is used. Signed-off-by: Ashley Pittman --- site_scons/components/__init__.py | 8 +-- site_scons/env_modules.py | 35 +++++++------ site_scons/prereq_tools/base.py | 8 ++- site_scons/site_tools/compiler_setup.py | 5 +- site_scons/site_tools/daos_builder.py | 12 +++-- src/SConscript | 3 +- src/client/dfs/SConscript | 17 +++--- src/client/serialize/SConscript | 7 ++- src/tests/suite/SConscript | 70 +------------------------ src/tests/suite/daos_test.h | 17 ------ 10 files changed, 60 insertions(+), 122 deletions(-) diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 1fe43214676..94154c6d2f9 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -58,7 +58,7 @@ def check(self, name): self.installed.append(name) return True - if not GetOption('help'): + if not GetOption('help') and not GetOption('silent'): print(f'Using build version of {name}') self.not_installed.append(name) return False @@ -90,12 +90,14 @@ def check(reqs, name, built_str, installed_str=""): def ofi_config(config): """Check ofi version""" - print('Checking for libfabric > 1.11...', end=' ') + if not GetOption('silent'): + print('Checking for libfabric > 1.11...', end=' ') code = """#include _Static_assert(FI_MAJOR_VERSION == 1 && FI_MINOR_VERSION >= 11, "libfabric must be >= 1.11");""" rc = config.TryCompile(code, ".c") - print('yes' if rc else 'no') + if not GetOption('silent'): + print('yes' if rc else 'no') return rc diff --git a/site_scons/env_modules.py b/site_scons/env_modules.py index 9d38df7e2db..df4af0a6498 100644 --- a/site_scons/env_modules.py +++ b/site_scons/env_modules.py @@ -36,7 +36,7 @@ class _env_module(): # pylint: disable=invalid-name "openmpi": ['mpi/mlnx_openmpi-x86_64', 'mpi/openmpi3-x86_64', 'gnu-openmpi', 'mpi/openmpi-x86_64']} - def __init__(self): + def __init__(self, silent=False): """Load Modules for initializing environment variables""" # Leap 15's lmod-lua doesn't 
include the usual module path # in it's MODULEPATH, for some unknown reason @@ -44,6 +44,7 @@ def __init__(self): os.path.join(os.sep, "usr", "share", "modulefiles"), os.path.join(os.sep, "etc", "modulefiles")] + os.environ.get("MODULEPATH", "").split(":")) + self._silent = silent self._module_load = self._init_mpi_module() def _module_func(self, command, *arguments): # pylint: disable=no-self-use @@ -56,7 +57,8 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # pylint: disable=consider-using-with try: - print(f"Going to run {cmd}") + if not self._silent: + print(' '.join(cmd)) proc = Popen(cmd, stdout=PIPE, stderr=PIPE) except OSError as error: if error.errno == errno.ENOENT: @@ -81,11 +83,11 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # return _mlstatus, stderr.decode() # pylint: disable=undefined-variable def _init_mpi_module(self): - """init mpi module function""" + """Init mpi module function""" return self._mpi_module def _mpi_module(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] unload = [] @@ -109,16 +111,17 @@ def _mpi_module(self, mpi): self._module_func('unload', to_unload) for to_load in load: - print(f"Trying to load {to_load}") - if self._module_func('is-avail', to_load)[0] and \ - self._module_func('load', to_load)[0]: - print(f'Loaded {to_load}') + if not self._silent: + print(f"Trying to load {to_load}") + if self._module_func('is-avail', to_load)[0] and self._module_func('load', to_load)[0]: + if not self._silent: + print(f'Loaded {to_load}') return True return False def _mpi_module_old(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] for key, value in self._mpi_map.items(): if key == mpi: @@ -162,7 +165,7 @@ def load_mpi(self, mpi): return True def show_avail(self): - """list available modules""" + """List available modules""" try: status, output = self._module_func('avail') if not status: @@ -172,12 +175,12 @@ def show_avail(self): return output def get_map(self, key): - """return the mpi map""" + """Return the mpi map""" return self._mpi_map[key] -def load_mpi(mpi): - """global function to load MPI into os.environ""" +def load_mpi(mpi, silent=False): + """Global function to load MPI into os.environ""" # On Ubuntu, MPI stacks use alternatives and need root to change their # pointer, so just verify that the desired MPI is loaded if distro.id() == "ubuntu": @@ -201,19 +204,19 @@ def load_mpi(mpi): return False if _env_module.env_module_init is None: - _env_module.env_module_init = _env_module() + _env_module.env_module_init = _env_module(silent) return _env_module.env_module_init.load_mpi(mpi) def show_avail(): - """global function to show the available modules""" + """Global function to show the available modules""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.show_avail() def get_module_list(key): - """global function to show the modules that map to a key""" + """Global function to show the modules that map to a key""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.get_map(key) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 9163bfb0ed7..f016e4295b0 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1125,6 +1125,11 @@ def _parse_config(self, env, opts): return + def 
_print(self, msg): + if GetOption('silent'): + return + print(msg) + def has_missing_targets(self, env): """Check for expected build targets (e.g. libraries or headers)""" # pylint: disable=too-many-return-statements @@ -1151,7 +1156,7 @@ def has_missing_targets(self, env): print('help set') return True - print(f"Checking targets for component '{self.name}'") + self._print(f"Checking targets for component '{self.name}'") config = env.Configure() config_cb = self.key_words.get("config_cb", None) @@ -1244,7 +1249,6 @@ def configure(self): def set_environment(self, env, needed_libs): """Modify the specified construction environment to build with the external component""" - if self.skip_arch: return diff --git a/site_scons/site_tools/compiler_setup.py b/site_scons/site_tools/compiler_setup.py index 1f20d00b050..8c91ff2f535 100644 --- a/site_scons/site_tools/compiler_setup.py +++ b/site_scons/site_tools/compiler_setup.py @@ -39,8 +39,9 @@ def _base_setup(env): compiler = env['CC'] build_type = env['BUILD_TYPE'] - print(f'Setting up compile environment for {compiler}') - print(f"Build type is '{build_type}'") + if not GetOption('silent'): + print(f'Setting up compile environment for {compiler}') + print(f"Build type is '{build_type}'") prev_compiler = env.get('BSETUP', False) if prev_compiler: diff --git a/site_scons/site_tools/daos_builder.py b/site_scons/site_tools/daos_builder.py index 36676952ffd..8afd254a182 100644 --- a/site_scons/site_tools/daos_builder.py +++ b/site_scons/site_tools/daos_builder.py @@ -224,6 +224,10 @@ def _configure_mpi(self): if GetOption('help'): return None + def _print(msg): + if not GetOption('silent'): + print(msg) + env = self.Clone() env['CXX'] = None @@ -233,13 +237,13 @@ def _configure_mpi(self): return env for mpi in ['openmpi', 'mpich']: - if not load_mpi(mpi): + if not load_mpi(mpi, GetOption('silent')): continue if _find_mpicc(env): - print(f'{mpi} is installed') + _print(f'{mpi} is installed') return env - print(f'No {mpi} installed and/or loaded') - print("No MPI installed") + _print(f'No {mpi} installed and/or loaded') + _print("No MPI installed") return None diff --git a/src/SConscript b/src/SConscript index e440dff1eea..c4cb419e047 100644 --- a/src/SConscript +++ b/src/SConscript @@ -49,7 +49,8 @@ def read_and_save_version(env): '@Template for @': ''} out = env.Substfile(tmpl_hdr_in, SUBST_DICT=subst_dict) - print(f'generated daos version header file: {out[0].abspath}') + if not GetOption('silent'): + print(f'generated daos version header file: {out[0].abspath}') return version diff --git a/src/client/dfs/SConscript b/src/client/dfs/SConscript index a0c12efc139..38512536397 100644 --- a/src/client/dfs/SConscript +++ b/src/client/dfs/SConscript @@ -5,24 +5,29 @@ def configure_lustre(denv): """Do Lustre configure checks""" if GetOption('help') or GetOption('clean'): return denv + + def _print(msg): + if not GetOption('silent'): + print(msg) + # If Lustre installed build a Lustre-aware libduns conf = Configure(denv) gotversion = False if not conf.CheckLibWithHeader('lustreapi', 'linux/lustre/lustre_user.h', 'c'): - print("No installed Lustre version detected") + _print("No installed Lustre version detected") else: - print("Installed Lustre version detected") + _print("Installed Lustre version detected") if not conf.CheckFunc('llapi_unlink_foreign'): - print("Lustre version is not compatible") + _print("Lustre version is not compatible") else: - print("Lustre version is compatible") + _print("Lustre version is compatible") gotversion = True if gotversion 
is True: - print("Building with Lustre bindings.") + _print("Building with Lustre bindings.") denv.AppendUnique(CCFLAGS=['-DLUSTRE_INCLUDE']) else: - print("Not building with Lustre bindings.") + _print("Not building with Lustre bindings.") return conf.Finish() diff --git a/src/client/serialize/SConscript b/src/client/serialize/SConscript index a50ffca0ebd..83b077d7867 100644 --- a/src/client/serialize/SConscript +++ b/src/client/serialize/SConscript @@ -3,6 +3,7 @@ def scons(): """Execute build""" + Import('env') denv = env.Clone() @@ -19,11 +20,13 @@ def scons(): src = ['daos_serialize.c'] if have_hdf5 is True: - print("Building with hdf5 bindings.") + if not GetOption('silent'): + print("Building with hdf5 bindings.") daos_serialize = denv.d_library('daos_serialize', src, LIBS=libraries) denv.Install('$PREFIX/lib64/', daos_serialize) else: - print("No installed hdf5 detected, DAOS serialization is not enabled") + if not GetOption('silent'): + print("No installed hdf5 detected, DAOS serialization is not enabled") if __name__ == "SCons.Script": diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index f4872c86e09..efbcba289cb 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -1,72 +1,4 @@ """Build test suite""" -import sys -import subprocess # nosec - -TEST_CMOCKA_SKIP = """ -#include -#include -#include -#include - -static void -test(void **state) { skip(); } - -int main(int argc, char **argv) -{ - const struct CMUnitTest tests[] = { - cmocka_unit_test(test), - cmocka_unit_test(test), - }; - return cmocka_run_group_tests(tests, NULL, NULL); -} -""" - - -# pylint: disable-next=invalid-name -def CheckCmockaSkip(context): - """Configure check for cmocka bug""" - context.Message('Checking if cmocka skip() bug is present ... ') - rc = context.TryCompile(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Compile failed) assuming ") - context.Result(not rc) - return rc - rc = context.TryLink(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Link failed) assuming ") - context.Result(not rc) - return rc - prog = context.lastTarget - pname = prog.get_abspath() - rc = subprocess.call(pname, env={"CMOCKA_TEST_ABORT": "1"}, shell=False, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - # in case of abort rc is -6 instead of 134 (128+6) with shell ... 
- if rc == -6: - sys.stdout.write(" (Bug reproduced) ") - else: - if rc != 0: - sys.stdout.write(" (Other error than bug) assuming ") - else: - sys.stdout.write(" (Bug not reproduced) ") - context.Result(rc) - # return 0 means error - return not rc - - -# pylint: disable=no-member -def configure_cmocka(nenv): - """configure cmocka environment""" - if GetOption('help') or GetOption('clean'): - return nenv - conf = Configure(nenv, custom_tests={'CheckCmockaSkip': CheckCmockaSkip}) - conf.env.AppendUnique(LIBS=['cmocka']) - if not conf.CheckCmockaSkip(): - # it would be cool to be able to check exit code is effectively 134 - # (for abort() upon skip() bug) but in all error cases we should - # decide to use workaround - conf.env.AppendUnique(CCFLAGS=['-DOVERRIDE_CMOCKA_SKIP']) - print("libcmocka with broken skip(), using workaround (DAOS-1093).") - return conf.Finish() def scons(): @@ -97,7 +29,7 @@ def scons(): c_files + daos_test_tgt, LIBS=['daos_common'] + libraries) - newenv = configure_cmocka(denv.Clone()) + newenv = denv.Clone() c_files = Split("""daos_array.c daos_base_tx.c daos_capa.c daos_checksum.c daos_container.c daos_dedup.c daos_degraded.c diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index e423a61433e..49a40e2f62c 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -22,23 +22,6 @@ #include #include -#ifdef OVERRIDE_CMOCKA_SKIP -/* redefine cmocka's skip() so it will no longer abort() - * if CMOCKA_TEST_ABORT=1 - * - * it can't be redefined as a function as it must return from current context - */ -#undef skip -#define skip() \ - do { \ - const char *abort_test = getenv("CMOCKA_TEST_ABORT"); \ - if (abort_test != NULL && abort_test[0] == '1') \ - print_message("Skipped !!!\n"); \ - else \ - _skip(__FILE__, __LINE__); \ - return; \ - } while (0) -#endif #if FAULT_INJECTION #define FAULT_INJECTION_REQUIRED() do { } while (0) From d42b2ab2f088be6296910b2527de3f80b9de39ed Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Fri, 8 Sep 2023 15:06:04 -0700 Subject: [PATCH 04/29] DAOS-14328 cart: Do not print error when exceeding context limit (#13011) - Do not print an internal layer error when exceeding the context limit, as daos client layer will try to allocate as many contexts as it can in some usage models. Printing an error is not user friendly in such situations. Signed-off-by: Alexander A Oganezov --- src/cart/crt_hg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 8e871e7c3a6..e6c5fe70fb1 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -617,8 +617,9 @@ crt_provider_get_ctx_idx(bool primary, int provider) } } - D_ERROR("ctx_num %d, will exceed CRT_SRV_CONTEXT_NUM (%d) if create more context.\n", - prov_data->cpg_ctx_num, CRT_SRV_CONTEXT_NUM); + D_DEBUG(DB_ALL, "provider:%d allowed context limit = %d exceeded\n", + provider, CRT_SRV_CONTEXT_NUM); + return -1; } From ef93dbef058175e9aa4540e48964b8a53b1931dc Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sun, 10 Sep 2023 10:50:26 -0500 Subject: [PATCH 05/29] DAOS-14260 dfs: add API to readdir with a filter (#12985) This allows user to utilize the pipeline API to execute a server side find. 
Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs.c | 406 +++++++++++++++++++++++++++++++- src/client/dfs/dfs_internal.h | 91 +++++++ src/tests/suite/dfs_unit_test.c | 125 ++++++++++ 3 files changed, 618 insertions(+), 4 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 9107db9214b..3313c188c64 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -11,19 +11,15 @@ #include #include #include -#include #include #include #include #include -#include #include #include -#include #include "daos.h" #include "daos_fs.h" - #include "dfs_internal.h" /** D-key name of SB metadata */ @@ -7446,3 +7442,405 @@ dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) D_FREE(entry.value); return rc; } + +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size) +{ + daos_handle_t oh; + int rc; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (daos_obj_id2type(oid) != DAOS_OT_ARRAY_BYTE) + return EINVAL; + + rc = daos_array_open_with_attr(dfs->coh, oid, DAOS_TX_NONE, DAOS_OO_RO, 1, + chunk_size ? chunk_size : dfs->attr.da_chunk_size, + &oh, NULL); + if (rc != 0) { + D_ERROR("daos_array_open() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_get_size(oh, DAOS_TX_NONE, size, NULL); + if (rc) { + daos_array_close(oh, NULL); + D_ERROR("daos_array_get_size() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_close(oh, NULL); + return daos_der2errno(rc); +} + +struct dfs_pipeline { + daos_pipeline_t pipeline; + dfs_predicate_t pred; + + mode_t constant1; + mode_t constant2; + + d_iov_t dkey_iov; + d_iov_t const1_iov; + d_iov_t const2_iov; + d_iov_t const3_iov; + + daos_filter_part_t dkey_ft; + daos_filter_part_t akey1_ft; + daos_filter_part_t akey2_ft; + daos_filter_part_t const0_ft; + daos_filter_part_t const1_ft; + daos_filter_part_t const2_ft; + daos_filter_part_t const3_ft; + daos_filter_part_t like_ft; + daos_filter_part_t ba_ft; + daos_filter_part_t eq_ft; + daos_filter_part_t gt_ft; + daos_filter_part_t and_ft; + daos_filter_part_t or_ft; + + daos_filter_t pipef; +}; + +#define DKEY_F "DAOS_FILTER_DKEY" +#define AKEY_F "DAOS_FILTER_AKEY" +#define CONST_F "DAOS_FILTER_CONST" +#define BINARY_F "DAOS_FILTER_TYPE_BINARY" +#define INT8_F "DAOS_FILTER_TYPE_UINTEGER8" +#define INT4_F "DAOS_FILTER_TYPE_UINTEGER4" +#define LIKE_F "DAOS_FILTER_FUNC_LIKE" +#define GT_F "DAOS_FILTER_FUNC_GT" +#define EQ_F "DAOS_FILTER_FUNC_EQ" +#define BA_F "DAOS_FILTER_FUNC_BITAND" +#define AND_F "DAOS_FILTER_FUNC_AND" +#define OR_F "DAOS_FILTER_FUNC_OR" +#define COND_F "DAOS_FILTER_CONDITION" + +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **_dpipe) +{ + daos_size_t bin_flen = sizeof(BINARY_F) - 1; + daos_size_t dkey_flen = sizeof(DKEY_F) - 1; + daos_size_t akey_flen = sizeof(AKEY_F) - 1; + daos_size_t const_flen = sizeof(CONST_F) - 1; + daos_size_t int8_flen = sizeof(INT8_F) - 1; + daos_size_t int4_flen = sizeof(INT4_F) - 1; + daos_size_t like_flen = sizeof(LIKE_F) - 1; + daos_size_t gt_flen = sizeof(GT_F) - 1; + daos_size_t eq_flen = sizeof(EQ_F) - 1; + daos_size_t ba_flen = sizeof(BA_F) - 1; + daos_size_t and_flen = sizeof(AND_F) - 1; + daos_size_t or_flen = sizeof(OR_F) - 1; + daos_size_t cond_flen = sizeof(COND_F) - 1; + dfs_pipeline_t *dpipe; + int rc; + + D_ALLOC_PTR(dpipe); + if (dpipe == NULL) + return ENOMEM; + + /** copy the user predicate conditions */ + memcpy(&dpipe->pred, &pred, sizeof(dfs_predicate_t)); + 
+ daos_pipeline_init(&dpipe->pipeline); + + /** build condition for entry name */ + if (flags & DFS_FILTER_NAME) { + daos_size_t name_len; + + name_len = strnlen(dpipe->pred.dp_name, DFS_MAX_NAME); + + d_iov_set(&dpipe->dkey_ft.part_type, DKEY_F, dkey_flen); + d_iov_set(&dpipe->dkey_ft.data_type, BINARY_F, bin_flen); + dpipe->dkey_ft.data_len = DFS_MAX_NAME; + + d_iov_set(&dpipe->const0_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const0_ft.data_type, BINARY_F, bin_flen); + dpipe->const0_ft.num_constants = 1; + dpipe->const0_ft.constant = &dpipe->dkey_iov; + d_iov_set(dpipe->const0_ft.constant, dpipe->pred.dp_name, name_len); + + d_iov_set(&dpipe->like_ft.part_type, LIKE_F, like_flen); + dpipe->like_ft.num_operands = 2; + } + + /** build condition for newer than ctime */ + if (flags & DFS_FILTER_NEWER) { + d_iov_set(&dpipe->akey2_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey2_ft.data_type, INT8_F, int8_flen); + d_iov_set(&dpipe->akey2_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey2_ft.data_offset = CTIME_IDX; + dpipe->akey2_ft.data_len = sizeof(time_t); + + d_iov_set(&dpipe->const3_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const3_ft.data_type, INT8_F, int8_flen); + dpipe->const3_ft.num_constants = 1; + dpipe->const3_ft.constant = &dpipe->const3_iov; + d_iov_set(dpipe->const3_ft.constant, &dpipe->pred.dp_newer, sizeof(time_t)); + + d_iov_set(&dpipe->gt_ft.part_type, GT_F, gt_flen); + dpipe->gt_ft.num_operands = 2; + } + + /** If filter on dirs is not requested, return all dirs so they can be traversed */ + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + d_iov_set(&dpipe->akey1_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey1_ft.data_type, INT4_F, int4_flen); + d_iov_set(&dpipe->akey1_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey1_ft.data_offset = MODE_IDX; + dpipe->akey1_ft.data_len = sizeof(mode_t); + + dpipe->constant1 = S_IFMT; + d_iov_set(&dpipe->const1_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const1_ft.data_type, INT4_F, int4_flen); + dpipe->const1_ft.num_constants = 1; + dpipe->const1_ft.constant = &dpipe->const1_iov; + d_iov_set(dpipe->const1_ft.constant, &dpipe->constant1, sizeof(mode_t)); + + dpipe->constant2 = S_IFDIR; + d_iov_set(&dpipe->const2_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const2_ft.data_type, INT4_F, int4_flen); + dpipe->const2_ft.num_constants = 1; + dpipe->const2_ft.constant = &dpipe->const2_iov; + d_iov_set(dpipe->const2_ft.constant, &dpipe->constant2, sizeof(mode_t)); + + d_iov_set(&dpipe->ba_ft.part_type, BA_F, ba_flen); + dpipe->ba_ft.num_operands = 2; + + d_iov_set(&dpipe->eq_ft.part_type, EQ_F, eq_flen); + dpipe->eq_ft.num_operands = 2; + } + + /** build final condition: IS_DIR || (entry name match && newer match) */ + + d_iov_set(&dpipe->and_ft.part_type, AND_F, and_flen); + dpipe->and_ft.num_operands = 2; + + d_iov_set(&dpipe->or_ft.part_type, OR_F, or_flen); + dpipe->or_ft.num_operands = 2; + + /** initialize and add all the parts to the pipeline */ + daos_filter_init(&dpipe->pipef); + d_iov_set(&dpipe->pipef.filter_type, COND_F, cond_flen); + + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->or_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + rc = daos_filter_add(&dpipe->pipef, &dpipe->eq_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->ba_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, 
&dpipe->akey1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const2_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER && flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->and_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->like_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->dkey_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const0_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->gt_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->akey2_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const3_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + rc = daos_pipeline_add(&dpipe->pipeline, &dpipe->pipef); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + *_dpipe = dpipe; + return 0; +err: + printf("failed to create pipeline. rc = %d\n", rc); + D_FREE(dpipe); + return rc; +} + +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe) +{ + if (dpipe->pipeline.num_filters) + D_FREE(dpipe->pipeline.filters); + D_FREE(dpipe); + return 0; +} + +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csize, + uint64_t *nr_scanned) +{ + daos_iod_t iod; + daos_key_desc_t *kds; + d_sg_list_t sgl_keys, sgl_recs; + d_iov_t iov_keys, iov_recs; + char *buf_keys = NULL, *buf_recs = NULL; + daos_recx_t recxs[4]; + uint32_t nr_iods, nr_kds, key_nr, i; + daos_size_t record_len; + int rc = 0; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (obj == NULL || !S_ISDIR(obj->mode)) + return ENOTDIR; + if (*nr == 0) + return 0; + if (dpipe == NULL || dirs == NULL || anchor == NULL) + return EINVAL; + + /* IOD to retrieve the mode_t and the ctime */ + iod.iod_nr = 2; + iod.iod_size = 1; + recxs[0].rx_idx = MODE_IDX; + recxs[0].rx_nr = sizeof(mode_t); + recxs[1].rx_idx = CTIME_IDX; + recxs[1].rx_nr = sizeof(time_t); + iod.iod_recxs = recxs; + iod.iod_type = DAOS_IOD_ARRAY; + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + record_len = recxs[0].rx_nr + recxs[1].rx_nr; + + if (oids) { + recxs[iod.iod_nr].rx_idx = OID_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_obj_id_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + if (csize) { + recxs[iod.iod_nr].rx_idx = CSIZE_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_size_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + + nr_kds = *nr; + nr_iods = 1; + + D_ALLOC_ARRAY(kds, nr_kds); + if (kds == NULL) + return ENOMEM; + + /** alloc buffer to store dkeys enumerated */ + sgl_keys.sg_nr = 1; + sgl_keys.sg_nr_out = 0; + sgl_keys.sg_iovs = &iov_keys; + D_ALLOC_ARRAY(buf_keys, nr_kds * DFS_MAX_NAME); + if (buf_keys == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_keys, buf_keys, nr_kds * DFS_MAX_NAME); + + + /** alloc buffer to store records enumerated */ + sgl_recs.sg_nr = 1; + sgl_recs.sg_nr_out = 0; + sgl_recs.sg_iovs = &iov_recs; + D_ALLOC_ARRAY(buf_recs, nr_kds * 
record_len); + if (buf_recs == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_recs, buf_recs, nr_kds * record_len); + + key_nr = 0; + *nr_scanned = 0; + while (!daos_anchor_is_eof(anchor)) { + daos_pipeline_stats_t stats = {0}; + char *ptr1; + + memset(buf_keys, 0, *nr * DFS_MAX_NAME); + + rc = daos_pipeline_run(dfs->coh, obj->oh, &dpipe->pipeline, DAOS_TX_NONE, 0, NULL, + &nr_iods, &iod, anchor, &nr_kds, kds, &sgl_keys, &sgl_recs, + NULL, NULL, &stats, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + + D_ASSERT(nr_iods == 1); + ptr1 = buf_keys; + + for (i = 0; i < nr_kds; i++) { + char *ptr2; + mode_t mode; + char *dkey = (char *)ptr1; + + /** set the dentry name */ + memcpy(dirs[key_nr].d_name, dkey, kds[i].kd_key_len); + dirs[key_nr].d_name[kds[i].kd_key_len] = '\0'; + + /** set the dentry type */ + ptr2 = &buf_recs[i * record_len]; + mode = *((mode_t *)ptr2); + + if (S_ISDIR(mode)) { + dirs[key_nr].d_type = DT_DIR; + } else if (S_ISREG(mode)) { + dirs[key_nr].d_type = DT_REG; + } else if (S_ISLNK(mode)) { + dirs[key_nr].d_type = DT_LNK; + } else { + D_ERROR("Invalid DFS entry type found, possible data corruption\n"); + D_GOTO(out, rc = EINVAL); + } + + /** set the OID for dentry if requested */ + if (oids) { + ptr2 += sizeof(mode_t) + sizeof(time_t); + oid_cp(&oids[key_nr], *((daos_obj_id_t *)ptr2)); + } + + /** set the chunk size for dentry if requested */ + if (csize) { + if (oids) + ptr2 += sizeof(daos_obj_id_t); + else + ptr2 += sizeof(mode_t) + sizeof(time_t); + csize[key_nr] = *((daos_size_t *)ptr2); + } + + key_nr++; + ptr1 += kds[i].kd_key_len; + } + + *nr_scanned += stats.nr_dkeys; + nr_kds = *nr - key_nr; + if (nr_kds == 0) + break; + } + *nr = key_nr; + +out: + D_FREE(kds); + D_FREE(buf_recs); + D_FREE(buf_keys); + return rc; +} diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index c337ec1bf42..83ac13aeaab 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -131,6 +131,97 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); +/** Internal pipeline readdir functionality */ + +/** DFS pipeline object */ +typedef struct dfs_pipeline dfs_pipeline_t; + +enum { + DFS_FILTER_NAME = (1 << 1), + DFS_FILTER_NEWER = (1 << 2), + DFS_FILTER_INCLUDE_DIRS = (1 << 3), +}; + +/** Predicate conditions for filter */ +typedef struct { + char dp_name[DFS_MAX_NAME]; /** name condition for entry - regex */ + time_t dp_newer; /** timestamp for newer condition */ + size_t dp_size; /** size of files - not supported for now */ +} dfs_predicate_t; + +/** + * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the + * chunk_size of the file is also required to be passed if the file was created with a different + * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to + * chunk size. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] oid Object ID of the file. + * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). + * \param[out] size Returned size of the file. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); + +/** + * Create a pipeline object to be used during readdir with filter. Should be destroyed with + * dfs_pipeline_destroy(). + * + * \param[in] dfs Pointer to the mounted file system. 
+ * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). + * \param[in] flags Pipeline flags (conditions to apply). + * \param[out] dpipe Pipeline object created. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); + +/** + * Destroy pipeline object. + * + * \param[in] dpipe Pipeline object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe); + +/** + * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() + * on the entries that are enumerated. This function also optionally returns the object ID of each + * dirent if requested through a pre-allocated OID input array. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Opened directory object. + * \param[in] dpipe DFS pipeline filter. + * \param[in,out] + * anchor Hash anchor for the next call, it should be set to + * zeroes for the first call, it should not be changed + * by caller between calls. + * \param[in,out] + * nr [in]: number of dirents allocated in \a dirs. + * [out]: number of returned dirents. + * \param[in,out] + * dirs [in] preallocated array of dirents. + * [out]: dirents returned with d_name filled only. + * \param[in,out] + * oids [in] Optional preallocated array of object IDs. + * [out]: Object ID associated with each dirent that was read. + * \param[in,out] + * csizes [in] Optional preallocated array of sizes. + * [out]: chunk size associated with each dirent that was read. + * \param[out] Total number of entries scanned by readdir before returning. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, + uint64_t *nr_scanned); + #if defined(__cplusplus) } #endif diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index c1def757c4c..47845f57b20 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -3053,6 +3053,129 @@ dfs_test_fix_chunk_size(void **state) D_FREE(buf); } +#define NUM_ENTRIES 1024 +#define NR_ENUM 64 + +static void +dfs_test_pipeline_find(void **state) +{ + dfs_obj_t *dir1, *f1; + int i; + time_t ts = 0; + mode_t create_mode = S_IWUSR | S_IRUSR; + int create_flags = O_RDWR | O_CREAT | O_EXCL; + char *dirname = "pipeline_dir"; + int rc; + + rc = dfs_open(dfs_mt, NULL, dirname, create_mode | S_IFDIR, create_flags, + OC_SX, 0, NULL, &dir1); + assert_int_equal(rc, 0); + + for (i = 0; i < NUM_ENTRIES; i++) { + char name[24]; + + /* create 1 dir for every 100 files */ + if (i % 100 == 0) { + sprintf(name, "dir.%d", i); + rc = dfs_mkdir(dfs_mt, dir1, name, create_mode | S_IFDIR, 0); + assert_int_equal(rc, 0); + } else { + daos_obj_id_t oid; + + sprintf(name, "file.%d", i); + rc = dfs_open(dfs_mt, dir1, name, create_mode | S_IFREG, create_flags, 0, 0, + NULL, &f1); + assert_int_equal(rc, 0); + + dfs_obj2id(f1, &oid); + /* printf("File %s \t OID: %"PRIu64".%"PRIu64"\n", name, oid.hi, oid.lo); */ + + rc = dfs_release(f1); + assert_int_equal(rc, 0); + } + + if (i == NUM_ENTRIES / 2) { + sleep(1); + ts = time(NULL); + sleep(1); + } + } + + dfs_predicate_t pred = {0}; + dfs_pipeline_t *dpipe = NULL; + + strcpy(pred.dp_name, "%.6%"); + pred.dp_newer = ts; + rc = dfs_pipeline_create(dfs_mt, pred, DFS_FILTER_NAME | 
DFS_FILTER_NEWER, &dpipe); + assert_int_equal(rc, 0); + + + uint32_t num_split = 0, j; + + rc = dfs_obj_anchor_split(dir1, &num_split, NULL); + assert_int_equal(rc, 0); + print_message("Anchor split in %u parts\n", num_split); + + daos_anchor_t *anchors; + struct dirent *dents = NULL; + daos_obj_id_t *oids = NULL; + daos_size_t *csizes = NULL; + + anchors = malloc(sizeof(daos_anchor_t) * num_split); + dents = malloc (sizeof(struct dirent) * NR_ENUM); + oids = calloc(NR_ENUM, sizeof(daos_obj_id_t)); + csizes = calloc(NR_ENUM, sizeof(daos_size_t)); + + uint64_t nr_total = 0, nr_matched = 0, nr_scanned; + + for (j = 0; j < num_split; j++) { + daos_anchor_t *anchor = &anchors[j]; + uint32_t nr; + + memset(anchor, 0, sizeof(daos_anchor_t)); + + rc = dfs_obj_anchor_set(dir1, j, anchor); + assert_int_equal(rc, 0); + + while (!daos_anchor_is_eof(anchor)) { + nr = NR_ENUM; + rc = dfs_readdir_with_filter(dfs_mt, dir1, dpipe, anchor, &nr, dents, oids, + csizes, &nr_scanned); + assert_int_equal(rc, 0); + + nr_total += nr_scanned; + nr_matched += nr; + + for (i = 0; i < nr; i++) { + print_message("Name: %s\t", dents[i].d_name); + print_message("OID: %"PRIu64".%"PRIu64"\t", oids[i].hi, oids[i].lo); + print_message("CSIZE = %zu\n", csizes[i]); + if (dents[i].d_type == DT_DIR) + print_message("Type: DIR\n"); + else if (dents[i].d_type == DT_REG) + print_message("Type: FILE\n"); + else + assert(0); + } + } + } + + print_message("total entries scanned = %"PRIu64"\n", nr_total); + print_message("total entries matched = %"PRIu64"\n", nr_matched); + + free(dents); + free(anchors); + free(oids); + free(csizes); + rc = dfs_pipeline_destroy(dpipe); + assert_int_equal(rc, 0); + /** close / finalize */ + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, dirname, true, NULL); + assert_int_equal(rc, 0); +} + static const struct CMUnitTest dfs_unit_tests[] = { { "DFS_UNIT_TEST1: DFS mount / umount", dfs_test_mount, async_disable, test_case_teardown}, @@ -3106,6 +3229,8 @@ static const struct CMUnitTest dfs_unit_tests[] = { dfs_test_relink_root, async_disable, test_case_teardown}, { "DFS_UNIT_TEST26: dfs MWC chunk size fix", dfs_test_fix_chunk_size, async_disable, test_case_teardown}, + { "DFS_UNIT_TEST27: dfs pipeline find", + dfs_test_pipeline_find, async_disable, test_case_teardown}, }; static int From 60004a11f664ea26fdfafebc46a2a2fe3b1ffd81 Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Sun, 10 Sep 2023 10:55:05 -0500 Subject: [PATCH 06/29] client: bump hadoop-common version from 3.3.3 to 3.3.6 (#13019) minimize possible vulnerabilities in dependent packages per SDL requirement. Signed-off-by: Lei Huang --- src/client/java/hadoop-daos/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/java/hadoop-daos/pom.xml b/src/client/java/hadoop-daos/pom.xml index 34ecdf445ac..7f8dac9f9f6 100644 --- a/src/client/java/hadoop-daos/pom.xml +++ b/src/client/java/hadoop-daos/pom.xml @@ -15,7 +15,7 @@ jar - 3.3.3 + 3.3.6 ${project.basedir}/build ${project.basedir}/install From c75710cc4cfe549c125f7ec06e513b6aad2f6dab Mon Sep 17 00:00:00 2001 From: wangdi Date: Mon, 11 Sep 2023 06:35:08 -0700 Subject: [PATCH 07/29] DAOS-14208 rebuild: several fixes for multiple shards in the same target (#13022) Checking rebuilding shard by comparing the old and new layout map, similar as drain/reintegration, since non-failure shard might be moved to other targets due to co-locate and failure domain factors. Add reclaim phase for rebuild as well. Add test to verify it. 
A few other fixes due to this. Increase rebuild EC timeout value. Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 59 ++++-------- src/placement/jump_map.c | 96 +++++++------------ src/placement/pl_map_common.c | 3 +- src/placement/tests/jump_map_place_obj.c | 2 +- src/rebuild/scan.c | 2 +- src/rebuild/srv.c | 3 +- src/tests/ftest/daos_test/suite.yaml | 2 +- src/tests/ftest/rebuild/basic.py | 2 +- .../ftest/rebuild/container_create_race.py | 4 +- src/tests/ftest/rebuild/with_io.py | 2 +- src/tests/ftest/util/rebuild_test_base.py | 4 +- src/tests/suite/daos_rebuild_ec.c | 44 ++++++++- 12 files changed, 111 insertions(+), 112 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 51280364c2b..c3bc5472b83 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -620,7 +620,6 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, d_iov_t *csum_iov_fetch) { struct migrate_pool_tls *tls; - struct dc_object *obj; int rc = 0; tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid, @@ -634,21 +633,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, if (daos_oclass_grp_size(&mrone->mo_oca) > 1) flags |= DIOF_TO_LEADER; - /** - * For EC data migration, let's force it to do degraded fetch, - * make sure reintegration will not fetch from the original - * shard, which might cause parity corruption. - */ - obj = obj_hdl2ptr(oh); - if (iods[0].iod_type != DAOS_IOD_SINGLE && - daos_oclass_is_ec(&mrone->mo_oca) && - is_ec_data_shard(obj, mrone->mo_dkey_hash, mrone->mo_oid.id_shard) && - obj_ec_parity_alive(oh, mrone->mo_dkey_hash, NULL)) - flags |= DIOF_FOR_FORCE_DEGRADE; - - obj_decref(obj); - - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); if (rc != 0) @@ -669,7 +654,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, csum_iov_fetch->iov_len = 0; csum_iov_fetch->iov_buf = p; - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, iod_num, iods, sgls, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); } @@ -1223,7 +1208,8 @@ migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, static int __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, - daos_iod_t *iods, int iod_num, daos_epoch_t update_eph, + daos_iod_t *iods, int iod_num, daos_epoch_t fetch_eph, + daos_epoch_t update_eph, uint32_t flags, struct ds_cont_child *ds_cont) { d_sg_list_t sgls[OBJ_ENUM_UNPACK_MAX_IODS]; @@ -1282,8 +1268,7 @@ __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, p_csum_iov = &csum_iov; } - rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, mrone->mo_epoch, - flags, p_csum_iov); + rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, fetch_eph, flags, p_csum_iov); if (rc) { D_ERROR("migrate dkey "DF_KEY" failed: "DF_RC"\n", DP_KEY(&mrone->mo_dkey), DP_RC(rc)); @@ -1358,6 +1343,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, if (!daos_oclass_is_ec(&mrone->mo_oca)) return __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods, mrone->mo_iod_num, + mrone->mo_epoch, mrone->mo_min_epoch, DIOF_FOR_MIGRATION, ds_cont); @@ -1370,22 +1356,19 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, * this data shard. 
*/ - if (mrone->mo_iods_num_from_parity > 0) { - daos_epoch_t min_eph = DAOS_EPOCH_MAX; + for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { + for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) { + daos_iod_t iod = mrone->mo_iods_from_parity[i]; - for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { - for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) - min_eph = min(min_eph, - mrone->mo_iods_update_ephs_from_parity[i][j]); + iod.iod_nr = 1; + iod.iod_recxs = &mrone->mo_iods_from_parity[i].iod_recxs[j]; + rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_iods_update_ephs_from_parity[i][j], + mrone->mo_iods_update_ephs_from_parity[i][j], + DIOF_EC_RECOV_FROM_PARITY, ds_cont); + if (rc != 0) + D_GOTO(out, rc); } - - rc = __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods_from_parity, - mrone->mo_iods_num_from_parity, - min_eph, - DIOF_FOR_MIGRATION | DIOF_EC_RECOV_FROM_PARITY, - ds_cont); - if (rc != 0) - D_GOTO(out, rc); } /* The data, rebuilt from replication, needs to keep the same epoch during rebuild, @@ -1401,6 +1384,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, iod.iod_nr = 1; iod.iod_recxs = &mrone->mo_iods[i].iod_recxs[j]; rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_epoch, mrone->mo_iods_update_ephs[i][j], DIOF_FOR_MIGRATION, ds_cont); if (rc < 0) { @@ -2343,10 +2327,9 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) migrate_tgt_off = obj_ec_shard_off_by_layout_ver(layout_ver, io->ui_dkey_hash, &arg->oc_attr, shard); unpack_tgt_off = obj_ec_shard_off(obj, io->ui_dkey_hash, io->ui_oid.id_shard); - if ((rc == 1 && + if (rc == 1 && (is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) || - (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) || - (tls->mpt_opc == RB_OP_EXCLUDE && io->ui_oid.id_shard == shard)) { + (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) { D_DEBUG(DB_REBUILD, DF_UOID" ignore shard "DF_KEY"/%u/%d/%u/%d.\n", DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); @@ -2579,7 +2562,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, /* Only open with RW flag, reintegrating flag will be set, which is needed * during unpack_cb to check if parity shard alive. 
*/ - rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RW, &oh); + rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &oh); if (rc) { D_ERROR("dsc_obj_open failed: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_cont, rc); diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c index bbcc07f2dc1..1b57aff1719 100644 --- a/src/placement/jump_map.c +++ b/src/placement/jump_map.c @@ -734,6 +734,8 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l } else { if (domain != NULL) setbit(dom_cur_grp_real, domain - root); + if (pool_target_down(target)) + layout->ol_shards[k].po_rebuilding = 1; } if (is_extending != NULL && pool_target_is_up_or_drain(target)) @@ -743,7 +745,7 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l if (fail_tgt_cnt > 0) rc = obj_remap_shards(jmap, layout_ver, md, layout, jmop, &remap_list, out_list, - allow_status, md->omd_ver, tgts_used, dom_used, dom_full, + allow_status, allow_version, tgts_used, dom_used, dom_full, fail_tgt_cnt, is_extending, fdom_lvl); out: if (rc) @@ -1025,7 +1027,12 @@ jump_map_obj_place(struct pl_map *map, uint32_t layout_version, struct daos_obj_ return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + if (mode & DAOS_OO_RO) + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | + PO_COMP_ST_DOWN; + else + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + rc = obj_layout_alloc_and_get(jmap, layout_version, &jmop, md, allow_status, md->omd_ver, &layout, NULL, &is_extending); if (rc != 0) { @@ -1090,66 +1097,16 @@ jump_map_obj_place(struct pl_map *map, uint32_t layout_version, struct daos_obj_ * another target, Or 0 if none need to be rebuilt. */ static int -jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, - uint32_t *tgt_id, uint32_t *shard_idx, unsigned int array_size) -{ - struct pl_jump_map *jmap; - struct pl_obj_layout *layout; - d_list_t remap_list; - struct jm_obj_placement jmop; - daos_obj_id_t oid; - int rc; - - int idx = 0; - - D_DEBUG(DB_PL, "Finding Rebuild at version: %u\n", rebuild_ver); - - /* Caller should guarantee the pl_map is up-to-date */ - if (pl_map_version(map) < rebuild_ver) { - D_ERROR("pl_map version(%u) < rebuild version(%u)\n", - pl_map_version(map), rebuild_ver); - return -DER_INVAL; - } - - jmap = pl_map2jmap(map); - oid = md->omd_id; - - rc = jm_obj_placement_init(jmap, md, shard_md, &jmop); - if (rc) { - D_ERROR("jm_obj_placement_init failed, rc "DF_RC"\n", DP_RC(rc)); - return rc; - } - - D_INIT_LIST_HEAD(&remap_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jmop, md, PO_COMP_ST_UPIN, - rebuild_ver, &layout, &remap_list, NULL); - if (rc < 0) - D_GOTO(out, rc); - - obj_layout_dump(oid, layout); - rc = remap_list_fill(map, md, shard_md, rebuild_ver, tgt_id, shard_idx, - array_size, &idx, layout, &remap_list, false); - -out: - jm_obj_placement_fini(&jmop); - remap_list_free_all(&remap_list); - if (layout != NULL) - pl_obj_layout_free(layout); - return rc < 0 ? 
rc : idx; -} - -static int -jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t reint_ver, - uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) +jump_map_obj_find_diff(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t old_status, uint32_t new_status, + uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) { struct pl_jump_map *jmap; struct pl_obj_layout *layout = NULL; struct pl_obj_layout *reint_layout = NULL; d_list_t reint_list; struct jm_obj_placement jop; - uint32_t allow_status; int rc; int idx = 0; @@ -1170,16 +1127,14 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; D_INIT_LIST_HEAD(&reint_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, old_status, reint_ver, &layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); obj_layout_dump(md->omd_id, layout); - allow_status |= PO_COMP_ST_UP; - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, new_status, reint_ver, &reint_layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); @@ -1200,6 +1155,27 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc < 0 ? rc : idx; } +static int +jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, reint_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_UP, + tgt_id, shard_id, array_size); +} + +static int +jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, rebuild_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_DOWN, + PO_COMP_ST_UPIN, tgt_id, shard_id, array_size); +} + /** API for generic placement map functionality */ struct pl_map_ops jump_map_ops = { .o_create = jump_map_create, diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c index 691d0e1e600..47f620d6635 100644 --- a/src/placement/pl_map_common.c +++ b/src/placement/pl_map_common.c @@ -327,7 +327,8 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, * skip this shard. 
*/ if (f_shard->fs_status == PO_COMP_ST_DOWN || - f_shard->fs_status == PO_COMP_ST_DRAIN) + f_shard->fs_status == PO_COMP_ST_DRAIN || + pool_target_down(spare_tgt)) l_shard->po_rebuilding = 1; } else { l_shard->po_shard = -1; diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index fdbc08ef07e..5de8ba810c4 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -1607,7 +1607,7 @@ placement_handles_multiple_states(void **state) */ ctx.ver = ver_after_fail; jtc_scan(&ctx); - assert_int_equal(ctx.rebuild.out_nr, 1); + assert_int_equal(ctx.rebuild.out_nr, 2); /* Complete the rebuild */ ctx.ver = ver_after_reint_complete; /* Restore the version first */ diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 8587d7b5d8c..0f8707f5aab 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -646,7 +646,7 @@ rebuild_object(struct rebuild_tgt_pool_tracker *rpt, uuid_t co_uuid, daos_unit_o rc = 0; if (myrank == target->ta_comp.co_rank && mytarget == target->ta_comp.co_index && - rpt->rt_rebuild_op != RB_OP_UPGRADE) { + (shard == oid.id_shard) && rpt->rt_rebuild_op != RB_OP_UPGRADE) { D_DEBUG(DB_REBUILD, DF_UOID" %u/%u already on the target shard\n", DP_UOID(oid), myrank, mytarget); return 0; diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 94d0b2a79bc..e16583436ce 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1412,7 +1412,8 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, task->dst_new_layout_version, &task->dst_tgts, retry_opc, 5); } else if (task->dst_rebuild_op == RB_OP_REINT || task->dst_rebuild_op == RB_OP_EXTEND || - task->dst_rebuild_op == RB_OP_UPGRADE) { + task->dst_rebuild_op == RB_OP_UPGRADE || task->dst_rebuild_op == RB_OP_EXCLUDE || + task->dst_rebuild_op == RB_OP_DRAIN) { /* Otherwise schedule reclaim for reintegrate/extend/upgrade. 
*/ rgt->rgt_status.rs_state = DRS_IN_PROGRESS; rc = ds_rebuild_schedule(pool, task->dst_map_ver, rgt->rgt_reclaim_epoch, diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 20bbfcf6296..a016e1937c9 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -27,7 +27,7 @@ timeouts: test_daos_extend_simple: 3600 test_daos_oid_allocator: 640 test_daos_checksum: 500 - test_daos_rebuild_ec: 4800 + test_daos_rebuild_ec: 6400 test_daos_aggregate_ec: 200 test_daos_degraded_ec: 1900 test_daos_dedup: 220 diff --git a/src/tests/ftest/rebuild/basic.py b/src/tests/ftest/rebuild/basic.py index c6263211190..2d7b0e723c1 100644 --- a/src/tests/ftest/rebuild/basic.py +++ b/src/tests/ftest/rebuild/basic.py @@ -97,7 +97,7 @@ def run_rebuild_test(self, pool_quantity): pi_ndisabled=target_count ) status &= pool.check_rebuild_status( - rs_state=2, rs_obj_nr=rs_obj_nr[index], rs_rec_nr=rs_rec_nr[index], rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/rebuild/container_create_race.py b/src/tests/ftest/rebuild/container_create_race.py index 2607c9ef6f0..6684d89ad53 100644 --- a/src/tests/ftest/rebuild/container_create_race.py +++ b/src/tests/ftest/rebuild/container_create_race.py @@ -152,8 +152,8 @@ def test_rebuild_container_create(self): # Check for pool and rebuild info after rebuild self.log.info("=> (6) Check for pool and rebuild info after rebuild") info_checks["pi_ndisabled"] += targets - rebuild_checks["rs_obj_nr"] = ">0" - rebuild_checks["rs_rec_nr"] = ">0" + rebuild_checks["rs_obj_nr"] = ">=0" + rebuild_checks["rs_rec_nr"] = ">=0" rebuild_checks["rs_state"] = 2 self.assertTrue( self.pool.check_pool_info(**info_checks), diff --git a/src/tests/ftest/rebuild/with_io.py b/src/tests/ftest/rebuild/with_io.py index 229b3fa3ca3..7e7a1e623d4 100644 --- a/src/tests/ftest/rebuild/with_io.py +++ b/src/tests/ftest/rebuild/with_io.py @@ -92,7 +92,7 @@ def test_rebuild_with_io(self): pi_ndisabled=targets, # DAOS-2799 ) status &= self.pool.check_rebuild_status( - rs_state=2, rs_obj_nr=">0", rs_rec_nr=">0", rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/util/rebuild_test_base.py b/src/tests/ftest/util/rebuild_test_base.py index a4f7d845e2e..1435aa1815e 100644 --- a/src/tests/ftest/util/rebuild_test_base.py +++ b/src/tests/ftest/util/rebuild_test_base.py @@ -75,8 +75,8 @@ def update_pool_verify(self): """Update the pool verification expected values.""" self.info_checks["pi_ndisabled"] = ">0" self.rebuild_checks["rs_state"] = 2 - self.rebuild_checks["rs_obj_nr"] = ">0" - self.rebuild_checks["rs_rec_nr"] = ">0" + self.rebuild_checks["rs_obj_nr"] = ">=0" + self.rebuild_checks["rs_rec_nr"] = ">=0" def execute_pool_verify(self, msg=None): """Verify the pool info. 
diff --git a/src/tests/suite/daos_rebuild_ec.c b/src/tests/suite/daos_rebuild_ec.c index 0863647d845..6669d32490e 100644 --- a/src/tests/suite/daos_rebuild_ec.c +++ b/src/tests/suite/daos_rebuild_ec.c @@ -1111,6 +1111,7 @@ rebuild_ec_multiple_shards(void **state) d_rank_t rank = 2; int i, j, k; char *data; + char *verify_data; uint64_t stripe_size = 4 * CELL_SIZE; daos_recx_t recx; @@ -1118,32 +1119,69 @@ rebuild_ec_multiple_shards(void **state) return; data = (char *)malloc(stripe_size); + verify_data = (char *)malloc(stripe_size); assert_true(data != NULL); + assert_true(verify_data != NULL); + for (i = 0; i < 20; i++) + oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); + for (k = 0; k < 3; k++) { for (i = 0; i < 20; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(data, 'a' + i, stripe_size); for (j = 5 * k; j < 5 * (k + 1); j++) { req.iod_type = DAOS_IOD_ARRAY; recx.rx_nr = stripe_size; recx.rx_idx = j * stripe_size; - memset(data, 'a', stripe_size); insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, stripe_size, &req); } ioreq_fini(&req); } + rebuild_pools_ranks(&arg, 1, &rank, 1, false); daos_cont_status_clear(arg->coh, NULL); + print_message("exclude rank %u\n", rank); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } rank = 2; - for (i = 0; i < 3; i++) { + for (k = 0; k < 3; k++) { reintegrate_pools_ranks(&arg, 1, &rank, 1, false); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } + free(verify_data); free(data); } From 92ef794e0abd2360b55ed2e713ba8bd97a561b64 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Mon, 11 Sep 2023 12:37:24 -0400 Subject: [PATCH 08/29] DAOS-14251 control: Allow premounted empty tmpfs (#12968) In the special case where the tmpfs has already been mounted but is empty, don't skip NVMe format and configuration. Enables the use case of running daos_server in a container with an external tmpfs. 
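For reference, the "premounted but empty" decision boils down to checking that the mounted tmpfs has no bytes in use (total minus available equals zero). The sketch below illustrates only that check as a standalone program; the struct and function names are invented for the illustration, and the actual change lives in the Go control plane's SCM format path, so treat this as a sketch of the idea rather than DAOS code.

/* Illustrative sketch only: "empty" means no bytes are in use on the mount. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fs_usage {
	uint64_t total_bytes;
	uint64_t avail_bytes;
};

static bool tmpfs_is_empty(const struct fs_usage *u)
{
	return (u->total_bytes - u->avail_bytes) == 0;
}

int main(void)
{
	struct fs_usage fresh = {.total_bytes = 1ULL << 30, .avail_bytes = 1ULL << 30};
	struct fs_usage used  = {.total_bytes = 1ULL << 30, .avail_bytes = (1ULL << 30) - 4096};

	/* Prints 1: the mount is empty, so NVMe format should still run. */
	printf("fresh mount empty: %d\n", tmpfs_is_empty(&fresh));
	/* Prints 0: data present, so mountedness stands in for "already formatted". */
	printf("used mount empty:  %d\n", tmpfs_is_empty(&used));
	return 0;
}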
Signed-off-by: Michael MacDonald --- src/control/cmd/daos_server/start.go | 4 +- src/control/fault/code/codes.go | 1 + src/control/server/config/server.go | 46 +++++++-------- src/control/server/config/server_legacy.go | 9 +++ src/control/server/ctl_storage_rpc.go | 22 +++++++- src/control/server/ctl_storage_rpc_test.go | 56 ++++++++++++++++++- src/control/server/harness.go | 4 +- src/control/server/instance_exec.go | 14 ++--- src/control/server/instance_storage.go | 15 +---- src/control/server/instance_storage_test.go | 13 +---- src/control/server/instance_superblock.go | 4 +- .../server/instance_superblock_test.go | 2 +- src/control/server/instance_test.go | 2 +- src/control/server/server_utils.go | 19 +++++++ src/control/server/server_utils_test.go | 37 ++++++++++-- src/control/server/storage/faults.go | 11 ++++ 16 files changed, 185 insertions(+), 74 deletions(-) diff --git a/src/control/cmd/daos_server/start.go b/src/control/cmd/daos_server/start.go index bb773d02b3a..f2d7b77feda 100644 --- a/src/control/cmd/daos_server/start.go +++ b/src/control/cmd/daos_server/start.go @@ -64,7 +64,9 @@ func (cmd *startCmd) setCLIOverrides() error { if cmd.Modules != nil { cmd.config.WithModules(*cmd.Modules) } - cmd.config.RecreateSuperblocks = cmd.RecreateSuperblocks + if cmd.RecreateSuperblocks { + cmd.Notice("--recreate-superblocks is deprecated and no longer needed to use externally-managed tmpfs") + } for _, srv := range cmd.config.Engines { if cmd.Targets > 0 { diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index eff960b6f30..89bfb32bed0 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -79,6 +79,7 @@ const ( ScmBadRegion ScmInvalidPMem ScmRamdiskLowMem + ScmRamdiskBadSize ScmConfigTierMissing ) diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index a860edae8f9..974d11161f8 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -40,26 +40,25 @@ const ( // See utils/config/daos_server.yml for parameter descriptions. 
type Server struct { // control-specific - ControlPort int `yaml:"port"` - TransportConfig *security.TransportConfig `yaml:"transport_config"` - Engines []*engine.Config `yaml:"engines"` - BdevExclude []string `yaml:"bdev_exclude,omitempty"` - DisableVFIO bool `yaml:"disable_vfio"` - DisableVMD *bool `yaml:"disable_vmd"` - EnableHotplug bool `yaml:"enable_hotplug"` - NrHugepages int `yaml:"nr_hugepages"` // total for all engines - SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines - DisableHugepages bool `yaml:"disable_hugepages"` - ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` - ControlLogFile string `yaml:"control_log_file,omitempty"` - ControlLogJSON bool `yaml:"control_log_json,omitempty"` - HelperLogFile string `yaml:"helper_log_file,omitempty"` - FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` - RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` - FaultPath string `yaml:"fault_path,omitempty"` - TelemetryPort int `yaml:"telemetry_port,omitempty"` - CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` - ClientEnvVars []string `yaml:"client_env_vars,omitempty"` + ControlPort int `yaml:"port"` + TransportConfig *security.TransportConfig `yaml:"transport_config"` + Engines []*engine.Config `yaml:"engines"` + BdevExclude []string `yaml:"bdev_exclude,omitempty"` + DisableVFIO bool `yaml:"disable_vfio"` + DisableVMD *bool `yaml:"disable_vmd"` + EnableHotplug bool `yaml:"enable_hotplug"` + NrHugepages int `yaml:"nr_hugepages"` // total for all engines + SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines + DisableHugepages bool `yaml:"disable_hugepages"` + ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` + ControlLogFile string `yaml:"control_log_file,omitempty"` + ControlLogJSON bool `yaml:"control_log_json,omitempty"` + HelperLogFile string `yaml:"helper_log_file,omitempty"` + FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` + FaultPath string `yaml:"fault_path,omitempty"` + TelemetryPort int `yaml:"telemetry_port,omitempty"` + CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` + ClientEnvVars []string `yaml:"client_env_vars,omitempty"` // duplicated in engine.Config SystemName string `yaml:"name"` @@ -87,13 +86,6 @@ func (cfg *Server) WithCoreDumpFilter(filter uint8) *Server { return cfg } -// WithRecreateSuperblocks indicates that a missing superblock should not be treated as -// an error. The server will create new superblocks as necessary. -func (cfg *Server) WithRecreateSuperblocks() *Server { - cfg.RecreateSuperblocks = true - return cfg -} - // WithSystemName sets the system name. func (cfg *Server) WithSystemName(name string) *Server { cfg.SystemName = name diff --git a/src/control/server/config/server_legacy.go b/src/control/server/config/server_legacy.go index b09092a9ca9..fba515a0c54 100644 --- a/src/control/server/config/server_legacy.go +++ b/src/control/server/config/server_legacy.go @@ -18,6 +18,8 @@ type ServerLegacy struct { EnableVMD *bool `yaml:"enable_vmd,omitempty"` // Detect outdated "servers" config, to direct users to change their config file. Servers []*engine.Config `yaml:"servers,omitempty"` + // Detect outdated "recreate_superblocks" config, to direct users to change their config file. 
+ RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` } // WithEnableVMD can be used to set the state of VMD functionality, @@ -27,6 +29,13 @@ func (sl *ServerLegacy) WithEnableVMD(enabled bool) *ServerLegacy { return sl } +// WithRecreateSuperblocks indicates that a missing superblock should not be treated as +// an error. The server will create new superblocks as necessary. +func (sl *ServerLegacy) WithRecreateSuperblocks() *ServerLegacy { + sl.RecreateSuperblocks = true + return sl +} + func updateVMDSetting(legacyCfg ServerLegacy, srvCfg *Server) error { switch { case legacyCfg.EnableVMD == nil: diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index cf0ca43be13..1880eb4c7f9 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -623,6 +623,7 @@ type formatScmReq struct { func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatResp) (map[int]string, map[int]bool, error) { needFormat := make(map[int]bool) + emptyTmpfs := make(map[int]bool) scmCfgs := make(map[int]*storage.TierConfig) allNeedFormat := true @@ -641,6 +642,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR return nil, nil, errors.Wrap(err, "retrieving SCM config") } scmCfgs[idx] = scmCfg + + // If the tmpfs was already mounted but empty, record that fact for later usage. + if scmCfg.Class == storage.ClassRam && !needs { + info, err := ei.GetStorage().GetScmUsage() + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to check SCM usage for instance %d", idx) + } + emptyTmpfs[idx] = info.TotalBytes-info.AvailBytes == 0 + } } if allNeedFormat { @@ -673,7 +683,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR }, }) - skipped[idx] = true + // In the normal case, where SCM wasn't already mounted, we want + // to trigger NVMe format. In the case where SCM was mounted and + // wasn't empty, we want to skip NVMe format, as we're using + // mountedness as a proxy for already-formatted. In the special + // case where tmpfs was already mounted but empty, we will treat it + // as an indication that the NVMe format needs to occur. 
+ if !emptyTmpfs[idx] { + skipped[idx] = true + } } for formatting > 0 { @@ -708,7 +726,7 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma _, hasError := req.errored[idx] _, skipped := req.skipped[idx] if hasError || (skipped && !req.mdFormatted) { - // if scm errored or was already formatted, indicate skipping bdev format + // if scm failed to format or was already formatted, indicate skipping bdev format ret := ei.newCret(storage.NilBdevAddress, nil) ret.State.Info = fmt.Sprintf(msgNvmeFormatSkip, ei.Index()) resp.Crets = append(resp.Crets, ret) diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 76ff043af92..ba9f3de1e0a 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -1777,6 +1777,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { for name, tc := range map[string]struct { scmMounted bool // if scmMounted we emulate ext4 fs is mounted + tmpfsEmpty bool // if false, an already-mounted ramdisk is not empty superblockExists bool instancesStarted bool // engine already started sMounts []string @@ -1995,6 +1996,44 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { }, }, }, + "ram already mounted but empty": { + scmMounted: true, + tmpfsEmpty: true, + sMounts: []string{"/mnt/daos"}, + sClass: storage.ClassRam, + sSize: 6, + bClass: storage.ClassNvme, + bDevs: [][]string{{mockNvmeController0.PciAddr}}, + bmbc: &bdev.MockBackendConfig{ + ScanRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{mockNvmeController0}, + }, + FormatRes: &storage.BdevFormatResponse{ + DeviceResponses: storage.BdevDeviceFormatResponses{ + mockNvmeController0.PciAddr: &storage.BdevDeviceFormatResponse{ + Formatted: true, + }, + }, + }, + }, + expResp: &ctlpb.StorageFormatResp{ + Crets: []*ctlpb.NvmeControllerResult{ + { + PciAddr: mockNvmeController0.PciAddr, + State: new(ctlpb.ResponseState), + }, + }, + Mrets: []*ctlpb.ScmMountResult{ + { + Mntpoint: "/mnt/daos", + State: &ctlpb.ResponseState{ + Status: ctlpb.ResponseStatus_CTL_SUCCESS, + Info: "SCM is already formatted", + }, + }, + }, + }, + }, "ram already mounted and reformat set": { scmMounted: true, reformat: true, @@ -2247,6 +2286,19 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { GetfsStr: getFsRetStr, SourceToTarget: devToMount, } + if tc.sClass == storage.ClassRam { + total := uint64(1234) + avail := total + if !tc.tmpfsEmpty { + avail-- + } + smsc.GetfsUsageResps = []system.GetfsUsageRetval{ + { + Total: total, + Avail: avail, + }, + } + } sysProv := system.NewMockSysProvider(log, smsc) mounter := mount.NewProvider(log, sysProv) scmProv := scm.NewProvider(log, nil, sysProv, mounter) @@ -2301,7 +2353,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { // if the instance is expected to have a valid superblock, create one if tc.superblockExists { - if err := ei.createSuperblock(false); err != nil { + if err := ei.createSuperblock(); err != nil { t.Fatal(err) } } else { @@ -2332,7 +2384,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { go func(ctx context.Context, e *EngineInstance) { select { case <-ctx.Done(): - case awaitCh <- e.awaitStorageReady(ctx, false): + case awaitCh <- e.awaitStorageReady(ctx): } }(ctx, ei.(*EngineInstance)) } diff --git a/src/control/server/harness.go b/src/control/server/harness.go index 88028bc658a..f27febc1dce 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -62,7 +62,7 @@ type Engine interface { 
IsReady() bool LocalState() system.MemberState RemoveSuperblock() error - Run(context.Context, bool) + Run(context.Context) SetupRank(context.Context, ranklist.Rank, uint32) error Stop(os.Signal) error OnInstanceExit(...onInstanceExitFn) @@ -260,7 +260,7 @@ func (h *EngineHarness) Start(ctx context.Context, db dbLeader, cfg *config.Serv defer h.started.SetFalse() for _, ei := range h.Instances() { - ei.Run(ctx, cfg.RecreateSuperblocks) + ei.Run(ctx) } h.OnDrpcFailure(newOnDrpcFailureFn(h.log, db)) diff --git a/src/control/server/instance_exec.go b/src/control/server/instance_exec.go index ab22cb4504f..19143782ec3 100644 --- a/src/control/server/instance_exec.go +++ b/src/control/server/instance_exec.go @@ -30,14 +30,14 @@ type EngineRunner interface { GetConfig() *engine.Config } -func (ei *EngineInstance) format(ctx context.Context, recreateSBs bool) error { +func (ei *EngineInstance) format(ctx context.Context) error { idx := ei.Index() ei.log.Debugf("instance %d: checking if storage is formatted", idx) - if err := ei.awaitStorageReady(ctx, recreateSBs); err != nil { + if err := ei.awaitStorageReady(ctx); err != nil { return err } - if err := ei.createSuperblock(recreateSBs); err != nil { + if err := ei.createSuperblock(); err != nil { return err } @@ -158,7 +158,7 @@ func (ei *EngineInstance) handleExit(ctx context.Context, exitPid int, exitErr e // will only return (if no errors are returned during setup) on I/O Engine // process exit (triggered by harness shutdown through context cancellation // or abnormal I/O Engine process termination). -func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) (_ chan *engine.RunnerExitInfo, err error) { +func (ei *EngineInstance) startRunner(parent context.Context) (_ chan *engine.RunnerExitInfo, err error) { ctx, cancel := context.WithCancel(parent) defer func() { if err != nil { @@ -168,7 +168,7 @@ func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) } }() - if err = ei.format(ctx, recreateSBs); err != nil { + if err = ei.format(ctx); err != nil { return } @@ -192,7 +192,7 @@ func (ei *EngineInstance) requestStart(ctx context.Context) { // Run starts the control loop for an EngineInstance. Engine starts are triggered by // calling requestStart() on the instance. -func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { +func (ei *EngineInstance) Run(ctx context.Context) { // Start the instance control loop. go func() { var runnerExitCh engine.RunnerExitChan @@ -212,7 +212,7 @@ func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { continue } - runnerExitCh, err = ei.startRunner(ctx, recreateSBs) + runnerExitCh, err = ei.startRunner(ctx) if err != nil { ei.log.Errorf("runner exited without starting process: %s", err) ei.handleExit(ctx, 0, err) diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 7be5c570b86..2cc4f1f5443 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -76,7 +76,7 @@ func createPublishFormatRequiredFunc(publish func(*events.RASEvent), hostname st } // awaitStorageReady blocks until instance has storage available and ready to be used. 
-func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSuperblock bool) error { +func (ei *EngineInstance) awaitStorageReady(ctx context.Context) error { idx := ei.Index() if ei.IsStarted() { @@ -117,9 +117,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe } if !needsMetaFormat && !needsScmFormat { - if skipMissingSuperblock { - return nil - } ei.log.Debugf("instance %d: no SCM format required; checking for superblock", idx) needsSuperblock, err := ei.NeedsSuperblock() if err != nil { @@ -132,16 +129,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe ei.log.Debugf("instance %d: superblock needed", idx) } - if needsScmFormat { - cfg, err := ei.storage.GetScmConfig() - if err != nil { - return err - } - if skipMissingSuperblock { - return FaultScmUnmanaged(cfg.Scm.MountPoint) - } - } - // by this point we need superblock and possibly scm format formatType := "SCM" if !needsScmFormat { diff --git a/src/control/server/instance_storage_test.go b/src/control/server/instance_storage_test.go index 0a73da3458c..2bbc049bd65 100644 --- a/src/control/server/instance_storage_test.go +++ b/src/control/server/instance_storage_test.go @@ -361,7 +361,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted bool needsScmFormat bool hasSB bool - skipMissingSB bool engineIndex uint32 expFmtType string expErr error @@ -370,14 +369,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted: true, expErr: errStarted, }, - "needs format but skip missing superblock": { - needsScmFormat: true, - skipMissingSB: true, - expErr: FaultScmUnmanaged("/mnt/test"), - }, - "no need to format and skip missing superblock": { - skipMissingSB: true, - }, "no need to format and existing superblock": { hasSB: true, }, @@ -432,9 +423,9 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { ctx, cancel := context.WithTimeout(test.Context(t), time.Millisecond*100) defer cancel() - gotErr := engine.awaitStorageReady(ctx, tc.skipMissingSB) + gotErr := engine.awaitStorageReady(ctx) test.CmpErr(t, tc.expErr, gotErr) - if tc.expErr == errStarted || tc.skipMissingSB == true || tc.hasSB == true { + if tc.expErr == errStarted || tc.hasSB == true { return } diff --git a/src/control/server/instance_superblock.go b/src/control/server/instance_superblock.go index 11b7f3849ea..0d6ec613a8b 100644 --- a/src/control/server/instance_superblock.go +++ b/src/control/server/instance_superblock.go @@ -111,7 +111,7 @@ func (ei *EngineInstance) NeedsSuperblock() (bool, error) { } // createSuperblock creates instance superblock if needed. 
-func (ei *EngineInstance) createSuperblock(recreate bool) error { +func (ei *EngineInstance) createSuperblock() error { if ei.IsStarted() { return errors.Errorf("can't create superblock: instance %d already started", ei.Index()) } @@ -120,7 +120,7 @@ func (ei *EngineInstance) createSuperblock(recreate bool) error { if !needsSuperblock { return nil } - if err != nil && !recreate { + if err != nil { return err } diff --git a/src/control/server/instance_superblock_test.go b/src/control/server/instance_superblock_test.go index 8354f17f320..5c1220cd7dc 100644 --- a/src/control/server/instance_superblock_test.go +++ b/src/control/server/instance_superblock_test.go @@ -56,7 +56,7 @@ func TestServer_Instance_createSuperblock(t *testing.T) { } for _, e := range h.Instances() { - if err := e.(*EngineInstance).createSuperblock(false); err != nil { + if err := e.(*EngineInstance).createSuperblock(); err != nil { t.Fatal(err) } } diff --git a/src/control/server/instance_test.go b/src/control/server/instance_test.go index cb6552fc84a..d6767df30c0 100644 --- a/src/control/server/instance_test.go +++ b/src/control/server/instance_test.go @@ -248,7 +248,7 @@ func (mi *MockInstance) RemoveSuperblock() error { return mi.cfg.RemoveSuperblockErr } -func (mi *MockInstance) Run(_ context.Context, _ bool) {} +func (mi *MockInstance) Run(_ context.Context) {} func (mi *MockInstance) SetupRank(_ context.Context, _ ranklist.Rank, _ uint32) error { return mi.cfg.SetupRankErr diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index f6f02e55731..eca82a964ff 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -540,6 +540,25 @@ func checkEngineTmpfsMem(srv *server, ei *EngineInstance, mi *common.MemInfo) er memRamdisk := uint64(sc.Scm.RamdiskSize) * humanize.GiByte memAvail := uint64(mi.MemAvailableKiB) * humanize.KiByte + // In the event that tmpfs was already mounted, we need to verify that it + // is the correct size and that the memory usage still makes sense. + if isMounted, err := ei.storage.ScmIsMounted(); err == nil && isMounted { + usage, err := ei.storage.GetScmUsage() + if err != nil { + return errors.Wrap(err, "unable to check tmpfs usage") + } + // Ensure that the existing ramdisk is not larger than the calculated + // optimal size, in order to avoid potential OOM situations. + if usage.TotalBytes > memRamdisk { + return storage.FaultRamdiskBadSize(usage.TotalBytes, memRamdisk) + } + // Looks OK, so we can return early and bypass additional checks. 
+ srv.log.Debugf("using existing tmpfs of size %s", humanize.IBytes(usage.TotalBytes)) + return nil + } else if err != nil { + return errors.Wrap(err, "unable to check for mounted tmpfs") + } + if err := checkMemForRamdisk(srv.log, memRamdisk, memAvail); err != nil { return err } diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 069fac5e028..1fb0567fadd 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -753,9 +753,11 @@ func TestServer_prepBdevStorage(t *testing.T) { func TestServer_checkEngineTmpfsMem(t *testing.T) { for name, tc := range map[string]struct { - srvCfgExtra func(*config.Server) *config.Server - memAvailGiB int - expErr error + srvCfgExtra func(*config.Server) *config.Server + memAvailGiB int + tmpfsMounted bool + tmpfsSize uint64 + expErr error }{ "pmem tier; skip check": { srvCfgExtra: func(sc *config.Server) *config.Server { @@ -780,6 +782,21 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { expErr: storage.FaultRamdiskLowMem("Available", 10*humanize.GiByte, 9*humanize.GiByte, 8*humanize.GiByte), }, + "tmpfs already mounted; more than calculated": { + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 11, + expErr: errors.New("ramdisk size"), + }, + "tmpfs already mounted; less than calculated": { + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 9, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(name) @@ -799,7 +816,19 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { ec := cfg.Engines[0] runner := engine.NewRunner(log, ec) - provider := storage.MockProvider(log, 0, &ec.Storage, nil, nil, nil, nil) + sysMockCfg := &sysprov.MockSysConfig{ + IsMountedBool: tc.tmpfsMounted, + } + if tc.tmpfsMounted { + sysMockCfg.GetfsUsageResps = []sysprov.GetfsUsageRetval{ + { + Total: tc.tmpfsSize * humanize.GiByte, + }, + } + } + sysMock := sysprov.NewMockSysProvider(log, sysMockCfg) + scmMock := &storage.MockScmProvider{} + provider := storage.MockProvider(log, 0, &ec.Storage, sysMock, scmMock, nil, nil) instance := NewEngineInstance(log, provider, nil, runner) srv, err := newServer(log, cfg, &system.FaultDomain{}) diff --git a/src/control/server/storage/faults.go b/src/control/server/storage/faults.go index fb64eabc51f..cbf029c93f2 100644 --- a/src/control/server/storage/faults.go +++ b/src/control/server/storage/faults.go @@ -78,6 +78,17 @@ func FaultRamdiskLowMem(memType string, confRamdiskSize, memNeed, memHave uint64 "file if reducing the requested amount of RAM is not possible") } +// FaultRamdiskBadSize indicates that the already-mounted ramdisk is out +// of spec with the calculated ramdisk size for the engine. +func FaultRamdiskBadSize(existingSize, calcSize uint64) *fault.Fault { + return storageFault( + code.ScmRamdiskBadSize, + fmt.Sprintf("already-mounted ramdisk size %s is too far from optimal size of %s", + humanize.IBytes(existingSize), humanize.IBytes(calcSize)), + fmt.Sprintf("unmount the ramdisk and allow DAOS to manage it, or remount with size %s", + humanize.IBytes(calcSize))) +} + // FaultConfigRamdiskUnderMinMem indicates that the tmpfs size requested in config is less than // minimum allowed. 
func FaultConfigRamdiskUnderMinMem(confSize, memRamdiskMin uint64) *fault.Fault { From e3cb161381c502137a64d4cb7b2c83c4f069c5cc Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 11 Sep 2023 21:06:02 +0100 Subject: [PATCH 09/29] DAOS-623 build: Fail the build stage in Jenkins for rpm build failure. (#13014) Some debugging for failures meant that the actual result was being masked. Move the debugging to the "unsuccessful" script and re-instate the fail-on-failure behaviour. Signed-off-by: Ashley Pittman --- ci/rpm/build_unsuccessful.sh | 5 ++++- utils/rpms/packaging/rpm_chrootbuild | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ci/rpm/build_unsuccessful.sh b/ci/rpm/build_unsuccessful.sh index 3c88a98dc56..d1d1f3606e4 100755 --- a/ci/rpm/build_unsuccessful.sh +++ b/ci/rpm/build_unsuccessful.sh @@ -8,7 +8,7 @@ mydir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" ci_envs="$mydir/../parse_ci_envs.sh" if [ -e "${ci_envs}" ]; then # at some point we want to use: shellcheck source=ci/parse_ci_envs.sh - # shellcheck disable=SC1091 + # shellcheck disable=SC1091,SC1090 source "${ci_envs}" fi @@ -25,6 +25,9 @@ if [ -d /var/cache/pbuilder/ ]; then exit 0 fi +rpm -q mock +mock --debug-config + mockroot="/var/lib/mock/$CHROOT_NAME" cat "$mockroot"/result/{root,build}.log 2>/dev/null || true diff --git a/utils/rpms/packaging/rpm_chrootbuild b/utils/rpms/packaging/rpm_chrootbuild index 73ce60c0746..9cf177e4a41 100755 --- a/utils/rpms/packaging/rpm_chrootbuild +++ b/utils/rpms/packaging/rpm_chrootbuild @@ -117,11 +117,7 @@ echo "\"\"\"" >> "$cfg_file" if [ -n "$DISTRO_VERSION" ]; then releasever_opt=("--config-opts=releasever=$DISTRO_VERSION") fi -# shellcheck disable=SC2086 -if ! eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" \ - ${repo_dels[*]} ${repo_adds[*]} --disablerepo=\*-debug* \ - "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET"; then - # Debug information for filing bugs on mock - rpm -q mock - mock --debug-config -fi + +# shellcheck disable=SC2086,SC2048,SC2294 +eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" ${repo_dels[*]} ${repo_adds[*]} \ + --disablerepo=\*-debug* "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET" From dedd8e14feceb6b835396739fa3bc6555f74e49c Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 12 Sep 2023 11:25:35 -0400 Subject: [PATCH 10/29] DAOS-14254 test: Fix functional test server cleanup (#13040) Resolve an issue where cleanup commands need to be run on a subset of the test hosts running servers. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 7b312457995..514375ccce9 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -331,7 +331,7 @@ def clean_mount(self, hosts, mount, verbose=True, index=None): self.log.debug("Checking for the existence of the %s mount point", mount) command = "test -d {}".format(mount) result = run_remote(self.log, hosts, command, verbose) - if result.passed: + if result.passed_hosts: mounted_hosts = result.passed_hosts # Remove the superblocks From a61940319d8cad8c6475113185327239d180837f Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 12 Sep 2023 20:08:59 +0100 Subject: [PATCH 11/29] DAOS-13392 test: Do not fail build on missing NLT stash. 
(#12941) Catch the error and continue if unstash fails, the code is written to handle this. Signed-off-by: Ashley Pittman --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0f257aa9dfe..ce8e946bc7b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -74,6 +74,11 @@ void job_step_update(def value) { Map nlt_test() { // groovylint-disable-next-line NoJavaUtilDate Date startDate = new Date() + try { + unstash('nltr') + } catch (e) { + print 'Unstash failed, results from NLT stage will not be included' + } sh label: 'Fault injection testing using NLT', script: './ci/docker_nlt.sh --class-name el8.fault-injection fi' List filesList = [] @@ -1104,7 +1109,6 @@ pipeline { sconsBuild(parallel_build: true, scons_args: 'PREFIX=/opt/daos TARGET_TYPE=release BUILD_TYPE=debug', build_deps: 'no')) - unstash('nltr') job_step_update(nlt_test()) recordCoverage(tools: [[parser: 'COBERTURA', pattern:'nltr.xml']], skipPublishingChecks: true, From 693821c079d82ce5f896f4f3a6a0e4776683ff46 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 13 Sep 2023 22:08:28 +0900 Subject: [PATCH 12/29] DAOS-14232 tests: Increase pool/svc.yaml timeout (#13009) Due to the recent increase of SWIM suspicion timeout, increase the test timeout of pool/svc.yaml. Signed-off-by: Li Wei --- src/tests/ftest/pool/svc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/pool/svc.yaml b/src/tests/ftest/pool/svc.yaml index 9c9f47448c0..a2811dae6ca 100644 --- a/src/tests/ftest/pool/svc.yaml +++ b/src/tests/ftest/pool/svc.yaml @@ -12,7 +12,7 @@ server_config: class: ram scm_mount: /mnt/daos system_ram_reserved: 1 -timeout: 200 +timeout: 300 pool: control_method: dmg scm_size: 134217728 From 7062dc4b754f4459f370dc270b9adb6c58fe35c2 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 13 Sep 2023 12:25:31 -0400 Subject: [PATCH 13/29] DAOS-14302 test: Skip DAOS_Drain_Simple w/o fault injection (#13039) Do not run the DAOS_Drain_Simple test when DAOS is built without fault injection. Also skipping the DAOS_Extend_Simple.EXTEND[7,9,11,13,15,16] and DAOS_EC.EC28 test when fault injection is disabled. 
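The mechanism is simply a guard at the top of each affected test entry point that bails out when the build lacks fault injection support. The sketch below shows the general shape of such a guard using a hypothetical have_fault_injection() probe; the real tests rely on the suite's FAULT_INJECTION_REQUIRED() macro (visible in the diff that follows), not on this code.

/* Sketch of the guard pattern; have_fault_injection() is a hypothetical stand-in. */
#include <stdbool.h>
#include <stdio.h>

static bool have_fault_injection(void)
{
	/* Hypothetical probe for a build with fault injection enabled. */
	return false;
}

static void drain_simple_test(void)
{
	if (!have_fault_injection()) {
		printf("SKIP: fault injection not enabled in this build\n");
		return;
	}
	/* ... exercise drain while injecting faults ... */
}

int main(void)
{
	drain_simple_test();
	return 0;
}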
Signed-off-by: Phil Henderson --- src/tests/suite/daos_drain_simple.c | 2 ++ src/tests/suite/daos_extend_simple.c | 12 ++++++++++++ src/tests/suite/daos_obj_ec.c | 2 ++ 3 files changed, 16 insertions(+) diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index a250d044f15..57a4772d666 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -1003,6 +1003,8 @@ run_daos_drain_simple_test(int rank, int size, int *sub_tests, { int rc = 0; + FAULT_INJECTION_REQUIRED(); + par_barrier(PAR_COMM_WORLD); if (sub_tests_size == 0) { sub_tests_size = ARRAY_SIZE(drain_tests); diff --git a/src/tests/suite/daos_extend_simple.c b/src/tests/suite/daos_extend_simple.c index d7e32e99a1f..c0635359527 100644 --- a/src/tests/suite/daos_extend_simple.c +++ b/src/tests/suite/daos_extend_simple.c @@ -499,6 +499,8 @@ dfs_extend_punch_kill(void **state) void dfs_extend_punch_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_PUNCH, extend_cb_internal, false); } @@ -511,6 +513,8 @@ dfs_extend_stat_kill(void **state) void dfs_extend_stat_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_STAT, extend_cb_internal, false); } @@ -523,6 +527,8 @@ dfs_extend_enumerate_kill(void **state) void dfs_extend_enumerate_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_ENUMERATE, extend_cb_internal, false); } @@ -535,6 +541,8 @@ dfs_extend_fetch_kill(void **state) void dfs_extend_fetch_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_FETCH, extend_cb_internal, false); } @@ -547,6 +555,8 @@ dfs_extend_write_kill(void **state) void dfs_extend_write_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_UPDATE, extend_cb_internal, false); } @@ -562,6 +572,8 @@ dfs_extend_fail_retry(void **state) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + attr.da_props = daos_prop_alloc(1); assert_non_null(attr.da_props); attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; diff --git a/src/tests/suite/daos_obj_ec.c b/src/tests/suite/daos_obj_ec.c index 609b0ab319f..2eef576d096 100644 --- a/src/tests/suite/daos_obj_ec.c +++ b/src/tests/suite/daos_obj_ec.c @@ -2464,6 +2464,8 @@ ec_three_stripes_nvme_io(void **state) daos_recx_t recx; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 6)) return; From b3edc5eee84e2579d17cb3483472959f820b4b7e Mon Sep 17 00:00:00 2001 From: Michael Hennecke Date: Wed, 13 Sep 2023 18:52:32 +0200 Subject: [PATCH 14/29] DAOS-14309 packaging: remove daos tests RPMs from distro (#13034) Do not ship DAOS test RPMs (add all daos*tests RPMs to .rpmignore) Updated .rpmignore to retire centos7 (no longer supported), and add el9 Signed-off-by: Michael Hennecke --- .rpmignore | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/.rpmignore b/.rpmignore index d0657d759c0..86666be323b 100644 --- a/.rpmignore +++ b/.rpmignore @@ -3,28 +3,22 @@ # but should not be included in the current release # -centos7/daos-client-tests-openmpi*.rpm -centos7/daos-firmware*.rpm -centos7/daos-mofed*.rpm -centos7/daos-serialize*.rpm -centos7/daos-server-tests-openmpi*.rpm -centos7/daos-tests-internal*.rpm -centos7/ucx*.rpm - -el8/daos-client-tests-openmpi*.rpm +el8/daos-*tests*.rpm el8/daos-firmware*.rpm el8/daos-mofed*.rpm el8/daos-serialize*.rpm -el8/daos-server-tests-openmpi*.rpm -el8/daos-tests-internal*.rpm 
el8/ucx*.rpm -leap15/daos-client-tests-openmpi*.rpm +el9/daos-*tests*.rpm +el9/daos-firmware*.rpm +el9/daos-mofed*.rpm +el9/daos-serialize*.rpm +el9/ucx*.rpm + +leap15/daos-*tests*.rpm leap15/daos-firmware*.rpm leap15/daos-mofed*.rpm leap15/daos-serialize*.rpm -leap15/daos-server-tests-openmpi*.rpm -leap15/daos-tests-internal*.rpm leap15/openucx*.rpm leap15/ucx*.rpm leap15/*protobuf-c*.rpm From 9f5c089852593e9901d1915b14d28ff5c0d0d1ee Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Thu, 14 Sep 2023 14:08:54 -0400 Subject: [PATCH 15/29] DAOS-14332 control: Use gRPC metadata for interop (#13018) The interoperability checking code relies on getting the peer component from the peer certificate. When running in insecure mode, the peer component information is unavailable, and the interoperability check defaults to the most stringent requirements. This patch adds new component/version headers to the grpc client request to allow the server to perform interoperability checks without the peer certificate. Signed-off-by: Michael MacDonald --- src/control/cmd/daos_agent/main.go | 1 + src/control/cmd/dmg/main.go | 1 + src/control/common/proto/consts.go | 14 +++ src/control/lib/control/interceptors.go | 30 +++++- src/control/lib/control/mocks.go | 6 ++ src/control/lib/control/rpc.go | 26 ++++- src/control/security/grpc_authorization.go | 24 +++++ .../security/grpc_authorization_test.go | 56 +++++++++++ src/control/server/interceptors.go | 98 ++++++++++++++----- src/control/server/interceptors_test.go | 98 ++++++++++++++++++- src/control/server/server.go | 1 + src/control/server/server_utils.go | 2 +- 12 files changed, 322 insertions(+), 35 deletions(-) create mode 100644 src/control/common/proto/consts.go diff --git a/src/control/cmd/daos_agent/main.go b/src/control/cmd/daos_agent/main.go index 8f5c135b4d9..f6906a1fc83 100644 --- a/src/control/cmd/daos_agent/main.go +++ b/src/control/cmd/daos_agent/main.go @@ -257,6 +257,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAgent), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index d9d1eacfa2d..a15c65867a4 100644 --- a/src/control/cmd/dmg/main.go +++ b/src/control/cmd/dmg/main.go @@ -302,6 +302,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAdmin), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/common/proto/consts.go b/src/control/common/proto/consts.go new file mode 100644 index 00000000000..039e7fe23f2 --- /dev/null +++ b/src/control/common/proto/consts.go @@ -0,0 +1,14 @@ +// +// (C) Copyright 2023 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package proto + +const ( + // DaosComponentHeader defines the header name used to convey the component name. + DaosComponentHeader = "x-daos-component" + // DaosVersionHeader defines the header name used to convey the component version. + DaosVersionHeader = "x-daos-version" +) diff --git a/src/control/lib/control/interceptors.go b/src/control/lib/control/interceptors.go index d3c4c3da375..d507845fc46 100644 --- a/src/control/lib/control/interceptors.go +++ b/src/control/lib/control/interceptors.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,8 +12,10 @@ import ( "github.com/pkg/errors" "google.golang.org/grpc" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/security" ) @@ -59,8 +61,8 @@ func streamErrorInterceptor() grpc.DialOption { } // unaryErrorInterceptor calls the specified unary RPC and returns any unwrapped errors. -func unaryErrorInterceptor() grpc.DialOption { - return grpc.WithUnaryInterceptor(func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { +func unaryErrorInterceptor() grpc.UnaryClientInterceptor { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { err := invoker(ctx, method, req, reply, cc, opts...) if err != nil { st := status.Convert(err) @@ -71,5 +73,25 @@ func unaryErrorInterceptor() grpc.DialOption { return connErrToFault(st, cc.Target()) } return nil - }) + } +} + +// unaryVersionedComponentInterceptor appends the component name and version to the +// outgoing request headers. +func unaryVersionedComponentInterceptor(comp build.Component) grpc.UnaryClientInterceptor { + return func(parent context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + // NB: The caller should specify its component, but as a fallback, we + // can make a decent guess about the calling component based on the method. + if comp == build.ComponentAny { + var err error + if comp, err = security.MethodToComponent(method); err != nil { + return errors.Wrap(err, "unable to determine component from method") + } + } + ctx := metadata.AppendToOutgoingContext(parent, + proto.DaosComponentHeader, comp.String(), + proto.DaosVersionHeader, build.DaosVersion, + ) + return invoker(ctx, method, req, reply, cc, opts...) + } } diff --git a/src/control/lib/control/mocks.go b/src/control/lib/control/mocks.go index 752d597abdb..077937d26a0 100644 --- a/src/control/lib/control/mocks.go +++ b/src/control/lib/control/mocks.go @@ -21,6 +21,7 @@ import ( "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/runtime/protoimpl" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" commonpb "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/proto/convert" @@ -50,6 +51,7 @@ type ( // for a MockInvoker. MockInvokerConfig struct { Sys string + Component build.Component UnaryError error UnaryResponse *UnaryResponse UnaryResponseSet []*UnaryResponse @@ -102,6 +104,10 @@ func (mi *MockInvoker) GetSystem() string { return mi.cfg.Sys } +func (mi *MockInvoker) GetComponent() build.Component { + return mi.cfg.Component +} + func (mi *MockInvoker) InvokeUnaryRPC(ctx context.Context, uReq UnaryRequest) (*UnaryResponse, error) { // Allow the test to override the timeouts set by the caller. if mi.cfg.ReqTimeout > 0 { diff --git a/src/control/lib/control/rpc.go b/src/control/lib/control/rpc.go index f5424ebaf91..e00374603c4 100644 --- a/src/control/lib/control/rpc.go +++ b/src/control/lib/control/rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +19,7 @@ import ( "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/fault/code" @@ -88,6 +89,7 @@ type ( UnaryInvoker interface { sysGetter debugLogger + GetComponent() build.Component InvokeUnaryRPC(ctx context.Context, req UnaryRequest) (*UnaryResponse, error) InvokeUnaryRPCAsync(ctx context.Context, req UnaryRequest) (HostResponseChan, error) } @@ -122,14 +124,22 @@ type ( // Client implements the Invoker interface and should be provided to // API methods to invoke RPCs. Client struct { - config *Config - log debugLogger + config *Config + log debugLogger + component build.Component } // ClientOption defines the signature for functional Client options. ClientOption func(c *Client) ) +// WithClientComponent sets the client's component. +func WithClientComponent(comp build.Component) ClientOption { + return func(c *Client) { + c.component = comp + } +} + // WithClientLogger sets the client's debugLogger. func WithClientLogger(log debugLogger) ClientOption { return func(c *Client) { @@ -171,6 +181,11 @@ func DefaultClient() *Client { ) } +// GetComponent returns the client's component. +func (c *Client) GetComponent() build.Component { + return c.component +} + // SetConfig sets the client configuration for an // existing Client. func (c *Client) SetConfig(cfg *Config) { @@ -196,7 +211,10 @@ func (c *Client) Debugf(fmtStr string, args ...interface{}) { func (c *Client) dialOptions() ([]grpc.DialOption, error) { opts := []grpc.DialOption{ streamErrorInterceptor(), - unaryErrorInterceptor(), + grpc.WithChainUnaryInterceptor( + unaryErrorInterceptor(), + unaryVersionedComponentInterceptor(c.GetComponent()), + ), grpc.FailOnNonTempDialError(true), } diff --git a/src/control/security/grpc_authorization.go b/src/control/security/grpc_authorization.go index 39a3d67dc2b..de9ef2bddf5 100644 --- a/src/control/security/grpc_authorization.go +++ b/src/control/security/grpc_authorization.go @@ -6,6 +6,12 @@ package security +import ( + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" +) + // Component represents the DAOS component being granted authorization. type Component int @@ -77,6 +83,24 @@ var methodAuthorizations = map[string][]Component{ "/RaftTransport/InstallSnapshot": {ComponentServer}, } +func methodToComponent(method string, methodAuthorizations map[string][]Component) (build.Component, error) { + comps, found := methodAuthorizations[method] + if !found || len(comps) == 0 { + return build.ComponentAny, errors.Errorf("method %q does not map to a known authorized component", method) + } else if len(comps) > 1 { + // In this case, the caller must explicitly set the component and cannot + // rely on this helper to resolve it. + return build.ComponentAny, errors.Errorf("method %q maps to multiple authorized components", method) + } + + return build.Component(comps[0].String()), nil +} + +// MethodToComponent resolves a gRPC method string to a build.Component. 
+func MethodToComponent(method string) (build.Component, error) { + return methodToComponent(method, methodAuthorizations) +} + // HasAccess check if the given component has access to method given in FullMethod func (c Component) HasAccess(FullMethod string) bool { compList, ok := methodAuthorizations[FullMethod] diff --git a/src/control/security/grpc_authorization_test.go b/src/control/security/grpc_authorization_test.go index 25b43873744..ee31dcbd0e2 100644 --- a/src/control/security/grpc_authorization_test.go +++ b/src/control/security/grpc_authorization_test.go @@ -12,6 +12,9 @@ import ( "strings" "testing" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" @@ -218,3 +221,56 @@ func TestSecurity_AuthorizedRpcsAreValid(t *testing.T) { }) } } + +func TestSecurity_MethodToCompnent(t *testing.T) { + for name, tc := range map[string]struct { + method string + authMap map[string][]Component + expComp build.Component + expErr error + }{ + "method maps to an unknown component": { + method: "/unknown", + expErr: errors.New("does not map"), + }, + "method maps to 0 components": { + method: "/zero", + authMap: map[string][]Component{ + "/zero": nil, + }, + expErr: errors.New("does not map"), + }, + "method maps to 2 components": { + method: "/two", + authMap: map[string][]Component{ + "/two": {ComponentAdmin, ComponentAgent}, + }, + expErr: errors.New("multiple authorized"), + }, + "method maps to 1 component": { + method: "/one", + authMap: map[string][]Component{ + "/one": {ComponentServer}, + }, + expComp: build.ComponentServer, + }, + } { + t.Run(name, func(t *testing.T) { + var gotComp build.Component + var gotErr error + + if tc.authMap != nil { + gotComp, gotErr = methodToComponent(tc.method, tc.authMap) + } else { + gotComp, gotErr = MethodToComponent(tc.method) + } + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expComp, gotComp, "unexpected component") + }) + } +} diff --git a/src/control/server/interceptors.go b/src/control/server/interceptors.go index 1762b58d790..1f0aa24efb6 100644 --- a/src/control/server/interceptors.go +++ b/src/control/server/interceptors.go @@ -16,6 +16,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "google.golang.org/grpc/status" "google.golang.org/protobuf/reflect/protoreflect" @@ -28,6 +29,10 @@ import ( "github.com/daos-stack/daos/src/control/system" ) +var ( + errNoReqMetadata = errors.New("no component/version metadata found in request") +) + func componentFromContext(ctx context.Context) (comp *security.Component, err error) { clientPeer, ok := peer.FromContext(ctx) if !ok { @@ -116,7 +121,25 @@ var selfServerComponent = func() *build.VersionedComponent { return self }() -func checkVersion(ctx context.Context, self *build.VersionedComponent, req interface{}) error { +func compVersionFromHeaders(ctx context.Context) (*build.VersionedComponent, error) { + md, hasMD := metadata.FromIncomingContext(ctx) + if !hasMD { + return nil, errNoReqMetadata + } + compName, hasName := md[proto.DaosComponentHeader] + if !hasName { + return nil, errNoReqMetadata + } + comp := build.Component(compName[0]) + compVersion, hasVersion := md[proto.DaosVersionHeader] + if !hasVersion { + return 
nil, errNoReqMetadata + } + + return build.NewVersionedComponent(comp, compVersion[0]) +} + +func checkVersion(ctx context.Context, log logging.Logger, self *build.VersionedComponent, req interface{}) error { // If we can't determine our own version, then there's no // checking to be done. if self.Version.IsZero() { @@ -127,33 +150,58 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter // are most stringent for server/server communication. We have // to set a default because this security component lookup // will fail if certificates are disabled. - buildComponent := build.ComponentServer + otherComponent := build.ComponentServer + otherVersion := build.MustNewVersion("0.0.0") secComponent, err := componentFromContext(ctx) if err == nil { - buildComponent = build.Component(secComponent.String()) + otherComponent = build.Component(secComponent.String()) } isInsecure := status.Code(err) == codes.Unauthenticated - otherVersion := build.MustNewVersion("0.0.0") - if sReq, ok := req.(interface{ GetSys() string }); ok { - comps := strings.Split(sReq.GetSys(), "-") - if len(comps) > 1 { - if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { - otherVersion = ver - } + fromHeaders, err := compVersionFromHeaders(ctx) + if err != nil && err != errNoReqMetadata { + return errors.Wrap(err, "failed to extract peer component/version from headers") + } + + // Prefer the new header-based component/version mechanism. + // If we are in secure mode, verify that the component presented + // in the header matches the certificate's component. + if fromHeaders != nil { + otherVersion = fromHeaders.Version + if isInsecure { + otherComponent = fromHeaders.Component + } else if otherComponent != fromHeaders.Component { + return status.Errorf(codes.PermissionDenied, + "component mismatch (req: %q != cert: %q)", fromHeaders.Component, otherComponent) } } else { - // If the request message type does not implement GetSys(), then - // there is no version to check. We leave message compatibility - // to lower layers. - return nil - } + // If we did not receive a version via request header, then we need to fall back + // to trying to pick it out of the overloaded system name field. + // + // TODO (DAOS-14336): Remove this once the compatibility window has closed (e.g. for 2.8+). + if sReq, ok := req.(interface{ GetSys() string }); ok { + comps := strings.Split(sReq.GetSys(), "-") + if len(comps) > 1 { + if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { + otherVersion = ver + } + } + } else { + // If the request message type does not implement GetSys(), then + // there is no version to check. We leave message compatibility + // to lower layers. + return nil + } - if isInsecure && !self.Version.Equals(otherVersion) { - return FaultNoCompatibilityInsecure(self.Version, otherVersion) + // If we're running without certificates and we didn't receive a component + // via headers, then we have to enforce the strictest compatibility requirements, + // i.e. exact same version. 
+ if isInsecure && !self.Version.Equals(otherVersion) { + return FaultNoCompatibilityInsecure(self.Version, otherVersion) + } } - other, err := build.NewVersionedComponent(buildComponent, otherVersion.String()) + other, err := build.NewVersionedComponent(otherComponent, otherVersion.String()) if err != nil { other = &build.VersionedComponent{ Component: "unknown", @@ -163,18 +211,22 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter } if err := build.CheckCompatibility(self, other); err != nil { + log.Errorf("%s is incompatible with %s", other, self) return FaultIncompatibleComponents(self, other) } + log.Debugf("%s is compatible with %s", other, self) return nil } -func unaryVersionInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { - if err := checkVersion(ctx, selfServerComponent, req); err != nil { - return nil, errors.Wrapf(err, "version check failed for %T", req) - } +func unaryVersionInterceptor(log logging.Logger) grpc.UnaryServerInterceptor { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + if err := checkVersion(ctx, log, selfServerComponent, req); err != nil { + return nil, errors.Wrapf(err, "version check failed for %T", req) + } - return handler(ctx, req) + return handler(ctx, req) + } } func unaryErrorInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { diff --git a/src/control/server/interceptors_test.go b/src/control/server/interceptors_test.go index 2dbfbca30c8..bdde5bde6e8 100644 --- a/src/control/server/interceptors_test.go +++ b/src/control/server/interceptors_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -16,12 +16,15 @@ import ( "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" ) type testStatus struct { @@ -141,10 +144,96 @@ func TestServer_checkVersion(t *testing.T) { otherVersion: "2.4.0", ctx: newTestAuthCtx(test.Context(t), "agent"), }, - "non-sys msg bypasses version checks": { + "non-sys msg bypasses version checks in secure mode": { selfVersion: "2.4.0", + ctx: newTestAuthCtx(test.Context(t), "agent"), nonSysMsg: true, }, + "insecure prelease agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.3.108", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.4.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "insecure 2.4.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "invalid component": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, "banana", + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("invalid component"), + }, + "header/certificate component mismatch": { + selfVersion: "2.4.0", + ctx: newTestAuthCtx( + metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, 
build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0"), + ), "agent"), + nonSysMsg: true, + expErr: errors.New("component mismatch"), + }, } { t.Run(name, func(t *testing.T) { ctx := test.Context(t) @@ -169,7 +258,10 @@ func TestServer_checkVersion(t *testing.T) { req = verReq } - gotErr := checkVersion(ctx, selfComp, req) + log, buf := logging.NewTestLogger(name) + test.ShowBufferOnFailure(t, buf) + + gotErr := checkVersion(ctx, log, selfComp, req) test.CmpErr(t, tc.expErr, gotErr) }) } diff --git a/src/control/server/server.go b/src/control/server/server.go index 8e5d921bb57..e4f7b5bdfa1 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -188,6 +188,7 @@ func (srv *server) createServices(ctx context.Context) (err error) { cliCfg := control.DefaultConfig() cliCfg.TransportConfig = srv.cfg.TransportConfig rpcClient := control.NewClient( + control.WithClientComponent(build.ComponentServer), control.WithConfig(cliCfg), control.WithClientLogger(srv.log)) diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index eca82a964ff..6e059aef32e 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -733,7 +733,7 @@ func getGrpcOpts(log logging.Logger, cfgTransport *security.TransportConfig, ldr unaryLoggingInterceptor(log, ldrChk), // must be first in order to properly log errors unaryErrorInterceptor, unaryStatusInterceptor, - unaryVersionInterceptor, + unaryVersionInterceptor(log), } streamInterceptors := []grpc.StreamServerInterceptor{ streamErrorInterceptor, From a03b55f78d227a838691dec4873a3d8ee3f6a99f Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 14 Sep 2023 19:55:14 +0100 Subject: [PATCH 16/29] DAOS-14223 control: Add --meta-size to dmg pool create (#13000) Add plumbing for new pool create option to specify size of blob on meta-role SSDs. In MD-on-SSD phase II this can be significantly larger than the VOS file in tmpfs in order to to support larger md capacity with limited memory capacity, Limit PR scope by only propagating new parameter to dRPC pool create handler and only supporting the new size when manually specifying SCM and NVMe storage sizes in dmg pool create call. 
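For illustration only (not part of this patch), a minimal standalone Go sketch of the manual-mode size rules described above, assuming the same flag semantics as the dmg changes below; the helper name validateManualSizes and the example values are hypothetical:

    package main

    import (
        "errors"
        "fmt"
    )

    // validateManualSizes restates the checks added to dmg pool create:
    // --meta-size is only honoured alongside a manual --scm-size, and it
    // may not be smaller than the per-engine SCM allocation.
    func validateManualSizes(scmBytes, metaBytes uint64) error {
        if scmBytes == 0 {
            return errors.New("either --size or --scm-size must be set")
        }
        if metaBytes > 0 && metaBytes < scmBytes {
            return fmt.Errorf("--meta-size (%d) can not be smaller than --scm-size (%d)",
                metaBytes, scmBytes)
        }
        return nil
    }

    func main() {
        // Example: 16 GiB SCM and a 32 GiB meta blob per engine.
        if err := validateManualSizes(16<<30, 32<<30); err != nil {
            fmt.Println("rejected:", err)
            return
        }
        fmt.Println("sizes accepted")
    }

A matching invocation would look something like "dmg pool create label --scm-size 16G --nvme-size 128G --meta-size 32G"; with auto sizing (--size) the new flag is rejected, as shown by the validation added below.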
Further work required to: * Propagate meta size throughout create call stack * Expand meta size param support to auto-sizing create options * Consolidate and improve control-plane pool create workflow and logic Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/pool.go | 37 +- src/control/cmd/dmg/pool_test.go | 34 +- src/control/common/proto/mgmt/pool.pb.go | 596 ++++++++++++----------- src/control/common/test/utils.go | 4 + src/control/lib/control/pool.go | 1 + src/control/lib/control/pool_test.go | 56 ++- src/control/server/ctl_storage.go | 3 + src/control/server/ctl_storage_rpc.go | 2 + src/control/server/engine/utils.go | 4 +- src/control/server/engine/utils_test.go | 2 +- src/control/server/mgmt_pool.go | 1 + src/control/server/mgmt_pool_test.go | 47 +- src/mgmt/pool.pb-c.c | 38 +- src/mgmt/pool.pb-c.h | 12 +- src/mgmt/srv_drpc.c | 3 +- src/mgmt/srv_internal.h | 2 +- src/mgmt/srv_pool.c | 9 +- src/mgmt/tests/mocks.c | 8 +- src/proto/mgmt/pool.proto | 2 + 19 files changed, 530 insertions(+), 331 deletions(-) diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index 39e1c4179c8..df6936c8a9f 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -199,6 +199,7 @@ type PoolCreateCmd struct { NumSvcReps uint32 `short:"v" long:"nsvc" description:"Number of pool service replicas"` ScmSize sizeFlag `short:"s" long:"scm-size" description:"Per-engine SCM allocation for DAOS pool (manual)"` NVMeSize sizeFlag `short:"n" long:"nvme-size" description:"Per-engine NVMe allocation for DAOS pool (manual)"` + MetaSize sizeFlag `long:"meta-size" description:"In MD-on-SSD mode specify meta blob size to be used in DAOS pool (manual)"` RankList ui.RankSetFlag `short:"r" long:"ranks" description:"Storage engine unique identifiers (ranks) for DAOS pool"` Args struct { @@ -208,11 +209,18 @@ type PoolCreateCmd struct { // Execute is run when PoolCreateCmd subcommand is activated func (cmd *PoolCreateCmd) Execute(args []string) error { - if cmd.Size.IsSet() && (cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet()) { - return errIncompatFlags("size", "scm-size", "nvme-size") - } - if !cmd.Size.IsSet() && !cmd.ScmSize.IsSet() { - return errors.New("either --size or --scm-size must be supplied") + if cmd.Size.IsSet() { + if cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet() { + return errIncompatFlags("size", "scm-size", "nvme-size") + } + if cmd.MetaSize.IsSet() { + // NOTE DAOS-14223: --meta-size value is currently not taken into account + // when storage tier sizes are auto-calculated so only + // support in manual mode. 
+ return errors.New("--meta-size can only be set if --scm-size is set") + } + } else if !cmd.ScmSize.IsSet() { + return errors.New("either --size or --scm-size must be set") } if cmd.Args.PoolLabel != "" { @@ -299,13 +307,22 @@ func (cmd *PoolCreateCmd) Execute(args []string) error { scmBytes := cmd.ScmSize.bytes nvmeBytes := cmd.NVMeSize.bytes + metaBytes := cmd.MetaSize.bytes scmRatio := cmd.updateRequest(req, scmBytes, nvmeBytes) - cmd.Infof("Creating DAOS pool with manual per-engine storage allocation: "+ - "%s SCM, %s NVMe (%0.2f%% ratio)", - humanize.Bytes(scmBytes), - humanize.Bytes(nvmeBytes), - scmRatio*100) + if metaBytes > 0 && metaBytes < scmBytes { + return errors.Errorf("--meta-size (%s) can not be smaller than --scm-size (%s)", + humanize.Bytes(metaBytes), humanize.Bytes(scmBytes)) + } + req.MetaBytes = metaBytes + + msg := fmt.Sprintf("Creating DAOS pool with manual per-engine storage allocation:"+ + " %s SCM, %s NVMe (%0.2f%% ratio)", humanize.Bytes(scmBytes), + humanize.Bytes(nvmeBytes), scmRatio*100) + if metaBytes > 0 { + msg += fmt.Sprintf(" with %s meta-blob-size", humanize.Bytes(metaBytes)) + } + cmd.Info(msg) } resp, err := control.PoolCreate(context.Background(), cmd.ctlInvoker, req) diff --git a/src/control/cmd/dmg/pool_test.go b/src/control/cmd/dmg/pool_test.go index 8f17d7e6e4f..0c53669aa10 100644 --- a/src/control/cmd/dmg/pool_test.go +++ b/src/control/cmd/dmg/pool_test.go @@ -226,7 +226,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with missing size", "pool create label", "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with missing label", @@ -276,6 +276,12 @@ func TestPoolCommands(t *testing.T) { "", errors.New("may not be mixed"), }, + { + "Create pool with incompatible arguments (auto with meta-blob)", + fmt.Sprintf("pool create label --size %s --meta-size 32G", testSizeStr), + "", + errors.New("can only be set"), + }, { "Create pool with too-large tier-ratio (auto)", fmt.Sprintf("pool create label --size %s --tier-ratio 200", testSizeStr), @@ -355,7 +361,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with incompatible arguments (-n without -s)", fmt.Sprintf("pool create label --nvme-size %s", testSizeStr), "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with minimal arguments", @@ -374,6 +380,30 @@ func TestPoolCommands(t *testing.T) { }, " "), nil, }, + { + "Create pool with manual meta blob size", + fmt.Sprintf("pool create label --scm-size %s --meta-size 1024G", + testSizeStr), + strings.Join([]string{ + printRequest(t, &control.PoolCreateReq{ + User: eUsr.Username + "@", + UserGroup: eGrp.Name + "@", + Ranks: []ranklist.Rank{}, + TierBytes: []uint64{uint64(testSize), 0}, + MetaBytes: humanize.GByte * 1024, + Properties: []*daos.PoolProperty{ + propWithVal("label", "label"), + }, + }), + }, " "), + nil, + }, + { + "Create pool with manual meta blob size smaller than scm", + "pool create label --scm-size 1026G --meta-size 1024G", + "", + errors.New("can not be smaller than"), + }, { "Create pool with manual ranks", fmt.Sprintf("pool create label --size %s --ranks 1,2", testSizeStr), diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 00841f9fa28..60ae78baa40 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -308,13 +308,14 @@ type PoolCreateReq struct { // representing members of the tree in a breadth-first traversal order. // Each domain above rank consists of: (level, id, num children) // Each rank consists of: (rank number) - FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format - Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas - Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) - Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) - Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) - Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) - Tierbytes []uint64 `protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format + Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas + Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) + Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) + Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) + Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) + Tierbytes []uint64 `protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + MetaBlobSize uint64 `protobuf:"varint,14,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateReq) Reset() { @@ -440,17 +441,25 @@ func (x *PoolCreateReq) GetTierbytes() []uint64 { return nil } +func (x *PoolCreateReq) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolCreateResp returns created pool uuid and ranks. 
type PoolCreateResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code - Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader - SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks - TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks - TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" json:"tier_bytes,omitempty"` // storage tiers allocated to pool + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code + Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader + SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks + TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks + TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" json:"tier_bytes,omitempty"` // storage tiers allocated to pool + MetaBlobSize uint64 `protobuf:"varint,6,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateResp) Reset() { @@ -520,6 +529,13 @@ func (x *PoolCreateResp) GetTierBytes() []uint64 { return nil } +func (x *PoolCreateResp) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolDestroyReq supplies pool identifier and force flag. 
type PoolDestroyReq struct { state protoimpl.MessageState @@ -2823,7 +2839,7 @@ var File_mgmt_pool_proto protoreflect.FileDescriptor var file_mgmt_pool_proto_rawDesc = []byte{ 0x0a, 0x0f, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x70, 0x6f, 0x6f, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xff, 0x02, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, + 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xa5, 0x03, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, @@ -2847,294 +2863,298 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, - 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x97, 0x01, 0x0a, 0x0e, 0x50, 0x6f, - 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, - 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x67, 0x74, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, - 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, - 0x74, 0x65, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, - 0x72, 0x6f, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, - 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, - 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, - 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, - 0x63, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 
0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, - 0x0a, 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x5f, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0c, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, - 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0e, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0xbd, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x74, 0x67, 0x74, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, + 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, + 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, + 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x72, 0x65, 0x63, 0x75, + 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x14, 
0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, - 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, - 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, - 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, - 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, - 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x64, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x5f, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x66, 0x6f, 0x72, + 0x63, 0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x63, + 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, 0x68, + 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, + 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x63, 0x6f, 0x75, + 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 
0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, - 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, - 0xa6, 0x01, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, + 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, + 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, - 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, - 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, - 0x79, 0x74, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, - 0x61, 0x69, 0x6e, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, 0x22, 0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, - 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, - 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, - 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, - 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, - 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, - 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, - 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, - 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2e, 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, - 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, - 0x6c, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, - 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, - 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, - 0x65, 0x70, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, - 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x22, 0x4c, - 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0xa6, 0x01, 0x0a, 0x0d, + 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x7b, 0x0a, 0x0c, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, - 0x72, 
0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, - 0x04, 0x43, 0x6f, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, - 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, - 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, - 0x65, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, - 0x16, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, - 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, - 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, - 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, - 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, - 0x72, 0x65, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x03, 0x6d, 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x03, 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xbb, 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, + 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x73, 0x22, 
0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, + 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, + 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, 0xa3, 0x01, + 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, + 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, + 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, + 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, + 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2e, + 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x21, + 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, + 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, + 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, + 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 
0x65, 0x22, 0x4c, 0x0a, 0x0b, 0x4c, 0x69, + 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x7b, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x1d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, - 0x18, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, - 0x22, 0xed, 0x04, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, - 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, - 0x69, 0x76, 0x65, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, - 0x12, 0x29, 0x0a, 0x10, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, - 0x62, 0x6c, 0x65, 0x64, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, - 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, - 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 
0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, - 0x0e, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, - 0x67, 0x69, 0x6e, 0x65, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x45, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, - 0x6c, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0d, 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, - 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, - 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, - 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, - 0x2c, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, - 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, - 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, - 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x18, 0x02, + 0x20, 
0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x0a, 0x63, + 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, 0x04, 0x43, 0x6f, 0x6e, + 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, + 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x45, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x69, 0x6e, 0x63, + 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, 0x6e, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, + 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, + 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, + 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6d, 0x69, + 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, + 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xbb, + 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x33, 0x0a, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, + 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, + 0x63, 0x6f, 0x72, 0x64, 0x73, 
0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, + 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, + 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, 0x22, 0xed, 0x04, 0x0a, + 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, + 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, + 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, + 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, + 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 
0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, + 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, + 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, + 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, + 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, + 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, + 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, + 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, + 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, + 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, + 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 
0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, - 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, - 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, - 0x75, 
0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, - 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, - 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, - 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, - 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, - 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, - 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, - 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, - 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, - 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, - 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, - 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, - 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, - 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, - 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, - 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, - 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, - 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x47, 0x65, 0x74, 
0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, + 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, + 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 
0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, + 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, + 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, + 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, + 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, + 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, + 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, + 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, + 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, + 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, + 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, + 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index cd88b5acf25..81c17facecd 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -129,6 +129,10 @@ func CmpErrBool(want, got error) bool { func CmpErr(t *testing.T, want, got error) { t.Helper() + if want != nil && want.Error() == "" { + 
t.Fatal("comparison with empty error will always return true, don't do it") + } + if !CmpErrBool(want, got) { t.Fatalf("unexpected error\n(wanted: %v, got: %v)", want, got) } diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index b849099c511..4982a9edc61 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -248,6 +248,7 @@ type ( // manual params Ranks []ranklist.Rank TierBytes []uint64 + MetaBytes uint64 `json:"meta_blob_size"` } // PoolCreateResp contains the response from a pool create request. diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index 7ee31167317..4d2e85ef913 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -17,6 +17,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/fault" @@ -349,13 +350,58 @@ func TestControl_PoolEvict(t *testing.T) { } } +func strVal(s string) daos.PoolPropertyValue { + v := daos.PoolPropertyValue{} + v.SetString(s) + return v +} + +func TestControl_PoolCreateReq_Convert(t *testing.T) { + req := &PoolCreateReq{ + User: "bob", + UserGroup: "work", + NumSvcReps: 2, + TotalBytes: 1, + TierRatio: []float64{0.06, 0.94}, + NumRanks: 3, + Ranks: []ranklist.Rank{1, 2, 3}, + TierBytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBytes: 2 * humanize.GiByte, + Properties: []*daos.PoolProperty{ + { + Name: "label", + Number: daos.PoolPropertyLabel, + Value: strVal("foo"), + }, + }, + } + reqPB := new(mgmtpb.PoolCreateReq) + if err := convert.Types(req, reqPB); err != nil { + t.Fatal(err) + } + expReqPB := &mgmtpb.PoolCreateReq{ + User: "bob", + Usergroup: "work", + Numsvcreps: 2, + Totalbytes: 1, + Tierratio: []float64{0.06, 0.94}, + Numranks: 3, + Ranks: []uint32{1, 2, 3}, + Tierbytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: []*mgmtpb.PoolProperty{ + {Number: 1, Value: &mgmtpb.PoolProperty_Strval{"foo"}}, + }, + } + + cmpOpt := cmpopts.IgnoreUnexported(mgmtpb.PoolCreateReq{}, mgmtpb.PoolProperty{}) + if diff := cmp.Diff(expReqPB, reqPB, cmpOpt); diff != "" { + t.Fatalf("Unexpected response (-want, +got):\n%s\n", diff) + } +} + func TestControl_PoolCreate(t *testing.T) { mockExt := auth.NewMockExtWithUser("poolTest", 0, 0) - strVal := func(s string) daos.PoolPropertyValue { - v := daos.PoolPropertyValue{} - v.SetString(s) - return v - } for name, tc := range map[string]struct { mic *MockInvokerConfig diff --git a/src/control/server/ctl_storage.go b/src/control/server/ctl_storage.go index fd75f01263c..f4747f87513 100644 --- a/src/control/server/ctl_storage.go +++ b/src/control/server/ctl_storage.go @@ -212,6 +212,9 @@ func (cs *ControlService) scanAssignedBdevs(ctx context.Context, nsps []*ctl.Scm continue } + // NOTE DAOS-14223: This metadata size calculation won't necessarily match + // the meta blob size on SSD if --meta-size is specified in + // pool create command. 
md_size = mp.GetUsableBytes() / uint64(ei.GetTargetCount()) engineCfg, err := cs.getEngineCfgFromScmNsp(nsp) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 1880eb4c7f9..f0d0e0e58f6 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -308,6 +308,8 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju engineTargetNb := uint64(engineCfg.TargetCount) if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { + // TODO DAOS-14223: GetMetaSize() should reflect custom values set through pool + // create --meta-size option. clusterCount := getClusterCount(dev.GetMetaSize(), engineTargetNb, clusterSize) c.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) diff --git a/src/control/server/engine/utils.go b/src/control/server/engine/utils.go index 56734985c22..9f3098389a9 100644 --- a/src/control/server/engine/utils.go +++ b/src/control/server/engine/utils.go @@ -141,8 +141,8 @@ var ( "CRT", "RPC", "BULK", "CORPC", "GRP", "LM", "HG", // CaRT subsystems "EXTERNAL", "ST", "IV", "CTL", } - errLogNameAllWithOther = errors.New("'all' identifier cannot be used with any other") - errLogNameAllInMasks = errors.New("'all' identifier cannot be used in log mask level assignments") + errLogNameAllWithOther = errors.New("'all' identifier can not be used with any other log identifier") + errLogNameAllInMasks = errors.New("'all' identifier can not be used in log mask level assignments") ) func isLogLevelValid(name string) bool { diff --git a/src/control/server/engine/utils_test.go b/src/control/server/engine/utils_test.go index 41b765b4c7c..f4c9aa7a49a 100644 --- a/src/control/server/engine/utils_test.go +++ b/src/control/server/engine/utils_test.go @@ -47,7 +47,7 @@ func Test_ValidateLogMasks(t *testing.T) { }, "single level; single assignment; illegal use of all": { masks: "ERR,all=DBUG", - expErr: errors.New(""), + expErr: errors.New("identifier can not be used"), }, "single level; single assignment; bad level": { masks: "ERR,mgmt=DEG", diff --git a/src/control/server/mgmt_pool.go b/src/control/server/mgmt_pool.go index 9189a7ef116..6ae1e1287da 100644 --- a/src/control/server/mgmt_pool.go +++ b/src/control/server/mgmt_pool.go @@ -299,6 +299,7 @@ func (svc *mgmtSvc) poolCreate(parent context.Context, req *mgmtpb.PoolCreateReq resp.SvcReps = ranklist.RanksToUint32(ps.Replicas) resp.TgtRanks = ranklist.RanksToUint32(ps.Storage.CreationRanks()) resp.TierBytes = ps.Storage.PerRankTierStorage + // TODO DAOS-14223: Store Meta-Blob-Size in sysdb. 
return resp, nil } diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go index e484b7be19b..004dc19a08a 100644 --- a/src/control/server/mgmt_pool_test.go +++ b/src/control/server/mgmt_pool_test.go @@ -314,6 +314,7 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { targetCount int memberCount int req *mgmtpb.PoolCreateReq + drpcRet *mgmtpb.PoolCreateResp expResp *mgmtpb.PoolCreateResp expErr error }{ @@ -388,11 +389,34 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, TgtRanks: []uint32{0, 1}, }, }, + "successful creation with meta size": { + targetCount: 8, + req: &mgmtpb.PoolCreateReq{ + Uuid: test.MockUUID(1), + Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: testPoolLabelProp(), + }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + expResp: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + }, "successful creation minimum size": { targetCount: 8, req: &mgmtpb.PoolCreateReq{ @@ -400,6 +424,10 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, TgtRanks: []uint32{0, 1}, @@ -412,9 +440,19 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Totalbytes: 100 * humanize.GiByte, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ - TierBytes: []uint64{((100 * humanize.GiByte) * DefaultPoolScmRatio) / 2, (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2}, - TgtRanks: []uint32{0, 1}, + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, }, }, "failed creation invalid ranks": { @@ -512,14 +550,15 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { numMembers = 2 } for i := 0; i < numMembers; i++ { - if _, err := tc.mgmtSvc.membership.Add(system.MockMember(t, uint32(i), system.MemberStateJoined)); err != nil { + mm := system.MockMember(t, uint32(i), system.MemberStateJoined) + if _, err := tc.mgmtSvc.membership.Add(mm); err != nil { t.Fatal(err) } } if tc.setupMockDrpc == nil { tc.setupMockDrpc = func(svc *mgmtSvc, err error) { - setupMockDrpcClient(tc.mgmtSvc, tc.expResp, tc.expErr) + setupMockDrpcClient(tc.mgmtSvc, tc.drpcRet, tc.expErr) } } tc.setupMockDrpc(tc.mgmtSvc, tc.expErr) diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 912218b58bd..6b740050c98 100644 --- a/src/mgmt/pool.pb-c.c +++ 
b/src/mgmt/pool.pb-c.c @@ -1504,7 +1504,7 @@ void mgmt__pool_query_target_resp__free_unpacked assert(message->base.descriptor == &mgmt__pool_query_target_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[13] = +static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[14] = { { "uuid", @@ -1662,10 +1662,23 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 14, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolCreateReq, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { 4, /* field[4] = acl */ 6, /* field[6] = faultDomains */ + 13, /* field[13] = meta_blob_size */ 10, /* field[10] = numranks */ 7, /* field[7] = numsvcreps */ 5, /* field[5] = properties */ @@ -1681,7 +1694,7 @@ static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 13 } + { 0, 14 } }; const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = { @@ -1691,14 +1704,14 @@ const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = "Mgmt__PoolCreateReq", "mgmt", sizeof(Mgmt__PoolCreateReq), - 13, + 14, mgmt__pool_create_req__field_descriptors, mgmt__pool_create_req__field_indices_by_name, 1, mgmt__pool_create_req__number_ranges, (ProtobufCMessageInit) mgmt__pool_create_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[6] = { { "status", @@ -1760,9 +1773,22 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[ 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolCreateResp, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { 1, /* field[1] = leader */ + 5, /* field[5] = meta_blob_size */ 0, /* field[0] = status */ 2, /* field[2] = svc_reps */ 3, /* field[3] = tgt_ranks */ @@ -1771,7 +1797,7 @@ static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = { @@ -1781,7 +1807,7 @@ const ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = "Mgmt__PoolCreateResp", "mgmt", sizeof(Mgmt__PoolCreateResp), - 5, + 6, mgmt__pool_create_resp__field_descriptors, mgmt__pool_create_resp__field_indices_by_name, 1, mgmt__pool_create_resp__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index 1fc8a18ae76..9357267326f 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -209,10 +209,14 @@ struct _Mgmt__PoolCreateReq */ size_t n_tierbytes; uint64_t *tierbytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define MGMT__POOL_CREATE_REQ__INIT \ { 
PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL, 0 } /* @@ -244,10 +248,14 @@ struct _Mgmt__PoolCreateResp */ size_t n_tier_bytes; uint64_t *tier_bytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define MGMT__POOL_CREATE_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_resp__descriptor) \ - , 0, 0, 0,NULL, 0,NULL, 0,NULL } + , 0, 0, 0,NULL, 0,NULL, 0,NULL, 0 } /* diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index a5d089f2c11..9ef6054beda 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -497,7 +497,8 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) /* Ranks to allocate targets (in) & svc for pool replicas (out). */ rc = ds_mgmt_create_pool(pool_uuid, req->sys, "pmem", targets, req->tierbytes[DAOS_MEDIA_SCM], req->tierbytes[DAOS_MEDIA_NVME], - prop, &svc, req->n_faultdomains, req->faultdomains); + prop, &svc, req->n_faultdomains, req->faultdomains, + req->meta_blob_size); if (rc != 0) { D_ERROR("failed to create pool: "DF_RC"\n", DP_RC(rc)); goto out; diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 9cfc0b5a0ab..6bd142022ab 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -66,7 +66,7 @@ int ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in); /** srv_pool.c */ int ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, - int domains_nr, uint32_t *domains); + int domains_nr, uint32_t *domains, size_t meta_blob_size); int ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks); int ds_mgmt_evict_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uuid_t *handles, size_t n_handles, uint32_t destroy, uint32_t force_destroy, diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index e5f94333f1c..5f99c9ef406 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -168,16 +168,17 @@ ds_mgmt_pool_svc_create(uuid_t pool_uuid, int ntargets, const char *group, d_ran } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, size_t nvme_size, - daos_prop_t *prop, d_rank_list_t **svcp, - int domains_nr, uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { d_rank_list_t *pg_ranks = NULL; d_rank_list_t *pg_targets = NULL; int rc; int rc_cleanup; + D_DEBUG(DB_MGMT, DF_UUID ": meta blob size %ld", DP_UUID(pool_uuid), meta_blob_size); + /* Sanity check targets versus cart's current primary group members. * If any targets not in PG, flag error before MGMT_TGT_ corpcs fail. 
*/ diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 4b104f19195..80f95891c8d 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -509,11 +509,9 @@ ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in) } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, - size_t nvme_size, daos_prop_t *prop, - d_rank_list_t **svcp, int nr_domains, - uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { return 0; } diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 1dd9842db51..51b55b1254f 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -32,6 +32,7 @@ message PoolCreateReq { uint32 numranks = 11; // Number of target ranks to use (auto config) repeated uint32 ranks = 12; // target ranks (manual config) repeated uint64 tierbytes = 13; // Size in bytes of storage tiers (manual config) + uint64 meta_blob_size = 14; // Size in bytes of metadata blob on SSD (manual config) } // PoolCreateResp returns created pool uuid and ranks. @@ -41,6 +42,7 @@ message PoolCreateResp { repeated uint32 svc_reps = 3; // pool service replica ranks repeated uint32 tgt_ranks = 4; // pool target ranks repeated uint64 tier_bytes = 5; // storage tiers allocated to pool + uint64 meta_blob_size = 6; // Size in bytes of metadata blob on SSD (manual config) } // PoolDestroyReq supplies pool identifier and force flag. From 35f269b9c38ffb833f82968f8f85a1d4d3e9e118 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 14 Sep 2023 23:52:10 -0700 Subject: [PATCH 17/29] DAOS-14338 object: create migrate container in all cases (#13041) Since exclude/drain might change the shard location as well, so let's create the container for all casing during migration. Checking pool stopping before creating container during migration. Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index c3bc5472b83..fa7579ca516 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1484,21 +1484,31 @@ migrate_punch(struct migrate_pool_tls *tls, struct migrate_one *mrone, static int migrate_get_cont_child(struct migrate_pool_tls *tls, uuid_t cont_uuid, - struct ds_cont_child **cont_p) + struct ds_cont_child **cont_p, bool create) { struct ds_cont_child *cont_child = NULL; int rc; *cont_p = NULL; - if (tls->mpt_opc == RB_OP_EXTEND || tls->mpt_opc == RB_OP_REINT) { - /* For extend and reintegration, it may need create the container */ + if (tls->mpt_pool->spc_pool->sp_stopping) { + D_DEBUG(DB_REBUILD, DF_UUID "pool is being destroyed.\n", + DP_UUID(tls->mpt_pool_uuid)); + return 0; + } + + if (create) { + /* Since the shard might be moved different location for any pool operation, + * so it may need create the container in all cases. 
+ */ rc = ds_cont_child_open_create(tls->mpt_pool_uuid, cont_uuid, &cont_child); if (rc != 0) { - if (rc == -DER_SHUTDOWN) { + if (rc == -DER_SHUTDOWN || (cont_child && cont_child->sc_stopping)) { D_DEBUG(DB_REBUILD, DF_UUID "container is being destroyed\n", DP_UUID(cont_uuid)); rc = 0; } + if (cont_child) + ds_cont_child_put(cont_child); return rc; } } else { @@ -1532,7 +1542,7 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, int rc; D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont); + rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont, true); if (rc || cont == NULL) D_GOTO(cont_put, rc); @@ -2441,7 +2451,7 @@ migrate_obj_punch_one(void *data) tls, DP_UUID(tls->mpt_pool_uuid), arg->version, arg->punched_epoch, DP_UOID(arg->oid)); - rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont); + rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont, true); if (rc != 0 || cont == NULL) D_GOTO(put, rc); @@ -2943,7 +2953,7 @@ migrate_obj_ult(void *data) struct ds_cont_child *cont_child = NULL; /* check again to see if the container is being destroyed. */ - migrate_get_cont_child(tls, arg->cont_uuid, &cont_child); + migrate_get_cont_child(tls, arg->cont_uuid, &cont_child, false); if (cont_child == NULL || cont_child->sc_stopping) rc = 0; From 8647532007bd3a932692f85738e00962e30a45fd Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 15 Sep 2023 08:26:09 +0100 Subject: [PATCH 18/29] DAOS-6854 dfuse: Use dfs_ostatx for aync getattr calls. (#12832) This allows stat calls to happen with both rpcs in parallel reducing wall-clock time for the query, and moves to a async interface so that it can be completed in a callback and not require a dedicated thread for the duration of the operaton. Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 25 +++++++++------- src/client/dfuse/ops/fgetattr.c | 51 +++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index c0a6453354d..4c4d88bbbdb 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -323,17 +323,22 @@ struct dfuse_inode_ops { }; struct dfuse_event { - fuse_req_t de_req; /**< The fuse request handle */ - daos_event_t de_ev; - size_t de_len; /**< The size returned by daos */ - d_iov_t de_iov; - d_sg_list_t de_sgl; - d_list_t de_list; - struct dfuse_eq *de_eqt; - struct dfuse_obj_hdl *de_oh; - off_t de_req_position; /**< The file position requested by fuse */ - size_t de_req_len; + fuse_req_t de_req; /**< The fuse request handle */ + daos_event_t de_ev; + size_t de_len; /**< The size returned by daos */ + d_iov_t de_iov; + d_sg_list_t de_sgl; + d_list_t de_list; + struct dfuse_eq *de_eqt; + union { + struct dfuse_obj_hdl *de_oh; + struct dfuse_inode_entry *de_ie; + }; + off_t de_req_position; /**< The file position requested by fuse */ + size_t de_req_len; void (*de_complete_cb)(struct dfuse_event *ev); + + struct stat de_attr; }; extern struct dfuse_inode_ops dfuse_dfs_ops; diff --git a/src/client/dfuse/ops/fgetattr.c b/src/client/dfuse/ops/fgetattr.c index 481c9fc56e6..6fdee73515c 100644 --- a/src/client/dfuse/ops/fgetattr.c +++ b/src/client/dfuse/ops/fgetattr.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,11 +7,32 @@ #include "dfuse_common.h" #include "dfuse.h" +static void +dfuse_cb_getattr_cb(struct dfuse_event *ev) +{ + if (ev->de_ev.ev_error != 0) { + DFUSE_REPLY_ERR_RAW(ev->de_ie, ev->de_req, ev->de_ev.ev_error); + D_GOTO(release, 0); + } + + ev->de_attr.st_ino = ev->de_ie->ie_stat.st_ino; + + ev->de_ie->ie_stat = ev->de_attr; + + DFUSE_REPLY_ATTR(ev->de_ie, ev->de_req, &ev->de_attr); +release: + daos_event_fini(&ev->de_ev); + D_FREE(ev); +} + void dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) { - struct stat attr = {}; - int rc; + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_event *ev; + uint64_t eqt_idx; + struct dfuse_eq *eqt; + int rc; if (ie->ie_unlinked) { DFUSE_TRA_DEBUG(ie, "File is unlinked, returning most recent data"); @@ -19,17 +40,29 @@ dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) return; } - rc = dfs_ostat(ie->ie_dfs->dfs_ns, ie->ie_obj, &attr); - if (rc != 0) - D_GOTO(err, rc); + eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); + eqt = &dfuse_info->di_eqt[eqt_idx % dfuse_info->di_eq_count]; + D_ALLOC_PTR(ev); + if (ev == NULL) + D_GOTO(err, rc = ENOMEM); - attr.st_ino = ie->ie_stat.st_ino; + ev->de_req = req; + ev->de_complete_cb = dfuse_cb_getattr_cb; + ev->de_ie = ie; - ie->ie_stat = attr; + rc = daos_event_init(&ev->de_ev, eqt->de_eq, NULL); + if (rc != -DER_SUCCESS) + D_GOTO(ev, rc = daos_der2errno(rc)); + + rc = dfs_ostatx(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_attr, &ev->de_ev); + if (rc != 0) + D_GOTO(ev, rc); - DFUSE_REPLY_ATTR(ie, req, &attr); + sem_post(&eqt->de_sem); return; +ev: + D_FREE(ev); err: DFUSE_REPLY_ERR_RAW(ie, req, rc); } From f479b9e45f8a26ac7a1ed0f1bf57443749804592 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 15 Sep 2023 11:22:50 -0400 Subject: [PATCH 19/29] DAOS-14302 test: Fix skipping DAOS_Drain_Simple tests (#13051) Signed-off-by: Phil Henderson --- src/tests/suite/daos_drain_simple.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index 57a4772d666..65bcd069de9 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -38,6 +38,8 @@ drain_dkeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -103,6 +105,8 @@ cont_open_in_drain(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -157,6 +161,8 @@ drain_akeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -208,6 +214,8 @@ drain_indexes(void **state) int i; int j; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -267,6 +275,7 @@ drain_snap_update_keys(void **state) char buf[256]; int buf_len = 256; + FAULT_INJECTION_REQUIRED(); if (!test_runable(arg, 4)) return; @@ -343,6 +352,8 @@ drain_snap_punch_keys(void **state) int buf_len = 256; uint32_t number; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -429,6 +440,8 @@ drain_multiple(void **state) int j; int k; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -495,6 +508,8 @@ drain_large_rec(void **state) char buffer[5000]; char v_buffer[5000]; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -543,6 +558,8 @@ drain_objects(void **state) int tgt = 
DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -569,6 +586,8 @@ drain_fail_and_retry_objects(void **state) daos_obj_id_t oids[OBJ_NR]; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -598,6 +617,8 @@ drain_then_exclude(void **state) test_arg_t *arg = *state; daos_obj_id_t oid; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -849,6 +870,8 @@ dfs_extend_drain_common(void **state, int opc, uint32_t objclass) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -1003,8 +1026,6 @@ run_daos_drain_simple_test(int rank, int size, int *sub_tests, { int rc = 0; - FAULT_INJECTION_REQUIRED(); - par_barrier(PAR_COMM_WORLD); if (sub_tests_size == 0) { sub_tests_size = ARRAY_SIZE(drain_tests); From 7172771be81b0999b995c51eb69a9e44a59cb24a Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 16 Sep 2023 10:06:04 -0500 Subject: [PATCH 20/29] DAOS-14110 il: create and do io against an EQ in the IL (#13007) - Use an EQ in the IL for dfs read and write for better performance and support of fork mode in fio() - add a new env variable D_IL_MAX_EQ to make the number of EQs / network contexts configurable. 64 is the default and the maximum allowed. Asking for more than that will not be an error though and the library will print a warning an set that to 64. - in case of multiple threads, each thread will create a new EQ till the max are created, then other threads will just round robin use of the EQs that are created. - When fork is called, the main thread eq is replaced with a new one from the child process (with at_fork child handler) to prevent sharing of any internal network context resources. - Update the IL VM build test to use less EQs than the default to prevent OOM since the VMs are constrained on memory. Signed-off-by: Mohamad Chaarawi Co-authored-by: Fan Yong --- SConstruct | 2 +- src/client/api/event.c | 12 ++-- src/client/dfs/dfs.c | 8 ++- src/client/dfuse/il/int_posix.c | 99 +++++++++++++++++++++++++++-- src/client/dfuse/il/int_read.c | 47 ++++++++++++-- src/client/dfuse/il/int_write.c | 42 ++++++++++-- src/client/dfuse/il/ioil.h | 4 +- src/common/misc.c | 16 +++-- src/include/daos/dtx.h | 1 + src/tests/ftest/dfuse/daos_build.py | 3 +- 10 files changed, 204 insertions(+), 30 deletions(-) diff --git a/SConstruct b/SConstruct index abc04dc8b64..8e59cc56c4f 100644 --- a/SConstruct +++ b/SConstruct @@ -363,7 +363,7 @@ MINIMAL_ENV = ('HOME', 'TERM', 'SSH_AUTH_SOCK', 'http_proxy', 'https_proxy', 'PK # Environment variables that are also kept when LD_PRELOAD is set. 
PRELOAD_ENV = ('LD_PRELOAD', 'D_LOG_FILE', 'DAOS_AGENT_DRPC_DIR', 'D_LOG_MASK', 'DD_MASK', - 'DD_SUBSYS') + 'DD_SUBSYS', 'D_IL_MAX_EQ') def scons(): diff --git a/src/client/api/event.c b/src/client/api/event.c index 85dd514da17..e6996fb6155 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -484,8 +484,13 @@ daos_event_complete(struct daos_event *ev, int rc) } if (evx->evx_status == DAOS_EVS_READY || evx->evx_status == DAOS_EVS_COMPLETED || - evx->evx_status == DAOS_EVS_ABORTED) + evx->evx_status == DAOS_EVS_ABORTED) { + if (evx->is_errno) + ev->ev_error = daos_der2errno(rc); + else + ev->ev_error = rc; goto out; + } D_ASSERT(evx->evx_status == DAOS_EVS_RUNNING); @@ -830,7 +835,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) eqx = daos_eq_lookup(eqh); if (eqx == NULL) { - D_ERROR("eqh nonexist.\n"); + D_ERROR("daos_eq_lookup() failed: "DF_RC"\n", DP_RC(-DER_NONEXIST)); return -DER_NONEXIST; } @@ -862,8 +867,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) if (eqx->eqx_ctx != NULL) { rc = crt_context_flush(eqx->eqx_ctx, 0); if (rc != 0) { - D_ERROR("failed to flush client context: "DF_RC"\n", - DP_RC(rc)); + D_ERROR("failed to flush client context: "DF_RC"\n", DP_RC(rc)); return rc; } } diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 3313c188c64..e502a95ff83 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -4489,7 +4489,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, D_ALLOC_PTR(params); if (params == NULL) - D_GOTO(err_task, rc = ENOMEM); + D_GOTO(err_task, rc = -DER_NOMEM); params->read_size = read_size; @@ -4513,10 +4513,12 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, daos_task_set_priv(task, params); rc = tse_task_register_cbs(task, NULL, NULL, 0, read_cb, NULL, 0); if (rc) - D_GOTO(err_params, rc = daos_der2errno(rc)); + D_GOTO(err_params, rc); rc = dc_task_schedule(task, true); - return daos_der2errno(rc); + if (rc) + D_GOTO(err_task, rc); + return 0; err_params: D_FREE(params); diff --git a/src/client/dfuse/il/int_posix.c b/src/client/dfuse/il/int_posix.c index b845c85c05c..93a91cd6215 100644 --- a/src/client/dfuse/il/int_posix.c +++ b/src/client/dfuse/il/int_posix.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -33,6 +33,10 @@ FOREACH_INTERCEPT(IOIL_FORWARD_DECL) +static __thread daos_handle_t ioil_eqh; + +#define IOIL_MAX_EQ 64 + struct ioil_pool { daos_handle_t iop_poh; uuid_t iop_uuid; @@ -43,13 +47,17 @@ struct ioil_pool { struct ioil_global { pthread_mutex_t iog_lock; d_list_t iog_pools_head; + daos_handle_t iog_main_eqh; + daos_handle_t iog_eqs[IOIL_MAX_EQ]; + uint16_t iog_eq_count_max; + uint16_t iog_eq_count; + uint16_t iog_eq_idx; pid_t iog_init_tid; bool iog_initialized; bool iog_no_daos; bool iog_daos_init; bool iog_show_summary; /**< Should a summary be shown at teardown */ - unsigned iog_report_count; /**< Number of operations that should be logged */ ATOMIC uint64_t iog_file_count; /**< Number of file opens intercepted */ @@ -277,6 +285,7 @@ ioil_init(void) struct rlimit rlimit; int rc; uint64_t report_count = 0; + uint64_t eq_count = 0; pthread_once(&init_links_flag, init_links); @@ -319,6 +328,18 @@ ioil_init(void) if (rc) return; + rc = d_getenv_uint64_t("D_IL_MAX_EQ", &eq_count); + if (rc != -DER_NONEXIST) { + if (eq_count > IOIL_MAX_EQ) { + DFUSE_LOG_WARNING("Max EQ count (%"PRIu64") should not exceed: %d", + eq_count, IOIL_MAX_EQ); + eq_count = IOIL_MAX_EQ; + } + ioil_iog.iog_eq_count_max = (uint16_t)eq_count; + } else { + ioil_iog.iog_eq_count_max = IOIL_MAX_EQ; + } + ioil_iog.iog_initialized = true; } @@ -377,12 +398,55 @@ ioil_fini(void) ioil_shrink_pool(pool); } - if (ioil_iog.iog_daos_init) + if (ioil_iog.iog_daos_init) { + int i; + + /** destroy EQs created by threads */ + for (i = 0; i < ioil_iog.iog_eq_count; i++) + daos_eq_destroy(ioil_iog.iog_eqs[i], 0); + /** destroy main thread eq */ + if (daos_handle_is_valid(ioil_iog.iog_main_eqh)) + daos_eq_destroy(ioil_iog.iog_main_eqh, 0); daos_fini(); + } ioil_iog.iog_daos_init = false; daos_debug_fini(); } +int +ioil_get_eqh(daos_handle_t *eqh) +{ + int rc; + + if (daos_handle_is_valid(ioil_eqh)) { + *eqh = ioil_eqh; + return 0; + } + + /** No EQ support requested */ + if (ioil_iog.iog_eq_count_max == 0) + return -1; + + rc = pthread_mutex_lock(&ioil_iog.iog_lock); + /** create a new EQ if the EQ pool is not full; otherwise round robin EQ use from pool */ + if (ioil_iog.iog_eq_count >= ioil_iog.iog_eq_count_max) { + ioil_eqh = ioil_iog.iog_eqs[ioil_iog.iog_eq_idx ++]; + if (ioil_iog.iog_eq_idx == ioil_iog.iog_eq_count_max) + ioil_iog.iog_eq_idx = 0; + } else { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + pthread_mutex_unlock(&ioil_iog.iog_lock); + return -1; + } + ioil_iog.iog_eqs[ioil_iog.iog_eq_count] = ioil_eqh; + ioil_iog.iog_eq_count ++; + } + pthread_mutex_unlock(&ioil_iog.iog_lock); + *eqh = ioil_eqh; + return 0; +} + /* Get the object handle for the file itself */ static int fetch_dfs_obj_handle(int fd, struct fd_entry *entry) @@ -729,6 +793,20 @@ call_daos_init(int fd) return rcb; } +static void +child_hdlr(void) +{ + int rc; + + daos_dti_reset(); + ioil_eqh = DAOS_HDL_INVAL; + rc = daos_eq_create(&ioil_eqh); + if (rc) + DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + else + ioil_iog.iog_main_eqh = ioil_eqh; +} + /* Returns true on success */ static bool check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) @@ -764,10 +842,23 @@ check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) rc = pthread_mutex_lock(&ioil_iog.iog_lock); D_ASSERT(rc == 0); - if (!ioil_iog.iog_daos_init) + if (!ioil_iog.iog_daos_init) { if (!call_daos_init(fd)) goto err; + if (ioil_iog.iog_eq_count_max) { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + 
DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + D_GOTO(err, rc = daos_der2errno(rc)); + } + ioil_iog.iog_main_eqh = ioil_eqh; + + rc = pthread_atfork(NULL, NULL, &child_hdlr); + D_ASSERT(rc == 0); + } + } + d_list_for_each_entry(pool, &ioil_iog.iog_pools_head, iop_pools) { if (uuid_compare(pool->iop_uuid, il_reply.fir_pool) != 0) continue; diff --git a/src/client/dfuse/il/int_read.c b/src/client/dfuse/il/int_read.c index 6b5ee1fd7b5..497e39273ab 100644 --- a/src/client/dfuse/il/int_read.c +++ b/src/client/dfuse/il/int_read.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,17 +15,52 @@ static ssize_t read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - daos_size_t read_size = 0; - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + daos_size_t read_size = 0; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); sgl.sg_nr = 1; d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, NULL); + + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, + &read_size, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, + NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_read() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/int_write.c b/src/client/dfuse/il/int_write.c index fc602f0a1c3..abbb573638d 100644 --- a/src/client/dfuse/il/int_write.c +++ b/src/client/dfuse/il/int_write.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,9 +15,11 @@ ssize_t ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); @@ -25,7 +27,37 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_write() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/ioil.h b/src/client/dfuse/il/ioil.h index 8c4a7205e4e..b9581b3bd77 100644 --- a/src/client/dfuse/il/ioil.h +++ b/src/client/dfuse/il/ioil.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -53,5 +53,7 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en ssize_t ioil_do_pwritev(const struct iovec *iov, int count, off_t position, struct fd_entry *entry, int *errcode); +int +ioil_get_eqh(daos_handle_t *eqh); #endif /* __IOIL_H__ */ diff --git a/src/common/misc.c b/src/common/misc.c index bc902538e1a..eeb5c4522ef 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -705,6 +705,8 @@ daos_crt_init_opt_get(bool server, int ctx_nr) return &daos_crt_init_opt; } +static __thread uuid_t dti_uuid; + void daos_dti_gen_unique(struct dtx_id *dti) { @@ -719,19 +721,23 @@ daos_dti_gen_unique(struct dtx_id *dti) void daos_dti_gen(struct dtx_id *dti, bool zero) { - static __thread uuid_t uuid; - if (zero) { memset(dti, 0, sizeof(*dti)); } else { - if (uuid_is_null(uuid)) - uuid_generate(uuid); + if (uuid_is_null(dti_uuid)) + uuid_generate(dti_uuid); - uuid_copy(dti->dti_uuid, uuid); + uuid_copy(dti->dti_uuid, dti_uuid); dti->dti_hlc = d_hlc_get(); } } +void +daos_dti_reset(void) +{ + memset(dti_uuid, 0, sizeof(dti_uuid)); +} + /** * daos_recx_alloc/_free to provide same log facility for recx's alloc and free * for iom->iom_recxs' usage for example. 
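For context, the asynchronous I/O pattern this patch adds to the interception library reduces to the steps below. This is only a condensed sketch of the read path from int_read.c above, assuming an already-mounted dfs_t and an open dfs_obj_t (both passed in as parameters here) and the public daos.h / daos_fs.h headers; the D_IL_MAX_EQ round-robin pool and the pthread_atfork() handling from int_posix.c are left out, and error handling is trimmed to the essentials.

#include <daos.h>
#include <daos_fs.h>
#include <sched.h>
#include <stdbool.h>

/* Sketch: issue one asynchronous dfs_read() against a private event queue
 * and poll it to completion. Returns an errno-style code, as dfs does.
 */
static int
il_sketch_async_read(dfs_t *dfs, dfs_obj_t *obj, void *buf, daos_size_t len,
		     daos_off_t off, daos_size_t *read_size)
{
	daos_handle_t	eqh;
	daos_event_t	ev;
	d_sg_list_t	sgl = {0};
	d_iov_t		iov = {0};
	bool		done = false;
	int		rc;

	/* The IL keeps one EQ per thread (up to D_IL_MAX_EQ); a private EQ
	 * is created here only to keep the sketch self-contained. */
	rc = daos_eq_create(&eqh);
	if (rc)
		return daos_der2errno(rc);

	d_iov_set(&iov, buf, len);
	sgl.sg_nr   = 1;
	sgl.sg_iovs = &iov;

	rc = daos_event_init(&ev, eqh, NULL);
	if (rc) {
		rc = daos_der2errno(rc);
		goto out_eq;
	}

	/* Passing an event makes dfs_read() return immediately. */
	rc = dfs_read(dfs, obj, &sgl, off, read_size, &ev);
	if (rc)
		goto out_ev;

	/* Poll for completion, yielding between checks as the IL does. */
	while (!done) {
		rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &done);
		if (rc) {
			rc = daos_der2errno(rc);
			goto out_ev;
		}
		if (!done)
			sched_yield();
	}
	rc = ev.ev_error;	/* completion status of the read itself */

out_ev:
	daos_event_fini(&ev);
out_eq:
	daos_eq_destroy(eqh, 0);
	return rc;
}

The write path in int_write.c above follows the same shape with dfs_write() in place of dfs_read(), and when no EQ is available the library falls back to the plain blocking call, exactly as the diff shows.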
diff --git a/src/include/daos/dtx.h b/src/include/daos/dtx.h index 272c041dabf..14b2337ea0f 100644 --- a/src/include/daos/dtx.h +++ b/src/include/daos/dtx.h @@ -174,6 +174,7 @@ struct dtx_id { void daos_dti_gen_unique(struct dtx_id *dti); void daos_dti_gen(struct dtx_id *dti, bool zero); +void daos_dti_reset(void); static inline void daos_dti_copy(struct dtx_id *des, const struct dtx_id *src) diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index acb2b8cb6a5..5edd0b328df 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -135,9 +135,11 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): # Note that run_on_vms does not tell ftest where to run, this should be set according to # the test tags so the test can run with appropriate settings. + remote_env = {} if run_on_vms: dfuse_namespace = dfuse_namespace = "/run/dfuse_vm/*" build_jobs = 6 * 2 + remote_env['D_IL_MAX_EQ'] = '6' intercept_jobs = build_jobs if intercept: @@ -189,7 +191,6 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): mount_dir = self.dfuse.mount_dir.value build_dir = os.path.join(mount_dir, 'daos') - remote_env = {} remote_env['PATH'] = '{}:$PATH'.format(os.path.join(mount_dir, 'venv', 'bin')) remote_env['VIRTUAL_ENV'] = os.path.join(mount_dir, 'venv') remote_env['COVFILE'] = os.environ['COVFILE'] From f6b1ec8bea82c291ca61a2d1924ab46c403e91a9 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 18 Sep 2023 16:39:20 -0400 Subject: [PATCH 21/29] DAOS-14385 test: Fix TestPool.pool_query_delay use (#13062) When using the TestPool.pool_query_delay BasicParameter access its value property not the object. Signed-off-by: Phil Henderson --- src/tests/ftest/util/test_utils_pool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 333cc2c93b2..0826ea7d864 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -729,10 +729,10 @@ def query(self, show_enabled=False, show_disabled=False): "test yaml parameter.".format( self.pool_query_timeout.value, self.identifier)) from error - if self.pool_query_delay: + if self.pool_query_delay.value: self.log.info( "Waiting %s seconds before issuing next dmg pool query", - self.pool_query_delay) + self.pool_query_delay.value) sleep(self.pool_query_delay.value) @fail_on(CommandFailure) From 0cf35608bfbde2d2f3704e061031f89432bd7618 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 19 Sep 2023 14:33:32 +0800 Subject: [PATCH 22/29] DAOS-13562 vea: add bitmap to optimize small allocation (#12678) Currently VEA use extents to manage free space, for small extent(<64MiB) it is managed by a size tree, while large extents sorted by heap tree. To reduce metadata overhead, VEA can use bitmap to manage free space for small allocation. In this case, each free fragment can be represented by a few bits or bytes, instead of a B+Tree leaf node. vea_stree showed it reduced heap_bytes by ~28% if all allocation could be allocated from bitmap. For old pools, bitmap feature will not be enabled unless "upgrade" command is triggered. 
Required-githooks: true Signed-off-by: Wang Shilong --- src/common/ad_mem.c | 99 +---- src/common/misc.c | 105 ++++++ src/include/daos/common.h | 21 ++ src/include/daos_srv/vea.h | 45 ++- src/vea/tests/vea_stress.c | 25 +- src/vea/tests/vea_ut.c | 428 +++++++++++++++++++--- src/vea/vea_alloc.c | 562 +++++++++++++++++++++++----- src/vea/vea_api.c | 399 ++++++++++++++------ src/vea/vea_free.c | 669 ++++++++++++++++++++++++++++------ src/vea/vea_hint.c | 4 +- src/vea/vea_init.c | 82 +++-- src/vea/vea_internal.h | 207 +++++++++-- src/vea/vea_util.c | 267 +++++++++++--- src/vos/tests/vts_aggregate.c | 9 +- src/vos/vos_pool.c | 5 + 15 files changed, 2301 insertions(+), 626 deletions(-) diff --git a/src/common/ad_mem.c b/src/common/ad_mem.c index c3454a4cd2d..675906d466e 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -34,7 +34,6 @@ static int arena_tx_publish(struct ad_arena *arena, struct ad_tx *tx); static void arena_dump(struct ad_arena *arena); static inline int group_unit_avail(const struct ad_group_df *gd); static inline int group_weight(const struct ad_group_df *gd); -static int find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); #define ASSERT_DUMP_ARENA(cond, arena) \ do { \ @@ -129,21 +128,6 @@ static struct ad_group_spec grp_specs_large[] = { static struct ad_blob *dummy_blob; -static inline void -setbits64(uint64_t *bmap, int at, int bits) -{ - setbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -static inline void -clrbits64(uint64_t *bmap, int at, int bits) -{ - clrbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -#define setbit64(bm, at) setbit(((uint8_t *)bm), at) -#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) -#define isset64(bm, at) isset(((uint8_t *)bm), at) static int group_u2b(int unit, int unit_nr) @@ -1007,7 +991,7 @@ arena_find(struct ad_blob *blob, uint32_t *arena_id, struct ad_arena_df **ad_p) if (id == AD_ARENA_ANY) { int bits = 1; - id = find_bits(bd->bd_bmap, blob->bb_bmap_rsv, blob_bmap_size(blob), 1, &bits); + id = daos_find_bits(bd->bd_bmap, blob->bb_bmap_rsv, blob_bmap_size(blob), 1, &bits); if (id < 0) { rc = -DER_NOSPACE; D_ERROR("Blob %s is full, cannot create more arena, "DF_RC"\n", @@ -1867,83 +1851,6 @@ arena_remove_grp(struct ad_arena *arena, struct ad_group *group) arena->ar_grp_nr--; } -/** Find requested number of unused bits (neither set it @used or @reserved */ -static int -find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) -{ - int nr_saved; - int at_saved; - int nr; - int at; - int i; - int j; - - nr = nr_saved = 0; - at = at_saved = -1; - - for (i = 0; i < bmap_sz; i++) { - uint64_t free_bits = ~used[i]; - - if (reserved) - free_bits &= ~reserved[i]; - - if (free_bits == 0) { /* no space in the current int64 */ - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - continue; - } - - j = ffsll(free_bits); - D_ASSERT(j > 0); - if (at >= 0 && j == 1) { - D_ASSERT(nr > 0); - nr++; - } else { - at = i * 64 + j - 1; - nr = 1; - } - - for (; j < 64; j++) { - if (nr == *bits) /* done */ - goto out; - - if (isset64(&free_bits, j)) { - if (at < 0) - at = i * 64 + j; - nr++; - continue; - } - - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - if ((free_bits >> j) == 0) - break; - } - if (nr == *bits) - goto out; - } - out: - if (nr == *bits || nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - - if (nr_saved >= bits_min) - *bits = nr_saved; - else - at_saved = -1; - - return at_saved; -} - /** reserve 
a new group within @arena */ static int arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, @@ -1981,7 +1888,7 @@ arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, if (bits_min > bits) bits_min = bits; - bit_at = find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); + bit_at = daos_find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); if (bit_at < 0) return -DER_NOSPACE; @@ -2076,7 +1983,7 @@ group_reserve_addr(struct ad_group *grp, struct ad_reserv_act *act) int b = 1; int at; - at = find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); + at = daos_find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); /* NB: bitmap may includes more bits than the actual number of units */ if (at < 0 || at >= gd->gd_unit_nr) return 0; diff --git a/src/common/misc.c b/src/common/misc.c index eeb5c4522ef..a3a8c7bfd6c 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -779,3 +779,108 @@ daos_hlc2timestamp(uint64_t hlc, time_t *ts) *ts = tspec.tv_sec; return 0; } + +/** Find requested number of unused bits (neither set it @used or @reserved */ +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) +{ + int nr_saved; + int at_saved; + int nr; + int at; + int i; + int j; + + nr = nr_saved = 0; + at = at_saved = -1; + + for (i = 0; i < bmap_sz; i++) { + uint64_t free_bits = ~used[i]; + + if (reserved) + free_bits &= ~reserved[i]; + + if (free_bits == 0) { /* no space in the current int64 */ + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + continue; + } + + j = ffsll(free_bits); + D_ASSERT(j > 0); + if (at >= 0 && j == 1) { + D_ASSERT(nr > 0); + nr++; + } else { + at = i * 64 + j - 1; + nr = 1; + } + + for (; j < 64; j++) { + if (nr == *bits) /* done */ + goto out; + + if (isset64(&free_bits, j)) { + if (at < 0) + at = i * 64 + j; + nr++; + continue; + } + + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + if ((free_bits >> j) == 0) + break; + } + if (nr == *bits) + goto out; + } + out: + if (nr == *bits || nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + + if (nr_saved >= bits_min) + *bits = nr_saved; + else + at_saved = -1; + + return at_saved; +} + +int +daos_count_free_bits(uint64_t *used, int bmap_sz) +{ + int i; + int j; + int nr = 0; + + for (i = 0; i < bmap_sz; i++) { + uint64_t free_bits = ~used[i]; + + /* no free bits in the current int64 */ + if (free_bits == 0) + continue; + + j = ffsll(free_bits); + D_ASSERT(j > 0); + nr++; + for (; j < 64; j++) { + if (isset64(&free_bits, j)) + nr++; + if ((free_bits >> j) == 0) + break; + } + } + + return nr; +} diff --git a/src/include/daos/common.h b/src/include/daos/common.h index c7af0fc6563..78acb71858c 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -237,6 +237,27 @@ setbit_range(uint8_t *bitmap, uint32_t start, uint32_t end) setbit(bitmap, index); } +static inline void +setbits64(uint64_t *bmap, int at, int bits) +{ + setbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +static inline void +clrbits64(uint64_t *bmap, int at, int bits) +{ + clrbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +#define setbit64(bm, at) setbit(((uint8_t *)bm), at) +#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) +#define isset64(bm, at) isset(((uint8_t *)bm), at) + +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); +int +daos_count_free_bits(uint64_t *used, int bmap_sz); + 
static inline unsigned int daos_power2_nbits(unsigned int val) { diff --git a/src/include/daos_srv/vea.h b/src/include/daos_srv/vea.h index bdcd6c2ad21..1b37d1c042f 100644 --- a/src/include/daos_srv/vea.h +++ b/src/include/daos_srv/vea.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -20,23 +20,6 @@ #include #include -/* Common free extent structure for both SCM & in-memory index */ -struct vea_free_extent { - uint64_t vfe_blk_off; /* Block offset of the extent */ - uint32_t vfe_blk_cnt; /* Total blocks of the extent */ - uint32_t vfe_age; /* Monotonic timestamp */ -}; - -/* Maximum extents a non-contiguous allocation can have */ -#define VEA_EXT_VECTOR_MAX 9 - -/* Allocated extent vector */ -struct vea_ext_vector { - uint64_t vev_blk_off[VEA_EXT_VECTOR_MAX]; - uint32_t vev_blk_cnt[VEA_EXT_VECTOR_MAX]; - uint32_t vev_size; /* Size of the extent vector */ -}; - /* Reserved extent(s) */ struct vea_resrvd_ext { /* Link to a list for a series of vea_reserve() calls */ @@ -49,8 +32,12 @@ struct vea_resrvd_ext { uint64_t vre_hint_seq; /* Total reserved blocks */ uint32_t vre_blk_cnt; + /* New extent allocated for bitmap */ + uint32_t vre_new_bitmap_chunk:1; /* Extent vector for non-contiguous reserve */ struct vea_ext_vector *vre_vector; + /* private pointer */ + void *vre_private; }; /* @@ -83,6 +70,8 @@ struct vea_unmap_context { bool vnc_ext_flush; }; +#define VEA_COMPAT_FEATURE_BITMAP (1 << 0) + /* Free space tracking information on SCM */ struct vea_space_df { uint32_t vsd_magic; @@ -95,8 +84,8 @@ struct vea_space_df { uint64_t vsd_tot_blks; /* Free extent tree, sorted by offset */ struct btr_root vsd_free_tree; - /* Allocated extent vector tree, for non-contiguous allocation */ - struct btr_root vsd_vec_tree; + /* Free bitmap tree, sorted by offset */ + struct btr_root vsd_bitmap_tree; }; /* VEA attributes */ @@ -116,8 +105,10 @@ struct vea_stat { uint64_t vs_resrv_hint; /* Number of hint reserve */ uint64_t vs_resrv_large; /* Number of large reserve */ uint64_t vs_resrv_small; /* Number of small reserve */ + uint64_t vs_resrv_bitmap; /* Number of bitmap reserve */ uint64_t vs_frags_large; /* Large free frags */ uint64_t vs_frags_small; /* Small free frags */ + uint64_t vs_frags_bitmap; /* Bitmap frags */ uint64_t vs_frags_aging; /* Aging frags */ }; @@ -148,6 +139,20 @@ int vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, struct vea_space_df *md, uint32_t blk_sz, uint32_t hdr_blks, uint64_t capacity, vea_format_callback_t cb, void *cb_data, bool force); +/** + * Upgrade VEA to support latest disk format + * + * \param vsi [IN] In-memory compound free extent index + * \param umem [IN] An instance of SCM + * \param md [IN] The allocation metadata on SCM + * \param version [IN] Version which we try to upgrade + * + * \return Zero on success, in-memory compound free extent + * index returned by @vsi; Appropriated negative + * value on error + */ +int vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version); /** * Load space tracking information from SCM to initialize the in-memory compound diff --git a/src/vea/tests/vea_stress.c b/src/vea/tests/vea_stress.c index 49d56e684cd..b50f37f8e7a 100644 --- a/src/vea/tests/vea_stress.c +++ b/src/vea/tests/vea_stress.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,6 +24,7 @@ uint64_t pool_capacity = (1024ULL << 30); /* 1TB */ unsigned int cont_per_pool = 1; unsigned int obj_per_cont = 100; unsigned int test_duration = (2 * 60); /* 2 mins */ +unsigned int upd_blks_max = 256; /* 1MB by default */ unsigned int rand_seed; bool loading_test; /* test loading pool */ @@ -40,7 +41,6 @@ enum { #define VS_RSRV_CNT_MAX 10 /* extents */ #define VS_FREE_CNT_MAX 30 /* extents */ #define VS_MERGE_CNT_MAX 10 /* extents */ -#define VS_UPD_BLKS_MAX 256 /* 1MB */ #define VS_AGG_BLKS_MAX 1024 /* 4MB */ struct vs_perf_cntr { @@ -311,7 +311,7 @@ vs_update(struct vea_stress_pool *vs_pool) rsrv_cnt = get_random_count(VS_RSRV_CNT_MAX); for (i = 0; i < rsrv_cnt; i++) { - blk_cnt = get_random_count(VS_UPD_BLKS_MAX); + blk_cnt = get_random_count(upd_blks_max); cur_ts = daos_getutime(); rc = vea_reserve(vs_pool->vsp_vsi, blk_cnt, hint, &r_list); @@ -601,10 +601,11 @@ vs_stop_run(struct vea_stress_pool *vs_pool, int rc) } fprintf(stdout, "free_blks:["DF_12U64","DF_12U64"] frags_l:"DF_12U64" frags_s:"DF_12U64" " - "frags_a:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" r_small:"DF_12U64"\n", + "frags_a:"DF_12U64" frags_bitmap:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" " + "r_small:"DF_12U64" r_bitmap:"DF_12U64"\n", stat.vs_free_persistent, stat.vs_free_transient, stat.vs_frags_large, - stat.vs_frags_small, stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, - stat.vs_resrv_small); + stat.vs_frags_small, stat.vs_frags_aging, stat.vs_frags_bitmap, + stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small, stat.vs_resrv_bitmap); return stop; } @@ -873,6 +874,7 @@ vs_init(void) const char vs_stress_options[] = "Available options are:\n" +"-b max blocks per update\n" "-C pool capacity\n" "-c container nr\n" "-d test duration in seconds\n" @@ -932,6 +934,7 @@ vs_op2str(unsigned int op) int main(int argc, char **argv) { static struct option long_ops[] = { + { "block_max", required_argument, NULL, 'b' }, { "capacity", required_argument, NULL, 'C' }, { "cont_nr", required_argument, NULL, 'c' }, { "duration", required_argument, NULL, 'd' }, @@ -949,8 +952,16 @@ int main(int argc, char **argv) rand_seed = (unsigned int)(time(NULL) & 0xFFFFFFFFUL); memset(pool_file, 0, sizeof(pool_file)); - while ((rc = getopt_long(argc, argv, "C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { + while ((rc = getopt_long(argc, argv, "b:C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { switch (rc) { + case 'b': + upd_blks_max = strtoull(optarg, &endp, 0); + if (*endp != '\0') { + printf("invalid update max blocks\n"); + print_usage(); + return -1; + } + break; case 'C': pool_capacity = strtoul(optarg, &endp, 0); pool_capacity = val_unit(pool_capacity, *endp); diff --git a/src/vea/tests/vea_ut.c b/src/vea/tests/vea_ut.c index a16590329c3..3f6c8369550 100644 --- a/src/vea/tests/vea_ut.c +++ b/src/vea/tests/vea_ut.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -83,6 +83,8 @@ ut_load(void **state) rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, NULL, &args->vua_vsi); assert_rc_equal(rc, 0); + /* turn off bitmap feature to test legacy allocation */ + args->vua_md->vsd_compat = 0; } static void @@ -114,10 +116,12 @@ ut_query(void **state) assert_int_equal(stat.vs_free_transient, tot_blks); assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 0); + assert_int_equal(stat.vs_frags_bitmap, 0); assert_int_equal(stat.vs_frags_aging, 0); assert_int_equal(stat.vs_resrv_hint, 0); assert_int_equal(stat.vs_resrv_large, 0); assert_int_equal(stat.vs_resrv_small, 0); + assert_int_equal(stat.vs_resrv_bitmap, 0); } static void @@ -172,9 +176,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_a); - rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -199,9 +205,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_b); - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -226,10 +234,12 @@ ut_reserve(void **state) assert_int_equal(ext->vre_blk_off, off_b); /* Verify transient is allocated */ - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); /* Verify persistent is not allocated */ - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* Verify statistics */ @@ -238,12 +248,135 @@ ut_reserve(void **state) assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 1); + assert_int_equal(stat.vs_frags_bitmap, 0); /* 2 hint from the second reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_hint, 2); /* 2 large from the first reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_large, 2); /* 1 small from the reserve for io stream 2 */ assert_int_equal(stat.vs_resrv_small, 1); + /* 0 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 0); +} + +static void +ut_reserve_bitmap(void **state) +{ + struct vea_ut_args *args = *state; + uint32_t blk_cnt; + struct vea_resrvd_ext *ext; + struct vea_hint_context *h_ctxt; + d_list_t *r_list; + struct vea_stat stat; + int rc, ext_cnt; + uint32_t hdr_blks = 1; + uint64_t capacity = UT_TOTAL_BLKS; + struct vea_unmap_context unmap_ctxt = { 0 }; + uint32_t blk_cnt_stream0[3] = { 4, 32, 4}; + uint32_t blk_cnt_stream1[3] = { 1, 2, 3}; + int i; + + rc = vea_format(&args->vua_umm, &args->vua_txd, args->vua_md, 0, + hdr_blks, capacity, NULL, NULL, true); + assert_rc_equal(rc, 0); + + rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, + NULL, &args->vua_vsi); + assert_rc_equal(rc, 0); + + for (i = 0; i < IO_STREAM_CNT; i++) { + /* reset off and seq */ + 
args->vua_hint[i]->vhd_off = 0; + args->vua_hint[i]->vhd_seq = 0; + + rc = vea_hint_load(args->vua_hint[i], &args->vua_hint_ctxt[i]); + assert_rc_equal(rc, 0); + } + /* + * Reserve three blocks from I/O stream 0 and I/O stream 1 in + * interleaved order, the reservation from I/O stream 0 will be + * canceled later, and the reservation from I/O stream 1 will + * be published. + */ + for (ext_cnt = 0; ext_cnt < 3; ext_cnt++) { + print_message("reserve extent %d from I/O stream 0\n", ext_cnt); + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + blk_cnt = blk_cnt_stream0[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + print_message("reserve extent %d from I/O stream 1\n", ext_cnt); + + r_list = &args->vua_resrvd_list[1]; + h_ctxt = args->vua_hint_ctxt[1]; + + blk_cnt = blk_cnt_stream1[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + } + + /* Reserve from I/O stream 2, it will reserve from small free extent */ + print_message("reserve extent from I/O stream 2\n"); + + r_list = &args->vua_resrvd_list[2]; + h_ctxt = args->vua_hint_ctxt[2]; + + blk_cnt = 1024; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_hint_off, VEA_HINT_OFF_INVAL); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + + /* Verify transient is allocated */ + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + /* Verify persistent is not allocated */ + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + /* Verify statistics */ + rc = vea_query(args->vua_vsi, NULL, &stat); + assert_rc_equal(rc, 0); + + assert_int_equal(stat.vs_frags_large, 1); + assert_int_equal(stat.vs_frags_small, 1); + /* 5 bitmaps for io stream 0 & 1 */ + assert_int_equal(stat.vs_frags_bitmap, 5); + /* 4 hint from */ + assert_int_equal(stat.vs_resrv_hint, 4); + /* 1 large from the first reserve for io stream 2 */ + assert_int_equal(stat.vs_resrv_large, 1); + assert_int_equal(stat.vs_resrv_small, 1); + /* 6 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 6); } static void @@ -269,11 +402,48 @@ ut_cancel(void **state) print_message("cancel reservation from I/O stream 0\n"); rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); assert_int_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, true); + assert_rc_equal(rc, 1); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, 
false); assert_rc_equal(rc, 1); assert_int_equal(h_ctxt->vhc_off, VEA_HINT_OFF_INVAL); } +static void +ut_cancel_bitmap(void **state) +{ + + struct vea_ut_args *args = *state; + struct vea_hint_context *h_ctxt; + struct vea_resrvd_ext *ext; + d_list_t *r_list; + struct vea_stat stat; + int rc; + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + print_message("cancel reservation from I/O stream 0\n"); + rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); + assert_int_equal(rc, 0); + + d_list_for_each_entry(ext, r_list, vre_link) { + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, true); + assert_rc_equal(rc, 1); + + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, false); + assert_rc_equal(rc, 1); + } + + /* Verify statistics */ + rc = vea_query(args->vua_vsi, NULL, &stat); + + /* 3 bitmaps left */ + assert_int_equal(stat.vs_frags_bitmap, 3); +} + static void ut_tx_publish(void **state) { @@ -301,9 +471,11 @@ ut_tx_publish(void **state) assert_ptr_not_equal(copy, NULL); D_INIT_LIST_HEAD(©->vre_link); + copy->vre_new_bitmap_chunk = ext->vre_new_bitmap_chunk; + copy->vre_private = ext->vre_private; copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; - d_list_add(©->vre_link, &args->vua_alloc_list); + d_list_add_tail(©->vre_link, &args->vua_alloc_list); } print_message("publish reservation from I/O stream %d\n", i); @@ -319,10 +491,12 @@ ut_tx_publish(void **state) blk_off = copy->vre_blk_off; blk_cnt = copy->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); } } @@ -331,7 +505,7 @@ static void ut_free(void **state) { struct vea_ut_args *args = *state; - struct vea_resrvd_ext *ext; + struct vea_resrvd_ext *ext, *tmp; d_list_t *r_list; uint64_t blk_off; uint32_t blk_cnt, nr_flushed; @@ -346,10 +520,12 @@ ut_free(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -363,13 +539,21 @@ ut_free(void **state) assert_rc_equal(rc, 0); assert_true(nr_flushed > 0); + print_message("transient free extents after flush:\n"); + vea_dump(args->vua_vsi, true); + print_message("persistent free extents after flush:\n"); + vea_dump(args->vua_vsi, false); + r_list = &args->vua_alloc_list; - d_list_for_each_entry(ext, r_list, vre_link) { + d_list_for_each_entry_safe(ext, tmp, r_list, vre_link) { blk_off = ext->vre_blk_off; blk_cnt = ext->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); } print_message("transient free extents after migration:\n"); @@ -400,6 +584,14 @@ ut_unload(void **state) args->vua_vsi = NULL; } +static void +ut_free_bitmap(void **state) +{ + ut_free(state); + ut_hint_unload(state); + ut_unload(state); +} + static 
int ut_setup(struct vea_ut_args *test_args) { @@ -885,23 +1077,6 @@ ut_inval_params_set_ext_age(void **state) ut_teardown(&args); } -static void -ut_inval_params_get_ext_vector(void **state) -{ - struct vea_ut_args args; - uint64_t block_offset = 0; - uint64_t block_count = 1; - struct vea_ext_vector ext_vector; - - print_message("Testing invalid parameters to vea_get_ext_vector\n"); - ut_setup(&args); - expect_assert_failure(vea_get_ext_vector(NULL, block_offset, - block_count, &ext_vector)); - expect_assert_failure(vea_get_ext_vector(args.vua_vsi, block_offset, - block_count, NULL)); - ut_teardown(&args); -} - static void ut_free_invalid_space(void **state) { @@ -932,6 +1107,13 @@ ut_free_invalid_space(void **state) rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list); assert_int_equal(rc, 0); + print_message("transient free extents:\n"); + rc = vea_dump(args.vua_vsi, true); + assert_rc_equal(rc, 0); + print_message("persistent free extents:\n"); + rc = vea_dump(args.vua_vsi, false); + assert_rc_equal(rc, 0); + /* Try to free from I/O Stream 1, which hasn't been reserved */ r_list = &args.vua_resrvd_list[1]; h_ctxt = args.vua_hint_ctxt[1]; @@ -957,12 +1139,13 @@ print_stats(struct vea_ut_args *args, bool verbose) rc = vea_query(args->vua_vsi, NULL, &stat); assert_int_equal(rc, 0); print_message("free_blks:"DF_U64"/"DF_U64", frags_large:"DF_U64", " - "frags_small:"DF_U64", frags_aging:"DF_U64"\n" + "frags_small:"DF_U64", frags_bitmap:"DF_U64" frags_aging:"DF_U64"\n" "resrv_hint:"DF_U64"\nresrv_large:"DF_U64"\n" - "resrv_small:"DF_U64"\n", + "resrv_small:"DF_U64"\nresrv_bitmap:"DF_U64"\n", stat.vs_free_persistent, stat.vs_free_transient, - stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_aging, - stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small); + stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_bitmap, + stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, + stat.vs_resrv_small, stat.vs_resrv_bitmap); if (verbose) vea_dump(args->vua_vsi, true); @@ -980,6 +1163,8 @@ ut_interleaved_ops(void **state) uint32_t header_blocks = 1; uint64_t capacity = ((VEA_LARGE_EXT_MB * 2) << 20); /* 128 MB */ uint32_t block_count; + d_list_t tmp_list; + struct vea_resrvd_ext *ext, *tmp; int rc; print_message("Test interleaved operations\n"); @@ -992,8 +1177,6 @@ ut_interleaved_ops(void **state) NULL, &args.vua_vsi); assert_int_equal(rc, 0); - rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); - assert_int_equal(rc, 0); /* * Do the following interleaved operations: @@ -1006,6 +1189,7 @@ ut_interleaved_ops(void **state) * 7. reserve A, reserve B, cancel A, cancel B * 8. reserve A, reserve B, cancel B, cancel A * 9. reserve A, reserve B, reserve C, publish B, publish A & C + * 10. reserve A, reserve B, reserve C, cancel A & C. publish B. 
**/ block_count = 2; r_list_a = &args.vua_resrvd_list[0]; @@ -1020,10 +1204,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 2 */ block_count += 2; @@ -1032,10 +1220,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 3 */ block_count += 2; @@ -1046,8 +1238,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 4 */ block_count += 2; @@ -1056,8 +1252,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); @@ -1070,8 +1270,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 6 */ block_count += 2; @@ -1080,8 +1284,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); @@ -1120,12 +1328,46 @@ ut_interleaved_ops(void **state) /* Reserve C */ rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); /* Publish B */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_rc_equal(rc, 0); /* Publish A & C */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + /* Case 10 */ + block_count = 256; + /* Reserve A */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + block_count = 260; + /* Reserve B */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + 
block_count = 261; + /* Reserve C */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + D_INIT_LIST_HEAD(&tmp_list); + d_list_for_each_entry_safe(ext, tmp, r_list_a, vre_link) { + /* move second reserve out */ + if (ext->vre_blk_cnt == 260) + d_list_move_tail(&ext->vre_link, &tmp_list); + } + /* cancel A & C */ + rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + /* Publish B */ + rc = vea_tx_publish(args.vua_vsi, h_ctxt, &tmp_list); + assert_rc_equal(rc, 0); rc = umem_tx_commit(&args.vua_umm); assert_int_equal(rc, 0); @@ -1190,6 +1432,7 @@ ut_fragmentation(void **state) D_INIT_LIST_HEAD(©->vre_link); copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; d_list_add(©->vre_link, &args.vua_alloc_list); } } @@ -1239,10 +1482,12 @@ ut_fragmentation(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args.vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args.vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -1250,6 +1495,98 @@ ut_fragmentation(void **state) ut_teardown(&args); } +static void +ut_reclaim_unused_bitmap(void **state) +{ + struct vea_ut_args args; + struct vea_unmap_context unmap_ctxt = { 0 }; + d_list_t *r_list; + uint64_t capacity = 1llu << 27; /* 128 MiB */ + uint32_t block_size = 4096; /* use the default size */ + uint32_t header_blocks = 1; + d_list_t persist_list; + struct vea_resrvd_ext *ext, *copy; + struct vea_resrvd_ext *tmp_ext; + int rc; + + print_message("Test bitmap allocation\n"); + ut_setup(&args); + rc = vea_format(&args.vua_umm, &args.vua_txd, args.vua_md, block_size, + header_blocks, capacity, NULL, NULL, false); + assert_rc_equal(rc, 0); + + rc = vea_load(&args.vua_umm, &args.vua_txd, args.vua_md, &unmap_ctxt, + NULL, &args.vua_vsi); + assert_rc_equal(rc, 0); + + r_list = &args.vua_resrvd_list[0]; + /* keep reserving until we run out of space */ + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 8, NULL, r_list); + } + + D_INIT_LIST_HEAD(&persist_list); + d_list_for_each_entry_safe(ext, tmp_ext, r_list, vre_link) { + /* Copy the extents to keep to persist_list */ + D_ALLOC_PTR(copy); + assert_ptr_not_equal(copy, NULL); + + D_INIT_LIST_HEAD(©->vre_link); + copy->vre_blk_off = ext->vre_blk_off; + copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; + d_list_add(©->vre_link, &persist_list); + } + + /* Publish the ones to persist */ + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, r_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 8 blocks:\n"); + print_stats(&args, true); + + d_list_for_each_entry_safe(ext, tmp_ext, &persist_list, vre_link) { + uint64_t blk_off = ext->vre_blk_off; + uint32_t blk_cnt = ext->vre_blk_cnt; + + rc = vea_free(args.vua_vsi, blk_off, blk_cnt); + assert_rc_equal(rc, 0); + + /* not immediately visual for allocation */ + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 0); + + rc = vea_verify_alloc(args.vua_vsi, false, 
blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); + } + + rc = 0; + D_INIT_LIST_HEAD(&persist_list); + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 16, NULL, &persist_list); + } + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, &persist_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 16 blocks:\n"); + print_stats(&args, true); + + vea_unload(args.vua_vsi); + ut_teardown(&args); +} + static const struct CMUnitTest vea_uts[] = { { "vea_format", ut_format, NULL, NULL}, { "vea_load", ut_load, NULL, NULL}, @@ -1261,6 +1598,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_free", ut_free, NULL, NULL}, { "vea_hint_unload", ut_hint_unload, NULL, NULL}, { "vea_unload", ut_unload, NULL, NULL}, + { "vea_reserve_bitmap", ut_reserve_bitmap, NULL, NULL}, + { "vea_cancel_bitmap", ut_cancel_bitmap, NULL, NULL}, + { "vea_tx_publish_bitmap", ut_tx_publish, NULL, NULL}, + { "vea_free_bitmap", ut_free_bitmap, NULL, NULL}, { "vea_reserve_special", ut_reserve_special, NULL, NULL}, { "vea_inval_params_format", ut_inval_params_format, NULL, NULL}, { "vea_inval_params_load", ut_inval_params_load, NULL, NULL}, @@ -1271,11 +1612,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_inval_param_hint_load", ut_inval_params_hint_load, NULL, NULL}, { "vea_inval_param_set_ext_age", ut_inval_params_set_ext_age, NULL, NULL}, - { "vea_inval_param_get_ext_vector", ut_inval_params_get_ext_vector, - NULL, NULL}, { "vea_free_invalid_space", ut_free_invalid_space, NULL, NULL}, { "vea_interleaved_ops", ut_interleaved_ops, NULL, NULL}, - { "vea_fragmentation", ut_fragmentation, NULL, NULL} + { "vea_fragmentation", ut_fragmentation, NULL, NULL}, + { "vea_reclaim_unused_bitmap", ut_reclaim_unused_bitmap, NULL, NULL} }; int main(int argc, char **argv) diff --git a/src/vea/vea_alloc.c b/src/vea/vea_alloc.c index 6e0986c0dba..a9fd9424184 100644 --- a/src/vea/vea_alloc.c +++ b/src/vea/vea_alloc.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -10,27 +10,20 @@ #include #include "vea_internal.h" -int -compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec) -{ - /* TODO Add in in-memory extent vector tree */ - return 0; -} - static int -compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, - struct vea_entry *entry) +compound_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + struct vea_extent_entry *entry) { struct vea_free_extent *remain; d_iov_t key; int rc; - remain = &entry->ve_ext; + remain = &entry->vee_ext; D_ASSERT(remain->vfe_blk_cnt >= vfe->vfe_blk_cnt); D_ASSERT(remain->vfe_blk_off == vfe->vfe_blk_off); /* Remove the found free extent from compound index */ - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); if (remain->vfe_blk_cnt == vfe->vfe_blk_cnt) { d_iov_set(&key, &vfe->vfe_blk_off, sizeof(vfe->vfe_blk_off)); @@ -40,7 +33,7 @@ compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, remain->vfe_blk_off += vfe->vfe_blk_cnt; remain->vfe_blk_cnt -= vfe->vfe_blk_cnt; - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); } return rc; @@ -51,7 +44,7 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; d_iov_t key, val; int rc; @@ -72,12 +65,12 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, if (rc) return (rc == -DER_NONEXIST) ? 0 : rc; - entry = (struct vea_entry *)val.iov_buf; + entry = (struct vea_extent_entry *)val.iov_buf; /* The matching free extent isn't big enough */ - if (entry->ve_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) return 0; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -94,82 +87,33 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, static int reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd) -{ - daos_handle_t btr_hdl; - struct vea_sized_class *sc; - struct vea_free_extent vfe; - struct vea_entry *entry; - d_iov_t key, val_out; - uint64_t int_key = blk_cnt; - int rc; - - /* Skip huge allocate request */ - if (blk_cnt > vsi->vsi_class.vfc_large_thresh) - return 0; - - btr_hdl = vsi->vsi_class.vfc_size_btr; - D_ASSERT(daos_handle_is_valid(btr_hdl)); - - d_iov_set(&key, &int_key, sizeof(int_key)); - d_iov_set(&val_out, NULL, 0); - - rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); - if (rc == -DER_NONEXIST) { - return 0; - } else if (rc) { - D_ERROR("Search size class:%u failed. 
"DF_RC"\n", blk_cnt, DP_RC(rc)); - return rc; - } - - sc = (struct vea_sized_class *)val_out.iov_buf; - D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); - - /* Get the least used item from head */ - entry = d_list_entry(sc->vsc_lru.next, struct vea_entry, ve_link); - D_ASSERT(entry->ve_sized_class == sc); - D_ASSERT(entry->ve_ext.vfe_blk_cnt >= blk_cnt); - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; - vfe.vfe_blk_cnt = blk_cnt; - - rc = compound_alloc(vsi, &vfe, entry); - if (rc) - return rc; - - resrvd->vre_blk_off = vfe.vfe_blk_off; - resrvd->vre_blk_cnt = blk_cnt; - inc_stats(vsi, STAT_RESRV_SMALL, 1); - - D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); - - return rc; -} + struct vea_resrvd_ext *resrvd); +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd); -int -reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, +static int +reserve_extent(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_class *vfc = &vsi->vsi_class; struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; struct d_binheap_node *root; int rc; - /* No large free extent available */ if (d_binheap_is_empty(&vfc->vfc_heap)) - return reserve_small(vsi, blk_cnt, resrvd); + return 0; root = d_binheap_root(&vfc->vfc_heap); - entry = container_of(root, struct vea_entry, ve_node); + entry = container_of(root, struct vea_extent_entry, vee_node); - D_ASSERT(entry->ve_ext.vfe_blk_cnt > vfc->vfc_large_thresh); + D_ASSERT(entry->vee_ext.vfe_blk_cnt > vfc->vfc_large_thresh); D_DEBUG(DB_IO, "largest free extent ["DF_U64", %u]\n", - entry->ve_ext.vfe_blk_off, entry->ve_ext.vfe_blk_cnt); + entry->vee_ext.vfe_blk_off, entry->vee_ext.vfe_blk_cnt); /* The largest free extent can't satisfy huge allocate request */ - if (entry->ve_ext.vfe_blk_cnt < blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < blk_cnt) return 0; /* @@ -178,16 +122,11 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, * reserve from the small extents first, if it fails, reserve from the * largest free extent. 
*/ - if (entry->ve_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { - /* Try small extents first */ - rc = reserve_small(vsi, blk_cnt, resrvd); - if (rc != 0 || resrvd->vre_blk_cnt != 0) - return rc; - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; + if (entry->vee_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { + vfe.vfe_blk_off = entry->vee_ext.vfe_blk_off; vfe.vfe_blk_cnt = blk_cnt; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -195,15 +134,15 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t half_blks, tot_blks; uint64_t blk_off; - blk_off = entry->ve_ext.vfe_blk_off; - tot_blks = entry->ve_ext.vfe_blk_cnt; + blk_off = entry->vee_ext.vfe_blk_off; + tot_blks = entry->vee_ext.vfe_blk_cnt; half_blks = tot_blks >> 1; D_ASSERT(tot_blks >= (half_blks + blk_cnt)); /* Shrink the original extent to half size */ - free_class_remove(vsi, entry); - entry->ve_ext.vfe_blk_cnt = half_blks; - rc = free_class_add(vsi, entry); + extent_free_class_remove(vsi, entry); + entry->vee_ext.vfe_blk_cnt = half_blks; + rc = extent_free_class_add(vsi, entry); if (rc) return rc; @@ -213,8 +152,8 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, vfe.vfe_blk_cnt = tot_blks - half_blks - blk_cnt; vfe.vfe_age = 0; /* Not used */ - rc = compound_free(vsi, &vfe, VEA_FL_NO_MERGE | - VEA_FL_NO_ACCOUNTING); + rc = compound_free_extent(vsi, &vfe, VEA_FL_NO_MERGE | + VEA_FL_NO_ACCOUNTING); if (rc) return rc; } @@ -232,16 +171,263 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, return 0; } +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + daos_handle_t btr_hdl; + struct vea_sized_class *sc; + struct vea_free_extent vfe; + struct vea_extent_entry *extent_entry; + d_iov_t key, val_out; + uint64_t int_key = blk_cnt; + int rc; + + btr_hdl = vsi->vsi_class.vfc_size_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + + d_iov_set(&key, &int_key, sizeof(int_key)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); + if (rc == -DER_NONEXIST) + return 0; + else if (rc) + return rc; + + sc = (struct vea_sized_class *)val_out.iov_buf; + D_ASSERT(sc != NULL); + + /* Get the least used item from head */ + extent_entry = d_list_entry(sc->vsc_extent_lru.next, struct vea_extent_entry, vee_link); + D_ASSERT(extent_entry->vee_sized_class == sc); + D_ASSERT(extent_entry->vee_ext.vfe_blk_cnt >= blk_cnt); + + vfe.vfe_blk_off = extent_entry->vee_ext.vfe_blk_off; + vfe.vfe_blk_cnt = blk_cnt; + + rc = compound_alloc_extent(vsi, &vfe, extent_entry); + if (rc) + return rc; + resrvd->vre_blk_off = vfe.vfe_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = NULL; + inc_stats(vsi, STAT_RESRV_SMALL, 1); + + return 0; +} + +static int +reserve_bitmap_chunk(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Get hint offset */ + hint_get(vsi->vsi_bitmap_hint_context, &resrvd->vre_hint_off); + + /* Reserve from hint offset */ + if (resrvd->vre_hint_off != VEA_HINT_OFF_INVAL) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + return rc; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } + + if (blk_cnt >= vsi->vsi_class.vfc_large_thresh) + goto extent; + + rc = reserve_size_tree(vsi, blk_cnt, resrvd); + if (rc) + return rc; + + if (resrvd->vre_blk_cnt > 0) + goto done; + +extent: + rc = reserve_extent(vsi, 
blk_cnt, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return -DER_NOSPACE; +done: + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + + /* Update hint offset */ + hint_update(vsi->vsi_bitmap_hint_context, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + return rc; +} + +#define LARGE_EXT_FREE_BLKS ((32UL << 30) / VEA_BLK_SZ) + +static inline uint32_t +get_bitmap_chunk_blks(struct vea_space_info *vsi, uint32_t blk_cnt) +{ + uint32_t chunk_blks = VEA_BITMAP_MIN_CHUNK_BLKS; + + D_ASSERT(blk_cnt <= VEA_MAX_BITMAP_CLASS); + chunk_blks *= blk_cnt; + + D_ASSERT(chunk_blks <= VEA_BITMAP_MAX_CHUNK_BLKS); + /* + * Always try to allocate large bitmap chunk if there + * is enough free extent blocks. + */ + if (vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] >= LARGE_EXT_FREE_BLKS) { + int times = VEA_BITMAP_MAX_CHUNK_BLKS / chunk_blks; + + if (times > 1) + chunk_blks *= times; + } + + /* should be aligned with 64 bits */ + D_ASSERT(chunk_blks % (blk_cnt * 64) == 0); + + return chunk_blks; +} + +static inline int +get_bitmap_sz(uint32_t chunk_blks, uint16_t class) +{ + int bits = chunk_blks / class; + + D_ASSERT(chunk_blks % class == 0); + D_ASSERT(bits % 64 == 0); + + return bits / 64; +} + +static int +reserve_bitmap(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_bitmap_entry *entry; + int rc; + struct vea_free_bitmap *vfb; + struct vea_free_bitmap new_vfb = { 0 }; + int bits = 1; + uint32_t chunk_blks; + int bitmap_sz; + d_list_t *list_head; + + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + if (blk_cnt > VEA_MAX_BITMAP_CLASS) + return 0; + + D_ASSERT(blk_cnt > 0); + /* reserve from bitmap */ + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + /* Only assert in server mode */ + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + rc = daos_find_bits(vfb->vfb_bitmaps, NULL, vfb->vfb_bitmap_sz, 1, &bits); + if (rc < 0) { + d_list_del_init(&bitmap_entry->vbe_link); + continue; + } + + D_ASSERT(rc * blk_cnt + blk_cnt <= vfb->vfb_blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off + (rc * blk_cnt); + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, rc, 1); + rc = 0; + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + return 0; + } + + list_head = &vsi->vsi_class.vfc_bitmap_empty[blk_cnt - 1]; + if (!d_list_empty(list_head)) { + bitmap_entry = d_list_entry(list_head->next, struct vea_bitmap_entry, + vbe_link); + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, 0, 1); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + d_list_move_tail(&bitmap_entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1]); + return 0; + } + + chunk_blks = get_bitmap_chunk_blks(vsi, blk_cnt); + bitmap_sz = get_bitmap_sz(chunk_blks, blk_cnt); + rc = reserve_bitmap_chunk(vsi, chunk_blks, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return 0; + + resrvd->vre_new_bitmap_chunk = 1; + + new_vfb.vfb_blk_off = 
resrvd->vre_blk_off; + new_vfb.vfb_class = blk_cnt; + new_vfb.vfb_blk_cnt = chunk_blks; + new_vfb.vfb_bitmap_sz = bitmap_sz; + rc = bitmap_entry_insert(vsi, &new_vfb, VEA_BITMAP_STATE_NEW, + &entry, VEA_FL_NO_ACCOUNTING); + if (rc) + return rc; + + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)entry; + + D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, chunk_blks); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + + return rc; +} + +static int +reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Skip huge allocate request */ + if (blk_cnt >= vsi->vsi_class.vfc_large_thresh) + return 0; + + rc = reserve_bitmap(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + + return reserve_size_tree(vsi, blk_cnt, resrvd); +} + int -reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, +reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { - /* TODO reserve extent vector for non-contiguous allocation */ - return -DER_NOSPACE; + struct vea_free_class *vfc = &vsi->vsi_class; + int rc; + + /* No large free extent available */ + if (d_binheap_is_empty(&vfc->vfc_heap)) + return reserve_small(vsi, blk_cnt, resrvd); + + if (blk_cnt < vsi->vsi_class.vfc_large_thresh) { + rc = reserve_small(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + } + + return reserve_extent(vsi, blk_cnt, resrvd); } -int -persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) +static int +persistent_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent *found, frag = {0}; daos_handle_t btr_hdl; @@ -328,3 +514,183 @@ persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) return 0; } + +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr) +{ + uint32_t bitmap_off = bit_at / 8; + uint32_t bitmap_sz = 0; + + if (bit_at % 8) + bitmap_sz = 1; + + if (bits_nr > (bit_at % 8)) + bitmap_sz += (bits_nr - (bit_at % 8) + 7) / 8; + + return umem_tx_add_ptr(vsi_umem, (char *)bitmap + bitmap_off, bitmap_sz); +} + +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear) +{ + uint32_t bit_at, bits_nr; + int rc; + + if (blk_off < bitmap->vfb_blk_off || + blk_off + blk_cnt > bitmap->vfb_blk_off + bitmap->vfb_blk_cnt) { + D_ERROR("range ["DF_U64", %u] is not within bitmap ["DF_U64", %u]\n", + blk_off, blk_cnt, bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + return -DER_INVAL; + } + + bit_at = blk_off - bitmap->vfb_blk_off; + if (bit_at % bitmap->vfb_class != 0) { + D_ERROR("invalid block offset: "DF_U64" which is not times of %u\n", + blk_off, bitmap->vfb_class); + return -DER_INVAL; + } + if (blk_cnt % bitmap->vfb_class != 0) { + D_ERROR("invalid block count: %u which is not times of %u\n", + blk_cnt, bitmap->vfb_class); + return -DER_INVAL; + } + bit_at /= bitmap->vfb_class; + bits_nr = blk_cnt / bitmap->vfb_class; + if (clear) { + if (!isset_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already cleared in the range.\n"); + return -DER_INVAL; + } + } else { + if (!isclr_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already set in the range.["DF_U64", %u]\n", + blk_off, blk_cnt); + return -DER_INVAL; + } + } + + if (vsi_umem) { + 
rc = bitmap_tx_add_ptr(vsi_umem, bitmap->vfb_bitmaps, bit_at, bits_nr); + if (rc) + return rc; + } + + D_ASSERT(bit_at + bits_nr <= bitmap->vfb_bitmap_sz * 64); + if (clear) + clrbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + else + setbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + + return 0; +} + +static void +new_chunk_commit_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHED; +} + +static void +new_chunk_abort_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_NEW; +} + +int +persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + struct vea_bitmap_entry *bitmap_entry = vfe->vfe_bitmap; + + if (bitmap_entry == NULL) + return persistent_alloc_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(bitmap_entry != NULL); + + /* if this bitmap is new */ + if (bitmap_entry->vbe_published_state == VEA_BITMAP_STATE_NEW) { + d_iov_t key, val, val_out; + struct vea_free_bitmap *bitmap; + int rc; + struct vea_free_extent extent; + daos_handle_t btr_hdl = vsi->vsi_md_bitmap_btr; + rc = umem_tx_begin(vsi->vsi_umem, vsi->vsi_txd); + if (rc != 0) + return rc; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONABORT, + new_chunk_abort_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk abort callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHING; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + new_chunk_commit_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk commit callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + extent = vfe->vfe_ext; + extent.vfe_blk_off = bitmap_entry->vbe_bitmap.vfb_blk_off; + extent.vfe_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + rc = persistent_alloc_extent(vsi, &extent); + if (rc) + goto out; + + D_ALLOC(bitmap, alloc_free_bitmap_size(bitmap_entry->vbe_bitmap.vfb_bitmap_sz)); + if (!bitmap) { + rc = -DER_NOMEM; + goto out; + } + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt != 0); + bitmap->vfb_blk_off = extent.vfe_blk_off; + bitmap->vfb_class = bitmap_entry->vbe_bitmap.vfb_class; + bitmap->vfb_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + bitmap->vfb_bitmap_sz = bitmap_entry->vbe_bitmap.vfb_bitmap_sz; + rc = bitmap_set_range(NULL, bitmap, vfe->vfe_ext.vfe_blk_off, + vfe->vfe_ext.vfe_blk_cnt, false); + if (rc) { + D_FREE(bitmap); + goto out; + } + /* Add to persistent bitmap tree */ + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &bitmap->vfb_blk_off, sizeof(bitmap->vfb_blk_off)); + d_iov_set(&val, bitmap, alloc_free_bitmap_size(bitmap->vfb_bitmap_sz)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(btr_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(bitmap); + if (rc) + D_ERROR("Insert persistent bitmap failed. "DF_RC"\n", DP_RC(rc)); + else + bitmap_entry->vbe_md_bitmap = (struct vea_free_bitmap *)val_out.iov_buf; +out: + /* Commit/Abort transaction on success/error */ + rc = rc ? 
umem_tx_abort(vsi->vsi_umem, rc) : umem_tx_commit(vsi->vsi_umem); + + return rc; + } + + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, false); +} diff --git a/src/vea/vea_api.c b/src/vea/vea_api.c index ffad7b1b870..a5530a8e5f2 100644 --- a/src/vea/vea_api.c +++ b/src/vea/vea_api.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,7 +14,7 @@ static void erase_md(struct umem_instance *umem, struct vea_space_df *md) { struct umem_attr uma = {0}; - daos_handle_t free_btr, vec_btr; + daos_handle_t free_btr, bitmap_btr; int rc; uma.uma_id = umem->umm_id; @@ -27,15 +27,54 @@ erase_md(struct umem_instance *umem, struct vea_space_df *md) DP_RC(rc)); } - rc = dbtree_open_inplace(&md->vsd_vec_tree, &uma, &vec_btr); + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, &bitmap_btr); if (rc == 0) { - rc = dbtree_destroy(vec_btr, NULL); + rc = dbtree_destroy(bitmap_btr, NULL); if (rc) - D_ERROR("destroy vector tree error: "DF_RC"\n", + D_ERROR("destroy bitmap tree error: "DF_RC"\n", DP_RC(rc)); } } +int +vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version) +{ + int rc; + uint64_t offset; + d_iov_t key, val; + struct vea_hint_df dummy; + + if (version < 3) + return 0; + + /* Start transaction to initialize allocation metadata */ + rc = umem_tx_begin(umem, NULL); + if (rc != 0) + return rc; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(vsi->vsi_md_bitmap_btr, &key, &val); + if (rc) { + D_ERROR("upgrade to insert bitmap hint failed: "DF_RC"\n", + DP_RC(rc)); + goto out; + } + + rc = umem_tx_add_ptr(umem, md, sizeof(*md)); + if (rc != 0) + goto out; + + md->vsd_compat |= VEA_COMPAT_FEATURE_BITMAP; + +out: + /* Commit/Abort transaction on success/error */ + return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); +} + /* * Initialize the space tracking information on SCM and the header of the * block device. 
@@ -48,9 +87,11 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, { struct vea_free_extent free_ext; struct umem_attr uma; - uint64_t tot_blks; - daos_handle_t free_btr, vec_btr; + uint64_t tot_blks, offset; + daos_handle_t free_btr, bitmap_btr; + struct vea_hint_df dummy; d_iov_t key, val; + daos_handle_t md_bitmap_btr = DAOS_HDL_INVAL; int rc; D_ASSERT(umem != NULL); @@ -108,14 +149,15 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) return rc; - free_btr = vec_btr = DAOS_HDL_INVAL; + free_btr = bitmap_btr = DAOS_HDL_INVAL; rc = umem_tx_add_ptr(umem, md, sizeof(*md)); if (rc != 0) goto out; md->vsd_magic = VEA_MAGIC; - md->vsd_compat = 0; + /* Todo only enable bitmap for large pool size */ + md->vsd_compat = VEA_COMPAT_FEATURE_BITMAP; md->vsd_blk_sz = blk_sz; md->vsd_tot_blks = tot_blks; md->vsd_hdr_blks = hdr_blks; @@ -141,26 +183,59 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto out; - /* Create extent vector tree */ - rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, - &md->vsd_vec_tree, &vec_btr); + /* Create bitmap tree */ + rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, + &md->vsd_bitmap_tree, &bitmap_btr); + if (rc != 0) + goto out; + + /* Open bitmap tree */ + uma.uma_id = umem->umm_id; + uma.uma_pool = umem->umm_pool; + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, + &md_bitmap_btr); if (rc != 0) goto out; + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(md_bitmap_btr, &key, &val); + if (rc) + goto out; out: if (daos_handle_is_valid(free_btr)) dbtree_close(free_btr); - if (daos_handle_is_valid(vec_btr)) - dbtree_close(vec_btr); + if (daos_handle_is_valid(bitmap_btr)) + dbtree_close(bitmap_btr); + if (daos_handle_is_valid(md_bitmap_btr)) + dbtree_close(md_bitmap_btr); /* Commit/Abort transaction on success/error */ return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); } +static int +destroy_free_bitmap_agg(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + if (daos_handle_is_valid(vbe->vbe_agg_btr)) { + dbtree_destroy(vbe->vbe_agg_btr, NULL); + vbe->vbe_agg_btr = DAOS_HDL_INVAL; + } + + return 0; +} + /* Free the memory footprint created by vea_load(). 
*/ void vea_unload(struct vea_space_info *vsi) { + int rc; + D_ASSERT(vsi != NULL); unload_space_info(vsi); @@ -170,10 +245,14 @@ vea_unload(struct vea_space_info *vsi) vsi->vsi_free_btr = DAOS_HDL_INVAL; } - /* Destroy the in-memory extent vector tree */ - if (daos_handle_is_valid(vsi->vsi_vec_btr)) { - dbtree_destroy(vsi->vsi_vec_btr, NULL); - vsi->vsi_vec_btr = DAOS_HDL_INVAL; + /* Destroy the in-memory bitmap tree */ + if (daos_handle_is_valid(vsi->vsi_bitmap_btr)) { + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, destroy_free_bitmap_agg, NULL); + if (rc) + D_ERROR("Failed to destroy free bitmap aggregation btr: "DF_RC"\n", DP_RC(rc)); + dbtree_destroy(vsi->vsi_bitmap_btr, NULL); + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; } /* Destroy the in-memory aggregation tree */ @@ -218,11 +297,11 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, vsi->vsi_txd = txd; vsi->vsi_md = md; vsi->vsi_md_free_btr = DAOS_HDL_INVAL; - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; vsi->vsi_free_btr = DAOS_HDL_INVAL; + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; D_INIT_LIST_HEAD(&vsi->vsi_agg_lru); vsi->vsi_agg_btr = DAOS_HDL_INVAL; - vsi->vsi_vec_btr = DAOS_HDL_INVAL; vsi->vsi_flush_time = 0; vsi->vsi_flush_scheduled = false; vsi->vsi_unmap_ctxt = *unmap_ctxt; @@ -240,15 +319,15 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto error; - /* Create in-memory extent vector tree */ + /* Create in-memory aggregation tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_vec_btr); + &vsi->vsi_agg_btr); if (rc != 0) goto error; - /* Create in-memory aggregation tree */ - rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_agg_btr); + /* Create in-memory bitmap tree */ + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, + &vsi->vsi_bitmap_btr); if (rc != 0) goto error; @@ -283,8 +362,7 @@ aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t * half-and-half then reserve from the latter half. (lookup vfc_heap). Otherwise; * 3. Try to reserve from some small free extent (<= VEA_LARGE_EXT_MB) in best-fit, * if it fails, reserve from the largest free extent. (lookup vfc_size_btr) - * 4. Repeat the search in 3rd step to reserve an extent vector. (vsi_vec_btr) - * 5. Fail reserve with ENOMEM if all above attempts fail. + * 4. Fail reserve with ENOMEM if all above attempts fail. 
*/ int vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, @@ -294,10 +372,14 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t nr_flushed; bool force = false; int rc = 0; + bool try_hint = true; D_ASSERT(vsi != NULL); D_ASSERT(resrvd_list != NULL); + if (is_bitmap_feature_enabled(vsi) && blk_cnt <= VEA_MAX_BITMAP_CLASS) + try_hint = false; + D_ALLOC_PTR(resrvd); if (resrvd == NULL) return -DER_NOMEM; @@ -306,17 +388,20 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, resrvd->vre_hint_off = VEA_HINT_OFF_INVAL; /* Get hint offset */ - hint_get(hint, &resrvd->vre_hint_off); + if (try_hint) + hint_get(hint, &resrvd->vre_hint_off); /* Trigger aging extents flush */ aging_flush(vsi, force, MAX_FLUSH_FRAGS, &nr_flushed); retry: /* Reserve from hint offset */ - rc = reserve_hint(vsi, blk_cnt, resrvd); - if (rc != 0) - goto error; - else if (resrvd->vre_blk_cnt != 0) - goto done; + if (try_hint) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + goto error; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } /* Reserve from the largest extent or a small extent */ rc = reserve_single(vsi, blk_cnt, resrvd); @@ -325,27 +410,28 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, else if (resrvd->vre_blk_cnt != 0) goto done; - /* Reserve extent vector as the last resort */ - rc = reserve_vector(vsi, blk_cnt, resrvd); - - if (rc == -DER_NOSPACE && !force) { + rc = -DER_NOSPACE; + if (!force) { force = true; trigger_aging_flush(vsi, force, MAX_FLUSH_FRAGS * 10, &nr_flushed); if (nr_flushed == 0) goto error; goto retry; - } else if (rc != 0) { + } else { goto error; } done: - D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); - dec_stats(vsi, STAT_FREE_BLKS, blk_cnt); - - /* Update hint offset */ - hint_update(hint, resrvd->vre_blk_off + blk_cnt, - &resrvd->vre_hint_seq); + /* Update hint offset if allocation is from extent */ + if (resrvd->vre_private) { + dec_stats(vsi, STAT_FREE_BITMAP_BLKS, blk_cnt); + } else { + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + hint_update(hint, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + } d_list_add_tail(&resrvd->vre_link, resrvd_list); @@ -355,67 +441,130 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, return rc; } +static int +process_free_entry(struct vea_space_info *vsi, struct vea_free_entry *vfe, bool publish) +{ + uint32_t expected_type = vfe->vfe_bitmap ? 
VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + if (!publish) { + int type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, NULL); + + if (type < 0) + return type; + + if (type != expected_type) { + D_ERROR("mismatch free entry type expected: %d, but got: %d\n", + expected_type, type); + return -DER_INVAL; + } + return compound_free(vsi, vfe, 0); + } + + return persistent_alloc(vsi, vfe); +} + static int process_resrvd_list(struct vea_space_info *vsi, struct vea_hint_context *hint, d_list_t *resrvd_list, bool publish) { struct vea_resrvd_ext *resrvd, *tmp; - struct vea_free_extent vfe; + struct vea_free_entry vfe; uint64_t seq_max = 0, seq_min = 0; uint64_t off_c = 0, off_p = 0; unsigned int seq_cnt = 0; int rc = 0; + uint32_t entry_type; + void *private = NULL; + uint64_t bitmap_seq_max = 0, bitmap_seq_min = 0; + uint64_t bitmap_off_c = 0, bitmap_off_p = 0; + unsigned int bitmap_seq_cnt = 0; + struct vea_hint_context *bitmap_hint = vsi->vsi_bitmap_hint_context; if (d_list_empty(resrvd_list)) return 0; - vfe.vfe_blk_off = 0; - vfe.vfe_blk_cnt = 0; - vfe.vfe_age = 0; /* Not used */ + vfe.vfe_ext.vfe_blk_off = 0; + vfe.vfe_ext.vfe_blk_cnt = 0; + vfe.vfe_ext.vfe_age = 0; /* Not used */ + vfe.vfe_bitmap = NULL; d_list_for_each_entry(resrvd, resrvd_list, vre_link) { + struct vea_bitmap_entry *bitmap_entry; + rc = verify_resrvd_ext(resrvd); if (rc) goto error; + entry_type = resrvd->vre_private ? + VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + bitmap_entry = (struct vea_bitmap_entry *)resrvd->vre_private; /* Reserved list is sorted by hint sequence */ - if (seq_min == 0) { - seq_min = resrvd->vre_hint_seq; - off_c = resrvd->vre_hint_off; - } else if (hint != NULL) { - D_ASSERT(seq_min < resrvd->vre_hint_seq); + /* use bitmap entry chunk offset */ + if (resrvd->vre_new_bitmap_chunk) { + D_ASSERT(bitmap_entry != NULL); + D_ASSERT(entry_type == VEA_FREE_ENTRY_BITMAP); + if (bitmap_seq_min == 0) { + bitmap_seq_min = resrvd->vre_hint_seq; + bitmap_off_c = resrvd->vre_hint_off; + } else { + D_ASSERT(bitmap_seq_min < resrvd->vre_hint_seq); + } + bitmap_seq_cnt++; + bitmap_seq_max = resrvd->vre_hint_seq; + bitmap_off_p = resrvd->vre_blk_off + bitmap_entry->vbe_bitmap.vfb_blk_cnt; + } else if (entry_type == VEA_FREE_ENTRY_EXTENT) { + if (seq_min == 0) { + seq_min = resrvd->vre_hint_seq; + off_c = resrvd->vre_hint_off; + } else if (hint != NULL) { + D_ASSERT(seq_min < resrvd->vre_hint_seq); + } + + seq_cnt++; + seq_max = resrvd->vre_hint_seq; + off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; } - seq_cnt++; - seq_max = resrvd->vre_hint_seq; - off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; - - if (vfe.vfe_blk_off + vfe.vfe_blk_cnt == resrvd->vre_blk_off) { - vfe.vfe_blk_cnt += resrvd->vre_blk_cnt; + if (private == resrvd->vre_private && + vfe.vfe_ext.vfe_blk_off + vfe.vfe_ext.vfe_blk_cnt == resrvd->vre_blk_off) { + vfe.vfe_ext.vfe_blk_cnt += resrvd->vre_blk_cnt; continue; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } - vfe.vfe_blk_off = resrvd->vre_blk_off; - vfe.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_ext.vfe_blk_off = resrvd->vre_blk_off; + vfe.vfe_ext.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_bitmap = bitmap_entry; + private = resrvd->vre_private; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? 
persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } + if (seq_cnt == 0) + goto bitmap_publish; + rc = publish ? hint_tx_publish(vsi->vsi_umem, hint, off_p, seq_min, seq_max, seq_cnt) : hint_cancel(hint, off_c, seq_min, seq_max, seq_cnt); +bitmap_publish: + if (rc || bitmap_seq_cnt == 0) + goto error; + + rc = publish ? hint_tx_publish(vsi->vsi_umem, bitmap_hint, bitmap_off_p, + bitmap_seq_min, bitmap_seq_max, bitmap_seq_cnt) : + hint_cancel(bitmap_hint, bitmap_off_c, bitmap_seq_min, + bitmap_seq_max, bitmap_seq_cnt); + error: d_list_for_each_entry_safe(resrvd, tmp, resrvd_list, vre_link) { d_list_del_init(&resrvd->vre_link); @@ -457,40 +606,6 @@ vea_tx_publish(struct vea_space_info *vsi, struct vea_hint_context *hint, return process_resrvd_list(vsi, hint, resrvd_list, true); } -struct free_commit_cb_arg { - struct vea_space_info *fca_vsi; - struct vea_free_extent fca_vfe; -}; - -static void -free_commit_cb(void *data, bool noop) -{ - struct free_commit_cb_arg *fca = data; - int rc; - - /* Transaction aborted, only need to free callback arg */ - if (noop) - goto free; - - /* - * Aggregated free will be executed on outermost transaction - * commit. - * - * If it fails, the freed space on persistent free tree won't - * be added in in-memory free tree, hence the space won't be - * visible for allocation until the tree sync up on next server - * restart. Such temporary space leak is tolerable, what we must - * avoid is the contrary case: in-memory tree update succeeds - * but persistent tree update fails, which risks data corruption. - */ - rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); - - D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", - fca->fca_vsi, rc); -free: - D_FREE(fca); -} - /* * Free allocated extent. * @@ -515,10 +630,10 @@ vea_free(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt) return -DER_NOMEM; fca->fca_vsi = vsi; - fca->fca_vfe.vfe_blk_off = blk_off; - fca->fca_vfe.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; - rc = verify_free_entry(NULL, &fca->fca_vfe); + rc = verify_free_entry(NULL, &fca->fca_vfe.vfe_ext); if (rc) goto error; @@ -571,16 +686,6 @@ vea_set_ext_age(struct vea_space_info *vsi, uint64_t blk_off, uint64_t age) return 0; } -/* Convert an extent into an allocated extent vector. 
*/ -int -vea_get_ext_vector(struct vea_space_info *vsi, uint64_t blk_off, - uint32_t blk_cnt, struct vea_ext_vector *ext_vector) -{ - D_ASSERT(vsi != NULL); - D_ASSERT(ext_vector != NULL); - return 0; -} - /* Load persistent hint data and initialize in-memory hint context */ int vea_hint_load(struct vea_hint_df *phd, struct vea_hint_context **thc) @@ -609,8 +714,8 @@ vea_hint_unload(struct vea_hint_context *thc) } static int -count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, - void *arg) +count_free_extent_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) { struct vea_free_extent *vfe; uint64_t *off, *free_blks = arg; @@ -629,16 +734,53 @@ count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, return 0; } +static int +count_free_bitmap_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_free_bitmap *vfb; + uint64_t *off, *free_blks = arg; + int rc; + + off = (uint64_t *)key->iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + return 0; + + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); + if (rc != 0) + return rc; + + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(vfb); + + return 0; +} + static int count_free_transient(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_entry *ve; + struct vea_extent_entry *ve; uint64_t *free_blks = arg; - ve = (struct vea_entry *)val->iov_buf; + ve = (struct vea_extent_entry *)val->iov_buf; D_ASSERT(free_blks != NULL); - *free_blks += ve->ve_ext.vfe_blk_cnt; + *free_blks += ve->vee_ext.vfe_blk_cnt; + + return 0; +} + +static int +count_free_bitmap_transient(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + uint64_t *free_blks = arg; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(&vbe->vbe_bitmap); return 0; } @@ -660,7 +802,8 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, attr->va_hdr_blks = vsd->vsd_hdr_blks; attr->va_large_thresh = vsi->vsi_class.vfc_large_thresh; attr->va_tot_blks = vsd->vsd_tot_blks; - attr->va_free_blks = vsi->vsi_stat[STAT_FREE_BLKS]; + attr->va_free_blks = vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] + + vsi->vsi_stat[STAT_FREE_BITMAP_BLKS]; } if (stat != NULL) { @@ -668,7 +811,13 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, stat->vs_free_persistent = 0; rc = dbtree_iterate(vsi->vsi_md_free_btr, DAOS_INTENT_DEFAULT, - false, count_free_persistent, + false, count_free_extent_persistent, + (void *)&stat->vs_free_persistent); + if (rc != 0) + return rc; + + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, + false, count_free_bitmap_persistent, (void *)&stat->vs_free_persistent); if (rc != 0) return rc; @@ -680,11 +829,19 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, if (rc != 0) return rc; + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, count_free_bitmap_transient, + (void *)&stat->vs_free_transient); + if (rc != 0) + return rc; + stat->vs_resrv_hint = vsi->vsi_stat[STAT_RESRV_HINT]; stat->vs_resrv_large = vsi->vsi_stat[STAT_RESRV_LARGE]; stat->vs_resrv_small = vsi->vsi_stat[STAT_RESRV_SMALL]; + stat->vs_resrv_bitmap = vsi->vsi_stat[STAT_RESRV_BITMAP]; stat->vs_frags_large = vsi->vsi_stat[STAT_FRAGS_LARGE]; stat->vs_frags_small = vsi->vsi_stat[STAT_FRAGS_SMALL]; + stat->vs_frags_bitmap = vsi->vsi_stat[STAT_FRAGS_BITMAP]; stat->vs_frags_aging = vsi->vsi_stat[STAT_FRAGS_AGING]; } diff 
--git a/src/vea/vea_free.c b/src/vea/vea_free.c index 53fa8492a91..f82fd299bd4 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,6 +7,7 @@ #include #include +#include #include "vea_internal.h" enum vea_free_type { @@ -15,33 +16,97 @@ enum vea_free_type { VEA_TYPE_PERSIST, }; +int +free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry) +{ + int type = VEA_FREE_ENTRY_BITMAP; + struct vea_free_bitmap *found; + daos_handle_t btr_hdl = vsi->vsi_bitmap_btr; + d_iov_t key_in, key_out, val; + uint64_t found_end, vfe_end; + int rc, opc = BTR_PROBE_LE; + struct vea_bitmap_entry *entry = NULL; + + if (blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + type = VEA_FREE_ENTRY_EXTENT; + goto out; + } + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + /* Fetch the in-tree record */ + d_iov_set(&key_in, &blk_off, sizeof(blk_off)); + d_iov_set(&key_out, NULL, sizeof(blk_off)); + d_iov_set(&val, NULL, 0); + + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key_in, &key_out, &val); + if (rc == -DER_NONEXIST) + return VEA_FREE_ENTRY_EXTENT; + + if (rc) { + D_ERROR("failed to search range ["DF_U64", %u] int bitmap tree\n", + blk_off, blk_cnt); + return rc; + } + + entry = (struct vea_bitmap_entry *)val.iov_buf; + found = &entry->vbe_bitmap; + rc = verify_bitmap_entry(found); + if (rc) { + D_ERROR("verify bitmap failed in free_type\n"); + return rc; + } + + found_end = found->vfb_blk_off + found->vfb_blk_cnt - 1; + vfe_end = blk_off + blk_cnt - 1; + D_ASSERT(blk_off >= found->vfb_blk_off); + if (blk_off <= found_end) { + if (vfe_end <= found_end) { + if (bitmap_entry) + *bitmap_entry = entry; + return VEA_FREE_ENTRY_BITMAP; + } + + D_CRIT("["DF_U64", %u] should not cross bitmap tree\n", + found->vfb_blk_off, found->vfb_blk_cnt); + return -DER_INVAL; + } else { + type = VEA_FREE_ENTRY_EXTENT; + } +out: + return type; +} + void -free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry) +extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry) { struct vea_free_class *vfc = &vsi->vsi_class; - struct vea_sized_class *sc = entry->ve_sized_class; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; + struct vea_sized_class *sc = entry->vee_sized_class; + uint32_t blk_cnt; if (sc == NULL) { + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > vfc->vfc_large_thresh, "%u <= %u", blk_cnt, vfc->vfc_large_thresh); - D_ASSERT(d_list_empty(&entry->ve_link)); + D_ASSERT(d_list_empty(&entry->vee_link)); - d_binheap_remove(&vfc->vfc_heap, &entry->ve_node); + d_binheap_remove(&vfc->vfc_heap, &entry->vee_node); dec_stats(vsi, STAT_FRAGS_LARGE, 1); } else { d_iov_t key; - uint64_t int_key = blk_cnt; int rc; + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > 0 && blk_cnt <= vfc->vfc_large_thresh, "%u > %u", blk_cnt, vfc->vfc_large_thresh); D_ASSERT(daos_handle_is_valid(vfc->vfc_size_btr)); - d_list_del_init(&entry->ve_link); - entry->ve_sized_class = NULL; + d_list_del_init(&entry->vee_link); + entry->vee_sized_class = NULL; /* Remove the sized class when it's empty */ - if (d_list_empty(&sc->vsc_lru)) { + if (d_list_empty(&sc->vsc_extent_lru)) { + uint64_t int_key = blk_cnt; + d_iov_set(&key, &int_key, sizeof(int_key)); rc = dbtree_delete(vfc->vfc_size_btr, BTR_PROBE_EQ, &key, NULL); if (rc) @@ -52,32 +117,16 @@ free_class_remove(struct vea_space_info 
*vsi, struct vea_entry *entry) } } -int -free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) +static int +find_or_create_sized_class(struct vea_space_info *vsi, uint64_t int_key, + struct vea_sized_class **ret_sc) { struct vea_free_class *vfc = &vsi->vsi_class; daos_handle_t btr_hdl = vfc->vfc_size_btr; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; d_iov_t key, val, val_out; - uint64_t int_key = blk_cnt; - struct vea_sized_class dummy, *sc; + struct vea_sized_class dummy, *sc = NULL; int rc; - D_ASSERT(entry->ve_sized_class == NULL); - D_ASSERT(d_list_empty(&entry->ve_link)); - - /* Add to heap if it's a large free extent */ - if (blk_cnt > vfc->vfc_large_thresh) { - rc = d_binheap_insert(&vfc->vfc_heap, &entry->ve_node); - if (rc != 0) { - D_ERROR("Failed to insert heap: %d\n", rc); - return rc; - } - - inc_stats(vsi, STAT_FRAGS_LARGE, 1); - return 0; - } - /* Add to a sized class */ D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &int_key, sizeof(int_key)); @@ -88,50 +137,119 @@ free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) /* Found an existing sized class */ sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); } else if (rc == -DER_NONEXIST) { /* Create a new sized class */ + memset(&dummy, 0, sizeof(dummy)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc != 0) { - D_ERROR("Insert size class:%u failed. "DF_RC"\n", - blk_cnt, DP_RC(rc)); + D_ERROR("Insert size class:%llu failed. "DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_INIT_LIST_HEAD(&sc->vsc_lru); + D_INIT_LIST_HEAD(&sc->vsc_extent_lru); } else { - D_ERROR("Lookup size class:%u failed. "DF_RC"\n", blk_cnt, DP_RC(rc)); + D_ERROR("Lookup size class:%llu failed. 
"DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } + *ret_sc = sc; + + return rc; +} + +int +extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry) +{ + struct vea_free_class *vfc = &vsi->vsi_class; + uint64_t int_key; + struct vea_sized_class *sc; + int rc; + + D_ASSERT(entry->vee_sized_class == NULL); + D_ASSERT(d_list_empty(&entry->vee_link)); + + int_key = entry->vee_ext.vfe_blk_cnt; + /* Add to heap if it's a free extent */ + if (int_key > vfc->vfc_large_thresh) { + rc = d_binheap_insert(&vfc->vfc_heap, &entry->vee_node); + if (rc != 0) { + D_ERROR("Failed to insert heap: %d\n", rc); + return rc; + } + inc_stats(vsi, STAT_FRAGS_LARGE, 1); + return 0; + } + + rc = find_or_create_sized_class(vsi, int_key, &sc); + if (rc) + return rc; - entry->ve_sized_class = sc; - d_list_add_tail(&entry->ve_link, &sc->vsc_lru); + entry->vee_sized_class = sc; + d_list_add_tail(&entry->vee_link, &sc->vsc_extent_lru); inc_stats(vsi, STAT_FRAGS_SMALL, 1); return 0; } static void -undock_entry(struct vea_space_info *vsi, struct vea_entry *entry, - unsigned int type) +bitmap_free_class_add(struct vea_space_info *vsi, struct vea_bitmap_entry *entry, + int flags) +{ + uint64_t int_key; + int free_blks; + + D_ASSERT(d_list_empty(&entry->vbe_link)); + + int_key = entry->vbe_bitmap.vfb_class; + D_ASSERT(int_key <= VEA_MAX_BITMAP_CLASS && int_key > 0); + + free_blks = bitmap_free_blocks(&entry->vbe_bitmap); + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, free_blks); + if (free_blks >= int_key) { + if (free_blks == entry->vbe_bitmap.vfb_blk_cnt) + d_list_add(&entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[int_key - 1]); + else + d_list_add(&entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[int_key - 1]); + } + inc_stats(vsi, STAT_FRAGS_BITMAP, 1); +} + +static void +undock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, + unsigned int type) { if (type == VEA_TYPE_PERSIST) return; D_ASSERT(entry != NULL); if (type == VEA_TYPE_COMPOUND) { - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); } else { - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vee_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); } } +static void +undock_free_entry(struct vea_space_info *vsi, struct vea_free_entry *entry, + unsigned int type) +{ + if (type == VEA_TYPE_PERSIST || type == VEA_TYPE_COMPOUND) + return; + + d_list_del_init(&entry->vfe_link); + dec_stats(vsi, STAT_FRAGS_AGING, 1); +} + #define LARGE_AGING_FRAG_BLKS 8192 static inline bool @@ -141,27 +259,20 @@ is_aging_frag_large(struct vea_free_extent *vfe) } static inline void -dock_aging_entry(struct vea_space_info *vsi, struct vea_entry *entry) +dock_aging_entry(struct vea_space_info *vsi, struct vea_free_entry *entry) { - d_list_add_tail(&entry->ve_link, &vsi->vsi_agg_lru); + d_list_add_tail(&entry->vfe_link, &vsi->vsi_agg_lru); inc_stats(vsi, STAT_FRAGS_AGING, 1); } static int -dock_entry(struct vea_space_info *vsi, struct vea_entry *entry, unsigned int type) +dock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, unsigned int type) { - int rc = 0; D_ASSERT(entry != NULL); - if (type == VEA_TYPE_COMPOUND) { - rc = free_class_add(vsi, entry); - } else { - D_ASSERT(type == VEA_TYPE_AGGREGATE); - D_ASSERT(d_list_empty(&entry->ve_link)); - dock_aging_entry(vsi, entry); - } + D_ASSERT(type == VEA_TYPE_COMPOUND); - return rc; + return extent_free_class_add(vsi, entry); } /* @@ -175,26 +286,17 @@ dock_entry(struct vea_space_info *vsi, 
struct vea_entry *entry, unsigned int typ */ static int merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, - unsigned int type, unsigned int flags) + unsigned int type, unsigned int flags, daos_handle_t btr_hdl) { struct vea_free_extent *ext, *neighbor = NULL; struct vea_free_extent merged = *ext_in; - struct vea_entry *entry, *neighbor_entry = NULL; - daos_handle_t btr_hdl; + struct vea_extent_entry *extent_entry, *neighbor_extent_entry = NULL; + struct vea_free_entry *free_entry, *neighbor_free_entry = NULL; d_iov_t key, key_out, val; uint64_t *off; bool fetch_prev = true, large_prev = false; int rc, del_opc = BTR_PROBE_BYPASS; - if (type == VEA_TYPE_COMPOUND) - btr_hdl = vsi->vsi_free_btr; - else if (type == VEA_TYPE_PERSIST) - btr_hdl = vsi->vsi_md_free_btr; - else if (type == VEA_TYPE_AGGREGATE) - btr_hdl = vsi->vsi_agg_btr; - else - return -DER_INVAL; - D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &ext_in->vfe_blk_off, sizeof(ext_in->vfe_blk_off)); d_iov_set(&key_out, NULL, 0); @@ -215,7 +317,7 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } repeat: - d_iov_set(&key_out, NULL, 0); + d_iov_set(&key_out, NULL, sizeof(ext_in->vfe_blk_off)); d_iov_set(&val, NULL, 0); if (fetch_prev) { @@ -249,11 +351,17 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } if (type == VEA_TYPE_PERSIST) { - entry = NULL; + extent_entry = NULL; + free_entry = NULL; ext = (struct vea_free_extent *)val.iov_buf; + } else if (type == VEA_TYPE_COMPOUND) { + free_entry = NULL; + extent_entry = (struct vea_extent_entry *)val.iov_buf; + ext = &extent_entry->vee_ext; } else { - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + extent_entry = NULL; + free_entry = (struct vea_free_entry *)val.iov_buf; + ext = &free_entry->vfe_ext; } off = (uint64_t *)key_out.iov_buf; @@ -297,7 +405,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, merged.vfe_blk_cnt += ext->vfe_blk_cnt; neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = extent_entry; + neighbor_free_entry = free_entry; } else { merged.vfe_blk_cnt += ext->vfe_blk_cnt; @@ -306,7 +415,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, * adjacent extent. 
*/ if (neighbor != NULL) { - undock_entry(vsi, entry, type); + if (extent_entry) + undock_extent_entry(vsi, extent_entry, type); + else if (free_entry) + undock_free_entry(vsi, free_entry, type); rc = dbtree_delete(btr_hdl, del_opc, &key_out, NULL); if (rc) { D_ERROR("Failed to delete: %d\n", rc); @@ -314,7 +426,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } } else { neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = extent_entry; + neighbor_free_entry = free_entry; } } } @@ -335,7 +448,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } } else { - undock_entry(vsi, neighbor_entry, type); + if (neighbor_extent_entry) + undock_extent_entry(vsi, neighbor_extent_entry, type); + else if (neighbor_free_entry) + undock_free_entry(vsi, neighbor_free_entry, type); } /* Adjust in-tree offset & length */ @@ -344,24 +460,123 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, if (type == VEA_TYPE_AGGREGATE || type == VEA_TYPE_COMPOUND) { neighbor->vfe_age = merged.vfe_age; - rc = dock_entry(vsi, neighbor_entry, type); - if (rc < 0) - return rc; + if (neighbor_extent_entry) { + rc = dock_extent_entry(vsi, neighbor_extent_entry, type); + if (rc < 0) + return rc; + } else if (neighbor_free_entry) { + D_ASSERT(type == VEA_TYPE_AGGREGATE); + D_ASSERT(d_list_empty(&neighbor_free_entry->vfe_link)); + dock_aging_entry(vsi, neighbor_free_entry); + } } return 1; } -/* Free extent to in-memory compound index */ +/* insert bitmap entry to in-memory index */ int -compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags) +bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct vea_bitmap_entry **ret_entry, unsigned int flags) { - struct vea_entry *entry, dummy; + struct vea_bitmap_entry *entry, *dummy; + d_iov_t key, val, val_out; + int rc, ret; + struct umem_attr uma; + int dummy_size = sizeof(*dummy) + (vfb->vfb_bitmap_sz << 3); + + D_ALLOC(dummy, dummy_size); + if (!dummy) + return -DER_NOMEM; + + memset(dummy, 0, sizeof(*dummy)); + dummy->vbe_bitmap = *vfb; + dummy->vbe_agg_btr = DAOS_HDL_INVAL; + if (state == VEA_BITMAP_STATE_NEW) + setbits64(dummy->vbe_bitmap.vfb_bitmaps, 0, 1); + else + memcpy(dummy->vbe_bitmap.vfb_bitmaps, vfb->vfb_bitmaps, vfb->vfb_bitmap_sz << 3); + dummy->vbe_published_state = state; + + /* Add to in-memory bitmap tree */ + D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); + d_iov_set(&key, &dummy->vbe_bitmap.vfb_blk_off, sizeof(dummy->vbe_bitmap.vfb_blk_off)); + d_iov_set(&val, dummy, dummy_size); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(dummy); + if (rc != 0) { + D_ERROR("Insert bitmap failed. 
"DF_RC" %llu\n", DP_RC(rc), + (unsigned long long)vfb->vfb_blk_off); + return rc; + } + + memset(&uma, 0, sizeof(uma)); + uma.uma_id = UMEM_CLASS_VMEM; + + D_ASSERT(val_out.iov_buf != NULL); + entry = (struct vea_bitmap_entry *)val_out.iov_buf; + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, + &entry->vbe_agg_btr); + if (rc != 0) + goto error; + + D_INIT_LIST_HEAD(&entry->vbe_link); + D_ASSERT(entry->vbe_bitmap.vfb_class == vfb->vfb_class); + + bitmap_free_class_add(vsi, entry, flags); + if (ret_entry) + *ret_entry = entry; + return rc; + +error: + ret = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (ret) + D_ERROR("Failed to clean bitmap failed. "DF_RC" "DF_U64"\n", + DP_RC(rc), vfb->vfb_blk_off); + return rc; +} + +static int +bitmap_entry_remove(struct vea_space_info *vsi, struct vea_bitmap_entry *bitmap, + unsigned int flags) +{ + d_iov_t key; + int rc; + + rc = dbtree_destroy(bitmap->vbe_agg_btr, NULL); + if (rc) { + D_ERROR("Failed to destroy bitmap agg tree. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + return rc; + } + bitmap->vbe_agg_btr = DAOS_HDL_INVAL; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + dec_stats(vsi, STAT_FREE_BITMAP_BLKS, bitmap->vbe_bitmap.vfb_blk_cnt); + d_list_del_init(&bitmap->vbe_link); + dec_stats(vsi, STAT_FRAGS_BITMAP, 1); + + d_iov_set(&key, &bitmap->vbe_bitmap.vfb_blk_off, sizeof(bitmap->vbe_bitmap.vfb_blk_off)); + rc = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) + D_ERROR("Failed to clean bitmap failed. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + + return rc; +} + +int +compound_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + unsigned int flags) +{ + struct vea_extent_entry *entry, dummy; d_iov_t key, val, val_out; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags, vsi->vsi_free_btr); if (rc < 0) { return rc; } else if (rc > 0) { @@ -370,12 +585,12 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + D_INIT_LIST_HEAD(&dummy.vee_link); + dummy.vee_ext = *vfe; /* Add to in-memory free extent tree */ D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vee_ext.vfe_blk_off, sizeof(dummy.vee_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); @@ -387,27 +602,81 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_extent_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vee_link); - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); accounting: if (!rc && !(flags & VEA_FL_NO_ACCOUNTING)) - inc_stats(vsi, STAT_FREE_BLKS, vfe->vfe_blk_cnt); + inc_stats(vsi, STAT_FREE_EXTENT_BLKS, vfe->vfe_blk_cnt); return rc; } -/* Free extent to persistent free tree */ +/* Free entry to in-memory compound index */ int -persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, + unsigned int flags) +{ + int rc; + struct vea_bitmap_entry *found = vfe->vfe_bitmap; + + if (found == NULL) + return compound_free_extent(vsi, 
&vfe->vfe_ext, flags); + + rc = bitmap_set_range(NULL, &found->vbe_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); + if (rc) + return rc; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, vfe->vfe_ext.vfe_blk_cnt); + + /* if bitmap is not published and clear, then remove it */ + if (found->vbe_published_state == VEA_BITMAP_STATE_NEW) { + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + struct vea_free_extent ext; + + ext.vfe_blk_cnt = found->vbe_bitmap.vfb_blk_cnt; + ext.vfe_blk_off = found->vbe_bitmap.vfb_blk_off; + rc = bitmap_entry_remove(vsi, found, flags); + if (rc) + return rc; + return compound_free_extent(vsi, &ext, flags); + } + } + + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + if (d_list_empty(&found->vbe_link)) + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + else + d_list_move_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + return 0; + } + + if (d_list_empty(&found->vbe_link)) { + D_ASSERT(found->vbe_bitmap.vfb_class <= VEA_MAX_BITMAP_CLASS); + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[found->vbe_bitmap.vfb_class - 1]); + } + + return 0; +} + +/* Free extent to persistent free tree */ +static int +persistent_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent dummy; d_iov_t key, val; daos_handle_t btr_hdl = vsi->vsi_md_free_btr; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0, vsi->vsi_md_free_btr); if (rc < 0) return rc; else if (rc > 0) @@ -428,41 +697,70 @@ persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) return rc; } +int +persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + int type; + + D_ASSERT(umem_tx_inprogress(vsi->vsi_umem) || + vsi->vsi_umem->umm_id == UMEM_CLASS_VMEM); + D_ASSERT(vfe->vfe_ext.vfe_blk_off != VEA_HINT_OFF_INVAL); + type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, + &vfe->vfe_bitmap); + if (type < 0) + return type; + + if (vfe->vfe_bitmap == NULL) + return persistent_free_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(type == VEA_FREE_ENTRY_BITMAP); + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt > 0 && + vfe->vfe_ext.vfe_blk_cnt < vsi->vsi_class.vfc_large_thresh); + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); +} + /* Free extent to the aggregate free tree */ int -aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) { - struct vea_entry *entry, dummy; + struct vea_free_entry *entry, dummy; d_iov_t key, val, val_out; daos_handle_t btr_hdl = vsi->vsi_agg_btr; int rc; - vfe->vfe_age = get_current_age(); - rc = merge_free_ext(vsi, vfe, VEA_TYPE_AGGREGATE, 0); + /* free entry bitmap */ + if (vfe->vfe_bitmap == NULL) + btr_hdl = vsi->vsi_agg_btr; + else + btr_hdl = vfe->vfe_bitmap->vbe_agg_btr; + + vfe->vfe_ext.vfe_age = get_current_age(); + rc = merge_free_ext(vsi, &vfe->vfe_ext, VEA_TYPE_AGGREGATE, 0, btr_hdl); if (rc < 0) return rc; else if (rc > 0) - return 0; /* extent merged in tree */ + return 0; /* entry merged in tree */ - memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + dummy = *vfe; + 
D_INIT_LIST_HEAD(&dummy.vfe_link); /* Add to in-memory aggregate free extent tree */ D_ASSERT(daos_handle_is_valid(btr_hdl)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vfe_ext.vfe_blk_off, sizeof(dummy.vfe_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc) { - D_ERROR("Insert aging extent failed. "DF_RC"\n", DP_RC(rc)); + D_ERROR("Insert aging entry failed. "DF_RC"\n", DP_RC(rc)); return rc; } D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_free_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vfe_link); dock_aging_entry(vsi, entry); return 0; @@ -474,35 +772,48 @@ aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) static int flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_list_t *unmap_sgl) { - struct vea_entry *entry, *tmp; + struct vea_free_entry *entry, *tmp; struct vea_free_extent vfe; + struct vea_free_entry free_entry; d_iov_t *unmap_iov; int i, rc = 0; + d_iov_t key; + struct vea_bitmap_entry *bitmap; + struct vea_bitmap_entry **flush_bitmaps; + daos_handle_t btr_hdl; D_ASSERT(umem_tx_none(vsi->vsi_umem)); D_ASSERT(unmap_sgl->sg_nr_out == 0); - d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, ve_link) { - d_iov_t key; + D_ALLOC_ARRAY(flush_bitmaps, MAX_FLUSH_FRAGS); + if (!flush_bitmaps) + return -DER_NOMEM; - vfe = entry->ve_ext; + d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, vfe_link) { + vfe = entry->vfe_ext; if (!force && cur_time < (vfe.vfe_age + EXPIRE_INTVL)) break; /* Remove entry from aggregate LRU list */ - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vfe_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); + bitmap = entry->vfe_bitmap; + if (bitmap) + btr_hdl = bitmap->vbe_agg_btr; + else + btr_hdl = vsi->vsi_agg_btr; /* Remove entry from aggregate tree, entry will be freed on deletion */ d_iov_set(&key, &vfe.vfe_blk_off, sizeof(vfe.vfe_blk_off)); - D_ASSERT(daos_handle_is_valid(vsi->vsi_agg_btr)); - rc = dbtree_delete(vsi->vsi_agg_btr, BTR_PROBE_EQ, &key, NULL); + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_delete(btr_hdl, BTR_PROBE_EQ, &key, NULL); if (rc) { D_ERROR("Remove ["DF_U64", %u] from aggregated tree error: "DF_RC"\n", vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); break; } + flush_bitmaps[unmap_sgl->sg_nr_out] = bitmap; /* Unmap callback may yield, so we can't call it directly in this tight loop */ unmap_sgl->sg_nr_out++; unmap_iov = &unmap_sgl->sg_iovs[unmap_sgl->sg_nr_out - 1]; @@ -533,15 +844,18 @@ flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_l for (i = 0; i < unmap_sgl->sg_nr_out; i++) { unmap_iov = &unmap_sgl->sg_iovs[i]; - vfe.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; - vfe.vfe_blk_cnt = unmap_iov->iov_len; - vfe.vfe_age = cur_time; + free_entry.vfe_ext.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; + free_entry.vfe_ext.vfe_blk_cnt = unmap_iov->iov_len; + free_entry.vfe_ext.vfe_age = cur_time; + free_entry.vfe_bitmap = flush_bitmaps[i]; - rc = compound_free(vsi, &vfe, 0); + rc = compound_free(vsi, &free_entry, 0); if (rc) D_ERROR("Compound free ["DF_U64", %u] error: "DF_RC"\n", - vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); + free_entry.vfe_ext.vfe_blk_off, free_entry.vfe_ext.vfe_blk_cnt, + DP_RC(rc)); } + D_FREE(flush_bitmaps); return rc; } 
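/*
 * Illustration only, not part of the patch: a minimal standalone sketch of the
 * free-block accounting the bitmap entries above rely on. Judging from
 * is_bitmap_empty() and the clear=true calls in compound_free(), a cleared bit
 * marks one free allocation unit of vfb_class blocks, so the free block count
 * is (number of zero bits) * class. The names below are illustrative and are
 * not part of the VEA API.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
sketch_bitmap_free_blocks(const uint64_t *bitmaps, uint16_t bitmap_sz, uint16_t cls)
{
	uint32_t free_bits = 0;
	int      i;

	/* Each zero bit represents one free allocation unit of 'cls' blocks */
	for (i = 0; i < bitmap_sz; i++)
		free_bits += 64 - __builtin_popcountll(bitmaps[i]);

	return free_bits * cls;
}

int
main(void)
{
	/* Class-1 chunk of 256 blocks: 256 bits tracked in 4 uint64_t words */
	uint64_t bits[4] = { 0, 0, 0, 0 };

	printf("%u\n", (unsigned)sketch_bitmap_free_blocks(bits, 4, 1)); /* 256: fully free */

	bits[0] = 0xffULL;                                               /* reserve 8 units */
	printf("%u\n", (unsigned)sketch_bitmap_free_blocks(bits, 4, 1)); /* 248 */
	return 0;
}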
@@ -562,6 +876,127 @@ need_aging_flush(struct vea_space_info *vsi, uint32_t cur_time, bool force) return true; } +void +free_commit_cb(void *data, bool noop) +{ + struct free_commit_cb_arg *fca = data; + int rc; + + /* Transaction aborted, only need to free callback arg */ + if (noop) + goto free; + + /* + * Aggregated free will be executed on outermost transaction + * commit. + * + * If it fails, the freed space on persistent free tree won't + * be added in in-memory free tree, hence the space won't be + * visible for allocation until the tree sync up on next server + * restart. Such temporary space leak is tolerable, what we must + * avoid is the contrary case: in-memory tree update succeeds + * but persistent tree update fails, which risks data corruption. + */ + rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); + + D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", + fca->fca_vsi, rc); +free: + D_FREE(fca); +} + +static int +reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t *nr_reclaimed) +{ + int i; + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_free_bitmap *vfb; + d_iov_t key; + int rc = 0; + struct free_commit_cb_arg *fca; + struct umem_instance *umem = vsi->vsi_umem; + int nr = 0; + uint64_t blk_off; + uint32_t blk_cnt; + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_empty[i], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == i + 1); + D_ASSERT(is_bitmap_empty(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz)); + d_list_del_init(&bitmap_entry->vbe_link); + D_ALLOC_PTR(fca); + if (!fca) + return -DER_NOMEM; + + blk_off = vfb->vfb_blk_off; + blk_cnt = vfb->vfb_blk_cnt; + fca->fca_vsi = vsi; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_age = 0; /* not used */ + + rc = umem_tx_begin(umem, vsi->vsi_txd); + if (rc != 0) { + D_FREE(fca); + return rc; + } + + /* + * Even in-memory bitmap failed to remove from tree, it is ok + * because this bitmap chunk has been removed from allocation LRU list. + */ + d_iov_set(&key, &fca->fca_vfe.vfe_ext.vfe_blk_off, + sizeof(fca->fca_vfe.vfe_ext.vfe_blk_off)); + dbtree_destroy(bitmap_entry->vbe_agg_btr, NULL); + rc = dbtree_delete(fca->fca_vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from bitmap tree " + "error: "DF_RC"\n", fca->fca_vfe.vfe_ext.vfe_blk_off, + fca->fca_vfe.vfe_ext.vfe_blk_cnt, DP_RC(rc)); + goto abort; + } + dec_stats(fca->fca_vsi, STAT_FRAGS_BITMAP, 1); + dec_stats(fca->fca_vsi, STAT_FREE_BITMAP_BLKS, blk_cnt); + + d_iov_set(&key, &blk_off, sizeof(blk_off)); + rc = dbtree_delete(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent bitmap " + "tree error: "DF_RC"\n", blk_off, blk_cnt, DP_RC(rc)); + goto abort; + } + /* call persistent_free_extent instead */ + rc = persistent_free(vsi, &fca->fca_vfe); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent " + "extent tree error: "DF_RC"\n", blk_off, + blk_cnt, DP_RC(rc)); + goto abort; + } + rc = umem_tx_add_callback(umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + free_commit_cb, fca); + if (rc == 0) + fca = NULL; +abort: + D_FREE(fca); + /* Commit/Abort transaction on success/error */ + rc = rc ? 
umem_tx_abort(umem, rc) : umem_tx_commit(umem); + if (rc) + return rc; + nr++; + if (nr >= nr_reclaim) + goto out; + } + } + +out: + if (nr_reclaimed) + *nr_reclaimed = nr; + return rc; +} + int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed) @@ -597,6 +1032,10 @@ trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, } d_sgl_fini(&unmap_sgl, false); + + rc = reclaim_unused_bitmap(vsi, MAX_FLUSH_FRAGS, NULL); + if (rc) + goto out; out: if (nr_flushed != NULL) *nr_flushed = tot_flushed; diff --git a/src/vea/vea_hint.c b/src/vea/vea_hint.c index 65c923476b5..83f2a13e1e2 100644 --- a/src/vea/vea_hint.c +++ b/src/vea/vea_hint.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -54,7 +54,7 @@ hint_cancel(struct vea_hint_context *hint, uint64_t off, uint64_t seq_min, */ hint->vhc_off = off; return 0; - } else if (hint->vhc_seq > seq_max) { + } else if (hint->vhc_seq >= seq_max) { /* * Subsequent reserve detected, abort hint cancel. It could * result in un-allocated holes on out of order hint cancels, diff --git a/src/vea/vea_init.c b/src/vea/vea_init.c index d237c46af70..adf8258c2f3 100644 --- a/src/vea/vea_init.c +++ b/src/vea/vea_init.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -25,13 +25,13 @@ destroy_free_class(struct vea_free_class *vfc) static bool heap_node_cmp(struct d_binheap_node *a, struct d_binheap_node *b) { - struct vea_entry *nodea, *nodeb; + struct vea_extent_entry *nodea, *nodeb; - nodea = container_of(a, struct vea_entry, ve_node); - nodeb = container_of(b, struct vea_entry, ve_node); + nodea = container_of(a, struct vea_extent_entry, vee_node); + nodeb = container_of(b, struct vea_extent_entry, vee_node); /* Max heap, the largest free extent is heap root */ - return nodea->ve_ext.vfe_blk_cnt > nodeb->ve_ext.vfe_blk_cnt; + return nodea->vee_ext.vfe_blk_cnt > nodeb->vee_ext.vfe_blk_cnt; } static struct d_binheap_ops heap_ops = { @@ -45,6 +45,7 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) { struct umem_attr uma; int rc; + int i; vfc->vfc_size_btr = DAOS_HDL_INVAL; rc = d_binheap_create_inplace(DBH_FT_NOLOCK, 0, NULL, &heap_ops, @@ -60,9 +61,17 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) /* Create in-memory sized free extent tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, &vfc->vfc_size_btr); - if (rc != 0) + if (rc != 0) { destroy_free_class(vfc); + goto out; + } + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_lru[i]); + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_empty[i]); + } +out: return rc; } @@ -74,9 +83,14 @@ unload_space_info(struct vea_space_info *vsi) vsi->vsi_md_free_btr = DAOS_HDL_INVAL; } - if (daos_handle_is_valid(vsi->vsi_md_vec_btr)) { - dbtree_close(vsi->vsi_md_vec_btr); - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + if (daos_handle_is_valid(vsi->vsi_md_bitmap_btr)) { + dbtree_close(vsi->vsi_md_bitmap_btr); + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; + } + + if (vsi->vsi_bitmap_hint_context) { + vea_hint_unload(vsi->vsi_bitmap_hint_context); + vsi->vsi_bitmap_hint_context = NULL; } } @@ -96,7 +110,7 @@ load_free_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) if (rc != 0) return rc; - rc = compound_free(vsi, 
vfe, VEA_FL_NO_MERGE); + rc = compound_free_extent(vsi, vfe, VEA_FL_NO_MERGE); if (rc != 0) return rc; @@ -104,22 +118,28 @@ load_free_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) } static int -load_vec_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +load_bitmap_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_ext_vector *vec; + struct vea_free_bitmap *vfb; struct vea_space_info *vsi; + struct vea_bitmap_entry *bitmap_entry; uint64_t *off; int rc; vsi = (struct vea_space_info *)arg; off = (uint64_t *)key->iov_buf; - vec = (struct vea_ext_vector *)val->iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + return 0; - rc = verify_vec_entry(off, vec); + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); if (rc != 0) return rc; - return compound_vec_alloc(vsi, vec); + rc = bitmap_entry_insert(vsi, vfb, VEA_BITMAP_STATE_PUBLISHED, &bitmap_entry, 0); + bitmap_entry->vbe_md_bitmap = vfb; + + return rc; } int @@ -127,6 +147,9 @@ load_space_info(struct vea_space_info *vsi) { struct umem_attr uma = {0}; int rc; + struct vea_hint_df *df; + uint64_t offset; + d_iov_t key, val; D_ASSERT(vsi->vsi_umem != NULL); D_ASSERT(vsi->vsi_md != NULL); @@ -141,10 +164,9 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Open SCM extent vector tree */ - D_ASSERT(daos_handle_is_inval(vsi->vsi_md_vec_btr)); - rc = dbtree_open_inplace(&vsi->vsi_md->vsd_vec_tree, &uma, - &vsi->vsi_md_vec_btr); + /* Open SCM bitmap tree */ + rc = dbtree_open_inplace(&vsi->vsi_md->vsd_bitmap_tree, &uma, + &vsi->vsi_md_bitmap_btr); if (rc != 0) goto error; @@ -154,12 +176,28 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Build up in-memory extent vector tree */ - rc = dbtree_iterate(vsi->vsi_md_vec_btr, DAOS_INTENT_DEFAULT, false, - load_vec_entry, (void *)vsi); + /* Build up in-memory bitmap tree */ + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, false, + load_bitmap_entry, (void *)vsi); if (rc != 0) goto error; + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_DEFAULT, + &key, NULL, &val); + if (rc) + goto error; + + df = (struct vea_hint_df *)val.iov_buf; + rc = vea_hint_load(df, &vsi->vsi_bitmap_hint_context); + if (rc) + goto error; + return 0; error: unload_space_info(vsi); diff --git a/src/vea/vea_internal.h b/src/vea/vea_internal.h index 3a5ac97fde0..e0880bde951 100644 --- a/src/vea/vea_internal.h +++ b/src/vea/vea_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,12 +11,39 @@ #include #include #include +#include #include #define VEA_MAGIC (0xea201804) #define VEA_BLK_SZ (4 * 1024) /* 4K */ #define VEA_TREE_ODR 20 +/* Common free extent structure for both SCM & in-memory index */ +struct vea_free_extent { + uint64_t vfe_blk_off; /* Block offset of the extent */ + uint32_t vfe_blk_cnt; /* Total blocks of the extent */ + uint32_t vfe_age; /* Monotonic timestamp */ +}; + +/* Min bitmap allocation class */ +#define VEA_MIN_BITMAP_CLASS 1 +/* Max bitmap allocation class */ +#define VEA_MAX_BITMAP_CLASS 64 + +/* Bitmap chunk size */ +#define VEA_BITMAP_MIN_CHUNK_BLKS 256 /* 1MiB */ +#define VEA_BITMAP_MAX_CHUNK_BLKS (VEA_MAX_BITMAP_CLASS * 256) /* 64 MiB */ + + +/* Common free bitmap structure for both SCM & in-memory index */ +struct vea_free_bitmap { + uint64_t vfb_blk_off; /* Block offset of the bitmap */ + uint32_t vfb_blk_cnt; /* Block count of the bitmap */ + uint16_t vfb_class; /* Allocation class of bitmap */ + uint16_t vfb_bitmap_sz; /* Bitmap size*/ + uint64_t vfb_bitmaps[0]; /* Bitmaps of this chunk */ +}; + /* Per I/O stream hint context */ struct vea_hint_context { struct vea_hint_df *vhc_pd; @@ -27,18 +54,55 @@ struct vea_hint_context { }; /* Free extent informat stored in the in-memory compound free extent index */ -struct vea_entry { +struct vea_extent_entry { /* * Always keep it as first item, since vfe_blk_off is the direct key * of DBTREE_CLASS_IV */ - struct vea_free_extent ve_ext; - /* Link to one of vsc_lru or vsi_agg_lru */ - d_list_t ve_link; + struct vea_free_extent vee_ext; + /* Link to one of vsc_extent_lru */ + d_list_t vee_link; /* Back reference to sized tree entry */ - struct vea_sized_class *ve_sized_class; + struct vea_sized_class *vee_sized_class; /* Link to vfc_heap */ - struct d_binheap_node ve_node; + struct d_binheap_node vee_node; +}; + +enum { + VEA_BITMAP_STATE_PUBLISHED, + VEA_BITMAP_STATE_PUBLISHING, + VEA_BITMAP_STATE_NEW, +}; + +/* Bitmap entry */ +struct vea_bitmap_entry { + /* Link to one of vfc_bitmap_lru[] */ + d_list_t vbe_link; + /* Bitmap published state */ + int vbe_published_state; + /* + * Free entries sorted by offset, for coalescing the just recent + * free blocks inside this bitmap chunk. + */ + daos_handle_t vbe_agg_btr; + /* Point to persistent free bitmap entry */ + struct vea_free_bitmap *vbe_md_bitmap; + /* free bitmap, always keep it as last item*/ + struct vea_free_bitmap vbe_bitmap; +}; + +enum { + VEA_FREE_ENTRY_EXTENT, + VEA_FREE_ENTRY_BITMAP, +}; + +/* freed entry stored in aggregation tree */ +struct vea_free_entry { + struct vea_free_extent vfe_ext; + /* Back pointer bitmap entry */ + struct vea_bitmap_entry *vfe_bitmap; + /* Link to one vsi_agg_lru */ + d_list_t vfe_link; }; #define VEA_LARGE_EXT_MB 64 /* Large extent threshold in MB */ @@ -47,9 +111,10 @@ struct vea_entry { /* Value entry of sized free extent tree (vfc_size_btr) */ struct vea_sized_class { /* Small extents LRU list */ - d_list_t vsc_lru; + d_list_t vsc_extent_lru; }; +#define VEA_BITMAP_CHUNK_HINT_KEY (~(0ULL)) /* * Large free extents (>VEA_LARGE_EXT_MB) are tracked in max a heap, small * free extents (<= VEA_LARGE_EXT_MB) are tracked in a size tree. 
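/*
 * Illustration only, not part of the patch: the size arithmetic behind the
 * bitmap chunk constants above, assuming the 4 KiB VEA_BLK_SZ defined earlier
 * in this header. A chunk of blk_cnt blocks with allocation class cls tracks
 * one bit per cls blocks, rounded up to whole 64-bit words; bitmap_free_blocks()
 * below additionally asserts that the rounding is exact for the chunks it sees.
 * Names here are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t
sketch_bitmap_words(uint32_t blk_cnt, uint16_t cls)
{
	uint32_t bits = blk_cnt / cls;          /* one bit per cls blocks */

	return (uint16_t)((bits + 63) / 64);    /* round up to uint64_t words */
}

int
main(void)
{
	/* VEA_BITMAP_MIN_CHUNK_BLKS: 256 blocks * 4 KiB = 1 MiB, class 1 */
	printf("min chunk: %u words\n", (unsigned)sketch_bitmap_words(256, 1));       /* 4 */

	/* VEA_BITMAP_MAX_CHUNK_BLKS: 64 * 256 blocks = 64 MiB, class 64 */
	printf("max chunk: %u words\n", (unsigned)sketch_bitmap_words(64 * 256, 64)); /* 4 */
	return 0;
}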
@@ -61,6 +126,10 @@ struct vea_free_class { daos_handle_t vfc_size_btr; /* Size threshold for large extent */ uint32_t vfc_large_thresh; + /* Bitmap LRU list for different bitmap allocation class*/ + d_list_t vfc_bitmap_lru[VEA_MAX_BITMAP_CLASS]; + /* Empty bitmap list for different allocation class */ + d_list_t vfc_bitmap_empty[VEA_MAX_BITMAP_CLASS]; }; enum { @@ -68,21 +137,27 @@ enum { STAT_RESRV_HINT = 0, /* Number of large reserve */ STAT_RESRV_LARGE = 1, - /* Number of small reserve */ + /* Number of small extents reserve */ STAT_RESRV_SMALL = 2, + /* Number of bitmap reserve */ + STAT_RESRV_BITMAP = 3, /* Max reserve type */ - STAT_RESRV_TYPE_MAX = 3, + STAT_RESRV_TYPE_MAX = 4, /* Number of large(> VEA_LARGE_EXT_MB) free frags available for allocation */ - STAT_FRAGS_LARGE = 3, - /* Number of small free frags available for allocation */ - STAT_FRAGS_SMALL = 4, + STAT_FRAGS_LARGE = 4, + /* Number of small free extent frags available for allocation */ + STAT_FRAGS_SMALL = 5, /* Number of frags in aging buffer (to be unmapped) */ - STAT_FRAGS_AGING = 5, + STAT_FRAGS_AGING = 6, + /* Number of bitmaps */ + STAT_FRAGS_BITMAP = 7, /* Max frag type */ - STAT_FRAGS_TYPE_MAX = 3, - /* Number of blocks available for allocation */ - STAT_FREE_BLKS = 6, - STAT_MAX = 7, + STAT_FRAGS_TYPE_MAX = 4, + /* Number of extent blocks available for allocation */ + STAT_FREE_EXTENT_BLKS = 8, + /* Number of bitmap blocks available for allocation */ + STAT_FREE_BITMAP_BLKS = 9, + STAT_MAX = 10, }; struct vea_metrics { @@ -91,6 +166,8 @@ struct vea_metrics { struct d_tm_node_t *vm_free_blks; }; +#define MAX_FLUSH_FRAGS 256 + /* In-memory compound index */ struct vea_space_info { /* Instance for the pmemobj pool on SCM */ @@ -106,18 +183,20 @@ struct vea_space_info { struct vea_space_df *vsi_md; /* Open handles for the persistent free extent tree */ daos_handle_t vsi_md_free_btr; - /* Open handles for the persistent extent vector tree */ - daos_handle_t vsi_md_vec_btr; + /* Open handles for the persistent bitmap tree */ + daos_handle_t vsi_md_bitmap_btr; /* Free extent tree sorted by offset, for all free extents. */ daos_handle_t vsi_free_btr; - /* Extent vector tree, for non-contiguous allocation */ - daos_handle_t vsi_vec_btr; + /* Bitmap tree, for small allocation */ + daos_handle_t vsi_bitmap_btr; + /* Hint context for bitmap chunk allocation */ + struct vea_hint_context *vsi_bitmap_hint_context; /* Index for searching free extent by size & age */ struct vea_free_class vsi_class; - /* LRU to aggergate just recent freed extents */ + /* LRU to aggergate just recent freed extents or bitmap blocks */ d_list_t vsi_agg_lru; /* - * Free extent tree sorted by offset, for coalescing the just recent + * Free entries sorted by offset, for coalescing the just recent * free extents. 
*/ daos_handle_t vsi_agg_btr; @@ -132,6 +211,11 @@ struct vea_space_info { bool vsi_flush_scheduled; }; +struct free_commit_cb_arg { + struct vea_space_info *fca_vsi; + struct vea_free_entry fca_vfe; +}; + static inline uint32_t get_current_age(void) { @@ -146,6 +230,46 @@ enum vea_free_flags { VEA_FL_NO_ACCOUNTING = (1 << 1), }; +static inline bool +is_bitmap_feature_enabled(struct vea_space_info *vsi) +{ + return vsi->vsi_md->vsd_compat & VEA_COMPAT_FEATURE_BITMAP; +} + +static inline int +alloc_free_bitmap_size(uint16_t bitmap_sz) +{ + return sizeof(struct vea_free_bitmap) + (bitmap_sz << 3); +} + +static inline uint32_t +bitmap_free_blocks(struct vea_free_bitmap *vfb) +{ + uint32_t free_blocks; + int diff; + + int free_bits = daos_count_free_bits(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz); + + free_blocks = free_bits * vfb->vfb_class; + diff = vfb->vfb_bitmap_sz * 64 * vfb->vfb_class - vfb->vfb_blk_cnt; + + D_ASSERT(diff == 0); + + return free_blocks; +} + +static inline bool +is_bitmap_empty(uint64_t *bitmap, int bitmap_sz) +{ + int i; + + for (i = 0; i < bitmap_sz; i++) + if (bitmap[i]) + return false; + + return true; +} + /* vea_init.c */ void destroy_free_class(struct vea_free_class *vfc); int create_free_class(struct vea_free_class *vfc, struct vea_space_df *md); @@ -154,36 +278,45 @@ int load_space_info(struct vea_space_info *vsi); /* vea_util.c */ int verify_free_entry(uint64_t *off, struct vea_free_extent *vfe); -int verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec); +int verify_bitmap_entry(struct vea_free_bitmap *vfb); int ext_adjacent(struct vea_free_extent *cur, struct vea_free_extent *next); int verify_resrvd_ext(struct vea_resrvd_ext *resrvd); int vea_dump(struct vea_space_info *vsi, bool transient); int vea_verify_alloc(struct vea_space_info *vsi, bool transient, - uint64_t off, uint32_t cnt); + uint64_t off, uint32_t cnt, bool is_bitmap); void dec_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr); void inc_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr); /* vea_alloc.c */ -int compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec); int reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); int reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); -int reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd); -int persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe); +int persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr); +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear); /* vea_free.c */ -#define MAX_FLUSH_FRAGS 256 -void free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry); -int free_class_add(struct vea_space_info *vsi, struct vea_entry *entry); -int compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags); -int persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); -int aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); +void extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int compound_free_extent(struct vea_space_info *vsi, struct 
vea_free_extent *vfe, + unsigned int flags); +int compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, unsigned int flags); +int persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed); int schedule_aging_flush(struct vea_space_info *vsi); +int bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct vea_bitmap_entry **ret_entry, unsigned int flags); +int free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry); +void +free_commit_cb(void *data, bool noop); /* vea_hint.c */ void hint_get(struct vea_hint_context *hint, uint64_t *off); diff --git a/src/vea/vea_util.c b/src/vea/vea_util.c index c7452cc2ebf..21c11e3daa4 100644 --- a/src/vea/vea_util.c +++ b/src/vea/vea_util.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -35,35 +35,38 @@ verify_free_entry(uint64_t *off, struct vea_free_extent *vfe) } int -verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec) +verify_bitmap_entry(struct vea_free_bitmap *vfb) { - int i; - uint64_t prev_off = 0; + D_ASSERT(vfb != NULL); + if (vfb->vfb_blk_off == VEA_HINT_OFF_INVAL) { + D_CRIT("corrupted bitmap entry, off == VEA_HINT_OFF_INVAL(%d)\n", + VEA_HINT_OFF_INVAL); + return -DER_INVAL; + } - D_ASSERT(vec != NULL); - if (vec->vev_size == 0 || vec->vev_size > VEA_EXT_VECTOR_MAX) { - D_CRIT("corrupted vector entry, sz: %u\n", vec->vev_size); + if (vfb->vfb_class < VEA_MIN_BITMAP_CLASS || vfb->vfb_class > VEA_MAX_BITMAP_CLASS) { + D_CRIT("corrupted bitmap entry, class: %u is out of [%u, %u]\n", + vfb->vfb_class, VEA_MIN_BITMAP_CLASS, VEA_MAX_BITMAP_CLASS); return -DER_INVAL; } - if (off != NULL && *off != vec->vev_blk_off[0]) { - D_CRIT("corrupted vector entry, off: "DF_U64" != "DF_U64"\n", - *off, vec->vev_blk_off[0]); + if (vfb->vfb_blk_cnt < VEA_BITMAP_MIN_CHUNK_BLKS || + vfb->vfb_blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + D_CRIT("corrupted bitmap entry, chunk size: %u is out of [%u, %u]\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS, VEA_BITMAP_MAX_CHUNK_BLKS); return -DER_INVAL; } - for (i = 0; i < vec->vev_size; i++) { - if (vec->vev_blk_off[i] <= prev_off) { - D_CRIT("corrupted vector entry[%d]," - " "DF_U64" <= "DF_U64"\n", - i, vec->vev_blk_off[i], prev_off); - return -DER_INVAL; - } - if (vec->vev_blk_cnt[i] == 0) { - D_CRIT("corrupted vector entry[%d], %u\n", - i, vec->vev_blk_cnt[i]); - return -DER_INVAL; - } + if (vfb->vfb_blk_cnt % VEA_BITMAP_MIN_CHUNK_BLKS) { + D_CRIT("coruppted bitmap entry, chunk size: %u should be times of %u\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS); + return -DER_INVAL; + } + + if (vfb->vfb_bitmap_sz * 64 * vfb->vfb_class < vfb->vfb_blk_cnt) { + D_CRIT("corrupted bitmap entry, bitmap size: %u could not cover chunk size: %u\n", + vfb->vfb_bitmap_sz, vfb->vfb_blk_cnt); + return -DER_INVAL; } return 0; @@ -102,28 +105,25 @@ verify_resrvd_ext(struct vea_resrvd_ext *resrvd) } else if (resrvd->vre_blk_cnt == 0) { D_CRIT("invalid blk_cnt %u\n", resrvd->vre_blk_cnt); return -DER_INVAL; - } else if (resrvd->vre_vector != NULL) { - /* Vector allocation isn't supported yet. 
*/ - D_CRIT("vector isn't NULL?\n"); - return -DER_NOSYS; } return 0; } -int -vea_dump(struct vea_space_info *vsi, bool transient) +static int +vea_dump_bitmap(struct vea_space_info *vsi, bool transient) { - struct vea_free_extent *ext; - daos_handle_t ih, btr_hdl; - d_iov_t key, val; - uint64_t *off; - int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + struct vea_free_bitmap *bitmap; + struct vea_bitmap_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; if (transient) - btr_hdl = vsi->vsi_free_btr; + btr_hdl = vsi->vsi_bitmap_btr; else - btr_hdl = vsi->vsi_md_free_btr; + btr_hdl = vsi->vsi_md_bitmap_btr; D_ASSERT(daos_handle_is_valid(btr_hdl)); rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); @@ -132,6 +132,7 @@ vea_dump(struct vea_space_info *vsi, bool transient) rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + D_PRINT("Bitmaps:"); while (rc == 0) { d_iov_set(&key, NULL, 0); d_iov_set(&val, NULL, 0); @@ -140,15 +141,76 @@ vea_dump(struct vea_space_info *vsi, bool transient) break; off = (uint64_t *)key.iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + goto next; + if (transient) { - struct vea_entry *entry; + entry = (struct vea_bitmap_entry *)val.iov_buf; + bitmap = &entry->vbe_bitmap; + } else { + bitmap = (struct vea_free_bitmap *)val.iov_buf; + + } + rc = verify_bitmap_entry(bitmap); + if (rc != 0) { + D_ERROR("dump failed???\n"); + break; + } + + D_PRINT("["DF_U64", %u]", bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + print_cnt++; + if (print_cnt % 10 == 0) + D_PRINT("\n"); + else + D_PRINT(" "); +next: + rc = dbtree_iter_next(ih); + } + + D_PRINT("\n"); + dbtree_iter_finish(ih); + + return rc = -DER_NONEXIST ? 0 : rc; + - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; +} + +static int +vea_dump_extent(struct vea_space_info *vsi, bool transient) +{ + struct vea_free_extent *ext; + struct vea_extent_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + + if (transient) + btr_hdl = vsi->vsi_free_btr; + else + btr_hdl = vsi->vsi_md_free_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); + if (rc) + return rc; + + rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + + D_PRINT("Free extents:"); + while (rc == 0) { + d_iov_set(&key, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_iter_fetch(ih, &key, &val, NULL); + if (rc != 0) + break; + + off = (uint64_t *)key.iov_buf; + if (transient) { + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } - rc = verify_free_entry(off, ext); if (rc != 0) break; @@ -169,6 +231,18 @@ vea_dump(struct vea_space_info *vsi, bool transient) return rc = -DER_NONEXIST ? 0 : rc; } +int +vea_dump(struct vea_space_info *vsi, bool transient) +{ + int rc; + + rc = vea_dump_bitmap(vsi, transient); + if (rc) + return rc; + + return vea_dump_extent(vsi, transient); +} + /** * Check if two extents are overlapping. * returns 0 - Non-overlapping @@ -189,27 +263,70 @@ ext_overlapping(struct vea_free_extent *ext1, struct vea_free_extent *ext2) return -DER_INVAL; } -/** - * Verify if an extent is allocated in persistent or transient metadata. 
- * - * \param vsi [IN] In-memory compound index - * \param transient [IN] Persistent or transient - * \param off [IN] Block offset of extent - * \param cnt [IN] Block count of extent - * - * \return 0 - Allocated - * 1 - Not allocated - * Negative value on error - */ -int -vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, - uint32_t cnt) +static int +verify_alloc_bitmap(struct vea_space_info *vsi, bool transient, uint64_t off, + uint32_t cnt) { - struct vea_free_extent vfe, *ext; daos_handle_t btr_hdl; d_iov_t key, key_out, val; - uint64_t *key_off; int rc, opc = BTR_PROBE_LE; + struct vea_free_bitmap *vfb; + + if (transient) + btr_hdl = vsi->vsi_bitmap_btr; + else + btr_hdl = vsi->vsi_md_bitmap_btr; + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &off, sizeof(off)); + + d_iov_set(&key_out, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key, &key_out, + &val); + /* bitmap not allocated */ + if (rc == -DER_NONEXIST) + return 1; + + if (rc) + return rc; + + if (transient) { + struct vea_bitmap_entry *entry; + + entry = (struct vea_bitmap_entry *)val.iov_buf; + vfb = &entry->vbe_bitmap; + } else { + vfb = (struct vea_free_bitmap *)val.iov_buf; + } + + rc = verify_bitmap_entry(vfb); + if (rc != 0) { + D_ERROR("verify bitmap alloc failed\n"); + return rc; + } + + /* not in the bitmap range */ + if (off + cnt <= vfb->vfb_blk_off || off >= vfb->vfb_blk_off + vfb->vfb_blk_cnt) + return 1; + + if (isset_range((uint8_t *)vfb->vfb_bitmaps, + (off - vfb->vfb_blk_off) / vfb->vfb_class, + (off - vfb->vfb_blk_off + cnt - 1) / vfb->vfb_class)) + return 0; + + return 1; +} + + +static int +verify_alloc_extent(struct vea_space_info *vsi, bool transient, uint64_t off, uint32_t cnt) +{ + struct vea_free_extent vfe, *ext; + daos_handle_t btr_hdl; + d_iov_t key, key_out, val; + uint64_t *key_off; + int rc, opc = BTR_PROBE_LE; /* Sanity check on input parameters */ vfe.vfe_blk_off = off; @@ -243,10 +360,10 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, key_off = (uint64_t *)key_out.iov_buf; if (transient) { - struct vea_entry *entry; + struct vea_extent_entry *entry; - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } @@ -267,6 +384,29 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, return rc; } +/** + * Verify if an extent is allocated in persistent or transient metadata. 
+ * + * \param vsi [IN] In-memory compound index + * \param transient [IN] Persistent or transient + * \param off [IN] Block offset of extent + * \param cnt [IN] Block count of extent + * \param is_bitmap [IN] Bitmap or extent + * + * \return 0 - Allocated + * 1 - Not allocated + * Negative value on error + */ +int +vea_verify_alloc(struct vea_space_info *vsi, bool transient, + uint64_t off, uint32_t cnt, bool is_bitmap) +{ + if (!is_bitmap) + return verify_alloc_extent(vsi, transient, off, cnt); + + return verify_alloc_bitmap(vsi, transient, off, cnt); +} + void vea_metrics_free(void *data) { @@ -283,6 +423,8 @@ rsrv_type2str(int rsrv_type) return "large"; case STAT_RESRV_SMALL: return "small"; + case STAT_RESRV_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -298,6 +440,8 @@ frags_type2str(int frags_type) return "small"; case STAT_FRAGS_AGING: return "aging"; + case STAT_FRAGS_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -366,6 +510,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de case STAT_RESRV_HINT: case STAT_RESRV_LARGE: case STAT_RESRV_SMALL: + case STAT_RESRV_BITMAP: D_ASSERT(!dec && nr == 1); vsi->vsi_stat[type] += nr; if (metrics && metrics->vm_rsrv[type]) @@ -373,6 +518,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de break; case STAT_FRAGS_LARGE: case STAT_FRAGS_SMALL: + case STAT_FRAGS_BITMAP: case STAT_FRAGS_AGING: D_ASSERT(nr == 1 && type >= STAT_FRAGS_LARGE); if (dec) { @@ -385,7 +531,8 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de if (metrics && metrics->vm_frags[frag_idx]) d_tm_set_gauge(metrics->vm_frags[frag_idx], vsi->vsi_stat[type]); break; - case STAT_FREE_BLKS: + case STAT_FREE_EXTENT_BLKS: + case STAT_FREE_BITMAP_BLKS: if (dec) { D_ASSERTF(vsi->vsi_stat[type] >= nr, "free:"DF_U64" < rsrvd:"DF_U64"\n", vsi->vsi_stat[type], nr); diff --git a/src/vos/tests/vts_aggregate.c b/src/vos/tests/vts_aggregate.c index 2b2b92082af..67ff1539e83 100644 --- a/src/vos/tests/vts_aggregate.c +++ b/src/vos/tests/vts_aggregate.c @@ -1840,13 +1840,14 @@ print_space_info(vos_pool_info_t *pi, char *desc) VERBOSE_MSG(" NVMe allocator statistics:\n"); VERBOSE_MSG(" free_p: "DF_U64", \tfree_t: "DF_U64", " "\tfrags_large: "DF_U64", \tfrags_small: "DF_U64", " - "\tfrags_aging: "DF_U64"\n", + "\tfrags_aging: "DF_U64" \tfrags_bitmap: "DF_U64"\n", stat->vs_free_persistent, stat->vs_free_transient, stat->vs_frags_large, stat->vs_frags_small, - stat->vs_frags_aging); + stat->vs_frags_aging, stat->vs_frags_bitmap); VERBOSE_MSG(" resrv_hit: "DF_U64", \tresrv_large: "DF_U64", " - "\tresrv_small: "DF_U64"\n", stat->vs_resrv_hint, - stat->vs_resrv_large, stat->vs_resrv_small); + "\tresrv_small: "DF_U64", \tresrv_bitmap: "DF_U64"\n", + stat->vs_resrv_hint, stat->vs_resrv_large, + stat->vs_resrv_small, stat->vs_resrv_bitmap); } static int diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index b2b82d1494b..171235b7ceb 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1424,6 +1424,11 @@ vos_pool_upgrade(daos_handle_t poh, uint32_t version) "Invalid pool upgrade version %d, current version is %d\n", version, pool_df->pd_version); + rc = vea_upgrade(pool->vp_vea_info, &pool->vp_umm, &pool_df->pd_vea_df, + pool_df->pd_version); + if (rc) + return rc; + rc = umem_tx_begin(&pool->vp_umm, NULL); if (rc != 0) return rc; From 6afb094004556ea900af60f7f469fd321d59f607 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Tue, 19 Sep 2023 23:02:09 -0500 Subject: [PATCH 
23/29] DAOS-14409 build: use libfabric pkgconfig files (#13066) Let mercury use libfabric pkg-config instead of hard-coded lib path Signed-off-by: Jerome Soumagne --- site_scons/components/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 94154c6d2f9..3044a6b58f6 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -134,6 +134,7 @@ def define_mercury(reqs): libs=['fabric'], config_cb=ofi_config, headers=['rdma/fabric.h'], + pkgconfig='libfabric', package='libfabric-devel' if inst(reqs, 'ofi') else None, patch_rpath=['lib'], build_env={'CFLAGS': "-fstack-usage"}) @@ -186,12 +187,6 @@ def define_mercury(reqs): else: mercury_build.append('-DMERCURY_ENABLE_DEBUG:BOOL=OFF') - mercury_build.extend(check(reqs, - 'ofi', - ['-DOFI_INCLUDE_DIR:PATH=$OFI_PREFIX/include', - '-DOFI_LIBRARY:FILEPATH=$OFI_PREFIX/lib/libfabric.so'], - [])) - reqs.define('mercury', retriever=GitRepoRetriever('https://github.com/mercury-hpc/mercury.git', True), commands=[mercury_build, From f7fe80fafca98bd6d47d38b98e1a4ba160c4067e Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 20 Sep 2023 07:20:24 -0500 Subject: [PATCH 24/29] DAOS-623 build: fix build when lustre APIs are installed (#13069) Signed-off-by: Mohamad Chaarawi --- src/client/dfs/duns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/client/dfs/duns.c b/src/client/dfs/duns.c index cad9790507c..6c0e089bd58 100644 --- a/src/client/dfs/duns.c +++ b/src/client/dfs/duns.c @@ -859,7 +859,7 @@ duns_link_lustre_path(const char *pool, const char *cont, daos_cont_layout_t typ { char str[DUNS_MAX_XATTR_LEN + 1]; int len; - int rc, rc2; + int rc; /* XXX if liblustreapi is not binded, do it now ! */ if (liblustre_binded == false && liblustre_notfound == false) { @@ -1247,7 +1247,9 @@ duns_link_cont(daos_handle_t poh, const char *cont, const char *path) #ifdef LUSTRE_INCLUDE struct statfs fs; char *dir, *dirp; + size_t path_len; + path_len = strnlen(path, PATH_MAX); D_STRNDUP(dir, path, path_len); if (dir == NULL) D_GOTO(out_cont, rc = ENOMEM); From 478eb165c03dbad7a3bb117fd52d972db4ed849a Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 21 Sep 2023 16:26:06 -0400 Subject: [PATCH 25/29] DAOS-14203 test - Adding debug info. (#12960) Display memory info before and after server format. Signed-off-by: Phil Henderson --- src/tests/ftest/util/server_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 514375ccce9..87716bb0465 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -474,6 +474,14 @@ def support_collect_log(self, **kwargs): return run_remote( self.log, self._hosts, cmd.with_exports, timeout=self.collect_log_timeout.value) + def display_memory_info(self): + """Display server hosts memory info.""" + self.log.debug("#" * 80) + self.log.debug(" Collection debug memory info") + run_remote(self.log, self._hosts, "free -m") + run_remote(self.log, self._hosts, "ps -eo size,pid,user,command --sort -size | head -n 6") + self.log.debug("#" * 80) + def detect_format_ready(self, reformat=False): """Detect when all the daos_servers are ready for storage format. 
@@ -666,11 +674,14 @@ def start(self): self.prepare() # Start the servers and wait for them to be ready for storage format + self.display_memory_info() self.detect_format_ready() # Collect storage and network information from the servers. + self.display_memory_info() self.information.collect_storage_information() self.information.collect_network_information() + self.display_memory_info() # Format storage and wait for server to change ownership self.log.info(" Formatting hosts: <%s>", self.dmg.hostlist) From d016a2bdb583bb1754006410563ff328c822c731 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 21 Sep 2023 20:55:37 -0700 Subject: [PATCH 26/29] DAOS-13503 gurt: add memory metrics (#11956) - Add memory metrics for DTX and VOS memory usage. - Update error message for PARTIAL update. - Add total memory usage track. - Only enable it by D_MEMORY_TRACK=1 - Add memory allocation(-m) information to telemetry. Signed-off-by: Di Wang --- src/cart/README.env | 5 ++ src/dtx/dtx_common.c | 21 +++++- src/dtx/dtx_internal.h | 2 + src/dtx/dtx_srv.c | 17 ++++- src/engine/init.c | 1 - src/engine/srv.c | 50 +++++++++++++ src/engine/srv_internal.h | 10 +++ src/engine/srv_metrics.c | 2 +- src/gurt/misc.c | 90 ++++++++++++++++++++-- src/gurt/telemetry.c | 99 +++++++++++++++++++++++++ src/include/daos_errno.h | 4 +- src/include/daos_srv/dtx_srv.h | 7 +- src/include/gurt/common.h | 4 + src/include/gurt/telemetry_common.h | 14 +++- src/include/gurt/telemetry_producer.h | 3 +- src/tests/ftest/util/telemetry_utils.py | 10 +++ src/utils/daos_metrics/daos_metrics.c | 10 ++- src/vos/lru_array.c | 32 +++++++- src/vos/lru_array.h | 6 +- src/vos/tests/vts_io.c | 4 +- src/vos/tests/vts_ts.c | 4 +- src/vos/vos_common.c | 60 +++++++++++++-- src/vos/vos_container.c | 9 ++- src/vos/vos_dtx.c | 11 ++- src/vos/vos_internal.h | 2 + src/vos/vos_obj_cache.c | 9 ++- src/vos/vos_tls.h | 3 + src/vos/vos_ts.c | 34 ++++++++- src/vos/vos_ts.h | 8 +- 29 files changed, 483 insertions(+), 48 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index 8a8cca74f85..edfbb39c561 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -80,6 +80,11 @@ This file lists the environment variables used in CaRT. by default, and can be specified using DD_SUBSYS, for example: "DD_SUBSYS=RPC,BULK,CORPC,GRP,LM,HG,PMIX,ST,IV" or also "DD_SUBSYS=all". + . D_MEMORY_TRACK + User can enable memory track for daos engine by D_MEMORY_TRACK=1. With the + environment, all of allocations inside DAOS (by D_ALLOC) will be tracked, and + total allocated bytes per xstream can be shown through metrics. + . CRT_TIMEOUT Set it as integer in the range of (0, 3600] to set the global timeout value of all RPCs (second). 
Without setting it or set it as any other value will diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index a23bc3094c7..9f59060f87e 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1114,6 +1114,7 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, struct dtx_memberships *mbs, struct dtx_leader_handle **p_dlh) { struct dtx_leader_handle *dlh; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_handle *dth; int rc; int i; @@ -1151,10 +1152,12 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, DP_DTI(dti), sub_modification_cnt, dth->dth_ver, DP_UOID(*leader_oid), dti_cos_cnt, tgt_cnt, flags, DP_RC(rc)); - if (rc != 0) + if (rc != 0) { D_FREE(dlh); - else + } else { *p_dlh = dlh; + d_tm_inc_gauge(tls->dt_dtx_leader_total, 1); + } return rc; } @@ -1178,6 +1181,17 @@ dtx_leader_wait(struct dtx_leader_handle *dlh) return dlh->dlh_result; }; +void +dtx_entry_put(struct dtx_entry *dte) +{ + if (--(dte->dte_refs) == 0) { + struct dtx_tls *tls = dtx_tls_get(); + + d_tm_dec_gauge(tls->dt_dtx_entry_total, 1); + D_FREE(dte); + } +} + /** * Stop the leader thandle. * @@ -1192,6 +1206,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul { struct ds_cont_child *cont = coh->sch_cont; struct dtx_handle *dth = &dlh->dlh_handle; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_entry *dte; struct dtx_memberships *mbs; size_t size; @@ -1308,6 +1323,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul dte->dte_ver = dth->dth_ver; dte->dte_refs = 1; dte->dte_mbs = mbs; + d_tm_inc_gauge(tls->dt_dtx_entry_total, 1); /* Use the new created @dte instead of dth->dth_dte that will be * released after dtx_leader_end(). @@ -1419,6 +1435,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul D_FREE(dth->dth_oid_array); D_FREE(dlh); + d_tm_dec_gauge(tls->dt_dtx_leader_total, 1); return result; } diff --git a/src/dtx/dtx_internal.h b/src/dtx/dtx_internal.h index 3abaa197b1d..a38c747a61d 100644 --- a/src/dtx/dtx_internal.h +++ b/src/dtx/dtx_internal.h @@ -160,6 +160,8 @@ struct dtx_pool_metrics { */ struct dtx_tls { struct d_tm_node_t *dt_committable; + struct d_tm_node_t *dt_dtx_leader_total; + struct d_tm_node_t *dt_dtx_entry_total; uint64_t dt_agg_gen; uint32_t dt_batched_ult_cnt; }; diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index 095c3d7fa20..9ea25a9dcd0 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -39,6 +39,22 @@ dtx_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create DTX committable metric: " DF_RC"\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->dt_dtx_leader_total, D_TM_GAUGE, + "total number of leader dtx in cache", "entry", + "mem/dtx/dtx_leader_handle_%u/tgt_%u", + sizeof(struct dtx_leader_handle), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX leader metric: " DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->dt_dtx_entry_total, D_TM_GAUGE, + "total number of dtx entry in cache", "entry", + "mem/dtx/dtx_entry_%u/tgt_%u", + sizeof(struct dtx_entry), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX entry metric: " DF_RC"\n", + DP_RC(rc)); + return tls; } @@ -105,7 +121,6 @@ dtx_metrics_alloc(const char *path, int tgt_id) D_WARN("Failed to create DTX RPC cnt metric for %s: " DF_RC"\n", dtx_opc_to_str(opc), DP_RC(rc)); } - return metrics; } diff --git a/src/engine/init.c b/src/engine/init.c index 874fbf62ebf..5e90d4ec248 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -678,7 +678,6 @@ server_init(int 
argc, char *argv[]) DP_RC(rc)); metrics = &dss_engine_metrics; - /** Report timestamp when engine was started */ d_tm_record_timestamp(metrics->started_time); diff --git a/src/engine/srv.c b/src/engine/srv.c index 1be1aa2d9f1..d7f1acdbe80 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "drpc_internal.h" #include "srv_internal.h" @@ -353,6 +354,7 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi) D_DEBUG(DB_TRACE, "XS(%d) drained ULTs.\n", dx->dx_xs_id); } +#define D_MEMORY_TRACK_ENV "D_MEMORY_TRACK" /* * The server handler ULT first sets CPU affinity, initialize the per-xstream * TLS, CRT(comm) context, NVMe context, creates the long-run ULTs (GC & NVMe @@ -366,12 +368,18 @@ dss_srv_handler(void *arg) struct dss_thread_local_storage *dtc; struct dss_module_info *dmi; int rc; + bool track_mem = false; bool signal_caller = true; rc = dss_xstream_set_affinity(dx); if (rc) goto signal; + d_getenv_bool(D_MEMORY_TRACK_ENV, &track_mem); + if (unlikely(track_mem)) + d_set_alloc_track_cb(dss_mem_total_alloc_track, dss_mem_total_free_track, + &dx->dx_mem_stats); + /* initialize xstream-local storage */ dtc = dss_tls_init(dx->dx_tag, dx->dx_xs_id, dx->dx_tgt_id); if (dtc == NULL) { @@ -643,6 +651,46 @@ dss_xstream_free(struct dss_xstream *dx) D_FREE(dx); } +static void +dss_mem_stats_init(struct mem_stats *stats, int xs_id) +{ + int rc; + + rc = d_tm_add_metric(&stats->ms_total_usage, D_TM_GAUGE, + "Total memory usage", "byte", "mem/total_mem/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + + rc = d_tm_add_metric(&stats->ms_mallinfo, D_TM_MEMINFO, + "Total memory arena", "", "mem/meminfo/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + stats->ms_current = 0; +} + +void +dss_mem_total_alloc_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_inc_gauge(stats->ms_total_usage, bytes); + /* Only retrieve mallocinfo every 10 allocation */ + if ((stats->ms_current++ % 10) == 0) + d_tm_record_meminfo(stats->ms_mallinfo); +} + +void +dss_mem_total_free_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_dec_gauge(stats->ms_total_usage, bytes); +} + /** * Start one xstream. 
* @@ -735,6 +783,8 @@ dss_start_one_xstream(hwloc_cpuset_t cpus, int tag, int xs_id) D_GOTO(out_dx, rc); } + dss_mem_stats_init(&dx->dx_mem_stats, xs_id); + /** start XS, ABT rank 0 is reserved for the primary xstream */ rc = ABT_xstream_create_with_rank(dx->dx_sched, xs_id + 1, &dx->dx_xstream); diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index b3e0ca7ee9e..4fbe5d386d7 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -54,6 +54,12 @@ struct sched_info { unsigned int si_stop:1; }; +struct mem_stats { + struct d_tm_node_t *ms_total_usage; /* Total memory usage (bytes) */ + struct d_tm_node_t *ms_mallinfo; /* memory allocate information */ + uint64_t ms_current; +}; + /** Per-xstream configuration data */ struct dss_xstream { char dx_name[DSS_XS_NAME_LEN]; @@ -80,6 +86,7 @@ struct dss_xstream { bool dx_main_xs; /* true for main XS */ bool dx_comm; /* true with cart context */ bool dx_dsc_started; /* DSC progress ULT started */ + struct mem_stats dx_mem_stats; /* memory usages stats on this xstream */ #ifdef ULT_MMAP_STACK /* per-xstream pool/list of free stacks */ struct stack_pool *dx_sp; @@ -95,6 +102,7 @@ struct engine_metrics { struct d_tm_node_t *rank_id; struct d_tm_node_t *dead_rank_events; struct d_tm_node_t *last_event_time; + struct d_tm_node_t *meminfo; }; extern struct engine_metrics dss_engine_metrics; @@ -150,6 +158,8 @@ void dss_dump_ABT_state(FILE *fp); void dss_xstreams_open_barrier(void); struct dss_xstream *dss_get_xstream(int stream_id); int dss_xstream_cnt(void); +void dss_mem_total_alloc_track(void *arg, daos_size_t bytes); +void dss_mem_total_free_track(void *arg, daos_size_t bytes); /* srv_metrics.c */ int dss_engine_metrics_init(void); diff --git a/src/engine/srv_metrics.c b/src/engine/srv_metrics.c index ef302241ca9..0be06a4733c 100644 --- a/src/engine/srv_metrics.c +++ b/src/engine/srv_metrics.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ diff --git a/src/gurt/misc.c b/src/gurt/misc.c index ca40fcc194a..ef3a2e91709 100644 --- a/src/gurt/misc.c +++ b/src/gurt/misc.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,20 @@ /* state buffer for DAOS rand and srand calls, NOT thread safe */ static struct drand48_data randBuffer = {0}; +d_alloc_track_cb_t d_alloc_track_cb; +d_alloc_track_cb_t d_free_track_cb; +static __thread void *track_arg; + +void +d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg) +{ + d_alloc_track_cb = alloc_cb; + d_free_track_cb = free_cb; + track_arg = arg; + + D_INFO("memory track is enabled for the engine.\n"); +} + void d_srand(long int seedval) { @@ -49,6 +64,12 @@ d_rand() void d_free(void *ptr) { + if (unlikely(track_arg != NULL)) { + size_t size = malloc_usable_size(ptr); + + d_free_track_cb(track_arg, size); + } + free(ptr); } @@ -87,25 +108,62 @@ d_free(void *ptr) void * d_calloc(size_t count, size_t eltsize) { - return calloc(count, eltsize); + void *ptr; + + ptr = calloc(count, eltsize); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_malloc(size_t size) { - return malloc(size); + void *ptr; + + ptr = malloc(size); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, size); + } + + return ptr; } void * d_realloc(void *ptr, size_t size) { - return realloc(ptr, size); + void *new_ptr; + + if (unlikely(track_arg != NULL)) { + size_t old_size = malloc_usable_size(ptr); + + new_ptr = realloc(ptr, size); + if (new_ptr != NULL) { + d_free_track_cb(track_arg, old_size); + d_alloc_track_cb(track_arg, size); + } + } else { + new_ptr = realloc(ptr, size); + } + return new_ptr; } char * d_strndup(const char *s, size_t n) { - return strndup(s, n); + char *ptr; + + ptr = strndup(s, n); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } int @@ -118,6 +176,11 @@ d_asprintf(char **strp, const char *fmt, ...) rc = vasprintf(strp, fmt, ap); va_end(ap); + if (unlikely(track_arg != NULL)) { + if (rc > 0 && *strp != NULL) + d_alloc_track_cb(track_arg, (size_t)rc); + } + return rc; } @@ -143,16 +206,31 @@ d_asprintf2(int *_rc, const char *fmt, ...) 
char * d_realpath(const char *path, char *resolved_path) { - return realpath(path, resolved_path); + char *ptr; + + ptr = realpath(path, resolved_path); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_aligned_alloc(size_t alignment, size_t size, bool zero) { - void *buf = aligned_alloc(alignment, size); + void *buf; + + buf = aligned_alloc(alignment, size); + if (unlikely(track_arg != NULL)) { + if (buf != NULL) + d_alloc_track_cb(track_arg, size); + } if (!zero || buf == NULL) return buf; + memset(buf, 0, size); return buf; } diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index e34abe19ad0..a5e34fab2d4 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -933,6 +934,27 @@ d_tm_print_timestamp(time_t *clk, char *name, int format, int opt_fields, } } +static void +d_tm_print_meminfo(struct d_tm_meminfo_t *meminfo, char *name, int format, + int opt_fields, FILE *stream) +{ + if ((name == NULL) || (stream == NULL)) + return; + + if (format == D_TM_CSV) { + fprintf(stream, "%s", name); + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, ",arena,ordblks,uordblks,fordblks"); + fprintf(stream, ",%zu,%zu,%zu,%zu", meminfo->arena, meminfo->ordblks, + meminfo->uordblks, meminfo->fordblks); + } else { + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, "type: arena,ordblks,uordblks,fordblks,"); + fprintf(stream, "%s:%zu,%zu,%zu,%zu", name, meminfo->arena, + meminfo->ordblks, meminfo->uordblks, meminfo->fordblks); + } +} + /** * Prints the time snapshot \a tms with \a name to the \a stream provided * @@ -1147,6 +1169,9 @@ d_tm_print_metadata(char *desc, char *units, int format, FILE *stream) } } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node); /** * Prints a single \a node. * Used as a convenience function to demonstrate usage for the client @@ -1179,6 +1204,7 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *name = NULL; char *desc = NULL; char *units = NULL; + struct d_tm_meminfo_t meminfo; bool stats_printed = false; bool show_timestamp = false; bool show_meta = false; @@ -1247,6 +1273,14 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, } d_tm_print_timestamp(&clk, name, format, opt_fields, stream); break; + case D_TM_MEMINFO: + rc = d_tm_get_meminfo(ctx, &meminfo, node); + if (rc != DER_SUCCESS) { + fprintf(stream, "Error on meminfo read: %d\n", rc); + break; + } + d_tm_print_meminfo(&meminfo, name, format, opt_fields, stream); + break; case D_TM_TIMER_SNAPSHOT: case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_REALTIME): case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_PROCESS_CPUTIME): @@ -1745,6 +1779,41 @@ d_tm_record_timestamp(struct d_tm_node_t *metric) d_tm_node_unlock(metric); } +/** + * Record the current meminfo + * + * \param[in] metric Pointer to the metric + */ +void +d_tm_record_meminfo(struct d_tm_node_t *metric) +{ +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33) + struct mallinfo2 mi; + + mi = mallinfo2(); +#else + struct mallinfo mi; + + mi = mallinfo(); +#endif + + if (metric == NULL) + return; + + if (metric->dtn_type != D_TM_MEMINFO) { + D_ERROR("Failed to record meminfo on item %s not a " + "meminfo. 
Operation mismatch: " DF_RC "\n", + metric->dtn_name, DP_RC(-DER_OP_NOT_PERMITTED)); + return; + } + d_tm_node_lock(metric); + metric->dtn_metric->dtm_data.meminfo.arena = mi.arena; + metric->dtn_metric->dtm_data.meminfo.ordblks = mi.ordblks; + metric->dtn_metric->dtm_data.meminfo.uordblks = mi.uordblks; + metric->dtn_metric->dtm_data.meminfo.fordblks = mi.fordblks; + d_tm_node_unlock(metric); +} + /** * Read and store a high resolution timer snapshot value * @@ -2977,6 +3046,36 @@ d_tm_get_timestamp(struct d_tm_context *ctx, time_t *val, return DER_SUCCESS; } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node) +{ + struct d_tm_metric_t *metric_data = NULL; + struct d_tm_shmem_hdr *shmem = NULL; + int rc; + + if (ctx == NULL || meminfo == NULL || node == NULL) + return -DER_INVAL; + + rc = validate_node_ptr(ctx, node, &shmem); + if (rc != 0) + return rc; + + if (node->dtn_type != D_TM_MEMINFO) + return -DER_OP_NOT_PERMITTED; + + metric_data = conv_ptr(shmem, node->dtn_metric); + if (metric_data != NULL) { + d_tm_node_lock(node); + *meminfo = metric_data->dtm_data.meminfo; + d_tm_node_unlock(node); + } else { + return -DER_METRIC_NOT_FOUND; + } + return DER_SUCCESS; +} + + /** * Client function to read the specified high resolution timer. * diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index 25b517c9f21..86709a6bd94 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -205,12 +205,10 @@ extern "C" { ACTION(DER_NVME_IO, NVMe I / O error) \ ACTION(DER_NO_CERT, Unable to access one or more certificates) \ ACTION(DER_BAD_CERT, Invalid x509 certificate) \ - ACTION(DER_VOS_PARTIAL_UPDATE, VOS partial update error) \ + ACTION(DER_VOS_PARTIAL_UPDATE, Same epoch partial overwrite of VOS array value disallowed) \ ACTION(DER_CHKPT_BUSY, Page is temporarily read only due to checkpointing) \ ACTION(DER_DIV_BY_ZERO, Division by zero) -/* clang-format on */ - /** Defines the gurt error codes */ #define D_FOREACH_ERR_RANGE(ACTION) \ ACTION(GURT, 1000) \ diff --git a/src/include/daos_srv/dtx_srv.h b/src/include/daos_srv/dtx_srv.h index 05cc162b19e..d0b2352783a 100644 --- a/src/include/daos_srv/dtx_srv.h +++ b/src/include/daos_srv/dtx_srv.h @@ -305,12 +305,7 @@ dtx_entry_get(struct dtx_entry *dte) return dte; } -static inline void -dtx_entry_put(struct dtx_entry *dte) -{ - if (--(dte->dte_refs) == 0) - D_FREE(dte); -} +void dtx_entry_put(struct dtx_entry *dte); static inline bool dtx_is_valid_handle(const struct dtx_handle *dth) diff --git a/src/include/gurt/common.h b/src/include/gurt/common.h index c6a8f241b26..cfce1a490ec 100644 --- a/src/include/gurt/common.h +++ b/src/include/gurt/common.h @@ -506,6 +506,10 @@ int d_getenv_uint64_t(const char *env, uint64_t *val); int d_write_string_buffer(struct d_string_buffer_t *buf, const char *fmt, ...); void d_free_string(struct d_string_buffer_t *buf); +typedef void (*d_alloc_track_cb_t)(void *arg, size_t size); + +void d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg); + #if !defined(container_of) /* given a pointer @ptr to the field @member embedded into type (usually * struct) @type, return pointer to the embedding instance of @type. diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 8852a1764cf..983ec2553f2 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2022 Intel Corporation. 
+ * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -139,6 +139,7 @@ enum { D_TM_CLOCK_PROCESS_CPUTIME = 0x100, D_TM_CLOCK_THREAD_CPUTIME = 0x200, D_TM_LINK = 0x400, + D_TM_MEMINFO = 0x800, D_TM_ALL_NODES = (D_TM_DIRECTORY | \ D_TM_COUNTER | \ D_TM_TIMESTAMP | \ @@ -146,7 +147,8 @@ enum { D_TM_DURATION | \ D_TM_GAUGE | \ D_TM_STATS_GAUGE | \ - D_TM_LINK) + D_TM_LINK | \ + D_TM_MEMINFO) }; enum { @@ -203,10 +205,18 @@ struct d_tm_histogram_t { int dth_value_multiplier; }; +struct d_tm_meminfo_t { + uint64_t arena; + uint64_t ordblks; + uint64_t uordblks; + uint64_t fordblks; +}; + struct d_tm_metric_t { union data { uint64_t value; struct timespec tms[2]; + struct d_tm_meminfo_t meminfo; } dtm_data; struct d_tm_stats_t *dtm_stats; struct d_tm_histogram_t *dtm_histogram; diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h index de85ea11932..5cd323637d4 100644 --- a/src/include/gurt/telemetry_producer.h +++ b/src/include/gurt/telemetry_producer.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,6 +12,7 @@ void d_tm_set_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_inc_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_record_timestamp(struct d_tm_node_t *metric); +void d_tm_record_meminfo(struct d_tm_node_t *metric); void d_tm_take_timer_snapshot(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_start(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_end(struct d_tm_node_t *metric); diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 2f3defae916..cc1cee34127 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -557,6 +557,14 @@ class TelemetryUtils(): ENGINE_NVME_RELIABILITY_METRICS +\ ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS + ENGINE_MEM_USAGE_METRICS = [ + "engine_mem_vos_dtx_cmt_ent_48", + "engine_mem_vos_vos_obj_360", + "engine_mem_vos_vos_lru_size", + "engine_mem_dtx_dtx_leader_handle_336", + "engine_mem_dtx_dtx_entry_40"] + ENGINE_MEM_TOTAL_USAGE_METRICS = [ + "engine_mem_total_mem"] def __init__(self, dmg, servers): """Create a TelemetryUtils object. @@ -587,6 +595,8 @@ def get_all_server_metrics_names(self, server, with_pools=False): all_metrics_names.extend(self.ENGINE_NET_METRICS) all_metrics_names.extend(self.ENGINE_RANK_METRICS) all_metrics_names.extend(self.ENGINE_DMABUFF_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_USAGE_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_TOTAL_USAGE_METRICS) if with_pools: all_metrics_names.extend(self.ENGINE_POOL_METRICS) all_metrics_names.extend(self.ENGINE_CONTAINER_METRICS) diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c index b2f99e4abd2..8a8190d5203 100644 --- a/src/utils/daos_metrics/daos_metrics.c +++ b/src/utils/daos_metrics/daos_metrics.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2021 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -93,6 +93,7 @@ main(int argc, char **argv) {"path", required_argument, NULL, 'p'}, {"delay", required_argument, NULL, 'D'}, {"meta", no_argument, NULL, 'M'}, + {"meminfo", no_argument, NULL, 'm'}, {"type", no_argument, NULL, 'T'}, {"read", no_argument, NULL, 'r'}, {"reset", no_argument, NULL, 'e'}, @@ -100,7 +101,7 @@ main(int argc, char **argv) {NULL, 0, NULL, 0} }; - opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MTrhe", + opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrhe", long_options, NULL); if (opt == -1) break; @@ -136,6 +137,9 @@ main(int argc, char **argv) case 'M': show_meta = true; break; + case 'm': + filter |= D_TM_MEMINFO; + break; case 'T': show_type = true; break; @@ -160,7 +164,7 @@ main(int argc, char **argv) ops |= D_TM_ITER_READ; if (filter == 0) - filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | + filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; ctx = d_tm_open(srv_idx); diff --git a/src/vos/lru_array.c b/src/vos/lru_array.c index b94ff873a51..186026c5ba9 100644 --- a/src/vos/lru_array.c +++ b/src/vos/lru_array.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,6 +11,7 @@ */ #define D_LOGFAC DD_FAC(vos) #include "lru_array.h" +#include "vos_internal.h" /** Internal converter for real index to entity index in sub array */ #define ent2idx(array, sub, ent_idx) \ @@ -63,6 +64,24 @@ fini_cb(struct lru_array *array, struct lru_sub *sub, struct lru_entry *entry, array->la_cbs.lru_on_fini(entry->le_payload, real_idx, array->la_arg); } +static void +alloc_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_alloc == NULL) + return; + + array->la_cbs.lru_on_alloc(array->la_arg, size); +} + +static void +free_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_free == NULL) + return; + + array->la_cbs.lru_on_free(array->la_arg, size); +} + int lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) { @@ -78,6 +97,8 @@ lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) if (sub->ls_table == NULL) return -DER_NOMEM; + alloc_cb(array, rec_size * nr_ents); + /** Add newly allocated ones to head of list */ d_list_del(&sub->ls_link); d_list_add(&sub->ls_link, &array->la_free_sub); @@ -283,6 +304,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, if (cbs != NULL) array->la_cbs = *cbs; + alloc_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); /** Only allocate one sub array, add the rest to free list */ D_INIT_LIST_HEAD(&array->la_free_sub); D_INIT_LIST_HEAD(&array->la_unused_sub); @@ -294,6 +316,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, rc = lrua_array_alloc_one(array, &array->la_sub[0]); if (rc != 0) { + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); D_FREE(array); return rc; } @@ -312,6 +335,10 @@ array_free_one(struct lru_array *array, struct lru_sub *sub) fini_cb(array, sub, &sub->ls_table[idx], idx); D_FREE(sub->ls_table); + + free_cb(array, + (sizeof(struct lru_entry) + array->la_payload_size) * + (array->la_idx_mask + 1)); } void @@ -323,13 +350,14 @@ lrua_array_free(struct lru_array *array) if (array == NULL) return; - for (i = 0; i < array->la_array_nr; i++) { sub = &array->la_sub[i]; if (sub->ls_table != NULL) 
array_free_one(array, sub); } + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * array->la_array_nr); + D_FREE(array); } diff --git a/src/vos/lru_array.h b/src/vos/lru_array.h index af9705ea72a..7a620c23b87 100644 --- a/src/vos/lru_array.h +++ b/src/vos/lru_array.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -22,6 +22,10 @@ struct lru_callbacks { void (*lru_on_init)(void *entry, uint32_t idx, void *arg); /** Called on finalization of an entry */ void (*lru_on_fini)(void *entry, uint32_t idx, void *arg); + /** Called on allocation of any LRU entries */ + void (*lru_on_alloc)(void *arg, daos_size_t size); + /** Called on free of any LRU entries */ + void (*lru_on_free)(void *arg, daos_size_t size); }; struct lru_entry { diff --git a/src/vos/tests/vts_io.c b/src/vos/tests/vts_io.c index aaa5911e3ca..2a23baacd6b 100644 --- a/src/vos/tests/vts_io.c +++ b/src/vos/tests/vts_io.c @@ -240,8 +240,8 @@ teardown_io(void **state) int rc; if (table) { - vos_ts_table_free(&table); - rc = vos_ts_table_alloc(&table); + vos_ts_table_free(&table, NULL); + rc = vos_ts_table_alloc(&table, NULL); if (rc != 0) { printf("Fatal error, table couldn't be reallocated\n"); exit(rc); diff --git a/src/vos/tests/vts_ts.c b/src/vos/tests/vts_ts.c index 60302ffe262..f882496dc17 100644 --- a/src/vos/tests/vts_ts.c +++ b/src/vos/tests/vts_ts.c @@ -235,7 +235,7 @@ alloc_ts_cache(void **state) if (ts_table != NULL) ts_arg->old_table = ts_table; - rc = vos_ts_table_alloc(&ts_table); + rc = vos_ts_table_alloc(&ts_table, NULL); if (rc != 0) { print_message("Can't allocate timestamp table: "DF_RC"\n", DP_RC(rc)); @@ -757,7 +757,7 @@ ts_test_fini(void **state) vos_ts_set_free(ts_arg->ta_ts_set); ts_table = vos_ts_table_get(true); - vos_ts_table_free(&ts_table); + vos_ts_table_free(&ts_table, NULL); vos_ts_table_set(ts_arg->old_table); D_FREE(ts_arg); diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index cf2ae1520ad..45252f9da0e 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -408,7 +408,7 @@ vos_tls_fini(int tags, void *data) umem_fini_txd(&tls->vtl_txd); if (tls->vtl_ts_table) - vos_ts_table_free(&tls->vtl_ts_table); + vos_ts_table_free(&tls->vtl_ts_table, tls); D_FREE(tls); } @@ -419,7 +419,28 @@ vos_standalone_tls_fini(void) vos_tls_fini(DAOS_TGT_TAG, self_mode.self_tls); self_mode.self_tls = NULL; } +} + +void +vos_lru_alloc_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + d_tm_inc_gauge(tls->vtl_lru_alloc_size, size); +} + +void +vos_lru_free_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + + d_tm_dec_gauge(tls->vtl_lru_alloc_size, size); } static void * @@ -464,17 +485,13 @@ vos_tls_init(int tags, int xs_id, int tgt_id) } if (tags & DAOS_TGT_TAG) { - rc = vos_ts_table_alloc(&tls->vtl_ts_table); + rc = vos_ts_table_alloc(&tls->vtl_ts_table, tls); if (rc) { D_ERROR("Error in creating timestamp table: %d\n", rc); goto failed; } } - if (tgt_id < 0) - /** skip sensor setup on standalone vos & sys xstream */ - return tls; - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around for reply" " reconstruction", "entries", @@ -482,6 +499,37 @@ vos_tls_init(int tags, int xs_id, int tgt_id) if (rc) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); 
+ if (tgt_id >= 0) { + rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, + "Number of committed entries kept around for reply" + " reconstruction", "entries", + "io/dtx/committed/tgt_%u", tgt_id); + if (rc) + D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, + "Number of committed entries", "entry", + "mem/vos/dtx_cmt_ent_%u/tgt_%u", + sizeof(struct vos_dtx_cmt_ent), tgt_id); + if (rc) + D_WARN("Failed to create committed cnt: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, + "Number of cached vos object", "entry", + "mem/vos/vos_obj_%u/tgt_%u", + sizeof(struct vos_object), tgt_id); + if (rc) + D_WARN("Failed to create vos obj cnt: "DF_RC"\n", DP_RC(rc)); + + } + + rc = d_tm_add_metric(&tls->vtl_lru_alloc_size, D_TM_GAUGE, + "Active DTX table LRU size", "byte", + "mem/vos/vos_lru_size/tgt_%d", tgt_id); + if (rc) + D_WARN("Failed to create LRU alloc size: "DF_RC"\n", DP_RC(rc)); return tls; failed: diff --git a/src/vos/vos_container.c b/src/vos/vos_container.c index 19a10d6acac..93cc62ceeb5 100644 --- a/src/vos/vos_container.c +++ b/src/vos/vos_container.c @@ -314,6 +314,11 @@ vos_cont_create(daos_handle_t poh, uuid_t co_uuid) return rc; } +static const struct lru_callbacks lru_cont_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + /** * Open a container within a VOSP */ @@ -395,8 +400,8 @@ vos_cont_open(daos_handle_t poh, uuid_t co_uuid, daos_handle_t *coh) rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, sizeof(struct vos_dtx_act_ent), - LRU_FLAG_REUSE_UNIQUE, - NULL, NULL); + LRU_FLAG_REUSE_UNIQUE, &lru_cont_cbs, + vos_tls_get(cont->vc_pool->vp_sysdb)); if (rc != 0) { D_ERROR("Failed to create DTX active array: rc = "DF_RC"\n", DP_RC(rc)); diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 63b102e32f9..4eefa622b7a 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -756,6 +756,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { + struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -820,6 +821,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); + d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -2471,6 +2473,7 @@ vos_dtx_aggregate(daos_handle_t coh) cont->vc_dtx_committed_count--; cont->vc_pool->vp_dtx_committed_count--; d_tm_dec_gauge(tls->vtl_committed, 1); + d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } if (epoch != cont_df->cd_newest_aggregated) { @@ -3136,6 +3139,11 @@ vos_dtx_rsrvd_fini(struct dtx_handle *dth) } } +static const struct lru_callbacks lru_dtx_cache_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + int vos_dtx_cache_reset(daos_handle_t coh, bool force) { @@ -3170,7 +3178,8 @@ vos_dtx_cache_reset(daos_handle_t coh, bool force) lrua_array_free(cont->vc_dtx_array); rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, - sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, NULL, NULL); + sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, + &lru_dtx_cache_cbs, vos_tls_get(false)); if (rc != 0) { D_ERROR("Failed to re-create DTX 
active array for "DF_UUID": "DF_RC"\n", DP_UUID(cont->vc_id), DP_RC(rc)); diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 67f4980b66f..2bee64673bf 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1726,4 +1726,6 @@ int vos_oi_upgrade_layout_ver(struct vos_container *cont, daos_unit_oid_t oid, uint32_t layout_ver); +void vos_lru_free_track(void *arg, daos_size_t size); +void vos_lru_alloc_track(void *arg, daos_size_t size); #endif /* __VOS_INTERNAL_H__ */ diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index 826c53f06a5..11e55e9d156 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -58,11 +58,13 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, struct vos_object *obj; struct obj_lru_key *lkey; struct vos_container *cont; + struct vos_tls *tls; int rc; cont = (struct vos_container *)args; D_ASSERT(cont != NULL); + tls = vos_tls_get(cont->vc_pool->vp_sysdb); lkey = (struct obj_lru_key *)key; D_ASSERT(lkey != NULL); @@ -74,7 +76,7 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, D_GOTO(failed, rc = -DER_NOMEM); init_object(obj, lkey->olk_oid, cont); - + d_tm_inc_gauge(tls->vtl_obj_cnt, 1); *llink_p = &obj->obj_llink; rc = 0; failed: @@ -123,10 +125,13 @@ static void obj_lop_free(struct daos_llink *llink) { struct vos_object *obj; + struct vos_tls *tls; D_DEBUG(DB_TRACE, "lru free callback for vos_obj_cache\n"); obj = container_of(llink, struct vos_object, obj_llink); + tls = vos_tls_get(obj->obj_cont->vc_pool->vp_sysdb); + d_tm_dec_gauge(tls->vtl_obj_cnt, 1); clean_object(obj); D_FREE(obj); } diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 96c9a3e0c6d..981cce10be5 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -63,6 +63,9 @@ struct vos_tls { bool vtl_hash_set; }; struct d_tm_node_t *vtl_committed; + struct d_tm_node_t *vtl_obj_cnt; + struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; + struct d_tm_node_t *vtl_lru_alloc_size; }; struct bio_xs_context *vos_xsctxt_get(void); diff --git a/src/vos/vos_ts.c b/src/vos/vos_ts.c index 9e47d100097..4018c2e685e 100644 --- a/src/vos/vos_ts.c +++ b/src/vos/vos_ts.c @@ -99,13 +99,29 @@ static void init_entry(void *payload, uint32_t idx, void *arg) entry->te_info = info; } +static void vos_lru_ts_alloc(void *arg, daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_alloc_track(info->ti_tls, size); +} + +static void vos_lru_ts_free(void *arg, daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_free_track(info->ti_tls, size); +} + static const struct lru_callbacks lru_cbs = { .lru_on_evict = evict_entry, .lru_on_init = init_entry, + .lru_on_alloc = vos_lru_ts_alloc, + .lru_on_free = vos_lru_ts_free, }; int -vos_ts_table_alloc(struct vos_ts_table **ts_tablep) +vos_ts_table_alloc(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_entry *entry; struct vos_ts_table *ts_table; @@ -129,6 +145,11 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) goto free_table; } + if (tls != NULL) + d_tm_inc_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); + ts_table->tt_ts_rl = vos_start_epoch; ts_table->tt_ts_rh = vos_start_epoch; uuid_clear(ts_table->tt_tx_rl.dti_uuid); @@ -140,6 +161,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) info->ti_type = i; info->ti_count = type_counts[i]; 
info->ti_table = ts_table; + info->ti_tls = tls; switch (i) { case VOS_TS_TYPE_OBJ: miss_size = OBJ_MISS_SIZE; @@ -192,6 +214,10 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) cleanup: for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); free_table: D_FREE(ts_table); @@ -200,7 +226,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) } void -vos_ts_table_free(struct vos_ts_table **ts_tablep) +vos_ts_table_free(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_table *ts_table = *ts_tablep; int i; @@ -208,6 +234,10 @@ vos_ts_table_free(struct vos_ts_table **ts_tablep) for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); D_FREE(ts_table); diff --git a/src/vos/vos_ts.h b/src/vos/vos_ts.h index 2772fab2ce2..379f59f819c 100644 --- a/src/vos/vos_ts.h +++ b/src/vos/vos_ts.h @@ -27,6 +27,8 @@ struct vos_ts_info { struct vos_ts_table *ti_table; /** Negative entries for this type */ struct vos_ts_entry *ti_misses; + /** TLS for tracking memory usage */ + struct vos_tls *ti_tls; /** Type identifier */ uint32_t ti_type; /** Mask for negative entry cache */ @@ -620,20 +622,22 @@ vos_ts_peek_entry(uint32_t *idx, uint32_t type, struct vos_ts_entry **entryp, /** Allocate thread local timestamp cache. Set the initial global times * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. * * \return -DER_NOMEM Not enough memory available * 0 Success */ int -vos_ts_table_alloc(struct vos_ts_table **ts_table); +vos_ts_table_alloc(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Free the thread local timestamp cache and reset pointer to NULL * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. */ void -vos_ts_table_free(struct vos_ts_table **ts_table); +vos_ts_table_free(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Allocate a timestamp set * From ae40bce6cef56900c8f21f3f3c42de0f588b721e Mon Sep 17 00:00:00 2001 From: Michael Hennecke Date: Fri, 22 Sep 2023 10:58:01 +0200 Subject: [PATCH 27/29] DAOS-9355 doc: mkdocs for 2.5 (#13082) edit mkdocs.yml for 2.5 Signed-off-by: Michael Hennecke --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 41027df9a6f..82c3c8125d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,5 +1,5 @@ # Project Information -site_name: DAOS v2.5 - master +site_name: DAOS v2.5 site_description: Distributed Asynchronous Object Storage site_author: DAOS Project From 281b4fad37a339ddfe37e7d25883d900f4360867 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 22 Sep 2023 14:04:22 +0100 Subject: [PATCH 28/29] DAOS-14225 control: Prevent duplicate call to SetRank (#13058) Remove legacy workaround where SetRank is called directly as a special case when rank 0 is bootstrapping the DAOS system as this creates a window where SetRank can be called a second time. The localJoin flag is removed from JoinResp and SetRank will return immediately if rank has already been set as ready. 
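
In outline, the new behaviour is equivalent to the guard below (a minimal, hypothetical Go sketch; the type and method names are illustrative stand-ins, not the actual symbols in src/control/server/instance.go):

package main

import (
	"context"
	"errors"
	"fmt"
)

// engineInstance is a simplified, hypothetical stand-in for the engine
// instance state tracked by the control plane.
type engineInstance struct {
	ready bool
	rank  uint32
}

// setRank mimics the idempotency guard described above: once the rank has
// been set and the instance marked ready, a repeated call returns
// immediately instead of redoing the SetRank work, closing the window in
// which SetRank could be invoked twice.
func (ei *engineInstance) setRank(ctx context.Context, rank uint32) error {
	if ei.ready {
		return nil // rank already set during a previous join; nothing to do
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}
	if rank == ^uint32(0) {
		return errors.New("invalid rank")
	}
	ei.rank = rank
	ei.ready = true
	return nil
}

func main() {
	ei := &engineInstance{}
	fmt.Println(ei.setRank(context.Background(), 0)) // first call sets the rank
	fmt.Println(ei.setRank(context.Background(), 0)) // second call is a no-op
}
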
Signed-off-by: Tom Nabarro --- src/control/common/proto/logging.go | 2 +- src/control/common/proto/mgmt/svc.pb.go | 169 +++++++++++------------- src/control/server/instance.go | 25 ++-- src/control/server/mgmt_system.go | 16 --- src/control/server/mgmt_system_test.go | 2 - src/mgmt/svc.pb-c.c | 26 +--- src/mgmt/svc.pb-c.h | 6 +- src/proto/mgmt/svc.proto | 3 +- 8 files changed, 103 insertions(+), 146 deletions(-) diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index a2edc22c67f..624e58fb459 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -136,7 +136,7 @@ func Debug(msg proto.Message) string { fmt.Fprintf(&bld, " %s:%s", p.Label, p.State) } case *mgmtpb.JoinResp: - fmt.Fprintf(&bld, "%T rank:%d (state:%s, local:%t) map:%d", m, m.Rank, m.State, m.LocalJoin, m.MapVersion) + fmt.Fprintf(&bld, "%T rank:%d (state:%s) map:%d", m, m.Rank, m.State, m.MapVersion) case *mgmtpb.GetAttachInfoResp: msRanks := ranklist.RankSetFromRanks(ranklist.RanksFromUint32(m.MsRanks)) uriRanks := ranklist.NewRankSet() diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 74d11533864..e6988dca637 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/svc.proto @@ -342,7 +342,6 @@ type JoinResp struct { Rank uint32 `protobuf:"varint,2,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank assigned. State JoinResp_State `protobuf:"varint,3,opt,name=state,proto3,enum=mgmt.JoinResp_State" json:"state,omitempty"` // Server state in the system map. FaultDomain string `protobuf:"bytes,4,opt,name=faultDomain,proto3" json:"faultDomain,omitempty"` // Fault domain for the instance - LocalJoin bool `protobuf:"varint,5,opt,name=localJoin,proto3" json:"localJoin,omitempty"` // Join processed locally. MapVersion uint32 `protobuf:"varint,6,opt,name=map_version,json=mapVersion,proto3" json:"map_version,omitempty"` // Join processed in this version of the system map. 
} @@ -406,13 +405,6 @@ func (x *JoinResp) GetFaultDomain() string { return "" } -func (x *JoinResp) GetLocalJoin() bool { - if x != nil { - return x.LocalJoin - } - return false -} - func (x *JoinResp) GetMapVersion() uint32 { if x != nil { return x.MapVersion @@ -1159,7 +1151,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, @@ -1167,85 +1159,84 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, - 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, - 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, - 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, - 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, - 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, - 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, - 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 
0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, - 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, - 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, - 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, - 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, - 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, - 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, - 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, - 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, - 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, - 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, - 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x74, - 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, - 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, - 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, - 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, - 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, - 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, - 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 
0x68, 0x75, 0x74, 0x64, 0x6f, - 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, - 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, - 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, - 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, - 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, - 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, - 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, - 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, - 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, - 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, - 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, + 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, + 0x10, 0x01, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, + 0x6f, 0x69, 0x6e, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, + 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, + 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 
0x6e, 0x52, + 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, + 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, + 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, + 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, + 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, + 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, + 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, + 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, + 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, + 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, + 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, + 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, + 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, + 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, + 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, + 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 
0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, + 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, + 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, + 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, + 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, + 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, + 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, + 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, + 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, + 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, + 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 14f53cf3b5b..4583c86f170 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -178,10 +178,10 @@ func (ei *EngineInstance) removeSocket() error { return nil } -func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, bool, uint32, error) { +func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, uint32, error) { superblock := ei.getSuperblock() if superblock == nil { - return ranklist.NilRank, false, 0, errors.New("nil superblock while determining rank") + return ranklist.NilRank, 0, errors.New("nil superblock while determining rank") } r := ranklist.NilRank @@ -200,11 +200,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify }) if err != nil { ei.log.Errorf("join failed: %s", err) - return ranklist.NilRank, false, 0, err + return ranklist.NilRank, 0, err } switch resp.State { case system.MemberStateAdminExcluded, 
system.MemberStateExcluded: - return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) + return ranklist.NilRank, 0, errors.Errorf("rank %d excluded", resp.Rank) } r = ranklist.Rank(resp.Rank) @@ -218,11 +218,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify superblock.URI = ready.GetUri() ei.setSuperblock(superblock) if err := ei.WriteSuperblock(); err != nil { - return ranklist.NilRank, resp.LocalJoin, 0, err + return ranklist.NilRank, 0, err } } - return r, resp.LocalJoin, resp.MapVersion, nil + return r, resp.MapVersion, nil } func (ei *EngineInstance) updateFaultDomainInSuperblock() error { @@ -259,21 +259,20 @@ func (ei *EngineInstance) handleReady(ctx context.Context, ready *srvpb.NotifyRe ei.log.Error(err.Error()) // nonfatal } - r, localJoin, mapVersion, err := ei.determineRank(ctx, ready) + r, mapVersion, err := ei.determineRank(ctx, ready) if err != nil { return err } - // If the join was already processed because it ran on the same server, - // skip the rest of these steps. - if localJoin { - return nil - } - return ei.SetupRank(ctx, r, mapVersion) } func (ei *EngineInstance) SetupRank(ctx context.Context, rank ranklist.Rank, map_version uint32) error { + if ei.IsReady() { + ei.log.Errorf("SetupRank called on an already set-up instance %d", ei.Index()) + return nil + } + if err := ei.callSetRank(ctx, rank, map_version); err != nil { return errors.Wrap(err, "SetRank failed") } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 8374b0b9e2e..620db09bf11 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -178,22 +178,6 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net MapVersion: joinResponse.MapVersion, } - // If the rank is local to the MS leader, then we need to wire up at least - // one in order to perform a CaRT group update. 
- if common.IsLocalAddr(peerAddr) && req.Idx == 0 { - resp.LocalJoin = true - - srvs := svc.harness.Instances() - if len(srvs) == 0 { - return nil, errors.New("invalid Join request (index 0 doesn't exist?!?)") - } - srv := srvs[0] - - if err := srv.SetupRank(ctx, joinResponse.Member.Rank, joinResponse.MapVersion); err != nil { - return nil, errors.Wrap(err, "SetupRank on local instance failed") - } - } - return resp, nil } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 375b77c3efb..0ac1112c4ba 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -1967,7 +1967,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: false, MapVersion: 2, }, }, @@ -1993,7 +1992,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: true, MapVersion: 2, }, }, diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index cfd562891e0..c3900429dfe 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1010,7 +1010,7 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = mgmt__join_resp__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = +static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = { { "status", @@ -1060,18 +1060,6 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, - { - "localJoin", - 5, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_BOOL, - 0, /* quantifier_offset */ - offsetof(Mgmt__JoinResp, localjoin), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, { "map_version", 6, @@ -1087,16 +1075,16 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = }; static const unsigned mgmt__join_resp__field_indices_by_name[] = { 3, /* field[3] = faultDomain */ - 4, /* field[4] = localJoin */ - 5, /* field[5] = map_version */ + 4, /* field[4] = map_version */ 1, /* field[1] = rank */ 2, /* field[2] = state */ 0, /* field[0] = status */ }; -static const ProtobufCIntRange mgmt__join_resp__number_ranges[1 + 1] = +static const ProtobufCIntRange mgmt__join_resp__number_ranges[2 + 1] = { { 1, 0 }, - { 0, 6 } + { 6, 4 }, + { 0, 5 } }; const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = { @@ -1106,10 +1094,10 @@ const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = "Mgmt__JoinResp", "mgmt", sizeof(Mgmt__JoinResp), - 6, + 5, mgmt__join_resp__field_descriptors, mgmt__join_resp__field_indices_by_name, - 1, mgmt__join_resp__number_ranges, + 2, mgmt__join_resp__number_ranges, (ProtobufCMessageInit) mgmt__join_resp__init, NULL,NULL,NULL /* reserved[123] */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 55acb283028..c1d61ef44fb 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -163,10 +163,6 @@ struct _Mgmt__JoinResp * Fault domain for the instance */ char *faultdomain; - /* - * Join processed locally. - */ - protobuf_c_boolean localjoin; /* * Join processed in this version of the system map. 
*/ @@ -174,7 +170,7 @@ struct _Mgmt__JoinResp }; #define MGMT__JOIN_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_resp__descriptor) \ - , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0, 0 } + , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0 } struct _Mgmt__LeaderQueryReq diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 400452837ce..668a9905bfd 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -44,6 +44,8 @@ message JoinReq { } message JoinResp { + reserved 5; + reserved "localJoin"; int32 status = 1; // DAOS error code uint32 rank = 2; // Server rank assigned. enum State { @@ -52,7 +54,6 @@ message JoinResp { } State state = 3; // Server state in the system map. string faultDomain = 4; // Fault domain for the instance - bool localJoin = 5; // Join processed locally. uint32 map_version = 6; // Join processed in this version of the system map. } From 02774331ecc9e4959e28e66ba18d74dde5456fb1 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 22 Sep 2023 09:19:37 -0500 Subject: [PATCH 29/29] DAOS-14391 il: reduce eq count and build jobs for vm build test (#13070) Signed-off-by: Mohamad Chaarawi --- src/tests/ftest/dfuse/daos_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index 5edd0b328df..d1afe8366b2 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -138,8 +138,8 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): remote_env = {} if run_on_vms: dfuse_namespace = dfuse_namespace = "/run/dfuse_vm/*" - build_jobs = 6 * 2 - remote_env['D_IL_MAX_EQ'] = '6' + build_jobs = 6 + remote_env['D_IL_MAX_EQ'] = '2' intercept_jobs = build_jobs if intercept: