diff --git a/.rpmignore b/.rpmignore index d0657d759c0..86666be323b 100644 --- a/.rpmignore +++ b/.rpmignore @@ -3,28 +3,22 @@ # but should not be included in the current release # -centos7/daos-client-tests-openmpi*.rpm -centos7/daos-firmware*.rpm -centos7/daos-mofed*.rpm -centos7/daos-serialize*.rpm -centos7/daos-server-tests-openmpi*.rpm -centos7/daos-tests-internal*.rpm -centos7/ucx*.rpm - -el8/daos-client-tests-openmpi*.rpm +el8/daos-*tests*.rpm el8/daos-firmware*.rpm el8/daos-mofed*.rpm el8/daos-serialize*.rpm -el8/daos-server-tests-openmpi*.rpm -el8/daos-tests-internal*.rpm el8/ucx*.rpm -leap15/daos-client-tests-openmpi*.rpm +el9/daos-*tests*.rpm +el9/daos-firmware*.rpm +el9/daos-mofed*.rpm +el9/daos-serialize*.rpm +el9/ucx*.rpm + +leap15/daos-*tests*.rpm leap15/daos-firmware*.rpm leap15/daos-mofed*.rpm leap15/daos-serialize*.rpm -leap15/daos-server-tests-openmpi*.rpm -leap15/daos-tests-internal*.rpm leap15/openucx*.rpm leap15/ucx*.rpm leap15/*protobuf-c*.rpm diff --git a/Jenkinsfile b/Jenkinsfile index 0f257aa9dfe..ce8e946bc7b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -74,6 +74,11 @@ void job_step_update(def value) { Map nlt_test() { // groovylint-disable-next-line NoJavaUtilDate Date startDate = new Date() + try { + unstash('nltr') + } catch (e) { + print 'Unstash failed, results from NLT stage will not be included' + } sh label: 'Fault injection testing using NLT', script: './ci/docker_nlt.sh --class-name el8.fault-injection fi' List filesList = [] @@ -1104,7 +1109,6 @@ pipeline { sconsBuild(parallel_build: true, scons_args: 'PREFIX=/opt/daos TARGET_TYPE=release BUILD_TYPE=debug', build_deps: 'no')) - unstash('nltr') job_step_update(nlt_test()) recordCoverage(tools: [[parser: 'COBERTURA', pattern:'nltr.xml']], skipPublishingChecks: true, diff --git a/SConstruct b/SConstruct index abc04dc8b64..8e59cc56c4f 100644 --- a/SConstruct +++ b/SConstruct @@ -363,7 +363,7 @@ MINIMAL_ENV = ('HOME', 'TERM', 'SSH_AUTH_SOCK', 'http_proxy', 'https_proxy', 'PK # Environment variables that are also kept when LD_PRELOAD is set. 
PRELOAD_ENV = ('LD_PRELOAD', 'D_LOG_FILE', 'DAOS_AGENT_DRPC_DIR', 'D_LOG_MASK', 'DD_MASK', - 'DD_SUBSYS') + 'DD_SUBSYS', 'D_IL_MAX_EQ') def scons(): diff --git a/ci/rpm/build_unsuccessful.sh b/ci/rpm/build_unsuccessful.sh index 3c88a98dc56..d1d1f3606e4 100755 --- a/ci/rpm/build_unsuccessful.sh +++ b/ci/rpm/build_unsuccessful.sh @@ -8,7 +8,7 @@ mydir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" ci_envs="$mydir/../parse_ci_envs.sh" if [ -e "${ci_envs}" ]; then # at some point we want to use: shellcheck source=ci/parse_ci_envs.sh - # shellcheck disable=SC1091 + # shellcheck disable=SC1091,SC1090 source "${ci_envs}" fi @@ -25,6 +25,9 @@ if [ -d /var/cache/pbuilder/ ]; then exit 0 fi +rpm -q mock +mock --debug-config + mockroot="/var/lib/mock/$CHROOT_NAME" cat "$mockroot"/result/{root,build}.log 2>/dev/null || true diff --git a/mkdocs.yml b/mkdocs.yml index 41027df9a6f..82c3c8125d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,5 +1,5 @@ # Project Information -site_name: DAOS v2.5 - master +site_name: DAOS v2.5 site_description: Distributed Asynchronous Object Storage site_author: DAOS Project diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 1fe43214676..3044a6b58f6 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -58,7 +58,7 @@ def check(self, name): self.installed.append(name) return True - if not GetOption('help'): + if not GetOption('help') and not GetOption('silent'): print(f'Using build version of {name}') self.not_installed.append(name) return False @@ -90,12 +90,14 @@ def check(reqs, name, built_str, installed_str=""): def ofi_config(config): """Check ofi version""" - print('Checking for libfabric > 1.11...', end=' ') + if not GetOption('silent'): + print('Checking for libfabric > 1.11...', end=' ') code = """#include _Static_assert(FI_MAJOR_VERSION == 1 && FI_MINOR_VERSION >= 11, "libfabric must be >= 1.11");""" rc = config.TryCompile(code, ".c") - print('yes' if rc else 'no') + if not GetOption('silent'): + print('yes' if rc else 'no') return rc @@ -132,6 +134,7 @@ def define_mercury(reqs): libs=['fabric'], config_cb=ofi_config, headers=['rdma/fabric.h'], + pkgconfig='libfabric', package='libfabric-devel' if inst(reqs, 'ofi') else None, patch_rpath=['lib'], build_env={'CFLAGS': "-fstack-usage"}) @@ -184,12 +187,6 @@ def define_mercury(reqs): else: mercury_build.append('-DMERCURY_ENABLE_DEBUG:BOOL=OFF') - mercury_build.extend(check(reqs, - 'ofi', - ['-DOFI_INCLUDE_DIR:PATH=$OFI_PREFIX/include', - '-DOFI_LIBRARY:FILEPATH=$OFI_PREFIX/lib/libfabric.so'], - [])) - reqs.define('mercury', retriever=GitRepoRetriever('https://github.com/mercury-hpc/mercury.git', True), commands=[mercury_build, diff --git a/site_scons/env_modules.py b/site_scons/env_modules.py index 9d38df7e2db..df4af0a6498 100644 --- a/site_scons/env_modules.py +++ b/site_scons/env_modules.py @@ -36,7 +36,7 @@ class _env_module(): # pylint: disable=invalid-name "openmpi": ['mpi/mlnx_openmpi-x86_64', 'mpi/openmpi3-x86_64', 'gnu-openmpi', 'mpi/openmpi-x86_64']} - def __init__(self): + def __init__(self, silent=False): """Load Modules for initializing environment variables""" # Leap 15's lmod-lua doesn't include the usual module path # in it's MODULEPATH, for some unknown reason @@ -44,6 +44,7 @@ def __init__(self): os.path.join(os.sep, "usr", "share", "modulefiles"), os.path.join(os.sep, "etc", "modulefiles")] + os.environ.get("MODULEPATH", "").split(":")) + self._silent = silent self._module_load = self._init_mpi_module() def 
_module_func(self, command, *arguments): # pylint: disable=no-self-use @@ -56,7 +57,8 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # pylint: disable=consider-using-with try: - print(f"Going to run {cmd}") + if not self._silent: + print(' '.join(cmd)) proc = Popen(cmd, stdout=PIPE, stderr=PIPE) except OSError as error: if error.errno == errno.ENOENT: @@ -81,11 +83,11 @@ def _module_func(self, command, *arguments): # pylint: disable=no-self-use # return _mlstatus, stderr.decode() # pylint: disable=undefined-variable def _init_mpi_module(self): - """init mpi module function""" + """Init mpi module function""" return self._mpi_module def _mpi_module(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] unload = [] @@ -109,16 +111,17 @@ def _mpi_module(self, mpi): self._module_func('unload', to_unload) for to_load in load: - print(f"Trying to load {to_load}") - if self._module_func('is-avail', to_load)[0] and \ - self._module_func('load', to_load)[0]: - print(f'Loaded {to_load}') + if not self._silent: + print(f"Trying to load {to_load}") + if self._module_func('is-avail', to_load)[0] and self._module_func('load', to_load)[0]: + if not self._silent: + print(f'Loaded {to_load}') return True return False def _mpi_module_old(self, mpi): - """attempt to load the requested module""" + """Attempt to load the requested module""" load = [] for key, value in self._mpi_map.items(): if key == mpi: @@ -162,7 +165,7 @@ def load_mpi(self, mpi): return True def show_avail(self): - """list available modules""" + """List available modules""" try: status, output = self._module_func('avail') if not status: @@ -172,12 +175,12 @@ def show_avail(self): return output def get_map(self, key): - """return the mpi map""" + """Return the mpi map""" return self._mpi_map[key] -def load_mpi(mpi): - """global function to load MPI into os.environ""" +def load_mpi(mpi, silent=False): + """Global function to load MPI into os.environ""" # On Ubuntu, MPI stacks use alternatives and need root to change their # pointer, so just verify that the desired MPI is loaded if distro.id() == "ubuntu": @@ -201,19 +204,19 @@ def load_mpi(mpi): return False if _env_module.env_module_init is None: - _env_module.env_module_init = _env_module() + _env_module.env_module_init = _env_module(silent) return _env_module.env_module_init.load_mpi(mpi) def show_avail(): - """global function to show the available modules""" + """Global function to show the available modules""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.show_avail() def get_module_list(key): - """global function to show the modules that map to a key""" + """Global function to show the modules that map to a key""" if _env_module.env_module_init is None: _env_module.env_module_init = _env_module() return _env_module.env_module_init.get_map(key) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 9163bfb0ed7..f016e4295b0 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1125,6 +1125,11 @@ def _parse_config(self, env, opts): return + def _print(self, msg): + if GetOption('silent'): + return + print(msg) + def has_missing_targets(self, env): """Check for expected build targets (e.g. 
libraries or headers)""" # pylint: disable=too-many-return-statements @@ -1151,7 +1156,7 @@ def has_missing_targets(self, env): print('help set') return True - print(f"Checking targets for component '{self.name}'") + self._print(f"Checking targets for component '{self.name}'") config = env.Configure() config_cb = self.key_words.get("config_cb", None) @@ -1244,7 +1249,6 @@ def configure(self): def set_environment(self, env, needed_libs): """Modify the specified construction environment to build with the external component""" - if self.skip_arch: return diff --git a/site_scons/site_tools/compiler_setup.py b/site_scons/site_tools/compiler_setup.py index 1f20d00b050..8c91ff2f535 100644 --- a/site_scons/site_tools/compiler_setup.py +++ b/site_scons/site_tools/compiler_setup.py @@ -39,8 +39,9 @@ def _base_setup(env): compiler = env['CC'] build_type = env['BUILD_TYPE'] - print(f'Setting up compile environment for {compiler}') - print(f"Build type is '{build_type}'") + if not GetOption('silent'): + print(f'Setting up compile environment for {compiler}') + print(f"Build type is '{build_type}'") prev_compiler = env.get('BSETUP', False) if prev_compiler: diff --git a/site_scons/site_tools/daos_builder.py b/site_scons/site_tools/daos_builder.py index 36676952ffd..8afd254a182 100644 --- a/site_scons/site_tools/daos_builder.py +++ b/site_scons/site_tools/daos_builder.py @@ -224,6 +224,10 @@ def _configure_mpi(self): if GetOption('help'): return None + def _print(msg): + if not GetOption('silent'): + print(msg) + env = self.Clone() env['CXX'] = None @@ -233,13 +237,13 @@ def _configure_mpi(self): return env for mpi in ['openmpi', 'mpich']: - if not load_mpi(mpi): + if not load_mpi(mpi, GetOption('silent')): continue if _find_mpicc(env): - print(f'{mpi} is installed') + _print(f'{mpi} is installed') return env - print(f'No {mpi} installed and/or loaded') - print("No MPI installed") + _print(f'No {mpi} installed and/or loaded') + _print("No MPI installed") return None diff --git a/src/SConscript b/src/SConscript index e440dff1eea..c4cb419e047 100644 --- a/src/SConscript +++ b/src/SConscript @@ -49,7 +49,8 @@ def read_and_save_version(env): '@Template for @': ''} out = env.Substfile(tmpl_hdr_in, SUBST_DICT=subst_dict) - print(f'generated daos version header file: {out[0].abspath}') + if not GetOption('silent'): + print(f'generated daos version header file: {out[0].abspath}') return version diff --git a/src/cart/README.env b/src/cart/README.env index 8a8cca74f85..edfbb39c561 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -80,6 +80,11 @@ This file lists the environment variables used in CaRT. by default, and can be specified using DD_SUBSYS, for example: "DD_SUBSYS=RPC,BULK,CORPC,GRP,LM,HG,PMIX,ST,IV" or also "DD_SUBSYS=all". + . D_MEMORY_TRACK + User can enable memory track for daos engine by D_MEMORY_TRACK=1. With the + environment, all of allocations inside DAOS (by D_ALLOC) will be tracked, and + total allocated bytes per xstream can be shown through metrics. + . CRT_TIMEOUT Set it as integer in the range of (0, 3600] to set the global timeout value of all RPCs (second). 
Without setting it or set it as any other value will diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 8e871e7c3a6..e6c5fe70fb1 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -617,8 +617,9 @@ crt_provider_get_ctx_idx(bool primary, int provider) } } - D_ERROR("ctx_num %d, will exceed CRT_SRV_CONTEXT_NUM (%d) if create more context.\n", - prov_data->cpg_ctx_num, CRT_SRV_CONTEXT_NUM); + D_DEBUG(DB_ALL, "provider:%d allowed context limit = %d exceeded\n", + provider, CRT_SRV_CONTEXT_NUM); + return -1; } diff --git a/src/client/api/event.c b/src/client/api/event.c index 85dd514da17..e6996fb6155 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -484,8 +484,13 @@ daos_event_complete(struct daos_event *ev, int rc) } if (evx->evx_status == DAOS_EVS_READY || evx->evx_status == DAOS_EVS_COMPLETED || - evx->evx_status == DAOS_EVS_ABORTED) + evx->evx_status == DAOS_EVS_ABORTED) { + if (evx->is_errno) + ev->ev_error = daos_der2errno(rc); + else + ev->ev_error = rc; goto out; + } D_ASSERT(evx->evx_status == DAOS_EVS_RUNNING); @@ -830,7 +835,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) eqx = daos_eq_lookup(eqh); if (eqx == NULL) { - D_ERROR("eqh nonexist.\n"); + D_ERROR("daos_eq_lookup() failed: "DF_RC"\n", DP_RC(-DER_NONEXIST)); return -DER_NONEXIST; } @@ -862,8 +867,7 @@ daos_eq_destroy(daos_handle_t eqh, int flags) if (eqx->eqx_ctx != NULL) { rc = crt_context_flush(eqx->eqx_ctx, 0); if (rc != 0) { - D_ERROR("failed to flush client context: "DF_RC"\n", - DP_RC(rc)); + D_ERROR("failed to flush client context: "DF_RC"\n", DP_RC(rc)); return rc; } } diff --git a/src/client/dfs/SConscript b/src/client/dfs/SConscript index a0c12efc139..38512536397 100644 --- a/src/client/dfs/SConscript +++ b/src/client/dfs/SConscript @@ -5,24 +5,29 @@ def configure_lustre(denv): """Do Lustre configure checks""" if GetOption('help') or GetOption('clean'): return denv + + def _print(msg): + if not GetOption('silent'): + print(msg) + # If Lustre installed build a Lustre-aware libduns conf = Configure(denv) gotversion = False if not conf.CheckLibWithHeader('lustreapi', 'linux/lustre/lustre_user.h', 'c'): - print("No installed Lustre version detected") + _print("No installed Lustre version detected") else: - print("Installed Lustre version detected") + _print("Installed Lustre version detected") if not conf.CheckFunc('llapi_unlink_foreign'): - print("Lustre version is not compatible") + _print("Lustre version is not compatible") else: - print("Lustre version is compatible") + _print("Lustre version is compatible") gotversion = True if gotversion is True: - print("Building with Lustre bindings.") + _print("Building with Lustre bindings.") denv.AppendUnique(CCFLAGS=['-DLUSTRE_INCLUDE']) else: - print("Not building with Lustre bindings.") + _print("Not building with Lustre bindings.") return conf.Finish() diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 9107db9214b..e502a95ff83 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -11,19 +11,15 @@ #include #include #include -#include #include #include #include #include -#include #include #include -#include #include "daos.h" #include "daos_fs.h" - #include "dfs_internal.h" /** D-key name of SB metadata */ @@ -4493,7 +4489,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, D_ALLOC_PTR(params); if (params == NULL) - D_GOTO(err_task, rc = ENOMEM); + D_GOTO(err_task, rc = -DER_NOMEM); params->read_size = read_size; @@ -4517,10 +4513,12 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, 
dfs_iod_t *iod, daos_task_set_priv(task, params); rc = tse_task_register_cbs(task, NULL, NULL, 0, read_cb, NULL, 0); if (rc) - D_GOTO(err_params, rc = daos_der2errno(rc)); + D_GOTO(err_params, rc); rc = dc_task_schedule(task, true); - return daos_der2errno(rc); + if (rc) + D_GOTO(err_task, rc); + return 0; err_params: D_FREE(params); @@ -7446,3 +7444,405 @@ dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) D_FREE(entry.value); return rc; } + +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size) +{ + daos_handle_t oh; + int rc; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (daos_obj_id2type(oid) != DAOS_OT_ARRAY_BYTE) + return EINVAL; + + rc = daos_array_open_with_attr(dfs->coh, oid, DAOS_TX_NONE, DAOS_OO_RO, 1, + chunk_size ? chunk_size : dfs->attr.da_chunk_size, + &oh, NULL); + if (rc != 0) { + D_ERROR("daos_array_open() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_get_size(oh, DAOS_TX_NONE, size, NULL); + if (rc) { + daos_array_close(oh, NULL); + D_ERROR("daos_array_get_size() failed: "DF_RC"\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + rc = daos_array_close(oh, NULL); + return daos_der2errno(rc); +} + +struct dfs_pipeline { + daos_pipeline_t pipeline; + dfs_predicate_t pred; + + mode_t constant1; + mode_t constant2; + + d_iov_t dkey_iov; + d_iov_t const1_iov; + d_iov_t const2_iov; + d_iov_t const3_iov; + + daos_filter_part_t dkey_ft; + daos_filter_part_t akey1_ft; + daos_filter_part_t akey2_ft; + daos_filter_part_t const0_ft; + daos_filter_part_t const1_ft; + daos_filter_part_t const2_ft; + daos_filter_part_t const3_ft; + daos_filter_part_t like_ft; + daos_filter_part_t ba_ft; + daos_filter_part_t eq_ft; + daos_filter_part_t gt_ft; + daos_filter_part_t and_ft; + daos_filter_part_t or_ft; + + daos_filter_t pipef; +}; + +#define DKEY_F "DAOS_FILTER_DKEY" +#define AKEY_F "DAOS_FILTER_AKEY" +#define CONST_F "DAOS_FILTER_CONST" +#define BINARY_F "DAOS_FILTER_TYPE_BINARY" +#define INT8_F "DAOS_FILTER_TYPE_UINTEGER8" +#define INT4_F "DAOS_FILTER_TYPE_UINTEGER4" +#define LIKE_F "DAOS_FILTER_FUNC_LIKE" +#define GT_F "DAOS_FILTER_FUNC_GT" +#define EQ_F "DAOS_FILTER_FUNC_EQ" +#define BA_F "DAOS_FILTER_FUNC_BITAND" +#define AND_F "DAOS_FILTER_FUNC_AND" +#define OR_F "DAOS_FILTER_FUNC_OR" +#define COND_F "DAOS_FILTER_CONDITION" + +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **_dpipe) +{ + daos_size_t bin_flen = sizeof(BINARY_F) - 1; + daos_size_t dkey_flen = sizeof(DKEY_F) - 1; + daos_size_t akey_flen = sizeof(AKEY_F) - 1; + daos_size_t const_flen = sizeof(CONST_F) - 1; + daos_size_t int8_flen = sizeof(INT8_F) - 1; + daos_size_t int4_flen = sizeof(INT4_F) - 1; + daos_size_t like_flen = sizeof(LIKE_F) - 1; + daos_size_t gt_flen = sizeof(GT_F) - 1; + daos_size_t eq_flen = sizeof(EQ_F) - 1; + daos_size_t ba_flen = sizeof(BA_F) - 1; + daos_size_t and_flen = sizeof(AND_F) - 1; + daos_size_t or_flen = sizeof(OR_F) - 1; + daos_size_t cond_flen = sizeof(COND_F) - 1; + dfs_pipeline_t *dpipe; + int rc; + + D_ALLOC_PTR(dpipe); + if (dpipe == NULL) + return ENOMEM; + + /** copy the user predicate conditions */ + memcpy(&dpipe->pred, &pred, sizeof(dfs_predicate_t)); + + daos_pipeline_init(&dpipe->pipeline); + + /** build condition for entry name */ + if (flags & DFS_FILTER_NAME) { + daos_size_t name_len; + + name_len = strnlen(dpipe->pred.dp_name, DFS_MAX_NAME); + + d_iov_set(&dpipe->dkey_ft.part_type, DKEY_F, dkey_flen); + 
d_iov_set(&dpipe->dkey_ft.data_type, BINARY_F, bin_flen); + dpipe->dkey_ft.data_len = DFS_MAX_NAME; + + d_iov_set(&dpipe->const0_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const0_ft.data_type, BINARY_F, bin_flen); + dpipe->const0_ft.num_constants = 1; + dpipe->const0_ft.constant = &dpipe->dkey_iov; + d_iov_set(dpipe->const0_ft.constant, dpipe->pred.dp_name, name_len); + + d_iov_set(&dpipe->like_ft.part_type, LIKE_F, like_flen); + dpipe->like_ft.num_operands = 2; + } + + /** build condition for newer than ctime */ + if (flags & DFS_FILTER_NEWER) { + d_iov_set(&dpipe->akey2_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey2_ft.data_type, INT8_F, int8_flen); + d_iov_set(&dpipe->akey2_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey2_ft.data_offset = CTIME_IDX; + dpipe->akey2_ft.data_len = sizeof(time_t); + + d_iov_set(&dpipe->const3_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const3_ft.data_type, INT8_F, int8_flen); + dpipe->const3_ft.num_constants = 1; + dpipe->const3_ft.constant = &dpipe->const3_iov; + d_iov_set(dpipe->const3_ft.constant, &dpipe->pred.dp_newer, sizeof(time_t)); + + d_iov_set(&dpipe->gt_ft.part_type, GT_F, gt_flen); + dpipe->gt_ft.num_operands = 2; + } + + /** If filter on dirs is not requested, return all dirs so they can be traversed */ + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + d_iov_set(&dpipe->akey1_ft.part_type, AKEY_F, akey_flen); + d_iov_set(&dpipe->akey1_ft.data_type, INT4_F, int4_flen); + d_iov_set(&dpipe->akey1_ft.akey, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + dpipe->akey1_ft.data_offset = MODE_IDX; + dpipe->akey1_ft.data_len = sizeof(mode_t); + + dpipe->constant1 = S_IFMT; + d_iov_set(&dpipe->const1_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const1_ft.data_type, INT4_F, int4_flen); + dpipe->const1_ft.num_constants = 1; + dpipe->const1_ft.constant = &dpipe->const1_iov; + d_iov_set(dpipe->const1_ft.constant, &dpipe->constant1, sizeof(mode_t)); + + dpipe->constant2 = S_IFDIR; + d_iov_set(&dpipe->const2_ft.part_type, CONST_F, const_flen); + d_iov_set(&dpipe->const2_ft.data_type, INT4_F, int4_flen); + dpipe->const2_ft.num_constants = 1; + dpipe->const2_ft.constant = &dpipe->const2_iov; + d_iov_set(dpipe->const2_ft.constant, &dpipe->constant2, sizeof(mode_t)); + + d_iov_set(&dpipe->ba_ft.part_type, BA_F, ba_flen); + dpipe->ba_ft.num_operands = 2; + + d_iov_set(&dpipe->eq_ft.part_type, EQ_F, eq_flen); + dpipe->eq_ft.num_operands = 2; + } + + /** build final condition: IS_DIR || (entry name match && newer match) */ + + d_iov_set(&dpipe->and_ft.part_type, AND_F, and_flen); + dpipe->and_ft.num_operands = 2; + + d_iov_set(&dpipe->or_ft.part_type, OR_F, or_flen); + dpipe->or_ft.num_operands = 2; + + /** initialize and add all the parts to the pipeline */ + daos_filter_init(&dpipe->pipef); + d_iov_set(&dpipe->pipef.filter_type, COND_F, cond_flen); + + if (!(flags & DFS_FILTER_INCLUDE_DIRS)) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->or_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + rc = daos_filter_add(&dpipe->pipef, &dpipe->eq_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->ba_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->akey1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const1_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const2_ft); + if (rc) + D_GOTO(err, rc = 
daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER && flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->and_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NAME) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->like_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->dkey_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const0_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + if (flags & DFS_FILTER_NEWER) { + rc = daos_filter_add(&dpipe->pipef, &dpipe->gt_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->akey2_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + rc = daos_filter_add(&dpipe->pipef, &dpipe->const3_ft); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + } + + rc = daos_pipeline_add(&dpipe->pipeline, &dpipe->pipef); + if (rc) + D_GOTO(err, rc = daos_der2errno(rc)); + + *_dpipe = dpipe; + return 0; +err: + printf("failed to create pipeline. rc = %d\n", rc); + D_FREE(dpipe); + return rc; +} + +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe) +{ + if (dpipe->pipeline.num_filters) + D_FREE(dpipe->pipeline.filters); + D_FREE(dpipe); + return 0; +} + +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csize, + uint64_t *nr_scanned) +{ + daos_iod_t iod; + daos_key_desc_t *kds; + d_sg_list_t sgl_keys, sgl_recs; + d_iov_t iov_keys, iov_recs; + char *buf_keys = NULL, *buf_recs = NULL; + daos_recx_t recxs[4]; + uint32_t nr_iods, nr_kds, key_nr, i; + daos_size_t record_len; + int rc = 0; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (obj == NULL || !S_ISDIR(obj->mode)) + return ENOTDIR; + if (*nr == 0) + return 0; + if (dpipe == NULL || dirs == NULL || anchor == NULL) + return EINVAL; + + /* IOD to retrieve the mode_t and the ctime */ + iod.iod_nr = 2; + iod.iod_size = 1; + recxs[0].rx_idx = MODE_IDX; + recxs[0].rx_nr = sizeof(mode_t); + recxs[1].rx_idx = CTIME_IDX; + recxs[1].rx_nr = sizeof(time_t); + iod.iod_recxs = recxs; + iod.iod_type = DAOS_IOD_ARRAY; + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + record_len = recxs[0].rx_nr + recxs[1].rx_nr; + + if (oids) { + recxs[iod.iod_nr].rx_idx = OID_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_obj_id_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + if (csize) { + recxs[iod.iod_nr].rx_idx = CSIZE_IDX; + recxs[iod.iod_nr].rx_nr = sizeof(daos_size_t); + record_len += recxs[iod.iod_nr].rx_nr; + iod.iod_nr ++; + } + + nr_kds = *nr; + nr_iods = 1; + + D_ALLOC_ARRAY(kds, nr_kds); + if (kds == NULL) + return ENOMEM; + + /** alloc buffer to store dkeys enumerated */ + sgl_keys.sg_nr = 1; + sgl_keys.sg_nr_out = 0; + sgl_keys.sg_iovs = &iov_keys; + D_ALLOC_ARRAY(buf_keys, nr_kds * DFS_MAX_NAME); + if (buf_keys == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_keys, buf_keys, nr_kds * DFS_MAX_NAME); + + + /** alloc buffer to store records enumerated */ + sgl_recs.sg_nr = 1; + sgl_recs.sg_nr_out = 0; + sgl_recs.sg_iovs = &iov_recs; + D_ALLOC_ARRAY(buf_recs, nr_kds * record_len); + if (buf_recs == NULL) + D_GOTO(out, rc = ENOMEM); + d_iov_set(&iov_recs, buf_recs, nr_kds * record_len); + + key_nr = 0; + *nr_scanned = 0; + while (!daos_anchor_is_eof(anchor)) { + daos_pipeline_stats_t stats = {0}; + char *ptr1; + + memset(buf_keys, 0, *nr * 
DFS_MAX_NAME); + + rc = daos_pipeline_run(dfs->coh, obj->oh, &dpipe->pipeline, DAOS_TX_NONE, 0, NULL, + &nr_iods, &iod, anchor, &nr_kds, kds, &sgl_keys, &sgl_recs, + NULL, NULL, &stats, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + + D_ASSERT(nr_iods == 1); + ptr1 = buf_keys; + + for (i = 0; i < nr_kds; i++) { + char *ptr2; + mode_t mode; + char *dkey = (char *)ptr1; + + /** set the dentry name */ + memcpy(dirs[key_nr].d_name, dkey, kds[i].kd_key_len); + dirs[key_nr].d_name[kds[i].kd_key_len] = '\0'; + + /** set the dentry type */ + ptr2 = &buf_recs[i * record_len]; + mode = *((mode_t *)ptr2); + + if (S_ISDIR(mode)) { + dirs[key_nr].d_type = DT_DIR; + } else if (S_ISREG(mode)) { + dirs[key_nr].d_type = DT_REG; + } else if (S_ISLNK(mode)) { + dirs[key_nr].d_type = DT_LNK; + } else { + D_ERROR("Invalid DFS entry type found, possible data corruption\n"); + D_GOTO(out, rc = EINVAL); + } + + /** set the OID for dentry if requested */ + if (oids) { + ptr2 += sizeof(mode_t) + sizeof(time_t); + oid_cp(&oids[key_nr], *((daos_obj_id_t *)ptr2)); + } + + /** set the chunk size for dentry if requested */ + if (csize) { + if (oids) + ptr2 += sizeof(daos_obj_id_t); + else + ptr2 += sizeof(mode_t) + sizeof(time_t); + csize[key_nr] = *((daos_size_t *)ptr2); + } + + key_nr++; + ptr1 += kds[i].kd_key_len; + } + + *nr_scanned += stats.nr_dkeys; + nr_kds = *nr - key_nr; + if (nr_kds == 0) + break; + } + *nr = key_nr; + +out: + D_FREE(kds); + D_FREE(buf_recs); + D_FREE(buf_keys); + return rc; +} diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index c337ec1bf42..83ac13aeaab 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -131,6 +131,97 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); +/** Internal pipeline readdir functionality */ + +/** DFS pipeline object */ +typedef struct dfs_pipeline dfs_pipeline_t; + +enum { + DFS_FILTER_NAME = (1 << 1), + DFS_FILTER_NEWER = (1 << 2), + DFS_FILTER_INCLUDE_DIRS = (1 << 3), +}; + +/** Predicate conditions for filter */ +typedef struct { + char dp_name[DFS_MAX_NAME]; /** name condition for entry - regex */ + time_t dp_newer; /** timestamp for newer condition */ + size_t dp_size; /** size of files - not supported for now */ +} dfs_predicate_t; + +/** + * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the + * chunk_size of the file is also required to be passed if the file was created with a different + * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to + * chunk size. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] oid Object ID of the file. + * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). + * \param[out] size Returned size of the file. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); + +/** + * Create a pipeline object to be used during readdir with filter. Should be destroyed with + * dfs_pipeline_destroy(). + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). + * \param[in] flags Pipeline flags (conditions to apply). + * \param[out] dpipe Pipeline object created. + * + * \return 0 on success, errno code on failure. 
+ */ +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); + +/** + * Destroy pipeline object. + * + * \param[in] dpipe Pipeline object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe); + +/** + * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() + * on the entries that are enumerated. This function also optionally returns the object ID of each + * dirent if requested through a pre-allocated OID input array. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Opened directory object. + * \param[in] dpipe DFS pipeline filter. + * \param[in,out] + * anchor Hash anchor for the next call, it should be set to + * zeroes for the first call, it should not be changed + * by caller between calls. + * \param[in,out] + * nr [in]: number of dirents allocated in \a dirs. + * [out]: number of returned dirents. + * \param[in,out] + * dirs [in] preallocated array of dirents. + * [out]: dirents returned with d_name filled only. + * \param[in,out] + * oids [in] Optional preallocated array of object IDs. + * [out]: Object ID associated with each dirent that was read. + * \param[in,out] + * csizes [in] Optional preallocated array of sizes. + * [out]: chunk size associated with each dirent that was read. + * \param[out] Total number of entries scanned by readdir before returning. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, + uint64_t *nr_scanned); + #if defined(__cplusplus) } #endif diff --git a/src/client/dfs/duns.c b/src/client/dfs/duns.c index cad9790507c..6c0e089bd58 100644 --- a/src/client/dfs/duns.c +++ b/src/client/dfs/duns.c @@ -859,7 +859,7 @@ duns_link_lustre_path(const char *pool, const char *cont, daos_cont_layout_t typ { char str[DUNS_MAX_XATTR_LEN + 1]; int len; - int rc, rc2; + int rc; /* XXX if liblustreapi is not binded, do it now ! 
*/ if (liblustre_binded == false && liblustre_notfound == false) { @@ -1247,7 +1247,9 @@ duns_link_cont(daos_handle_t poh, const char *cont, const char *path) #ifdef LUSTRE_INCLUDE struct statfs fs; char *dir, *dirp; + size_t path_len; + path_len = strnlen(path, PATH_MAX); D_STRNDUP(dir, path, path_len); if (dir == NULL) D_GOTO(out_cont, rc = ENOMEM); diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index f360aabbc81..369ab21fa09 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -367,20 +367,24 @@ struct dfuse_inode_ops { }; struct dfuse_event { - fuse_req_t de_req; /**< The fuse request handle */ - daos_event_t de_ev; - size_t de_len; /**< The size returned by daos */ - d_iov_t de_iov; - d_sg_list_t de_sgl; - d_list_t de_list; - struct dfuse_eq *de_eqt; - struct dfuse_obj_hdl *de_oh; - off_t de_req_position; /**< The file position requested by fuse */ + fuse_req_t de_req; /**< The fuse request handle */ + daos_event_t de_ev; + size_t de_len; /**< The size returned by daos */ + d_iov_t de_iov; + d_sg_list_t de_sgl; + d_list_t de_list; + struct dfuse_eq *de_eqt; + union { + struct dfuse_obj_hdl *de_oh; + struct dfuse_inode_entry *de_ie; + }; + off_t de_req_position; /**< The file position requested by fuse */ union { size_t de_req_len; size_t de_readahead_len; }; void (*de_complete_cb)(struct dfuse_event *ev); + struct stat de_attr; }; extern struct dfuse_inode_ops dfuse_dfs_ops; diff --git a/src/client/dfuse/il/int_posix.c b/src/client/dfuse/il/int_posix.c index b845c85c05c..93a91cd6215 100644 --- a/src/client/dfuse/il/int_posix.c +++ b/src/client/dfuse/il/int_posix.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -33,6 +33,10 @@ FOREACH_INTERCEPT(IOIL_FORWARD_DECL) +static __thread daos_handle_t ioil_eqh; + +#define IOIL_MAX_EQ 64 + struct ioil_pool { daos_handle_t iop_poh; uuid_t iop_uuid; @@ -43,13 +47,17 @@ struct ioil_pool { struct ioil_global { pthread_mutex_t iog_lock; d_list_t iog_pools_head; + daos_handle_t iog_main_eqh; + daos_handle_t iog_eqs[IOIL_MAX_EQ]; + uint16_t iog_eq_count_max; + uint16_t iog_eq_count; + uint16_t iog_eq_idx; pid_t iog_init_tid; bool iog_initialized; bool iog_no_daos; bool iog_daos_init; bool iog_show_summary; /**< Should a summary be shown at teardown */ - unsigned iog_report_count; /**< Number of operations that should be logged */ ATOMIC uint64_t iog_file_count; /**< Number of file opens intercepted */ @@ -277,6 +285,7 @@ ioil_init(void) struct rlimit rlimit; int rc; uint64_t report_count = 0; + uint64_t eq_count = 0; pthread_once(&init_links_flag, init_links); @@ -319,6 +328,18 @@ ioil_init(void) if (rc) return; + rc = d_getenv_uint64_t("D_IL_MAX_EQ", &eq_count); + if (rc != -DER_NONEXIST) { + if (eq_count > IOIL_MAX_EQ) { + DFUSE_LOG_WARNING("Max EQ count (%"PRIu64") should not exceed: %d", + eq_count, IOIL_MAX_EQ); + eq_count = IOIL_MAX_EQ; + } + ioil_iog.iog_eq_count_max = (uint16_t)eq_count; + } else { + ioil_iog.iog_eq_count_max = IOIL_MAX_EQ; + } + ioil_iog.iog_initialized = true; } @@ -377,12 +398,55 @@ ioil_fini(void) ioil_shrink_pool(pool); } - if (ioil_iog.iog_daos_init) + if (ioil_iog.iog_daos_init) { + int i; + + /** destroy EQs created by threads */ + for (i = 0; i < ioil_iog.iog_eq_count; i++) + daos_eq_destroy(ioil_iog.iog_eqs[i], 0); + /** destroy main thread eq */ + if (daos_handle_is_valid(ioil_iog.iog_main_eqh)) + daos_eq_destroy(ioil_iog.iog_main_eqh, 0); daos_fini(); + } 
ioil_iog.iog_daos_init = false; daos_debug_fini(); } +int +ioil_get_eqh(daos_handle_t *eqh) +{ + int rc; + + if (daos_handle_is_valid(ioil_eqh)) { + *eqh = ioil_eqh; + return 0; + } + + /** No EQ support requested */ + if (ioil_iog.iog_eq_count_max == 0) + return -1; + + rc = pthread_mutex_lock(&ioil_iog.iog_lock); + /** create a new EQ if the EQ pool is not full; otherwise round robin EQ use from pool */ + if (ioil_iog.iog_eq_count >= ioil_iog.iog_eq_count_max) { + ioil_eqh = ioil_iog.iog_eqs[ioil_iog.iog_eq_idx ++]; + if (ioil_iog.iog_eq_idx == ioil_iog.iog_eq_count_max) + ioil_iog.iog_eq_idx = 0; + } else { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + pthread_mutex_unlock(&ioil_iog.iog_lock); + return -1; + } + ioil_iog.iog_eqs[ioil_iog.iog_eq_count] = ioil_eqh; + ioil_iog.iog_eq_count ++; + } + pthread_mutex_unlock(&ioil_iog.iog_lock); + *eqh = ioil_eqh; + return 0; +} + /* Get the object handle for the file itself */ static int fetch_dfs_obj_handle(int fd, struct fd_entry *entry) @@ -729,6 +793,20 @@ call_daos_init(int fd) return rcb; } +static void +child_hdlr(void) +{ + int rc; + + daos_dti_reset(); + ioil_eqh = DAOS_HDL_INVAL; + rc = daos_eq_create(&ioil_eqh); + if (rc) + DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + else + ioil_iog.iog_main_eqh = ioil_eqh; +} + /* Returns true on success */ static bool check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) @@ -764,10 +842,23 @@ check_ioctl_on_open(int fd, struct fd_entry *entry, int flags) rc = pthread_mutex_lock(&ioil_iog.iog_lock); D_ASSERT(rc == 0); - if (!ioil_iog.iog_daos_init) + if (!ioil_iog.iog_daos_init) { if (!call_daos_init(fd)) goto err; + if (ioil_iog.iog_eq_count_max) { + rc = daos_eq_create(&ioil_eqh); + if (rc) { + DFUSE_LOG_WARNING("daos_eq_create() failed: "DF_RC, DP_RC(rc)); + D_GOTO(err, rc = daos_der2errno(rc)); + } + ioil_iog.iog_main_eqh = ioil_eqh; + + rc = pthread_atfork(NULL, NULL, &child_hdlr); + D_ASSERT(rc == 0); + } + } + d_list_for_each_entry(pool, &ioil_iog.iog_pools_head, iop_pools) { if (uuid_compare(pool->iop_uuid, il_reply.fir_pool) != 0) continue; diff --git a/src/client/dfuse/il/int_read.c b/src/client/dfuse/il/int_read.c index 6b5ee1fd7b5..497e39273ab 100644 --- a/src/client/dfuse/il/int_read.c +++ b/src/client/dfuse/il/int_read.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,17 +15,52 @@ static ssize_t read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - daos_size_t read_size = 0; - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + daos_size_t read_size = 0; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); sgl.sg_nr = 1; d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, NULL); + + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, + &read_size, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, + NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_read() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/int_write.c b/src/client/dfuse/il/int_write.c index fc602f0a1c3..abbb573638d 100644 --- a/src/client/dfuse/il/int_write.c +++ b/src/client/dfuse/il/int_write.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,9 +15,11 @@ ssize_t ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - d_iov_t iov = {}; - d_sg_list_t sgl = {}; - int rc; + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + daos_event_t ev; + daos_handle_t eqh; + int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); @@ -25,7 +27,37 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en d_iov_set(&iov, (void *)buff, len); sgl.sg_iovs = &iov; - rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + rc = ioil_get_eqh(&eqh); + if (rc == 0) { + bool flag = false; + + rc = daos_event_init(&ev, eqh, NULL); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_init() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &ev); + if (rc) + D_GOTO(out, rc); + + while (1) { + rc = daos_event_test(&ev, DAOS_EQ_NOWAIT, &flag); + if (rc) { + DFUSE_TRA_ERROR(entry->fd_dfsoh, "daos_event_test() failed: "DF_RC, + DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + if (flag) + break; + sched_yield(); + } + rc = ev.ev_error; + } else { + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + } +out: if (rc) { DFUSE_TRA_ERROR(entry->fd_dfsoh, "dfs_write() failed: %d (%s)", rc, strerror(rc)); *errcode = rc; diff --git a/src/client/dfuse/il/ioil.h b/src/client/dfuse/il/ioil.h index 8c4a7205e4e..b9581b3bd77 100644 --- a/src/client/dfuse/il/ioil.h +++ b/src/client/dfuse/il/ioil.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2022 Intel Corporation. 
+ * (C) Copyright 2017-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -53,5 +53,7 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en ssize_t ioil_do_pwritev(const struct iovec *iov, int count, off_t position, struct fd_entry *entry, int *errcode); +int +ioil_get_eqh(daos_handle_t *eqh); #endif /* __IOIL_H__ */ diff --git a/src/client/dfuse/ops/fgetattr.c b/src/client/dfuse/ops/fgetattr.c index 481c9fc56e6..6fdee73515c 100644 --- a/src/client/dfuse/ops/fgetattr.c +++ b/src/client/dfuse/ops/fgetattr.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,11 +7,32 @@ #include "dfuse_common.h" #include "dfuse.h" +static void +dfuse_cb_getattr_cb(struct dfuse_event *ev) +{ + if (ev->de_ev.ev_error != 0) { + DFUSE_REPLY_ERR_RAW(ev->de_ie, ev->de_req, ev->de_ev.ev_error); + D_GOTO(release, 0); + } + + ev->de_attr.st_ino = ev->de_ie->ie_stat.st_ino; + + ev->de_ie->ie_stat = ev->de_attr; + + DFUSE_REPLY_ATTR(ev->de_ie, ev->de_req, &ev->de_attr); +release: + daos_event_fini(&ev->de_ev); + D_FREE(ev); +} + void dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) { - struct stat attr = {}; - int rc; + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_event *ev; + uint64_t eqt_idx; + struct dfuse_eq *eqt; + int rc; if (ie->ie_unlinked) { DFUSE_TRA_DEBUG(ie, "File is unlinked, returning most recent data"); @@ -19,17 +40,29 @@ dfuse_cb_getattr(fuse_req_t req, struct dfuse_inode_entry *ie) return; } - rc = dfs_ostat(ie->ie_dfs->dfs_ns, ie->ie_obj, &attr); - if (rc != 0) - D_GOTO(err, rc); + eqt_idx = atomic_fetch_add_relaxed(&dfuse_info->di_eqt_idx, 1); + eqt = &dfuse_info->di_eqt[eqt_idx % dfuse_info->di_eq_count]; + D_ALLOC_PTR(ev); + if (ev == NULL) + D_GOTO(err, rc = ENOMEM); - attr.st_ino = ie->ie_stat.st_ino; + ev->de_req = req; + ev->de_complete_cb = dfuse_cb_getattr_cb; + ev->de_ie = ie; - ie->ie_stat = attr; + rc = daos_event_init(&ev->de_ev, eqt->de_eq, NULL); + if (rc != -DER_SUCCESS) + D_GOTO(ev, rc = daos_der2errno(rc)); + + rc = dfs_ostatx(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_attr, &ev->de_ev); + if (rc != 0) + D_GOTO(ev, rc); - DFUSE_REPLY_ATTR(ie, req, &attr); + sem_post(&eqt->de_sem); return; +ev: + D_FREE(ev); err: DFUSE_REPLY_ERR_RAW(ie, req, rc); } diff --git a/src/client/java/hadoop-daos/pom.xml b/src/client/java/hadoop-daos/pom.xml index 34ecdf445ac..7f8dac9f9f6 100644 --- a/src/client/java/hadoop-daos/pom.xml +++ b/src/client/java/hadoop-daos/pom.xml @@ -15,7 +15,7 @@ jar - 3.3.3 + 3.3.6 ${project.basedir}/build ${project.basedir}/install diff --git a/src/client/serialize/SConscript b/src/client/serialize/SConscript index a50ffca0ebd..83b077d7867 100644 --- a/src/client/serialize/SConscript +++ b/src/client/serialize/SConscript @@ -3,6 +3,7 @@ def scons(): """Execute build""" + Import('env') denv = env.Clone() @@ -19,11 +20,13 @@ def scons(): src = ['daos_serialize.c'] if have_hdf5 is True: - print("Building with hdf5 bindings.") + if not GetOption('silent'): + print("Building with hdf5 bindings.") daos_serialize = denv.d_library('daos_serialize', src, LIBS=libraries) denv.Install('$PREFIX/lib64/', daos_serialize) else: - print("No installed hdf5 detected, DAOS serialization is not enabled") + if not GetOption('silent'): + print("No installed hdf5 detected, DAOS serialization is not enabled") if __name__ == "SCons.Script": diff --git a/src/common/ad_mem.c 
b/src/common/ad_mem.c index c3454a4cd2d..675906d466e 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -34,7 +34,6 @@ static int arena_tx_publish(struct ad_arena *arena, struct ad_tx *tx); static void arena_dump(struct ad_arena *arena); static inline int group_unit_avail(const struct ad_group_df *gd); static inline int group_weight(const struct ad_group_df *gd); -static int find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); #define ASSERT_DUMP_ARENA(cond, arena) \ do { \ @@ -129,21 +128,6 @@ static struct ad_group_spec grp_specs_large[] = { static struct ad_blob *dummy_blob; -static inline void -setbits64(uint64_t *bmap, int at, int bits) -{ - setbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -static inline void -clrbits64(uint64_t *bmap, int at, int bits) -{ - clrbit_range((uint8_t *)bmap, at, at + bits - 1); -} - -#define setbit64(bm, at) setbit(((uint8_t *)bm), at) -#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) -#define isset64(bm, at) isset(((uint8_t *)bm), at) static int group_u2b(int unit, int unit_nr) @@ -1007,7 +991,7 @@ arena_find(struct ad_blob *blob, uint32_t *arena_id, struct ad_arena_df **ad_p) if (id == AD_ARENA_ANY) { int bits = 1; - id = find_bits(bd->bd_bmap, blob->bb_bmap_rsv, blob_bmap_size(blob), 1, &bits); + id = daos_find_bits(bd->bd_bmap, blob->bb_bmap_rsv, blob_bmap_size(blob), 1, &bits); if (id < 0) { rc = -DER_NOSPACE; D_ERROR("Blob %s is full, cannot create more arena, "DF_RC"\n", @@ -1867,83 +1851,6 @@ arena_remove_grp(struct ad_arena *arena, struct ad_group *group) arena->ar_grp_nr--; } -/** Find requested number of unused bits (neither set it @used or @reserved */ -static int -find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) -{ - int nr_saved; - int at_saved; - int nr; - int at; - int i; - int j; - - nr = nr_saved = 0; - at = at_saved = -1; - - for (i = 0; i < bmap_sz; i++) { - uint64_t free_bits = ~used[i]; - - if (reserved) - free_bits &= ~reserved[i]; - - if (free_bits == 0) { /* no space in the current int64 */ - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - continue; - } - - j = ffsll(free_bits); - D_ASSERT(j > 0); - if (at >= 0 && j == 1) { - D_ASSERT(nr > 0); - nr++; - } else { - at = i * 64 + j - 1; - nr = 1; - } - - for (; j < 64; j++) { - if (nr == *bits) /* done */ - goto out; - - if (isset64(&free_bits, j)) { - if (at < 0) - at = i * 64 + j; - nr++; - continue; - } - - if (nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - nr = 0; - at = -1; - if ((free_bits >> j) == 0) - break; - } - if (nr == *bits) - goto out; - } - out: - if (nr == *bits || nr > nr_saved) { - nr_saved = nr; - at_saved = at; - } - - if (nr_saved >= bits_min) - *bits = nr_saved; - else - at_saved = -1; - - return at_saved; -} - /** reserve a new group within @arena */ static int arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, @@ -1981,7 +1888,7 @@ arena_reserve_grp(struct ad_arena *arena, daos_size_t size, int *pos, if (bits_min > bits) bits_min = bits; - bit_at = find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); + bit_at = daos_find_bits(ad->ad_bmap, arena->ar_space_rsv, ARENA_GRP_BMSZ, bits_min, &bits); if (bit_at < 0) return -DER_NOSPACE; @@ -2076,7 +1983,7 @@ group_reserve_addr(struct ad_group *grp, struct ad_reserv_act *act) int b = 1; int at; - at = find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); + at = daos_find_bits(gd->gd_bmap, grp->gp_bmap_rsv, GRP_UNIT_BMSZ, 1, &b); /* 
NB: bitmap may includes more bits than the actual number of units */ if (at < 0 || at >= gd->gd_unit_nr) return 0; diff --git a/src/common/misc.c b/src/common/misc.c index bc902538e1a..a3a8c7bfd6c 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -705,6 +705,8 @@ daos_crt_init_opt_get(bool server, int ctx_nr) return &daos_crt_init_opt; } +static __thread uuid_t dti_uuid; + void daos_dti_gen_unique(struct dtx_id *dti) { @@ -719,19 +721,23 @@ daos_dti_gen_unique(struct dtx_id *dti) void daos_dti_gen(struct dtx_id *dti, bool zero) { - static __thread uuid_t uuid; - if (zero) { memset(dti, 0, sizeof(*dti)); } else { - if (uuid_is_null(uuid)) - uuid_generate(uuid); + if (uuid_is_null(dti_uuid)) + uuid_generate(dti_uuid); - uuid_copy(dti->dti_uuid, uuid); + uuid_copy(dti->dti_uuid, dti_uuid); dti->dti_hlc = d_hlc_get(); } } +void +daos_dti_reset(void) +{ + memset(dti_uuid, 0, sizeof(dti_uuid)); +} + /** * daos_recx_alloc/_free to provide same log facility for recx's alloc and free * for iom->iom_recxs' usage for example. @@ -773,3 +779,108 @@ daos_hlc2timestamp(uint64_t hlc, time_t *ts) *ts = tspec.tv_sec; return 0; } + +/** Find requested number of unused bits (neither set it @used or @reserved */ +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits) +{ + int nr_saved; + int at_saved; + int nr; + int at; + int i; + int j; + + nr = nr_saved = 0; + at = at_saved = -1; + + for (i = 0; i < bmap_sz; i++) { + uint64_t free_bits = ~used[i]; + + if (reserved) + free_bits &= ~reserved[i]; + + if (free_bits == 0) { /* no space in the current int64 */ + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + continue; + } + + j = ffsll(free_bits); + D_ASSERT(j > 0); + if (at >= 0 && j == 1) { + D_ASSERT(nr > 0); + nr++; + } else { + at = i * 64 + j - 1; + nr = 1; + } + + for (; j < 64; j++) { + if (nr == *bits) /* done */ + goto out; + + if (isset64(&free_bits, j)) { + if (at < 0) + at = i * 64 + j; + nr++; + continue; + } + + if (nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + nr = 0; + at = -1; + if ((free_bits >> j) == 0) + break; + } + if (nr == *bits) + goto out; + } + out: + if (nr == *bits || nr > nr_saved) { + nr_saved = nr; + at_saved = at; + } + + if (nr_saved >= bits_min) + *bits = nr_saved; + else + at_saved = -1; + + return at_saved; +} + +int +daos_count_free_bits(uint64_t *used, int bmap_sz) +{ + int i; + int j; + int nr = 0; + + for (i = 0; i < bmap_sz; i++) { + uint64_t free_bits = ~used[i]; + + /* no free bits in the current int64 */ + if (free_bits == 0) + continue; + + j = ffsll(free_bits); + D_ASSERT(j > 0); + nr++; + for (; j < 64; j++) { + if (isset64(&free_bits, j)) + nr++; + if ((free_bits >> j) == 0) + break; + } + } + + return nr; +} diff --git a/src/control/cmd/daos_agent/main.go b/src/control/cmd/daos_agent/main.go index 8f5c135b4d9..f6906a1fc83 100644 --- a/src/control/cmd/daos_agent/main.go +++ b/src/control/cmd/daos_agent/main.go @@ -257,6 +257,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAgent), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/cmd/daos_server/start.go b/src/control/cmd/daos_server/start.go index bb773d02b3a..f2d7b77feda 100644 --- a/src/control/cmd/daos_server/start.go +++ b/src/control/cmd/daos_server/start.go @@ -64,7 +64,9 @@ func (cmd *startCmd) setCLIOverrides() error { if cmd.Modules != nil { cmd.config.WithModules(*cmd.Modules) } - 
cmd.config.RecreateSuperblocks = cmd.RecreateSuperblocks + if cmd.RecreateSuperblocks { + cmd.Notice("--recreate-superblocks is deprecated and no longer needed to use externally-managed tmpfs") + } for _, srv := range cmd.config.Engines { if cmd.Targets > 0 { diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index d9d1eacfa2d..a15c65867a4 100644 --- a/src/control/cmd/dmg/main.go +++ b/src/control/cmd/dmg/main.go @@ -302,6 +302,7 @@ func main() { ctlInvoker := control.NewClient( control.WithClientLogger(log), + control.WithClientComponent(build.ComponentAdmin), ) if err := parseOpts(os.Args[1:], &opts, ctlInvoker, log); err != nil { diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index 39e1c4179c8..df6936c8a9f 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -199,6 +199,7 @@ type PoolCreateCmd struct { NumSvcReps uint32 `short:"v" long:"nsvc" description:"Number of pool service replicas"` ScmSize sizeFlag `short:"s" long:"scm-size" description:"Per-engine SCM allocation for DAOS pool (manual)"` NVMeSize sizeFlag `short:"n" long:"nvme-size" description:"Per-engine NVMe allocation for DAOS pool (manual)"` + MetaSize sizeFlag `long:"meta-size" description:"In MD-on-SSD mode specify meta blob size to be used in DAOS pool (manual)"` RankList ui.RankSetFlag `short:"r" long:"ranks" description:"Storage engine unique identifiers (ranks) for DAOS pool"` Args struct { @@ -208,11 +209,18 @@ type PoolCreateCmd struct { // Execute is run when PoolCreateCmd subcommand is activated func (cmd *PoolCreateCmd) Execute(args []string) error { - if cmd.Size.IsSet() && (cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet()) { - return errIncompatFlags("size", "scm-size", "nvme-size") - } - if !cmd.Size.IsSet() && !cmd.ScmSize.IsSet() { - return errors.New("either --size or --scm-size must be supplied") + if cmd.Size.IsSet() { + if cmd.ScmSize.IsSet() || cmd.NVMeSize.IsSet() { + return errIncompatFlags("size", "scm-size", "nvme-size") + } + if cmd.MetaSize.IsSet() { + // NOTE DAOS-14223: --meta-size value is currently not taken into account + // when storage tier sizes are auto-calculated so only + // support in manual mode. 
+ return errors.New("--meta-size can only be set if --scm-size is set") + } + } else if !cmd.ScmSize.IsSet() { + return errors.New("either --size or --scm-size must be set") } if cmd.Args.PoolLabel != "" { @@ -299,13 +307,22 @@ func (cmd *PoolCreateCmd) Execute(args []string) error { scmBytes := cmd.ScmSize.bytes nvmeBytes := cmd.NVMeSize.bytes + metaBytes := cmd.MetaSize.bytes scmRatio := cmd.updateRequest(req, scmBytes, nvmeBytes) - cmd.Infof("Creating DAOS pool with manual per-engine storage allocation: "+ - "%s SCM, %s NVMe (%0.2f%% ratio)", - humanize.Bytes(scmBytes), - humanize.Bytes(nvmeBytes), - scmRatio*100) + if metaBytes > 0 && metaBytes < scmBytes { + return errors.Errorf("--meta-size (%s) can not be smaller than --scm-size (%s)", + humanize.Bytes(metaBytes), humanize.Bytes(scmBytes)) + } + req.MetaBytes = metaBytes + + msg := fmt.Sprintf("Creating DAOS pool with manual per-engine storage allocation:"+ + " %s SCM, %s NVMe (%0.2f%% ratio)", humanize.Bytes(scmBytes), + humanize.Bytes(nvmeBytes), scmRatio*100) + if metaBytes > 0 { + msg += fmt.Sprintf(" with %s meta-blob-size", humanize.Bytes(metaBytes)) + } + cmd.Info(msg) } resp, err := control.PoolCreate(context.Background(), cmd.ctlInvoker, req) diff --git a/src/control/cmd/dmg/pool_test.go b/src/control/cmd/dmg/pool_test.go index 8f17d7e6e4f..0c53669aa10 100644 --- a/src/control/cmd/dmg/pool_test.go +++ b/src/control/cmd/dmg/pool_test.go @@ -226,7 +226,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with missing size", "pool create label", "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with missing label", @@ -276,6 +276,12 @@ func TestPoolCommands(t *testing.T) { "", errors.New("may not be mixed"), }, + { + "Create pool with incompatible arguments (auto with meta-blob)", + fmt.Sprintf("pool create label --size %s --meta-size 32G", testSizeStr), + "", + errors.New("can only be set"), + }, { "Create pool with too-large tier-ratio (auto)", fmt.Sprintf("pool create label --size %s --tier-ratio 200", testSizeStr), @@ -355,7 +361,7 @@ func TestPoolCommands(t *testing.T) { "Create pool with incompatible arguments (-n without -s)", fmt.Sprintf("pool create label --nvme-size %s", testSizeStr), "", - errors.New("must be supplied"), + errors.New("must be set"), }, { "Create pool with minimal arguments", @@ -374,6 +380,30 @@ func TestPoolCommands(t *testing.T) { }, " "), nil, }, + { + "Create pool with manual meta blob size", + fmt.Sprintf("pool create label --scm-size %s --meta-size 1024G", + testSizeStr), + strings.Join([]string{ + printRequest(t, &control.PoolCreateReq{ + User: eUsr.Username + "@", + UserGroup: eGrp.Name + "@", + Ranks: []ranklist.Rank{}, + TierBytes: []uint64{uint64(testSize), 0}, + MetaBytes: humanize.GByte * 1024, + Properties: []*daos.PoolProperty{ + propWithVal("label", "label"), + }, + }), + }, " "), + nil, + }, + { + "Create pool with manual meta blob size smaller than scm", + "pool create label --scm-size 1026G --meta-size 1024G", + "", + errors.New("can not be smaller than"), + }, { "Create pool with manual ranks", fmt.Sprintf("pool create label --size %s --ranks 1,2", testSizeStr), diff --git a/src/control/common/proto/consts.go b/src/control/common/proto/consts.go new file mode 100644 index 00000000000..039e7fe23f2 --- /dev/null +++ b/src/control/common/proto/consts.go @@ -0,0 +1,14 @@ +// +// (C) Copyright 2023 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package proto + +const ( + // DaosComponentHeader defines the header name used to convey the component name. + DaosComponentHeader = "x-daos-component" + // DaosVersionHeader defines the header name used to convey the component version. + DaosVersionHeader = "x-daos-version" +) diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index a2edc22c67f..624e58fb459 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -136,7 +136,7 @@ func Debug(msg proto.Message) string { fmt.Fprintf(&bld, " %s:%s", p.Label, p.State) } case *mgmtpb.JoinResp: - fmt.Fprintf(&bld, "%T rank:%d (state:%s, local:%t) map:%d", m, m.Rank, m.State, m.LocalJoin, m.MapVersion) + fmt.Fprintf(&bld, "%T rank:%d (state:%s) map:%d", m, m.Rank, m.State, m.MapVersion) case *mgmtpb.GetAttachInfoResp: msRanks := ranklist.RankSetFromRanks(ranklist.RanksFromUint32(m.MsRanks)) uriRanks := ranklist.NewRankSet() diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 00841f9fa28..60ae78baa40 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -308,13 +308,14 @@ type PoolCreateReq struct { // representing members of the tree in a breadth-first traversal order. // Each domain above rank consists of: (level, id, num children) // Each rank consists of: (rank number) - FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format - Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas - Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) - Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) - Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) - Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) - Tierbytes []uint64 `protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + FaultDomains []uint32 `protobuf:"varint,7,rep,packed,name=faultDomains,proto3" json:"faultDomains,omitempty"` // Fault domain tree, minimal format + Numsvcreps uint32 `protobuf:"varint,8,opt,name=numsvcreps,proto3" json:"numsvcreps,omitempty"` // desired number of pool service replicas + Totalbytes uint64 `protobuf:"varint,9,opt,name=totalbytes,proto3" json:"totalbytes,omitempty"` // Total pool size in bytes (auto config) + Tierratio []float64 `protobuf:"fixed64,10,rep,packed,name=tierratio,proto3" json:"tierratio,omitempty"` // Ratio of storage tiers expressed as % of totalbytes (auto config) + Numranks uint32 `protobuf:"varint,11,opt,name=numranks,proto3" json:"numranks,omitempty"` // Number of target ranks to use (auto config) + Ranks []uint32 `protobuf:"varint,12,rep,packed,name=ranks,proto3" json:"ranks,omitempty"` // target ranks (manual config) + Tierbytes []uint64 
`protobuf:"varint,13,rep,packed,name=tierbytes,proto3" json:"tierbytes,omitempty"` // Size in bytes of storage tiers (manual config) + MetaBlobSize uint64 `protobuf:"varint,14,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateReq) Reset() { @@ -440,17 +441,25 @@ func (x *PoolCreateReq) GetTierbytes() []uint64 { return nil } +func (x *PoolCreateReq) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolCreateResp returns created pool uuid and ranks. type PoolCreateResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code - Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader - SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks - TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks - TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" json:"tier_bytes,omitempty"` // storage tiers allocated to pool + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code + Leader uint32 `protobuf:"varint,2,opt,name=leader,proto3" json:"leader,omitempty"` // Current service leader + SvcReps []uint32 `protobuf:"varint,3,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // pool service replica ranks + TgtRanks []uint32 `protobuf:"varint,4,rep,packed,name=tgt_ranks,json=tgtRanks,proto3" json:"tgt_ranks,omitempty"` // pool target ranks + TierBytes []uint64 `protobuf:"varint,5,rep,packed,name=tier_bytes,json=tierBytes,proto3" json:"tier_bytes,omitempty"` // storage tiers allocated to pool + MetaBlobSize uint64 `protobuf:"varint,6,opt,name=meta_blob_size,json=metaBlobSize,proto3" json:"meta_blob_size,omitempty"` // Size in bytes of metadata blob on SSD (manual config) } func (x *PoolCreateResp) Reset() { @@ -520,6 +529,13 @@ func (x *PoolCreateResp) GetTierBytes() []uint64 { return nil } +func (x *PoolCreateResp) GetMetaBlobSize() uint64 { + if x != nil { + return x.MetaBlobSize + } + return 0 +} + // PoolDestroyReq supplies pool identifier and force flag. 
type PoolDestroyReq struct { state protoimpl.MessageState @@ -2823,7 +2839,7 @@ var File_mgmt_pool_proto protoreflect.FileDescriptor var file_mgmt_pool_proto_rawDesc = []byte{ 0x0a, 0x0f, 0x6d, 0x67, 0x6d, 0x74, 0x2f, 0x70, 0x6f, 0x6f, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xff, 0x02, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, + 0x6f, 0x12, 0x04, 0x6d, 0x67, 0x6d, 0x74, 0x22, 0xa5, 0x03, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, @@ -2847,294 +2863,298 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, - 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x97, 0x01, 0x0a, 0x0e, 0x50, 0x6f, - 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, - 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x67, 0x74, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, - 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, - 0x74, 0x65, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, - 0x72, 0x6f, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, - 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, - 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, - 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, - 0x63, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 
0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, - 0x0a, 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x5f, 0x64, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x0c, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, - 0x07, 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x6d, 0x61, 0x63, 0x68, 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0e, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0xbd, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x74, 0x67, 0x74, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, + 0x52, 0x08, 0x74, 0x67, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, + 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x74, + 0x61, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x22, + 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, + 0x73, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x72, 0x65, 0x63, 0x75, + 0x72, 0x73, 0x69, 0x76, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x14, 
0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, - 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, - 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, - 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, - 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, - 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, - 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x22, 0xc0, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x18, 0x0a, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x07, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x64, 0x65, 0x73, + 0x74, 0x72, 0x6f, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x5f, 0x64, 0x65, + 0x73, 0x74, 0x72, 0x6f, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x66, 0x6f, 0x72, + 0x63, 0x65, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x63, + 0x68, 0x69, 0x6e, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, 0x63, 0x68, + 0x69, 0x6e, 0x65, 0x22, 0x3d, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x76, 0x69, 0x63, 0x74, + 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x63, 0x6f, 0x75, + 0x6e, 0x74, 0x22, 0x81, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 
0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, - 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, - 0xa6, 0x01, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, + 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, + 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x22, 0x7f, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, - 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, - 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, - 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, - 0x79, 0x74, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, - 0x61, 0x69, 0x6e, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, 0x22, 0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, - 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, - 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, - 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, - 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, - 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, - 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, - 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, - 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2e, 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, - 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, - 0x6c, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, - 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, - 0x72, 0x65, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, - 0x65, 0x70, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, - 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x22, 0x4c, - 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x22, 0x27, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0xa6, 0x01, 0x0a, 0x0d, + 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x7b, 0x0a, 0x0c, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, - 0x72, 
0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, - 0x04, 0x43, 0x6f, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, - 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, - 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, - 0x6b, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, - 0x65, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, - 0x16, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, - 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, - 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, - 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, - 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, - 0x72, 0x65, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x03, 0x6d, 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x03, 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xbb, 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x14, 0x0a, 0x05, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x05, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, + 0x18, 0x06, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x0c, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x73, 0x22, 
0x47, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x45, 0x78, 0x74, 0x65, + 0x6e, 0x64, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, + 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, 0xa3, 0x01, + 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x09, 0x74, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x69, 0x64, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, 0x74, + 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x04, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x62, 0x79, + 0x74, 0x65, 0x73, 0x22, 0x2d, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x69, 0x6e, 0x74, + 0x65, 0x67, 0x72, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x20, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0d, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, + 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2e, + 0x0a, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x05, 0x70, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x21, + 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x1a, 0x86, 0x01, 0x0a, 0x04, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, + 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, + 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, + 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 
0x65, 0x22, 0x4c, 0x0a, 0x0b, 0x4c, 0x69, + 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x7b, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x1d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, - 0x18, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, - 0x22, 0xed, 0x04, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, - 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, - 0x69, 0x76, 0x65, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, - 0x12, 0x29, 0x0a, 0x10, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, - 0x62, 0x6c, 0x65, 0x64, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, - 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, - 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, - 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 
0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, - 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, - 0x0e, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, - 0x67, 0x69, 0x6e, 0x65, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x45, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, - 0x6c, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x0d, 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, - 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, - 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, - 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, - 0x2c, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, - 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, - 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, - 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x12, 0x37, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x18, 0x02, + 0x20, 
0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x52, 0x0a, 0x63, + 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x1a, 0x0a, 0x04, 0x43, 0x6f, 0x6e, + 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x75, 0x75, 0x69, 0x64, 0x22, 0xb7, 0x01, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, + 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x45, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x69, 0x6e, 0x63, + 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x69, 0x6e, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, + 0xac, 0x01, 0x0a, 0x11, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, + 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, + 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, + 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6d, 0x69, + 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, + 0x6d, 0x61, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xbb, + 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x33, 0x0a, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, + 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, + 0x63, 0x6f, 0x72, 0x64, 0x73, 
0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, + 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, + 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x02, 0x22, 0xed, 0x04, 0x0a, + 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, + 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, + 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, + 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, + 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 
0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, + 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, + 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, + 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, + 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, + 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, + 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, + 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, + 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, + 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, + 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, + 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, + 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, + 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 
0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, - 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, - 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, - 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, - 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, - 0x75, 
0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, - 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, - 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, - 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, - 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, - 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, - 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, - 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, - 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, - 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, - 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, - 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, - 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, - 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, - 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, - 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, - 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, - 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, - 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x47, 0x65, 0x74, 
0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, + 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, + 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, + 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 
0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, + 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, + 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, + 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, + 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, + 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, + 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, + 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, + 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, + 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, + 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, + 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, + 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index 74d11533864..e6988dca637 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.31.0 // protoc v3.5.0 // source: mgmt/svc.proto @@ -342,7 +342,6 @@ type JoinResp struct { Rank uint32 `protobuf:"varint,2,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank assigned. State JoinResp_State `protobuf:"varint,3,opt,name=state,proto3,enum=mgmt.JoinResp_State" json:"state,omitempty"` // Server state in the system map. FaultDomain string `protobuf:"bytes,4,opt,name=faultDomain,proto3" json:"faultDomain,omitempty"` // Fault domain for the instance - LocalJoin bool `protobuf:"varint,5,opt,name=localJoin,proto3" json:"localJoin,omitempty"` // Join processed locally. MapVersion uint32 `protobuf:"varint,6,opt,name=map_version,json=mapVersion,proto3" json:"map_version,omitempty"` // Join processed in this version of the system map. } @@ -406,13 +405,6 @@ func (x *JoinResp) GetFaultDomain() string { return "" } -func (x *JoinResp) GetLocalJoin() bool { - if x != nil { - return x.LocalJoin - } - return false -} - func (x *JoinResp) GetMapVersion() uint32 { if x != nil { return x.MapVersion @@ -1159,7 +1151,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, @@ -1167,85 +1159,84 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, - 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, - 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, - 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, - 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, - 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, - 0x61, 0x64, 
0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, - 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, - 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, - 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, - 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, - 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, - 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, - 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, - 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, - 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, - 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, - 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, - 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, - 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, - 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, - 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x74, - 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, - 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, - 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, - 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, - 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, - 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, - 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 
0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, - 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, - 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, - 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, - 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, - 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, - 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, - 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, - 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, - 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, - 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, - 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, - 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, - 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, - 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, - 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, + 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, + 0x10, 0x01, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, + 0x6f, 0x69, 0x6e, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 
0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, + 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, + 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, + 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, + 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, + 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, + 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, + 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, + 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, + 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, + 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, + 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, + 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, + 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, + 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, + 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 
0x52, 0x08, 0x72, + 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, + 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, + 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, + 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, + 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, + 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, + 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, + 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, + 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, + 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, + 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, + 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, + 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, + 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, + 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, + 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, + 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index cd88b5acf25..81c17facecd 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -129,6 +129,10 @@ func CmpErrBool(want, got error) bool { func 
CmpErr(t *testing.T, want, got error) { t.Helper() + if want != nil && want.Error() == "" { + t.Fatal("comparison with empty error will always return true, don't do it") + } + if !CmpErrBool(want, got) { t.Fatalf("unexpected error\n(wanted: %v, got: %v)", want, got) } diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index eff960b6f30..89bfb32bed0 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -79,6 +79,7 @@ const ( ScmBadRegion ScmInvalidPMem ScmRamdiskLowMem + ScmRamdiskBadSize ScmConfigTierMissing ) diff --git a/src/control/lib/control/interceptors.go b/src/control/lib/control/interceptors.go index d3c4c3da375..d507845fc46 100644 --- a/src/control/lib/control/interceptors.go +++ b/src/control/lib/control/interceptors.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,8 +12,10 @@ import ( "github.com/pkg/errors" "google.golang.org/grpc" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/security" ) @@ -59,8 +61,8 @@ func streamErrorInterceptor() grpc.DialOption { } // unaryErrorInterceptor calls the specified unary RPC and returns any unwrapped errors. -func unaryErrorInterceptor() grpc.DialOption { - return grpc.WithUnaryInterceptor(func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { +func unaryErrorInterceptor() grpc.UnaryClientInterceptor { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { err := invoker(ctx, method, req, reply, cc, opts...) if err != nil { st := status.Convert(err) @@ -71,5 +73,25 @@ func unaryErrorInterceptor() grpc.DialOption { return connErrToFault(st, cc.Target()) } return nil - }) + } +} + +// unaryVersionedComponentInterceptor appends the component name and version to the +// outgoing request headers. +func unaryVersionedComponentInterceptor(comp build.Component) grpc.UnaryClientInterceptor { + return func(parent context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + // NB: The caller should specify its component, but as a fallback, we + // can make a decent guess about the calling component based on the method. + if comp == build.ComponentAny { + var err error + if comp, err = security.MethodToComponent(method); err != nil { + return errors.Wrap(err, "unable to determine component from method") + } + } + ctx := metadata.AppendToOutgoingContext(parent, + proto.DaosComponentHeader, comp.String(), + proto.DaosVersionHeader, build.DaosVersion, + ) + return invoker(ctx, method, req, reply, cc, opts...) 
+ } } diff --git a/src/control/lib/control/mocks.go b/src/control/lib/control/mocks.go index 752d597abdb..077937d26a0 100644 --- a/src/control/lib/control/mocks.go +++ b/src/control/lib/control/mocks.go @@ -21,6 +21,7 @@ import ( "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/runtime/protoimpl" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" commonpb "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/proto/convert" @@ -50,6 +51,7 @@ type ( // for a MockInvoker. MockInvokerConfig struct { Sys string + Component build.Component UnaryError error UnaryResponse *UnaryResponse UnaryResponseSet []*UnaryResponse @@ -102,6 +104,10 @@ func (mi *MockInvoker) GetSystem() string { return mi.cfg.Sys } +func (mi *MockInvoker) GetComponent() build.Component { + return mi.cfg.Component +} + func (mi *MockInvoker) InvokeUnaryRPC(ctx context.Context, uReq UnaryRequest) (*UnaryResponse, error) { // Allow the test to override the timeouts set by the caller. if mi.cfg.ReqTimeout > 0 { diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index b849099c511..4982a9edc61 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -248,6 +248,7 @@ type ( // manual params Ranks []ranklist.Rank TierBytes []uint64 + MetaBytes uint64 `json:"meta_blob_size"` } // PoolCreateResp contains the response from a pool create request. diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index 7ee31167317..4d2e85ef913 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -17,6 +17,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common/proto/convert" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/fault" @@ -349,13 +350,58 @@ func TestControl_PoolEvict(t *testing.T) { } } +func strVal(s string) daos.PoolPropertyValue { + v := daos.PoolPropertyValue{} + v.SetString(s) + return v +} + +func TestControl_PoolCreateReq_Convert(t *testing.T) { + req := &PoolCreateReq{ + User: "bob", + UserGroup: "work", + NumSvcReps: 2, + TotalBytes: 1, + TierRatio: []float64{0.06, 0.94}, + NumRanks: 3, + Ranks: []ranklist.Rank{1, 2, 3}, + TierBytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBytes: 2 * humanize.GiByte, + Properties: []*daos.PoolProperty{ + { + Name: "label", + Number: daos.PoolPropertyLabel, + Value: strVal("foo"), + }, + }, + } + reqPB := new(mgmtpb.PoolCreateReq) + if err := convert.Types(req, reqPB); err != nil { + t.Fatal(err) + } + expReqPB := &mgmtpb.PoolCreateReq{ + User: "bob", + Usergroup: "work", + Numsvcreps: 2, + Totalbytes: 1, + Tierratio: []float64{0.06, 0.94}, + Numranks: 3, + Ranks: []uint32{1, 2, 3}, + Tierbytes: []uint64{humanize.GiByte, 10 * humanize.GiByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: []*mgmtpb.PoolProperty{ + {Number: 1, Value: &mgmtpb.PoolProperty_Strval{"foo"}}, + }, + } + + cmpOpt := cmpopts.IgnoreUnexported(mgmtpb.PoolCreateReq{}, mgmtpb.PoolProperty{}) + if diff := cmp.Diff(expReqPB, reqPB, cmpOpt); diff != "" { + t.Fatalf("Unexpected response (-want, +got):\n%s\n", diff) + } +} + func TestControl_PoolCreate(t *testing.T) { mockExt := auth.NewMockExtWithUser("poolTest", 0, 0) - strVal := func(s string) daos.PoolPropertyValue { - v := 
daos.PoolPropertyValue{} - v.SetString(s) - return v - } for name, tc := range map[string]struct { mic *MockInvokerConfig diff --git a/src/control/lib/control/rpc.go b/src/control/lib/control/rpc.go index f5424ebaf91..e00374603c4 100644 --- a/src/control/lib/control/rpc.go +++ b/src/control/lib/control/rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +19,7 @@ import ( "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/fault/code" @@ -88,6 +89,7 @@ type ( UnaryInvoker interface { sysGetter debugLogger + GetComponent() build.Component InvokeUnaryRPC(ctx context.Context, req UnaryRequest) (*UnaryResponse, error) InvokeUnaryRPCAsync(ctx context.Context, req UnaryRequest) (HostResponseChan, error) } @@ -122,14 +124,22 @@ type ( // Client implements the Invoker interface and should be provided to // API methods to invoke RPCs. Client struct { - config *Config - log debugLogger + config *Config + log debugLogger + component build.Component } // ClientOption defines the signature for functional Client options. ClientOption func(c *Client) ) +// WithClientComponent sets the client's component. +func WithClientComponent(comp build.Component) ClientOption { + return func(c *Client) { + c.component = comp + } +} + // WithClientLogger sets the client's debugLogger. func WithClientLogger(log debugLogger) ClientOption { return func(c *Client) { @@ -171,6 +181,11 @@ func DefaultClient() *Client { ) } +// GetComponent returns the client's component. +func (c *Client) GetComponent() build.Component { + return c.component +} + // SetConfig sets the client configuration for an // existing Client. func (c *Client) SetConfig(cfg *Config) { @@ -196,7 +211,10 @@ func (c *Client) Debugf(fmtStr string, args ...interface{}) { func (c *Client) dialOptions() ([]grpc.DialOption, error) { opts := []grpc.DialOption{ streamErrorInterceptor(), - unaryErrorInterceptor(), + grpc.WithChainUnaryInterceptor( + unaryErrorInterceptor(), + unaryVersionedComponentInterceptor(c.GetComponent()), + ), grpc.FailOnNonTempDialError(true), } diff --git a/src/control/security/grpc_authorization.go b/src/control/security/grpc_authorization.go index 39a3d67dc2b..de9ef2bddf5 100644 --- a/src/control/security/grpc_authorization.go +++ b/src/control/security/grpc_authorization.go @@ -6,6 +6,12 @@ package security +import ( + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" +) + // Component represents the DAOS component being granted authorization. type Component int @@ -77,6 +83,24 @@ var methodAuthorizations = map[string][]Component{ "/RaftTransport/InstallSnapshot": {ComponentServer}, } +func methodToComponent(method string, methodAuthorizations map[string][]Component) (build.Component, error) { + comps, found := methodAuthorizations[method] + if !found || len(comps) == 0 { + return build.ComponentAny, errors.Errorf("method %q does not map to a known authorized component", method) + } else if len(comps) > 1 { + // In this case, the caller must explicitly set the component and cannot + // rely on this helper to resolve it. 
+ return build.ComponentAny, errors.Errorf("method %q maps to multiple authorized components", method) + } + + return build.Component(comps[0].String()), nil +} + +// MethodToComponent resolves a gRPC method string to a build.Component. +func MethodToComponent(method string) (build.Component, error) { + return methodToComponent(method, methodAuthorizations) +} + // HasAccess check if the given component has access to method given in FullMethod func (c Component) HasAccess(FullMethod string) bool { compList, ok := methodAuthorizations[FullMethod] diff --git a/src/control/security/grpc_authorization_test.go b/src/control/security/grpc_authorization_test.go index 25b43873744..ee31dcbd0e2 100644 --- a/src/control/security/grpc_authorization_test.go +++ b/src/control/security/grpc_authorization_test.go @@ -12,6 +12,9 @@ import ( "strings" "testing" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" @@ -218,3 +221,56 @@ func TestSecurity_AuthorizedRpcsAreValid(t *testing.T) { }) } } + +func TestSecurity_MethodToCompnent(t *testing.T) { + for name, tc := range map[string]struct { + method string + authMap map[string][]Component + expComp build.Component + expErr error + }{ + "method maps to an unknown component": { + method: "/unknown", + expErr: errors.New("does not map"), + }, + "method maps to 0 components": { + method: "/zero", + authMap: map[string][]Component{ + "/zero": nil, + }, + expErr: errors.New("does not map"), + }, + "method maps to 2 components": { + method: "/two", + authMap: map[string][]Component{ + "/two": {ComponentAdmin, ComponentAgent}, + }, + expErr: errors.New("multiple authorized"), + }, + "method maps to 1 component": { + method: "/one", + authMap: map[string][]Component{ + "/one": {ComponentServer}, + }, + expComp: build.ComponentServer, + }, + } { + t.Run(name, func(t *testing.T) { + var gotComp build.Component + var gotErr error + + if tc.authMap != nil { + gotComp, gotErr = methodToComponent(tc.method, tc.authMap) + } else { + gotComp, gotErr = MethodToComponent(tc.method) + } + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expComp, gotComp, "unexpected component") + }) + } +} diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index a860edae8f9..974d11161f8 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -40,26 +40,25 @@ const ( // See utils/config/daos_server.yml for parameter descriptions. 
type Server struct { // control-specific - ControlPort int `yaml:"port"` - TransportConfig *security.TransportConfig `yaml:"transport_config"` - Engines []*engine.Config `yaml:"engines"` - BdevExclude []string `yaml:"bdev_exclude,omitempty"` - DisableVFIO bool `yaml:"disable_vfio"` - DisableVMD *bool `yaml:"disable_vmd"` - EnableHotplug bool `yaml:"enable_hotplug"` - NrHugepages int `yaml:"nr_hugepages"` // total for all engines - SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines - DisableHugepages bool `yaml:"disable_hugepages"` - ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` - ControlLogFile string `yaml:"control_log_file,omitempty"` - ControlLogJSON bool `yaml:"control_log_json,omitempty"` - HelperLogFile string `yaml:"helper_log_file,omitempty"` - FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` - RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` - FaultPath string `yaml:"fault_path,omitempty"` - TelemetryPort int `yaml:"telemetry_port,omitempty"` - CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` - ClientEnvVars []string `yaml:"client_env_vars,omitempty"` + ControlPort int `yaml:"port"` + TransportConfig *security.TransportConfig `yaml:"transport_config"` + Engines []*engine.Config `yaml:"engines"` + BdevExclude []string `yaml:"bdev_exclude,omitempty"` + DisableVFIO bool `yaml:"disable_vfio"` + DisableVMD *bool `yaml:"disable_vmd"` + EnableHotplug bool `yaml:"enable_hotplug"` + NrHugepages int `yaml:"nr_hugepages"` // total for all engines + SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines + DisableHugepages bool `yaml:"disable_hugepages"` + ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` + ControlLogFile string `yaml:"control_log_file,omitempty"` + ControlLogJSON bool `yaml:"control_log_json,omitempty"` + HelperLogFile string `yaml:"helper_log_file,omitempty"` + FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` + FaultPath string `yaml:"fault_path,omitempty"` + TelemetryPort int `yaml:"telemetry_port,omitempty"` + CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` + ClientEnvVars []string `yaml:"client_env_vars,omitempty"` // duplicated in engine.Config SystemName string `yaml:"name"` @@ -87,13 +86,6 @@ func (cfg *Server) WithCoreDumpFilter(filter uint8) *Server { return cfg } -// WithRecreateSuperblocks indicates that a missing superblock should not be treated as -// an error. The server will create new superblocks as necessary. -func (cfg *Server) WithRecreateSuperblocks() *Server { - cfg.RecreateSuperblocks = true - return cfg -} - // WithSystemName sets the system name. func (cfg *Server) WithSystemName(name string) *Server { cfg.SystemName = name diff --git a/src/control/server/config/server_legacy.go b/src/control/server/config/server_legacy.go index b09092a9ca9..fba515a0c54 100644 --- a/src/control/server/config/server_legacy.go +++ b/src/control/server/config/server_legacy.go @@ -18,6 +18,8 @@ type ServerLegacy struct { EnableVMD *bool `yaml:"enable_vmd,omitempty"` // Detect outdated "servers" config, to direct users to change their config file. Servers []*engine.Config `yaml:"servers,omitempty"` + // Detect outdated "recreate_superblocks" config, to direct users to change their config file. 
+ RecreateSuperblocks bool `yaml:"recreate_superblocks,omitempty"` } // WithEnableVMD can be used to set the state of VMD functionality, @@ -27,6 +29,13 @@ func (sl *ServerLegacy) WithEnableVMD(enabled bool) *ServerLegacy { return sl } +// WithRecreateSuperblocks indicates that a missing superblock should not be treated as +// an error. The server will create new superblocks as necessary. +func (sl *ServerLegacy) WithRecreateSuperblocks() *ServerLegacy { + sl.RecreateSuperblocks = true + return sl +} + func updateVMDSetting(legacyCfg ServerLegacy, srvCfg *Server) error { switch { case legacyCfg.EnableVMD == nil: diff --git a/src/control/server/ctl_storage.go b/src/control/server/ctl_storage.go index fd75f01263c..f4747f87513 100644 --- a/src/control/server/ctl_storage.go +++ b/src/control/server/ctl_storage.go @@ -212,6 +212,9 @@ func (cs *ControlService) scanAssignedBdevs(ctx context.Context, nsps []*ctl.Scm continue } + // NOTE DAOS-14223: This metadata size calculation won't necessarily match + // the meta blob size on SSD if --meta-size is specified in + // pool create command. md_size = mp.GetUsableBytes() / uint64(ei.GetTargetCount()) engineCfg, err := cs.getEngineCfgFromScmNsp(nsp) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index cf0ca43be13..f0d0e0e58f6 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -308,6 +308,8 @@ func (c *ControlService) getMetaClusterCount(engineCfg *engine.Config, devToAdju engineTargetNb := uint64(engineCfg.TargetCount) if dev.GetRoleBits()&storage.BdevRoleMeta != 0 { + // TODO DAOS-14223: GetMetaSize() should reflect custom values set through pool + // create --meta-size option. clusterCount := getClusterCount(dev.GetMetaSize(), engineTargetNb, clusterSize) c.log.Tracef("Removing %d Metadata clusters (cluster size: %d) from the usable size of the SMD device %s (rank %d, ctlr %s): ", clusterCount, clusterSize, dev.GetUuid(), devToAdjust.rank, devToAdjust.ctlr.GetPciAddr()) @@ -623,6 +625,7 @@ type formatScmReq struct { func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatResp) (map[int]string, map[int]bool, error) { needFormat := make(map[int]bool) + emptyTmpfs := make(map[int]bool) scmCfgs := make(map[int]*storage.TierConfig) allNeedFormat := true @@ -641,6 +644,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR return nil, nil, errors.Wrap(err, "retrieving SCM config") } scmCfgs[idx] = scmCfg + + // If the tmpfs was already mounted but empty, record that fact for later usage. + if scmCfg.Class == storage.ClassRam && !needs { + info, err := ei.GetStorage().GetScmUsage() + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to check SCM usage for instance %d", idx) + } + emptyTmpfs[idx] = info.TotalBytes-info.AvailBytes == 0 + } } if allNeedFormat { @@ -673,7 +685,15 @@ func formatScm(ctx context.Context, req formatScmReq, resp *ctlpb.StorageFormatR }, }) - skipped[idx] = true + // In the normal case, where SCM wasn't already mounted, we want + // to trigger NVMe format. In the case where SCM was mounted and + // wasn't empty, we want to skip NVMe format, as we're using + // mountedness as a proxy for already-formatted. In the special + // case where tmpfs was already mounted but empty, we will treat it + // as an indication that the NVMe format needs to occur. 
+ if !emptyTmpfs[idx] { + skipped[idx] = true + } } for formatting > 0 { @@ -708,7 +728,7 @@ func formatNvme(ctx context.Context, req formatNvmeReq, resp *ctlpb.StorageForma _, hasError := req.errored[idx] _, skipped := req.skipped[idx] if hasError || (skipped && !req.mdFormatted) { - // if scm errored or was already formatted, indicate skipping bdev format + // if scm failed to format or was already formatted, indicate skipping bdev format ret := ei.newCret(storage.NilBdevAddress, nil) ret.State.Info = fmt.Sprintf(msgNvmeFormatSkip, ei.Index()) resp.Crets = append(resp.Crets, ret) diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 76ff043af92..ba9f3de1e0a 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -1777,6 +1777,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { for name, tc := range map[string]struct { scmMounted bool // if scmMounted we emulate ext4 fs is mounted + tmpfsEmpty bool // if false, an already-mounted ramdisk is not empty superblockExists bool instancesStarted bool // engine already started sMounts []string @@ -1995,6 +1996,44 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { }, }, }, + "ram already mounted but empty": { + scmMounted: true, + tmpfsEmpty: true, + sMounts: []string{"/mnt/daos"}, + sClass: storage.ClassRam, + sSize: 6, + bClass: storage.ClassNvme, + bDevs: [][]string{{mockNvmeController0.PciAddr}}, + bmbc: &bdev.MockBackendConfig{ + ScanRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{mockNvmeController0}, + }, + FormatRes: &storage.BdevFormatResponse{ + DeviceResponses: storage.BdevDeviceFormatResponses{ + mockNvmeController0.PciAddr: &storage.BdevDeviceFormatResponse{ + Formatted: true, + }, + }, + }, + }, + expResp: &ctlpb.StorageFormatResp{ + Crets: []*ctlpb.NvmeControllerResult{ + { + PciAddr: mockNvmeController0.PciAddr, + State: new(ctlpb.ResponseState), + }, + }, + Mrets: []*ctlpb.ScmMountResult{ + { + Mntpoint: "/mnt/daos", + State: &ctlpb.ResponseState{ + Status: ctlpb.ResponseStatus_CTL_SUCCESS, + Info: "SCM is already formatted", + }, + }, + }, + }, + }, "ram already mounted and reformat set": { scmMounted: true, reformat: true, @@ -2247,6 +2286,19 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { GetfsStr: getFsRetStr, SourceToTarget: devToMount, } + if tc.sClass == storage.ClassRam { + total := uint64(1234) + avail := total + if !tc.tmpfsEmpty { + avail-- + } + smsc.GetfsUsageResps = []system.GetfsUsageRetval{ + { + Total: total, + Avail: avail, + }, + } + } sysProv := system.NewMockSysProvider(log, smsc) mounter := mount.NewProvider(log, sysProv) scmProv := scm.NewProvider(log, nil, sysProv, mounter) @@ -2301,7 +2353,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { // if the instance is expected to have a valid superblock, create one if tc.superblockExists { - if err := ei.createSuperblock(false); err != nil { + if err := ei.createSuperblock(); err != nil { t.Fatal(err) } } else { @@ -2332,7 +2384,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { go func(ctx context.Context, e *EngineInstance) { select { case <-ctx.Done(): - case awaitCh <- e.awaitStorageReady(ctx, false): + case awaitCh <- e.awaitStorageReady(ctx): } }(ctx, ei.(*EngineInstance)) } diff --git a/src/control/server/engine/utils.go b/src/control/server/engine/utils.go index 56734985c22..9f3098389a9 100644 --- a/src/control/server/engine/utils.go +++ b/src/control/server/engine/utils.go @@ -141,8 +141,8 @@ var 
( "CRT", "RPC", "BULK", "CORPC", "GRP", "LM", "HG", // CaRT subsystems "EXTERNAL", "ST", "IV", "CTL", } - errLogNameAllWithOther = errors.New("'all' identifier cannot be used with any other") - errLogNameAllInMasks = errors.New("'all' identifier cannot be used in log mask level assignments") + errLogNameAllWithOther = errors.New("'all' identifier can not be used with any other log identifier") + errLogNameAllInMasks = errors.New("'all' identifier can not be used in log mask level assignments") ) func isLogLevelValid(name string) bool { diff --git a/src/control/server/engine/utils_test.go b/src/control/server/engine/utils_test.go index 41b765b4c7c..f4c9aa7a49a 100644 --- a/src/control/server/engine/utils_test.go +++ b/src/control/server/engine/utils_test.go @@ -47,7 +47,7 @@ func Test_ValidateLogMasks(t *testing.T) { }, "single level; single assignment; illegal use of all": { masks: "ERR,all=DBUG", - expErr: errors.New(""), + expErr: errors.New("identifier can not be used"), }, "single level; single assignment; bad level": { masks: "ERR,mgmt=DEG", diff --git a/src/control/server/harness.go b/src/control/server/harness.go index 88028bc658a..f27febc1dce 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -62,7 +62,7 @@ type Engine interface { IsReady() bool LocalState() system.MemberState RemoveSuperblock() error - Run(context.Context, bool) + Run(context.Context) SetupRank(context.Context, ranklist.Rank, uint32) error Stop(os.Signal) error OnInstanceExit(...onInstanceExitFn) @@ -260,7 +260,7 @@ func (h *EngineHarness) Start(ctx context.Context, db dbLeader, cfg *config.Serv defer h.started.SetFalse() for _, ei := range h.Instances() { - ei.Run(ctx, cfg.RecreateSuperblocks) + ei.Run(ctx) } h.OnDrpcFailure(newOnDrpcFailureFn(h.log, db)) diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 14f53cf3b5b..4583c86f170 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -178,10 +178,10 @@ func (ei *EngineInstance) removeSocket() error { return nil } -func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, bool, uint32, error) { +func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, uint32, error) { superblock := ei.getSuperblock() if superblock == nil { - return ranklist.NilRank, false, 0, errors.New("nil superblock while determining rank") + return ranklist.NilRank, 0, errors.New("nil superblock while determining rank") } r := ranklist.NilRank @@ -200,11 +200,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify }) if err != nil { ei.log.Errorf("join failed: %s", err) - return ranklist.NilRank, false, 0, err + return ranklist.NilRank, 0, err } switch resp.State { case system.MemberStateAdminExcluded, system.MemberStateExcluded: - return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) + return ranklist.NilRank, 0, errors.Errorf("rank %d excluded", resp.Rank) } r = ranklist.Rank(resp.Rank) @@ -218,11 +218,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify superblock.URI = ready.GetUri() ei.setSuperblock(superblock) if err := ei.WriteSuperblock(); err != nil { - return ranklist.NilRank, resp.LocalJoin, 0, err + return ranklist.NilRank, 0, err } } - return r, resp.LocalJoin, resp.MapVersion, nil + return r, resp.MapVersion, nil } func (ei *EngineInstance) updateFaultDomainInSuperblock() error { @@ -259,21 
+259,20 @@ func (ei *EngineInstance) handleReady(ctx context.Context, ready *srvpb.NotifyRe ei.log.Error(err.Error()) // nonfatal } - r, localJoin, mapVersion, err := ei.determineRank(ctx, ready) + r, mapVersion, err := ei.determineRank(ctx, ready) if err != nil { return err } - // If the join was already processed because it ran on the same server, - // skip the rest of these steps. - if localJoin { - return nil - } - return ei.SetupRank(ctx, r, mapVersion) } func (ei *EngineInstance) SetupRank(ctx context.Context, rank ranklist.Rank, map_version uint32) error { + if ei.IsReady() { + ei.log.Errorf("SetupRank called on an already set-up instance %d", ei.Index()) + return nil + } + if err := ei.callSetRank(ctx, rank, map_version); err != nil { return errors.Wrap(err, "SetRank failed") } diff --git a/src/control/server/instance_exec.go b/src/control/server/instance_exec.go index ab22cb4504f..19143782ec3 100644 --- a/src/control/server/instance_exec.go +++ b/src/control/server/instance_exec.go @@ -30,14 +30,14 @@ type EngineRunner interface { GetConfig() *engine.Config } -func (ei *EngineInstance) format(ctx context.Context, recreateSBs bool) error { +func (ei *EngineInstance) format(ctx context.Context) error { idx := ei.Index() ei.log.Debugf("instance %d: checking if storage is formatted", idx) - if err := ei.awaitStorageReady(ctx, recreateSBs); err != nil { + if err := ei.awaitStorageReady(ctx); err != nil { return err } - if err := ei.createSuperblock(recreateSBs); err != nil { + if err := ei.createSuperblock(); err != nil { return err } @@ -158,7 +158,7 @@ func (ei *EngineInstance) handleExit(ctx context.Context, exitPid int, exitErr e // will only return (if no errors are returned during setup) on I/O Engine // process exit (triggered by harness shutdown through context cancellation // or abnormal I/O Engine process termination). -func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) (_ chan *engine.RunnerExitInfo, err error) { +func (ei *EngineInstance) startRunner(parent context.Context) (_ chan *engine.RunnerExitInfo, err error) { ctx, cancel := context.WithCancel(parent) defer func() { if err != nil { @@ -168,7 +168,7 @@ func (ei *EngineInstance) startRunner(parent context.Context, recreateSBs bool) } }() - if err = ei.format(ctx, recreateSBs); err != nil { + if err = ei.format(ctx); err != nil { return } @@ -192,7 +192,7 @@ func (ei *EngineInstance) requestStart(ctx context.Context) { // Run starts the control loop for an EngineInstance. Engine starts are triggered by // calling requestStart() on the instance. -func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { +func (ei *EngineInstance) Run(ctx context.Context) { // Start the instance control loop. go func() { var runnerExitCh engine.RunnerExitChan @@ -212,7 +212,7 @@ func (ei *EngineInstance) Run(ctx context.Context, recreateSBs bool) { continue } - runnerExitCh, err = ei.startRunner(ctx, recreateSBs) + runnerExitCh, err = ei.startRunner(ctx) if err != nil { ei.log.Errorf("runner exited without starting process: %s", err) ei.handleExit(ctx, 0, err) diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 7be5c570b86..2cc4f1f5443 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -76,7 +76,7 @@ func createPublishFormatRequiredFunc(publish func(*events.RASEvent), hostname st } // awaitStorageReady blocks until instance has storage available and ready to be used. 
-func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSuperblock bool) error { +func (ei *EngineInstance) awaitStorageReady(ctx context.Context) error { idx := ei.Index() if ei.IsStarted() { @@ -117,9 +117,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe } if !needsMetaFormat && !needsScmFormat { - if skipMissingSuperblock { - return nil - } ei.log.Debugf("instance %d: no SCM format required; checking for superblock", idx) needsSuperblock, err := ei.NeedsSuperblock() if err != nil { @@ -132,16 +129,6 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context, skipMissingSupe ei.log.Debugf("instance %d: superblock needed", idx) } - if needsScmFormat { - cfg, err := ei.storage.GetScmConfig() - if err != nil { - return err - } - if skipMissingSuperblock { - return FaultScmUnmanaged(cfg.Scm.MountPoint) - } - } - // by this point we need superblock and possibly scm format formatType := "SCM" if !needsScmFormat { diff --git a/src/control/server/instance_storage_test.go b/src/control/server/instance_storage_test.go index 0a73da3458c..2bbc049bd65 100644 --- a/src/control/server/instance_storage_test.go +++ b/src/control/server/instance_storage_test.go @@ -361,7 +361,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted bool needsScmFormat bool hasSB bool - skipMissingSB bool engineIndex uint32 expFmtType string expErr error @@ -370,14 +369,6 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { engineStarted: true, expErr: errStarted, }, - "needs format but skip missing superblock": { - needsScmFormat: true, - skipMissingSB: true, - expErr: FaultScmUnmanaged("/mnt/test"), - }, - "no need to format and skip missing superblock": { - skipMissingSB: true, - }, "no need to format and existing superblock": { hasSB: true, }, @@ -432,9 +423,9 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { ctx, cancel := context.WithTimeout(test.Context(t), time.Millisecond*100) defer cancel() - gotErr := engine.awaitStorageReady(ctx, tc.skipMissingSB) + gotErr := engine.awaitStorageReady(ctx) test.CmpErr(t, tc.expErr, gotErr) - if tc.expErr == errStarted || tc.skipMissingSB == true || tc.hasSB == true { + if tc.expErr == errStarted || tc.hasSB == true { return } diff --git a/src/control/server/instance_superblock.go b/src/control/server/instance_superblock.go index 11b7f3849ea..0d6ec613a8b 100644 --- a/src/control/server/instance_superblock.go +++ b/src/control/server/instance_superblock.go @@ -111,7 +111,7 @@ func (ei *EngineInstance) NeedsSuperblock() (bool, error) { } // createSuperblock creates instance superblock if needed. 
-func (ei *EngineInstance) createSuperblock(recreate bool) error { +func (ei *EngineInstance) createSuperblock() error { if ei.IsStarted() { return errors.Errorf("can't create superblock: instance %d already started", ei.Index()) } @@ -120,7 +120,7 @@ func (ei *EngineInstance) createSuperblock(recreate bool) error { if !needsSuperblock { return nil } - if err != nil && !recreate { + if err != nil { return err } diff --git a/src/control/server/instance_superblock_test.go b/src/control/server/instance_superblock_test.go index 8354f17f320..5c1220cd7dc 100644 --- a/src/control/server/instance_superblock_test.go +++ b/src/control/server/instance_superblock_test.go @@ -56,7 +56,7 @@ func TestServer_Instance_createSuperblock(t *testing.T) { } for _, e := range h.Instances() { - if err := e.(*EngineInstance).createSuperblock(false); err != nil { + if err := e.(*EngineInstance).createSuperblock(); err != nil { t.Fatal(err) } } diff --git a/src/control/server/instance_test.go b/src/control/server/instance_test.go index cb6552fc84a..d6767df30c0 100644 --- a/src/control/server/instance_test.go +++ b/src/control/server/instance_test.go @@ -248,7 +248,7 @@ func (mi *MockInstance) RemoveSuperblock() error { return mi.cfg.RemoveSuperblockErr } -func (mi *MockInstance) Run(_ context.Context, _ bool) {} +func (mi *MockInstance) Run(_ context.Context) {} func (mi *MockInstance) SetupRank(_ context.Context, _ ranklist.Rank, _ uint32) error { return mi.cfg.SetupRankErr diff --git a/src/control/server/interceptors.go b/src/control/server/interceptors.go index 1762b58d790..1f0aa24efb6 100644 --- a/src/control/server/interceptors.go +++ b/src/control/server/interceptors.go @@ -16,6 +16,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "google.golang.org/grpc/status" "google.golang.org/protobuf/reflect/protoreflect" @@ -28,6 +29,10 @@ import ( "github.com/daos-stack/daos/src/control/system" ) +var ( + errNoReqMetadata = errors.New("no component/version metadata found in request") +) + func componentFromContext(ctx context.Context) (comp *security.Component, err error) { clientPeer, ok := peer.FromContext(ctx) if !ok { @@ -116,7 +121,25 @@ var selfServerComponent = func() *build.VersionedComponent { return self }() -func checkVersion(ctx context.Context, self *build.VersionedComponent, req interface{}) error { +func compVersionFromHeaders(ctx context.Context) (*build.VersionedComponent, error) { + md, hasMD := metadata.FromIncomingContext(ctx) + if !hasMD { + return nil, errNoReqMetadata + } + compName, hasName := md[proto.DaosComponentHeader] + if !hasName { + return nil, errNoReqMetadata + } + comp := build.Component(compName[0]) + compVersion, hasVersion := md[proto.DaosVersionHeader] + if !hasVersion { + return nil, errNoReqMetadata + } + + return build.NewVersionedComponent(comp, compVersion[0]) +} + +func checkVersion(ctx context.Context, log logging.Logger, self *build.VersionedComponent, req interface{}) error { // If we can't determine our own version, then there's no // checking to be done. if self.Version.IsZero() { @@ -127,33 +150,58 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter // are most stringent for server/server communication. We have // to set a default because this security component lookup // will fail if certificates are disabled. 
- buildComponent := build.ComponentServer + otherComponent := build.ComponentServer + otherVersion := build.MustNewVersion("0.0.0") secComponent, err := componentFromContext(ctx) if err == nil { - buildComponent = build.Component(secComponent.String()) + otherComponent = build.Component(secComponent.String()) } isInsecure := status.Code(err) == codes.Unauthenticated - otherVersion := build.MustNewVersion("0.0.0") - if sReq, ok := req.(interface{ GetSys() string }); ok { - comps := strings.Split(sReq.GetSys(), "-") - if len(comps) > 1 { - if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { - otherVersion = ver - } + fromHeaders, err := compVersionFromHeaders(ctx) + if err != nil && err != errNoReqMetadata { + return errors.Wrap(err, "failed to extract peer component/version from headers") + } + + // Prefer the new header-based component/version mechanism. + // If we are in secure mode, verify that the component presented + // in the header matches the certificate's component. + if fromHeaders != nil { + otherVersion = fromHeaders.Version + if isInsecure { + otherComponent = fromHeaders.Component + } else if otherComponent != fromHeaders.Component { + return status.Errorf(codes.PermissionDenied, + "component mismatch (req: %q != cert: %q)", fromHeaders.Component, otherComponent) } } else { - // If the request message type does not implement GetSys(), then - // there is no version to check. We leave message compatibility - // to lower layers. - return nil - } + // If we did not receive a version via request header, then we need to fall back + // to trying to pick it out of the overloaded system name field. + // + // TODO (DAOS-14336): Remove this once the compatibility window has closed (e.g. for 2.8+). + if sReq, ok := req.(interface{ GetSys() string }); ok { + comps := strings.Split(sReq.GetSys(), "-") + if len(comps) > 1 { + if ver, err := build.NewVersion(comps[len(comps)-1]); err == nil { + otherVersion = ver + } + } + } else { + // If the request message type does not implement GetSys(), then + // there is no version to check. We leave message compatibility + // to lower layers. + return nil + } - if isInsecure && !self.Version.Equals(otherVersion) { - return FaultNoCompatibilityInsecure(self.Version, otherVersion) + // If we're running without certificates and we didn't receive a component + // via headers, then we have to enforce the strictest compatibility requirements, + // i.e. exact same version. 
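// How the peer side populates these headers is not part of this hunk; what follows is a
// minimal, hypothetical sketch of a client-side unary interceptor that attaches the metadata
// consumed by compVersionFromHeaders() above. The header keys (proto.DaosComponentHeader,
// proto.DaosVersionHeader) and the build.Component type are taken from this patch; the
// interceptor name and wiring are assumptions, and the imports mirror those already used in
// this file (context, google.golang.org/grpc, google.golang.org/grpc/metadata, build, common/proto).
func versionMetadataInterceptor(comp build.Component, version string) grpc.UnaryClientInterceptor {
	return func(ctx context.Context, method string, req, reply interface{},
		cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		// Attach the component name and version so that the server-side
		// checkVersion() can read them from the incoming request metadata.
		ctx = metadata.AppendToOutgoingContext(ctx,
			proto.DaosComponentHeader, comp.String(),
			proto.DaosVersionHeader, version)
		return invoker(ctx, method, req, reply, cc, opts...)
	}
}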
+ if isInsecure && !self.Version.Equals(otherVersion) { + return FaultNoCompatibilityInsecure(self.Version, otherVersion) + } } - other, err := build.NewVersionedComponent(buildComponent, otherVersion.String()) + other, err := build.NewVersionedComponent(otherComponent, otherVersion.String()) if err != nil { other = &build.VersionedComponent{ Component: "unknown", @@ -163,18 +211,22 @@ func checkVersion(ctx context.Context, self *build.VersionedComponent, req inter } if err := build.CheckCompatibility(self, other); err != nil { + log.Errorf("%s is incompatible with %s", other, self) return FaultIncompatibleComponents(self, other) } + log.Debugf("%s is compatible with %s", other, self) return nil } -func unaryVersionInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { - if err := checkVersion(ctx, selfServerComponent, req); err != nil { - return nil, errors.Wrapf(err, "version check failed for %T", req) - } +func unaryVersionInterceptor(log logging.Logger) grpc.UnaryServerInterceptor { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + if err := checkVersion(ctx, log, selfServerComponent, req); err != nil { + return nil, errors.Wrapf(err, "version check failed for %T", req) + } - return handler(ctx, req) + return handler(ctx, req) + } } func unaryErrorInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { diff --git a/src/control/server/interceptors_test.go b/src/control/server/interceptors_test.go index 2dbfbca30c8..bdde5bde6e8 100644 --- a/src/control/server/interceptors_test.go +++ b/src/control/server/interceptors_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -16,12 +16,15 @@ import ( "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/common" + "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" ) type testStatus struct { @@ -141,10 +144,96 @@ func TestServer_checkVersion(t *testing.T) { otherVersion: "2.4.0", ctx: newTestAuthCtx(test.Context(t), "agent"), }, - "non-sys msg bypasses version checks": { + "non-sys msg bypasses version checks in secure mode": { selfVersion: "2.4.0", + ctx: newTestAuthCtx(test.Context(t), "agent"), nonSysMsg: true, }, + "insecure prelease agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.3.108", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 agent with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.4.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 agent with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAgent.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + }, + "insecure 2.4.1 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.4.1", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 dmg with 2.4.0 server": { + selfVersion: "2.4.0", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentAdmin.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "insecure 2.4.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.4.0", + )), + nonSysMsg: true, + }, + "insecure 2.6.0 server with 2.4.1 server": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("not compatible"), + }, + "invalid component": { + selfVersion: "2.4.1", + ctx: metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, "banana", + proto.DaosVersionHeader, "2.6.0", + )), + nonSysMsg: true, + expErr: errors.New("invalid component"), + }, + "header/certificate component mismatch": { + selfVersion: "2.4.0", + ctx: newTestAuthCtx( + metadata.NewIncomingContext(test.Context(t), metadata.Pairs( + proto.DaosComponentHeader, 
build.ComponentServer.String(), + proto.DaosVersionHeader, "2.6.0"), + ), "agent"), + nonSysMsg: true, + expErr: errors.New("component mismatch"), + }, } { t.Run(name, func(t *testing.T) { ctx := test.Context(t) @@ -169,7 +258,10 @@ func TestServer_checkVersion(t *testing.T) { req = verReq } - gotErr := checkVersion(ctx, selfComp, req) + log, buf := logging.NewTestLogger(name) + test.ShowBufferOnFailure(t, buf) + + gotErr := checkVersion(ctx, log, selfComp, req) test.CmpErr(t, tc.expErr, gotErr) }) } diff --git a/src/control/server/mgmt_pool.go b/src/control/server/mgmt_pool.go index 9189a7ef116..6ae1e1287da 100644 --- a/src/control/server/mgmt_pool.go +++ b/src/control/server/mgmt_pool.go @@ -299,6 +299,7 @@ func (svc *mgmtSvc) poolCreate(parent context.Context, req *mgmtpb.PoolCreateReq resp.SvcReps = ranklist.RanksToUint32(ps.Replicas) resp.TgtRanks = ranklist.RanksToUint32(ps.Storage.CreationRanks()) resp.TierBytes = ps.Storage.PerRankTierStorage + // TODO DAOS-14223: Store Meta-Blob-Size in sysdb. return resp, nil } diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go index e484b7be19b..004dc19a08a 100644 --- a/src/control/server/mgmt_pool_test.go +++ b/src/control/server/mgmt_pool_test.go @@ -314,6 +314,7 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { targetCount int memberCount int req *mgmtpb.PoolCreateReq + drpcRet *mgmtpb.PoolCreateResp expResp *mgmtpb.PoolCreateResp expErr error }{ @@ -388,11 +389,34 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, TgtRanks: []uint32{0, 1}, }, }, + "successful creation with meta size": { + targetCount: 8, + req: &mgmtpb.PoolCreateReq{ + Uuid: test.MockUUID(1), + Tierbytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + Properties: testPoolLabelProp(), + }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + expResp: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte}, + MetaBlobSize: 2 * humanize.GiByte, + TgtRanks: []uint32{0, 1}, + }, + }, "successful creation minimum size": { targetCount: 8, req: &mgmtpb.PoolCreateReq{ @@ -400,6 +424,10 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Tierbytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ TierBytes: []uint64{engine.ScmMinBytesPerTarget * 8, engine.NvmeMinBytesPerTarget * 8}, TgtRanks: []uint32{0, 1}, @@ -412,9 +440,19 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { Totalbytes: 100 * humanize.GiByte, Properties: testPoolLabelProp(), }, + drpcRet: &mgmtpb.PoolCreateResp{ + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, + }, expResp: &mgmtpb.PoolCreateResp{ - TierBytes: []uint64{((100 * humanize.GiByte) * DefaultPoolScmRatio) / 2, (100 * 
humanize.GiByte * DefaultPoolNvmeRatio) / 2}, - TgtRanks: []uint32{0, 1}, + TierBytes: []uint64{ + (100 * humanize.GiByte * DefaultPoolScmRatio) / 2, + (100 * humanize.GiByte * DefaultPoolNvmeRatio) / 2, + }, + TgtRanks: []uint32{0, 1}, }, }, "failed creation invalid ranks": { @@ -512,14 +550,15 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) { numMembers = 2 } for i := 0; i < numMembers; i++ { - if _, err := tc.mgmtSvc.membership.Add(system.MockMember(t, uint32(i), system.MemberStateJoined)); err != nil { + mm := system.MockMember(t, uint32(i), system.MemberStateJoined) + if _, err := tc.mgmtSvc.membership.Add(mm); err != nil { t.Fatal(err) } } if tc.setupMockDrpc == nil { tc.setupMockDrpc = func(svc *mgmtSvc, err error) { - setupMockDrpcClient(tc.mgmtSvc, tc.expResp, tc.expErr) + setupMockDrpcClient(tc.mgmtSvc, tc.drpcRet, tc.expErr) } } tc.setupMockDrpc(tc.mgmtSvc, tc.expErr) diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 8374b0b9e2e..620db09bf11 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -178,22 +178,6 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net MapVersion: joinResponse.MapVersion, } - // If the rank is local to the MS leader, then we need to wire up at least - // one in order to perform a CaRT group update. - if common.IsLocalAddr(peerAddr) && req.Idx == 0 { - resp.LocalJoin = true - - srvs := svc.harness.Instances() - if len(srvs) == 0 { - return nil, errors.New("invalid Join request (index 0 doesn't exist?!?)") - } - srv := srvs[0] - - if err := srv.SetupRank(ctx, joinResponse.Member.Rank, joinResponse.MapVersion); err != nil { - return nil, errors.Wrap(err, "SetupRank on local instance failed") - } - } - return resp, nil } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 375b77c3efb..0ac1112c4ba 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -1967,7 +1967,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: false, MapVersion: 2, }, }, @@ -1993,7 +1992,6 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, - LocalJoin: true, MapVersion: 2, }, }, diff --git a/src/control/server/server.go b/src/control/server/server.go index 8e5d921bb57..e4f7b5bdfa1 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -188,6 +188,7 @@ func (srv *server) createServices(ctx context.Context) (err error) { cliCfg := control.DefaultConfig() cliCfg.TransportConfig = srv.cfg.TransportConfig rpcClient := control.NewClient( + control.WithClientComponent(build.ComponentServer), control.WithConfig(cliCfg), control.WithClientLogger(srv.log)) diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index f6f02e55731..6e059aef32e 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -540,6 +540,25 @@ func checkEngineTmpfsMem(srv *server, ei *EngineInstance, mi *common.MemInfo) er memRamdisk := uint64(sc.Scm.RamdiskSize) * humanize.GiByte memAvail := uint64(mi.MemAvailableKiB) * humanize.KiByte + // In the event that tmpfs was already mounted, we need to verify that it + // is the correct size and that the memory usage still makes sense. 
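// Restating the rule enforced by the block below as a standalone sketch (the helper name is
// hypothetical and not part of this patch): an existing ramdisk no larger than the calculated
// optimal size is accepted, while a larger one is rejected via storage.FaultRamdiskBadSize so
// that an oversized tmpfs cannot eat into the memory budgeted for the engine.
func checkExistingRamdiskSize(existingBytes, optimalBytes uint64) error {
	// Oversized ramdisks risk OOM situations, so only sizes up to the
	// calculated optimum pass.
	if existingBytes > optimalBytes {
		return storage.FaultRamdiskBadSize(existingBytes, optimalBytes)
	}
	return nil
}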
+ if isMounted, err := ei.storage.ScmIsMounted(); err == nil && isMounted { + usage, err := ei.storage.GetScmUsage() + if err != nil { + return errors.Wrap(err, "unable to check tmpfs usage") + } + // Ensure that the existing ramdisk is not larger than the calculated + // optimal size, in order to avoid potential OOM situations. + if usage.TotalBytes > memRamdisk { + return storage.FaultRamdiskBadSize(usage.TotalBytes, memRamdisk) + } + // Looks OK, so we can return early and bypass additional checks. + srv.log.Debugf("using existing tmpfs of size %s", humanize.IBytes(usage.TotalBytes)) + return nil + } else if err != nil { + return errors.Wrap(err, "unable to check for mounted tmpfs") + } + if err := checkMemForRamdisk(srv.log, memRamdisk, memAvail); err != nil { return err } @@ -714,7 +733,7 @@ func getGrpcOpts(log logging.Logger, cfgTransport *security.TransportConfig, ldr unaryLoggingInterceptor(log, ldrChk), // must be first in order to properly log errors unaryErrorInterceptor, unaryStatusInterceptor, - unaryVersionInterceptor, + unaryVersionInterceptor(log), } streamInterceptors := []grpc.StreamServerInterceptor{ streamErrorInterceptor, diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 069fac5e028..1fb0567fadd 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -753,9 +753,11 @@ func TestServer_prepBdevStorage(t *testing.T) { func TestServer_checkEngineTmpfsMem(t *testing.T) { for name, tc := range map[string]struct { - srvCfgExtra func(*config.Server) *config.Server - memAvailGiB int - expErr error + srvCfgExtra func(*config.Server) *config.Server + memAvailGiB int + tmpfsMounted bool + tmpfsSize uint64 + expErr error }{ "pmem tier; skip check": { srvCfgExtra: func(sc *config.Server) *config.Server { @@ -780,6 +782,21 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { expErr: storage.FaultRamdiskLowMem("Available", 10*humanize.GiByte, 9*humanize.GiByte, 8*humanize.GiByte), }, + "tmpfs already mounted; more than calculated": { + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 11, + expErr: errors.New("ramdisk size"), + }, + "tmpfs already mounted; less than calculated": { + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(ramEngine(0, 10)) + }, + tmpfsMounted: true, + tmpfsSize: 9, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(name) @@ -799,7 +816,19 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { ec := cfg.Engines[0] runner := engine.NewRunner(log, ec) - provider := storage.MockProvider(log, 0, &ec.Storage, nil, nil, nil, nil) + sysMockCfg := &sysprov.MockSysConfig{ + IsMountedBool: tc.tmpfsMounted, + } + if tc.tmpfsMounted { + sysMockCfg.GetfsUsageResps = []sysprov.GetfsUsageRetval{ + { + Total: tc.tmpfsSize * humanize.GiByte, + }, + } + } + sysMock := sysprov.NewMockSysProvider(log, sysMockCfg) + scmMock := &storage.MockScmProvider{} + provider := storage.MockProvider(log, 0, &ec.Storage, sysMock, scmMock, nil, nil) instance := NewEngineInstance(log, provider, nil, runner) srv, err := newServer(log, cfg, &system.FaultDomain{}) diff --git a/src/control/server/storage/faults.go b/src/control/server/storage/faults.go index fb64eabc51f..cbf029c93f2 100644 --- a/src/control/server/storage/faults.go +++ b/src/control/server/storage/faults.go @@ -78,6 +78,17 @@ func FaultRamdiskLowMem(memType string, confRamdiskSize, memNeed, 
memHave uint64 "file if reducing the requested amount of RAM is not possible") } +// FaultRamdiskBadSize indicates that the already-mounted ramdisk is out +// of spec with the calculated ramdisk size for the engine. +func FaultRamdiskBadSize(existingSize, calcSize uint64) *fault.Fault { + return storageFault( + code.ScmRamdiskBadSize, + fmt.Sprintf("already-mounted ramdisk size %s is too far from optimal size of %s", + humanize.IBytes(existingSize), humanize.IBytes(calcSize)), + fmt.Sprintf("unmount the ramdisk and allow DAOS to manage it, or remount with size %s", + humanize.IBytes(calcSize))) +} + // FaultConfigRamdiskUnderMinMem indicates that the tmpfs size requested in config is less than // minimum allowed. func FaultConfigRamdiskUnderMinMem(confSize, memRamdiskMin uint64) *fault.Fault { diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index a23bc3094c7..9f59060f87e 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1114,6 +1114,7 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, struct dtx_memberships *mbs, struct dtx_leader_handle **p_dlh) { struct dtx_leader_handle *dlh; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_handle *dth; int rc; int i; @@ -1151,10 +1152,12 @@ dtx_leader_begin(daos_handle_t coh, struct dtx_id *dti, DP_DTI(dti), sub_modification_cnt, dth->dth_ver, DP_UOID(*leader_oid), dti_cos_cnt, tgt_cnt, flags, DP_RC(rc)); - if (rc != 0) + if (rc != 0) { D_FREE(dlh); - else + } else { *p_dlh = dlh; + d_tm_inc_gauge(tls->dt_dtx_leader_total, 1); + } return rc; } @@ -1178,6 +1181,17 @@ dtx_leader_wait(struct dtx_leader_handle *dlh) return dlh->dlh_result; }; +void +dtx_entry_put(struct dtx_entry *dte) +{ + if (--(dte->dte_refs) == 0) { + struct dtx_tls *tls = dtx_tls_get(); + + d_tm_dec_gauge(tls->dt_dtx_entry_total, 1); + D_FREE(dte); + } +} + /** * Stop the leader thandle. * @@ -1192,6 +1206,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul { struct ds_cont_child *cont = coh->sch_cont; struct dtx_handle *dth = &dlh->dlh_handle; + struct dtx_tls *tls = dtx_tls_get(); struct dtx_entry *dte; struct dtx_memberships *mbs; size_t size; @@ -1308,6 +1323,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul dte->dte_ver = dth->dth_ver; dte->dte_refs = 1; dte->dte_mbs = mbs; + d_tm_inc_gauge(tls->dt_dtx_entry_total, 1); /* Use the new created @dte instead of dth->dth_dte that will be * released after dtx_leader_end(). 
@@ -1419,6 +1435,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul D_FREE(dth->dth_oid_array); D_FREE(dlh); + d_tm_dec_gauge(tls->dt_dtx_leader_total, 1); return result; } diff --git a/src/dtx/dtx_internal.h b/src/dtx/dtx_internal.h index 3abaa197b1d..a38c747a61d 100644 --- a/src/dtx/dtx_internal.h +++ b/src/dtx/dtx_internal.h @@ -160,6 +160,8 @@ struct dtx_pool_metrics { */ struct dtx_tls { struct d_tm_node_t *dt_committable; + struct d_tm_node_t *dt_dtx_leader_total; + struct d_tm_node_t *dt_dtx_entry_total; uint64_t dt_agg_gen; uint32_t dt_batched_ult_cnt; }; diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index 095c3d7fa20..9ea25a9dcd0 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -39,6 +39,22 @@ dtx_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create DTX committable metric: " DF_RC"\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->dt_dtx_leader_total, D_TM_GAUGE, + "total number of leader dtx in cache", "entry", + "mem/dtx/dtx_leader_handle_%u/tgt_%u", + sizeof(struct dtx_leader_handle), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX leader metric: " DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->dt_dtx_entry_total, D_TM_GAUGE, + "total number of dtx entry in cache", "entry", + "mem/dtx/dtx_entry_%u/tgt_%u", + sizeof(struct dtx_entry), tgt_id); + if (rc != DER_SUCCESS) + D_WARN("Failed to create DTX entry metric: " DF_RC"\n", + DP_RC(rc)); + return tls; } @@ -105,7 +121,6 @@ dtx_metrics_alloc(const char *path, int tgt_id) D_WARN("Failed to create DTX RPC cnt metric for %s: " DF_RC"\n", dtx_opc_to_str(opc), DP_RC(rc)); } - return metrics; } diff --git a/src/engine/init.c b/src/engine/init.c index 874fbf62ebf..5e90d4ec248 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -678,7 +678,6 @@ server_init(int argc, char *argv[]) DP_RC(rc)); metrics = &dss_engine_metrics; - /** Report timestamp when engine was started */ d_tm_record_timestamp(metrics->started_time); diff --git a/src/engine/srv.c b/src/engine/srv.c index 1be1aa2d9f1..d7f1acdbe80 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "drpc_internal.h" #include "srv_internal.h" @@ -353,6 +354,7 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi) D_DEBUG(DB_TRACE, "XS(%d) drained ULTs.\n", dx->dx_xs_id); } +#define D_MEMORY_TRACK_ENV "D_MEMORY_TRACK" /* * The server handler ULT first sets CPU affinity, initialize the per-xstream * TLS, CRT(comm) context, NVMe context, creates the long-run ULTs (GC & NVMe @@ -366,12 +368,18 @@ dss_srv_handler(void *arg) struct dss_thread_local_storage *dtc; struct dss_module_info *dmi; int rc; + bool track_mem = false; bool signal_caller = true; rc = dss_xstream_set_affinity(dx); if (rc) goto signal; + d_getenv_bool(D_MEMORY_TRACK_ENV, &track_mem); + if (unlikely(track_mem)) + d_set_alloc_track_cb(dss_mem_total_alloc_track, dss_mem_total_free_track, + &dx->dx_mem_stats); + /* initialize xstream-local storage */ dtc = dss_tls_init(dx->dx_tag, dx->dx_xs_id, dx->dx_tgt_id); if (dtc == NULL) { @@ -643,6 +651,46 @@ dss_xstream_free(struct dss_xstream *dx) D_FREE(dx); } +static void +dss_mem_stats_init(struct mem_stats *stats, int xs_id) +{ + int rc; + + rc = d_tm_add_metric(&stats->ms_total_usage, D_TM_GAUGE, + "Total memory usage", "byte", "mem/total_mem/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + + rc = d_tm_add_metric(&stats->ms_mallinfo, D_TM_MEMINFO, + "Total memory 
arena", "", "mem/meminfo/xs_%u", xs_id); + if (rc) + D_WARN("Failed to create memory telemetry: "DF_RC"\n", DP_RC(rc)); + stats->ms_current = 0; +} + +void +dss_mem_total_alloc_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_inc_gauge(stats->ms_total_usage, bytes); + /* Only retrieve mallocinfo every 10 allocation */ + if ((stats->ms_current++ % 10) == 0) + d_tm_record_meminfo(stats->ms_mallinfo); +} + +void +dss_mem_total_free_track(void *arg, daos_size_t bytes) +{ + struct mem_stats *stats = arg; + + D_ASSERT(arg != NULL); + + d_tm_dec_gauge(stats->ms_total_usage, bytes); +} + /** * Start one xstream. * @@ -735,6 +783,8 @@ dss_start_one_xstream(hwloc_cpuset_t cpus, int tag, int xs_id) D_GOTO(out_dx, rc); } + dss_mem_stats_init(&dx->dx_mem_stats, xs_id); + /** start XS, ABT rank 0 is reserved for the primary xstream */ rc = ABT_xstream_create_with_rank(dx->dx_sched, xs_id + 1, &dx->dx_xstream); diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index b3e0ca7ee9e..4fbe5d386d7 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -54,6 +54,12 @@ struct sched_info { unsigned int si_stop:1; }; +struct mem_stats { + struct d_tm_node_t *ms_total_usage; /* Total memory usage (bytes) */ + struct d_tm_node_t *ms_mallinfo; /* memory allocate information */ + uint64_t ms_current; +}; + /** Per-xstream configuration data */ struct dss_xstream { char dx_name[DSS_XS_NAME_LEN]; @@ -80,6 +86,7 @@ struct dss_xstream { bool dx_main_xs; /* true for main XS */ bool dx_comm; /* true with cart context */ bool dx_dsc_started; /* DSC progress ULT started */ + struct mem_stats dx_mem_stats; /* memory usages stats on this xstream */ #ifdef ULT_MMAP_STACK /* per-xstream pool/list of free stacks */ struct stack_pool *dx_sp; @@ -95,6 +102,7 @@ struct engine_metrics { struct d_tm_node_t *rank_id; struct d_tm_node_t *dead_rank_events; struct d_tm_node_t *last_event_time; + struct d_tm_node_t *meminfo; }; extern struct engine_metrics dss_engine_metrics; @@ -150,6 +158,8 @@ void dss_dump_ABT_state(FILE *fp); void dss_xstreams_open_barrier(void); struct dss_xstream *dss_get_xstream(int stream_id); int dss_xstream_cnt(void); +void dss_mem_total_alloc_track(void *arg, daos_size_t bytes); +void dss_mem_total_free_track(void *arg, daos_size_t bytes); /* srv_metrics.c */ int dss_engine_metrics_init(void); diff --git a/src/engine/srv_metrics.c b/src/engine/srv_metrics.c index ef302241ca9..0be06a4733c 100644 --- a/src/engine/srv_metrics.c +++ b/src/engine/srv_metrics.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ diff --git a/src/gurt/misc.c b/src/gurt/misc.c index ca40fcc194a..ef3a2e91709 100644 --- a/src/gurt/misc.c +++ b/src/gurt/misc.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,20 @@ /* state buffer for DAOS rand and srand calls, NOT thread safe */ static struct drand48_data randBuffer = {0}; +d_alloc_track_cb_t d_alloc_track_cb; +d_alloc_track_cb_t d_free_track_cb; +static __thread void *track_arg; + +void +d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg) +{ + d_alloc_track_cb = alloc_cb; + d_free_track_cb = free_cb; + track_arg = arg; + + D_INFO("memory track is enabled for the engine.\n"); +} + void d_srand(long int seedval) { @@ -49,6 +64,12 @@ d_rand() void d_free(void *ptr) { + if (unlikely(track_arg != NULL)) { + size_t size = malloc_usable_size(ptr); + + d_free_track_cb(track_arg, size); + } + free(ptr); } @@ -87,25 +108,62 @@ d_free(void *ptr) void * d_calloc(size_t count, size_t eltsize) { - return calloc(count, eltsize); + void *ptr; + + ptr = calloc(count, eltsize); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_malloc(size_t size) { - return malloc(size); + void *ptr; + + ptr = malloc(size); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, size); + } + + return ptr; } void * d_realloc(void *ptr, size_t size) { - return realloc(ptr, size); + void *new_ptr; + + if (unlikely(track_arg != NULL)) { + size_t old_size = malloc_usable_size(ptr); + + new_ptr = realloc(ptr, size); + if (new_ptr != NULL) { + d_free_track_cb(track_arg, old_size); + d_alloc_track_cb(track_arg, size); + } + } else { + new_ptr = realloc(ptr, size); + } + return new_ptr; } char * d_strndup(const char *s, size_t n) { - return strndup(s, n); + char *ptr; + + ptr = strndup(s, n); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } int @@ -118,6 +176,11 @@ d_asprintf(char **strp, const char *fmt, ...) rc = vasprintf(strp, fmt, ap); va_end(ap); + if (unlikely(track_arg != NULL)) { + if (rc > 0 && *strp != NULL) + d_alloc_track_cb(track_arg, (size_t)rc); + } + return rc; } @@ -143,16 +206,31 @@ d_asprintf2(int *_rc, const char *fmt, ...) 
char * d_realpath(const char *path, char *resolved_path) { - return realpath(path, resolved_path); + char *ptr; + + ptr = realpath(path, resolved_path); + if (unlikely(track_arg != NULL)) { + if (ptr != NULL) + d_alloc_track_cb(track_arg, malloc_usable_size(ptr)); + } + + return ptr; } void * d_aligned_alloc(size_t alignment, size_t size, bool zero) { - void *buf = aligned_alloc(alignment, size); + void *buf; + + buf = aligned_alloc(alignment, size); + if (unlikely(track_arg != NULL)) { + if (buf != NULL) + d_alloc_track_cb(track_arg, size); + } if (!zero || buf == NULL) return buf; + memset(buf, 0, size); return buf; } diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index e34abe19ad0..a5e34fab2d4 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -933,6 +934,27 @@ d_tm_print_timestamp(time_t *clk, char *name, int format, int opt_fields, } } +static void +d_tm_print_meminfo(struct d_tm_meminfo_t *meminfo, char *name, int format, + int opt_fields, FILE *stream) +{ + if ((name == NULL) || (stream == NULL)) + return; + + if (format == D_TM_CSV) { + fprintf(stream, "%s", name); + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, ",arena,ordblks,uordblks,fordblks"); + fprintf(stream, ",%zu,%zu,%zu,%zu", meminfo->arena, meminfo->ordblks, + meminfo->uordblks, meminfo->fordblks); + } else { + if (opt_fields & D_TM_INCLUDE_TYPE) + fprintf(stream, "type: arena,ordblks,uordblks,fordblks,"); + fprintf(stream, "%s:%zu,%zu,%zu,%zu", name, meminfo->arena, + meminfo->ordblks, meminfo->uordblks, meminfo->fordblks); + } +} + /** * Prints the time snapshot \a tms with \a name to the \a stream provided * @@ -1147,6 +1169,9 @@ d_tm_print_metadata(char *desc, char *units, int format, FILE *stream) } } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node); /** * Prints a single \a node. * Used as a convenience function to demonstrate usage for the client @@ -1179,6 +1204,7 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *name = NULL; char *desc = NULL; char *units = NULL; + struct d_tm_meminfo_t meminfo; bool stats_printed = false; bool show_timestamp = false; bool show_meta = false; @@ -1247,6 +1273,14 @@ d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, } d_tm_print_timestamp(&clk, name, format, opt_fields, stream); break; + case D_TM_MEMINFO: + rc = d_tm_get_meminfo(ctx, &meminfo, node); + if (rc != DER_SUCCESS) { + fprintf(stream, "Error on meminfo read: %d\n", rc); + break; + } + d_tm_print_meminfo(&meminfo, name, format, opt_fields, stream); + break; case D_TM_TIMER_SNAPSHOT: case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_REALTIME): case (D_TM_TIMER_SNAPSHOT | D_TM_CLOCK_PROCESS_CPUTIME): @@ -1745,6 +1779,41 @@ d_tm_record_timestamp(struct d_tm_node_t *metric) d_tm_node_unlock(metric); } +/** + * Record the current meminfo + * + * \param[in] metric Pointer to the metric + */ +void +d_tm_record_meminfo(struct d_tm_node_t *metric) +{ +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33) + struct mallinfo2 mi; + + mi = mallinfo2(); +#else + struct mallinfo mi; + + mi = mallinfo(); +#endif + + if (metric == NULL) + return; + + if (metric->dtn_type != D_TM_MEMINFO) { + D_ERROR("Failed to record meminfo on item %s not a " + "meminfo. 
Operation mismatch: " DF_RC "\n", + metric->dtn_name, DP_RC(-DER_OP_NOT_PERMITTED)); + return; + } + d_tm_node_lock(metric); + metric->dtn_metric->dtm_data.meminfo.arena = mi.arena; + metric->dtn_metric->dtm_data.meminfo.ordblks = mi.ordblks; + metric->dtn_metric->dtm_data.meminfo.uordblks = mi.uordblks; + metric->dtn_metric->dtm_data.meminfo.fordblks = mi.fordblks; + d_tm_node_unlock(metric); +} + /** * Read and store a high resolution timer snapshot value * @@ -2977,6 +3046,36 @@ d_tm_get_timestamp(struct d_tm_context *ctx, time_t *val, return DER_SUCCESS; } +static int +d_tm_get_meminfo(struct d_tm_context *ctx, struct d_tm_meminfo_t *meminfo, + struct d_tm_node_t *node) +{ + struct d_tm_metric_t *metric_data = NULL; + struct d_tm_shmem_hdr *shmem = NULL; + int rc; + + if (ctx == NULL || meminfo == NULL || node == NULL) + return -DER_INVAL; + + rc = validate_node_ptr(ctx, node, &shmem); + if (rc != 0) + return rc; + + if (node->dtn_type != D_TM_MEMINFO) + return -DER_OP_NOT_PERMITTED; + + metric_data = conv_ptr(shmem, node->dtn_metric); + if (metric_data != NULL) { + d_tm_node_lock(node); + *meminfo = metric_data->dtm_data.meminfo; + d_tm_node_unlock(node); + } else { + return -DER_METRIC_NOT_FOUND; + } + return DER_SUCCESS; +} + + /** * Client function to read the specified high resolution timer. * diff --git a/src/include/daos/common.h b/src/include/daos/common.h index c7af0fc6563..78acb71858c 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -237,6 +237,27 @@ setbit_range(uint8_t *bitmap, uint32_t start, uint32_t end) setbit(bitmap, index); } +static inline void +setbits64(uint64_t *bmap, int at, int bits) +{ + setbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +static inline void +clrbits64(uint64_t *bmap, int at, int bits) +{ + clrbit_range((uint8_t *)bmap, at, at + bits - 1); +} + +#define setbit64(bm, at) setbit(((uint8_t *)bm), at) +#define clrbit64(bm, at) clrbit(((uint8_t *)bm), at) +#define isset64(bm, at) isset(((uint8_t *)bm), at) + +int +daos_find_bits(uint64_t *used, uint64_t *reserved, int bmap_sz, int bits_min, int *bits); +int +daos_count_free_bits(uint64_t *used, int bmap_sz); + static inline unsigned int daos_power2_nbits(unsigned int val) { diff --git a/src/include/daos/dtx.h b/src/include/daos/dtx.h index 272c041dabf..14b2337ea0f 100644 --- a/src/include/daos/dtx.h +++ b/src/include/daos/dtx.h @@ -174,6 +174,7 @@ struct dtx_id { void daos_dti_gen_unique(struct dtx_id *dti); void daos_dti_gen(struct dtx_id *dti, bool zero); +void daos_dti_reset(void); static inline void daos_dti_copy(struct dtx_id *des, const struct dtx_id *src) diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index 25b517c9f21..86709a6bd94 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -205,12 +205,10 @@ extern "C" { ACTION(DER_NVME_IO, NVMe I / O error) \ ACTION(DER_NO_CERT, Unable to access one or more certificates) \ ACTION(DER_BAD_CERT, Invalid x509 certificate) \ - ACTION(DER_VOS_PARTIAL_UPDATE, VOS partial update error) \ + ACTION(DER_VOS_PARTIAL_UPDATE, Same epoch partial overwrite of VOS array value disallowed) \ ACTION(DER_CHKPT_BUSY, Page is temporarily read only due to checkpointing) \ ACTION(DER_DIV_BY_ZERO, Division by zero) -/* clang-format on */ - /** Defines the gurt error codes */ #define D_FOREACH_ERR_RANGE(ACTION) \ ACTION(GURT, 1000) \ diff --git a/src/include/daos_srv/dtx_srv.h b/src/include/daos_srv/dtx_srv.h index 05cc162b19e..d0b2352783a 100644 --- a/src/include/daos_srv/dtx_srv.h +++ 
b/src/include/daos_srv/dtx_srv.h @@ -305,12 +305,7 @@ dtx_entry_get(struct dtx_entry *dte) return dte; } -static inline void -dtx_entry_put(struct dtx_entry *dte) -{ - if (--(dte->dte_refs) == 0) - D_FREE(dte); -} +void dtx_entry_put(struct dtx_entry *dte); static inline bool dtx_is_valid_handle(const struct dtx_handle *dth) diff --git a/src/include/daos_srv/vea.h b/src/include/daos_srv/vea.h index bdcd6c2ad21..1b37d1c042f 100644 --- a/src/include/daos_srv/vea.h +++ b/src/include/daos_srv/vea.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -20,23 +20,6 @@ #include #include -/* Common free extent structure for both SCM & in-memory index */ -struct vea_free_extent { - uint64_t vfe_blk_off; /* Block offset of the extent */ - uint32_t vfe_blk_cnt; /* Total blocks of the extent */ - uint32_t vfe_age; /* Monotonic timestamp */ -}; - -/* Maximum extents a non-contiguous allocation can have */ -#define VEA_EXT_VECTOR_MAX 9 - -/* Allocated extent vector */ -struct vea_ext_vector { - uint64_t vev_blk_off[VEA_EXT_VECTOR_MAX]; - uint32_t vev_blk_cnt[VEA_EXT_VECTOR_MAX]; - uint32_t vev_size; /* Size of the extent vector */ -}; - /* Reserved extent(s) */ struct vea_resrvd_ext { /* Link to a list for a series of vea_reserve() calls */ @@ -49,8 +32,12 @@ struct vea_resrvd_ext { uint64_t vre_hint_seq; /* Total reserved blocks */ uint32_t vre_blk_cnt; + /* New extent allocated for bitmap */ + uint32_t vre_new_bitmap_chunk:1; /* Extent vector for non-contiguous reserve */ struct vea_ext_vector *vre_vector; + /* private pointer */ + void *vre_private; }; /* @@ -83,6 +70,8 @@ struct vea_unmap_context { bool vnc_ext_flush; }; +#define VEA_COMPAT_FEATURE_BITMAP (1 << 0) + /* Free space tracking information on SCM */ struct vea_space_df { uint32_t vsd_magic; @@ -95,8 +84,8 @@ struct vea_space_df { uint64_t vsd_tot_blks; /* Free extent tree, sorted by offset */ struct btr_root vsd_free_tree; - /* Allocated extent vector tree, for non-contiguous allocation */ - struct btr_root vsd_vec_tree; + /* Free bitmap tree, sorted by offset */ + struct btr_root vsd_bitmap_tree; }; /* VEA attributes */ @@ -116,8 +105,10 @@ struct vea_stat { uint64_t vs_resrv_hint; /* Number of hint reserve */ uint64_t vs_resrv_large; /* Number of large reserve */ uint64_t vs_resrv_small; /* Number of small reserve */ + uint64_t vs_resrv_bitmap; /* Number of bitmap reserve */ uint64_t vs_frags_large; /* Large free frags */ uint64_t vs_frags_small; /* Small free frags */ + uint64_t vs_frags_bitmap; /* Bitmap frags */ uint64_t vs_frags_aging; /* Aging frags */ }; @@ -148,6 +139,20 @@ int vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, struct vea_space_df *md, uint32_t blk_sz, uint32_t hdr_blks, uint64_t capacity, vea_format_callback_t cb, void *cb_data, bool force); +/** + * Upgrade VEA to support latest disk format + * + * \param vsi [IN] In-memory compound free extent index + * \param umem [IN] An instance of SCM + * \param md [IN] The allocation metadata on SCM + * \param version [IN] Version which we try to upgrade + * + * \return Zero on success, in-memory compound free extent + * index returned by @vsi; Appropriated negative + * value on error + */ +int vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version); /** * Load space tracking information from SCM to initialize the in-memory compound diff --git a/src/include/gurt/common.h 
b/src/include/gurt/common.h index c6a8f241b26..cfce1a490ec 100644 --- a/src/include/gurt/common.h +++ b/src/include/gurt/common.h @@ -506,6 +506,10 @@ int d_getenv_uint64_t(const char *env, uint64_t *val); int d_write_string_buffer(struct d_string_buffer_t *buf, const char *fmt, ...); void d_free_string(struct d_string_buffer_t *buf); +typedef void (*d_alloc_track_cb_t)(void *arg, size_t size); + +void d_set_alloc_track_cb(d_alloc_track_cb_t alloc_cb, d_alloc_track_cb_t free_cb, void *arg); + #if !defined(container_of) /* given a pointer @ptr to the field @member embedded into type (usually * struct) @type, return pointer to the embedding instance of @type. diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 8852a1764cf..983ec2553f2 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2022 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -139,6 +139,7 @@ enum { D_TM_CLOCK_PROCESS_CPUTIME = 0x100, D_TM_CLOCK_THREAD_CPUTIME = 0x200, D_TM_LINK = 0x400, + D_TM_MEMINFO = 0x800, D_TM_ALL_NODES = (D_TM_DIRECTORY | \ D_TM_COUNTER | \ D_TM_TIMESTAMP | \ @@ -146,7 +147,8 @@ enum { D_TM_DURATION | \ D_TM_GAUGE | \ D_TM_STATS_GAUGE | \ - D_TM_LINK) + D_TM_LINK | \ + D_TM_MEMINFO) }; enum { @@ -203,10 +205,18 @@ struct d_tm_histogram_t { int dth_value_multiplier; }; +struct d_tm_meminfo_t { + uint64_t arena; + uint64_t ordblks; + uint64_t uordblks; + uint64_t fordblks; +}; + struct d_tm_metric_t { union data { uint64_t value; struct timespec tms[2]; + struct d_tm_meminfo_t meminfo; } dtm_data; struct d_tm_stats_t *dtm_stats; struct d_tm_histogram_t *dtm_histogram; diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h index de85ea11932..5cd323637d4 100644 --- a/src/include/gurt/telemetry_producer.h +++ b/src/include/gurt/telemetry_producer.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,6 +12,7 @@ void d_tm_set_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_inc_counter(struct d_tm_node_t *metric, uint64_t value); void d_tm_record_timestamp(struct d_tm_node_t *metric); +void d_tm_record_meminfo(struct d_tm_node_t *metric); void d_tm_take_timer_snapshot(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_start(struct d_tm_node_t *metric, int clk_id); void d_tm_mark_duration_end(struct d_tm_node_t *metric); diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 912218b58bd..6b740050c98 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -1504,7 +1504,7 @@ void mgmt__pool_query_target_resp__free_unpacked assert(message->base.descriptor == &mgmt__pool_query_target_resp__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[13] = +static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[14] = { { "uuid", @@ -1662,10 +1662,23 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_req__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 14, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolCreateReq, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { 4, /* field[4] = acl */ 6, /* field[6] = faultDomains */ + 13, /* field[13] = meta_blob_size */ 10, /* field[10] = numranks */ 7, /* field[7] = numsvcreps */ 5, /* field[5] = properties */ @@ -1681,7 +1694,7 @@ static const unsigned mgmt__pool_create_req__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_req__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 13 } + { 0, 14 } }; const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = { @@ -1691,14 +1704,14 @@ const ProtobufCMessageDescriptor mgmt__pool_create_req__descriptor = "Mgmt__PoolCreateReq", "mgmt", sizeof(Mgmt__PoolCreateReq), - 13, + 14, mgmt__pool_create_req__field_descriptors, mgmt__pool_create_req__field_indices_by_name, 1, mgmt__pool_create_req__number_ranges, (ProtobufCMessageInit) mgmt__pool_create_req__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[6] = { { "status", @@ -1760,9 +1773,22 @@ static const ProtobufCFieldDescriptor mgmt__pool_create_resp__field_descriptors[ 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "meta_blob_size", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolCreateResp, meta_blob_size), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { 1, /* field[1] = leader */ + 5, /* field[5] = meta_blob_size */ 0, /* field[0] = status */ 2, /* field[2] = svc_reps */ 3, /* field[3] = tgt_ranks */ @@ -1771,7 +1797,7 @@ static const unsigned mgmt__pool_create_resp__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_create_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = { @@ -1781,7 +1807,7 @@ const 
ProtobufCMessageDescriptor mgmt__pool_create_resp__descriptor = "Mgmt__PoolCreateResp", "mgmt", sizeof(Mgmt__PoolCreateResp), - 5, + 6, mgmt__pool_create_resp__field_descriptors, mgmt__pool_create_resp__field_indices_by_name, 1, mgmt__pool_create_resp__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index 1fc8a18ae76..9357267326f 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -209,10 +209,14 @@ struct _Mgmt__PoolCreateReq */ size_t n_tierbytes; uint64_t *tierbytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define MGMT__POOL_CREATE_REQ__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_req__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0,NULL, 0,NULL, 0 } /* @@ -244,10 +248,14 @@ struct _Mgmt__PoolCreateResp */ size_t n_tier_bytes; uint64_t *tier_bytes; + /* + * Size in bytes of metadata blob on SSD (manual config) + */ + uint64_t meta_blob_size; }; #define MGMT__POOL_CREATE_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_create_resp__descriptor) \ - , 0, 0, 0,NULL, 0,NULL, 0,NULL } + , 0, 0, 0,NULL, 0,NULL, 0,NULL, 0 } /* diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index a5d089f2c11..9ef6054beda 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -497,7 +497,8 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) /* Ranks to allocate targets (in) & svc for pool replicas (out). 
*/ rc = ds_mgmt_create_pool(pool_uuid, req->sys, "pmem", targets, req->tierbytes[DAOS_MEDIA_SCM], req->tierbytes[DAOS_MEDIA_NVME], - prop, &svc, req->n_faultdomains, req->faultdomains); + prop, &svc, req->n_faultdomains, req->faultdomains, + req->meta_blob_size); if (rc != 0) { D_ERROR("failed to create pool: "DF_RC"\n", DP_RC(rc)); goto out; diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 9cfc0b5a0ab..6bd142022ab 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -66,7 +66,7 @@ int ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in); /** srv_pool.c */ int ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, - int domains_nr, uint32_t *domains); + int domains_nr, uint32_t *domains, size_t meta_blob_size); int ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks); int ds_mgmt_evict_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uuid_t *handles, size_t n_handles, uint32_t destroy, uint32_t force_destroy, diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index e5f94333f1c..5f99c9ef406 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -168,16 +168,17 @@ ds_mgmt_pool_svc_create(uuid_t pool_uuid, int ntargets, const char *group, d_ran } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, size_t nvme_size, - daos_prop_t *prop, d_rank_list_t **svcp, - int domains_nr, uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { d_rank_list_t *pg_ranks = NULL; d_rank_list_t *pg_targets = NULL; int rc; int rc_cleanup; + D_DEBUG(DB_MGMT, DF_UUID ": meta blob size %ld", DP_UUID(pool_uuid), meta_blob_size); + /* Sanity check targets versus cart's current primary group members. * If any targets not in PG, flag error before MGMT_TGT_ corpcs fail. 
*/ diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index cfd562891e0..c3900429dfe 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1010,7 +1010,7 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = mgmt__join_resp__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = +static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = { { "status", @@ -1060,18 +1060,6 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, - { - "localJoin", - 5, - PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_BOOL, - 0, /* quantifier_offset */ - offsetof(Mgmt__JoinResp, localjoin), - NULL, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, { "map_version", 6, @@ -1087,16 +1075,16 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = }; static const unsigned mgmt__join_resp__field_indices_by_name[] = { 3, /* field[3] = faultDomain */ - 4, /* field[4] = localJoin */ - 5, /* field[5] = map_version */ + 4, /* field[4] = map_version */ 1, /* field[1] = rank */ 2, /* field[2] = state */ 0, /* field[0] = status */ }; -static const ProtobufCIntRange mgmt__join_resp__number_ranges[1 + 1] = +static const ProtobufCIntRange mgmt__join_resp__number_ranges[2 + 1] = { { 1, 0 }, - { 0, 6 } + { 6, 4 }, + { 0, 5 } }; const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = { @@ -1106,10 +1094,10 @@ const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = "Mgmt__JoinResp", "mgmt", sizeof(Mgmt__JoinResp), - 6, + 5, mgmt__join_resp__field_descriptors, mgmt__join_resp__field_indices_by_name, - 1, mgmt__join_resp__number_ranges, + 2, mgmt__join_resp__number_ranges, (ProtobufCMessageInit) mgmt__join_resp__init, NULL,NULL,NULL /* reserved[123] */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 55acb283028..c1d61ef44fb 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -163,10 +163,6 @@ struct _Mgmt__JoinResp * Fault domain for the instance */ char *faultdomain; - /* - * Join processed locally. - */ - protobuf_c_boolean localjoin; /* * Join processed in this version of the system map. 
*/ @@ -174,7 +170,7 @@ struct _Mgmt__JoinResp }; #define MGMT__JOIN_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_resp__descriptor) \ - , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0, 0 } + , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0 } struct _Mgmt__LeaderQueryReq diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 4b104f19195..80f95891c8d 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -509,11 +509,9 @@ ds_mgmt_group_update_handler(struct mgmt_grp_up_in *in) } int -ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, - d_rank_list_t *targets, size_t scm_size, - size_t nvme_size, daos_prop_t *prop, - d_rank_list_t **svcp, int nr_domains, - uint32_t *domains) +ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, char *tgt_dev, d_rank_list_t *targets, + size_t scm_size, size_t nvme_size, daos_prop_t *prop, d_rank_list_t **svcp, + int domains_nr, uint32_t *domains, size_t meta_blob_size) { return 0; } diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 51280364c2b..fa7579ca516 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -620,7 +620,6 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, d_iov_t *csum_iov_fetch) { struct migrate_pool_tls *tls; - struct dc_object *obj; int rc = 0; tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid, @@ -634,21 +633,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, if (daos_oclass_grp_size(&mrone->mo_oca) > 1) flags |= DIOF_TO_LEADER; - /** - * For EC data migration, let's force it to do degraded fetch, - * make sure reintegration will not fetch from the original - * shard, which might cause parity corruption. 
- */ - obj = obj_hdl2ptr(oh); - if (iods[0].iod_type != DAOS_IOD_SINGLE && - daos_oclass_is_ec(&mrone->mo_oca) && - is_ec_data_shard(obj, mrone->mo_dkey_hash, mrone->mo_oid.id_shard) && - obj_ec_parity_alive(oh, mrone->mo_dkey_hash, NULL)) - flags |= DIOF_FOR_FORCE_DEGRADE; - - obj_decref(obj); - - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); if (rc != 0) @@ -669,7 +654,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, csum_iov_fetch->iov_len = 0; csum_iov_fetch->iov_buf = p; - rc = dsc_obj_fetch(oh, mrone->mo_epoch, &mrone->mo_dkey, iod_num, iods, sgls, + rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, NULL, csum_iov_fetch); } @@ -1223,7 +1208,8 @@ migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, static int __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, - daos_iod_t *iods, int iod_num, daos_epoch_t update_eph, + daos_iod_t *iods, int iod_num, daos_epoch_t fetch_eph, + daos_epoch_t update_eph, uint32_t flags, struct ds_cont_child *ds_cont) { d_sg_list_t sgls[OBJ_ENUM_UNPACK_MAX_IODS]; @@ -1282,8 +1268,7 @@ __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, p_csum_iov = &csum_iov; } - rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, mrone->mo_epoch, - flags, p_csum_iov); + rc = mrone_obj_fetch(mrone, oh, sgls, iods, iod_num, fetch_eph, flags, p_csum_iov); if (rc) { D_ERROR("migrate dkey "DF_KEY" failed: "DF_RC"\n", DP_KEY(&mrone->mo_dkey), DP_RC(rc)); @@ -1358,6 +1343,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, if (!daos_oclass_is_ec(&mrone->mo_oca)) return __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods, mrone->mo_iod_num, + mrone->mo_epoch, mrone->mo_min_epoch, DIOF_FOR_MIGRATION, ds_cont); @@ -1370,22 +1356,19 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, * this data shard. 
*/ - if (mrone->mo_iods_num_from_parity > 0) { - daos_epoch_t min_eph = DAOS_EPOCH_MAX; + for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { + for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) { + daos_iod_t iod = mrone->mo_iods_from_parity[i]; - for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { - for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) - min_eph = min(min_eph, - mrone->mo_iods_update_ephs_from_parity[i][j]); + iod.iod_nr = 1; + iod.iod_recxs = &mrone->mo_iods_from_parity[i].iod_recxs[j]; + rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_iods_update_ephs_from_parity[i][j], + mrone->mo_iods_update_ephs_from_parity[i][j], + DIOF_EC_RECOV_FROM_PARITY, ds_cont); + if (rc != 0) + D_GOTO(out, rc); } - - rc = __migrate_fetch_update_bulk(mrone, oh, mrone->mo_iods_from_parity, - mrone->mo_iods_num_from_parity, - min_eph, - DIOF_FOR_MIGRATION | DIOF_EC_RECOV_FROM_PARITY, - ds_cont); - if (rc != 0) - D_GOTO(out, rc); } /* The data, rebuilt from replication, needs to keep the same epoch during rebuild, @@ -1401,6 +1384,7 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, iod.iod_nr = 1; iod.iod_recxs = &mrone->mo_iods[i].iod_recxs[j]; rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, + mrone->mo_epoch, mrone->mo_iods_update_ephs[i][j], DIOF_FOR_MIGRATION, ds_cont); if (rc < 0) { @@ -1500,21 +1484,31 @@ migrate_punch(struct migrate_pool_tls *tls, struct migrate_one *mrone, static int migrate_get_cont_child(struct migrate_pool_tls *tls, uuid_t cont_uuid, - struct ds_cont_child **cont_p) + struct ds_cont_child **cont_p, bool create) { struct ds_cont_child *cont_child = NULL; int rc; *cont_p = NULL; - if (tls->mpt_opc == RB_OP_EXTEND || tls->mpt_opc == RB_OP_REINT) { - /* For extend and reintegration, it may need create the container */ + if (tls->mpt_pool->spc_pool->sp_stopping) { + D_DEBUG(DB_REBUILD, DF_UUID "pool is being destroyed.\n", + DP_UUID(tls->mpt_pool_uuid)); + return 0; + } + + if (create) { + /* Since the shard might be moved different location for any pool operation, + * so it may need create the container in all cases. 
+ */ rc = ds_cont_child_open_create(tls->mpt_pool_uuid, cont_uuid, &cont_child); if (rc != 0) { - if (rc == -DER_SHUTDOWN) { + if (rc == -DER_SHUTDOWN || (cont_child && cont_child->sc_stopping)) { D_DEBUG(DB_REBUILD, DF_UUID "container is being destroyed\n", DP_UUID(cont_uuid)); rc = 0; } + if (cont_child) + ds_cont_child_put(cont_child); return rc; } } else { @@ -1548,7 +1542,7 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, int rc; D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont); + rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont, true); if (rc || cont == NULL) D_GOTO(cont_put, rc); @@ -2343,10 +2337,9 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) migrate_tgt_off = obj_ec_shard_off_by_layout_ver(layout_ver, io->ui_dkey_hash, &arg->oc_attr, shard); unpack_tgt_off = obj_ec_shard_off(obj, io->ui_dkey_hash, io->ui_oid.id_shard); - if ((rc == 1 && + if (rc == 1 && (is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) || - (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) || - (tls->mpt_opc == RB_OP_EXCLUDE && io->ui_oid.id_shard == shard)) { + (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) { D_DEBUG(DB_REBUILD, DF_UOID" ignore shard "DF_KEY"/%u/%d/%u/%d.\n", DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); @@ -2458,7 +2451,7 @@ migrate_obj_punch_one(void *data) tls, DP_UUID(tls->mpt_pool_uuid), arg->version, arg->punched_epoch, DP_UOID(arg->oid)); - rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont); + rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont, true); if (rc != 0 || cont == NULL) D_GOTO(put, rc); @@ -2579,7 +2572,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, /* Only open with RW flag, reintegrating flag will be set, which is needed * during unpack_cb to check if parity shard alive. */ - rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RW, &oh); + rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &oh); if (rc) { D_ERROR("dsc_obj_open failed: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_cont, rc); @@ -2960,7 +2953,7 @@ migrate_obj_ult(void *data) struct ds_cont_child *cont_child = NULL; /* check again to see if the container is being destroyed. 
*/ - migrate_get_cont_child(tls, arg->cont_uuid, &cont_child); + migrate_get_cont_child(tls, arg->cont_uuid, &cont_child, false); if (cont_child == NULL || cont_child->sc_stopping) rc = 0; diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c index bbcc07f2dc1..1b57aff1719 100644 --- a/src/placement/jump_map.c +++ b/src/placement/jump_map.c @@ -734,6 +734,8 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l } else { if (domain != NULL) setbit(dom_cur_grp_real, domain - root); + if (pool_target_down(target)) + layout->ol_shards[k].po_rebuilding = 1; } if (is_extending != NULL && pool_target_is_up_or_drain(target)) @@ -743,7 +745,7 @@ get_object_layout(struct pl_jump_map *jmap, uint32_t layout_ver, struct pl_obj_l if (fail_tgt_cnt > 0) rc = obj_remap_shards(jmap, layout_ver, md, layout, jmop, &remap_list, out_list, - allow_status, md->omd_ver, tgts_used, dom_used, dom_full, + allow_status, allow_version, tgts_used, dom_used, dom_full, fail_tgt_cnt, is_extending, fdom_lvl); out: if (rc) @@ -1025,7 +1027,12 @@ jump_map_obj_place(struct pl_map *map, uint32_t layout_version, struct daos_obj_ return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + if (mode & DAOS_OO_RO) + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | + PO_COMP_ST_DOWN; + else + allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; + rc = obj_layout_alloc_and_get(jmap, layout_version, &jmop, md, allow_status, md->omd_ver, &layout, NULL, &is_extending); if (rc != 0) { @@ -1090,66 +1097,16 @@ jump_map_obj_place(struct pl_map *map, uint32_t layout_version, struct daos_obj_ * another target, Or 0 if none need to be rebuilt. */ static int -jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, - uint32_t *tgt_id, uint32_t *shard_idx, unsigned int array_size) -{ - struct pl_jump_map *jmap; - struct pl_obj_layout *layout; - d_list_t remap_list; - struct jm_obj_placement jmop; - daos_obj_id_t oid; - int rc; - - int idx = 0; - - D_DEBUG(DB_PL, "Finding Rebuild at version: %u\n", rebuild_ver); - - /* Caller should guarantee the pl_map is up-to-date */ - if (pl_map_version(map) < rebuild_ver) { - D_ERROR("pl_map version(%u) < rebuild version(%u)\n", - pl_map_version(map), rebuild_ver); - return -DER_INVAL; - } - - jmap = pl_map2jmap(map); - oid = md->omd_id; - - rc = jm_obj_placement_init(jmap, md, shard_md, &jmop); - if (rc) { - D_ERROR("jm_obj_placement_init failed, rc "DF_RC"\n", DP_RC(rc)); - return rc; - } - - D_INIT_LIST_HEAD(&remap_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jmop, md, PO_COMP_ST_UPIN, - rebuild_ver, &layout, &remap_list, NULL); - if (rc < 0) - D_GOTO(out, rc); - - obj_layout_dump(oid, layout); - rc = remap_list_fill(map, md, shard_md, rebuild_ver, tgt_id, shard_idx, - array_size, &idx, layout, &remap_list, false); - -out: - jm_obj_placement_fini(&jmop); - remap_list_free_all(&remap_list); - if (layout != NULL) - pl_obj_layout_free(layout); - return rc < 0 ? 
rc : idx; -} - -static int -jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, - struct daos_obj_shard_md *shard_md, uint32_t reint_ver, - uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) +jump_map_obj_find_diff(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t old_status, uint32_t new_status, + uint32_t *tgt_rank, uint32_t *shard_id, unsigned int array_size) { struct pl_jump_map *jmap; struct pl_obj_layout *layout = NULL; struct pl_obj_layout *reint_layout = NULL; d_list_t reint_list; struct jm_obj_placement jop; - uint32_t allow_status; int rc; int idx = 0; @@ -1170,16 +1127,14 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc; } - allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN; D_INIT_LIST_HEAD(&reint_list); - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, old_status, reint_ver, &layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); obj_layout_dump(md->omd_id, layout); - allow_status |= PO_COMP_ST_UP; - rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, allow_status, + rc = obj_layout_alloc_and_get(jmap, layout_ver, &jop, md, new_status, reint_ver, &reint_layout, NULL, NULL); if (rc < 0) D_GOTO(out, rc); @@ -1200,6 +1155,27 @@ jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj return rc < 0 ? rc : idx; } +static int +jump_map_obj_find_reint(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t reint_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, reint_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_UP, + tgt_id, shard_id, array_size); +} + +static int +jump_map_obj_find_rebuild(struct pl_map *map, uint32_t layout_ver, struct daos_obj_md *md, + struct daos_obj_shard_md *shard_md, uint32_t rebuild_ver, + uint32_t *tgt_id, uint32_t *shard_id, unsigned int array_size) +{ + return jump_map_obj_find_diff(map, layout_ver, md, shard_md, rebuild_ver, + PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN | PO_COMP_ST_DOWN, + PO_COMP_ST_UPIN, tgt_id, shard_id, array_size); +} + /** API for generic placement map functionality */ struct pl_map_ops jump_map_ops = { .o_create = jump_map_create, diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c index 691d0e1e600..47f620d6635 100644 --- a/src/placement/pl_map_common.c +++ b/src/placement/pl_map_common.c @@ -327,7 +327,8 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, * skip this shard. 
*/ if (f_shard->fs_status == PO_COMP_ST_DOWN || - f_shard->fs_status == PO_COMP_ST_DRAIN) + f_shard->fs_status == PO_COMP_ST_DRAIN || + pool_target_down(spare_tgt)) l_shard->po_rebuilding = 1; } else { l_shard->po_shard = -1; diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c index fdbc08ef07e..5de8ba810c4 100644 --- a/src/placement/tests/jump_map_place_obj.c +++ b/src/placement/tests/jump_map_place_obj.c @@ -1607,7 +1607,7 @@ placement_handles_multiple_states(void **state) */ ctx.ver = ver_after_fail; jtc_scan(&ctx); - assert_int_equal(ctx.rebuild.out_nr, 1); + assert_int_equal(ctx.rebuild.out_nr, 2); /* Complete the rebuild */ ctx.ver = ver_after_reint_complete; /* Restore the version first */ diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 1dd9842db51..51b55b1254f 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -32,6 +32,7 @@ message PoolCreateReq { uint32 numranks = 11; // Number of target ranks to use (auto config) repeated uint32 ranks = 12; // target ranks (manual config) repeated uint64 tierbytes = 13; // Size in bytes of storage tiers (manual config) + uint64 meta_blob_size = 14; // Size in bytes of metadata blob on SSD (manual config) } // PoolCreateResp returns created pool uuid and ranks. @@ -41,6 +42,7 @@ message PoolCreateResp { repeated uint32 svc_reps = 3; // pool service replica ranks repeated uint32 tgt_ranks = 4; // pool target ranks repeated uint64 tier_bytes = 5; // storage tiers allocated to pool + uint64 meta_blob_size = 6; // Size in bytes of metadata blob on SSD (manual config) } // PoolDestroyReq supplies pool identifier and force flag. diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 400452837ce..668a9905bfd 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -44,6 +44,8 @@ message JoinReq { } message JoinResp { + reserved 5; + reserved "localJoin"; int32 status = 1; // DAOS error code uint32 rank = 2; // Server rank assigned. enum State { @@ -52,7 +54,6 @@ message JoinResp { } State state = 3; // Server state in the system map. string faultDomain = 4; // Fault domain for the instance - bool localJoin = 5; // Join processed locally. uint32 map_version = 6; // Join processed in this version of the system map. } diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 8587d7b5d8c..0f8707f5aab 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -646,7 +646,7 @@ rebuild_object(struct rebuild_tgt_pool_tracker *rpt, uuid_t co_uuid, daos_unit_o rc = 0; if (myrank == target->ta_comp.co_rank && mytarget == target->ta_comp.co_index && - rpt->rt_rebuild_op != RB_OP_UPGRADE) { + (shard == oid.id_shard) && rpt->rt_rebuild_op != RB_OP_UPGRADE) { D_DEBUG(DB_REBUILD, DF_UOID" %u/%u already on the target shard\n", DP_UOID(oid), myrank, mytarget); return 0; diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 94d0b2a79bc..e16583436ce 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1412,7 +1412,8 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, task->dst_new_layout_version, &task->dst_tgts, retry_opc, 5); } else if (task->dst_rebuild_op == RB_OP_REINT || task->dst_rebuild_op == RB_OP_EXTEND || - task->dst_rebuild_op == RB_OP_UPGRADE) { + task->dst_rebuild_op == RB_OP_UPGRADE || task->dst_rebuild_op == RB_OP_EXCLUDE || + task->dst_rebuild_op == RB_OP_DRAIN) { /* Otherwise schedule reclaim for reintegrate/extend/upgrade. 
*/ rgt->rgt_status.rs_state = DRS_IN_PROGRESS; rc = ds_rebuild_schedule(pool, task->dst_map_ver, rgt->rgt_reclaim_epoch, diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 20bbfcf6296..a016e1937c9 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -27,7 +27,7 @@ timeouts: test_daos_extend_simple: 3600 test_daos_oid_allocator: 640 test_daos_checksum: 500 - test_daos_rebuild_ec: 4800 + test_daos_rebuild_ec: 6400 test_daos_aggregate_ec: 200 test_daos_degraded_ec: 1900 test_daos_dedup: 220 diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index acb2b8cb6a5..d1afe8366b2 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -135,9 +135,11 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): # Note that run_on_vms does not tell ftest where to run, this should be set according to # the test tags so the test can run with appropriate settings. + remote_env = {} if run_on_vms: dfuse_namespace = dfuse_namespace = "/run/dfuse_vm/*" - build_jobs = 6 * 2 + build_jobs = 6 + remote_env['D_IL_MAX_EQ'] = '2' intercept_jobs = build_jobs if intercept: @@ -189,7 +191,6 @@ def run_build_test(self, cache_mode, intercept=False, run_on_vms=False): mount_dir = self.dfuse.mount_dir.value build_dir = os.path.join(mount_dir, 'daos') - remote_env = {} remote_env['PATH'] = '{}:$PATH'.format(os.path.join(mount_dir, 'venv', 'bin')) remote_env['VIRTUAL_ENV'] = os.path.join(mount_dir, 'venv') remote_env['COVFILE'] = os.environ['COVFILE'] diff --git a/src/tests/ftest/pool/svc.yaml b/src/tests/ftest/pool/svc.yaml index 9c9f47448c0..a2811dae6ca 100644 --- a/src/tests/ftest/pool/svc.yaml +++ b/src/tests/ftest/pool/svc.yaml @@ -12,7 +12,7 @@ server_config: class: ram scm_mount: /mnt/daos system_ram_reserved: 1 -timeout: 200 +timeout: 300 pool: control_method: dmg scm_size: 134217728 diff --git a/src/tests/ftest/rebuild/basic.py b/src/tests/ftest/rebuild/basic.py index c6263211190..2d7b0e723c1 100644 --- a/src/tests/ftest/rebuild/basic.py +++ b/src/tests/ftest/rebuild/basic.py @@ -97,7 +97,7 @@ def run_rebuild_test(self, pool_quantity): pi_ndisabled=target_count ) status &= pool.check_rebuild_status( - rs_state=2, rs_obj_nr=rs_obj_nr[index], rs_rec_nr=rs_rec_nr[index], rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/rebuild/container_create_race.py b/src/tests/ftest/rebuild/container_create_race.py index 2607c9ef6f0..6684d89ad53 100644 --- a/src/tests/ftest/rebuild/container_create_race.py +++ b/src/tests/ftest/rebuild/container_create_race.py @@ -152,8 +152,8 @@ def test_rebuild_container_create(self): # Check for pool and rebuild info after rebuild self.log.info("=> (6) Check for pool and rebuild info after rebuild") info_checks["pi_ndisabled"] += targets - rebuild_checks["rs_obj_nr"] = ">0" - rebuild_checks["rs_rec_nr"] = ">0" + rebuild_checks["rs_obj_nr"] = ">=0" + rebuild_checks["rs_rec_nr"] = ">=0" rebuild_checks["rs_state"] = 2 self.assertTrue( self.pool.check_pool_info(**info_checks), diff --git a/src/tests/ftest/rebuild/with_io.py b/src/tests/ftest/rebuild/with_io.py index 229b3fa3ca3..7e7a1e623d4 100644 --- a/src/tests/ftest/rebuild/with_io.py +++ b/src/tests/ftest/rebuild/with_io.py @@ -92,7 +92,7 @@ def test_rebuild_with_io(self): pi_ndisabled=targets, # DAOS-2799 ) status &= self.pool.check_rebuild_status( - 
rs_state=2, rs_obj_nr=">0", rs_rec_nr=">0", rs_errno=0) + rs_state=2, rs_errno=0) self.assertTrue(status, "Error confirming pool info after rebuild") # Verify the data after rebuild diff --git a/src/tests/ftest/scrubber/csum_fault.py b/src/tests/ftest/scrubber/csum_fault.py index a038e1d2d37..6e1b9a968db 100644 --- a/src/tests/ftest/scrubber/csum_fault.py +++ b/src/tests/ftest/scrubber/csum_fault.py @@ -20,7 +20,7 @@ def test_scrubber_csum_fault(self): whether scrubber finds them. :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium - :avocado: tags=scrubber + :avocado: tags=scrubber,faults :avocado: tags=TestWithScrubberFault,test_scrubber_csum_fault """ diff --git a/src/tests/ftest/util/rebuild_test_base.py b/src/tests/ftest/util/rebuild_test_base.py index a4f7d845e2e..1435aa1815e 100644 --- a/src/tests/ftest/util/rebuild_test_base.py +++ b/src/tests/ftest/util/rebuild_test_base.py @@ -75,8 +75,8 @@ def update_pool_verify(self): """Update the pool verification expected values.""" self.info_checks["pi_ndisabled"] = ">0" self.rebuild_checks["rs_state"] = 2 - self.rebuild_checks["rs_obj_nr"] = ">0" - self.rebuild_checks["rs_rec_nr"] = ">0" + self.rebuild_checks["rs_obj_nr"] = ">=0" + self.rebuild_checks["rs_rec_nr"] = ">=0" def execute_pool_verify(self, msg=None): """Verify the pool info. diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index cf8350f09ba..87716bb0465 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -272,60 +272,89 @@ def clean_files(self, verbose=True): Args: verbose (bool, optional): display clean commands. Defaults to True. - """ - clean_commands = [] - for index, engine_params in enumerate(self.manager.job.yaml.engine_params): - scm_mount = engine_params.get_value("scm_mount") - self.log.info("Cleaning up the %s directory.", str(scm_mount)) - - # Remove the superblocks - cmd = "sudo rm -fr {}/*".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) - - # Remove the shared memory segment associated with this io server - cmd = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) - clean_commands.append(cmd) - - # Dismount the scm mount point - cmd = "while sudo umount {}; do continue; done".format(scm_mount) - if cmd not in clean_commands: - clean_commands.append(cmd) + Raises: + ServerFailed: if there was an error cleaning up the daos server files + """ + scm_mounts = [] + scm_lists = [] + for engine_params in self.manager.job.yaml.engine_params: + scm_mounts.append(engine_params.get_value("scm_mount")) if self.manager.job.using_dcpm: scm_list = engine_params.get_value("scm_list") if isinstance(scm_list, list): - self.log.info("Cleaning up the following device(s): %s.", ", ".join(scm_list)) - # Umount and wipefs the dcpm device - cmd_list = [ - "for dev in {}".format(" ".join(scm_list)), - "do mount=$(lsblk $dev -n -o MOUNTPOINT)", - "if [ ! 
-z $mount ]", - "then while sudo umount $mount", - "do continue", - "done", - "fi", - "sudo wipefs -a $dev", - "done" - ] - cmd = "; ".join(cmd_list) - if cmd not in clean_commands: - clean_commands.append(cmd) + scm_lists.append(scm_list) + + for index, scm_mount in enumerate(scm_mounts): + # Remove the superblocks and dismount the scm mount point + self.log.info("Cleaning up the %s scm mount.", str(scm_mount)) + self.clean_mount(self._hosts, scm_mount, verbose, index) + + for scm_list in scm_lists: + # Umount and wipefs the dcpm device + self.log.info("Cleaning up the %s dcpm devices", str(scm_list)) + command_list = [ + "for dev in {}".format(" ".join(scm_list)), + "do mount=$(lsblk $dev -n -o MOUNTPOINT)", + "if [ ! -z $mount ]", + "then while sudo umount $mount", + "do continue", + "done", + "fi", + "sudo wipefs -a $dev", + "done" + ] + command = "; ".join(command_list) + result = run_remote(self.log, self._hosts, command, verbose) + if not result.passed: + raise ServerFailed("Failed cleaning {} on {}".format(scm_list, result.failed_hosts)) if self.manager.job.using_control_metadata: # Remove the contents (superblocks) of the control plane metadata path - cmd = "sudo rm -fr {}/*".format(self.manager.job.control_metadata.path.value) - if cmd not in clean_commands: - clean_commands.append(cmd) + self.log.info( + "Cleaning up the control metadata path %s", + self.manager.job.control_metadata.path.value) + self.clean_mount(self._hosts, self.manager.job.control_metadata.path.value, verbose) - if self.manager.job.control_metadata.device.value is not None: - # Dismount the control plane metadata mount point - cmd = "while sudo umount {}; do continue; done".format( - self.manager.job.control_metadata.device.value) - if cmd not in clean_commands: - clean_commands.append(cmd) + def clean_mount(self, hosts, mount, verbose=True, index=None): + """Clean the mount point by removing the superblocks and dismounting. - pcmd(self._hosts, "; ".join(clean_commands), verbose) + Args: + hosts (NodeSet): the hosts on which to clean the mount point + mount (str): the mount point to clean + verbose (bool, optional): display clean commands. Defaults to True. + index (int, optional): Defaults to None. 
+ + Raises: + ServerFailed: if there is an error cleaning the mount point + """ + self.log.debug("Checking for the existence of the %s mount point", mount) + command = "test -d {}".format(mount) + result = run_remote(self.log, hosts, command, verbose) + if result.passed_hosts: + mounted_hosts = result.passed_hosts + + # Remove the superblocks + self.log.debug("Removing the %s superblocks", mount) + command = "sudo rm -fr {}/*".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to remove superblocks for {} on {}".format(mount, result.failed_hosts)) + + if index is not None: + # Remove the shared memory segment associated with this io server + self.log.debug("Removing the shared memory segment") + command = "sudo ipcrm -M {}".format(self.D_TM_SHARED_MEMORY_KEY + index) + run_remote(self.log, self._hosts, command, verbose) + + # Dismount the scm mount point + self.log.debug("Dismount the %s mount point", mount) + command = "while sudo umount {}; do continue; done".format(mount) + result = run_remote(self.log, mounted_hosts, command, verbose) + if not result.passed: + raise ServerFailed( + "Failed to dismount {} on {}".format(mount, result.failed_hosts)) def prepare_storage(self, user, using_dcpm=None, using_nvme=None): """Prepare the server storage. @@ -445,6 +474,14 @@ def support_collect_log(self, **kwargs): return run_remote( self.log, self._hosts, cmd.with_exports, timeout=self.collect_log_timeout.value) + def display_memory_info(self): + """Display server hosts memory info.""" + self.log.debug("#" * 80) + self.log.debug(" Collection debug memory info") + run_remote(self.log, self._hosts, "free -m") + run_remote(self.log, self._hosts, "ps -eo size,pid,user,command --sort -size | head -n 6") + self.log.debug("#" * 80) + def detect_format_ready(self, reformat=False): """Detect when all the daos_servers are ready for storage format. @@ -637,11 +674,14 @@ def start(self): self.prepare() # Start the servers and wait for them to be ready for storage format + self.display_memory_info() self.detect_format_ready() # Collect storage and network information from the servers. + self.display_memory_info() self.information.collect_storage_information() self.information.collect_network_information() + self.display_memory_info() # Format storage and wait for server to change ownership self.log.info(" Formatting hosts: <%s>", self.dmg.hostlist) diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 2f3defae916..cc1cee34127 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -557,6 +557,14 @@ class TelemetryUtils(): ENGINE_NVME_RELIABILITY_METRICS +\ ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS + ENGINE_MEM_USAGE_METRICS = [ + "engine_mem_vos_dtx_cmt_ent_48", + "engine_mem_vos_vos_obj_360", + "engine_mem_vos_vos_lru_size", + "engine_mem_dtx_dtx_leader_handle_336", + "engine_mem_dtx_dtx_entry_40"] + ENGINE_MEM_TOTAL_USAGE_METRICS = [ + "engine_mem_total_mem"] def __init__(self, dmg, servers): """Create a TelemetryUtils object. 
@@ -587,6 +595,8 @@ def get_all_server_metrics_names(self, server, with_pools=False): all_metrics_names.extend(self.ENGINE_NET_METRICS) all_metrics_names.extend(self.ENGINE_RANK_METRICS) all_metrics_names.extend(self.ENGINE_DMABUFF_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_USAGE_METRICS) + all_metrics_names.extend(self.ENGINE_MEM_TOTAL_USAGE_METRICS) if with_pools: all_metrics_names.extend(self.ENGINE_POOL_METRICS) all_metrics_names.extend(self.ENGINE_CONTAINER_METRICS) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 333cc2c93b2..0826ea7d864 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -729,10 +729,10 @@ def query(self, show_enabled=False, show_disabled=False): "test yaml parameter.".format( self.pool_query_timeout.value, self.identifier)) from error - if self.pool_query_delay: + if self.pool_query_delay.value: self.log.info( "Waiting %s seconds before issuing next dmg pool query", - self.pool_query_delay) + self.pool_query_delay.value) sleep(self.pool_query_delay.value) @fail_on(CommandFailure) diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index f4872c86e09..efbcba289cb 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -1,72 +1,4 @@ """Build test suite""" -import sys -import subprocess # nosec - -TEST_CMOCKA_SKIP = """ -#include -#include -#include -#include - -static void -test(void **state) { skip(); } - -int main(int argc, char **argv) -{ - const struct CMUnitTest tests[] = { - cmocka_unit_test(test), - cmocka_unit_test(test), - }; - return cmocka_run_group_tests(tests, NULL, NULL); -} -""" - - -# pylint: disable-next=invalid-name -def CheckCmockaSkip(context): - """Configure check for cmocka bug""" - context.Message('Checking if cmocka skip() bug is present ... ') - rc = context.TryCompile(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Compile failed) assuming ") - context.Result(not rc) - return rc - rc = context.TryLink(TEST_CMOCKA_SKIP, '.c') - if rc == 0: - sys.stdout.write(" (Link failed) assuming ") - context.Result(not rc) - return rc - prog = context.lastTarget - pname = prog.get_abspath() - rc = subprocess.call(pname, env={"CMOCKA_TEST_ABORT": "1"}, shell=False, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - # in case of abort rc is -6 instead of 134 (128+6) with shell ... 
- if rc == -6: - sys.stdout.write(" (Bug reproduced) ") - else: - if rc != 0: - sys.stdout.write(" (Other error than bug) assuming ") - else: - sys.stdout.write(" (Bug not reproduced) ") - context.Result(rc) - # return 0 means error - return not rc - - -# pylint: disable=no-member -def configure_cmocka(nenv): - """configure cmocka environment""" - if GetOption('help') or GetOption('clean'): - return nenv - conf = Configure(nenv, custom_tests={'CheckCmockaSkip': CheckCmockaSkip}) - conf.env.AppendUnique(LIBS=['cmocka']) - if not conf.CheckCmockaSkip(): - # it would be cool to be able to check exit code is effectively 134 - # (for abort() upon skip() bug) but in all error cases we should - # decide to use workaround - conf.env.AppendUnique(CCFLAGS=['-DOVERRIDE_CMOCKA_SKIP']) - print("libcmocka with broken skip(), using workaround (DAOS-1093).") - return conf.Finish() def scons(): @@ -97,7 +29,7 @@ def scons(): c_files + daos_test_tgt, LIBS=['daos_common'] + libraries) - newenv = configure_cmocka(denv.Clone()) + newenv = denv.Clone() c_files = Split("""daos_array.c daos_base_tx.c daos_capa.c daos_checksum.c daos_container.c daos_dedup.c daos_degraded.c diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index a250d044f15..65bcd069de9 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -38,6 +38,8 @@ drain_dkeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -103,6 +105,8 @@ cont_open_in_drain(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -157,6 +161,8 @@ drain_akeys(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -208,6 +214,8 @@ drain_indexes(void **state) int i; int j; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -267,6 +275,7 @@ drain_snap_update_keys(void **state) char buf[256]; int buf_len = 256; + FAULT_INJECTION_REQUIRED(); if (!test_runable(arg, 4)) return; @@ -343,6 +352,8 @@ drain_snap_punch_keys(void **state) int buf_len = 256; uint32_t number; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -429,6 +440,8 @@ drain_multiple(void **state) int j; int k; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -495,6 +508,8 @@ drain_large_rec(void **state) char buffer[5000]; char v_buffer[5000]; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -543,6 +558,8 @@ drain_objects(void **state) int tgt = DEFAULT_FAIL_TGT; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -569,6 +586,8 @@ drain_fail_and_retry_objects(void **state) daos_obj_id_t oids[OBJ_NR]; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -598,6 +617,8 @@ drain_then_exclude(void **state) test_arg_t *arg = *state; daos_obj_id_t oid; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; @@ -849,6 +870,8 @@ dfs_extend_drain_common(void **state, int opc, uint32_t objclass) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 4)) return; diff --git a/src/tests/suite/daos_extend_simple.c b/src/tests/suite/daos_extend_simple.c index d7e32e99a1f..c0635359527 100644 --- a/src/tests/suite/daos_extend_simple.c +++ b/src/tests/suite/daos_extend_simple.c @@ -499,6 +499,8 @@ dfs_extend_punch_kill(void **state) void dfs_extend_punch_extend(void **state) { + 
FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_PUNCH, extend_cb_internal, false); } @@ -511,6 +513,8 @@ dfs_extend_stat_kill(void **state) void dfs_extend_stat_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_STAT, extend_cb_internal, false); } @@ -523,6 +527,8 @@ dfs_extend_enumerate_kill(void **state) void dfs_extend_enumerate_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_ENUMERATE, extend_cb_internal, false); } @@ -535,6 +541,8 @@ dfs_extend_fetch_kill(void **state) void dfs_extend_fetch_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_FETCH, extend_cb_internal, false); } @@ -547,6 +555,8 @@ dfs_extend_write_kill(void **state) void dfs_extend_write_extend(void **state) { + FAULT_INJECTION_REQUIRED(); + dfs_extend_internal(state, EXTEND_UPDATE, extend_cb_internal, false); } @@ -562,6 +572,8 @@ dfs_extend_fail_retry(void **state) dfs_attr_t attr = {}; int rc; + FAULT_INJECTION_REQUIRED(); + attr.da_props = daos_prop_alloc(1); assert_non_null(attr.da_props); attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; diff --git a/src/tests/suite/daos_obj_ec.c b/src/tests/suite/daos_obj_ec.c index 609b0ab319f..2eef576d096 100644 --- a/src/tests/suite/daos_obj_ec.c +++ b/src/tests/suite/daos_obj_ec.c @@ -2464,6 +2464,8 @@ ec_three_stripes_nvme_io(void **state) daos_recx_t recx; int i; + FAULT_INJECTION_REQUIRED(); + if (!test_runable(arg, 6)) return; diff --git a/src/tests/suite/daos_rebuild_ec.c b/src/tests/suite/daos_rebuild_ec.c index 0863647d845..6669d32490e 100644 --- a/src/tests/suite/daos_rebuild_ec.c +++ b/src/tests/suite/daos_rebuild_ec.c @@ -1111,6 +1111,7 @@ rebuild_ec_multiple_shards(void **state) d_rank_t rank = 2; int i, j, k; char *data; + char *verify_data; uint64_t stripe_size = 4 * CELL_SIZE; daos_recx_t recx; @@ -1118,32 +1119,69 @@ rebuild_ec_multiple_shards(void **state) return; data = (char *)malloc(stripe_size); + verify_data = (char *)malloc(stripe_size); assert_true(data != NULL); + assert_true(verify_data != NULL); + for (i = 0; i < 20; i++) + oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); + for (k = 0; k < 3; k++) { for (i = 0; i < 20; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_EC_4P2GX, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(data, 'a' + i, stripe_size); for (j = 5 * k; j < 5 * (k + 1); j++) { req.iod_type = DAOS_IOD_ARRAY; recx.rx_nr = stripe_size; recx.rx_idx = j * stripe_size; - memset(data, 'a', stripe_size); insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, stripe_size, &req); } ioreq_fini(&req); } + rebuild_pools_ranks(&arg, 1, &rank, 1, false); daos_cont_status_clear(arg->coh, NULL); + print_message("exclude rank %u\n", rank); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } rank = 2; - for (i = 0; i < 3; i++) { + for (k = 0; k < 3; k++) { reintegrate_pools_ranks(&arg, 1, &rank, 1, false); rank++; + + for (i = 0; i < 20; i++) { + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + 
memset(verify_data, 'a' + i, stripe_size); + for (j = 5 * k; j < 5 * (k + 1); j++) { + req.iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = stripe_size; + recx.rx_idx = j * stripe_size; + memset(data, 0, stripe_size); + lookup_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, stripe_size, &req); + assert_memory_equal(verify_data, data, stripe_size); + } + ioreq_fini(&req); + } } + free(verify_data); free(data); } diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index e423a61433e..49a40e2f62c 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -22,23 +22,6 @@ #include #include -#ifdef OVERRIDE_CMOCKA_SKIP -/* redefine cmocka's skip() so it will no longer abort() - * if CMOCKA_TEST_ABORT=1 - * - * it can't be redefined as a function as it must return from current context - */ -#undef skip -#define skip() \ - do { \ - const char *abort_test = getenv("CMOCKA_TEST_ABORT"); \ - if (abort_test != NULL && abort_test[0] == '1') \ - print_message("Skipped !!!\n"); \ - else \ - _skip(__FILE__, __LINE__); \ - return; \ - } while (0) -#endif #if FAULT_INJECTION #define FAULT_INJECTION_REQUIRED() do { } while (0) diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index c1def757c4c..47845f57b20 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -3053,6 +3053,129 @@ dfs_test_fix_chunk_size(void **state) D_FREE(buf); } +#define NUM_ENTRIES 1024 +#define NR_ENUM 64 + +static void +dfs_test_pipeline_find(void **state) +{ + dfs_obj_t *dir1, *f1; + int i; + time_t ts = 0; + mode_t create_mode = S_IWUSR | S_IRUSR; + int create_flags = O_RDWR | O_CREAT | O_EXCL; + char *dirname = "pipeline_dir"; + int rc; + + rc = dfs_open(dfs_mt, NULL, dirname, create_mode | S_IFDIR, create_flags, + OC_SX, 0, NULL, &dir1); + assert_int_equal(rc, 0); + + for (i = 0; i < NUM_ENTRIES; i++) { + char name[24]; + + /* create 1 dir for every 100 files */ + if (i % 100 == 0) { + sprintf(name, "dir.%d", i); + rc = dfs_mkdir(dfs_mt, dir1, name, create_mode | S_IFDIR, 0); + assert_int_equal(rc, 0); + } else { + daos_obj_id_t oid; + + sprintf(name, "file.%d", i); + rc = dfs_open(dfs_mt, dir1, name, create_mode | S_IFREG, create_flags, 0, 0, + NULL, &f1); + assert_int_equal(rc, 0); + + dfs_obj2id(f1, &oid); + /* printf("File %s \t OID: %"PRIu64".%"PRIu64"\n", name, oid.hi, oid.lo); */ + + rc = dfs_release(f1); + assert_int_equal(rc, 0); + } + + if (i == NUM_ENTRIES / 2) { + sleep(1); + ts = time(NULL); + sleep(1); + } + } + + dfs_predicate_t pred = {0}; + dfs_pipeline_t *dpipe = NULL; + + strcpy(pred.dp_name, "%.6%"); + pred.dp_newer = ts; + rc = dfs_pipeline_create(dfs_mt, pred, DFS_FILTER_NAME | DFS_FILTER_NEWER, &dpipe); + assert_int_equal(rc, 0); + + + uint32_t num_split = 0, j; + + rc = dfs_obj_anchor_split(dir1, &num_split, NULL); + assert_int_equal(rc, 0); + print_message("Anchor split in %u parts\n", num_split); + + daos_anchor_t *anchors; + struct dirent *dents = NULL; + daos_obj_id_t *oids = NULL; + daos_size_t *csizes = NULL; + + anchors = malloc(sizeof(daos_anchor_t) * num_split); + dents = malloc (sizeof(struct dirent) * NR_ENUM); + oids = calloc(NR_ENUM, sizeof(daos_obj_id_t)); + csizes = calloc(NR_ENUM, sizeof(daos_size_t)); + + uint64_t nr_total = 0, nr_matched = 0, nr_scanned; + + for (j = 0; j < num_split; j++) { + daos_anchor_t *anchor = &anchors[j]; + uint32_t nr; + + memset(anchor, 0, sizeof(daos_anchor_t)); + + rc = dfs_obj_anchor_set(dir1, j, anchor); + assert_int_equal(rc, 0); + + while 
(!daos_anchor_is_eof(anchor)) { + nr = NR_ENUM; + rc = dfs_readdir_with_filter(dfs_mt, dir1, dpipe, anchor, &nr, dents, oids, + csizes, &nr_scanned); + assert_int_equal(rc, 0); + + nr_total += nr_scanned; + nr_matched += nr; + + for (i = 0; i < nr; i++) { + print_message("Name: %s\t", dents[i].d_name); + print_message("OID: %"PRIu64".%"PRIu64"\t", oids[i].hi, oids[i].lo); + print_message("CSIZE = %zu\n", csizes[i]); + if (dents[i].d_type == DT_DIR) + print_message("Type: DIR\n"); + else if (dents[i].d_type == DT_REG) + print_message("Type: FILE\n"); + else + assert(0); + } + } + } + + print_message("total entries scanned = %"PRIu64"\n", nr_total); + print_message("total entries matched = %"PRIu64"\n", nr_matched); + + free(dents); + free(anchors); + free(oids); + free(csizes); + rc = dfs_pipeline_destroy(dpipe); + assert_int_equal(rc, 0); + /** close / finalize */ + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, dirname, true, NULL); + assert_int_equal(rc, 0); +} + static const struct CMUnitTest dfs_unit_tests[] = { { "DFS_UNIT_TEST1: DFS mount / umount", dfs_test_mount, async_disable, test_case_teardown}, @@ -3106,6 +3229,8 @@ static const struct CMUnitTest dfs_unit_tests[] = { dfs_test_relink_root, async_disable, test_case_teardown}, { "DFS_UNIT_TEST26: dfs MWC chunk size fix", dfs_test_fix_chunk_size, async_disable, test_case_teardown}, + { "DFS_UNIT_TEST27: dfs pipeline find", + dfs_test_pipeline_find, async_disable, test_case_teardown}, }; static int diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c index b2f99e4abd2..8a8190d5203 100644 --- a/src/utils/daos_metrics/daos_metrics.c +++ b/src/utils/daos_metrics/daos_metrics.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2021 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -93,6 +93,7 @@ main(int argc, char **argv) {"path", required_argument, NULL, 'p'}, {"delay", required_argument, NULL, 'D'}, {"meta", no_argument, NULL, 'M'}, + {"meminfo", no_argument, NULL, 'm'}, {"type", no_argument, NULL, 'T'}, {"read", no_argument, NULL, 'r'}, {"reset", no_argument, NULL, 'e'}, @@ -100,7 +101,7 @@ main(int argc, char **argv) {NULL, 0, NULL, 0} }; - opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MTrhe", + opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrhe", long_options, NULL); if (opt == -1) break; @@ -136,6 +137,9 @@ main(int argc, char **argv) case 'M': show_meta = true; break; + case 'm': + filter |= D_TM_MEMINFO; + break; case 'T': show_type = true; break; @@ -160,7 +164,7 @@ main(int argc, char **argv) ops |= D_TM_ITER_READ; if (filter == 0) - filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | + filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; ctx = d_tm_open(srv_idx); diff --git a/src/vea/tests/vea_stress.c b/src/vea/tests/vea_stress.c index 49d56e684cd..b50f37f8e7a 100644 --- a/src/vea/tests/vea_stress.c +++ b/src/vea/tests/vea_stress.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2022 Intel Corporation. + * (C) Copyright 2021-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,6 +24,7 @@ uint64_t pool_capacity = (1024ULL << 30); /* 1TB */ unsigned int cont_per_pool = 1; unsigned int obj_per_cont = 100; unsigned int test_duration = (2 * 60); /* 2 mins */ +unsigned int upd_blks_max = 256; /* 1MB by default */ unsigned int rand_seed; bool loading_test; /* test loading pool */ @@ -40,7 +41,6 @@ enum { #define VS_RSRV_CNT_MAX 10 /* extents */ #define VS_FREE_CNT_MAX 30 /* extents */ #define VS_MERGE_CNT_MAX 10 /* extents */ -#define VS_UPD_BLKS_MAX 256 /* 1MB */ #define VS_AGG_BLKS_MAX 1024 /* 4MB */ struct vs_perf_cntr { @@ -311,7 +311,7 @@ vs_update(struct vea_stress_pool *vs_pool) rsrv_cnt = get_random_count(VS_RSRV_CNT_MAX); for (i = 0; i < rsrv_cnt; i++) { - blk_cnt = get_random_count(VS_UPD_BLKS_MAX); + blk_cnt = get_random_count(upd_blks_max); cur_ts = daos_getutime(); rc = vea_reserve(vs_pool->vsp_vsi, blk_cnt, hint, &r_list); @@ -601,10 +601,11 @@ vs_stop_run(struct vea_stress_pool *vs_pool, int rc) } fprintf(stdout, "free_blks:["DF_12U64","DF_12U64"] frags_l:"DF_12U64" frags_s:"DF_12U64" " - "frags_a:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" r_small:"DF_12U64"\n", + "frags_a:"DF_12U64" frags_bitmap:"DF_12U64" r_hint:"DF_12U64" r_large:"DF_12U64" " + "r_small:"DF_12U64" r_bitmap:"DF_12U64"\n", stat.vs_free_persistent, stat.vs_free_transient, stat.vs_frags_large, - stat.vs_frags_small, stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, - stat.vs_resrv_small); + stat.vs_frags_small, stat.vs_frags_aging, stat.vs_frags_bitmap, + stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small, stat.vs_resrv_bitmap); return stop; } @@ -873,6 +874,7 @@ vs_init(void) const char vs_stress_options[] = "Available options are:\n" +"-b max blocks per update\n" "-C pool capacity\n" "-c container nr\n" "-d test duration in seconds\n" @@ -932,6 +934,7 @@ vs_op2str(unsigned int op) int main(int argc, char **argv) { static struct option long_ops[] = { + { "block_max", required_argument, NULL, 'b' }, { "capacity", required_argument, NULL, 'C' }, { "cont_nr", required_argument, NULL, 'c' }, { "duration", required_argument, NULL, 'd' }, @@ -949,8 +952,16 @@ int main(int argc, char **argv) rand_seed = (unsigned int)(time(NULL) & 0xFFFFFFFFUL); memset(pool_file, 0, sizeof(pool_file)); - while ((rc = getopt_long(argc, argv, "C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { + while ((rc = getopt_long(argc, argv, "b:C:c:d:f:H:lo:s:h", long_ops, NULL)) != -1) { switch (rc) { + case 'b': + upd_blks_max = strtoull(optarg, &endp, 0); + if (*endp != '\0') { + printf("invalid update max blocks\n"); + print_usage(); + return -1; + } + break; case 'C': pool_capacity = strtoul(optarg, &endp, 0); pool_capacity = val_unit(pool_capacity, *endp); diff --git a/src/vea/tests/vea_ut.c b/src/vea/tests/vea_ut.c index a16590329c3..3f6c8369550 100644 --- a/src/vea/tests/vea_ut.c +++ b/src/vea/tests/vea_ut.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -83,6 +83,8 @@ ut_load(void **state) rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, NULL, &args->vua_vsi); assert_rc_equal(rc, 0); + /* turn off bitmap feature to test legacy allocation */ + args->vua_md->vsd_compat = 0; } static void @@ -114,10 +116,12 @@ ut_query(void **state) assert_int_equal(stat.vs_free_transient, tot_blks); assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 0); + assert_int_equal(stat.vs_frags_bitmap, 0); assert_int_equal(stat.vs_frags_aging, 0); assert_int_equal(stat.vs_resrv_hint, 0); assert_int_equal(stat.vs_resrv_large, 0); assert_int_equal(stat.vs_resrv_small, 0); + assert_int_equal(stat.vs_resrv_bitmap, 0); } static void @@ -172,9 +176,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_a); - rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_a, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -199,9 +205,11 @@ ut_reserve(void **state) else assert_int_equal(ext->vre_blk_off, off_b); - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* update hint offset */ @@ -226,10 +234,12 @@ ut_reserve(void **state) assert_int_equal(ext->vre_blk_off, off_b); /* Verify transient is allocated */ - rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 0); /* Verify persistent is not allocated */ - rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, off_b, blk_cnt, + !!ext->vre_private); assert_rc_equal(rc, 1); /* Verify statistics */ @@ -238,12 +248,135 @@ ut_reserve(void **state) assert_int_equal(stat.vs_frags_large, 1); assert_int_equal(stat.vs_frags_small, 1); + assert_int_equal(stat.vs_frags_bitmap, 0); /* 2 hint from the second reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_hint, 2); /* 2 large from the first reserve for io stream 0 & 1 */ assert_int_equal(stat.vs_resrv_large, 2); /* 1 small from the reserve for io stream 2 */ assert_int_equal(stat.vs_resrv_small, 1); + /* 0 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 0); +} + +static void +ut_reserve_bitmap(void **state) +{ + struct vea_ut_args *args = *state; + uint32_t blk_cnt; + struct vea_resrvd_ext *ext; + struct vea_hint_context *h_ctxt; + d_list_t *r_list; + struct vea_stat stat; + int rc, ext_cnt; + uint32_t hdr_blks = 1; + uint64_t capacity = UT_TOTAL_BLKS; + struct vea_unmap_context unmap_ctxt = { 0 }; + uint32_t blk_cnt_stream0[3] = { 4, 32, 4}; + uint32_t blk_cnt_stream1[3] = { 1, 2, 3}; + int i; + + rc = vea_format(&args->vua_umm, &args->vua_txd, args->vua_md, 0, + hdr_blks, capacity, NULL, NULL, true); + assert_rc_equal(rc, 0); + + rc = vea_load(&args->vua_umm, &args->vua_txd, args->vua_md, &unmap_ctxt, + NULL, &args->vua_vsi); + assert_rc_equal(rc, 0); + + for (i = 0; i < IO_STREAM_CNT; i++) { + /* reset off and seq */ + 
args->vua_hint[i]->vhd_off = 0; + args->vua_hint[i]->vhd_seq = 0; + + rc = vea_hint_load(args->vua_hint[i], &args->vua_hint_ctxt[i]); + assert_rc_equal(rc, 0); + } + /* + * Reserve three blocks from I/O stream 0 and I/O stream 1 in + * interleaved order, the reservation from I/O stream 0 will be + * canceled later, and the reservation from I/O stream 1 will + * be published. + */ + for (ext_cnt = 0; ext_cnt < 3; ext_cnt++) { + print_message("reserve extent %d from I/O stream 0\n", ext_cnt); + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + blk_cnt = blk_cnt_stream0[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + print_message("reserve extent %d from I/O stream 1\n", ext_cnt); + + r_list = &args->vua_resrvd_list[1]; + h_ctxt = args->vua_hint_ctxt[1]; + + blk_cnt = blk_cnt_stream1[ext_cnt]; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + } + + /* Reserve from I/O stream 2, it will reserve from small free extent */ + print_message("reserve extent from I/O stream 2\n"); + + r_list = &args->vua_resrvd_list[2]; + h_ctxt = args->vua_hint_ctxt[2]; + + blk_cnt = 1024; + rc = vea_reserve(args->vua_vsi, blk_cnt, h_ctxt, r_list); + assert_rc_equal(rc, 0); + + /* correctness check */ + ext = d_list_entry(r_list->prev, struct vea_resrvd_ext, vre_link); + assert_int_equal(ext->vre_hint_off, VEA_HINT_OFF_INVAL); + assert_int_equal(ext->vre_blk_cnt, blk_cnt); + + /* Verify transient is allocated */ + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 0); + /* Verify persistent is not allocated */ + rc = vea_verify_alloc(args->vua_vsi, false, ext->vre_blk_off, blk_cnt, + !!ext->vre_private); + assert_rc_equal(rc, 1); + + /* Verify statistics */ + rc = vea_query(args->vua_vsi, NULL, &stat); + assert_rc_equal(rc, 0); + + assert_int_equal(stat.vs_frags_large, 1); + assert_int_equal(stat.vs_frags_small, 1); + /* 5 bitmaps for io stream 0 & 1 */ + assert_int_equal(stat.vs_frags_bitmap, 5); + /* 4 hint from */ + assert_int_equal(stat.vs_resrv_hint, 4); + /* 1 large from the first reserve for io stream 2 */ + assert_int_equal(stat.vs_resrv_large, 1); + assert_int_equal(stat.vs_resrv_small, 1); + /* 6 bitmap reserve */ + assert_int_equal(stat.vs_resrv_bitmap, 6); } static void @@ -269,11 +402,48 @@ ut_cancel(void **state) print_message("cancel reservation from I/O stream 0\n"); rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); assert_int_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, true); + assert_rc_equal(rc, 1); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt, 
false); assert_rc_equal(rc, 1); assert_int_equal(h_ctxt->vhc_off, VEA_HINT_OFF_INVAL); } +static void +ut_cancel_bitmap(void **state) +{ + + struct vea_ut_args *args = *state; + struct vea_hint_context *h_ctxt; + struct vea_resrvd_ext *ext; + d_list_t *r_list; + struct vea_stat stat; + int rc; + + r_list = &args->vua_resrvd_list[0]; + h_ctxt = args->vua_hint_ctxt[0]; + + print_message("cancel reservation from I/O stream 0\n"); + rc = vea_cancel(args->vua_vsi, h_ctxt, r_list); + assert_int_equal(rc, 0); + + d_list_for_each_entry(ext, r_list, vre_link) { + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, true); + assert_rc_equal(rc, 1); + + rc = vea_verify_alloc(args->vua_vsi, true, ext->vre_blk_off, + ext->vre_blk_cnt, false); + assert_rc_equal(rc, 1); + } + + /* Verify statistics */ + rc = vea_query(args->vua_vsi, NULL, &stat); + + /* 3 bitmaps left */ + assert_int_equal(stat.vs_frags_bitmap, 3); +} + static void ut_tx_publish(void **state) { @@ -301,9 +471,11 @@ ut_tx_publish(void **state) assert_ptr_not_equal(copy, NULL); D_INIT_LIST_HEAD(©->vre_link); + copy->vre_new_bitmap_chunk = ext->vre_new_bitmap_chunk; + copy->vre_private = ext->vre_private; copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; - d_list_add(©->vre_link, &args->vua_alloc_list); + d_list_add_tail(©->vre_link, &args->vua_alloc_list); } print_message("publish reservation from I/O stream %d\n", i); @@ -319,10 +491,12 @@ ut_tx_publish(void **state) blk_off = copy->vre_blk_off; blk_cnt = copy->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!copy->vre_private); assert_rc_equal(rc, 0); } } @@ -331,7 +505,7 @@ static void ut_free(void **state) { struct vea_ut_args *args = *state; - struct vea_resrvd_ext *ext; + struct vea_resrvd_ext *ext, *tmp; d_list_t *r_list; uint64_t blk_off; uint32_t blk_cnt, nr_flushed; @@ -346,10 +520,12 @@ ut_free(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args->vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -363,13 +539,21 @@ ut_free(void **state) assert_rc_equal(rc, 0); assert_true(nr_flushed > 0); + print_message("transient free extents after flush:\n"); + vea_dump(args->vua_vsi, true); + print_message("persistent free extents after flush:\n"); + vea_dump(args->vua_vsi, false); + r_list = &args->vua_alloc_list; - d_list_for_each_entry(ext, r_list, vre_link) { + d_list_for_each_entry_safe(ext, tmp, r_list, vre_link) { blk_off = ext->vre_blk_off; blk_cnt = ext->vre_blk_cnt; - rc = vea_verify_alloc(args->vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args->vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); } print_message("transient free extents after migration:\n"); @@ -400,6 +584,14 @@ ut_unload(void **state) args->vua_vsi = NULL; } +static void +ut_free_bitmap(void **state) +{ + ut_free(state); + ut_hint_unload(state); + ut_unload(state); +} + static 
int ut_setup(struct vea_ut_args *test_args) { @@ -885,23 +1077,6 @@ ut_inval_params_set_ext_age(void **state) ut_teardown(&args); } -static void -ut_inval_params_get_ext_vector(void **state) -{ - struct vea_ut_args args; - uint64_t block_offset = 0; - uint64_t block_count = 1; - struct vea_ext_vector ext_vector; - - print_message("Testing invalid parameters to vea_get_ext_vector\n"); - ut_setup(&args); - expect_assert_failure(vea_get_ext_vector(NULL, block_offset, - block_count, &ext_vector)); - expect_assert_failure(vea_get_ext_vector(args.vua_vsi, block_offset, - block_count, NULL)); - ut_teardown(&args); -} - static void ut_free_invalid_space(void **state) { @@ -932,6 +1107,13 @@ ut_free_invalid_space(void **state) rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list); assert_int_equal(rc, 0); + print_message("transient free extents:\n"); + rc = vea_dump(args.vua_vsi, true); + assert_rc_equal(rc, 0); + print_message("persistent free extents:\n"); + rc = vea_dump(args.vua_vsi, false); + assert_rc_equal(rc, 0); + /* Try to free from I/O Stream 1, which hasn't been reserved */ r_list = &args.vua_resrvd_list[1]; h_ctxt = args.vua_hint_ctxt[1]; @@ -957,12 +1139,13 @@ print_stats(struct vea_ut_args *args, bool verbose) rc = vea_query(args->vua_vsi, NULL, &stat); assert_int_equal(rc, 0); print_message("free_blks:"DF_U64"/"DF_U64", frags_large:"DF_U64", " - "frags_small:"DF_U64", frags_aging:"DF_U64"\n" + "frags_small:"DF_U64", frags_bitmap:"DF_U64" frags_aging:"DF_U64"\n" "resrv_hint:"DF_U64"\nresrv_large:"DF_U64"\n" - "resrv_small:"DF_U64"\n", + "resrv_small:"DF_U64"\nresrv_bitmap:"DF_U64"\n", stat.vs_free_persistent, stat.vs_free_transient, - stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_aging, - stat.vs_resrv_hint, stat.vs_resrv_large, stat.vs_resrv_small); + stat.vs_frags_large, stat.vs_frags_small, stat.vs_frags_bitmap, + stat.vs_frags_aging, stat.vs_resrv_hint, stat.vs_resrv_large, + stat.vs_resrv_small, stat.vs_resrv_bitmap); if (verbose) vea_dump(args->vua_vsi, true); @@ -980,6 +1163,8 @@ ut_interleaved_ops(void **state) uint32_t header_blocks = 1; uint64_t capacity = ((VEA_LARGE_EXT_MB * 2) << 20); /* 128 MB */ uint32_t block_count; + d_list_t tmp_list; + struct vea_resrvd_ext *ext, *tmp; int rc; print_message("Test interleaved operations\n"); @@ -992,8 +1177,6 @@ ut_interleaved_ops(void **state) NULL, &args.vua_vsi); assert_int_equal(rc, 0); - rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); - assert_int_equal(rc, 0); /* * Do the following interleaved operations: @@ -1006,6 +1189,7 @@ ut_interleaved_ops(void **state) * 7. reserve A, reserve B, cancel A, cancel B * 8. reserve A, reserve B, cancel B, cancel A * 9. reserve A, reserve B, reserve C, publish B, publish A & C + * 10. reserve A, reserve B, reserve C, cancel A & C. publish B. 
**/ block_count = 2; r_list_a = &args.vua_resrvd_list[0]; @@ -1020,10 +1204,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 2 */ block_count += 2; @@ -1032,10 +1220,14 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 3 */ block_count += 2; @@ -1046,8 +1238,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 4 */ block_count += 2; @@ -1056,8 +1252,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); @@ -1070,8 +1270,12 @@ ut_interleaved_ops(void **state) assert_rc_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); /* Case 6 */ block_count += 2; @@ -1080,8 +1284,12 @@ ut_interleaved_ops(void **state) block_count += 2; rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_b); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); assert_int_equal(rc, 0); @@ -1120,12 +1328,46 @@ ut_interleaved_ops(void **state) /* Reserve C */ rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); /* Publish B */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_b); assert_rc_equal(rc, 0); /* Publish A & C */ rc = vea_tx_publish(args.vua_vsi, h_ctxt, r_list_a); assert_rc_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + /* Case 10 */ + block_count = 256; + /* Reserve A */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + block_count = 260; + /* Reserve B */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + 
block_count = 261; + /* Reserve C */ + rc = vea_reserve(args.vua_vsi, block_count, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + + D_INIT_LIST_HEAD(&tmp_list); + d_list_for_each_entry_safe(ext, tmp, r_list_a, vre_link) { + /* move second reserve out */ + if (ext->vre_blk_cnt == 260) + d_list_move_tail(&ext->vre_link, &tmp_list); + } + /* cancel A & C */ + rc = vea_cancel(args.vua_vsi, h_ctxt, r_list_a); + assert_rc_equal(rc, 0); + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + /* Publish B */ + rc = vea_tx_publish(args.vua_vsi, h_ctxt, &tmp_list); + assert_rc_equal(rc, 0); rc = umem_tx_commit(&args.vua_umm); assert_int_equal(rc, 0); @@ -1190,6 +1432,7 @@ ut_fragmentation(void **state) D_INIT_LIST_HEAD(©->vre_link); copy->vre_blk_off = ext->vre_blk_off; copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; d_list_add(©->vre_link, &args.vua_alloc_list); } } @@ -1239,10 +1482,12 @@ ut_fragmentation(void **state) assert_rc_equal(rc, 0); /* not immediately visual for allocation */ - rc = vea_verify_alloc(args.vua_vsi, true, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 0); - rc = vea_verify_alloc(args.vua_vsi, false, blk_off, blk_cnt); + rc = vea_verify_alloc(args.vua_vsi, false, blk_off, + blk_cnt, !!ext->vre_private); assert_rc_equal(rc, 1); } @@ -1250,6 +1495,98 @@ ut_fragmentation(void **state) ut_teardown(&args); } +static void +ut_reclaim_unused_bitmap(void **state) +{ + struct vea_ut_args args; + struct vea_unmap_context unmap_ctxt = { 0 }; + d_list_t *r_list; + uint64_t capacity = 1llu << 27; /* 128 MiB */ + uint32_t block_size = 4096; /* use the default size */ + uint32_t header_blocks = 1; + d_list_t persist_list; + struct vea_resrvd_ext *ext, *copy; + struct vea_resrvd_ext *tmp_ext; + int rc; + + print_message("Test bitmap allocation\n"); + ut_setup(&args); + rc = vea_format(&args.vua_umm, &args.vua_txd, args.vua_md, block_size, + header_blocks, capacity, NULL, NULL, false); + assert_rc_equal(rc, 0); + + rc = vea_load(&args.vua_umm, &args.vua_txd, args.vua_md, &unmap_ctxt, + NULL, &args.vua_vsi); + assert_rc_equal(rc, 0); + + r_list = &args.vua_resrvd_list[0]; + /* keep reserving until we run out of space */ + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 8, NULL, r_list); + } + + D_INIT_LIST_HEAD(&persist_list); + d_list_for_each_entry_safe(ext, tmp_ext, r_list, vre_link) { + /* Copy the extents to keep to persist_list */ + D_ALLOC_PTR(copy); + assert_ptr_not_equal(copy, NULL); + + D_INIT_LIST_HEAD(©->vre_link); + copy->vre_blk_off = ext->vre_blk_off; + copy->vre_blk_cnt = ext->vre_blk_cnt; + copy->vre_private = ext->vre_private; + d_list_add(©->vre_link, &persist_list); + } + + /* Publish the ones to persist */ + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, r_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 8 blocks:\n"); + print_stats(&args, true); + + d_list_for_each_entry_safe(ext, tmp_ext, &persist_list, vre_link) { + uint64_t blk_off = ext->vre_blk_off; + uint32_t blk_cnt = ext->vre_blk_cnt; + + rc = vea_free(args.vua_vsi, blk_off, blk_cnt); + assert_rc_equal(rc, 0); + + /* not immediately visual for allocation */ + rc = vea_verify_alloc(args.vua_vsi, true, blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 0); + + rc = vea_verify_alloc(args.vua_vsi, false, 
blk_off, + blk_cnt, !!ext->vre_private); + assert_rc_equal(rc, 1); + d_list_del_init(&ext->vre_link); + D_FREE(ext); + } + + rc = 0; + D_INIT_LIST_HEAD(&persist_list); + while (rc == 0) { + rc = vea_reserve(args.vua_vsi, 16, NULL, &persist_list); + } + rc = umem_tx_begin(&args.vua_umm, &args.vua_txd); + assert_int_equal(rc, 0); + rc = vea_tx_publish(args.vua_vsi, NULL, &persist_list); + assert_int_equal(rc, 0); + rc = umem_tx_commit(&args.vua_umm); + assert_int_equal(rc, 0); + + print_message("Fragments after filling 16 blocks:\n"); + print_stats(&args, true); + + vea_unload(args.vua_vsi); + ut_teardown(&args); +} + static const struct CMUnitTest vea_uts[] = { { "vea_format", ut_format, NULL, NULL}, { "vea_load", ut_load, NULL, NULL}, @@ -1261,6 +1598,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_free", ut_free, NULL, NULL}, { "vea_hint_unload", ut_hint_unload, NULL, NULL}, { "vea_unload", ut_unload, NULL, NULL}, + { "vea_reserve_bitmap", ut_reserve_bitmap, NULL, NULL}, + { "vea_cancel_bitmap", ut_cancel_bitmap, NULL, NULL}, + { "vea_tx_publish_bitmap", ut_tx_publish, NULL, NULL}, + { "vea_free_bitmap", ut_free_bitmap, NULL, NULL}, { "vea_reserve_special", ut_reserve_special, NULL, NULL}, { "vea_inval_params_format", ut_inval_params_format, NULL, NULL}, { "vea_inval_params_load", ut_inval_params_load, NULL, NULL}, @@ -1271,11 +1612,10 @@ static const struct CMUnitTest vea_uts[] = { { "vea_inval_param_hint_load", ut_inval_params_hint_load, NULL, NULL}, { "vea_inval_param_set_ext_age", ut_inval_params_set_ext_age, NULL, NULL}, - { "vea_inval_param_get_ext_vector", ut_inval_params_get_ext_vector, - NULL, NULL}, { "vea_free_invalid_space", ut_free_invalid_space, NULL, NULL}, { "vea_interleaved_ops", ut_interleaved_ops, NULL, NULL}, - { "vea_fragmentation", ut_fragmentation, NULL, NULL} + { "vea_fragmentation", ut_fragmentation, NULL, NULL}, + { "vea_reclaim_unused_bitmap", ut_reclaim_unused_bitmap, NULL, NULL} }; int main(int argc, char **argv) diff --git a/src/vea/vea_alloc.c b/src/vea/vea_alloc.c index 6e0986c0dba..a9fd9424184 100644 --- a/src/vea/vea_alloc.c +++ b/src/vea/vea_alloc.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -10,27 +10,20 @@ #include #include "vea_internal.h" -int -compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec) -{ - /* TODO Add in in-memory extent vector tree */ - return 0; -} - static int -compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, - struct vea_entry *entry) +compound_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + struct vea_extent_entry *entry) { struct vea_free_extent *remain; d_iov_t key; int rc; - remain = &entry->ve_ext; + remain = &entry->vee_ext; D_ASSERT(remain->vfe_blk_cnt >= vfe->vfe_blk_cnt); D_ASSERT(remain->vfe_blk_off == vfe->vfe_blk_off); /* Remove the found free extent from compound index */ - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); if (remain->vfe_blk_cnt == vfe->vfe_blk_cnt) { d_iov_set(&key, &vfe->vfe_blk_off, sizeof(vfe->vfe_blk_off)); @@ -40,7 +33,7 @@ compound_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe, remain->vfe_blk_off += vfe->vfe_blk_cnt; remain->vfe_blk_cnt -= vfe->vfe_blk_cnt; - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); } return rc; @@ -51,7 +44,7 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; d_iov_t key, val; int rc; @@ -72,12 +65,12 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, if (rc) return (rc == -DER_NONEXIST) ? 0 : rc; - entry = (struct vea_entry *)val.iov_buf; + entry = (struct vea_extent_entry *)val.iov_buf; /* The matching free extent isn't big enough */ - if (entry->ve_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < vfe.vfe_blk_cnt) return 0; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -94,82 +87,33 @@ reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, static int reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd) -{ - daos_handle_t btr_hdl; - struct vea_sized_class *sc; - struct vea_free_extent vfe; - struct vea_entry *entry; - d_iov_t key, val_out; - uint64_t int_key = blk_cnt; - int rc; - - /* Skip huge allocate request */ - if (blk_cnt > vsi->vsi_class.vfc_large_thresh) - return 0; - - btr_hdl = vsi->vsi_class.vfc_size_btr; - D_ASSERT(daos_handle_is_valid(btr_hdl)); - - d_iov_set(&key, &int_key, sizeof(int_key)); - d_iov_set(&val_out, NULL, 0); - - rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); - if (rc == -DER_NONEXIST) { - return 0; - } else if (rc) { - D_ERROR("Search size class:%u failed. 
"DF_RC"\n", blk_cnt, DP_RC(rc)); - return rc; - } - - sc = (struct vea_sized_class *)val_out.iov_buf; - D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); - - /* Get the least used item from head */ - entry = d_list_entry(sc->vsc_lru.next, struct vea_entry, ve_link); - D_ASSERT(entry->ve_sized_class == sc); - D_ASSERT(entry->ve_ext.vfe_blk_cnt >= blk_cnt); - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; - vfe.vfe_blk_cnt = blk_cnt; - - rc = compound_alloc(vsi, &vfe, entry); - if (rc) - return rc; - - resrvd->vre_blk_off = vfe.vfe_blk_off; - resrvd->vre_blk_cnt = blk_cnt; - inc_stats(vsi, STAT_RESRV_SMALL, 1); - - D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); - - return rc; -} + struct vea_resrvd_ext *resrvd); +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd); -int -reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, +static int +reserve_extent(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { struct vea_free_class *vfc = &vsi->vsi_class; struct vea_free_extent vfe; - struct vea_entry *entry; + struct vea_extent_entry *entry; struct d_binheap_node *root; int rc; - /* No large free extent available */ if (d_binheap_is_empty(&vfc->vfc_heap)) - return reserve_small(vsi, blk_cnt, resrvd); + return 0; root = d_binheap_root(&vfc->vfc_heap); - entry = container_of(root, struct vea_entry, ve_node); + entry = container_of(root, struct vea_extent_entry, vee_node); - D_ASSERT(entry->ve_ext.vfe_blk_cnt > vfc->vfc_large_thresh); + D_ASSERT(entry->vee_ext.vfe_blk_cnt > vfc->vfc_large_thresh); D_DEBUG(DB_IO, "largest free extent ["DF_U64", %u]\n", - entry->ve_ext.vfe_blk_off, entry->ve_ext.vfe_blk_cnt); + entry->vee_ext.vfe_blk_off, entry->vee_ext.vfe_blk_cnt); /* The largest free extent can't satisfy huge allocate request */ - if (entry->ve_ext.vfe_blk_cnt < blk_cnt) + if (entry->vee_ext.vfe_blk_cnt < blk_cnt) return 0; /* @@ -178,16 +122,11 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, * reserve from the small extents first, if it fails, reserve from the * largest free extent. 
*/ - if (entry->ve_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { - /* Try small extents first */ - rc = reserve_small(vsi, blk_cnt, resrvd); - if (rc != 0 || resrvd->vre_blk_cnt != 0) - return rc; - - vfe.vfe_blk_off = entry->ve_ext.vfe_blk_off; + if (entry->vee_ext.vfe_blk_cnt <= (max(blk_cnt, vfc->vfc_large_thresh) * 2)) { + vfe.vfe_blk_off = entry->vee_ext.vfe_blk_off; vfe.vfe_blk_cnt = blk_cnt; - rc = compound_alloc(vsi, &vfe, entry); + rc = compound_alloc_extent(vsi, &vfe, entry); if (rc) return rc; @@ -195,15 +134,15 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t half_blks, tot_blks; uint64_t blk_off; - blk_off = entry->ve_ext.vfe_blk_off; - tot_blks = entry->ve_ext.vfe_blk_cnt; + blk_off = entry->vee_ext.vfe_blk_off; + tot_blks = entry->vee_ext.vfe_blk_cnt; half_blks = tot_blks >> 1; D_ASSERT(tot_blks >= (half_blks + blk_cnt)); /* Shrink the original extent to half size */ - free_class_remove(vsi, entry); - entry->ve_ext.vfe_blk_cnt = half_blks; - rc = free_class_add(vsi, entry); + extent_free_class_remove(vsi, entry); + entry->vee_ext.vfe_blk_cnt = half_blks; + rc = extent_free_class_add(vsi, entry); if (rc) return rc; @@ -213,8 +152,8 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, vfe.vfe_blk_cnt = tot_blks - half_blks - blk_cnt; vfe.vfe_age = 0; /* Not used */ - rc = compound_free(vsi, &vfe, VEA_FL_NO_MERGE | - VEA_FL_NO_ACCOUNTING); + rc = compound_free_extent(vsi, &vfe, VEA_FL_NO_MERGE | + VEA_FL_NO_ACCOUNTING); if (rc) return rc; } @@ -232,16 +171,263 @@ reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, return 0; } +static int +reserve_size_tree(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + daos_handle_t btr_hdl; + struct vea_sized_class *sc; + struct vea_free_extent vfe; + struct vea_extent_entry *extent_entry; + d_iov_t key, val_out; + uint64_t int_key = blk_cnt; + int rc; + + btr_hdl = vsi->vsi_class.vfc_size_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + + d_iov_set(&key, &int_key, sizeof(int_key)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_fetch(btr_hdl, BTR_PROBE_GE, DAOS_INTENT_DEFAULT, &key, NULL, &val_out); + if (rc == -DER_NONEXIST) + return 0; + else if (rc) + return rc; + + sc = (struct vea_sized_class *)val_out.iov_buf; + D_ASSERT(sc != NULL); + + /* Get the least used item from head */ + extent_entry = d_list_entry(sc->vsc_extent_lru.next, struct vea_extent_entry, vee_link); + D_ASSERT(extent_entry->vee_sized_class == sc); + D_ASSERT(extent_entry->vee_ext.vfe_blk_cnt >= blk_cnt); + + vfe.vfe_blk_off = extent_entry->vee_ext.vfe_blk_off; + vfe.vfe_blk_cnt = blk_cnt; + + rc = compound_alloc_extent(vsi, &vfe, extent_entry); + if (rc) + return rc; + resrvd->vre_blk_off = vfe.vfe_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = NULL; + inc_stats(vsi, STAT_RESRV_SMALL, 1); + + return 0; +} + +static int +reserve_bitmap_chunk(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Get hint offset */ + hint_get(vsi->vsi_bitmap_hint_context, &resrvd->vre_hint_off); + + /* Reserve from hint offset */ + if (resrvd->vre_hint_off != VEA_HINT_OFF_INVAL) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + return rc; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } + + if (blk_cnt >= vsi->vsi_class.vfc_large_thresh) + goto extent; + + rc = reserve_size_tree(vsi, blk_cnt, resrvd); + if (rc) + return rc; + + if (resrvd->vre_blk_cnt > 0) + goto done; + +extent: + rc = reserve_extent(vsi, 
blk_cnt, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return -DER_NOSPACE; +done: + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + + /* Update hint offset */ + hint_update(vsi->vsi_bitmap_hint_context, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + return rc; +} + +#define LARGE_EXT_FREE_BLKS ((32UL << 30) / VEA_BLK_SZ) + +static inline uint32_t +get_bitmap_chunk_blks(struct vea_space_info *vsi, uint32_t blk_cnt) +{ + uint32_t chunk_blks = VEA_BITMAP_MIN_CHUNK_BLKS; + + D_ASSERT(blk_cnt <= VEA_MAX_BITMAP_CLASS); + chunk_blks *= blk_cnt; + + D_ASSERT(chunk_blks <= VEA_BITMAP_MAX_CHUNK_BLKS); + /* + * Always try to allocate large bitmap chunk if there + * is enough free extent blocks. + */ + if (vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] >= LARGE_EXT_FREE_BLKS) { + int times = VEA_BITMAP_MAX_CHUNK_BLKS / chunk_blks; + + if (times > 1) + chunk_blks *= times; + } + + /* should be aligned with 64 bits */ + D_ASSERT(chunk_blks % (blk_cnt * 64) == 0); + + return chunk_blks; +} + +static inline int +get_bitmap_sz(uint32_t chunk_blks, uint16_t class) +{ + int bits = chunk_blks / class; + + D_ASSERT(chunk_blks % class == 0); + D_ASSERT(bits % 64 == 0); + + return bits / 64; +} + +static int +reserve_bitmap(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_bitmap_entry *entry; + int rc; + struct vea_free_bitmap *vfb; + struct vea_free_bitmap new_vfb = { 0 }; + int bits = 1; + uint32_t chunk_blks; + int bitmap_sz; + d_list_t *list_head; + + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + if (blk_cnt > VEA_MAX_BITMAP_CLASS) + return 0; + + D_ASSERT(blk_cnt > 0); + /* reserve from bitmap */ + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + /* Only assert in server mode */ + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + rc = daos_find_bits(vfb->vfb_bitmaps, NULL, vfb->vfb_bitmap_sz, 1, &bits); + if (rc < 0) { + d_list_del_init(&bitmap_entry->vbe_link); + continue; + } + + D_ASSERT(rc * blk_cnt + blk_cnt <= vfb->vfb_blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off + (rc * blk_cnt); + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, rc, 1); + rc = 0; + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + return 0; + } + + list_head = &vsi->vsi_class.vfc_bitmap_empty[blk_cnt - 1]; + if (!d_list_empty(list_head)) { + bitmap_entry = d_list_entry(list_head->next, struct vea_bitmap_entry, + vbe_link); + if (vsi->vsi_unmap_ctxt.vnc_ext_flush) + D_ASSERT(bitmap_entry->vbe_published_state != VEA_BITMAP_STATE_PUBLISHING); + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == blk_cnt); + resrvd->vre_blk_off = vfb->vfb_blk_off; + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)bitmap_entry; + setbits64(vfb->vfb_bitmaps, 0, 1); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + d_list_move_tail(&bitmap_entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[blk_cnt - 1]); + return 0; + } + + chunk_blks = get_bitmap_chunk_blks(vsi, blk_cnt); + bitmap_sz = get_bitmap_sz(chunk_blks, blk_cnt); + rc = reserve_bitmap_chunk(vsi, chunk_blks, resrvd); + if (resrvd->vre_blk_cnt <= 0) + return 0; + + resrvd->vre_new_bitmap_chunk = 1; + + new_vfb.vfb_blk_off = 
resrvd->vre_blk_off; + new_vfb.vfb_class = blk_cnt; + new_vfb.vfb_blk_cnt = chunk_blks; + new_vfb.vfb_bitmap_sz = bitmap_sz; + rc = bitmap_entry_insert(vsi, &new_vfb, VEA_BITMAP_STATE_NEW, + &entry, VEA_FL_NO_ACCOUNTING); + if (rc) + return rc; + + resrvd->vre_blk_cnt = blk_cnt; + resrvd->vre_private = (void *)entry; + + D_DEBUG(DB_IO, "["DF_U64", %u]\n", resrvd->vre_blk_off, resrvd->vre_blk_cnt); + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, chunk_blks); + inc_stats(vsi, STAT_RESRV_BITMAP, 1); + + return rc; +} + +static int +reserve_small(struct vea_space_info *vsi, uint32_t blk_cnt, + struct vea_resrvd_ext *resrvd) +{ + int rc; + + /* Skip huge allocate request */ + if (blk_cnt >= vsi->vsi_class.vfc_large_thresh) + return 0; + + rc = reserve_bitmap(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + + return reserve_size_tree(vsi, blk_cnt, resrvd); +} + int -reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, +reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd) { - /* TODO reserve extent vector for non-contiguous allocation */ - return -DER_NOSPACE; + struct vea_free_class *vfc = &vsi->vsi_class; + int rc; + + /* No large free extent available */ + if (d_binheap_is_empty(&vfc->vfc_heap)) + return reserve_small(vsi, blk_cnt, resrvd); + + if (blk_cnt < vsi->vsi_class.vfc_large_thresh) { + rc = reserve_small(vsi, blk_cnt, resrvd); + if (rc || resrvd->vre_blk_cnt > 0) + return rc; + } + + return reserve_extent(vsi, blk_cnt, resrvd); } -int -persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) +static int +persistent_alloc_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent *found, frag = {0}; daos_handle_t btr_hdl; @@ -328,3 +514,183 @@ persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe) return 0; } + +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr) +{ + uint32_t bitmap_off = bit_at / 8; + uint32_t bitmap_sz = 0; + + if (bit_at % 8) + bitmap_sz = 1; + + if (bits_nr > (bit_at % 8)) + bitmap_sz += (bits_nr - (bit_at % 8) + 7) / 8; + + return umem_tx_add_ptr(vsi_umem, (char *)bitmap + bitmap_off, bitmap_sz); +} + +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear) +{ + uint32_t bit_at, bits_nr; + int rc; + + if (blk_off < bitmap->vfb_blk_off || + blk_off + blk_cnt > bitmap->vfb_blk_off + bitmap->vfb_blk_cnt) { + D_ERROR("range ["DF_U64", %u] is not within bitmap ["DF_U64", %u]\n", + blk_off, blk_cnt, bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + return -DER_INVAL; + } + + bit_at = blk_off - bitmap->vfb_blk_off; + if (bit_at % bitmap->vfb_class != 0) { + D_ERROR("invalid block offset: "DF_U64" which is not times of %u\n", + blk_off, bitmap->vfb_class); + return -DER_INVAL; + } + if (blk_cnt % bitmap->vfb_class != 0) { + D_ERROR("invalid block count: %u which is not times of %u\n", + blk_cnt, bitmap->vfb_class); + return -DER_INVAL; + } + bit_at /= bitmap->vfb_class; + bits_nr = blk_cnt / bitmap->vfb_class; + if (clear) { + if (!isset_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already cleared in the range.\n"); + return -DER_INVAL; + } + } else { + if (!isclr_range((uint8_t *)bitmap->vfb_bitmaps, + bit_at, bit_at + bits_nr - 1)) { + D_ERROR("bitmap already set in the range.["DF_U64", %u]\n", + blk_off, blk_cnt); + return -DER_INVAL; + } + } + + if (vsi_umem) { + 
rc = bitmap_tx_add_ptr(vsi_umem, bitmap->vfb_bitmaps, bit_at, bits_nr); + if (rc) + return rc; + } + + D_ASSERT(bit_at + bits_nr <= bitmap->vfb_bitmap_sz * 64); + if (clear) + clrbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + else + setbits64(bitmap->vfb_bitmaps, bit_at, bits_nr); + + return 0; +} + +static void +new_chunk_commit_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHED; +} + +static void +new_chunk_abort_cb(void *data, bool noop) +{ + struct vea_bitmap_entry *bitmap_entry = (struct vea_bitmap_entry *)data; + + if (noop) + return; + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_NEW; +} + +int +persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + struct vea_bitmap_entry *bitmap_entry = vfe->vfe_bitmap; + + if (bitmap_entry == NULL) + return persistent_alloc_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(bitmap_entry != NULL); + + /* if this bitmap is new */ + if (bitmap_entry->vbe_published_state == VEA_BITMAP_STATE_NEW) { + d_iov_t key, val, val_out; + struct vea_free_bitmap *bitmap; + int rc; + struct vea_free_extent extent; + daos_handle_t btr_hdl = vsi->vsi_md_bitmap_btr; + rc = umem_tx_begin(vsi->vsi_umem, vsi->vsi_txd); + if (rc != 0) + return rc; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONABORT, + new_chunk_abort_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk abort callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + bitmap_entry->vbe_published_state = VEA_BITMAP_STATE_PUBLISHING; + + rc = umem_tx_add_callback(vsi->vsi_umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + new_chunk_commit_cb, bitmap_entry); + if (rc) { + D_ERROR("add chunk commit callback failed. "DF_RC"\n", DP_RC(rc)); + goto out; + } + + extent = vfe->vfe_ext; + extent.vfe_blk_off = bitmap_entry->vbe_bitmap.vfb_blk_off; + extent.vfe_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + rc = persistent_alloc_extent(vsi, &extent); + if (rc) + goto out; + + D_ALLOC(bitmap, alloc_free_bitmap_size(bitmap_entry->vbe_bitmap.vfb_bitmap_sz)); + if (!bitmap) { + rc = -DER_NOMEM; + goto out; + } + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt != 0); + bitmap->vfb_blk_off = extent.vfe_blk_off; + bitmap->vfb_class = bitmap_entry->vbe_bitmap.vfb_class; + bitmap->vfb_blk_cnt = bitmap_entry->vbe_bitmap.vfb_blk_cnt; + bitmap->vfb_bitmap_sz = bitmap_entry->vbe_bitmap.vfb_bitmap_sz; + rc = bitmap_set_range(NULL, bitmap, vfe->vfe_ext.vfe_blk_off, + vfe->vfe_ext.vfe_blk_cnt, false); + if (rc) { + D_FREE(bitmap); + goto out; + } + /* Add to persistent bitmap tree */ + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &bitmap->vfb_blk_off, sizeof(bitmap->vfb_blk_off)); + d_iov_set(&val, bitmap, alloc_free_bitmap_size(bitmap->vfb_bitmap_sz)); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(btr_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(bitmap); + if (rc) + D_ERROR("Insert persistent bitmap failed. "DF_RC"\n", DP_RC(rc)); + else + bitmap_entry->vbe_md_bitmap = (struct vea_free_bitmap *)val_out.iov_buf; +out: + /* Commit/Abort transaction on success/error */ + rc = rc ? 
umem_tx_abort(vsi->vsi_umem, rc) : umem_tx_commit(vsi->vsi_umem); + + return rc; + } + + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, false); +} diff --git a/src/vea/vea_api.c b/src/vea/vea_api.c index ffad7b1b870..a5530a8e5f2 100644 --- a/src/vea/vea_api.c +++ b/src/vea/vea_api.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,7 +14,7 @@ static void erase_md(struct umem_instance *umem, struct vea_space_df *md) { struct umem_attr uma = {0}; - daos_handle_t free_btr, vec_btr; + daos_handle_t free_btr, bitmap_btr; int rc; uma.uma_id = umem->umm_id; @@ -27,15 +27,54 @@ erase_md(struct umem_instance *umem, struct vea_space_df *md) DP_RC(rc)); } - rc = dbtree_open_inplace(&md->vsd_vec_tree, &uma, &vec_btr); + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, &bitmap_btr); if (rc == 0) { - rc = dbtree_destroy(vec_btr, NULL); + rc = dbtree_destroy(bitmap_btr, NULL); if (rc) - D_ERROR("destroy vector tree error: "DF_RC"\n", + D_ERROR("destroy bitmap tree error: "DF_RC"\n", DP_RC(rc)); } } +int +vea_upgrade(struct vea_space_info *vsi, struct umem_instance *umem, + struct vea_space_df *md, uint32_t version) +{ + int rc; + uint64_t offset; + d_iov_t key, val; + struct vea_hint_df dummy; + + if (version < 3) + return 0; + + /* Start transaction to initialize allocation metadata */ + rc = umem_tx_begin(umem, NULL); + if (rc != 0) + return rc; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(vsi->vsi_md_bitmap_btr, &key, &val); + if (rc) { + D_ERROR("upgrade to insert bitmap hint failed: "DF_RC"\n", + DP_RC(rc)); + goto out; + } + + rc = umem_tx_add_ptr(umem, md, sizeof(*md)); + if (rc != 0) + goto out; + + md->vsd_compat |= VEA_COMPAT_FEATURE_BITMAP; + +out: + /* Commit/Abort transaction on success/error */ + return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); +} + /* * Initialize the space tracking information on SCM and the header of the * block device. 
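
Both vea_format() (next hunk) and vea_upgrade() (above) enable the bitmap allocator the same way: they set VEA_COMPAT_FEATURE_BITMAP in vsd_compat and seed the persistent bitmap tree with a hint record keyed by VEA_BITMAP_CHUNK_HINT_KEY. At runtime the feature is gated by is_bitmap_feature_enabled(); a sketch of what that gate presumably looks like (the real helper lives in the VEA internal headers and may differ in detail):

#include <stdbool.h>

/* Sketch only -- assumes the gate is a plain compat-bit test. */
static inline bool
is_bitmap_feature_enabled_sketch(struct vea_space_info *vsi)
{
	return (vsi->vsi_md->vsd_compat & VEA_COMPAT_FEATURE_BITMAP) != 0;
}

Pools that are neither reformatted nor upgraded to layout version 3 keep the bit clear, so they should continue down the pure extent-allocation path.
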
@@ -48,9 +87,11 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, { struct vea_free_extent free_ext; struct umem_attr uma; - uint64_t tot_blks; - daos_handle_t free_btr, vec_btr; + uint64_t tot_blks, offset; + daos_handle_t free_btr, bitmap_btr; + struct vea_hint_df dummy; d_iov_t key, val; + daos_handle_t md_bitmap_btr = DAOS_HDL_INVAL; int rc; D_ASSERT(umem != NULL); @@ -108,14 +149,15 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) return rc; - free_btr = vec_btr = DAOS_HDL_INVAL; + free_btr = bitmap_btr = DAOS_HDL_INVAL; rc = umem_tx_add_ptr(umem, md, sizeof(*md)); if (rc != 0) goto out; md->vsd_magic = VEA_MAGIC; - md->vsd_compat = 0; + /* Todo only enable bitmap for large pool size */ + md->vsd_compat = VEA_COMPAT_FEATURE_BITMAP; md->vsd_blk_sz = blk_sz; md->vsd_tot_blks = tot_blks; md->vsd_hdr_blks = hdr_blks; @@ -141,26 +183,59 @@ vea_format(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto out; - /* Create extent vector tree */ - rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, - &md->vsd_vec_tree, &vec_btr); + /* Create bitmap tree */ + rc = dbtree_create_inplace(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, + &md->vsd_bitmap_tree, &bitmap_btr); + if (rc != 0) + goto out; + + /* Open bitmap tree */ + uma.uma_id = umem->umm_id; + uma.uma_pool = umem->umm_pool; + rc = dbtree_open_inplace(&md->vsd_bitmap_tree, &uma, + &md_bitmap_btr); if (rc != 0) goto out; + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, &dummy, sizeof(dummy)); + memset(&dummy, 0, sizeof(dummy)); + rc = dbtree_update(md_bitmap_btr, &key, &val); + if (rc) + goto out; out: if (daos_handle_is_valid(free_btr)) dbtree_close(free_btr); - if (daos_handle_is_valid(vec_btr)) - dbtree_close(vec_btr); + if (daos_handle_is_valid(bitmap_btr)) + dbtree_close(bitmap_btr); + if (daos_handle_is_valid(md_bitmap_btr)) + dbtree_close(md_bitmap_btr); /* Commit/Abort transaction on success/error */ return rc ? umem_tx_abort(umem, rc) : umem_tx_commit(umem); } +static int +destroy_free_bitmap_agg(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + if (daos_handle_is_valid(vbe->vbe_agg_btr)) { + dbtree_destroy(vbe->vbe_agg_btr, NULL); + vbe->vbe_agg_btr = DAOS_HDL_INVAL; + } + + return 0; +} + /* Free the memory footprint created by vea_load(). 
*/ void vea_unload(struct vea_space_info *vsi) { + int rc; + D_ASSERT(vsi != NULL); unload_space_info(vsi); @@ -170,10 +245,14 @@ vea_unload(struct vea_space_info *vsi) vsi->vsi_free_btr = DAOS_HDL_INVAL; } - /* Destroy the in-memory extent vector tree */ - if (daos_handle_is_valid(vsi->vsi_vec_btr)) { - dbtree_destroy(vsi->vsi_vec_btr, NULL); - vsi->vsi_vec_btr = DAOS_HDL_INVAL; + /* Destroy the in-memory bitmap tree */ + if (daos_handle_is_valid(vsi->vsi_bitmap_btr)) { + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, destroy_free_bitmap_agg, NULL); + if (rc) + D_ERROR("Failed to destroy free bitmap aggregation btr: "DF_RC"\n", DP_RC(rc)); + dbtree_destroy(vsi->vsi_bitmap_btr, NULL); + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; } /* Destroy the in-memory aggregation tree */ @@ -218,11 +297,11 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, vsi->vsi_txd = txd; vsi->vsi_md = md; vsi->vsi_md_free_btr = DAOS_HDL_INVAL; - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; vsi->vsi_free_btr = DAOS_HDL_INVAL; + vsi->vsi_bitmap_btr = DAOS_HDL_INVAL; D_INIT_LIST_HEAD(&vsi->vsi_agg_lru); vsi->vsi_agg_btr = DAOS_HDL_INVAL; - vsi->vsi_vec_btr = DAOS_HDL_INVAL; vsi->vsi_flush_time = 0; vsi->vsi_flush_scheduled = false; vsi->vsi_unmap_ctxt = *unmap_ctxt; @@ -240,15 +319,15 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, if (rc != 0) goto error; - /* Create in-memory extent vector tree */ + /* Create in-memory aggregation tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_vec_btr); + &vsi->vsi_agg_btr); if (rc != 0) goto error; - /* Create in-memory aggregation tree */ - rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, - &vsi->vsi_agg_btr); + /* Create in-memory bitmap tree */ + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, + &vsi->vsi_bitmap_btr); if (rc != 0) goto error; @@ -283,8 +362,7 @@ aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t * half-and-half then reserve from the latter half. (lookup vfc_heap). Otherwise; * 3. Try to reserve from some small free extent (<= VEA_LARGE_EXT_MB) in best-fit, * if it fails, reserve from the largest free extent. (lookup vfc_size_btr) - * 4. Repeat the search in 3rd step to reserve an extent vector. (vsi_vec_btr) - * 5. Fail reserve with ENOMEM if all above attempts fail. + * 4. Fail reserve with ENOMEM if all above attempts fail. 
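
In addition to the steps listed above, when the bitmap feature is enabled and the request is at most VEA_MAX_BITMAP_CLASS blocks, the per-I/O-stream hint is skipped entirely and reserve_single() tries the per-class bitmaps (and then the size tree) before falling back to the largest extent. A simplified dispatch sketch, with error handling and the forced aging-flush retry omitted (an illustration, not the patch itself):

/* Simplified view of the dispatch order in vea_reserve() below. */
static int
reserve_dispatch_sketch(struct vea_space_info *vsi, uint32_t blk_cnt,
			struct vea_hint_context *hint,
			struct vea_resrvd_ext *resrvd)
{
	bool	use_bitmap = is_bitmap_feature_enabled(vsi) &&
			     blk_cnt <= VEA_MAX_BITMAP_CLASS;
	int	rc;

	/* Small bitmap-class requests bypass the per-stream hint. */
	if (!use_bitmap) {
		hint_get(hint, &resrvd->vre_hint_off);
		rc = reserve_hint(vsi, blk_cnt, resrvd);
		if (rc != 0 || resrvd->vre_blk_cnt != 0)
			return rc;
	}

	/* Bitmap/size-tree for small requests, largest free extent otherwise. */
	rc = reserve_single(vsi, blk_cnt, resrvd);
	if (rc != 0 || resrvd->vre_blk_cnt != 0)
		return rc;

	/* Out of space: the real code forces an aging flush and retries once. */
	return -DER_NOSPACE;
}
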
*/ int vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, @@ -294,10 +372,14 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, uint32_t nr_flushed; bool force = false; int rc = 0; + bool try_hint = true; D_ASSERT(vsi != NULL); D_ASSERT(resrvd_list != NULL); + if (is_bitmap_feature_enabled(vsi) && blk_cnt <= VEA_MAX_BITMAP_CLASS) + try_hint = false; + D_ALLOC_PTR(resrvd); if (resrvd == NULL) return -DER_NOMEM; @@ -306,17 +388,20 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, resrvd->vre_hint_off = VEA_HINT_OFF_INVAL; /* Get hint offset */ - hint_get(hint, &resrvd->vre_hint_off); + if (try_hint) + hint_get(hint, &resrvd->vre_hint_off); /* Trigger aging extents flush */ aging_flush(vsi, force, MAX_FLUSH_FRAGS, &nr_flushed); retry: /* Reserve from hint offset */ - rc = reserve_hint(vsi, blk_cnt, resrvd); - if (rc != 0) - goto error; - else if (resrvd->vre_blk_cnt != 0) - goto done; + if (try_hint) { + rc = reserve_hint(vsi, blk_cnt, resrvd); + if (rc != 0) + goto error; + else if (resrvd->vre_blk_cnt != 0) + goto done; + } /* Reserve from the largest extent or a small extent */ rc = reserve_single(vsi, blk_cnt, resrvd); @@ -325,27 +410,28 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, else if (resrvd->vre_blk_cnt != 0) goto done; - /* Reserve extent vector as the last resort */ - rc = reserve_vector(vsi, blk_cnt, resrvd); - - if (rc == -DER_NOSPACE && !force) { + rc = -DER_NOSPACE; + if (!force) { force = true; trigger_aging_flush(vsi, force, MAX_FLUSH_FRAGS * 10, &nr_flushed); if (nr_flushed == 0) goto error; goto retry; - } else if (rc != 0) { + } else { goto error; } done: - D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); D_ASSERT(resrvd->vre_blk_cnt == blk_cnt); - dec_stats(vsi, STAT_FREE_BLKS, blk_cnt); - - /* Update hint offset */ - hint_update(hint, resrvd->vre_blk_off + blk_cnt, - &resrvd->vre_hint_seq); + /* Update hint offset if allocation is from extent */ + if (resrvd->vre_private) { + dec_stats(vsi, STAT_FREE_BITMAP_BLKS, blk_cnt); + } else { + dec_stats(vsi, STAT_FREE_EXTENT_BLKS, blk_cnt); + D_ASSERT(resrvd->vre_blk_off != VEA_HINT_OFF_INVAL); + hint_update(hint, resrvd->vre_blk_off + blk_cnt, + &resrvd->vre_hint_seq); + } d_list_add_tail(&resrvd->vre_link, resrvd_list); @@ -355,67 +441,130 @@ vea_reserve(struct vea_space_info *vsi, uint32_t blk_cnt, return rc; } +static int +process_free_entry(struct vea_space_info *vsi, struct vea_free_entry *vfe, bool publish) +{ + uint32_t expected_type = vfe->vfe_bitmap ? 
VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + if (!publish) { + int type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, NULL); + + if (type < 0) + return type; + + if (type != expected_type) { + D_ERROR("mismatch free entry type expected: %d, but got: %d\n", + expected_type, type); + return -DER_INVAL; + } + return compound_free(vsi, vfe, 0); + } + + return persistent_alloc(vsi, vfe); +} + static int process_resrvd_list(struct vea_space_info *vsi, struct vea_hint_context *hint, d_list_t *resrvd_list, bool publish) { struct vea_resrvd_ext *resrvd, *tmp; - struct vea_free_extent vfe; + struct vea_free_entry vfe; uint64_t seq_max = 0, seq_min = 0; uint64_t off_c = 0, off_p = 0; unsigned int seq_cnt = 0; int rc = 0; + uint32_t entry_type; + void *private = NULL; + uint64_t bitmap_seq_max = 0, bitmap_seq_min = 0; + uint64_t bitmap_off_c = 0, bitmap_off_p = 0; + unsigned int bitmap_seq_cnt = 0; + struct vea_hint_context *bitmap_hint = vsi->vsi_bitmap_hint_context; if (d_list_empty(resrvd_list)) return 0; - vfe.vfe_blk_off = 0; - vfe.vfe_blk_cnt = 0; - vfe.vfe_age = 0; /* Not used */ + vfe.vfe_ext.vfe_blk_off = 0; + vfe.vfe_ext.vfe_blk_cnt = 0; + vfe.vfe_ext.vfe_age = 0; /* Not used */ + vfe.vfe_bitmap = NULL; d_list_for_each_entry(resrvd, resrvd_list, vre_link) { + struct vea_bitmap_entry *bitmap_entry; + rc = verify_resrvd_ext(resrvd); if (rc) goto error; + entry_type = resrvd->vre_private ? + VEA_FREE_ENTRY_BITMAP : VEA_FREE_ENTRY_EXTENT; + + bitmap_entry = (struct vea_bitmap_entry *)resrvd->vre_private; /* Reserved list is sorted by hint sequence */ - if (seq_min == 0) { - seq_min = resrvd->vre_hint_seq; - off_c = resrvd->vre_hint_off; - } else if (hint != NULL) { - D_ASSERT(seq_min < resrvd->vre_hint_seq); + /* use bitmap entry chunk offset */ + if (resrvd->vre_new_bitmap_chunk) { + D_ASSERT(bitmap_entry != NULL); + D_ASSERT(entry_type == VEA_FREE_ENTRY_BITMAP); + if (bitmap_seq_min == 0) { + bitmap_seq_min = resrvd->vre_hint_seq; + bitmap_off_c = resrvd->vre_hint_off; + } else { + D_ASSERT(bitmap_seq_min < resrvd->vre_hint_seq); + } + bitmap_seq_cnt++; + bitmap_seq_max = resrvd->vre_hint_seq; + bitmap_off_p = resrvd->vre_blk_off + bitmap_entry->vbe_bitmap.vfb_blk_cnt; + } else if (entry_type == VEA_FREE_ENTRY_EXTENT) { + if (seq_min == 0) { + seq_min = resrvd->vre_hint_seq; + off_c = resrvd->vre_hint_off; + } else if (hint != NULL) { + D_ASSERT(seq_min < resrvd->vre_hint_seq); + } + + seq_cnt++; + seq_max = resrvd->vre_hint_seq; + off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; } - seq_cnt++; - seq_max = resrvd->vre_hint_seq; - off_p = resrvd->vre_blk_off + resrvd->vre_blk_cnt; - - if (vfe.vfe_blk_off + vfe.vfe_blk_cnt == resrvd->vre_blk_off) { - vfe.vfe_blk_cnt += resrvd->vre_blk_cnt; + if (private == resrvd->vre_private && + vfe.vfe_ext.vfe_blk_off + vfe.vfe_ext.vfe_blk_cnt == resrvd->vre_blk_off) { + vfe.vfe_ext.vfe_blk_cnt += resrvd->vre_blk_cnt; continue; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } - vfe.vfe_blk_off = resrvd->vre_blk_off; - vfe.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_ext.vfe_blk_off = resrvd->vre_blk_off; + vfe.vfe_ext.vfe_blk_cnt = resrvd->vre_blk_cnt; + vfe.vfe_bitmap = bitmap_entry; + private = resrvd->vre_private; } - if (vfe.vfe_blk_cnt != 0) { - rc = publish ? 
persistent_alloc(vsi, &vfe) : - compound_free(vsi, &vfe, 0); + if (vfe.vfe_ext.vfe_blk_cnt != 0) { + rc = process_free_entry(vsi, &vfe, publish); if (rc) goto error; } + if (seq_cnt == 0) + goto bitmap_publish; + rc = publish ? hint_tx_publish(vsi->vsi_umem, hint, off_p, seq_min, seq_max, seq_cnt) : hint_cancel(hint, off_c, seq_min, seq_max, seq_cnt); +bitmap_publish: + if (rc || bitmap_seq_cnt == 0) + goto error; + + rc = publish ? hint_tx_publish(vsi->vsi_umem, bitmap_hint, bitmap_off_p, + bitmap_seq_min, bitmap_seq_max, bitmap_seq_cnt) : + hint_cancel(bitmap_hint, bitmap_off_c, bitmap_seq_min, + bitmap_seq_max, bitmap_seq_cnt); + error: d_list_for_each_entry_safe(resrvd, tmp, resrvd_list, vre_link) { d_list_del_init(&resrvd->vre_link); @@ -457,40 +606,6 @@ vea_tx_publish(struct vea_space_info *vsi, struct vea_hint_context *hint, return process_resrvd_list(vsi, hint, resrvd_list, true); } -struct free_commit_cb_arg { - struct vea_space_info *fca_vsi; - struct vea_free_extent fca_vfe; -}; - -static void -free_commit_cb(void *data, bool noop) -{ - struct free_commit_cb_arg *fca = data; - int rc; - - /* Transaction aborted, only need to free callback arg */ - if (noop) - goto free; - - /* - * Aggregated free will be executed on outermost transaction - * commit. - * - * If it fails, the freed space on persistent free tree won't - * be added in in-memory free tree, hence the space won't be - * visible for allocation until the tree sync up on next server - * restart. Such temporary space leak is tolerable, what we must - * avoid is the contrary case: in-memory tree update succeeds - * but persistent tree update fails, which risks data corruption. - */ - rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); - - D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", - fca->fca_vsi, rc); -free: - D_FREE(fca); -} - /* * Free allocated extent. * @@ -515,10 +630,10 @@ vea_free(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt) return -DER_NOMEM; fca->fca_vsi = vsi; - fca->fca_vfe.vfe_blk_off = blk_off; - fca->fca_vfe.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; - rc = verify_free_entry(NULL, &fca->fca_vfe); + rc = verify_free_entry(NULL, &fca->fca_vfe.vfe_ext); if (rc) goto error; @@ -571,16 +686,6 @@ vea_set_ext_age(struct vea_space_info *vsi, uint64_t blk_off, uint64_t age) return 0; } -/* Convert an extent into an allocated extent vector. 
*/ -int -vea_get_ext_vector(struct vea_space_info *vsi, uint64_t blk_off, - uint32_t blk_cnt, struct vea_ext_vector *ext_vector) -{ - D_ASSERT(vsi != NULL); - D_ASSERT(ext_vector != NULL); - return 0; -} - /* Load persistent hint data and initialize in-memory hint context */ int vea_hint_load(struct vea_hint_df *phd, struct vea_hint_context **thc) @@ -609,8 +714,8 @@ vea_hint_unload(struct vea_hint_context *thc) } static int -count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, - void *arg) +count_free_extent_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) { struct vea_free_extent *vfe; uint64_t *off, *free_blks = arg; @@ -629,16 +734,53 @@ count_free_persistent(daos_handle_t ih, d_iov_t *key, d_iov_t *val, return 0; } +static int +count_free_bitmap_persistent(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_free_bitmap *vfb; + uint64_t *off, *free_blks = arg; + int rc; + + off = (uint64_t *)key->iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + return 0; + + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); + if (rc != 0) + return rc; + + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(vfb); + + return 0; +} + static int count_free_transient(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_entry *ve; + struct vea_extent_entry *ve; uint64_t *free_blks = arg; - ve = (struct vea_entry *)val->iov_buf; + ve = (struct vea_extent_entry *)val->iov_buf; D_ASSERT(free_blks != NULL); - *free_blks += ve->ve_ext.vfe_blk_cnt; + *free_blks += ve->vee_ext.vfe_blk_cnt; + + return 0; +} + +static int +count_free_bitmap_transient(daos_handle_t ih, d_iov_t *key, + d_iov_t *val, void *arg) +{ + struct vea_bitmap_entry *vbe; + uint64_t *free_blks = arg; + + vbe = (struct vea_bitmap_entry *)val->iov_buf; + D_ASSERT(free_blks != NULL); + *free_blks += bitmap_free_blocks(&vbe->vbe_bitmap); return 0; } @@ -660,7 +802,8 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, attr->va_hdr_blks = vsd->vsd_hdr_blks; attr->va_large_thresh = vsi->vsi_class.vfc_large_thresh; attr->va_tot_blks = vsd->vsd_tot_blks; - attr->va_free_blks = vsi->vsi_stat[STAT_FREE_BLKS]; + attr->va_free_blks = vsi->vsi_stat[STAT_FREE_EXTENT_BLKS] + + vsi->vsi_stat[STAT_FREE_BITMAP_BLKS]; } if (stat != NULL) { @@ -668,7 +811,13 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, stat->vs_free_persistent = 0; rc = dbtree_iterate(vsi->vsi_md_free_btr, DAOS_INTENT_DEFAULT, - false, count_free_persistent, + false, count_free_extent_persistent, + (void *)&stat->vs_free_persistent); + if (rc != 0) + return rc; + + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, + false, count_free_bitmap_persistent, (void *)&stat->vs_free_persistent); if (rc != 0) return rc; @@ -680,11 +829,19 @@ vea_query(struct vea_space_info *vsi, struct vea_attr *attr, if (rc != 0) return rc; + rc = dbtree_iterate(vsi->vsi_bitmap_btr, DAOS_INTENT_DEFAULT, + false, count_free_bitmap_transient, + (void *)&stat->vs_free_transient); + if (rc != 0) + return rc; + stat->vs_resrv_hint = vsi->vsi_stat[STAT_RESRV_HINT]; stat->vs_resrv_large = vsi->vsi_stat[STAT_RESRV_LARGE]; stat->vs_resrv_small = vsi->vsi_stat[STAT_RESRV_SMALL]; + stat->vs_resrv_bitmap = vsi->vsi_stat[STAT_RESRV_BITMAP]; stat->vs_frags_large = vsi->vsi_stat[STAT_FRAGS_LARGE]; stat->vs_frags_small = vsi->vsi_stat[STAT_FRAGS_SMALL]; + stat->vs_frags_bitmap = vsi->vsi_stat[STAT_FRAGS_BITMAP]; stat->vs_frags_aging = vsi->vsi_stat[STAT_FRAGS_AGING]; } diff 
--git a/src/vea/vea_free.c b/src/vea/vea_free.c index 53fa8492a91..f82fd299bd4 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,6 +7,7 @@ #include #include +#include #include "vea_internal.h" enum vea_free_type { @@ -15,33 +16,97 @@ enum vea_free_type { VEA_TYPE_PERSIST, }; +int +free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry) +{ + int type = VEA_FREE_ENTRY_BITMAP; + struct vea_free_bitmap *found; + daos_handle_t btr_hdl = vsi->vsi_bitmap_btr; + d_iov_t key_in, key_out, val; + uint64_t found_end, vfe_end; + int rc, opc = BTR_PROBE_LE; + struct vea_bitmap_entry *entry = NULL; + + if (blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + type = VEA_FREE_ENTRY_EXTENT; + goto out; + } + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + /* Fetch the in-tree record */ + d_iov_set(&key_in, &blk_off, sizeof(blk_off)); + d_iov_set(&key_out, NULL, sizeof(blk_off)); + d_iov_set(&val, NULL, 0); + + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key_in, &key_out, &val); + if (rc == -DER_NONEXIST) + return VEA_FREE_ENTRY_EXTENT; + + if (rc) { + D_ERROR("failed to search range ["DF_U64", %u] int bitmap tree\n", + blk_off, blk_cnt); + return rc; + } + + entry = (struct vea_bitmap_entry *)val.iov_buf; + found = &entry->vbe_bitmap; + rc = verify_bitmap_entry(found); + if (rc) { + D_ERROR("verify bitmap failed in free_type\n"); + return rc; + } + + found_end = found->vfb_blk_off + found->vfb_blk_cnt - 1; + vfe_end = blk_off + blk_cnt - 1; + D_ASSERT(blk_off >= found->vfb_blk_off); + if (blk_off <= found_end) { + if (vfe_end <= found_end) { + if (bitmap_entry) + *bitmap_entry = entry; + return VEA_FREE_ENTRY_BITMAP; + } + + D_CRIT("["DF_U64", %u] should not cross bitmap tree\n", + found->vfb_blk_off, found->vfb_blk_cnt); + return -DER_INVAL; + } else { + type = VEA_FREE_ENTRY_EXTENT; + } +out: + return type; +} + void -free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry) +extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry) { struct vea_free_class *vfc = &vsi->vsi_class; - struct vea_sized_class *sc = entry->ve_sized_class; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; + struct vea_sized_class *sc = entry->vee_sized_class; + uint32_t blk_cnt; if (sc == NULL) { + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > vfc->vfc_large_thresh, "%u <= %u", blk_cnt, vfc->vfc_large_thresh); - D_ASSERT(d_list_empty(&entry->ve_link)); + D_ASSERT(d_list_empty(&entry->vee_link)); - d_binheap_remove(&vfc->vfc_heap, &entry->ve_node); + d_binheap_remove(&vfc->vfc_heap, &entry->vee_node); dec_stats(vsi, STAT_FRAGS_LARGE, 1); } else { d_iov_t key; - uint64_t int_key = blk_cnt; int rc; + blk_cnt = entry->vee_ext.vfe_blk_cnt; D_ASSERTF(blk_cnt > 0 && blk_cnt <= vfc->vfc_large_thresh, "%u > %u", blk_cnt, vfc->vfc_large_thresh); D_ASSERT(daos_handle_is_valid(vfc->vfc_size_btr)); - d_list_del_init(&entry->ve_link); - entry->ve_sized_class = NULL; + d_list_del_init(&entry->vee_link); + entry->vee_sized_class = NULL; /* Remove the sized class when it's empty */ - if (d_list_empty(&sc->vsc_lru)) { + if (d_list_empty(&sc->vsc_extent_lru)) { + uint64_t int_key = blk_cnt; + d_iov_set(&key, &int_key, sizeof(int_key)); rc = dbtree_delete(vfc->vfc_size_btr, BTR_PROBE_EQ, &key, NULL); if (rc) @@ -52,32 +117,16 @@ free_class_remove(struct vea_space_info 
*vsi, struct vea_entry *entry) } } -int -free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) +static int +find_or_create_sized_class(struct vea_space_info *vsi, uint64_t int_key, + struct vea_sized_class **ret_sc) { struct vea_free_class *vfc = &vsi->vsi_class; daos_handle_t btr_hdl = vfc->vfc_size_btr; - uint32_t blk_cnt = entry->ve_ext.vfe_blk_cnt; d_iov_t key, val, val_out; - uint64_t int_key = blk_cnt; - struct vea_sized_class dummy, *sc; + struct vea_sized_class dummy, *sc = NULL; int rc; - D_ASSERT(entry->ve_sized_class == NULL); - D_ASSERT(d_list_empty(&entry->ve_link)); - - /* Add to heap if it's a large free extent */ - if (blk_cnt > vfc->vfc_large_thresh) { - rc = d_binheap_insert(&vfc->vfc_heap, &entry->ve_node); - if (rc != 0) { - D_ERROR("Failed to insert heap: %d\n", rc); - return rc; - } - - inc_stats(vsi, STAT_FRAGS_LARGE, 1); - return 0; - } - /* Add to a sized class */ D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &int_key, sizeof(int_key)); @@ -88,50 +137,119 @@ free_class_add(struct vea_space_info *vsi, struct vea_entry *entry) /* Found an existing sized class */ sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_ASSERT(!d_list_empty(&sc->vsc_lru)); } else if (rc == -DER_NONEXIST) { /* Create a new sized class */ + memset(&dummy, 0, sizeof(dummy)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc != 0) { - D_ERROR("Insert size class:%u failed. "DF_RC"\n", - blk_cnt, DP_RC(rc)); + D_ERROR("Insert size class:%llu failed. "DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } sc = (struct vea_sized_class *)val_out.iov_buf; D_ASSERT(sc != NULL); - D_INIT_LIST_HEAD(&sc->vsc_lru); + D_INIT_LIST_HEAD(&sc->vsc_extent_lru); } else { - D_ERROR("Lookup size class:%u failed. "DF_RC"\n", blk_cnt, DP_RC(rc)); + D_ERROR("Lookup size class:%llu failed. 
"DF_RC"\n", + (unsigned long long)int_key, DP_RC(rc)); return rc; } + *ret_sc = sc; + + return rc; +} + +int +extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry) +{ + struct vea_free_class *vfc = &vsi->vsi_class; + uint64_t int_key; + struct vea_sized_class *sc; + int rc; + + D_ASSERT(entry->vee_sized_class == NULL); + D_ASSERT(d_list_empty(&entry->vee_link)); + + int_key = entry->vee_ext.vfe_blk_cnt; + /* Add to heap if it's a free extent */ + if (int_key > vfc->vfc_large_thresh) { + rc = d_binheap_insert(&vfc->vfc_heap, &entry->vee_node); + if (rc != 0) { + D_ERROR("Failed to insert heap: %d\n", rc); + return rc; + } + inc_stats(vsi, STAT_FRAGS_LARGE, 1); + return 0; + } + + rc = find_or_create_sized_class(vsi, int_key, &sc); + if (rc) + return rc; - entry->ve_sized_class = sc; - d_list_add_tail(&entry->ve_link, &sc->vsc_lru); + entry->vee_sized_class = sc; + d_list_add_tail(&entry->vee_link, &sc->vsc_extent_lru); inc_stats(vsi, STAT_FRAGS_SMALL, 1); return 0; } static void -undock_entry(struct vea_space_info *vsi, struct vea_entry *entry, - unsigned int type) +bitmap_free_class_add(struct vea_space_info *vsi, struct vea_bitmap_entry *entry, + int flags) +{ + uint64_t int_key; + int free_blks; + + D_ASSERT(d_list_empty(&entry->vbe_link)); + + int_key = entry->vbe_bitmap.vfb_class; + D_ASSERT(int_key <= VEA_MAX_BITMAP_CLASS && int_key > 0); + + free_blks = bitmap_free_blocks(&entry->vbe_bitmap); + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, free_blks); + if (free_blks >= int_key) { + if (free_blks == entry->vbe_bitmap.vfb_blk_cnt) + d_list_add(&entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[int_key - 1]); + else + d_list_add(&entry->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[int_key - 1]); + } + inc_stats(vsi, STAT_FRAGS_BITMAP, 1); +} + +static void +undock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, + unsigned int type) { if (type == VEA_TYPE_PERSIST) return; D_ASSERT(entry != NULL); if (type == VEA_TYPE_COMPOUND) { - free_class_remove(vsi, entry); + extent_free_class_remove(vsi, entry); } else { - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vee_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); } } +static void +undock_free_entry(struct vea_space_info *vsi, struct vea_free_entry *entry, + unsigned int type) +{ + if (type == VEA_TYPE_PERSIST || type == VEA_TYPE_COMPOUND) + return; + + d_list_del_init(&entry->vfe_link); + dec_stats(vsi, STAT_FRAGS_AGING, 1); +} + #define LARGE_AGING_FRAG_BLKS 8192 static inline bool @@ -141,27 +259,20 @@ is_aging_frag_large(struct vea_free_extent *vfe) } static inline void -dock_aging_entry(struct vea_space_info *vsi, struct vea_entry *entry) +dock_aging_entry(struct vea_space_info *vsi, struct vea_free_entry *entry) { - d_list_add_tail(&entry->ve_link, &vsi->vsi_agg_lru); + d_list_add_tail(&entry->vfe_link, &vsi->vsi_agg_lru); inc_stats(vsi, STAT_FRAGS_AGING, 1); } static int -dock_entry(struct vea_space_info *vsi, struct vea_entry *entry, unsigned int type) +dock_extent_entry(struct vea_space_info *vsi, struct vea_extent_entry *entry, unsigned int type) { - int rc = 0; D_ASSERT(entry != NULL); - if (type == VEA_TYPE_COMPOUND) { - rc = free_class_add(vsi, entry); - } else { - D_ASSERT(type == VEA_TYPE_AGGREGATE); - D_ASSERT(d_list_empty(&entry->ve_link)); - dock_aging_entry(vsi, entry); - } + D_ASSERT(type == VEA_TYPE_COMPOUND); - return rc; + return extent_free_class_add(vsi, entry); } /* @@ -175,26 +286,17 @@ dock_entry(struct vea_space_info *vsi, 
struct vea_entry *entry, unsigned int typ */ static int merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, - unsigned int type, unsigned int flags) + unsigned int type, unsigned int flags, daos_handle_t btr_hdl) { struct vea_free_extent *ext, *neighbor = NULL; struct vea_free_extent merged = *ext_in; - struct vea_entry *entry, *neighbor_entry = NULL; - daos_handle_t btr_hdl; + struct vea_extent_entry *extent_entry, *neighbor_extent_entry = NULL; + struct vea_free_entry *free_entry, *neighbor_free_entry = NULL; d_iov_t key, key_out, val; uint64_t *off; bool fetch_prev = true, large_prev = false; int rc, del_opc = BTR_PROBE_BYPASS; - if (type == VEA_TYPE_COMPOUND) - btr_hdl = vsi->vsi_free_btr; - else if (type == VEA_TYPE_PERSIST) - btr_hdl = vsi->vsi_md_free_btr; - else if (type == VEA_TYPE_AGGREGATE) - btr_hdl = vsi->vsi_agg_btr; - else - return -DER_INVAL; - D_ASSERT(daos_handle_is_valid(btr_hdl)); d_iov_set(&key, &ext_in->vfe_blk_off, sizeof(ext_in->vfe_blk_off)); d_iov_set(&key_out, NULL, 0); @@ -215,7 +317,7 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } repeat: - d_iov_set(&key_out, NULL, 0); + d_iov_set(&key_out, NULL, sizeof(ext_in->vfe_blk_off)); d_iov_set(&val, NULL, 0); if (fetch_prev) { @@ -249,11 +351,17 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } if (type == VEA_TYPE_PERSIST) { - entry = NULL; + extent_entry = NULL; + free_entry = NULL; ext = (struct vea_free_extent *)val.iov_buf; + } else if (type == VEA_TYPE_COMPOUND) { + free_entry = NULL; + extent_entry = (struct vea_extent_entry *)val.iov_buf; + ext = &extent_entry->vee_ext; } else { - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + extent_entry = NULL; + free_entry = (struct vea_free_entry *)val.iov_buf; + ext = &free_entry->vfe_ext; } off = (uint64_t *)key_out.iov_buf; @@ -297,7 +405,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, merged.vfe_blk_cnt += ext->vfe_blk_cnt; neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = extent_entry; + neighbor_free_entry = free_entry; } else { merged.vfe_blk_cnt += ext->vfe_blk_cnt; @@ -306,7 +415,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, * adjacent extent. 
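
merge_free_ext() now takes the target btree handle explicitly and tracks either an extent entry or an aging free entry for the neighbor, but the merge arithmetic itself is unchanged: a freed range that touches its lower and/or upper neighbor is folded into a single record. A self-contained illustration of that folding (the real code also deletes the absorbed record from the tree and re-docks the survivor in the free classes or the aging LRU):

#include <stdint.h>

struct ext_sketch { uint64_t off; uint32_t cnt; };	/* covers [off, off + cnt) */

static struct ext_sketch
fold_neighbors(struct ext_sketch prev, struct ext_sketch in, struct ext_sketch next)
{
	struct ext_sketch merged = in;

	if (prev.off + prev.cnt == in.off) {	/* extend downward into prev */
		merged.off  = prev.off;
		merged.cnt += prev.cnt;
	}
	if (in.off + in.cnt == next.off)	/* absorb the upper neighbor */
		merged.cnt += next.cnt;
	return merged;
}

Example: freeing [100, 110) while [90, 100) and [110, 130) are already free yields a single [90, 130) record.
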
*/ if (neighbor != NULL) { - undock_entry(vsi, entry, type); + if (extent_entry) + undock_extent_entry(vsi, extent_entry, type); + else if (free_entry) + undock_free_entry(vsi, free_entry, type); rc = dbtree_delete(btr_hdl, del_opc, &key_out, NULL); if (rc) { D_ERROR("Failed to delete: %d\n", rc); @@ -314,7 +426,8 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, } } else { neighbor = ext; - neighbor_entry = entry; + neighbor_extent_entry = extent_entry; + neighbor_free_entry = free_entry; } } } @@ -335,7 +448,10 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, return rc; } } else { - undock_entry(vsi, neighbor_entry, type); + if (neighbor_extent_entry) + undock_extent_entry(vsi, neighbor_extent_entry, type); + else if (neighbor_free_entry) + undock_free_entry(vsi, neighbor_free_entry, type); } /* Adjust in-tree offset & length */ @@ -344,24 +460,123 @@ merge_free_ext(struct vea_space_info *vsi, struct vea_free_extent *ext_in, if (type == VEA_TYPE_AGGREGATE || type == VEA_TYPE_COMPOUND) { neighbor->vfe_age = merged.vfe_age; - rc = dock_entry(vsi, neighbor_entry, type); - if (rc < 0) - return rc; + if (neighbor_extent_entry) { + rc = dock_extent_entry(vsi, neighbor_extent_entry, type); + if (rc < 0) + return rc; + } else if (neighbor_free_entry) { + D_ASSERT(type == VEA_TYPE_AGGREGATE); + D_ASSERT(d_list_empty(&neighbor_free_entry->vfe_link)); + dock_aging_entry(vsi, neighbor_free_entry); + } } return 1; } -/* Free extent to in-memory compound index */ +/* insert bitmap entry to in-memory index */ int -compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags) +bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct vea_bitmap_entry **ret_entry, unsigned int flags) { - struct vea_entry *entry, dummy; + struct vea_bitmap_entry *entry, *dummy; + d_iov_t key, val, val_out; + int rc, ret; + struct umem_attr uma; + int dummy_size = sizeof(*dummy) + (vfb->vfb_bitmap_sz << 3); + + D_ALLOC(dummy, dummy_size); + if (!dummy) + return -DER_NOMEM; + + memset(dummy, 0, sizeof(*dummy)); + dummy->vbe_bitmap = *vfb; + dummy->vbe_agg_btr = DAOS_HDL_INVAL; + if (state == VEA_BITMAP_STATE_NEW) + setbits64(dummy->vbe_bitmap.vfb_bitmaps, 0, 1); + else + memcpy(dummy->vbe_bitmap.vfb_bitmaps, vfb->vfb_bitmaps, vfb->vfb_bitmap_sz << 3); + dummy->vbe_published_state = state; + + /* Add to in-memory bitmap tree */ + D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); + d_iov_set(&key, &dummy->vbe_bitmap.vfb_blk_off, sizeof(dummy->vbe_bitmap.vfb_blk_off)); + d_iov_set(&val, dummy, dummy_size); + d_iov_set(&val_out, NULL, 0); + + rc = dbtree_upsert(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &key, + &val, &val_out); + D_FREE(dummy); + if (rc != 0) { + D_ERROR("Insert bitmap failed. 
"DF_RC" %llu\n", DP_RC(rc), + (unsigned long long)vfb->vfb_blk_off); + return rc; + } + + memset(&uma, 0, sizeof(uma)); + uma.uma_id = UMEM_CLASS_VMEM; + + D_ASSERT(val_out.iov_buf != NULL); + entry = (struct vea_bitmap_entry *)val_out.iov_buf; + rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_DIRECT_KEY, VEA_TREE_ODR, &uma, NULL, + &entry->vbe_agg_btr); + if (rc != 0) + goto error; + + D_INIT_LIST_HEAD(&entry->vbe_link); + D_ASSERT(entry->vbe_bitmap.vfb_class == vfb->vfb_class); + + bitmap_free_class_add(vsi, entry, flags); + if (ret_entry) + *ret_entry = entry; + return rc; + +error: + ret = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (ret) + D_ERROR("Failed to clean bitmap failed. "DF_RC" "DF_U64"\n", + DP_RC(rc), vfb->vfb_blk_off); + return rc; +} + +static int +bitmap_entry_remove(struct vea_space_info *vsi, struct vea_bitmap_entry *bitmap, + unsigned int flags) +{ + d_iov_t key; + int rc; + + rc = dbtree_destroy(bitmap->vbe_agg_btr, NULL); + if (rc) { + D_ERROR("Failed to destroy bitmap agg tree. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + return rc; + } + bitmap->vbe_agg_btr = DAOS_HDL_INVAL; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + dec_stats(vsi, STAT_FREE_BITMAP_BLKS, bitmap->vbe_bitmap.vfb_blk_cnt); + d_list_del_init(&bitmap->vbe_link); + dec_stats(vsi, STAT_FRAGS_BITMAP, 1); + + d_iov_set(&key, &bitmap->vbe_bitmap.vfb_blk_off, sizeof(bitmap->vbe_bitmap.vfb_blk_off)); + rc = dbtree_delete(vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) + D_ERROR("Failed to clean bitmap failed. "DF_RC" "DF_U64"\n", + DP_RC(rc), bitmap->vbe_bitmap.vfb_blk_off); + + return rc; +} + +int +compound_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe, + unsigned int flags) +{ + struct vea_extent_entry *entry, dummy; d_iov_t key, val, val_out; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_COMPOUND, flags, vsi->vsi_free_btr); if (rc < 0) { return rc; } else if (rc > 0) { @@ -370,12 +585,12 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + D_INIT_LIST_HEAD(&dummy.vee_link); + dummy.vee_ext = *vfe; /* Add to in-memory free extent tree */ D_ASSERT(daos_handle_is_valid(vsi->vsi_free_btr)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vee_ext.vfe_blk_off, sizeof(dummy.vee_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); @@ -387,27 +602,81 @@ compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, } D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_extent_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vee_link); - rc = free_class_add(vsi, entry); + rc = extent_free_class_add(vsi, entry); accounting: if (!rc && !(flags & VEA_FL_NO_ACCOUNTING)) - inc_stats(vsi, STAT_FREE_BLKS, vfe->vfe_blk_cnt); + inc_stats(vsi, STAT_FREE_EXTENT_BLKS, vfe->vfe_blk_cnt); return rc; } -/* Free extent to persistent free tree */ +/* Free entry to in-memory compound index */ int -persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, + unsigned int flags) +{ + int rc; + struct vea_bitmap_entry *found = vfe->vfe_bitmap; + + if (found == NULL) + return compound_free_extent(vsi, 
&vfe->vfe_ext, flags); + + rc = bitmap_set_range(NULL, &found->vbe_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); + if (rc) + return rc; + + if (!(flags & VEA_FL_NO_ACCOUNTING)) + inc_stats(vsi, STAT_FREE_BITMAP_BLKS, vfe->vfe_ext.vfe_blk_cnt); + + /* if bitmap is not published and clear, then remove it */ + if (found->vbe_published_state == VEA_BITMAP_STATE_NEW) { + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + struct vea_free_extent ext; + + ext.vfe_blk_cnt = found->vbe_bitmap.vfb_blk_cnt; + ext.vfe_blk_off = found->vbe_bitmap.vfb_blk_off; + rc = bitmap_entry_remove(vsi, found, flags); + if (rc) + return rc; + return compound_free_extent(vsi, &ext, flags); + } + } + + if (is_bitmap_empty(found->vbe_bitmap.vfb_bitmaps, + found->vbe_bitmap.vfb_bitmap_sz)) { + if (d_list_empty(&found->vbe_link)) + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + else + d_list_move_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_empty[found->vbe_bitmap.vfb_class - 1]); + return 0; + } + + if (d_list_empty(&found->vbe_link)) { + D_ASSERT(found->vbe_bitmap.vfb_class <= VEA_MAX_BITMAP_CLASS); + d_list_add_tail(&found->vbe_link, + &vsi->vsi_class.vfc_bitmap_lru[found->vbe_bitmap.vfb_class - 1]); + } + + return 0; +} + +/* Free extent to persistent free tree */ +static int +persistent_free_extent(struct vea_space_info *vsi, struct vea_free_extent *vfe) { struct vea_free_extent dummy; d_iov_t key, val; daos_handle_t btr_hdl = vsi->vsi_md_free_btr; int rc; - rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0); + rc = merge_free_ext(vsi, vfe, VEA_TYPE_PERSIST, 0, vsi->vsi_md_free_btr); if (rc < 0) return rc; else if (rc > 0) @@ -428,41 +697,70 @@ persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) return rc; } +int +persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) +{ + int type; + + D_ASSERT(umem_tx_inprogress(vsi->vsi_umem) || + vsi->vsi_umem->umm_id == UMEM_CLASS_VMEM); + D_ASSERT(vfe->vfe_ext.vfe_blk_off != VEA_HINT_OFF_INVAL); + type = free_type(vsi, vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, + &vfe->vfe_bitmap); + if (type < 0) + return type; + + if (vfe->vfe_bitmap == NULL) + return persistent_free_extent(vsi, &vfe->vfe_ext); + + D_ASSERT(type == VEA_FREE_ENTRY_BITMAP); + + D_ASSERT(vfe->vfe_ext.vfe_blk_cnt > 0 && + vfe->vfe_ext.vfe_blk_cnt < vsi->vsi_class.vfc_large_thresh); + return bitmap_set_range(vsi->vsi_umem, vfe->vfe_bitmap->vbe_md_bitmap, + vfe->vfe_ext.vfe_blk_off, vfe->vfe_ext.vfe_blk_cnt, true); +} + /* Free extent to the aggregate free tree */ int -aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) +aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) { - struct vea_entry *entry, dummy; + struct vea_free_entry *entry, dummy; d_iov_t key, val, val_out; daos_handle_t btr_hdl = vsi->vsi_agg_btr; int rc; - vfe->vfe_age = get_current_age(); - rc = merge_free_ext(vsi, vfe, VEA_TYPE_AGGREGATE, 0); + /* free entry bitmap */ + if (vfe->vfe_bitmap == NULL) + btr_hdl = vsi->vsi_agg_btr; + else + btr_hdl = vfe->vfe_bitmap->vbe_agg_btr; + + vfe->vfe_ext.vfe_age = get_current_age(); + rc = merge_free_ext(vsi, &vfe->vfe_ext, VEA_TYPE_AGGREGATE, 0, btr_hdl); if (rc < 0) return rc; else if (rc > 0) - return 0; /* extent merged in tree */ + return 0; /* entry merged in tree */ - memset(&dummy, 0, sizeof(dummy)); - D_INIT_LIST_HEAD(&dummy.ve_link); - dummy.ve_ext = *vfe; + dummy = *vfe; + 
D_INIT_LIST_HEAD(&dummy.vfe_link); /* Add to in-memory aggregate free extent tree */ D_ASSERT(daos_handle_is_valid(btr_hdl)); - d_iov_set(&key, &dummy.ve_ext.vfe_blk_off, sizeof(dummy.ve_ext.vfe_blk_off)); + d_iov_set(&key, &dummy.vfe_ext.vfe_blk_off, sizeof(dummy.vfe_ext.vfe_blk_off)); d_iov_set(&val, &dummy, sizeof(dummy)); d_iov_set(&val_out, NULL, 0); rc = dbtree_upsert(btr_hdl, BTR_PROBE_BYPASS, DAOS_INTENT_UPDATE, &key, &val, &val_out); if (rc) { - D_ERROR("Insert aging extent failed. "DF_RC"\n", DP_RC(rc)); + D_ERROR("Insert aging entry failed. "DF_RC"\n", DP_RC(rc)); return rc; } D_ASSERT(val_out.iov_buf != NULL); - entry = (struct vea_entry *)val_out.iov_buf; - D_INIT_LIST_HEAD(&entry->ve_link); + entry = (struct vea_free_entry *)val_out.iov_buf; + D_INIT_LIST_HEAD(&entry->vfe_link); dock_aging_entry(vsi, entry); return 0; @@ -474,35 +772,48 @@ aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe) static int flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_list_t *unmap_sgl) { - struct vea_entry *entry, *tmp; + struct vea_free_entry *entry, *tmp; struct vea_free_extent vfe; + struct vea_free_entry free_entry; d_iov_t *unmap_iov; int i, rc = 0; + d_iov_t key; + struct vea_bitmap_entry *bitmap; + struct vea_bitmap_entry **flush_bitmaps; + daos_handle_t btr_hdl; D_ASSERT(umem_tx_none(vsi->vsi_umem)); D_ASSERT(unmap_sgl->sg_nr_out == 0); - d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, ve_link) { - d_iov_t key; + D_ALLOC_ARRAY(flush_bitmaps, MAX_FLUSH_FRAGS); + if (!flush_bitmaps) + return -DER_NOMEM; - vfe = entry->ve_ext; + d_list_for_each_entry_safe(entry, tmp, &vsi->vsi_agg_lru, vfe_link) { + vfe = entry->vfe_ext; if (!force && cur_time < (vfe.vfe_age + EXPIRE_INTVL)) break; /* Remove entry from aggregate LRU list */ - d_list_del_init(&entry->ve_link); + d_list_del_init(&entry->vfe_link); dec_stats(vsi, STAT_FRAGS_AGING, 1); + bitmap = entry->vfe_bitmap; + if (bitmap) + btr_hdl = bitmap->vbe_agg_btr; + else + btr_hdl = vsi->vsi_agg_btr; /* Remove entry from aggregate tree, entry will be freed on deletion */ d_iov_set(&key, &vfe.vfe_blk_off, sizeof(vfe.vfe_blk_off)); - D_ASSERT(daos_handle_is_valid(vsi->vsi_agg_btr)); - rc = dbtree_delete(vsi->vsi_agg_btr, BTR_PROBE_EQ, &key, NULL); + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_delete(btr_hdl, BTR_PROBE_EQ, &key, NULL); if (rc) { D_ERROR("Remove ["DF_U64", %u] from aggregated tree error: "DF_RC"\n", vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); break; } + flush_bitmaps[unmap_sgl->sg_nr_out] = bitmap; /* Unmap callback may yield, so we can't call it directly in this tight loop */ unmap_sgl->sg_nr_out++; unmap_iov = &unmap_sgl->sg_iovs[unmap_sgl->sg_nr_out - 1]; @@ -533,15 +844,18 @@ flush_internal(struct vea_space_info *vsi, bool force, uint32_t cur_time, d_sg_l for (i = 0; i < unmap_sgl->sg_nr_out; i++) { unmap_iov = &unmap_sgl->sg_iovs[i]; - vfe.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; - vfe.vfe_blk_cnt = unmap_iov->iov_len; - vfe.vfe_age = cur_time; + free_entry.vfe_ext.vfe_blk_off = (uint64_t)unmap_iov->iov_buf; + free_entry.vfe_ext.vfe_blk_cnt = unmap_iov->iov_len; + free_entry.vfe_ext.vfe_age = cur_time; + free_entry.vfe_bitmap = flush_bitmaps[i]; - rc = compound_free(vsi, &vfe, 0); + rc = compound_free(vsi, &free_entry, 0); if (rc) D_ERROR("Compound free ["DF_U64", %u] error: "DF_RC"\n", - vfe.vfe_blk_off, vfe.vfe_blk_cnt, DP_RC(rc)); + free_entry.vfe_ext.vfe_blk_off, free_entry.vfe_ext.vfe_blk_cnt, + DP_RC(rc)); } + D_FREE(flush_bitmaps); return rc; } 
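For context on the merge_free_ext() change above: callers now pass the target btree handle explicitly instead of having it derived from the VEA_TYPE_* argument inside the function. A minimal sketch of how the three call sites in vea_free.c pick that handle (the helper name merge_tree_hdl is hypothetical, for illustration only; it is not part of the patch):

/* Sketch only: summarizes the handle selection done inline by the callers above. */
static daos_handle_t
merge_tree_hdl(struct vea_space_info *vsi, struct vea_bitmap_entry *bitmap,
	       unsigned int type)
{
	switch (type) {
	case VEA_TYPE_COMPOUND:		/* compound_free_extent() */
		return vsi->vsi_free_btr;
	case VEA_TYPE_PERSIST:		/* persistent_free_extent() */
		return vsi->vsi_md_free_btr;
	case VEA_TYPE_AGGREGATE:	/* aggregated_free() */
		/* per-bitmap aging tree for bitmap-backed frees, global aging tree otherwise */
		return bitmap != NULL ? bitmap->vbe_agg_btr : vsi->vsi_agg_btr;
	default:
		return DAOS_HDL_INVAL;
	}
}

Passing the handle in is what lets aggregated_free() route frees of bitmap blocks to the per-chunk vbe_agg_btr while extent frees keep using the global vsi_agg_btr, as shown in the hunks above.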
@@ -562,6 +876,127 @@ need_aging_flush(struct vea_space_info *vsi, uint32_t cur_time, bool force) return true; } +void +free_commit_cb(void *data, bool noop) +{ + struct free_commit_cb_arg *fca = data; + int rc; + + /* Transaction aborted, only need to free callback arg */ + if (noop) + goto free; + + /* + * Aggregated free will be executed on outermost transaction + * commit. + * + * If it fails, the freed space on persistent free tree won't + * be added in in-memory free tree, hence the space won't be + * visible for allocation until the tree sync up on next server + * restart. Such temporary space leak is tolerable, what we must + * avoid is the contrary case: in-memory tree update succeeds + * but persistent tree update fails, which risks data corruption. + */ + rc = aggregated_free(fca->fca_vsi, &fca->fca_vfe); + + D_CDEBUG(rc, DLOG_ERR, DB_IO, "Aggregated free on vsi:%p rc %d\n", + fca->fca_vsi, rc); +free: + D_FREE(fca); +} + +static int +reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t *nr_reclaimed) +{ + int i; + struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_free_bitmap *vfb; + d_iov_t key; + int rc = 0; + struct free_commit_cb_arg *fca; + struct umem_instance *umem = vsi->vsi_umem; + int nr = 0; + uint64_t blk_off; + uint32_t blk_cnt; + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + d_list_for_each_entry_safe(bitmap_entry, tmp_entry, + &vsi->vsi_class.vfc_bitmap_empty[i], vbe_link) { + vfb = &bitmap_entry->vbe_bitmap; + D_ASSERT(vfb->vfb_class == i + 1); + D_ASSERT(is_bitmap_empty(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz)); + d_list_del_init(&bitmap_entry->vbe_link); + D_ALLOC_PTR(fca); + if (!fca) + return -DER_NOMEM; + + blk_off = vfb->vfb_blk_off; + blk_cnt = vfb->vfb_blk_cnt; + fca->fca_vsi = vsi; + fca->fca_vfe.vfe_ext.vfe_blk_off = blk_off; + fca->fca_vfe.vfe_ext.vfe_blk_cnt = blk_cnt; + fca->fca_vfe.vfe_ext.vfe_age = 0; /* not used */ + + rc = umem_tx_begin(umem, vsi->vsi_txd); + if (rc != 0) { + D_FREE(fca); + return rc; + } + + /* + * Even in-memory bitmap failed to remove from tree, it is ok + * because this bitmap chunk has been removed from allocation LRU list. + */ + d_iov_set(&key, &fca->fca_vfe.vfe_ext.vfe_blk_off, + sizeof(fca->fca_vfe.vfe_ext.vfe_blk_off)); + dbtree_destroy(bitmap_entry->vbe_agg_btr, NULL); + rc = dbtree_delete(fca->fca_vsi->vsi_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from bitmap tree " + "error: "DF_RC"\n", fca->fca_vfe.vfe_ext.vfe_blk_off, + fca->fca_vfe.vfe_ext.vfe_blk_cnt, DP_RC(rc)); + goto abort; + } + dec_stats(fca->fca_vsi, STAT_FRAGS_BITMAP, 1); + dec_stats(fca->fca_vsi, STAT_FREE_BITMAP_BLKS, blk_cnt); + + d_iov_set(&key, &blk_off, sizeof(blk_off)); + rc = dbtree_delete(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, &key, NULL); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent bitmap " + "tree error: "DF_RC"\n", blk_off, blk_cnt, DP_RC(rc)); + goto abort; + } + /* call persistent_free_extent instead */ + rc = persistent_free(vsi, &fca->fca_vfe); + if (rc) { + D_ERROR("Remove ["DF_U64", %u] from persistent " + "extent tree error: "DF_RC"\n", blk_off, + blk_cnt, DP_RC(rc)); + goto abort; + } + rc = umem_tx_add_callback(umem, vsi->vsi_txd, UMEM_STAGE_ONCOMMIT, + free_commit_cb, fca); + if (rc == 0) + fca = NULL; +abort: + D_FREE(fca); + /* Commit/Abort transaction on success/error */ + rc = rc ? 
umem_tx_abort(umem, rc) : umem_tx_commit(umem); + if (rc) + return rc; + nr++; + if (nr >= nr_reclaim) + goto out; + } + } + +out: + if (nr_reclaimed) + *nr_reclaimed = nr; + return rc; +} + int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed) @@ -597,6 +1032,10 @@ trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, } d_sgl_fini(&unmap_sgl, false); + + rc = reclaim_unused_bitmap(vsi, MAX_FLUSH_FRAGS, NULL); + if (rc) + goto out; out: if (nr_flushed != NULL) *nr_flushed = tot_flushed; diff --git a/src/vea/vea_hint.c b/src/vea/vea_hint.c index 65c923476b5..83f2a13e1e2 100644 --- a/src/vea/vea_hint.c +++ b/src/vea/vea_hint.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -54,7 +54,7 @@ hint_cancel(struct vea_hint_context *hint, uint64_t off, uint64_t seq_min, */ hint->vhc_off = off; return 0; - } else if (hint->vhc_seq > seq_max) { + } else if (hint->vhc_seq >= seq_max) { /* * Subsequent reserve detected, abort hint cancel. It could * result in un-allocated holes on out of order hint cancels, diff --git a/src/vea/vea_init.c b/src/vea/vea_init.c index d237c46af70..adf8258c2f3 100644 --- a/src/vea/vea_init.c +++ b/src/vea/vea_init.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -25,13 +25,13 @@ destroy_free_class(struct vea_free_class *vfc) static bool heap_node_cmp(struct d_binheap_node *a, struct d_binheap_node *b) { - struct vea_entry *nodea, *nodeb; + struct vea_extent_entry *nodea, *nodeb; - nodea = container_of(a, struct vea_entry, ve_node); - nodeb = container_of(b, struct vea_entry, ve_node); + nodea = container_of(a, struct vea_extent_entry, vee_node); + nodeb = container_of(b, struct vea_extent_entry, vee_node); /* Max heap, the largest free extent is heap root */ - return nodea->ve_ext.vfe_blk_cnt > nodeb->ve_ext.vfe_blk_cnt; + return nodea->vee_ext.vfe_blk_cnt > nodeb->vee_ext.vfe_blk_cnt; } static struct d_binheap_ops heap_ops = { @@ -45,6 +45,7 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) { struct umem_attr uma; int rc; + int i; vfc->vfc_size_btr = DAOS_HDL_INVAL; rc = d_binheap_create_inplace(DBH_FT_NOLOCK, 0, NULL, &heap_ops, @@ -60,9 +61,17 @@ create_free_class(struct vea_free_class *vfc, struct vea_space_df *md) /* Create in-memory sized free extent tree */ rc = dbtree_create(DBTREE_CLASS_IFV, BTR_FEAT_UINT_KEY, VEA_TREE_ODR, &uma, NULL, &vfc->vfc_size_btr); - if (rc != 0) + if (rc != 0) { destroy_free_class(vfc); + goto out; + } + + for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_lru[i]); + D_INIT_LIST_HEAD(&vfc->vfc_bitmap_empty[i]); + } +out: return rc; } @@ -74,9 +83,14 @@ unload_space_info(struct vea_space_info *vsi) vsi->vsi_md_free_btr = DAOS_HDL_INVAL; } - if (daos_handle_is_valid(vsi->vsi_md_vec_btr)) { - dbtree_close(vsi->vsi_md_vec_btr); - vsi->vsi_md_vec_btr = DAOS_HDL_INVAL; + if (daos_handle_is_valid(vsi->vsi_md_bitmap_btr)) { + dbtree_close(vsi->vsi_md_bitmap_btr); + vsi->vsi_md_bitmap_btr = DAOS_HDL_INVAL; + } + + if (vsi->vsi_bitmap_hint_context) { + vea_hint_unload(vsi->vsi_bitmap_hint_context); + vsi->vsi_bitmap_hint_context = NULL; } } @@ -96,7 +110,7 @@ load_free_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) if (rc != 0) return rc; - rc = compound_free(vsi, 
vfe, VEA_FL_NO_MERGE); + rc = compound_free_extent(vsi, vfe, VEA_FL_NO_MERGE); if (rc != 0) return rc; @@ -104,22 +118,28 @@ load_free_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) } static int -load_vec_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) +load_bitmap_entry(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg) { - struct vea_ext_vector *vec; + struct vea_free_bitmap *vfb; struct vea_space_info *vsi; + struct vea_bitmap_entry *bitmap_entry; uint64_t *off; int rc; vsi = (struct vea_space_info *)arg; off = (uint64_t *)key->iov_buf; - vec = (struct vea_ext_vector *)val->iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + return 0; - rc = verify_vec_entry(off, vec); + vfb = (struct vea_free_bitmap *)val->iov_buf; + rc = verify_bitmap_entry(vfb); if (rc != 0) return rc; - return compound_vec_alloc(vsi, vec); + rc = bitmap_entry_insert(vsi, vfb, VEA_BITMAP_STATE_PUBLISHED, &bitmap_entry, 0); + bitmap_entry->vbe_md_bitmap = vfb; + + return rc; } int @@ -127,6 +147,9 @@ load_space_info(struct vea_space_info *vsi) { struct umem_attr uma = {0}; int rc; + struct vea_hint_df *df; + uint64_t offset; + d_iov_t key, val; D_ASSERT(vsi->vsi_umem != NULL); D_ASSERT(vsi->vsi_md != NULL); @@ -141,10 +164,9 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Open SCM extent vector tree */ - D_ASSERT(daos_handle_is_inval(vsi->vsi_md_vec_btr)); - rc = dbtree_open_inplace(&vsi->vsi_md->vsd_vec_tree, &uma, - &vsi->vsi_md_vec_btr); + /* Open SCM bitmap tree */ + rc = dbtree_open_inplace(&vsi->vsi_md->vsd_bitmap_tree, &uma, + &vsi->vsi_md_bitmap_btr); if (rc != 0) goto error; @@ -154,12 +176,28 @@ load_space_info(struct vea_space_info *vsi) if (rc != 0) goto error; - /* Build up in-memory extent vector tree */ - rc = dbtree_iterate(vsi->vsi_md_vec_btr, DAOS_INTENT_DEFAULT, false, - load_vec_entry, (void *)vsi); + /* Build up in-memory bitmap tree */ + rc = dbtree_iterate(vsi->vsi_md_bitmap_btr, DAOS_INTENT_DEFAULT, false, + load_bitmap_entry, (void *)vsi); if (rc != 0) goto error; + if (!is_bitmap_feature_enabled(vsi)) + return 0; + + offset = VEA_BITMAP_CHUNK_HINT_KEY; + d_iov_set(&key, &offset, sizeof(offset)); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(vsi->vsi_md_bitmap_btr, BTR_PROBE_EQ, DAOS_INTENT_DEFAULT, + &key, NULL, &val); + if (rc) + goto error; + + df = (struct vea_hint_df *)val.iov_buf; + rc = vea_hint_load(df, &vsi->vsi_bitmap_hint_context); + if (rc) + goto error; + return 0; error: unload_space_info(vsi); diff --git a/src/vea/vea_internal.h b/src/vea/vea_internal.h index 3a5ac97fde0..e0880bde951 100644 --- a/src/vea/vea_internal.h +++ b/src/vea/vea_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,12 +11,39 @@ #include #include #include +#include #include #define VEA_MAGIC (0xea201804) #define VEA_BLK_SZ (4 * 1024) /* 4K */ #define VEA_TREE_ODR 20 +/* Common free extent structure for both SCM & in-memory index */ +struct vea_free_extent { + uint64_t vfe_blk_off; /* Block offset of the extent */ + uint32_t vfe_blk_cnt; /* Total blocks of the extent */ + uint32_t vfe_age; /* Monotonic timestamp */ +}; + +/* Min bitmap allocation class */ +#define VEA_MIN_BITMAP_CLASS 1 +/* Max bitmap allocation class */ +#define VEA_MAX_BITMAP_CLASS 64 + +/* Bitmap chunk size */ +#define VEA_BITMAP_MIN_CHUNK_BLKS 256 /* 1MiB */ +#define VEA_BITMAP_MAX_CHUNK_BLKS (VEA_MAX_BITMAP_CLASS * 256) /* 64 MiB */ + + +/* Common free bitmap structure for both SCM & in-memory index */ +struct vea_free_bitmap { + uint64_t vfb_blk_off; /* Block offset of the bitmap */ + uint32_t vfb_blk_cnt; /* Block count of the bitmap */ + uint16_t vfb_class; /* Allocation class of bitmap */ + uint16_t vfb_bitmap_sz; /* Bitmap size*/ + uint64_t vfb_bitmaps[0]; /* Bitmaps of this chunk */ +}; + /* Per I/O stream hint context */ struct vea_hint_context { struct vea_hint_df *vhc_pd; @@ -27,18 +54,55 @@ struct vea_hint_context { }; /* Free extent informat stored in the in-memory compound free extent index */ -struct vea_entry { +struct vea_extent_entry { /* * Always keep it as first item, since vfe_blk_off is the direct key * of DBTREE_CLASS_IV */ - struct vea_free_extent ve_ext; - /* Link to one of vsc_lru or vsi_agg_lru */ - d_list_t ve_link; + struct vea_free_extent vee_ext; + /* Link to one of vsc_extent_lru */ + d_list_t vee_link; /* Back reference to sized tree entry */ - struct vea_sized_class *ve_sized_class; + struct vea_sized_class *vee_sized_class; /* Link to vfc_heap */ - struct d_binheap_node ve_node; + struct d_binheap_node vee_node; +}; + +enum { + VEA_BITMAP_STATE_PUBLISHED, + VEA_BITMAP_STATE_PUBLISHING, + VEA_BITMAP_STATE_NEW, +}; + +/* Bitmap entry */ +struct vea_bitmap_entry { + /* Link to one of vfc_bitmap_lru[] */ + d_list_t vbe_link; + /* Bitmap published state */ + int vbe_published_state; + /* + * Free entries sorted by offset, for coalescing the just recent + * free blocks inside this bitmap chunk. + */ + daos_handle_t vbe_agg_btr; + /* Point to persistent free bitmap entry */ + struct vea_free_bitmap *vbe_md_bitmap; + /* free bitmap, always keep it as last item*/ + struct vea_free_bitmap vbe_bitmap; +}; + +enum { + VEA_FREE_ENTRY_EXTENT, + VEA_FREE_ENTRY_BITMAP, +}; + +/* freed entry stored in aggregation tree */ +struct vea_free_entry { + struct vea_free_extent vfe_ext; + /* Back pointer bitmap entry */ + struct vea_bitmap_entry *vfe_bitmap; + /* Link to one vsi_agg_lru */ + d_list_t vfe_link; }; #define VEA_LARGE_EXT_MB 64 /* Large extent threshold in MB */ @@ -47,9 +111,10 @@ struct vea_entry { /* Value entry of sized free extent tree (vfc_size_btr) */ struct vea_sized_class { /* Small extents LRU list */ - d_list_t vsc_lru; + d_list_t vsc_extent_lru; }; +#define VEA_BITMAP_CHUNK_HINT_KEY (~(0ULL)) /* * Large free extents (>VEA_LARGE_EXT_MB) are tracked in max a heap, small * free extents (<= VEA_LARGE_EXT_MB) are tracked in a size tree. 
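The bitmap structures above imply a simple sizing rule: each bit of vfb_bitmaps covers vfb_class blocks, and vfb_bitmap_sz counts 64-bit words. A minimal sketch of that arithmetic, assuming a class-1 chunk of the minimum size (the helper name bitmap_words_needed is hypothetical and only illustrates the relationship checked later by verify_bitmap_entry()):

/* Illustration only: words of bitmap needed for a chunk of 'blk_cnt' blocks at
 * allocation class 'class' (one bit per 'class' blocks, rounded up to whole
 * uint64_t words). */
static inline uint16_t
bitmap_words_needed(uint32_t blk_cnt, uint16_t class)
{
	uint32_t bits = blk_cnt / class;

	return (bits + 63) / 64;
}

For example, with vfb_class = 1 and vfb_blk_cnt = VEA_BITMAP_MIN_CHUNK_BLKS = 256 (1 MiB of 4 KiB blocks), 256 bits are needed, so vfb_bitmap_sz = 4 words and the trailing vfb_bitmaps[] payload is 32 bytes, satisfying vfb_bitmap_sz * 64 * vfb_class >= vfb_blk_cnt.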
@@ -61,6 +126,10 @@ struct vea_free_class { daos_handle_t vfc_size_btr; /* Size threshold for large extent */ uint32_t vfc_large_thresh; + /* Bitmap LRU list for different bitmap allocation class*/ + d_list_t vfc_bitmap_lru[VEA_MAX_BITMAP_CLASS]; + /* Empty bitmap list for different allocation class */ + d_list_t vfc_bitmap_empty[VEA_MAX_BITMAP_CLASS]; }; enum { @@ -68,21 +137,27 @@ enum { STAT_RESRV_HINT = 0, /* Number of large reserve */ STAT_RESRV_LARGE = 1, - /* Number of small reserve */ + /* Number of small extents reserve */ STAT_RESRV_SMALL = 2, + /* Number of bitmap reserve */ + STAT_RESRV_BITMAP = 3, /* Max reserve type */ - STAT_RESRV_TYPE_MAX = 3, + STAT_RESRV_TYPE_MAX = 4, /* Number of large(> VEA_LARGE_EXT_MB) free frags available for allocation */ - STAT_FRAGS_LARGE = 3, - /* Number of small free frags available for allocation */ - STAT_FRAGS_SMALL = 4, + STAT_FRAGS_LARGE = 4, + /* Number of small free extent frags available for allocation */ + STAT_FRAGS_SMALL = 5, /* Number of frags in aging buffer (to be unmapped) */ - STAT_FRAGS_AGING = 5, + STAT_FRAGS_AGING = 6, + /* Number of bitmaps */ + STAT_FRAGS_BITMAP = 7, /* Max frag type */ - STAT_FRAGS_TYPE_MAX = 3, - /* Number of blocks available for allocation */ - STAT_FREE_BLKS = 6, - STAT_MAX = 7, + STAT_FRAGS_TYPE_MAX = 4, + /* Number of extent blocks available for allocation */ + STAT_FREE_EXTENT_BLKS = 8, + /* Number of bitmap blocks available for allocation */ + STAT_FREE_BITMAP_BLKS = 9, + STAT_MAX = 10, }; struct vea_metrics { @@ -91,6 +166,8 @@ struct vea_metrics { struct d_tm_node_t *vm_free_blks; }; +#define MAX_FLUSH_FRAGS 256 + /* In-memory compound index */ struct vea_space_info { /* Instance for the pmemobj pool on SCM */ @@ -106,18 +183,20 @@ struct vea_space_info { struct vea_space_df *vsi_md; /* Open handles for the persistent free extent tree */ daos_handle_t vsi_md_free_btr; - /* Open handles for the persistent extent vector tree */ - daos_handle_t vsi_md_vec_btr; + /* Open handles for the persistent bitmap tree */ + daos_handle_t vsi_md_bitmap_btr; /* Free extent tree sorted by offset, for all free extents. */ daos_handle_t vsi_free_btr; - /* Extent vector tree, for non-contiguous allocation */ - daos_handle_t vsi_vec_btr; + /* Bitmap tree, for small allocation */ + daos_handle_t vsi_bitmap_btr; + /* Hint context for bitmap chunk allocation */ + struct vea_hint_context *vsi_bitmap_hint_context; /* Index for searching free extent by size & age */ struct vea_free_class vsi_class; - /* LRU to aggergate just recent freed extents */ + /* LRU to aggergate just recent freed extents or bitmap blocks */ d_list_t vsi_agg_lru; /* - * Free extent tree sorted by offset, for coalescing the just recent + * Free entries sorted by offset, for coalescing the just recent * free extents. 
*/ daos_handle_t vsi_agg_btr; @@ -132,6 +211,11 @@ struct vea_space_info { bool vsi_flush_scheduled; }; +struct free_commit_cb_arg { + struct vea_space_info *fca_vsi; + struct vea_free_entry fca_vfe; +}; + static inline uint32_t get_current_age(void) { @@ -146,6 +230,46 @@ enum vea_free_flags { VEA_FL_NO_ACCOUNTING = (1 << 1), }; +static inline bool +is_bitmap_feature_enabled(struct vea_space_info *vsi) +{ + return vsi->vsi_md->vsd_compat & VEA_COMPAT_FEATURE_BITMAP; +} + +static inline int +alloc_free_bitmap_size(uint16_t bitmap_sz) +{ + return sizeof(struct vea_free_bitmap) + (bitmap_sz << 3); +} + +static inline uint32_t +bitmap_free_blocks(struct vea_free_bitmap *vfb) +{ + uint32_t free_blocks; + int diff; + + int free_bits = daos_count_free_bits(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz); + + free_blocks = free_bits * vfb->vfb_class; + diff = vfb->vfb_bitmap_sz * 64 * vfb->vfb_class - vfb->vfb_blk_cnt; + + D_ASSERT(diff == 0); + + return free_blocks; +} + +static inline bool +is_bitmap_empty(uint64_t *bitmap, int bitmap_sz) +{ + int i; + + for (i = 0; i < bitmap_sz; i++) + if (bitmap[i]) + return false; + + return true; +} + /* vea_init.c */ void destroy_free_class(struct vea_free_class *vfc); int create_free_class(struct vea_free_class *vfc, struct vea_space_df *md); @@ -154,36 +278,45 @@ int load_space_info(struct vea_space_info *vsi); /* vea_util.c */ int verify_free_entry(uint64_t *off, struct vea_free_extent *vfe); -int verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec); +int verify_bitmap_entry(struct vea_free_bitmap *vfb); int ext_adjacent(struct vea_free_extent *cur, struct vea_free_extent *next); int verify_resrvd_ext(struct vea_resrvd_ext *resrvd); int vea_dump(struct vea_space_info *vsi, bool transient); int vea_verify_alloc(struct vea_space_info *vsi, bool transient, - uint64_t off, uint32_t cnt); + uint64_t off, uint32_t cnt, bool is_bitmap); void dec_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr); void inc_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr); /* vea_alloc.c */ -int compound_vec_alloc(struct vea_space_info *vsi, struct vea_ext_vector *vec); int reserve_hint(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); int reserve_single(struct vea_space_info *vsi, uint32_t blk_cnt, struct vea_resrvd_ext *resrvd); -int reserve_vector(struct vea_space_info *vsi, uint32_t blk_cnt, - struct vea_resrvd_ext *resrvd); -int persistent_alloc(struct vea_space_info *vsi, struct vea_free_extent *vfe); +int persistent_alloc(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int +bitmap_tx_add_ptr(struct umem_instance *vsi_umem, uint64_t *bitmap, + uint32_t bit_at, uint32_t bits_nr); +int +bitmap_set_range(struct umem_instance *vsi_umem, struct vea_free_bitmap *bitmap, + uint64_t blk_off, uint32_t blk_cnt, bool clear); /* vea_free.c */ -#define MAX_FLUSH_FRAGS 256 -void free_class_remove(struct vea_space_info *vsi, struct vea_entry *entry); -int free_class_add(struct vea_space_info *vsi, struct vea_entry *entry); -int compound_free(struct vea_space_info *vsi, struct vea_free_extent *vfe, - unsigned int flags); -int persistent_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); -int aggregated_free(struct vea_space_info *vsi, struct vea_free_extent *vfe); +void extent_free_class_remove(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int extent_free_class_add(struct vea_space_info *vsi, struct vea_extent_entry *entry); +int compound_free_extent(struct vea_space_info *vsi, struct 
vea_free_extent *vfe, + unsigned int flags); +int compound_free(struct vea_space_info *vsi, struct vea_free_entry *vfe, unsigned int flags); +int persistent_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); +int aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe); int trigger_aging_flush(struct vea_space_info *vsi, bool force, uint32_t nr_flush, uint32_t *nr_flushed); int schedule_aging_flush(struct vea_space_info *vsi); +int bitmap_entry_insert(struct vea_space_info *vsi, struct vea_free_bitmap *vfb, + int state, struct vea_bitmap_entry **ret_entry, unsigned int flags); +int free_type(struct vea_space_info *vsi, uint64_t blk_off, uint32_t blk_cnt, + struct vea_bitmap_entry **bitmap_entry); +void +free_commit_cb(void *data, bool noop); /* vea_hint.c */ void hint_get(struct vea_hint_context *hint, uint64_t *off); diff --git a/src/vea/vea_util.c b/src/vea/vea_util.c index c7452cc2ebf..21c11e3daa4 100644 --- a/src/vea/vea_util.c +++ b/src/vea/vea_util.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -35,35 +35,38 @@ verify_free_entry(uint64_t *off, struct vea_free_extent *vfe) } int -verify_vec_entry(uint64_t *off, struct vea_ext_vector *vec) +verify_bitmap_entry(struct vea_free_bitmap *vfb) { - int i; - uint64_t prev_off = 0; + D_ASSERT(vfb != NULL); + if (vfb->vfb_blk_off == VEA_HINT_OFF_INVAL) { + D_CRIT("corrupted bitmap entry, off == VEA_HINT_OFF_INVAL(%d)\n", + VEA_HINT_OFF_INVAL); + return -DER_INVAL; + } - D_ASSERT(vec != NULL); - if (vec->vev_size == 0 || vec->vev_size > VEA_EXT_VECTOR_MAX) { - D_CRIT("corrupted vector entry, sz: %u\n", vec->vev_size); + if (vfb->vfb_class < VEA_MIN_BITMAP_CLASS || vfb->vfb_class > VEA_MAX_BITMAP_CLASS) { + D_CRIT("corrupted bitmap entry, class: %u is out of [%u, %u]\n", + vfb->vfb_class, VEA_MIN_BITMAP_CLASS, VEA_MAX_BITMAP_CLASS); return -DER_INVAL; } - if (off != NULL && *off != vec->vev_blk_off[0]) { - D_CRIT("corrupted vector entry, off: "DF_U64" != "DF_U64"\n", - *off, vec->vev_blk_off[0]); + if (vfb->vfb_blk_cnt < VEA_BITMAP_MIN_CHUNK_BLKS || + vfb->vfb_blk_cnt > VEA_BITMAP_MAX_CHUNK_BLKS) { + D_CRIT("corrupted bitmap entry, chunk size: %u is out of [%u, %u]\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS, VEA_BITMAP_MAX_CHUNK_BLKS); return -DER_INVAL; } - for (i = 0; i < vec->vev_size; i++) { - if (vec->vev_blk_off[i] <= prev_off) { - D_CRIT("corrupted vector entry[%d]," - " "DF_U64" <= "DF_U64"\n", - i, vec->vev_blk_off[i], prev_off); - return -DER_INVAL; - } - if (vec->vev_blk_cnt[i] == 0) { - D_CRIT("corrupted vector entry[%d], %u\n", - i, vec->vev_blk_cnt[i]); - return -DER_INVAL; - } + if (vfb->vfb_blk_cnt % VEA_BITMAP_MIN_CHUNK_BLKS) { + D_CRIT("coruppted bitmap entry, chunk size: %u should be times of %u\n", + vfb->vfb_blk_cnt, VEA_BITMAP_MIN_CHUNK_BLKS); + return -DER_INVAL; + } + + if (vfb->vfb_bitmap_sz * 64 * vfb->vfb_class < vfb->vfb_blk_cnt) { + D_CRIT("corrupted bitmap entry, bitmap size: %u could not cover chunk size: %u\n", + vfb->vfb_bitmap_sz, vfb->vfb_blk_cnt); + return -DER_INVAL; } return 0; @@ -102,28 +105,25 @@ verify_resrvd_ext(struct vea_resrvd_ext *resrvd) } else if (resrvd->vre_blk_cnt == 0) { D_CRIT("invalid blk_cnt %u\n", resrvd->vre_blk_cnt); return -DER_INVAL; - } else if (resrvd->vre_vector != NULL) { - /* Vector allocation isn't supported yet. 
*/ - D_CRIT("vector isn't NULL?\n"); - return -DER_NOSYS; } return 0; } -int -vea_dump(struct vea_space_info *vsi, bool transient) +static int +vea_dump_bitmap(struct vea_space_info *vsi, bool transient) { - struct vea_free_extent *ext; - daos_handle_t ih, btr_hdl; - d_iov_t key, val; - uint64_t *off; - int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + struct vea_free_bitmap *bitmap; + struct vea_bitmap_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; if (transient) - btr_hdl = vsi->vsi_free_btr; + btr_hdl = vsi->vsi_bitmap_btr; else - btr_hdl = vsi->vsi_md_free_btr; + btr_hdl = vsi->vsi_md_bitmap_btr; D_ASSERT(daos_handle_is_valid(btr_hdl)); rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); @@ -132,6 +132,7 @@ vea_dump(struct vea_space_info *vsi, bool transient) rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + D_PRINT("Bitmaps:"); while (rc == 0) { d_iov_set(&key, NULL, 0); d_iov_set(&val, NULL, 0); @@ -140,15 +141,76 @@ vea_dump(struct vea_space_info *vsi, bool transient) break; off = (uint64_t *)key.iov_buf; + if (*off == VEA_BITMAP_CHUNK_HINT_KEY) + goto next; + if (transient) { - struct vea_entry *entry; + entry = (struct vea_bitmap_entry *)val.iov_buf; + bitmap = &entry->vbe_bitmap; + } else { + bitmap = (struct vea_free_bitmap *)val.iov_buf; + + } + rc = verify_bitmap_entry(bitmap); + if (rc != 0) { + D_ERROR("dump failed???\n"); + break; + } + + D_PRINT("["DF_U64", %u]", bitmap->vfb_blk_off, bitmap->vfb_blk_cnt); + print_cnt++; + if (print_cnt % 10 == 0) + D_PRINT("\n"); + else + D_PRINT(" "); +next: + rc = dbtree_iter_next(ih); + } + + D_PRINT("\n"); + dbtree_iter_finish(ih); + + return rc = -DER_NONEXIST ? 0 : rc; + - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; +} + +static int +vea_dump_extent(struct vea_space_info *vsi, bool transient) +{ + struct vea_free_extent *ext; + struct vea_extent_entry *entry; + daos_handle_t ih, btr_hdl; + d_iov_t key, val; + uint64_t *off; + int rc, print_cnt = 0, opc = BTR_PROBE_FIRST; + + if (transient) + btr_hdl = vsi->vsi_free_btr; + else + btr_hdl = vsi->vsi_md_free_btr; + D_ASSERT(daos_handle_is_valid(btr_hdl)); + rc = dbtree_iter_prepare(btr_hdl, BTR_ITER_EMBEDDED, &ih); + if (rc) + return rc; + + rc = dbtree_iter_probe(ih, opc, DAOS_INTENT_DEFAULT, NULL, NULL); + + D_PRINT("Free extents:"); + while (rc == 0) { + d_iov_set(&key, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_iter_fetch(ih, &key, &val, NULL); + if (rc != 0) + break; + + off = (uint64_t *)key.iov_buf; + if (transient) { + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } - rc = verify_free_entry(off, ext); if (rc != 0) break; @@ -169,6 +231,18 @@ vea_dump(struct vea_space_info *vsi, bool transient) return rc = -DER_NONEXIST ? 0 : rc; } +int +vea_dump(struct vea_space_info *vsi, bool transient) +{ + int rc; + + rc = vea_dump_bitmap(vsi, transient); + if (rc) + return rc; + + return vea_dump_extent(vsi, transient); +} + /** * Check if two extents are overlapping. * returns 0 - Non-overlapping @@ -189,27 +263,70 @@ ext_overlapping(struct vea_free_extent *ext1, struct vea_free_extent *ext2) return -DER_INVAL; } -/** - * Verify if an extent is allocated in persistent or transient metadata. 
- * - * \param vsi [IN] In-memory compound index - * \param transient [IN] Persistent or transient - * \param off [IN] Block offset of extent - * \param cnt [IN] Block count of extent - * - * \return 0 - Allocated - * 1 - Not allocated - * Negative value on error - */ -int -vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, - uint32_t cnt) +static int +verify_alloc_bitmap(struct vea_space_info *vsi, bool transient, uint64_t off, + uint32_t cnt) { - struct vea_free_extent vfe, *ext; daos_handle_t btr_hdl; d_iov_t key, key_out, val; - uint64_t *key_off; int rc, opc = BTR_PROBE_LE; + struct vea_free_bitmap *vfb; + + if (transient) + btr_hdl = vsi->vsi_bitmap_btr; + else + btr_hdl = vsi->vsi_md_bitmap_btr; + + D_ASSERT(daos_handle_is_valid(btr_hdl)); + d_iov_set(&key, &off, sizeof(off)); + + d_iov_set(&key_out, NULL, 0); + d_iov_set(&val, NULL, 0); + rc = dbtree_fetch(btr_hdl, opc, DAOS_INTENT_DEFAULT, &key, &key_out, + &val); + /* bitmap not allocated */ + if (rc == -DER_NONEXIST) + return 1; + + if (rc) + return rc; + + if (transient) { + struct vea_bitmap_entry *entry; + + entry = (struct vea_bitmap_entry *)val.iov_buf; + vfb = &entry->vbe_bitmap; + } else { + vfb = (struct vea_free_bitmap *)val.iov_buf; + } + + rc = verify_bitmap_entry(vfb); + if (rc != 0) { + D_ERROR("verify bitmap alloc failed\n"); + return rc; + } + + /* not in the bitmap range */ + if (off + cnt <= vfb->vfb_blk_off || off >= vfb->vfb_blk_off + vfb->vfb_blk_cnt) + return 1; + + if (isset_range((uint8_t *)vfb->vfb_bitmaps, + (off - vfb->vfb_blk_off) / vfb->vfb_class, + (off - vfb->vfb_blk_off + cnt - 1) / vfb->vfb_class)) + return 0; + + return 1; +} + + +static int +verify_alloc_extent(struct vea_space_info *vsi, bool transient, uint64_t off, uint32_t cnt) +{ + struct vea_free_extent vfe, *ext; + daos_handle_t btr_hdl; + d_iov_t key, key_out, val; + uint64_t *key_off; + int rc, opc = BTR_PROBE_LE; /* Sanity check on input parameters */ vfe.vfe_blk_off = off; @@ -243,10 +360,10 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, key_off = (uint64_t *)key_out.iov_buf; if (transient) { - struct vea_entry *entry; + struct vea_extent_entry *entry; - entry = (struct vea_entry *)val.iov_buf; - ext = &entry->ve_ext; + entry = (struct vea_extent_entry *)val.iov_buf; + ext = &entry->vee_ext; } else { ext = (struct vea_free_extent *)val.iov_buf; } @@ -267,6 +384,29 @@ vea_verify_alloc(struct vea_space_info *vsi, bool transient, uint64_t off, return rc; } +/** + * Verify if an extent is allocated in persistent or transient metadata. 
+ * + * \param vsi [IN] In-memory compound index + * \param transient [IN] Persistent or transient + * \param off [IN] Block offset of extent + * \param cnt [IN] Block count of extent + * \param is_bitmap [IN] Bitmap or extent + * + * \return 0 - Allocated + * 1 - Not allocated + * Negative value on error + */ +int +vea_verify_alloc(struct vea_space_info *vsi, bool transient, + uint64_t off, uint32_t cnt, bool is_bitmap) +{ + if (!is_bitmap) + return verify_alloc_extent(vsi, transient, off, cnt); + + return verify_alloc_bitmap(vsi, transient, off, cnt); +} + void vea_metrics_free(void *data) { @@ -283,6 +423,8 @@ rsrv_type2str(int rsrv_type) return "large"; case STAT_RESRV_SMALL: return "small"; + case STAT_RESRV_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -298,6 +440,8 @@ frags_type2str(int frags_type) return "small"; case STAT_FRAGS_AGING: return "aging"; + case STAT_FRAGS_BITMAP: + return "bitmap"; default: return "unknown"; } @@ -366,6 +510,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de case STAT_RESRV_HINT: case STAT_RESRV_LARGE: case STAT_RESRV_SMALL: + case STAT_RESRV_BITMAP: D_ASSERT(!dec && nr == 1); vsi->vsi_stat[type] += nr; if (metrics && metrics->vm_rsrv[type]) @@ -373,6 +518,7 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de break; case STAT_FRAGS_LARGE: case STAT_FRAGS_SMALL: + case STAT_FRAGS_BITMAP: case STAT_FRAGS_AGING: D_ASSERT(nr == 1 && type >= STAT_FRAGS_LARGE); if (dec) { @@ -385,7 +531,8 @@ update_stats(struct vea_space_info *vsi, unsigned int type, uint64_t nr, bool de if (metrics && metrics->vm_frags[frag_idx]) d_tm_set_gauge(metrics->vm_frags[frag_idx], vsi->vsi_stat[type]); break; - case STAT_FREE_BLKS: + case STAT_FREE_EXTENT_BLKS: + case STAT_FREE_BITMAP_BLKS: if (dec) { D_ASSERTF(vsi->vsi_stat[type] >= nr, "free:"DF_U64" < rsrvd:"DF_U64"\n", vsi->vsi_stat[type], nr); diff --git a/src/vos/lru_array.c b/src/vos/lru_array.c index b94ff873a51..186026c5ba9 100644 --- a/src/vos/lru_array.c +++ b/src/vos/lru_array.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,6 +11,7 @@ */ #define D_LOGFAC DD_FAC(vos) #include "lru_array.h" +#include "vos_internal.h" /** Internal converter for real index to entity index in sub array */ #define ent2idx(array, sub, ent_idx) \ @@ -63,6 +64,24 @@ fini_cb(struct lru_array *array, struct lru_sub *sub, struct lru_entry *entry, array->la_cbs.lru_on_fini(entry->le_payload, real_idx, array->la_arg); } +static void +alloc_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_alloc == NULL) + return; + + array->la_cbs.lru_on_alloc(array->la_arg, size); +} + +static void +free_cb(struct lru_array *array, daos_size_t size) +{ + if (array->la_cbs.lru_on_free == NULL) + return; + + array->la_cbs.lru_on_free(array->la_arg, size); +} + int lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) { @@ -78,6 +97,8 @@ lrua_array_alloc_one(struct lru_array *array, struct lru_sub *sub) if (sub->ls_table == NULL) return -DER_NOMEM; + alloc_cb(array, rec_size * nr_ents); + /** Add newly allocated ones to head of list */ d_list_del(&sub->ls_link); d_list_add(&sub->ls_link, &array->la_free_sub); @@ -283,6 +304,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, if (cbs != NULL) array->la_cbs = *cbs; + alloc_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); /** Only allocate one sub array, add the rest to free list */ D_INIT_LIST_HEAD(&array->la_free_sub); D_INIT_LIST_HEAD(&array->la_unused_sub); @@ -294,6 +316,7 @@ lrua_array_alloc(struct lru_array **arrayp, uint32_t nr_ent, uint32_t nr_arrays, rc = lrua_array_alloc_one(array, &array->la_sub[0]); if (rc != 0) { + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * nr_arrays); D_FREE(array); return rc; } @@ -312,6 +335,10 @@ array_free_one(struct lru_array *array, struct lru_sub *sub) fini_cb(array, sub, &sub->ls_table[idx], idx); D_FREE(sub->ls_table); + + free_cb(array, + (sizeof(struct lru_entry) + array->la_payload_size) * + (array->la_idx_mask + 1)); } void @@ -323,13 +350,14 @@ lrua_array_free(struct lru_array *array) if (array == NULL) return; - for (i = 0; i < array->la_array_nr; i++) { sub = &array->la_sub[i]; if (sub->ls_table != NULL) array_free_one(array, sub); } + free_cb(array, sizeof(*array) + sizeof(array->la_sub[0]) * array->la_array_nr); + D_FREE(array); } diff --git a/src/vos/lru_array.h b/src/vos/lru_array.h index af9705ea72a..7a620c23b87 100644 --- a/src/vos/lru_array.h +++ b/src/vos/lru_array.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2020-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -22,6 +22,10 @@ struct lru_callbacks { void (*lru_on_init)(void *entry, uint32_t idx, void *arg); /** Called on finalization of an entry */ void (*lru_on_fini)(void *entry, uint32_t idx, void *arg); + /** Called on allocation of any LRU entries */ + void (*lru_on_alloc)(void *arg, daos_size_t size); + /** Called on free of any LRU entries */ + void (*lru_on_free)(void *arg, daos_size_t size); }; struct lru_entry { diff --git a/src/vos/tests/vts_aggregate.c b/src/vos/tests/vts_aggregate.c index 2b2b92082af..67ff1539e83 100644 --- a/src/vos/tests/vts_aggregate.c +++ b/src/vos/tests/vts_aggregate.c @@ -1840,13 +1840,14 @@ print_space_info(vos_pool_info_t *pi, char *desc) VERBOSE_MSG(" NVMe allocator statistics:\n"); VERBOSE_MSG(" free_p: "DF_U64", \tfree_t: "DF_U64", " "\tfrags_large: "DF_U64", \tfrags_small: "DF_U64", " - "\tfrags_aging: "DF_U64"\n", + "\tfrags_aging: "DF_U64" \tfrags_bitmap: "DF_U64"\n", stat->vs_free_persistent, stat->vs_free_transient, stat->vs_frags_large, stat->vs_frags_small, - stat->vs_frags_aging); + stat->vs_frags_aging, stat->vs_frags_bitmap); VERBOSE_MSG(" resrv_hit: "DF_U64", \tresrv_large: "DF_U64", " - "\tresrv_small: "DF_U64"\n", stat->vs_resrv_hint, - stat->vs_resrv_large, stat->vs_resrv_small); + "\tresrv_small: "DF_U64", \tresrv_bitmap: "DF_U64"\n", + stat->vs_resrv_hint, stat->vs_resrv_large, + stat->vs_resrv_small, stat->vs_resrv_bitmap); } static int diff --git a/src/vos/tests/vts_io.c b/src/vos/tests/vts_io.c index aaa5911e3ca..2a23baacd6b 100644 --- a/src/vos/tests/vts_io.c +++ b/src/vos/tests/vts_io.c @@ -240,8 +240,8 @@ teardown_io(void **state) int rc; if (table) { - vos_ts_table_free(&table); - rc = vos_ts_table_alloc(&table); + vos_ts_table_free(&table, NULL); + rc = vos_ts_table_alloc(&table, NULL); if (rc != 0) { printf("Fatal error, table couldn't be reallocated\n"); exit(rc); diff --git a/src/vos/tests/vts_ts.c b/src/vos/tests/vts_ts.c index 60302ffe262..f882496dc17 100644 --- a/src/vos/tests/vts_ts.c +++ b/src/vos/tests/vts_ts.c @@ -235,7 +235,7 @@ alloc_ts_cache(void **state) if (ts_table != NULL) ts_arg->old_table = ts_table; - rc = vos_ts_table_alloc(&ts_table); + rc = vos_ts_table_alloc(&ts_table, NULL); if (rc != 0) { print_message("Can't allocate timestamp table: "DF_RC"\n", DP_RC(rc)); @@ -757,7 +757,7 @@ ts_test_fini(void **state) vos_ts_set_free(ts_arg->ta_ts_set); ts_table = vos_ts_table_get(true); - vos_ts_table_free(&ts_table); + vos_ts_table_free(&ts_table, NULL); vos_ts_table_set(ts_arg->old_table); D_FREE(ts_arg); diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index cf2ae1520ad..45252f9da0e 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -408,7 +408,7 @@ vos_tls_fini(int tags, void *data) umem_fini_txd(&tls->vtl_txd); if (tls->vtl_ts_table) - vos_ts_table_free(&tls->vtl_ts_table); + vos_ts_table_free(&tls->vtl_ts_table, tls); D_FREE(tls); } @@ -419,7 +419,28 @@ vos_standalone_tls_fini(void) vos_tls_fini(DAOS_TGT_TAG, self_mode.self_tls); self_mode.self_tls = NULL; } +} + +void +vos_lru_alloc_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + d_tm_inc_gauge(tls->vtl_lru_alloc_size, size); +} + +void +vos_lru_free_track(void *arg, daos_size_t size) +{ + struct vos_tls *tls = arg; + + if (tls == NULL || tls->vtl_lru_alloc_size == NULL) + return; + + d_tm_dec_gauge(tls->vtl_lru_alloc_size, size); } static void * @@ -464,17 +485,13 @@ vos_tls_init(int tags, int xs_id, 
int tgt_id) } if (tags & DAOS_TGT_TAG) { - rc = vos_ts_table_alloc(&tls->vtl_ts_table); + rc = vos_ts_table_alloc(&tls->vtl_ts_table, tls); if (rc) { D_ERROR("Error in creating timestamp table: %d\n", rc); goto failed; } } - if (tgt_id < 0) - /** skip sensor setup on standalone vos & sys xstream */ - return tls; - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around for reply" " reconstruction", "entries", @@ -482,6 +499,37 @@ vos_tls_init(int tags, int xs_id, int tgt_id) if (rc) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); + if (tgt_id >= 0) { + rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, + "Number of committed entries kept around for reply" + " reconstruction", "entries", + "io/dtx/committed/tgt_%u", tgt_id); + if (rc) + D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, + "Number of committed entries", "entry", + "mem/vos/dtx_cmt_ent_%u/tgt_%u", + sizeof(struct vos_dtx_cmt_ent), tgt_id); + if (rc) + D_WARN("Failed to create committed cnt: "DF_RC"\n", + DP_RC(rc)); + + rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, + "Number of cached vos object", "entry", + "mem/vos/vos_obj_%u/tgt_%u", + sizeof(struct vos_object), tgt_id); + if (rc) + D_WARN("Failed to create vos obj cnt: "DF_RC"\n", DP_RC(rc)); + + } + + rc = d_tm_add_metric(&tls->vtl_lru_alloc_size, D_TM_GAUGE, + "Active DTX table LRU size", "byte", + "mem/vos/vos_lru_size/tgt_%d", tgt_id); + if (rc) + D_WARN("Failed to create LRU alloc size: "DF_RC"\n", DP_RC(rc)); return tls; failed: diff --git a/src/vos/vos_container.c b/src/vos/vos_container.c index 19a10d6acac..93cc62ceeb5 100644 --- a/src/vos/vos_container.c +++ b/src/vos/vos_container.c @@ -314,6 +314,11 @@ vos_cont_create(daos_handle_t poh, uuid_t co_uuid) return rc; } +static const struct lru_callbacks lru_cont_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + /** * Open a container within a VOSP */ @@ -395,8 +400,8 @@ vos_cont_open(daos_handle_t poh, uuid_t co_uuid, daos_handle_t *coh) rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, sizeof(struct vos_dtx_act_ent), - LRU_FLAG_REUSE_UNIQUE, - NULL, NULL); + LRU_FLAG_REUSE_UNIQUE, &lru_cont_cbs, + vos_tls_get(cont->vc_pool->vp_sysdb)); if (rc != 0) { D_ERROR("Failed to create DTX active array: rc = "DF_RC"\n", DP_RC(rc)); diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 63b102e32f9..4eefa622b7a 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -756,6 +756,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { + struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -820,6 +821,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); + d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -2471,6 +2473,7 @@ vos_dtx_aggregate(daos_handle_t coh) cont->vc_dtx_committed_count--; cont->vc_pool->vp_dtx_committed_count--; d_tm_dec_gauge(tls->vtl_committed, 1); + d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } if (epoch != cont_df->cd_newest_aggregated) { @@ -3136,6 +3139,11 @@ vos_dtx_rsrvd_fini(struct dtx_handle *dth) 
} } +static const struct lru_callbacks lru_dtx_cache_cbs = { + .lru_on_alloc = vos_lru_alloc_track, + .lru_on_free = vos_lru_free_track, +}; + int vos_dtx_cache_reset(daos_handle_t coh, bool force) { @@ -3170,7 +3178,8 @@ vos_dtx_cache_reset(daos_handle_t coh, bool force) lrua_array_free(cont->vc_dtx_array); rc = lrua_array_alloc(&cont->vc_dtx_array, DTX_ARRAY_LEN, DTX_ARRAY_NR, - sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, NULL, NULL); + sizeof(struct vos_dtx_act_ent), LRU_FLAG_REUSE_UNIQUE, + &lru_dtx_cache_cbs, vos_tls_get(false)); if (rc != 0) { D_ERROR("Failed to re-create DTX active array for "DF_UUID": "DF_RC"\n", DP_UUID(cont->vc_id), DP_RC(rc)); diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 67f4980b66f..2bee64673bf 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1726,4 +1726,6 @@ int vos_oi_upgrade_layout_ver(struct vos_container *cont, daos_unit_oid_t oid, uint32_t layout_ver); +void vos_lru_free_track(void *arg, daos_size_t size); +void vos_lru_alloc_track(void *arg, daos_size_t size); #endif /* __VOS_INTERNAL_H__ */ diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index 826c53f06a5..11e55e9d156 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -58,11 +58,13 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, struct vos_object *obj; struct obj_lru_key *lkey; struct vos_container *cont; + struct vos_tls *tls; int rc; cont = (struct vos_container *)args; D_ASSERT(cont != NULL); + tls = vos_tls_get(cont->vc_pool->vp_sysdb); lkey = (struct obj_lru_key *)key; D_ASSERT(lkey != NULL); @@ -74,7 +76,7 @@ obj_lop_alloc(void *key, unsigned int ksize, void *args, D_GOTO(failed, rc = -DER_NOMEM); init_object(obj, lkey->olk_oid, cont); - + d_tm_inc_gauge(tls->vtl_obj_cnt, 1); *llink_p = &obj->obj_llink; rc = 0; failed: @@ -123,10 +125,13 @@ static void obj_lop_free(struct daos_llink *llink) { struct vos_object *obj; + struct vos_tls *tls; D_DEBUG(DB_TRACE, "lru free callback for vos_obj_cache\n"); obj = container_of(llink, struct vos_object, obj_llink); + tls = vos_tls_get(obj->obj_cont->vc_pool->vp_sysdb); + d_tm_dec_gauge(tls->vtl_obj_cnt, 1); clean_object(obj); D_FREE(obj); } diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index b2b82d1494b..171235b7ceb 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1424,6 +1424,11 @@ vos_pool_upgrade(daos_handle_t poh, uint32_t version) "Invalid pool upgrade version %d, current version is %d\n", version, pool_df->pd_version); + rc = vea_upgrade(pool->vp_vea_info, &pool->vp_umm, &pool_df->pd_vea_df, + pool_df->pd_version); + if (rc) + return rc; + rc = umem_tx_begin(&pool->vp_umm, NULL); if (rc != 0) return rc; diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 96c9a3e0c6d..981cce10be5 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -63,6 +63,9 @@ struct vos_tls { bool vtl_hash_set; }; struct d_tm_node_t *vtl_committed; + struct d_tm_node_t *vtl_obj_cnt; + struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; + struct d_tm_node_t *vtl_lru_alloc_size; }; struct bio_xs_context *vos_xsctxt_get(void); diff --git a/src/vos/vos_ts.c b/src/vos/vos_ts.c index 9e47d100097..4018c2e685e 100644 --- a/src/vos/vos_ts.c +++ b/src/vos/vos_ts.c @@ -99,13 +99,29 @@ static void init_entry(void *payload, uint32_t idx, void *arg) entry->te_info = info; } +static void vos_lru_ts_alloc(void *arg, 
daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_alloc_track(info->ti_tls, size); +} + +static void vos_lru_ts_free(void *arg, daos_size_t size) +{ + struct vos_ts_info *info = arg; + + vos_lru_free_track(info->ti_tls, size); +} + static const struct lru_callbacks lru_cbs = { .lru_on_evict = evict_entry, .lru_on_init = init_entry, + .lru_on_alloc = vos_lru_ts_alloc, + .lru_on_free = vos_lru_ts_free, }; int -vos_ts_table_alloc(struct vos_ts_table **ts_tablep) +vos_ts_table_alloc(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_entry *entry; struct vos_ts_table *ts_table; @@ -129,6 +145,11 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) goto free_table; } + if (tls != NULL) + d_tm_inc_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); + ts_table->tt_ts_rl = vos_start_epoch; ts_table->tt_ts_rh = vos_start_epoch; uuid_clear(ts_table->tt_tx_rl.dti_uuid); @@ -140,6 +161,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) info->ti_type = i; info->ti_count = type_counts[i]; info->ti_table = ts_table; + info->ti_tls = tls; switch (i) { case VOS_TS_TYPE_OBJ: miss_size = OBJ_MISS_SIZE; @@ -192,6 +214,10 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) cleanup: for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); free_table: D_FREE(ts_table); @@ -200,7 +226,7 @@ vos_ts_table_alloc(struct vos_ts_table **ts_tablep) } void -vos_ts_table_free(struct vos_ts_table **ts_tablep) +vos_ts_table_free(struct vos_ts_table **ts_tablep, struct vos_tls *tls) { struct vos_ts_table *ts_table = *ts_tablep; int i; @@ -208,6 +234,10 @@ vos_ts_table_free(struct vos_ts_table **ts_tablep) for (i = 0; i < VOS_TS_TYPE_COUNT; i++) lrua_array_free(ts_table->tt_type_info[i].ti_array); + if (tls != NULL) + d_tm_dec_gauge(tls->vtl_lru_alloc_size, + sizeof(*ts_table->tt_misses) * + (OBJ_MISS_SIZE + DKEY_MISS_SIZE + AKEY_MISS_SIZE)); D_FREE(ts_table->tt_misses); D_FREE(ts_table); diff --git a/src/vos/vos_ts.h b/src/vos/vos_ts.h index 2772fab2ce2..379f59f819c 100644 --- a/src/vos/vos_ts.h +++ b/src/vos/vos_ts.h @@ -27,6 +27,8 @@ struct vos_ts_info { struct vos_ts_table *ti_table; /** Negative entries for this type */ struct vos_ts_entry *ti_misses; + /** TLS for tracking memory usage */ + struct vos_tls *ti_tls; /** Type identifier */ uint32_t ti_type; /** Mask for negative entry cache */ @@ -620,20 +622,22 @@ vos_ts_peek_entry(uint32_t *idx, uint32_t type, struct vos_ts_entry **entryp, /** Allocate thread local timestamp cache. Set the initial global times * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. * * \return -DER_NOMEM Not enough memory available * 0 Success */ int -vos_ts_table_alloc(struct vos_ts_table **ts_table); +vos_ts_table_alloc(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Free the thread local timestamp cache and reset pointer to NULL * * \param[in,out] ts_table Thread local table pointer + * \param[in] tls TLS to track memory usage. 
*/ void -vos_ts_table_free(struct vos_ts_table **ts_table); +vos_ts_table_free(struct vos_ts_table **ts_table, struct vos_tls *tls); /** Allocate a timestamp set * diff --git a/utils/rpms/packaging/rpm_chrootbuild b/utils/rpms/packaging/rpm_chrootbuild index 73ce60c0746..9cf177e4a41 100755 --- a/utils/rpms/packaging/rpm_chrootbuild +++ b/utils/rpms/packaging/rpm_chrootbuild @@ -117,11 +117,7 @@ echo "\"\"\"" >> "$cfg_file" if [ -n "$DISTRO_VERSION" ]; then releasever_opt=("--config-opts=releasever=$DISTRO_VERSION") fi -# shellcheck disable=SC2086 -if ! eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" \ - ${repo_dels[*]} ${repo_adds[*]} --disablerepo=\*-debug* \ - "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET"; then - # Debug information for filing bugs on mock - rpm -q mock - mock --debug-config -fi + +# shellcheck disable=SC2086,SC2048,SC2294 +eval mock --configdir "$mock_config_dir" -r "${CHROOT_NAME}" ${repo_dels[*]} ${repo_adds[*]} \ + --disablerepo=\*-debug* "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET"
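The LRU accounting hooks added above (lru_on_alloc/lru_on_free plus vos_lru_alloc_track()/vos_lru_free_track()) are opt-in per LRU array. A minimal sketch of how another VOS caller could wire them, following the pattern used in vos_container.c and vos_dtx.c; my_lru_cbs, my_array, MY_LEN, MY_NR and struct my_payload are placeholders, not names from the patch:

/* Sketch, assumptions as noted in the lead-in. */
static const struct lru_callbacks my_lru_cbs = {
	.lru_on_alloc = vos_lru_alloc_track,	/* d_tm_inc_gauge(tls->vtl_lru_alloc_size, size) */
	.lru_on_free  = vos_lru_free_track,	/* d_tm_dec_gauge(tls->vtl_lru_alloc_size, size) */
};

struct lru_array *my_array;
int rc;

rc = lrua_array_alloc(&my_array, MY_LEN, MY_NR, sizeof(struct my_payload),
		      0 /* flags */, &my_lru_cbs,
		      vos_tls_get(false) /* callback arg: TLS whose gauge is charged */);

The callback argument is the vos_tls pointer, so standalone VOS or a missing vtl_lru_alloc_size gauge is handled by the NULL checks inside vos_lru_alloc_track()/vos_lru_free_track(), and no metric is emitted in that case.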