From 88807c372542bc2a0f6cd7325a162b0612c3aab5 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 28 Sep 2023 08:07:07 -0500 Subject: [PATCH 01/11] DAOS-623 dfs: make the dfs pipeline api public with caveat (#13080) - Similar to the daos_pipeline API, make the DFS filter API public with a comment in the header that it should not be used in production. - Free bulk handles in pipeline API to avoid leaks. Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs_internal.h | 91 -------------------------------- src/include/daos_fs.h | 98 +++++++++++++++++++++++++++++++++++ src/pipeline/cli_pipeline.c | 10 ++++ 3 files changed, 108 insertions(+), 91 deletions(-) diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 83ac13aeaab..c337ec1bf42 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -131,97 +131,6 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); -/** Internal pipeline readdir functionality */ - -/** DFS pipeline object */ -typedef struct dfs_pipeline dfs_pipeline_t; - -enum { - DFS_FILTER_NAME = (1 << 1), - DFS_FILTER_NEWER = (1 << 2), - DFS_FILTER_INCLUDE_DIRS = (1 << 3), -}; - -/** Predicate conditions for filter */ -typedef struct { - char dp_name[DFS_MAX_NAME]; /** name condition for entry - regex */ - time_t dp_newer; /** timestamp for newer condition */ - size_t dp_size; /** size of files - not supported for now */ -} dfs_predicate_t; - -/** - * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the - * chunk_size of the file is also required to be passed if the file was created with a different - * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to - * chunk size. - * - * \param[in] dfs Pointer to the mounted file system. - * \param[in] oid Object ID of the file. - * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). - * \param[out] size Returned size of the file. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); - -/** - * Create a pipeline object to be used during readdir with filter. Should be destroyed with - * dfs_pipeline_destroy(). - * - * \param[in] dfs Pointer to the mounted file system. - * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). - * \param[in] flags Pipeline flags (conditions to apply). - * \param[out] dpipe Pipeline object created. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); - -/** - * Destroy pipeline object. - * - * \param[in] dpipe Pipeline object. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_pipeline_destroy(dfs_pipeline_t *dpipe); - -/** - * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() - * on the entries that are enumerated. This function also optionally returns the object ID of each - * dirent if requested through a pre-allocated OID input array. - * - * \param[in] dfs Pointer to the mounted file system. - * \param[in] obj Opened directory object. - * \param[in] dpipe DFS pipeline filter. - * \param[in,out] - * anchor Hash anchor for the next call, it should be set to - * zeroes for the first call, it should not be changed - * by caller between calls. 
- * \param[in,out] - * nr [in]: number of dirents allocated in \a dirs. - * [out]: number of returned dirents. - * \param[in,out] - * dirs [in] preallocated array of dirents. - * [out]: dirents returned with d_name filled only. - * \param[in,out] - * oids [in] Optional preallocated array of object IDs. - * [out]: Object ID associated with each dirent that was read. - * \param[in,out] - * csizes [in] Optional preallocated array of sizes. - * [out]: chunk size associated with each dirent that was read. - * \param[out] Total number of entries scanned by readdir before returning. - * - * \return 0 on success, errno code on failure. - */ -int -dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, - uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, - uint64_t *nr_scanned); - #if defined(__cplusplus) } #endif diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index ab8568a2974..95433bf966c 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -1167,6 +1167,104 @@ enum { int dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char *name); +/* + * The Pipeline DFS API (everything under this comment) is under heavy development and should not be + * used in production. The API is subject to change. + */ + +/** DFS pipeline object */ +typedef struct dfs_pipeline dfs_pipeline_t; + +enum { + DFS_FILTER_NAME = (1 << 1), + DFS_FILTER_NEWER = (1 << 2), + DFS_FILTER_INCLUDE_DIRS = (1 << 3), +}; + +/** Predicate conditions for filter */ +typedef struct { + /** name condition for entry - regex */ + char dp_name[DFS_MAX_NAME]; + /** timestamp for newer condition */ + time_t dp_newer; + /** size of files - not supported for now */ + size_t dp_size; +} dfs_predicate_t; + +/** + * Same as dfs_get_size() but using the OID of the file instead of the open handle. Note that the + * chunk_size of the file is also required to be passed if the file was created with a different + * chunk size than the default (passing other than 0 to dfs_open). Otherwise, 0 should be passed to + * chunk size. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] oid Object ID of the file. + * \param[in] chunk_size Chunk size of the file (pass 0 if it was created with default). + * \param[out] size Returned size of the file. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_get_size_by_oid(dfs_t *dfs, daos_obj_id_t oid, daos_size_t chunk_size, daos_size_t *size); + +/** + * Create a pipeline object to be used during readdir with filter. Should be destroyed with + * dfs_pipeline_destroy(). + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] pred Predicate condition values (name/regex, newer timestamp, etc.). + * \param[in] flags Pipeline flags (conditions to apply). + * \param[out] dpipe Pipeline object created. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_create(dfs_t *dfs, dfs_predicate_t pred, uint64_t flags, dfs_pipeline_t **dpipe); + +/** + * Destroy pipeline object. + * + * \param[in] dpipe Pipeline object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_pipeline_destroy(dfs_pipeline_t *dpipe); + +/** + * Same as dfs_readdir() but this additionally applies a filter created with dfs_pipeline_create() + * on the entries that are enumerated. This function also optionally returns the object ID of each + * dirent if requested through a pre-allocated OID input array. 
+ * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Opened directory object. + * \param[in] dpipe DFS pipeline filter. + * \param[in,out] + * anchor Hash anchor for the next call, it should be set to + * zeroes for the first call, it should not be changed + * by caller between calls. + * \param[in,out] + * nr [in]: number of dirents allocated in \a dirs. + * [out]: number of returned dirents. + * \param[in,out] + * dirs [in] preallocated array of dirents. + * [out]: dirents returned with d_name filled only. + * \param[in,out] + * oids [in] Optional preallocated array of object IDs. + * [out]: Object ID associated with each dirent that was read. + * \param[in,out] + * csizes [in] Optional preallocated array of sizes. + * [out]: chunk size associated with each dirent that was read. + * \param[out] nr_scanned + * Total number of entries scanned by readdir before returning. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_readdir_with_filter(dfs_t *dfs, dfs_obj_t *obj, dfs_pipeline_t *dpipe, daos_anchor_t *anchor, + uint32_t *nr, struct dirent *dirs, daos_obj_id_t *oids, daos_size_t *csizes, + uint64_t *nr_scanned); + #if defined(__cplusplus) } #endif /* __cplusplus */ diff --git a/src/pipeline/cli_pipeline.c b/src/pipeline/cli_pipeline.c index c008aa01f4c..b30d8366b79 100644 --- a/src/pipeline/cli_pipeline.c +++ b/src/pipeline/cli_pipeline.c @@ -119,6 +119,7 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) struct pipeline_run_cb_args *cb_args; daos_pipeline_run_t *api_args; struct pipeline_run_out *pro; /** received data from srv */ + struct pipeline_run_in *pri; int opc; int ret = task->dt_result; int rc = 0; @@ -132,6 +133,7 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) api_args = cb_args->api_args; rpc = cb_args->rpc; opc = opc_get(rpc->cr_opc); + pri = (struct pipeline_run_in *)crt_req_get(rpc); if (ret != 0) { D_ERROR("RPC %d failed, " DF_RC "\n", opc, DP_RC(ret)); @@ -247,6 +249,14 @@ pipeline_shard_run_cb(tse_task_t *task, void *data) *api_args->anchor = pro->pro_anchor; out: + if (pri->pri_kds_bulk) + crt_bulk_free(pri->pri_kds_bulk); + if (pri->pri_iods_bulk) + crt_bulk_free(pri->pri_iods_bulk); + if (pri->pri_sgl_keys_bulk) + crt_bulk_free(pri->pri_sgl_keys_bulk); + if (pri->pri_sgl_recx_bulk) + crt_bulk_free(pri->pri_sgl_recx_bulk); crt_req_decref(rpc); tse_task_list_del(task); tse_task_decref(task); From 9961d263fc0f2c8fda8d3bd64ed0e49f0a6481a2 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Sep 2023 16:44:59 +0100 Subject: [PATCH 02/11] Revert "DAOS-14225 control: Prevent duplicate call to SetRank (#13058)" (#13104) This reverts commit 281b4fad37a339ddfe37e7d25883d900f4360867. 
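Returning briefly to the first patch: below is a minimal usage sketch of the newly public DFS pipeline API. This is illustrative only — the header itself warns the API is not for production use — and the regex dialect, the flag combination, and passing NULL for the optional oids/csizes arrays are assumptions, not taken from the patch.

#include <stdio.h>
#include <string.h>
#include <dirent.h>
#include <daos_fs.h>

/* List entries of an open directory whose names match a regex and that are
 * newer than a cutoff time, letting the pipeline filter do the matching.
 * dfs and dir are assumed to come from dfs_mount()/dfs_lookup() elsewhere.
 */
static int
list_recent_logs(dfs_t *dfs, dfs_obj_t *dir, time_t cutoff)
{
	dfs_predicate_t  pred = {0};
	dfs_pipeline_t  *dpipe = NULL;
	daos_anchor_t    anchor = {0};	/* zeroed for the first call, untouched afterwards */
	struct dirent    dirs[64];
	uint64_t         scanned = 0;
	int              rc;

	strncpy(pred.dp_name, "^.*\\.log$", DFS_MAX_NAME - 1);	/* name condition (regex) */
	pred.dp_newer = cutoff;					/* newer-than condition */

	rc = dfs_pipeline_create(dfs, pred, DFS_FILTER_NAME | DFS_FILTER_NEWER, &dpipe);
	if (rc)
		return rc;

	while (!daos_anchor_is_eof(&anchor)) {
		uint32_t nr = 64;	/* in: capacity of dirs[]; out: entries returned */

		rc = dfs_readdir_with_filter(dfs, dir, dpipe, &anchor, &nr, dirs,
					     NULL /* oids */, NULL /* csizes */, &scanned);
		if (rc)
			break;
		for (uint32_t i = 0; i < nr; i++)
			printf("%s\n", dirs[i].d_name);
	}

	dfs_pipeline_destroy(dpipe);
	return rc;
}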
--- src/control/common/proto/logging.go | 2 +- src/control/common/proto/mgmt/svc.pb.go | 169 +++++++++++++----------- src/control/server/instance.go | 25 ++-- src/control/server/mgmt_system.go | 16 +++ src/control/server/mgmt_system_test.go | 2 + src/mgmt/svc.pb-c.c | 26 +++- src/mgmt/svc.pb-c.h | 6 +- src/proto/mgmt/svc.proto | 3 +- 8 files changed, 146 insertions(+), 103 deletions(-) diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index 624e58fb459..a2edc22c67f 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -136,7 +136,7 @@ func Debug(msg proto.Message) string { fmt.Fprintf(&bld, " %s:%s", p.Label, p.State) } case *mgmtpb.JoinResp: - fmt.Fprintf(&bld, "%T rank:%d (state:%s) map:%d", m, m.Rank, m.State, m.MapVersion) + fmt.Fprintf(&bld, "%T rank:%d (state:%s, local:%t) map:%d", m, m.Rank, m.State, m.LocalJoin, m.MapVersion) case *mgmtpb.GetAttachInfoResp: msRanks := ranklist.RankSetFromRanks(ranklist.RanksFromUint32(m.MsRanks)) uriRanks := ranklist.NewRankSet() diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index e6988dca637..74d11533864 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.28.1 // protoc v3.5.0 // source: mgmt/svc.proto @@ -342,6 +342,7 @@ type JoinResp struct { Rank uint32 `protobuf:"varint,2,opt,name=rank,proto3" json:"rank,omitempty"` // Server rank assigned. State JoinResp_State `protobuf:"varint,3,opt,name=state,proto3,enum=mgmt.JoinResp_State" json:"state,omitempty"` // Server state in the system map. FaultDomain string `protobuf:"bytes,4,opt,name=faultDomain,proto3" json:"faultDomain,omitempty"` // Fault domain for the instance + LocalJoin bool `protobuf:"varint,5,opt,name=localJoin,proto3" json:"localJoin,omitempty"` // Join processed locally. MapVersion uint32 `protobuf:"varint,6,opt,name=map_version,json=mapVersion,proto3" json:"map_version,omitempty"` // Join processed in this version of the system map. 
} @@ -405,6 +406,13 @@ func (x *JoinResp) GetFaultDomain() string { return "" } +func (x *JoinResp) GetLocalJoin() bool { + if x != nil { + return x.LocalJoin + } + return false +} + func (x *JoinResp) GetMapVersion() uint32 { if x != nil { return x.MapVersion @@ -1151,7 +1159,7 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x69, 0x64, 0x78, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x69, 0x64, 0x78, 0x12, 0x20, 0x0a, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x69, 0x6e, 0x63, 0x61, 0x72, 0x6e, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x22, 0xdd, 0x01, 0x0a, 0x08, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x2a, 0x0a, 0x05, 0x73, 0x74, 0x61, @@ -1159,84 +1167,85 @@ var file_mgmt_svc_proto_rawDesc = []byte{ 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, - 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, - 0x10, 0x01, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x4a, - 0x6f, 0x69, 0x6e, 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, - 0x0f, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, - 0x74, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, - 0x63, 0x61, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, - 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, - 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, - 0x09, 0x61, 0x6c, 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x08, 0x61, 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, - 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 
0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, - 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, - 0x72, 0x66, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, - 0x0a, 0x12, 0x63, 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, - 0x61, 0x64, 0x64, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, - 0x74, 0x78, 0x53, 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, - 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x0a, 0x63, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, - 0x6e, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, - 0x12, 0x1e, 0x0a, 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, - 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, - 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, - 0x28, 0x09, 0x52, 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, - 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, - 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, - 0x6b, 0x5f, 0x75, 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, - 0x6f, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, - 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, - 0x6b, 0x73, 0x12, 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, - 0x5f, 0x68, 0x69, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, - 0x52, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, - 0x21, 0x0a, 0x0c, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x73, 0x79, 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, - 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, - 0x74, 0x64, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 
0x04, 0x72, 0x61, 0x6e, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, - 0x50, 0x69, 0x6e, 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, - 0x41, 0x0a, 0x0a, 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, - 0x6b, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x22, 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, - 0x72, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, - 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, - 0x49, 0x44, 0x12, 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, - 0x55, 0x55, 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, - 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, - 0x62, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, - 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, - 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, - 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, - 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x44, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x6c, 0x6f, 0x63, 0x61, 0x6c, + 0x4a, 0x6f, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x63, 0x61, + 0x6c, 0x4a, 0x6f, 0x69, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x18, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, + 0x06, 0x0a, 0x02, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4f, 0x55, 0x54, 0x10, 0x01, + 0x22, 0x38, 0x0a, 0x0e, 0x4c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x68, 0x6f, 0x73, 0x74, 0x73, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x25, 0x0a, + 0x0e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x12, 0x22, 0x0a, 0x0c, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x44, 0x6f, 0x77, 
0x6e, 0x52, 0x65, 0x70, 0x6c, + 0x69, 0x63, 0x61, 0x73, 0x22, 0x41, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, + 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x6c, + 0x6c, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, + 0x6c, 0x6c, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x0d, 0x43, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, + 0x76, 0x69, 0x64, 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, 0x61, + 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x66, + 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2b, 0x0a, 0x12, 0x63, + 0x72, 0x74, 0x5f, 0x63, 0x74, 0x78, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5f, 0x61, 0x64, 0x64, + 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x63, 0x72, 0x74, 0x43, 0x74, 0x78, 0x53, + 0x68, 0x61, 0x72, 0x65, 0x41, 0x64, 0x64, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x72, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x63, + 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x6e, 0x65, 0x74, + 0x5f, 0x64, 0x65, 0x76, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x44, 0x65, 0x76, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x12, 0x1e, 0x0a, + 0x0b, 0x73, 0x72, 0x76, 0x5f, 0x73, 0x72, 0x78, 0x5f, 0x73, 0x65, 0x74, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x09, 0x73, 0x72, 0x76, 0x53, 0x72, 0x78, 0x53, 0x65, 0x74, 0x12, 0x19, 0x0a, + 0x08, 0x65, 0x6e, 0x76, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x07, 0x65, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x74, + 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x0a, 0x09, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x75, + 0x72, 0x69, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x47, 0x65, 0x74, 0x41, 0x74, 0x74, 0x61, 0x63, 0x68, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, + 0x73, 0x70, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x52, 0x08, 0x72, 0x61, 0x6e, 0x6b, + 0x55, 0x72, 0x69, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x6d, 0x73, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x6d, 0x73, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, + 0x3b, 0x0a, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x6e, 0x65, 0x74, 0x5f, 0x68, 0x69, + 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x52, 0x0d, 0x63, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4e, 0x65, 0x74, 0x48, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x64, 0x61, 0x74, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x0b, 0x64, 0x61, 0x74, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 
0x12, + 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, + 0x73, 0x1a, 0x2f, 0x0a, 0x07, 0x52, 0x61, 0x6e, 0x6b, 0x55, 0x72, 0x69, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, + 0x72, 0x69, 0x22, 0x25, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x70, 0x53, 0x68, 0x75, 0x74, 0x64, 0x6f, + 0x77, 0x6e, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x21, 0x0a, 0x0b, 0x50, 0x69, 0x6e, + 0x67, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x22, 0x41, 0x0a, 0x0a, + 0x53, 0x65, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, + 0x6e, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x1f, + 0x0a, 0x0b, 0x6d, 0x61, 0x70, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6d, 0x61, 0x70, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, + 0x7c, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x4d, 0x6f, 0x6e, 0x69, 0x74, 0x6f, 0x72, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x6f, 0x6c, 0x55, 0x55, 0x49, 0x44, 0x12, + 0x26, 0x0a, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, + 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x6f, 0x6c, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x55, 0x55, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x69, 0x64, 0x42, 0x3a, 0x5a, + 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, + 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, + 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/src/control/server/instance.go b/src/control/server/instance.go index 4583c86f170..14f53cf3b5b 100644 --- a/src/control/server/instance.go +++ b/src/control/server/instance.go @@ -178,10 +178,10 @@ func (ei *EngineInstance) removeSocket() error { return nil } -func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, uint32, error) { +func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.NotifyReadyReq) (ranklist.Rank, bool, uint32, error) { superblock := ei.getSuperblock() if superblock == nil { - return ranklist.NilRank, 0, errors.New("nil superblock while determining rank") + return ranklist.NilRank, false, 0, errors.New("nil superblock while determining rank") } r := ranklist.NilRank @@ -200,11 +200,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify }) if err != nil { ei.log.Errorf("join failed: %s", err) - return ranklist.NilRank, 0, err + return ranklist.NilRank, false, 0, err } switch resp.State { case system.MemberStateAdminExcluded, 
system.MemberStateExcluded: - return ranklist.NilRank, 0, errors.Errorf("rank %d excluded", resp.Rank) + return ranklist.NilRank, resp.LocalJoin, 0, errors.Errorf("rank %d excluded", resp.Rank) } r = ranklist.Rank(resp.Rank) @@ -218,11 +218,11 @@ func (ei *EngineInstance) determineRank(ctx context.Context, ready *srvpb.Notify superblock.URI = ready.GetUri() ei.setSuperblock(superblock) if err := ei.WriteSuperblock(); err != nil { - return ranklist.NilRank, 0, err + return ranklist.NilRank, resp.LocalJoin, 0, err } } - return r, resp.MapVersion, nil + return r, resp.LocalJoin, resp.MapVersion, nil } func (ei *EngineInstance) updateFaultDomainInSuperblock() error { @@ -259,20 +259,21 @@ func (ei *EngineInstance) handleReady(ctx context.Context, ready *srvpb.NotifyRe ei.log.Error(err.Error()) // nonfatal } - r, mapVersion, err := ei.determineRank(ctx, ready) + r, localJoin, mapVersion, err := ei.determineRank(ctx, ready) if err != nil { return err } + // If the join was already processed because it ran on the same server, + // skip the rest of these steps. + if localJoin { + return nil + } + return ei.SetupRank(ctx, r, mapVersion) } func (ei *EngineInstance) SetupRank(ctx context.Context, rank ranklist.Rank, map_version uint32) error { - if ei.IsReady() { - ei.log.Errorf("SetupRank called on an already set-up instance %d", ei.Index()) - return nil - } - if err := ei.callSetRank(ctx, rank, map_version); err != nil { return errors.Wrap(err, "SetRank failed") } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 620db09bf11..8374b0b9e2e 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -178,6 +178,22 @@ func (svc *mgmtSvc) join(ctx context.Context, req *mgmtpb.JoinReq, peerAddr *net MapVersion: joinResponse.MapVersion, } + // If the rank is local to the MS leader, then we need to wire up at least + // one in order to perform a CaRT group update. 
+ if common.IsLocalAddr(peerAddr) && req.Idx == 0 { + resp.LocalJoin = true + + srvs := svc.harness.Instances() + if len(srvs) == 0 { + return nil, errors.New("invalid Join request (index 0 doesn't exist?!?)") + } + srv := srvs[0] + + if err := srv.SetupRank(ctx, joinResponse.Member.Rank, joinResponse.MapVersion); err != nil { + return nil, errors.Wrap(err, "SetupRank on local instance failed") + } + } + return resp, nil } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 0ac1112c4ba..375b77c3efb 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -1967,6 +1967,7 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, + LocalJoin: false, MapVersion: 2, }, }, @@ -1992,6 +1993,7 @@ func TestServer_MgmtSvc_Join(t *testing.T) { Status: 0, Rank: newMember.Rank.Uint32(), State: mgmtpb.JoinResp_IN, + LocalJoin: true, MapVersion: 2, }, }, diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index c3900429dfe..cfd562891e0 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -1010,7 +1010,7 @@ const ProtobufCEnumDescriptor mgmt__join_resp__state__descriptor = mgmt__join_resp__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[6] = { { "status", @@ -1060,6 +1060,18 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "localJoin", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__JoinResp, localjoin), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, { "map_version", 6, @@ -1075,16 +1087,16 @@ static const ProtobufCFieldDescriptor mgmt__join_resp__field_descriptors[5] = }; static const unsigned mgmt__join_resp__field_indices_by_name[] = { 3, /* field[3] = faultDomain */ - 4, /* field[4] = map_version */ + 4, /* field[4] = localJoin */ + 5, /* field[5] = map_version */ 1, /* field[1] = rank */ 2, /* field[2] = state */ 0, /* field[0] = status */ }; -static const ProtobufCIntRange mgmt__join_resp__number_ranges[2 + 1] = +static const ProtobufCIntRange mgmt__join_resp__number_ranges[1 + 1] = { { 1, 0 }, - { 6, 4 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = { @@ -1094,10 +1106,10 @@ const ProtobufCMessageDescriptor mgmt__join_resp__descriptor = "Mgmt__JoinResp", "mgmt", sizeof(Mgmt__JoinResp), - 5, + 6, mgmt__join_resp__field_descriptors, mgmt__join_resp__field_indices_by_name, - 2, mgmt__join_resp__number_ranges, + 1, mgmt__join_resp__number_ranges, (ProtobufCMessageInit) mgmt__join_resp__init, NULL,NULL,NULL /* reserved[123] */ }; diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index c1d61ef44fb..55acb283028 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -163,6 +163,10 @@ struct _Mgmt__JoinResp * Fault domain for the instance */ char *faultdomain; + /* + * Join processed locally. + */ + protobuf_c_boolean localjoin; /* * Join processed in this version of the system map. 
*/ @@ -170,7 +174,7 @@ struct _Mgmt__JoinResp }; #define MGMT__JOIN_RESP__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__join_resp__descriptor) \ - , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0 } + , 0, 0, MGMT__JOIN_RESP__STATE__IN, (char *)protobuf_c_empty_string, 0, 0 } struct _Mgmt__LeaderQueryReq diff --git a/src/proto/mgmt/svc.proto b/src/proto/mgmt/svc.proto index 668a9905bfd..400452837ce 100644 --- a/src/proto/mgmt/svc.proto +++ b/src/proto/mgmt/svc.proto @@ -44,8 +44,6 @@ message JoinReq { } message JoinResp { - reserved 5; - reserved "localJoin"; int32 status = 1; // DAOS error code uint32 rank = 2; // Server rank assigned. enum State { @@ -54,6 +52,7 @@ message JoinResp { } State state = 3; // Server state in the system map. string faultDomain = 4; // Fault domain for the instance + bool localJoin = 5; // Join processed locally. uint32 map_version = 6; // Join processed in this version of the system map. } From fbcb99abf7860734686fe59c2ec5d057495120ec Mon Sep 17 00:00:00 2001 From: wangdi Date: Fri, 29 Sep 2023 08:38:31 -0700 Subject: [PATCH 03/11] DAOS-14352 object: right epoch to fetch the data (#13049) If the epoch is higher than EC aggregate boundary, then it should use stable epoch to fetch the data, since the data could be aggregated independently on parity and data shard, so using stable epoch could make sure the consistency view during rebuild. And also EC aggregation should already aggregate the parity, so there should not be any partial update on the parity as well. Otherwise there might be partial update on this rebuilding shard, so let's use the epoch from the parity shard to fetch the data here, which will make sure partial update will not be fetched here. And also EC aggregation is being disabled at the moment, so there should not be any vos aggregation impact this process as well. Signed-off-by: Di Wang --- src/object/srv_obj_migrate.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index fa7579ca516..cd2b9018af1 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1350,22 +1350,38 @@ migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, /* For EC object, if the migration include both extent from parity rebuild * and extent from replicate rebuild, let rebuild the extent with parity first, * then extent from replication. - * - * Since the parity shard epoch should be higher or equal to the data shard epoch, - * so let's use the minimum epochs of all parity shards as the update epoch of - * this data shard. */ - for (i = 0; i < mrone->mo_iods_num_from_parity; i++) { for (j = 0; j < mrone->mo_iods_from_parity[i].iod_nr; j++) { daos_iod_t iod = mrone->mo_iods_from_parity[i]; + daos_epoch_t fetch_eph; iod.iod_nr = 1; iod.iod_recxs = &mrone->mo_iods_from_parity[i].iod_recxs[j]; - rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, - mrone->mo_iods_update_ephs_from_parity[i][j], - mrone->mo_iods_update_ephs_from_parity[i][j], - DIOF_EC_RECOV_FROM_PARITY, ds_cont); + + /* If the epoch is higher than EC aggregate boundary, then + * it should use stable epoch to fetch the data, since + * the data could be aggregated independently on parity + * and data shard, so using stable epoch could make sure + * the consistency view during rebuild. And also EC aggregation + * should already aggregate the parity, so there should not + * be any partial update on the parity as well. 
+ * + * Otherwise there might be partial update on this rebuilding + * shard, so let's use the epoch from the parity shard to fetch + * the data here, which will make sure partial update will not + * be fetched here. And also EC aggregation is being disabled + * at the moment, so there should not be any vos aggregation + * impact this process as well. + */ + if (ds_cont->sc_ec_agg_eph_boundary > + mrone->mo_iods_update_ephs_from_parity[i][j]) + fetch_eph = mrone->mo_epoch; + else + fetch_eph = mrone->mo_iods_update_ephs_from_parity[i][j]; + rc = __migrate_fetch_update_bulk(mrone, oh, &iod, 1, fetch_eph, + mrone->mo_iods_update_ephs_from_parity[i][j], + DIOF_EC_RECOV_FROM_PARITY, ds_cont); if (rc != 0) D_GOTO(out, rc); } From ed692fe5702c4585f409c3094b84936095eb09f0 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 29 Sep 2023 18:09:37 +0100 Subject: [PATCH 04/11] DAOS-14438 mgmt: Improve error reporting on uuid_parse failure. (#13093) This function returns -1 rather than a daos errno so treat it as such. Do not use the result in DF_RC as it will print DER_UNKNOWN. Put the call in the if statement itself rather than assigning rc. Use the new logging macros to log the error proplerly. Do not log the invalid uuid. Signed-off-by: Ashley Pittman --- src/engine/drpc_client.c | 9 ++- src/mgmt/srv_drpc.c | 141 ++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/src/engine/drpc_client.c b/src/engine/drpc_client.c index 9be829e0f11..54c98d602ec 100644 --- a/src/engine/drpc_client.c +++ b/src/engine/drpc_client.c @@ -387,11 +387,10 @@ ds_pool_find_bylabel(d_const_string_t label, uuid_t pool_uuid, D_GOTO(out_resp, rc = frsp->status); } - rc = uuid_parse(frsp->uuid, pool_uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", frsp->uuid, - DP_RC(rc)); - D_GOTO(out_resp, rc = -DER_IO); + if (uuid_parse(frsp->uuid, pool_uuid) != 0) { + rc = -DER_IO; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out_resp; } ranks = uint32_array_to_rank_list(frsp->svcreps, diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 9ef6054beda..11cabbf5990 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -471,11 +471,10 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); } - rc = uuid_parse(req->uuid, pool_uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->uuid, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->uuid, pool_uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } D_DEBUG(DB_MGMT, DF_UUID": creating pool\n", DP_UUID(pool_uuid)); @@ -558,11 +557,10 @@ ds_mgmt_drpc_pool_destroy(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to destroy pool %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } /* @@ -630,11 +628,10 @@ ds_mgmt_drpc_pool_evict(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to evict pool connections %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } 
svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -648,11 +645,10 @@ ds_mgmt_drpc_pool_evict(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); } for (i = 0; i < req->n_handles; i++) { - rc = uuid_parse(req->handles[i], handles[i]); - if (rc != 0) { - D_ERROR("Unable to parse handle UUID %s: " - DF_RC"\n", req->id, DP_RC(rc)); - D_GOTO(out_free, rc = -DER_INVAL); + if (uuid_parse(req->handles[i], handles[i]) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Handle UUID is invalid"); + goto out_free; } } n_handles = req->n_handles; @@ -704,11 +700,10 @@ pool_change_target_state(char *id, d_rank_list_t *svc_ranks, size_t n_targetidx, int rc, i; num_addrs = (n_targetidx > 0) ? n_targetidx : 1; - rc = uuid_parse(id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", id, - DP_RC(rc)); - return -DER_INVAL; + if (uuid_parse(id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + return rc; } rc = pool_target_addr_list_alloc(num_addrs, &target_addr_list); @@ -871,10 +866,9 @@ ds_mgmt_drpc_pool_extend(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) nvme_bytes = req->tierbytes[DAOS_MEDIA_NVME]; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); D_GOTO(out, rc = -DER_INVAL); } @@ -1003,10 +997,10 @@ void ds_mgmt_drpc_pool_set_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } D_INFO(DF_UUID": received request to set pool properties\n", @@ -1072,11 +1066,10 @@ ds_mgmt_drpc_pool_upgrade(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to upgrade pool %s\n", req->id); - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Unable to parse pool UUID %s: "DF_RC"\n", req->id, - DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1222,10 +1215,10 @@ void ds_mgmt_drpc_pool_get_prop(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } - rc = uuid_parse(req->id, uuid); - if (rc != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->id, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } D_INFO(DF_UUID": received request to get pool properties\n", @@ -1392,8 +1385,9 @@ ds_mgmt_drpc_pool_get_acl(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to get ACL for pool %s\n", req->id); if (uuid_parse(req->id, pool_uuid) != 0) { - D_ERROR("Couldn't parse '%s' to UUID\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1444,8 +1438,9 @@ get_params_from_modify_acl_req(Drpc__Call *drpc_req, uuid_t uuid_out, } if (uuid_parse(req->id, uuid_out) != 0) { - D_ERROR("Couldn't parse UUID\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "UUID is invalid"); + goto out; } rc = daos_acl_from_strs((const char 
**)req->entries, req->n_entries, acl_out); @@ -1561,8 +1556,9 @@ ds_mgmt_drpc_pool_delete_acl(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) } if (uuid_parse(req->id, pool_uuid) != 0) { - D_ERROR("Couldn't parse UUID\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1622,8 +1618,9 @@ ds_mgmt_drpc_pool_list_cont(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) /* resp.containers, n_containers are NULL/0 */ if (uuid_parse(req->id, req_uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1764,8 +1761,9 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to query DAOS pool %s\n", req->id); if (uuid_parse(req->id, uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -1874,8 +1872,9 @@ ds_mgmt_drpc_pool_query_targets(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to query DAOS pool %s, %zu targets\n", req->id, req->n_targets); if (uuid_parse(req->id, uuid) != 0) { - D_ERROR("Failed to parse pool uuid %s\n", req->id); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); @@ -2134,11 +2133,10 @@ ds_mgmt_drpc_bio_health_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) ctl__bio_health_resp__init(resp); if (strlen(req->dev_uuid) != 0) { - rc = uuid_parse(req->dev_uuid, uuid); - if (rc != 0) { - D_ERROR("Unable to parse device UUID %s: "DF_RC"\n", - req->dev_uuid, DP_RC(rc)); - D_GOTO(out, rc = -DER_INVAL); + if (uuid_parse(req->dev_uuid, uuid) != 0) { + rc = -DER_INVAL; + DL_ERROR(rc, "Device UUID is invalid"); + goto out; } } else uuid_clear(uuid); /* need to set uuid = NULL */ @@ -2283,8 +2281,9 @@ ds_mgmt_drpc_dev_set_faulty(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) ctl__dev_manage_resp__init(resp); if (uuid_parse(req->uuid, dev_uuid) != 0) { - D_ERROR("Device UUID (%s) is invalid\n", req->uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Device UUID is invalid"); + goto pack_resp; } rc = ds_mgmt_dev_set_faulty(dev_uuid, resp); @@ -2380,13 +2379,15 @@ ds_mgmt_drpc_dev_replace(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp->device->uuid = NULL; if (uuid_parse(req->old_dev_uuid, old_uuid) != 0) { - D_ERROR("Old device UUID (%s) is invalid\n", req->old_dev_uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Old device UUID is invalid"); + goto pack_resp; } if (uuid_parse(req->new_dev_uuid, new_uuid) != 0) { - D_ERROR("New device UUID (%s) is invalid\n", req->new_dev_uuid); - D_GOTO(pack_resp, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "New device UUID is invalid"); + goto pack_resp; } /* TODO DAOS-6283: Implement no-reint device replacement option */ @@ -2449,13 +2450,15 @@ ds_mgmt_drpc_cont_set_owner(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to change container owner\n"); if (uuid_parse(req->contuuid, cont_uuid) != 0) { - D_ERROR("Container UUID is invalid\n"); - 
D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Container UUID is invalid"); + goto out; } if (uuid_parse(req->pooluuid, pool_uuid) != 0) { - D_ERROR("Pool UUID is invalid\n"); - D_GOTO(out, rc = -DER_INVAL); + rc = -DER_INVAL; + DL_ERROR(rc, "Pool UUID is invalid"); + goto out; } svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); From b58e1cc55967dd1f9de292c4891db8e44bb5fb68 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 2 Oct 2023 22:38:09 +0800 Subject: [PATCH 05/11] DAOS-14417 vea: reclaim bitmaps might yield for MD-on-SSD (#13096) Reclaiming unused bitmaps might yeild for MD-on-SSD, we need pick up empty lists firstly to make sure those lists(to be reclaimed) could be not allocated by reserve ult Signed-off-by: Wang Shilong --- src/vea/vea_free.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/vea/vea_free.c b/src/vea/vea_free.c index f82fd299bd4..17877bc277b 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -909,7 +909,7 @@ static int reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t *nr_reclaimed) { int i; - struct vea_bitmap_entry *bitmap_entry, *tmp_entry; + struct vea_bitmap_entry *bitmap_entry; struct vea_free_bitmap *vfb; d_iov_t key; int rc = 0; @@ -920,12 +920,11 @@ reclaim_unused_bitmap(struct vea_space_info *vsi, uint32_t nr_reclaim, uint32_t uint32_t blk_cnt; for (i = 0; i < VEA_MAX_BITMAP_CLASS; i++) { - d_list_for_each_entry_safe(bitmap_entry, tmp_entry, - &vsi->vsi_class.vfc_bitmap_empty[i], vbe_link) { + while ((bitmap_entry = d_list_pop_entry(&vsi->vsi_class.vfc_bitmap_empty[i], + struct vea_bitmap_entry, vbe_link))) { vfb = &bitmap_entry->vbe_bitmap; D_ASSERT(vfb->vfb_class == i + 1); D_ASSERT(is_bitmap_empty(vfb->vfb_bitmaps, vfb->vfb_bitmap_sz)); - d_list_del_init(&bitmap_entry->vbe_link); D_ALLOC_PTR(fca); if (!fca) return -DER_NOMEM; From 6e745b257e3b4b71900fb9707156d0788d4a134f Mon Sep 17 00:00:00 2001 From: Ravindran Padmanabhan Date: Mon, 2 Oct 2023 12:59:41 -0700 Subject: [PATCH 06/11] DAOS-14441 test: Perform exclude or drain during/after extend rebuild process. (#13095) Summary: Add new test cases to existing OSA tests (exclude or drain during/after the extend operation respectively). Add the tests to offline and online OSA tests. Drain feature is not supported when extend rebuild happens. It can work only after extend rebuild is completed. It is not the case for exclude [ie: we can perform an exclude when extend rebuild happens]. Signed-off-by: Padmanabhan --- src/tests/ftest/osa/offline_extend.py | 46 ++++++++++++++++++++++++++- src/tests/ftest/osa/online_extend.py | 45 +++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/osa/offline_extend.py b/src/tests/ftest/osa/offline_extend.py index 0f8bcc35fe5..0c3d65046be 100644 --- a/src/tests/ftest/osa/offline_extend.py +++ b/src/tests/ftest/osa/offline_extend.py @@ -3,6 +3,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ +from time import sleep from osa_utils import OSAUtils from test_utils_pool import add_pool from dmg_utils import check_system_query_status @@ -29,7 +30,8 @@ def setUp(self): self.test_oclass = None self.dmg_command.exit_status_exception = True - def run_offline_extend_test(self, num_pool, data=False, oclass=None): + def run_offline_extend_test(self, num_pool, data=False, oclass=None, + exclude_or_drain=None): """Run the offline extend without data. 
Args: @@ -37,6 +39,7 @@ def run_offline_extend_test(self, num_pool, data=False, oclass=None): data (bool) : whether pool has no data or to create some data in pool. Defaults to False. oclass (list) : list of daos object class (eg: "RP_2G8") + exclude_or_drain (str): Pass "exclude" or "drain" string. Defaults to None. """ # Create a pool pool = {} @@ -95,6 +98,19 @@ def run_offline_extend_test(self, num_pool, data=False, oclass=None): if self.test_during_aggregation is True and (num_pool > 1): self.delete_extra_container(self.pool) output = self.pool.extend(rank_val) + self.log.info(output) + if exclude_or_drain == "exclude": + self.pool.wait_for_rebuild_to_start() + # Give a 4 second delay so that some objects are moved + # as part of rebuild operation. + sleep(4) + self.log.info("Exclude rank 3 while rebuild is happening") + output = self.pool.exclude("3") + elif exclude_or_drain == "drain": + # Drain cannot be performed while extend rebuild is happening. + self.print_and_assert_on_rebuild_failure(output) + self.log.info("Drain rank 3 after extend rebuild is completed") + output = self.pool.drain("3") self.print_and_assert_on_rebuild_failure(output) free_space_after_extend = self.pool.get_total_free_space(refresh=True) @@ -202,3 +218,31 @@ def test_osa_offline_extend_after_snapshot(self): self.test_with_snapshot = self.params.get("test_with_snapshot", '/run/snapshot/*') self.log.info("Offline Extend Testing: After taking snapshot") self.run_offline_extend_test(1, data=True) + + def test_osa_offline_extend_exclude_during_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Offline extend after rebuild is started + and a rank is excluded. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,offline_extend + :avocado: tags=OSAOfflineExtend,test_osa_offline_extend_exclude_during_rebuild + """ + self.log.info("Offline Extend Testing: Exclude during Rebuild") + self.run_offline_extend_test(1, data=True, exclude_or_drain="exclude") + + def test_osa_offline_extend_drain_after_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Offline extend after rebuild is started + and a rank is drained. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,offline_extend + :avocado: tags=OSAOfflineExtend,test_osa_offline_extend_drain_after_rebuild + """ + self.log.info("Offline Extend Testing: Drain after rebuild") + self.run_offline_extend_test(1, data=True, exclude_or_drain="drain") diff --git a/src/tests/ftest/osa/online_extend.py b/src/tests/ftest/osa/online_extend.py index a2d6569c528..cb5931b33db 100644 --- a/src/tests/ftest/osa/online_extend.py +++ b/src/tests/ftest/osa/online_extend.py @@ -44,7 +44,8 @@ def daos_racer_thread(self): self.daos_racer.get_params(self) self.daos_racer.run() - def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="ior"): + def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="ior", + exclude_or_drain=None): """Run the Online extend without data. Args: @@ -52,6 +53,7 @@ def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="i racer (bool): Run the testing along with daos_racer. Defaults to False. oclass (str): Object Class (eg: RP_2G1, etc). Default to None. app_name (str): App (ior or mdtest) to run during the testing. Defaults to ior. + exclude_or_drain (str): Pass "exclude" or "drain" string. Defaults to None. 
""" # Pool dictionary pool = {} @@ -111,6 +113,19 @@ def run_online_extend_test(self, num_pool, racer=False, oclass=None, app_name="i # Get initial total free space (scm+nvme) initial_free_space = self.pool.get_total_free_space(refresh=True) output = self.pool.extend(self.ranks) + self.log.info(output) + if exclude_or_drain == "exclude": + self.pool.wait_for_rebuild_to_start() + # Give a 4 minute delay so that some objects are moved + # as part of rebuild operation. + time.sleep(4) + self.log.info("Exclude rank 3 while rebuild is happening") + output = self.pool.exclude("3") + elif exclude_or_drain == "drain": + # Drain cannot be performed while extend rebuild is happening. + self.print_and_assert_on_rebuild_failure(output) + self.log.info("Drain rank 3 after extend rebuild is completed") + output = self.pool.drain("3") self.print_and_assert_on_rebuild_failure(output) free_space_after_extend = self.pool.get_total_free_space(refresh=True) @@ -213,3 +228,31 @@ def test_osa_online_extend_with_aggregation(self): self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') self.run_online_extend_test(1) + + def test_osa_online_extend_exclude_during_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Online extend after rebuild is started + and a rank is excluded. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,online_extend + :avocado: tags=OSAOnlineExtend,test_osa_online_extend_exclude_during_rebuild + """ + self.log.info("Online Extend Testing: Exclude during Rebuild") + self.run_online_extend_test(1, exclude_or_drain="exclude") + + def test_osa_online_extend_drain_after_rebuild(self): + """Test ID: DAOS-14441. + + Test Description: Validate Online extend after rebuild is completed + and a rank is drained. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=osa,osa_extend,online_extend + :avocado: tags=OSAOnlineExtend,test_osa_online_extend_drain_after_rebuild + """ + self.log.info("Online Extend Testing: Drain after rebuild") + self.run_online_extend_test(1, exclude_or_drain="drain") From f0a9d3e67f8262dd32909b86e57224b45c0ecd2c Mon Sep 17 00:00:00 2001 From: Colin Howes <16161867+chowes@users.noreply.github.com> Date: Tue, 3 Oct 2023 09:35:11 -0700 Subject: [PATCH 07/11] DAOS-14420 control: use hostname -s instead of -d (#13086) The test runs hostname -d and checks that the result is a substring of os.Hostname(), which is not the case in our environment. From the man page, hostname -s should give us the hostname cut at the first dot, so I would expect this to be portable. Signed-off-by: Colin Howes --- src/control/lib/support/log_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/control/lib/support/log_test.go b/src/control/lib/support/log_test.go index 9641dcd0ce4..120659049d9 100644 --- a/src/control/lib/support/log_test.go +++ b/src/control/lib/support/log_test.go @@ -236,7 +236,7 @@ func TestSupport_cpOutputToFile(t *testing.T) { "Check valid Command with option": { target: targetTestDir, cmd: "hostname", - option: "-d", + option: "-s", expResult: hostName, expErr: nil, }, From 2fbe122b3a75b341f4461d81fceef50fb6b5bdf4 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Tue, 3 Oct 2023 13:41:06 -0400 Subject: [PATCH 08/11] DAOS-11552 doc: Document Interoperability Policy (#13027) Formally document the policy with a reference table in the Admin Guide. 
Signed-off-by: Michael MacDonald
---
 docs/admin/administration.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/docs/admin/administration.md b/docs/admin/administration.md
index 6c52325ad84..a65ef4a85ce 100644
--- a/docs/admin/administration.md
+++ b/docs/admin/administration.md
@@ -960,3 +960,28 @@ DAOS v2.2 client connections to pools which were created by DAOS v2.4 will be
 rejected. DAOS v2.4 client should work with DAOS v2.4 and DAOS v2.2 server.
 To upgrade all pools to latest format after software upgrade, run
 `dmg pool upgrade `
+
+### Interoperability Matrix
+
+The following table is intended to visually depict the interoperability
+policies for all major components in a DAOS system.
+
+
+||Server<br/>(daos_server)|Engine<br/>(daos_engine)|Agent<br/>(daos_agent)|Client<br/>(libdaos)|Admin<br/>(dmg)|
+|:---|:---:|:---:|:---:|:---:|:---:|
+|Server|x.y.z|x.y.z|x.(y±1)|n/a|x.y|
+|Engine|x.y.z|x.y.z|n/a|x.(y±1)|n/a|
+|Agent|x.(y±1)|n/a|n/a|x.y.z|n/a|
+|Client|n/a|x.(y±1)|x.y.z|n/a|n/a|
+|Admin|x.y|n/a|n/a|n/a|n/a|
+
+Key:
+ * x.y.z: Major.Minor.Patch must be equal
+ * x.y: Major.Minor must be equal
+ * x.(y±1): Major must be equal, Minor must be equal or -1/+1 release version
+ * n/a: Components do not communicate
+
+Examples:
+ * daos_server 2.4.0 is only compatible with daos_engine 2.4.0
+ * daos_agent 2.6.0 is compatible with daos_server 2.4.0 (2.5 is a development version)
+ * dmg 2.4.1 is compatible with daos_server 2.4.0

From bfa979e2e170388e7e7bdf4de1a062d283c41680 Mon Sep 17 00:00:00 2001
From: Colin Howes <16161867+chowes@users.noreply.github.com>
Date: Tue, 3 Oct 2023 11:15:15 -0700
Subject: [PATCH 09/11] DAOS-10942 utils: chmod should ignore unsupported bits (#12949)

The setuid, setgid, and sticky bits can cause fatal errors when the
datamover tool sets file permissions after copying a file, since these
are not supported by DFS. We can just ignore these bits when calling
dfs_chmod.

Signed-off-by: Colin Howes
---
 src/control/cmd/daos/filesystem.go | 12 ++++--
 src/utils/daos_hdlr.c              | 61 +++++++++++++++---------------
 src/utils/daos_hdlr.h              | 13 ++++---
 3 files changed, 46 insertions(+), 40 deletions(-)

diff --git a/src/control/cmd/daos/filesystem.go b/src/control/cmd/daos/filesystem.go
index a03d38ff0b9..e7fcf828a45 100644
--- a/src/control/cmd/daos/filesystem.go
+++ b/src/control/cmd/daos/filesystem.go
@@ -44,9 +44,10 @@ type fsCmd struct {
 type fsCopyCmd struct {
 	daosCmd
 
-	Source   string `long:"src" short:"s" description:"copy source" required:"1"`
-	Dest     string `long:"dst" short:"d" description:"copy destination" required:"1"`
-	Preserve string `long:"preserve-props" short:"m" description:"preserve container properties, requires HDF5 library" required:"0"`
+	Source      string `long:"src" short:"s" description:"copy source" required:"1"`
+	Dest        string `long:"dst" short:"d" description:"copy destination" required:"1"`
+	Preserve    string `long:"preserve-props" short:"m" description:"preserve container properties, requires HDF5 library" required:"0"`
+	IgnoreUnsup bool   `long:"ignore-unsupported" description:"ignore unsupported filesystem features when copying to DFS" required:"0"`
 }
 
 func (cmd *fsCopyCmd) Execute(_ []string) error {
@@ -64,6 +65,7 @@ func (cmd *fsCopyCmd) Execute(_ []string) error {
 		ap.preserve_props = C.CString(cmd.Preserve)
 		defer freeString(ap.preserve_props)
 	}
+	ap.ignore_unsup = C.bool(cmd.IgnoreUnsup)
 	ap.fs_op = C.FS_COPY
 	rc := C.fs_copy_hdlr(ap)
@@ -107,6 +109,10 @@ func (cmd *fsCopyCmd) Execute(_ []string) error {
 	cmd.Infof(" Files: %d", ap.fs_copy_stats.num_files)
 	cmd.Infof(" Links: %d", ap.fs_copy_stats.num_links)
 
+	if ap.fs_copy_stats.num_chmod_enotsup > 0 {
+		return errors.New(fmt.Sprintf("Copy completed successfully, but %d files had unsupported mode bits that could not be applied. 
Run with --ignore-unsupported to suppress this warning.", ap.fs_copy_stats.num_chmod_enotsup)) + } + return nil } diff --git a/src/utils/daos_hdlr.c b/src/utils/daos_hdlr.c index cf192241ece..431a92df867 100644 --- a/src/utils/daos_hdlr.c +++ b/src/utils/daos_hdlr.c @@ -523,11 +523,19 @@ file_close(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *file) } static int -file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, - mode_t mode) +file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, mode_t mode, + bool ignore_unsup, uint64_t *num_chmod_enotsup) { int rc = 0; + /* Unset any unsupported mode bits. We track these errors so they can + * be surfaced to the user at the end of the copy operation. + */ + if (!ignore_unsup && mode & (S_ISVTX | S_ISGID | S_ISUID)) { + (*num_chmod_enotsup)++; + } + mode &= ~(S_ISVTX | S_ISGID | S_ISUID); + if (file_dfs->type == POSIX) { rc = chmod(path, mode); /* POSIX returns -1 on error and sets errno @@ -547,12 +555,9 @@ file_chmod(struct cmd_args_s *ap, struct file_dfs *file_dfs, const char *path, } static int -fs_copy_file(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - struct stat *src_stat, - const char *src_path, - const char *dst_path) +fs_copy_file(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + struct stat *src_stat, const char *src_path, const char *dst_path, bool ignore_unsup, + uint64_t *num_chmod_enotsup) { int src_flags = O_RDONLY; int dst_flags = O_CREAT | O_TRUNC | O_WRONLY; @@ -603,7 +608,8 @@ fs_copy_file(struct cmd_args_s *ap, } /* set perms on destination to original source perms */ - rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode); + rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode, ignore_unsup, + num_chmod_enotsup); if (rc != 0) { rc = daos_errno2der(rc); DH_PERROR_DER(ap, rc, "updating dst file permissions failed"); @@ -704,12 +710,8 @@ fs_copy_symlink(struct cmd_args_s *ap, } static int -fs_copy_dir(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - struct stat *src_stat, - const char *src_path, - const char *dst_path, +fs_copy_dir(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + struct stat *src_stat, const char *src_path, const char *dst_path, bool ignore_unsup, struct fs_copy_stats *num) { DIR *src_dir = NULL; @@ -783,9 +785,9 @@ fs_copy_dir(struct cmd_args_s *ap, switch (next_src_stat.st_mode & S_IFMT) { case S_IFREG: - rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, - &next_src_stat, next_src_path, - next_dst_path); + rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &next_src_stat, + next_src_path, next_dst_path, ignore_unsup, + &num->num_chmod_enotsup); if ((rc != 0) && (rc != -DER_EXIST)) D_GOTO(out, rc); num->num_files++; @@ -800,7 +802,7 @@ fs_copy_dir(struct cmd_args_s *ap, break; case S_IFDIR: rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &next_src_stat, - next_src_path, next_dst_path, num); + next_src_path, next_dst_path, ignore_unsup, num); if ((rc != 0) && (rc != -DER_EXIST)) D_GOTO(out, rc); num->num_dirs++; @@ -815,7 +817,8 @@ fs_copy_dir(struct cmd_args_s *ap, } /* set original source perms on directories after copying */ - rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode); + rc = file_chmod(ap, dst_file_dfs, dst_path, src_stat->st_mode, ignore_unsup, + &num->num_chmod_enotsup); if (rc != 0) { rc = daos_errno2der(rc); DH_PERROR_DER(ap, rc, "updating 
destination permissions failed on '%s'", dst_path); @@ -842,12 +845,8 @@ fs_copy_dir(struct cmd_args_s *ap, } static int -fs_copy(struct cmd_args_s *ap, - struct file_dfs *src_file_dfs, - struct file_dfs *dst_file_dfs, - const char *src_path, - const char *dst_path, - struct fs_copy_stats *num) +fs_copy(struct cmd_args_s *ap, struct file_dfs *src_file_dfs, struct file_dfs *dst_file_dfs, + const char *src_path, const char *dst_path, bool ignore_unsup, struct fs_copy_stats *num) { int rc = 0; struct stat src_stat; @@ -902,14 +901,14 @@ fs_copy(struct cmd_args_s *ap, switch (src_stat.st_mode & S_IFMT) { case S_IFREG: - rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, - dst_path); + rc = fs_copy_file(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, dst_path, + ignore_unsup, &num->num_chmod_enotsup); if (rc == 0) num->num_files++; break; case S_IFDIR: - rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, - dst_path, num); + rc = fs_copy_dir(ap, src_file_dfs, dst_file_dfs, &src_stat, src_path, dst_path, + ignore_unsup, num); if (rc == 0) num->num_dirs++; break; @@ -1869,7 +1868,7 @@ fs_copy_hdlr(struct cmd_args_s *ap) D_GOTO(out, rc); } - rc = fs_copy(ap, &src_file_dfs, &dst_file_dfs, src_str, dst_str, num); + rc = fs_copy(ap, &src_file_dfs, &dst_file_dfs, src_str, dst_str, ap->ignore_unsup, num); if (rc != 0) { DH_PERROR_DER(ap, rc, "fs copy failed"); D_GOTO(out_disconnect, rc); diff --git a/src/utils/daos_hdlr.h b/src/utils/daos_hdlr.h index e576f2ba751..1f25c0ccf49 100644 --- a/src/utils/daos_hdlr.h +++ b/src/utils/daos_hdlr.h @@ -72,9 +72,10 @@ enum sh_op { }; struct fs_copy_stats { - uint64_t num_dirs; - uint64_t num_files; - uint64_t num_links; + uint64_t num_dirs; + uint64_t num_files; + uint64_t num_links; + uint64_t num_chmod_enotsup; }; struct dm_args { @@ -91,8 +92,7 @@ struct dm_args { uint32_t cont_prop_oid; uint32_t cont_prop_layout; uint64_t cont_layout; - uint64_t cont_oid; - + uint64_t cont_oid; }; /* cmd_args_s: consolidated result of parsing command-line arguments @@ -141,7 +141,8 @@ struct cmd_args_s { /* Container datamover related */ struct dm_args *dm_args; /* datamover arguments */ struct fs_copy_stats *fs_copy_stats; /* fs copy stats */ - bool fs_copy_posix; /* fs copy to POSIX */ + bool ignore_unsup; /* ignore unsupported filesystem features */ + bool fs_copy_posix; /* fs copy to POSIX */ FILE *outstream; /* normal output stream */ FILE *errstream; /* errors stream */ From 3188558363ac3f45f359fab42d9989c8f4f1f038 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 4 Oct 2023 06:40:06 +0900 Subject: [PATCH 10/11] DAOS-11955 pool: Ensure a PS is inside pool (#13046) * DAOS-11955 pool: Ensure a PS is inside its pool It was found that a PS leader may enter ds_pool_plan_svc_reconfs with itself being an undesirable replica. This may lead to an assertion failure at "move n replicas from undesired to to_remove" in ds_pool_plan_svc_reconfs. Moreover, such a PS leader may be outside of the pool group, making it incapable of performing many duties that involve collective communication. This patch therefore ensures that a PS leader will remove undesirable PS replicas synchronously before committing a pool map modification that introduces new undesirable PS replicas. (If we were to keep an undesirable PS replica, it might become a PS leader.) - Extend and clean up pool_svc_sched. * Allow pool_svc_reconf_ult to return an error, so that we can fail a pool map modification if its synchronous PS replica removal fails. 
* Allow pool_svc_reconf_ult to get an argument, so that we can tell pool_svc_reconf_ult whether we want a synchronous remove-only run or an asyncrhonous add-remove run. * Move pool_svc_sched.{psc_svc_rf,psc_force_notify} up to pool_svc. - Prevent pool_svc_step_up_cb from canceling in-progress reconfigurations by comparing pool map versions for which the reconfigurations are scheduled. - Rename POOL_GROUP_MAP_STATUS to POOL_GROUP_MAP_STATES so that we are consistent with the pool_map module. Signed-off-by: Li Wei --- src/pool/srv_internal.h | 13 +- src/pool/srv_pool.c | 322 +++++++++++++++++++++++++++++++--------- src/pool/srv_target.c | 2 +- src/pool/srv_util.c | 181 ++++++++++------------ 4 files changed, 338 insertions(+), 180 deletions(-) diff --git a/src/pool/srv_internal.h b/src/pool/srv_internal.h index a7b9a55bd86..c6936527970 100644 --- a/src/pool/srv_internal.h +++ b/src/pool/srv_internal.h @@ -16,8 +16,17 @@ #include #include -/* Map status of ranks that make up the pool group */ -#define POOL_GROUP_MAP_STATUS (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN) +/* Map states of ranks that make up the pool group */ +#define POOL_GROUP_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN | PO_COMP_ST_DRAIN) + +/* Map states of ranks that make up the pool service */ +#define POOL_SVC_MAP_STATES (PO_COMP_ST_UP | PO_COMP_ST_UPIN) + +/* + * Since we want all PS replicas to belong to the pool group, + * POOL_SVC_MAP_STATES must be a subset of POOL_GROUP_MAP_STATES. + */ +D_CASSERT((POOL_SVC_MAP_STATES & POOL_GROUP_MAP_STATES) == POOL_SVC_MAP_STATES); /** * Global pool metrics diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 71e7e2d358b..0fc06e0d739 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -65,12 +65,12 @@ struct pool_svc_events { /* Pool service schedule state */ struct pool_svc_sched { - int psc_svc_rf; - bool psc_force_notify; /* for pool_svc_step_up_cb */ ABT_mutex psc_mutex; /* only for psc_cv */ ABT_cond psc_cv; bool psc_in_progress; bool psc_canceled; + void *psc_arg; + int psc_rc; }; static int @@ -89,10 +89,10 @@ sched_init(struct pool_svc_sched *sched) return dss_abterr2der(rc); } - sched->psc_svc_rf = -1; - sched->psc_force_notify = false; sched->psc_in_progress = false; sched->psc_canceled = false; + sched->psc_arg = NULL; + sched->psc_rc = 0; return 0; } @@ -104,10 +104,12 @@ sched_fini(struct pool_svc_sched *sched) } static void -sched_begin(struct pool_svc_sched *sched) +sched_begin(struct pool_svc_sched *sched, void *arg) { sched->psc_in_progress = true; sched->psc_canceled = false; + sched->psc_arg = arg; + sched->psc_rc = 0; } static void @@ -118,20 +120,32 @@ sched_end(struct pool_svc_sched *sched) } static void -sched_cancel_and_wait(struct pool_svc_sched *sched) +sched_cancel(struct pool_svc_sched *sched) +{ + if (sched->psc_in_progress) + sched->psc_canceled = true; +} + +static void +sched_wait(struct pool_svc_sched *sched) { /* * The CV requires a mutex. We don't otherwise need it for ULTs within * the same xstream. 
*/ ABT_mutex_lock(sched->psc_mutex); - if (sched->psc_in_progress) - sched->psc_canceled = true; while (sched->psc_in_progress) ABT_cond_wait(sched->psc_cv, sched->psc_mutex); ABT_mutex_unlock(sched->psc_mutex); } +static void +sched_cancel_and_wait(struct pool_svc_sched *sched) +{ + sched_cancel(sched); + sched_wait(sched); +} + /* Pool service */ struct pool_svc { struct ds_rsvc ps_rsvc; @@ -144,8 +158,9 @@ struct pool_svc { struct ds_pool *ps_pool; struct pool_svc_events ps_events; uint32_t ps_global_version; + int ps_svc_rf; + bool ps_force_notify;/* MS of PS membership */ struct pool_svc_sched ps_reconf_sched; - /* Check all containers RF for the pool */ struct pool_svc_sched ps_rfcheck_sched; /* The global pool map version on all pool targets */ @@ -1035,6 +1050,8 @@ pool_svc_alloc_cb(d_iov_t *id, struct ds_rsvc **rsvc) uuid_copy(svc->ps_uuid, id->iov_buf); D_INIT_LIST_HEAD(&svc->ps_events.pse_queue); svc->ps_events.pse_handler = ABT_THREAD_NULL; + svc->ps_svc_rf = -1; + svc->ps_force_notify = false; rc = ABT_rwlock_create(&svc->ps_lock); if (rc != ABT_SUCCESS) { @@ -1536,9 +1553,9 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf, svc_rf_entry = daos_prop_entry_get(*prop, DAOS_PROP_PO_SVC_REDUN_FAC); D_ASSERT(svc_rf_entry != NULL); if (daos_prop_is_set(svc_rf_entry)) - svc->ps_reconf_sched.psc_svc_rf = svc_rf_entry->dpe_val; + svc->ps_svc_rf = svc_rf_entry->dpe_val; else - svc->ps_reconf_sched.psc_svc_rf = -1; + svc->ps_svc_rf = -1; out_lock: ABT_rwlock_unlock(svc->ps_lock); @@ -1637,9 +1654,10 @@ pool_svc_check_node_status(struct pool_svc *svc) D_PRINT(fmt, ## __VA_ARGS__); \ } while (0) -static void pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)); -static void pool_svc_reconf_ult(void *arg); +static int pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, + void (*func)(void *), void *arg); +static int pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, + uint32_t map_version_for, bool sync_remove); static void pool_svc_rfcheck_ult(void *arg); static int @@ -1653,7 +1671,6 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) daos_prop_t *prop = NULL; bool cont_svc_up = false; bool events_initialized = false; - bool svc_scheduled = false; d_rank_t rank = dss_self_rank(); int rc; @@ -1700,10 +1717,22 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) * Just in case the previous leader didn't finish the last series of * reconfigurations or the last MS notification. 
*/ - svc->ps_reconf_sched.psc_force_notify = true; - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); - svc_scheduled = true; + svc->ps_force_notify = true; + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc == -DER_OP_CANCELED) { + DL_INFO(rc, DF_UUID": not scheduling pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + } else if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + goto out; + } + + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, NULL /* arg */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + goto out; + } rc = ds_pool_iv_prop_update(svc->ps_pool, prop); if (rc) { @@ -1742,11 +1771,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) if (rc != 0) { if (events_initialized) fini_events(svc); - if (svc_scheduled) { - sched_cancel_and_wait(&svc->ps_reconf_sched); - sched_cancel_and_wait(&svc->ps_rfcheck_sched); - } - + sched_cancel_and_wait(&svc->ps_rfcheck_sched); + sched_cancel_and_wait(&svc->ps_reconf_sched); if (cont_svc_up) ds_cont_svc_step_down(svc->ps_cont_svc); if (svc->ps_pool != NULL) @@ -5581,27 +5607,47 @@ ds_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, return rc; } +struct pool_svc_reconf_arg { + struct pool_map *sca_map; + uint32_t sca_map_version_for; + bool sca_sync_remove; +}; + +/* Must be used with pool_svc.ps_reconf_sched (see container_of below). */ static void -pool_svc_reconf_ult(void *arg) -{ - struct pool_svc *svc = arg; - struct pool_svc_sched *reconf = &svc->ps_reconf_sched; - d_rank_list_t *current; - d_rank_list_t *to_add; - d_rank_list_t *to_remove; - d_rank_list_t *new; - int rc; +pool_svc_reconf_ult(void *varg) +{ + struct pool_svc_sched *reconf = varg; + struct pool_svc_reconf_arg *arg = reconf->psc_arg; + struct pool_svc *svc; + struct pool_map *map; + d_rank_list_t *current; + d_rank_list_t *to_add; + d_rank_list_t *to_remove; + d_rank_list_t *new; + int rc; + + svc = container_of(reconf, struct pool_svc, ps_reconf_sched); + + if (arg->sca_map == NULL) + map = svc->ps_pool->sp_map; + else + map = arg->sca_map; D_DEBUG(DB_MD, DF_UUID": begin\n", DP_UUID(svc->ps_uuid)); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } /* When there are pending events, the pool map may be unstable. 
*/ - while (events_pending(svc)) { + while (!arg->sca_sync_remove && events_pending(svc)) { dss_sleep(3000 /* ms */); - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out; + } } rc = rdb_get_ranks(svc->ps_rsvc.s_db, ¤t); @@ -5611,10 +5657,12 @@ pool_svc_reconf_ult(void *arg) goto out; } - ABT_rwlock_rdlock(svc->ps_pool->sp_lock); - rc = ds_pool_plan_svc_reconfs(reconf->psc_svc_rf, svc->ps_pool->sp_map, current, - dss_self_rank(), &to_add, &to_remove); - ABT_rwlock_unlock(svc->ps_pool->sp_lock); + if (arg->sca_map == NULL) + ABT_rwlock_rdlock(svc->ps_pool->sp_lock); + rc = ds_pool_plan_svc_reconfs(svc->ps_svc_rf, map, current, dss_self_rank(), &to_add, + &to_remove); + if (arg->sca_map == NULL) + ABT_rwlock_unlock(svc->ps_pool->sp_lock); if (rc != 0) { D_ERROR(DF_UUID": cannot plan pool service reconfigurations: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); @@ -5622,7 +5670,7 @@ pool_svc_reconf_ult(void *arg) } D_DEBUG(DB_MD, DF_UUID": svc_rf=%d current=%u to_add=%u to_remove=%u\n", - DP_UUID(svc->ps_uuid), reconf->psc_svc_rf, current->rl_nr, to_add->rl_nr, + DP_UUID(svc->ps_uuid), svc->ps_svc_rf, current->rl_nr, to_add->rl_nr, to_remove->rl_nr); /* @@ -5633,14 +5681,17 @@ pool_svc_reconf_ult(void *arg) * of the two calls returns an error, we still need to report any * membership changes to the MS. */ - if (to_add->rl_nr > 0) + if (!arg->sca_sync_remove && to_add->rl_nr > 0) { ds_rsvc_add_replicas_s(&svc->ps_rsvc, to_add, ds_rsvc_get_md_cap()); - if (reconf->psc_canceled) - goto out_to_add_remove; - if (to_add->rl_nr > to_remove->rl_nr) - to_remove->rl_nr = 0; - else - to_remove->rl_nr -= to_add->rl_nr; + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; + goto out_to_add_remove; + } + if (to_add->rl_nr > to_remove->rl_nr) + to_remove->rl_nr = 0; + else + to_remove->rl_nr -= to_add->rl_nr; + } if (to_remove->rl_nr > 0) { d_rank_list_t *tmp; @@ -5669,26 +5720,35 @@ pool_svc_reconf_ult(void *arg) d_rank_list_sort(current); d_rank_list_sort(new); - if (reconf->psc_force_notify || !d_rank_list_identical(new, current)) { + if (svc->ps_force_notify || !d_rank_list_identical(new, current)) { + int rc_tmp; + /* * Send RAS event to control-plane over dRPC to indicate * change in pool service replicas. */ - rc = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); - if (rc == 0) - reconf->psc_force_notify = false; + rc_tmp = ds_notify_pool_svc_update(&svc->ps_uuid, new, svc->ps_rsvc.s_term); + if (rc_tmp == 0) + svc->ps_force_notify = false; else - D_ERROR(DF_UUID": replica update notify failure: "DF_RC"\n", - DP_UUID(svc->ps_uuid), DP_RC(rc)); + DL_ERROR(rc_tmp, DF_UUID": replica update notify failure", + DP_UUID(svc->ps_uuid)); } d_rank_list_free(new); } - if (reconf->psc_canceled) + if (reconf->psc_canceled) { + rc = -DER_OP_CANCELED; goto out_to_add_remove; + } - /* Ignore the return value of this ds_rsvc_dist_stop call. */ - if (to_remove->rl_nr > 0) + /* + * Don't attempt to destroy any removed replicas in the "synchronous + * remove" mode, so that we don't delay pool_svc_update_map_internal + * for too long. Ignore the return value of this ds_rsvc_dist_stop + * call. + */ + if (!arg->sca_sync_remove && to_remove->rl_nr > 0) ds_rsvc_dist_stop(svc->ps_rsvc.s_class, &svc->ps_rsvc.s_id, to_remove, NULL /* excluded */, svc->ps_rsvc.s_term, true /* destroy */); @@ -5698,14 +5758,17 @@ pool_svc_reconf_ult(void *arg) out_cur: d_rank_list_free(current); out: + /* Do not yield between the D_FREE and the sched_end. 
*/ + D_FREE(reconf->psc_arg); + reconf->psc_rc = rc; sched_end(reconf); ABT_cond_broadcast(reconf->psc_cv); - D_DEBUG(DB_MD, DF_UUID": end\n", DP_UUID(svc->ps_uuid)); + D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); } -static void -pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, - void (*func)(void *)) +static int +pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, void (*func)(void *), + void *arg) { enum ds_rsvc_state state; int rc; @@ -5720,13 +5783,13 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, if (state == DS_RSVC_DRAINING) { D_DEBUG(DB_MD, DF_UUID": end: service %s\n", DP_UUID(svc->ps_uuid), ds_rsvc_state_str(state)); - return; + return -DER_OP_CANCELED; } D_ASSERT(&svc->ps_reconf_sched == sched || &svc->ps_rfcheck_sched == sched); sched_cancel_and_wait(sched); - sched_begin(sched); + sched_begin(sched, arg); /* * An extra svc leader reference is not required, because @@ -5734,14 +5797,16 @@ pool_svc_schedule(struct pool_svc *svc, struct pool_svc_sched *sched, * * ULT tracking is achieved through sched, not a ULT handle. */ - rc = dss_ult_create(func, svc, DSS_XS_SELF, 0, 0, NULL /* ult */); + rc = dss_ult_create(func, sched, DSS_XS_SELF, 0, 0, NULL /* ult */); if (rc != 0) { D_ERROR(DF_UUID": failed to create ULT: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); sched_end(sched); + return rc; } D_DEBUG(DB_MD, DF_UUID": end: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); + return 0; } static int pool_find_all_targets_by_addr(struct pool_map *map, @@ -5771,11 +5836,12 @@ cont_rf_check_cb(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx, void *ar return 0; } +/* Must be used with pool_svc.ps_rfcheck_sched (see container_of below). */ static void pool_svc_rfcheck_ult(void *arg) { - struct pool_svc *svc = arg; - int rc; + struct pool_svc *svc = container_of(arg, struct pool_svc, ps_rfcheck_sched); + int rc; do { /* retry until some one stop the pool svc(rc == 1) or succeed */ @@ -5795,6 +5861,69 @@ pool_svc_rfcheck_ult(void *arg) ABT_cond_broadcast(svc->ps_rfcheck_sched.psc_cv); } +/* + * If map is NULL, map_version_for must be provided, and svc->ps_pool->sp_map + * will be used during reconfiguration; otherwise, map_version_for is ignored. + */ +static int +pool_svc_schedule_reconf(struct pool_svc *svc, struct pool_map *map, uint32_t map_version_for, + bool sync_remove) +{ + struct pool_svc_reconf_arg *reconf_arg; + uint32_t v; + int rc; + + if (map == NULL) + v = map_version_for; + else + v = pool_map_get_version(map); + + if (svc->ps_reconf_sched.psc_in_progress) { + uint32_t v_in_progress; + + /* Safe to access psc_arg as long as we don't yield. */ + reconf_arg = svc->ps_reconf_sched.psc_arg; + if (reconf_arg->sca_map == NULL) + v_in_progress = reconf_arg->sca_map_version_for; + else + v_in_progress = pool_map_get_version(reconf_arg->sca_map); + if (v_in_progress >= v) { + D_DEBUG(DB_MD, DF_UUID": stale request: v_in_progress=%u v=%u\n", + DP_UUID(svc->ps_uuid), v_in_progress, v); + return -DER_OP_CANCELED; + } + } + + D_ALLOC_PTR(reconf_arg); + if (reconf_arg == NULL) + return -DER_NOMEM; + reconf_arg->sca_map = map; + reconf_arg->sca_map_version_for = v; + reconf_arg->sca_sync_remove = sync_remove; + + /* + * If successful, this call passes the ownership of reconf_arg to + * pool_svc_reconf_ult. 
+ */ + rc = pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult, reconf_arg); + if (rc != 0) { + D_FREE(reconf_arg); + return rc; + } + + if (sync_remove) { + sched_wait(&svc->ps_reconf_sched); + + rc = svc->ps_reconf_sched.psc_rc; + if (rc != 0) { + DL_ERROR(rc, DF_UUID": pool service reconfigurator", DP_UUID(svc->ps_uuid)); + return rc; + } + } + + return 0; +} + /* * Perform an update to the pool map of \a svc. * @@ -5834,6 +5963,7 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, uint32_t map_version_before; uint32_t map_version; struct pool_buf *map_buf = NULL; + struct pool_domain *node; bool updated = false; int rc; @@ -5903,13 +6033,13 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, } } } + /* * Attempt to modify the temporary pool map and save its versions * before and after. If the version hasn't changed, we are done. */ map_version_before = pool_map_get_version(map); - rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, - true); + rc = ds_pool_map_tgts_update(map, tgts, opc, exclude_rank, tgt_map_ver, true); if (rc != 0) D_GOTO(out_map, rc); map_version = pool_map_get_version(map); @@ -5918,6 +6048,35 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (map_version == map_version_before) D_GOTO(out_map, rc = 0); + /* + * If the map modification affects myself, leave it to a new PS leader + * if there's another PS replica, or reject it. + */ + node = pool_map_find_node_by_rank(map, dss_self_rank()); + if (node == NULL || !(node->do_comp.co_status & POOL_SVC_MAP_STATES)) { + d_rank_list_t *replicas; + + rc = rdb_get_ranks(svc->ps_rsvc.s_db, &replicas); + if (replicas->rl_nr == 1) { + D_ERROR(DF_UUID": rejecting rank exclusion: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rc = -DER_INVAL; + } else { + /* + * The handling is unreliable, for we may become a new + * PS leader again; a more reliable implementation + * requires the currently unavailable Raft leadership + * transfer support. + */ + D_INFO(DF_UUID": resigning PS leadership: self removal requested\n", + DP_UUID(svc->ps_uuid)); + rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term); + rc = -DER_NOTLEADER; + } + d_rank_list_free(replicas); + goto out_map; + } + /* Write the new pool map. */ rc = pool_buf_extract(map, &map_buf); if (rc != 0) @@ -5926,6 +6085,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, if (rc != 0) goto out_map_buf; + /* + * Remove all undesired PS replicas (if any) before committing map, so + * that the set of PS replicas remains a subset of the pool groups. 
+ */ + rc = pool_svc_schedule_reconf(svc, map, 0 /* map_version_for */, true /* sync_remove */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": failed to remove undesired pool service replicas", + DP_UUID(svc->ps_uuid)); + goto out_map; + } + rc = rdb_tx_commit(&tx); if (rc != 0) { D_DEBUG(DB_MD, DF_UUID": failed to commit: "DF_RC"\n", @@ -5951,9 +6121,17 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, ds_rsvc_request_map_dist(&svc->ps_rsvc); - pool_svc_schedule(svc, &svc->ps_reconf_sched, pool_svc_reconf_ult); - if (opc == POOL_EXCLUDE) - pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult); + rc = pool_svc_schedule_reconf(svc, NULL /* map */, map_version, false /* sync_remove */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule pool service reconfiguration", + DP_UUID(svc->ps_uuid)); + + if (opc == POOL_EXCLUDE) { + rc = pool_svc_schedule(svc, &svc->ps_rfcheck_sched, pool_svc_rfcheck_ult, + NULL /* arg */); + if (rc != 0) + DL_INFO(rc, DF_UUID": failed to schedule RF check", DP_UUID(svc->ps_uuid)); + } out_map_buf: pool_buf_free(map_buf); @@ -6841,7 +7019,7 @@ ds_pool_ranks_get_handler(crt_rpc_t *rpc) D_GOTO(out, rc = -DER_INVAL); /* Get available ranks */ - rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATUS, &out_ranks); + rc = ds_pool_get_ranks(in->prgi_op.pi_uuid, POOL_GROUP_MAP_STATES, &out_ranks); if (rc != 0) { D_ERROR(DF_UUID ": get ranks failed, " DF_RC "\n", DP_UUID(in->prgi_op.pi_uuid), DP_RC(rc)); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 8d07e66d9ea..8c8eddaa561 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -1376,7 +1376,7 @@ update_pool_group(struct ds_pool *pool, struct pool_map *map) D_DEBUG(DB_MD, DF_UUID": %u -> %u\n", DP_UUID(pool->sp_uuid), version, pool_map_get_version(map)); - rc = map_ranks_init(map, POOL_GROUP_MAP_STATUS, &ranks); + rc = map_ranks_init(map, POOL_GROUP_MAP_STATES, &ranks); if (rc != 0) return rc; diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index 3657b84d647..75beb2bc8d8 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -90,7 +90,7 @@ ds_pool_map_rank_up(struct pool_map *map, d_rank_t rank) return false; D_ASSERTF(rc == 1, "%d\n", rc); - return node->do_comp.co_status & POOL_GROUP_MAP_STATUS; + return node->do_comp.co_status & POOL_GROUP_MAP_STATES; } int @@ -332,12 +332,12 @@ compute_svc_reconf_objective(int svc_rf, d_rank_list_t *replicas) } /* - * Find n ranks with states in nodes but not in blacklist_0 or blacklist_1, and - * append them to list. Return the number of ranks appended or an error. + * Find n ranks with states in nodes but not in blacklist, and append them to + * list. Return the number of ranks appended or an error. 
*/ static int find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnodes, - d_rank_list_t *blacklist_0, d_rank_list_t *blacklist_1, d_rank_list_t *list) + d_rank_list_t *blacklist, d_rank_list_t *list) { int n_appended = 0; int i; @@ -349,9 +349,7 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode for (i = 0; i < nnodes; i++) { if (!(nodes[i].do_comp.co_status & states)) continue; - if (d_rank_list_find(blacklist_0, nodes[i].do_comp.co_rank, NULL /* idx */)) - continue; - if (d_rank_list_find(blacklist_1, nodes[i].do_comp.co_rank, NULL /* idx */)) + if (d_rank_list_find(blacklist, nodes[i].do_comp.co_rank, NULL /* idx */)) continue; rc = d_rank_list_append(list, nodes[i].do_comp.co_rank); if (rc != 0) @@ -370,7 +368,10 @@ find_ranks(int n, pool_comp_state_t states, struct pool_domain *nodes, int nnode * caller is responsible for freeing \a to_add_out and \a to_remove_out with * d_rank_list_free. * - * We desire replicas in UP or UPIN states. + * We desire replicas in POOL_SVC_MAP_STATES. The \a self replica must be in a + * desired state in \a map, or this function will return -DER_INVAL. All + * undesired replicas, if any, will be appended to \a to_remove, so that no + * replica is outside the pool group. * * If removals are necessary, we only append desired replicas to \a * to_remove_out after all undesired replicas have already been appended to the @@ -392,12 +393,10 @@ int ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replicas, d_rank_t self, d_rank_list_t **to_add_out, d_rank_list_t **to_remove_out) { - const pool_comp_state_t desired_states = PO_COMP_ST_UP | PO_COMP_ST_UPIN; struct pool_domain *nodes = NULL; int nnodes; int objective; d_rank_list_t *desired = NULL; - d_rank_list_t *undesired = NULL; d_rank_list_t *to_add = NULL; d_rank_list_t *to_remove = NULL; int i; @@ -409,93 +408,56 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic objective = compute_svc_reconf_objective(svc_rf, replicas); desired = d_rank_list_alloc(0); - undesired = d_rank_list_alloc(0); to_add = d_rank_list_alloc(0); to_remove = d_rank_list_alloc(0); - if (desired == NULL || undesired == NULL || to_add == NULL || to_remove == NULL) { + if (desired == NULL || to_add == NULL || to_remove == NULL) { rc = -DER_NOMEM; goto out; } - /* Classify replicas into desired and undesired. */ + /* Classify replicas into desired and to_remove. */ for (i = 0; i < replicas->rl_nr; i++) { + d_rank_t rank = replicas->rl_ranks[i]; d_rank_list_t *list; int j; for (j = 0; j < nnodes; j++) - if (nodes[j].do_comp.co_rank == replicas->rl_ranks[i]) + if (nodes[j].do_comp.co_rank == rank) break; if (j == nnodes) /* not found (hypothetical) */ - list = undesired; - else if (nodes[j].do_comp.co_status & desired_states) + list = to_remove; + else if (nodes[j].do_comp.co_status & POOL_SVC_MAP_STATES) list = desired; else - list = undesired; - rc = d_rank_list_append(list, replicas->rl_ranks[i]); + list = to_remove; + if (rank == self && list == to_remove) { + D_ERROR("self undesired: state=%x\n", + j < nnodes ? 
nodes[j].do_comp.co_status : -1); + rc = -DER_INVAL; + goto out; + } + rc = d_rank_list_append(list, rank); if (rc != 0) goto out; } - D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, undesired->rl_nr, + D_DEBUG(DB_MD, "desired=%u undesired=%u objective=%d\n", desired->rl_nr, to_remove->rl_nr, objective); - /* - * If we have too many replicas, remove undesired ones (if any) before - * desired ones. - */ - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, undesired, to_remove); - if (rc == -DER_NONEXIST) - break; - else if (rc != 0) - goto out; - } - while (desired->rl_nr + undesired->rl_nr > objective) { - rc = move_rank_except_for(self, desired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } - - /* If necessary, add more replicas towards the objective. */ - if (desired->rl_nr + undesired->rl_nr < objective) { - rc = find_ranks(objective - desired->rl_nr - undesired->rl_nr, desired_states, - nodes, nnodes, desired, undesired, to_add); - if (rc < 0) - goto out; - /* Copy the new ones to desired. */ - for (i = 0; i < to_add->rl_nr; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); + if (desired->rl_nr > objective) { + /* Too many replicas, remove one by one. */ + do { + rc = move_rank_except_for(self, desired, to_remove); + D_ASSERT(rc != -DER_NONEXIST); if (rc != 0) goto out; - } - } - - /* - * If there are undesired ones, try to replace as many of them as - * possible. - */ - if (undesired->rl_nr > 0) { - int n; - - rc = find_ranks(undesired->rl_nr, desired_states, nodes, nnodes, desired, undesired, - to_add); + } while (desired->rl_nr > objective); + } else if (desired->rl_nr < objective) { + /* Too few replicas, add some. */ + rc = find_ranks(objective - desired->rl_nr, POOL_SVC_MAP_STATES, nodes, nnodes, + desired, to_add); if (rc < 0) goto out; - n = rc; - /* Copy the n replacements to desired. */ - for (i = 0; i < n; i++) { - rc = d_rank_list_append(desired, to_add->rl_ranks[i]); - if (rc != 0) - goto out; - } - /* Move n replicas from undesired to to_remove. 
*/ - for (i = 0; i < n; i++) { - rc = move_rank_except_for(self, undesired, to_remove); - D_ASSERT(rc != -DER_NONEXIST); - if (rc != 0) - goto out; - } } rc = 0; @@ -507,7 +469,6 @@ ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replic d_rank_list_free(to_remove); d_rank_list_free(to_add); } - d_rank_list_free(undesired); d_rank_list_free(desired); return rc; } @@ -546,10 +507,6 @@ testu_rank_sets_belong(d_rank_list_t *x, d_rank_t *y_ranks, int y_ranks_len) static struct pool_map * testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_down_ranks) { - d_rank_list_t ranks_list = { - .rl_ranks = ranks, - .rl_nr = n_ranks - }; struct pool_buf *map_buf; struct pool_map *map; uint32_t *domains; @@ -567,8 +524,7 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ domains[3 + i] = i; rc = gen_pool_buf(NULL /* map */, &map_buf, 1 /* map_version */, n_domains, - n_ranks, n_ranks * 1 /* ntargets */, domains, &ranks_list, - 1 /* dss_tgt_nr */); + n_ranks, n_ranks * 1 /* ntargets */, domains, 1 /* dss_tgt_nr */); D_ASSERT(rc == 0); rc = pool_map_create(map_buf, 1, &map); @@ -590,7 +546,8 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ static void testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ranks[], int n_down_ranks, d_rank_t replicas_ranks[], int n_replicas_ranks, - d_rank_t self, d_rank_list_t **to_add, d_rank_list_t **to_remove) + d_rank_t self, int expected_rc, d_rank_list_t **to_add, + d_rank_list_t **to_remove) { struct pool_map *map; d_rank_list_t replicas_list; @@ -602,7 +559,7 @@ testu_plan_svc_reconfs(int svc_rf, d_rank_t ranks[], int n_ranks, d_rank_t down_ replicas_list.rl_nr = n_replicas_ranks; rc = ds_pool_plan_svc_reconfs(svc_rf, map, &replicas_list, self, to_add, to_remove); - D_ASSERT(rc == 0); + D_ASSERTF(rc == expected_rc, "rc=%d expected_rc=%d\n", rc, expected_rc); pool_map_decref(map); } @@ -614,10 +571,11 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_list_t *to_add; d_rank_list_t *to_remove; -#define call_testu_plan_svc_reconfs \ +#define call_testu_plan_svc_reconfs(expected_rc) \ testu_plan_svc_reconfs(svc_rf, ranks, ARRAY_SIZE(ranks), down_ranks, \ ARRAY_SIZE(down_ranks), replicas_ranks, \ - ARRAY_SIZE(replicas_ranks), self, &to_add, &to_remove); + ARRAY_SIZE(replicas_ranks), self, expected_rc, &to_add, \ + &to_remove); #define call_d_rank_list_free \ d_rank_list_free(to_add); \ @@ -630,7 +588,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -638,6 +596,16 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } + /* The PS leader itself must not be undesired. */ + { + int svc_rf = 1; + d_rank_t ranks[] = {0, 1, 2}; + d_rank_t down_ranks[] = {0}; + d_rank_t replicas_ranks[] = {0, 1, 2}; + + call_testu_plan_svc_reconfs(-DER_INVAL) + } + /* One lonely replica. 
*/ { int svc_rf = 0; @@ -645,7 +613,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {}; d_rank_t replicas_ranks[] = {0}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 0); @@ -661,7 +629,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -678,7 +646,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -695,7 +663,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -704,17 +672,19 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS holds its ground when there's no replacement. */ + /* A PS removes the down rank even when there's no replacement. */ { int svc_rf = 1; d_rank_t ranks[] = {0, 1, 2}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -728,7 +698,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_add_candidates[] = {3, 4}; d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 1); D_ASSERT(testu_rank_sets_belong(to_add, expected_to_add_candidates, @@ -750,7 +720,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0}; d_rank_t expected_to_add[] = {1, 2, 3}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); @@ -767,7 +737,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t replicas_ranks[] = {0, 1, 2}; d_rank_t expected_to_remove[] = {1, 2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, @@ -776,19 +746,21 @@ ds_pool_test_plan_svc_reconfs(void) call_d_rank_list_free } - /* A PS keeps down ranks while growing. */ + /* A PS removes down ranks while growing. 
*/ { int svc_rf = 2; - d_rank_t ranks[] = {0, 1, 2, 3, 4}; + d_rank_t ranks[] = {0, 1, 2, 3, 4, 5}; d_rank_t down_ranks[] = {2}; d_rank_t replicas_ranks[] = {0, 1, 2}; - d_rank_t expected_to_add[] = {3, 4}; + d_rank_t expected_to_add[] = {3, 4, 5}; + d_rank_t expected_to_remove[] = {2}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 0); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } @@ -802,7 +774,7 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t expected_to_remove_candidates[] = {1, 2, 3, 4, 5, 6, 7, 8}; d_rank_list_t tmp; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(to_add->rl_nr == 0); D_ASSERT(to_remove->rl_nr == 4); @@ -822,15 +794,14 @@ ds_pool_test_plan_svc_reconfs(void) d_rank_t down_ranks[] = {1, 3, 5, 7}; d_rank_t replicas_ranks[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; d_rank_t expected_to_add[] = {9}; - d_rank_t expected_to_remove_candidates[] = {1, 3, 5, 7}; + d_rank_t expected_to_remove[] = {1, 3, 5, 7}; - call_testu_plan_svc_reconfs + call_testu_plan_svc_reconfs(0) D_ASSERT(testu_rank_sets_identical(to_add, expected_to_add, ARRAY_SIZE(expected_to_add))); - D_ASSERT(to_remove->rl_nr == 3); - D_ASSERT(testu_rank_sets_belong(to_remove, expected_to_remove_candidates, - ARRAY_SIZE(expected_to_remove_candidates))); + D_ASSERT(testu_rank_sets_identical(to_remove, expected_to_remove, + ARRAY_SIZE(expected_to_remove))); call_d_rank_list_free } From 7229422791e82c1a2c3bf2ed8d149af13241e643 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 5 Oct 2023 01:08:14 -0700 Subject: [PATCH 11/11] DAOS-14450 rebuild: Only print target buffers if there are any (#13127) Only show tgts_buf if there are real targets in task structure. Signed-off-by: Di Wang --- src/rebuild/srv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index e16583436ce..5b1c2089266 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1093,7 +1093,7 @@ rebuild_debug_print_queue() * This only accumulates the targets in a single task, so it doesn't * need to be very big. 200 bytes is enough for ~30 5-digit target ids */ - char tgts_buf[200]; + char tgts_buf[200] = { 0 }; int i; /* Position in stack buffer where str data should be written next */ size_t tgts_pos; @@ -1121,7 +1121,7 @@ rebuild_debug_print_queue() } D_DEBUG(DB_REBUILD, DF_UUID" op=%s ver=%u tgts=%s\n", DP_UUID(task->dst_pool_uuid), RB_OP_STR(task->dst_rebuild_op), - task->dst_map_ver, tgts_buf); + task->dst_map_ver, task->dst_tgts.pti_number > 0 ? tgts_buf : "None"); } }
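As a footnote to the DAOS-14450 change above, here is a minimal standalone sketch of the same pattern; all names are illustrative and none of this is the actual rebuild code.

/*
 * Standalone sketch of the pattern adopted above: the scratch buffer is
 * zero-initialized and is only referenced in the log message when at least
 * one target id was appended; otherwise a placeholder is printed.
 */
#include <stdio.h>

static void
print_targets(const int *tgts, int ntgts)
{
	char   buf[200] = { 0 };	/* zeroed so an empty list never prints garbage */
	size_t pos = 0;
	int    i;

	for (i = 0; i < ntgts; i++) {
		int n = snprintf(buf + pos, sizeof(buf) - pos, "%d ", tgts[i]);

		if (n < 0 || (size_t)n >= sizeof(buf) - pos)
			break;		/* stop appending once the buffer is full */
		pos += (size_t)n;
	}
	printf("tgts=%s\n", ntgts > 0 ? buf : "None");
}

int
main(void)
{
	int tgts[] = { 1, 2, 3 };

	print_targets(tgts, 3);		/* tgts=1 2 3 */
	print_targets(NULL, 0);		/* tgts=None */
	return 0;
}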