From 1d7b7a243e001fc07f24b8e648a01c8ec8e69570 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Thu, 22 Jun 2023 18:36:18 -0500 Subject: [PATCH 01/21] DAOS-13626 build: add libfabric 1.18.0 patches (#12487) (#12495) - add prov/tcp patch to fix registration lock issue - add prov/opx patch to fix 32-bit conversion issue Signed-off-by: Jerome Soumagne --- utils/build.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/build.config b/utils/build.config index df8a456afd8..c81f4630ceb 100644 --- a/utils/build.config +++ b/utils/build.config @@ -14,4 +14,4 @@ UCX=v1.13.0 [patch_versions] spdk=https://github.com/spdk/spdk/commit/b0aba3fcd5aceceea530a702922153bc75664978.diff,https://github.com/spdk/spdk/commit/445a4c808badbad3942696ecf16fa60e8129a747.diff -ofi=https://github.com/ofiwg/libfabric/commit/2abe57cd893d70a52137758c2c5b3c7fbf0be2c1.diff,https://github.com/ofiwg/libfabric/commit/9057c4c95b6440774dc92eb8219d297515141137.diff +ofi=https://github.com/ofiwg/libfabric/commit/2abe57cd893d70a52137758c2c5b3c7fbf0be2c1.diff,https://github.com/ofiwg/libfabric/commit/9057c4c95b6440774dc92eb8219d297515141137.diff,https://github.com/ofiwg/libfabric/commit/91b05e9d9ee63c49eac032029dc462fa32ef632f.diff,https://raw.githubusercontent.com/daos-stack/libfabric/06c3dce3f046f1869e87e840bd6fece81882e9af/prov_opx_u32_extended.patch From 4f3a3d60c00f489df81ed01098d2ce9f2592c3ce Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 23 Jun 2023 13:14:20 +0100 Subject: [PATCH 02/21] DAOS-13684 test: Bump docker nlt system_ram_reserved (#12432) (#12471) Increase system RAM reservation when calculating tmpfs size in NLT docker tests to avoid intermittent available memory check failures on engine start due to memory contention across multiple containers on the same host. Signed-off-by: Tom Nabarro --- ci/docker_nlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker_nlt.sh b/ci/docker_nlt.sh index 5e422435261..d7dbc933fa1 100755 --- a/ci/docker_nlt.sh +++ b/ci/docker_nlt.sh @@ -22,7 +22,7 @@ pushd "$TMP_DIR" set +e sudo --preserve-env=VIRTUAL_ENV,PATH ./node_local_test.py \ - --no-root --memcheck no --system-ram-reserved 32 --server-debug WARN "$@" + --no-root --memcheck no --system-ram-reserved 48 --server-debug WARN "$@" RC=$? set -e From ee8ce11a18247167c33f78368d6e0bf08d403bd4 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Fri, 23 Jun 2023 10:40:48 -0400 Subject: [PATCH 03/21] DAOS-13785 gurt: protect d_log_refcount with d_log_lock (#12467) (#12474) Coverity fix for CID 1443180 Signed-off-by: Kenneth Cain --- src/gurt/debug.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gurt/debug.c b/src/gurt/debug.c index 960db55b001..4fb112b7dcc 100644 --- a/src/gurt/debug.c +++ b/src/gurt/debug.c @@ -560,9 +560,8 @@ d_log_init(void) void d_log_fini(void) { - D_ASSERT(d_log_refcount > 0); - D_MUTEX_LOCK(&d_log_lock); + D_ASSERT(d_log_refcount > 0); d_log_refcount--; if (d_log_refcount == 0) { cleanup_dbg_namebit(); From bc3e204711586db13586d15fee0348d1b56eea9e Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 23 Jun 2023 16:24:19 +0100 Subject: [PATCH 04/21] DAOS-13521 control: Reset unbound NVMe devs on daos_server nvme reset (#12397) (#12482) To avoid undesired consequences related to https://github.com/spdk/spdk/issues/2926, perform an extra SPDK reset (without PCI_ALLOWED) at the end of the daos_server nvme reset command when VMD is enabled. This will reset any dangling NVMe devices left unbound after the DRIVER_OVERRIDE=none setup call was used in daos_server nvme prepare. Signed-off-by: Tom Nabarro --- src/control/cmd/daos_server/storage_nvme.go | 42 +++-- .../cmd/daos_server/storage_nvme_test.go | 165 +++++++++++------- src/control/server/storage/bdev/backend.go | 11 +- .../server/storage/bdev/backend_test.go | 13 +- src/control/server/storage/bdev/mocks.go | 16 +- src/control/server/storage/bdev/provider.go | 4 +- 6 files changed, 150 insertions(+), 101 deletions(-) diff --git a/src/control/cmd/daos_server/storage_nvme.go b/src/control/cmd/daos_server/storage_nvme.go index 494738ad24e..11b2dc1ca29 100644 --- a/src/control/cmd/daos_server/storage_nvme.go +++ b/src/control/cmd/daos_server/storage_nvme.go @@ -305,21 +305,36 @@ func resetNVMe(resetReq storage.BdevPrepareRequest, cmd *nvmeCmd, resetBackend n if err := processNVMePrepReq(cmd.Logger, cfgParam, cmd, &resetReq); err != nil { return errors.Wrap(err, "processing request parameters") } - // As reset nvme backend doesn't use NrHugepages, overwrite any set value with zero. + + // Apply request parameter field values required specifically for reset operation. resetReq.HugepageCount = 0 + resetReq.HugeNodes = "" + resetReq.CleanHugepagesOnly = false + resetReq.Reset_ = true cmd.Debugf("nvme reset request parameters: %+v", resetReq) - // TODO SPDK-2926: If VMD is enabled and PCI_ALLOWED list is set to a subset of VMD - // controllers (as specified in the server config file) then the backing - // devices of the unselected VMD controllers will be bound to no driver - // and therefore inaccessible from both OS and SPDK. Workaround is to run - // nvme scan --ignore-config to reset driver bindings. + resetResp, err := resetBackend(resetReq) + if err != nil { + return errors.Wrap(err, "nvme reset backend") + } - // Reset NVMe device access. - _, err := resetBackend(resetReq) + // SPDK-2926: If VMD has been detected, perform an extra SPDK reset (without PCI_ALLOWED) + // to reset dangling NVMe devices left unbound after the DRIVER_OVERRIDE=none + // setup call was used in nvme prepare. + if resetResp.VMDPrepared { + resetReq.PCIAllowList = "" + resetReq.PCIBlockList = "" + resetReq.EnableVMD = false // Prevents VMD endpoints being auto populated - return err + cmd.Debugf("vmd second nvme reset request parameters: %+v", resetReq) + + if _, err := resetBackend(resetReq); err != nil { + return errors.Wrap(err, "nvme reset backend") + } + } + + return nil } func (cmd *resetNVMeCmd) Execute(_ []string) error { @@ -336,7 +351,6 @@ func (cmd *resetNVMeCmd) Execute(_ []string) error { PCIAllowList: cmd.Args.PCIAllowList, PCIBlockList: cmd.PCIBlockList, DisableVFIO: cmd.DisableVFIO, - Reset_: true, } return resetNVMe(req, &cmd.nvmeCmd, scs.NvmePrepare) @@ -391,16 +405,8 @@ func (cmd *scanNVMeCmd) scanNVMe(scanBackend nvmeScanFn, prepResetBackend nvmePr cmd.Info(bld.String()) if !cmd.SkipPrep { - // TODO SPDK-2926: If VMD is enabled and PCI_ALLOWED list is set to a subset of VMD - // controllers (as specified in the server config file) then the - // backing devices of the unselected VMD controllers will be bound - // to no driver and therefore inaccessible from both OS and SPDK. - // Workaround is to run nvme scan --ignore-config to reset driver - // bindings. - req := storage.BdevPrepareRequest{ PCIAllowList: strings.Join(req.DeviceList.Devices(), storage.BdevPciAddrSep), - Reset_: true, } if err := resetNVMe(req, &cmd.nvmeCmd, prepResetBackend); err != nil { return errors.Wrap(err, diff --git a/src/control/cmd/daos_server/storage_nvme_test.go b/src/control/cmd/daos_server/storage_nvme_test.go index 2a15c5fb858..8cd0b144698 100644 --- a/src/control/cmd/daos_server/storage_nvme_test.go +++ b/src/control/cmd/daos_server/storage_nvme_test.go @@ -290,43 +290,74 @@ func TestDaosServer_resetNVMe(t *testing.T) { bmbc *bdev.MockBackendConfig iommuDisabled bool expErr error - expResetCall *storage.BdevPrepareRequest + expResetCalls []storage.BdevPrepareRequest }{ "no devices; success": { - expResetCall: &storage.BdevPrepareRequest{ - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + // If empty TargetUser in cmd, expect current user in call. + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, - "succeeds; user params": { + "succeeds; user params; vmd prepared": { resetCmd: newResetCmd(), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + bmbc: &bdev.MockBackendConfig{ + ResetRes: &storage.BdevPrepareResponse{ + // Response flag indicates VMD is active and triggers + // second reset call. + VMDPrepared: true, + }, + }, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + EnableVMD: true, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, + // VMD was acted on in first call so reset called a second time + // without allow list. EnableVMD is false to prevent VMD domain + // addresses being automatically added to allow list in backend. + { + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, - "succeeds; different target user; multi allow list": { - resetCmd: newResetCmd().WithTargetUser("bob").WithPCIAllowList(defaultMultiAddrList), - expResetCall: &storage.BdevPrepareRequest{ - TargetUser: "bob", - PCIAllowList: spaceSepMultiAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + "succeeds; different target user; multi allow list; vmd not prepared": { + resetCmd: newResetCmd(). + WithTargetUser("bob"). + WithPCIAllowList(defaultMultiAddrList), + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + TargetUser: "bob", + PCIAllowList: spaceSepMultiAddrList, + PCIBlockList: spaceSepMultiAddrList, + Reset_: true, + }, }, }, "fails; user params": { resetCmd: newResetCmd(), bmbc: &bdev.MockBackendConfig{ + ResetRes: &storage.BdevPrepareResponse{ + VMDPrepared: true, + }, ResetErr: errors.New("backend prep reset failed"), }, - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, expErr: errors.New("backend prep reset failed"), }, @@ -341,12 +372,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { "root; vfio disabled; iommu not detected": { resetCmd: newResetCmd().WithTargetUser("root").WithDisableVFIO(true), iommuDisabled: true, - expResetCall: &storage.BdevPrepareRequest{ - TargetUser: "root", - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - DisableVFIO: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + TargetUser: "root", + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + DisableVFIO: true, + Reset_: true, + }, }, }, "config parameters ignored; settings exist": { @@ -358,11 +391,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevDeviceList(test.MockPCIAddr(8)))). WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: defaultSingleAddrList, - PCIBlockList: spaceSepMultiAddrList, - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + PCIAllowList: defaultSingleAddrList, + PCIBlockList: spaceSepMultiAddrList, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters ignored; set ignore-config in cmd": { @@ -380,9 +416,12 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - EnableVMD: true, - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + EnableVMD: true, + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters applied; disable vmd": { @@ -400,11 +439,14 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), storage.BdevPciAddrSep, - test.MockPCIAddr(8)), - PCIBlockList: test.MockPCIAddr(9), - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), + storage.BdevPciAddrSep, test.MockPCIAddr(8)), + PCIBlockList: test.MockPCIAddr(9), + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, "config parameters applied; disable vfio": { @@ -419,25 +461,30 @@ func TestDaosServer_resetNVMe(t *testing.T) { WithLegacyStorage(engine.LegacyStorage{ BdevClass: storage.ClassNvme, BdevConfig: storage.BdevConfig{ - DeviceList: storage.MustNewBdevDeviceList(test.MockPCIAddr(7)), + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(7)), }, }), engine.NewConfig(). WithLegacyStorage(engine.LegacyStorage{ BdevClass: storage.ClassNvme, BdevConfig: storage.BdevConfig{ - DeviceList: storage.MustNewBdevDeviceList(test.MockPCIAddr(8)), + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(8)), }, }), ). WithBdevExclude(test.MockPCIAddr(9)). WithNrHugepages(1024). WithDisableVMD(true), - expResetCall: &storage.BdevPrepareRequest{ - PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), storage.BdevPciAddrSep, - test.MockPCIAddr(8)), - PCIBlockList: test.MockPCIAddr(9), - Reset_: true, + expResetCalls: []storage.BdevPrepareRequest{ + { + PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(7), + storage.BdevPciAddrSep, test.MockPCIAddr(8)), + PCIBlockList: test.MockPCIAddr(9), + TargetUser: getCurrentUsername(t), + Reset_: true, + }, }, }, } { @@ -482,23 +529,9 @@ func TestDaosServer_resetNVMe(t *testing.T) { mbb.PrepareCalls[0]); diff != "" { t.Fatalf("unexpected clean hugepage calls (-want, +got):\n%s\n", diff) } - if tc.expResetCall == nil { - if len(mbb.ResetCalls) != 0 { - t.Fatalf("unexpected number of reset calls, want 0 got %d", - len(mbb.ResetCalls)) - } - } else { - if len(mbb.ResetCalls) != 1 { - t.Fatalf("unexpected number of reset calls, want 1 got %d", - len(mbb.ResetCalls)) - } - // If empty TargetUser in cmd, expect current user in call. - if tc.resetCmd.TargetUser == "" { - tc.expResetCall.TargetUser = getCurrentUsername(t) - } - if diff := cmp.Diff(*tc.expResetCall, mbb.ResetCalls[0]); diff != "" { - t.Fatalf("unexpected reset calls (-want, +got):\n%s\n", diff) - } + + if diff := cmp.Diff(tc.expResetCalls, mbb.ResetCalls); diff != "" { + t.Fatalf("unexpected reset calls (-want, +got):\n%s\n", diff) } mbb.RUnlock() }) diff --git a/src/control/server/storage/bdev/backend.go b/src/control/server/storage/bdev/backend.go index 1d3122bc232..caf4e022aa8 100644 --- a/src/control/server/storage/bdev/backend.go +++ b/src/control/server/storage/bdev/backend.go @@ -235,13 +235,16 @@ func (sb *spdkBackend) prepare(req storage.BdevPrepareRequest, vmdDetect vmdDete } // reset receives function pointers for external interfaces. -func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetectFn) error { +func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetectFn) (*storage.BdevPrepareResponse, error) { + resp := &storage.BdevPrepareResponse{} + // Update request if VMD has been explicitly enabled and there are VMD endpoints configured. if err := updatePrepareRequest(sb.log, &req, vmdDetect); err != nil { - return errors.Wrapf(err, "update prepare request") + return resp, errors.Wrapf(err, "update prepare request") } + resp.VMDPrepared = req.EnableVMD - return errors.Wrap(sb.script.Reset(&req), "unbinding nvme devices from userspace drivers") + return resp, errors.Wrap(sb.script.Reset(&req), "unbinding nvme devices from userspace drivers") } // Reset will perform a lookup on the requested target user to validate existence @@ -251,7 +254,7 @@ func (sb *spdkBackend) reset(req storage.BdevPrepareRequest, vmdDetect vmdDetect // owned by the target user. // Backend call executes the SPDK setup.sh script to rebind PCI devices as selected by // devs specified in bdev_list and bdev_exclude provided in the server config file. -func (sb *spdkBackend) Reset(req storage.BdevPrepareRequest) error { +func (sb *spdkBackend) Reset(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { sb.log.Debugf("spdk backend reset (script call): %+v", req) return sb.reset(req, DetectVMD) } diff --git a/src/control/server/storage/bdev/backend_test.go b/src/control/server/storage/bdev/backend_test.go index ec3a5b657e8..76780e6ea87 100644 --- a/src/control/server/storage/bdev/backend_test.go +++ b/src/control/server/storage/bdev/backend_test.go @@ -961,6 +961,9 @@ func TestBackend_Prepare(t *testing.T) { Args: []string{"reset"}, }, }, + expResp: &storage.BdevPrepareResponse{ + VMDPrepared: true, + }, }, "prepare setup; defaults": { req: storage.BdevPrepareRequest{ @@ -1298,14 +1301,14 @@ func TestBackend_Prepare(t *testing.T) { } var gotErr error + var gotResp *storage.BdevPrepareResponse if tc.reset { - gotErr = b.reset(tc.req, mockVmdDetect) + gotResp, gotErr = b.reset(tc.req, mockVmdDetect) } else { - var gotResp *storage.BdevPrepareResponse gotResp, gotErr = b.prepare(tc.req, mockVmdDetect, mockHpClean) - if diff := cmp.Diff(tc.expResp, gotResp); diff != "" { - t.Fatalf("\nunexpected prepare response (-want, +got):\n%s\n", diff) - } + } + if diff := cmp.Diff(tc.expResp, gotResp); diff != "" { + t.Fatalf("\nunexpected prepare response (-want, +got):\n%s\n", diff) } test.CmpErr(t, tc.expErr, gotErr) diff --git a/src/control/server/storage/bdev/mocks.go b/src/control/server/storage/bdev/mocks.go index 2e732a2a761..d6239baeca0 100644 --- a/src/control/server/storage/bdev/mocks.go +++ b/src/control/server/storage/bdev/mocks.go @@ -16,9 +16,10 @@ import ( type ( MockBackendConfig struct { VMDEnabled bool // VMD is disabled by default - ResetErr error PrepareRes *storage.BdevPrepareResponse PrepareErr error + ResetRes *storage.BdevPrepareResponse + ResetErr error ScanRes *storage.BdevScanResponse ScanErr error FormatRes *storage.BdevFormatResponse @@ -93,16 +94,19 @@ func (mb *MockBackend) Prepare(req storage.BdevPrepareRequest) (*storage.BdevPre } } -func (mb *MockBackend) Reset(req storage.BdevPrepareRequest) error { +func (mb *MockBackend) Reset(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { mb.Lock() mb.ResetCalls = append(mb.ResetCalls, req) mb.Unlock() - if mb.cfg.ResetErr != nil { - return mb.cfg.ResetErr + switch { + case mb.cfg.ResetErr != nil: + return nil, mb.cfg.ResetErr + case mb.cfg.ResetRes == nil: + return &storage.BdevPrepareResponse{}, nil + default: + return mb.cfg.ResetRes, nil } - - return nil } func (mb *MockBackend) UpdateFirmware(_ string, _ string, _ int32) error { diff --git a/src/control/server/storage/bdev/provider.go b/src/control/server/storage/bdev/provider.go index 3346af77a16..fdb9cb37c08 100644 --- a/src/control/server/storage/bdev/provider.go +++ b/src/control/server/storage/bdev/provider.go @@ -19,7 +19,7 @@ type ( // Backend defines a set of methods to be implemented by a Block Device backend. Backend interface { Prepare(storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) - Reset(storage.BdevPrepareRequest) error + Reset(storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) Scan(storage.BdevScanRequest) (*storage.BdevScanResponse, error) Format(storage.BdevFormatRequest) (*storage.BdevFormatResponse, error) UpdateFirmware(pciAddr string, path string, slot int32) error @@ -62,7 +62,7 @@ func (p *Provider) Scan(req storage.BdevScanRequest) (resp *storage.BdevScanResp func (p *Provider) Prepare(req storage.BdevPrepareRequest) (*storage.BdevPrepareResponse, error) { if req.Reset_ { p.log.Debug("run bdev storage provider prepare reset") - return &storage.BdevPrepareResponse{}, p.backend.Reset(req) + return p.backend.Reset(req) } p.log.Debug("run bdev storage provider prepare setup") From 8adf1a9a7491657b1b450388f2dac1c02dd15542 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Fri, 23 Jun 2023 12:46:51 -0400 Subject: [PATCH 05/21] DAOS-13786 client: pool_free take rdlock for dp_co_list check (#12469) (#12473) Fix for Coverity CID 1444801 Signed-off-by: Kenneth Cain --- src/pool/cli.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pool/cli.c b/src/pool/cli.c index 37219efedd0..98eb3040ccd 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -87,10 +87,14 @@ pool_free(struct d_hlink *hlink) pool = container_of(hlink, struct dc_pool, dp_hlink); D_ASSERT(daos_hhash_link_empty(&pool->dp_hlink)); + + D_RWLOCK_RDLOCK(&pool->dp_co_list_lock); + D_ASSERT(d_list_empty(&pool->dp_co_list)); + D_RWLOCK_UNLOCK(&pool->dp_co_list_lock); + D_RWLOCK_DESTROY(&pool->dp_map_lock); D_MUTEX_DESTROY(&pool->dp_client_lock); D_RWLOCK_DESTROY(&pool->dp_co_list_lock); - D_ASSERT(d_list_empty(&pool->dp_co_list)); if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); From 1bc16b307c55465998166a625bcd1ed323edbe27 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 23 Jun 2023 13:01:45 -0600 Subject: [PATCH 06/21] DAOS-13720 common: Check return code for negative value (#12416) (#12477) The daos_acl_size function could return an error, although the test expects to succeed. Signed-off-by: Kris Jacque --- src/common/tests/acl_api_tests.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/common/tests/acl_api_tests.c b/src/common/tests/acl_api_tests.c index 186121bfd46..79ad3b26ae1 100644 --- a/src/common/tests/acl_api_tests.c +++ b/src/common/tests/acl_api_tests.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2021 Intel Corporation. + * (C) Copyright 2019-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1035,7 +1035,8 @@ test_acl_add_ace_everyone_to_existing_list(void **state) static void expect_add_duplicate_ace_unchanged(enum daos_acl_principal_type type) { - int num_aces = NUM_DAOS_ACL_TYPES; + int num_aces = NUM_DAOS_ACL_TYPES; + ssize_t size; struct daos_ace *ace[num_aces]; struct daos_ace *new_ace; struct daos_acl *acl; @@ -1046,10 +1047,11 @@ expect_add_duplicate_ace_unchanged(enum daos_acl_principal_type type) orig_acl = daos_acl_dup(acl); /* Create an exact duplicate */ - D_ALLOC(new_ace, daos_ace_get_size(ace[type])); + size = daos_ace_get_size(ace[type]); + assert_true(size > 0); + D_ALLOC(new_ace, size); assert_non_null(new_ace); - memcpy(new_ace, ace[type], - daos_ace_get_size(ace[type])); + memcpy(new_ace, ace[type], size); assert_rc_equal(daos_acl_add_ace(&acl, new_ace), 0); From 076cd90e8c1441c93c7050118f6e8dd7330f1f94 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 23 Jun 2023 13:11:25 -0600 Subject: [PATCH 07/21] DAOS-13595 utils: Check return codes (#12346) (#12479) Check return codes for retried functions in daos_hdlr disconnect code. Signed-off-by: Kris Jacque --- src/client/dfuse/dfuse_core.c | 11 ++--------- src/pool/cli.c | 1 + src/utils/daos_hdlr.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index aaac700d414..bb14e10bbd0 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -271,17 +271,10 @@ _ph_free(struct dfuse_pool *dfp) if (daos_handle_is_valid(dfp->dfp_poh)) { rc = daos_pool_disconnect(dfp->dfp_poh, NULL); /* Hook for fault injection testing, if the disconnect fails with out of memory - * then simply try it again, only what might have happened is that the first - * call might have disconnected, but then failed to notify about the disconnect, - * in which case the subsequent call will return -DER_NO_HDL, if that's the case - * then this is expected, if odd, behavior so silence that case and just return - * success. + * then simply try it again. */ - if (rc == -DER_NOMEM) { + if (rc == -DER_NOMEM) rc = daos_pool_disconnect(dfp->dfp_poh, NULL); - if (rc == -DER_NO_HDL) - rc = -DER_SUCCESS; - } if (rc != -DER_SUCCESS) DFUSE_TRA_ERROR(dfp, "daos_pool_disconnect() failed: " DF_RC, DP_RC(rc)); } diff --git a/src/pool/cli.c b/src/pool/cli.c index 98eb3040ccd..81b007c4351 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -774,6 +774,7 @@ pool_disconnect_cp(tse_task_t *task, void *data) */ D_ERROR("failed to notify agent of pool disconnect: "DF_RC"\n", DP_RC(rc)); + rc = 0; } /* remove pool from hhash */ diff --git a/src/utils/daos_hdlr.c b/src/utils/daos_hdlr.c index f6f87b4911b..f0cdeaf4361 100644 --- a/src/utils/daos_hdlr.c +++ b/src/utils/daos_hdlr.c @@ -1669,42 +1669,56 @@ dm_disconnect(struct cmd_args_s *ap, if (is_posix_copy) { rc = dfs_sys_umount(src_file_dfs->dfs_sys); if (rc != 0) { - rc = daos_errno2der(rc); - DH_PERROR_DER(ap, rc, "failed to unmount source"); - dfs_sys_umount(src_file_dfs->dfs_sys); + DH_PERROR_DER(ap, daos_errno2der(rc), "failed to unmount source"); + rc = dfs_sys_umount(src_file_dfs->dfs_sys); + if (rc != 0) + DH_PERROR_DER(ap, daos_errno2der(rc), + "failed to unmount source on retry"); } src_file_dfs->dfs_sys = NULL; } rc = daos_cont_close(ca->src_coh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to close source container"); - daos_cont_close(ca->src_coh, NULL); + rc = daos_cont_close(ca->src_coh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, "failed to close source container on retry"); } rc = daos_pool_disconnect(ca->src_poh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to disconnect source pool"); - daos_pool_disconnect(ca->src_poh, NULL); + rc = daos_pool_disconnect(ca->src_poh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, "failed to disconnect source pool on retry"); } } if (dst_file_dfs->type == DAOS) { if (is_posix_copy) { rc = dfs_sys_umount(dst_file_dfs->dfs_sys); if (rc != 0) { - rc = daos_errno2der(rc); - DH_PERROR_DER(ap, rc, "failed to unmount source"); - dfs_sys_umount(dst_file_dfs->dfs_sys); + DH_PERROR_DER(ap, daos_errno2der(rc), "failed to unmount source"); + rc = dfs_sys_umount(dst_file_dfs->dfs_sys); + if (rc != 0) + DH_PERROR_DER(ap, daos_errno2der(rc), + "failed to unmount source on retry"); } dst_file_dfs->dfs_sys = NULL; } rc = daos_cont_close(ca->dst_coh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to close destination container"); - daos_cont_close(ca->dst_coh, NULL); + rc = daos_cont_close(ca->dst_coh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, + "failed to close destination container on retry"); } rc = daos_pool_disconnect(ca->dst_poh, NULL); if (rc != 0) { DH_PERROR_DER(ap, rc, "failed to disconnect destination pool"); - daos_pool_disconnect(ca->dst_poh, NULL); + rc = daos_pool_disconnect(ca->dst_poh, NULL); + if (rc != 0) + DH_PERROR_DER(ap, rc, + "failed to disconnect destination pool on retry"); } } return rc; From 06a2cdc9b42430905d93b17743c460c70d6960b7 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 26 Jun 2023 16:03:33 +0100 Subject: [PATCH 08/21] DAOS-13214 control: Check engines are ready before fetching usage (#12448) (#12488) Engines send a notify ready message to indicate dRPC server is up and running. Refuse to service a storage scan request with usage flag set when a host's engines are not ready to enforce deterministic behavior. Differentiate between a dmg storage query usage command request and dmg storage scan --nvme-meta as the latter should be able to be called when engines are off-line (in which case cached data will be returned). Signed-off-by: Tom Nabarro --- src/control/server/ctl_storage_rpc.go | 26 ++++++++ src/control/server/ctl_storage_rpc_test.go | 73 +++++++++++++++++++--- src/control/server/instance_storage.go | 6 +- 3 files changed, 94 insertions(+), 11 deletions(-) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 6846e295dd1..0169f89ac4c 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -506,6 +506,21 @@ func (c *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { } } +func checkEnginesReady(instances []Engine) error { + for _, inst := range instances { + if !inst.IsReady() { + var err error = FaultDataPlaneNotStarted + if inst.IsStarted() { + err = errInstanceNotReady + } + + return errors.Wrapf(err, "instance %d", inst.Index()) + } + } + + return nil +} + // StorageScan discovers non-volatile storage hardware on node. func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScanReq) (*ctlpb.StorageScanResp, error) { if req == nil { @@ -513,6 +528,17 @@ func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScan } resp := new(ctlpb.StorageScanResp) + // In the case that usage stats are being requested, relevant flags for both SCM and NVMe + // will be set and so fail if engines are not ready for comms. This restriction should not + // be applied if only the Meta flag is set in the NVMe component of the request to continue + // to support off-line storage scan functionality which uses cached stats (e.g. dmg storage + // scan --nvme-meta). + if req.Scm.Usage && req.Nvme.Meta { + if err := checkEnginesReady(c.harness.Instances()); err != nil { + return nil, err + } + } + respScm, err := c.scanScm(ctx, req.Scm) if err != nil { return nil, err diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 272d5174b24..cee5ceb30a7 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -73,6 +73,7 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { smbc *scm.MockBackendConfig tierCfgs storage.TierConfigs expResp *ctlpb.StorageScanResp + expErr error }{ "successful scan; scm namespaces": { bmbc: &bdev.MockBackendConfig{ @@ -403,6 +404,17 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { MemInfo: proto.MockPBMemInfo(), }, }, + "scan usage": { + req: &ctlpb.StorageScanReq{ + Scm: &ctlpb.ScanScmReq{ + Usage: true, + }, + Nvme: &ctlpb.ScanNvmeReq{ + Meta: true, + }, + }, + expErr: FaultDataPlaneNotStarted, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -434,8 +446,9 @@ func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { } resp, err := cs.StorageScan(test.Context(t), tc.req) + test.CmpErr(t, tc.expErr, err) if err != nil { - t.Fatal(err) + return } if tc.req.Nvme.Health || tc.req.Nvme.Meta { @@ -680,6 +693,7 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { smsc *system.MockSysConfig storageCfgs []storage.TierConfigs engineTargetCount []int + enginesNotReady bool scanTwice bool junkResp bool drpcResps map[int][]*mockDrpcResponse @@ -1389,6 +1403,45 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { MemInfo: proto.MockPBMemInfo(), }, }, + "multi-engine; multi-tier; with usage; engines not ready": { + req: &ctlpb.StorageScanReq{ + Scm: &ctlpb.ScanScmReq{Usage: true}, + Nvme: &ctlpb.ScanNvmeReq{Meta: true}, + }, + storageCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassDcpm.String()). + WithScmMountPoint(mockPbScmMount0.Path). + WithScmDeviceList(mockPbScmNamespace0.Blockdev), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(newCtrlr(1).PciAddr), + }, + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassDcpm.String()). + WithScmMountPoint(mockPbScmMount1.Path). + WithScmDeviceList(mockPbScmNamespace1.Blockdev), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(newCtrlr(2).PciAddr), + }, + }, + engineTargetCount: []int{4, 4}, + enginesNotReady: true, + drpcResps: map[int][]*mockDrpcResponse{ + 0: { + {Message: newSmdDevResp(1)}, + {Message: newBioHealthResp(1)}, + }, + 1: { + {Message: newSmdDevResp(2)}, + {Message: newBioHealthResp(2)}, + }, + }, + expErr: errInstanceNotReady, + }, // Sometimes when more than a few ssds are assigned to engine without many targets, // some of the smd entries for the latter ssds are in state "NEW" rather than // "NORMAL", when in this state, health is unavailable and DER_NONEXIST is returned. @@ -1552,7 +1605,11 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { Controllers: *tc.eCtrlrs[idx], }) } - ne := newTestEngine(log, false, sp, ec) + te := newTestEngine(log, false, sp, ec) + + if tc.enginesNotReady { + te.ready.SetFalse() + } // mock drpc responses dcc := new(mockDrpcClientConfig) @@ -1566,17 +1623,17 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { } else { t.Fatal("drpc response mocks unpopulated") } - ne.setDrpcClient(newMockDrpcClient(dcc)) - ne._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1)) - ne.setTargetCount(tc.engineTargetCount[idx]) - for _, tc := range ne.storage.GetBdevConfigs() { + te.setDrpcClient(newMockDrpcClient(dcc)) + te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1)) + te.setTargetCount(tc.engineTargetCount[idx]) + for _, tc := range te.storage.GetBdevConfigs() { tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll) } - md := ne.storage.GetControlMetadata() + md := te.storage.GetControlMetadata() md.Path = "/foo" md.DevicePath = md.Path - cs.harness.instances[idx] = ne + cs.harness.instances[idx] = te } cs.harness.started.SetTrue() diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index ab684cd9ed5..7be5c570b86 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -185,12 +185,12 @@ func (ei *EngineInstance) logScmStorage() error { // ScanBdevTiers calls in to the private engine storage provider to scan bdev // tiers. Scan will avoid using any cached results if direct is set to true. func (ei *EngineInstance) ScanBdevTiers() ([]storage.BdevTierScanResult, error) { - isStarted := ei.IsStarted() + isUp := ei.IsReady() upDn := "down" - if isStarted { + if isUp { upDn = "up" } ei.log.Debugf("scanning engine-%d bdev tiers while engine is %s", ei.Index(), upDn) - return ei.storage.ScanBdevTiers(!isStarted) + return ei.storage.ScanBdevTiers(!isUp) } From ac36ea07f04832fabb029c99a6676b83f5791567 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 26 Jun 2023 14:43:29 -0500 Subject: [PATCH 09/21] DAOS-8781 client: allow polling timeout to be modified via env (#12124) (#12407) Introduce a new env variable called D_POLL_TIMEOUT in libdaos to tune how aggressive the polling on network progress is for synchronous operation. For asynchronous operations, the caller already passes a timeout to eq_poll or event_test. The default value (0) is not changed and means busy polling. This can be modified to a value in micro-seconds. Signed-off-by: Johann Lombardi Signed-off-by: Mohamad Chaarawi --- docs/admin/env_variables.md | 1 + src/cart/crt_init.c | 3 ++- src/client/api/event.c | 10 +++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/admin/env_variables.md b/docs/admin/env_variables.md index 3fca2f1db1e..daac2c8e6a6 100644 --- a/docs/admin/env_variables.md +++ b/docs/admin/env_variables.md @@ -68,6 +68,7 @@ Environment variables in this section only apply to the client side. |Variable |Description| |-------------------------|-----------| |FI\_MR\_CACHE\_MAX\_COUNT|Enable MR (Memory Registration) caching in OFI layer. Recommended to be set to 0 (disable) when CRT\_DISABLE\_MEM\_PIN is NOT set to 1. INTEGER. Default to unset.| +|D\_POLL\_TIMEOUT|Polling timeout passed to network progress for synchronous operations. Default to 0 (busy polling), value in micro-seconds otherwise.| ## Debug System (Client & Server) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 4d27e778bf0..44096d770b5 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -71,7 +71,8 @@ dump_envariables(void) "CRT_CTX_SHARE_ADDR", "CRT_CTX_NUM", "D_FI_CONFIG", "FI_UNIVERSE_SIZE", "CRT_ENABLE_MEM_PIN", "FI_OFI_RXM_USE_SRX", "D_LOG_FLUSH", "CRT_MRC_ENABLE", - "CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST"}; + "CRT_SECONDARY_PROVIDER", "D_PROVIDER_AUTH_KEY", "D_PORT_AUTO_ADJUST", + "D_POLL_TIMEOUT"}; D_INFO("-- ENVARS: --\n"); for (i = 0; i < ARRAY_SIZE(envars); i++) { diff --git a/src/client/api/event.c b/src/client/api/event.c index 62082374bd0..ba12a05a996 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -20,6 +20,12 @@ static __thread daos_event_t ev_thpriv; static __thread bool ev_thpriv_is_init; +/** + * Global progress timeout for synchronous operation + * busy-polling by default (0), timeout in us otherwise + */ +static uint32_t ev_prog_timeout; + #define EQ_WITH_CRT #if !defined(EQ_WITH_CRT) @@ -91,6 +97,8 @@ daos_eq_lib_init() eq_ref = 1; + d_getenv_int("D_POLL_TIMEOUT", &ev_prog_timeout); + unlock: D_MUTEX_UNLOCK(&daos_eq_lock); return rc; @@ -1261,7 +1269,7 @@ daos_event_priv_wait() /* Wait on the event to complete */ while (evx->evx_status != DAOS_EVS_READY) { - rc = crt_progress_cond(evx->evx_ctx, 0, ev_progress_cb, &epa); + rc = crt_progress_cond(evx->evx_ctx, ev_prog_timeout, ev_progress_cb, &epa); /** progress succeeded, loop can exit if event completed */ if (rc == 0) { From 48f42b8989b66ea7783b0f85aa3ba94e902026aa Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 27 Jun 2023 22:00:49 +0900 Subject: [PATCH 10/21] DAOS-13643 rdb: Remove an unnecessary assignment (#12360) (#12497) The rdb_raft_load_replicas includes an unnecessary assignment of zero to rc. Signed-off-by: Li Wei --- src/rdb/rdb_raft.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c index e9515edcb45..0a9075518cc 100644 --- a/src/rdb/rdb_raft.c +++ b/src/rdb/rdb_raft.c @@ -256,7 +256,6 @@ rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, d_rank_list_t **replica rc = rdb_lc_lookup(lc, index, RDB_LC_ATTRS, &rdb_lc_nreplicas, &value); if (rc == -DER_NONEXIST) { D_DEBUG(DB_MD, "no replicas in "DF_U64"\n", index); - rc = 0; nreplicas = 0; } else if (rc != 0) { return rc; From 36860299e364db4ad553ab0491dca40cb76740c5 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 27 Jun 2023 08:03:30 -0500 Subject: [PATCH 11/21] DAOS-13792 dfs: fix for coverity 1443161 (#12486) (#12524) lock module when checking for dfs is_init. Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs_internal.c | 8 ++++++-- src/client/dfs/dfs_internal.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/client/dfs/dfs_internal.c b/src/client/dfs/dfs_internal.c index b717e683055..5d9f33ed204 100644 --- a/src/client/dfs/dfs_internal.c +++ b/src/client/dfs/dfs_internal.c @@ -88,11 +88,15 @@ static d_hash_table_ops_t hdl_hash_ops = { .hop_rec_hash = rec_hash }; -int +bool dfs_is_init() { - if (module_initialized > 0) + D_MUTEX_LOCK(&module_lock); + if (module_initialized > 0) { + D_MUTEX_UNLOCK(&module_lock); return true; + } + D_MUTEX_UNLOCK(&module_lock); return false; } diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 79d674779d2..20ce7a5b003 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -43,7 +43,7 @@ dfs_hdl_insert(const char *str, int type, const char *pool, daos_handle_t *oh, struct dfs_mnt_hdls **_hdl); int dfs_hdl_cont_destroy(const char *pool, const char *cont, bool force); -int +bool dfs_is_init(); /* From b5fccc683a437dd02724f7851c5743239ecd7a8b Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Tue, 27 Jun 2023 20:43:45 -0500 Subject: [PATCH 12/21] DAOS-13086 cart: add support for ofi tcp w/o rxm (#12436) (#12530) - add support for ofi tcp without rxm Signed-off-by: Jerome Soumagne --- src/cart/crt_hg.c | 6 +++++- src/cart/crt_hg.h | 29 +++++++++++++++++++++++++++++ src/cart/crt_init.c | 8 ++++---- src/include/cart/types.h | 29 ----------------------------- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 6c72fde5256..bd2cc35812d 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -37,10 +37,14 @@ struct crt_na_dict crt_na_dict[] = { .nad_str = "ofi+gni", .nad_contig_eps = true, .nad_port_bind = false, + }, { + .nad_type = CRT_PROV_OFI_TCP, + .nad_str = "ofi+tcp", + .nad_contig_eps = true, + .nad_port_bind = true, }, { .nad_type = CRT_PROV_OFI_TCP_RXM, .nad_str = "ofi+tcp;ofi_rxm", - .nad_alt_str = "ofi+tcp", .nad_contig_eps = true, .nad_port_bind = true, }, { diff --git a/src/cart/crt_hg.h b/src/cart/crt_hg.h index 395b773e4d1..7c6c49570d7 100644 --- a/src/cart/crt_hg.h +++ b/src/cart/crt_hg.h @@ -31,6 +31,35 @@ struct crt_rpc_priv; struct crt_common_hdr; struct crt_corpc_hdr; +/** + * Enumeration specifying providers supported by the library + */ +typedef enum { + CRT_PROV_SM = 0, + CRT_PROV_OFI_SOCKETS, + CRT_PROV_OFI_VERBS_RXM, + CRT_PROV_OFI_GNI, + CRT_PROV_OFI_TCP, + CRT_PROV_OFI_TCP_RXM, + CRT_PROV_OFI_CXI, + CRT_PROV_OFI_OPX, + CRT_PROV_OFI_LAST = CRT_PROV_OFI_OPX, + CRT_PROV_UCX_RC, + CRT_PROV_UCX_UD, + CRT_PROV_UCX_RC_UD, + CRT_PROV_UCX_RC_O, + CRT_PROV_UCX_UD_O, + CRT_PROV_UCX_RC_UD_O, + CRT_PROV_UCX_RC_X, + CRT_PROV_UCX_UD_X, + CRT_PROV_UCX_RC_UD_X, + CRT_PROV_UCX_DC_X, + CRT_PROV_UCX_TCP, + CRT_PROV_UCX_LAST = CRT_PROV_UCX_TCP, + /* Note: This entry should be the last valid one in enum */ + CRT_PROV_COUNT, + CRT_PROV_UNKNOWN = -1, +} crt_provider_t; crt_provider_t crt_prov_str_to_prov(const char *prov_str); diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 44096d770b5..0c449173e0b 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -546,10 +546,6 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) if (prov == CRT_PROV_OFI_CXI) mrc_enable = 1; - else { - /* Use tagged messages for other providers, disable multi-recv */ - apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1"); - } d_getenv_int("CRT_MRC_ENABLE", &mrc_enable); if (mrc_enable == 0) { @@ -557,6 +553,10 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) setenv("FI_MR_CACHE_MAX_COUNT", "0", 1); } + /* Use tagged messages for other providers, disable multi-recv */ + if (prov != CRT_PROV_OFI_CXI && prov != CRT_PROV_OFI_TCP) + apply_if_not_set("NA_OFI_UNEXPECTED_TAG_MSG", "1"); + g_prov_settings_applied[prov] = true; } diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 5754ebbe666..a4bda75fc24 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -90,35 +90,6 @@ typedef struct crt_init_options { char *cio_auth_key; } crt_init_options_t; -/** - * Enumeration specifying providers supported by the library - */ -typedef enum { - CRT_PROV_SM = 0, - CRT_PROV_OFI_SOCKETS, - CRT_PROV_OFI_VERBS_RXM, - CRT_PROV_OFI_GNI, - CRT_PROV_OFI_TCP_RXM, - CRT_PROV_OFI_CXI, - CRT_PROV_OFI_OPX, - CRT_PROV_OFI_LAST = CRT_PROV_OFI_OPX, - CRT_PROV_UCX_RC, - CRT_PROV_UCX_UD, - CRT_PROV_UCX_RC_UD, - CRT_PROV_UCX_RC_O, - CRT_PROV_UCX_UD_O, - CRT_PROV_UCX_RC_UD_O, - CRT_PROV_UCX_RC_X, - CRT_PROV_UCX_UD_X, - CRT_PROV_UCX_RC_UD_X, - CRT_PROV_UCX_DC_X, - CRT_PROV_UCX_TCP, - CRT_PROV_UCX_LAST = CRT_PROV_UCX_TCP, - /* Note: This entry should be the last valid one in enum */ - CRT_PROV_COUNT, - CRT_PROV_UNKNOWN = -1, -} crt_provider_t; - typedef int crt_status_t; /** From 79342902bfb6dc3eacf566cce9f7d38a457cc6e0 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 28 Jun 2023 10:10:44 -0500 Subject: [PATCH 13/21] DAOS-13175 dfs: print progress of dfs mwc checker (#12470) (#12541) Every 30 seconds print how many objects have been scanned in the dfs namespace or marked in the OIT. This informs users that there is some progress being done by the checker and it's not hung in case of a large namespace. Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs.c | 99 +++++++++++++++++++++++++++++++--------- utils/node_local_test.py | 8 ++-- 2 files changed, 81 insertions(+), 26 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 79e3374aa6b..a6a432972cd 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -6453,6 +6453,7 @@ dfs_dir_anchor_set(dfs_obj_t *obj, const char name[], daos_anchor_t *anchor) #define DFS_ITER_NR 128 #define DFS_ITER_DKEY_BUF (DFS_ITER_NR * sizeof(uint64_t)) #define DFS_ITER_ENTRY_BUF (DFS_ITER_NR * DFS_MAX_NAME) +#define DFS_ELAPSED_TIME 30 struct dfs_oit_args { daos_handle_t oit; @@ -6460,6 +6461,9 @@ struct dfs_oit_args { uint64_t snap_epoch; uint64_t skipped; uint64_t failed; + time_t start_time; + time_t print_time; + uint64_t num_scanned; }; static int @@ -6555,8 +6559,19 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) daos_obj_id_t oid; d_iov_t marker; bool mark_data = true; + struct timespec current_time; int rc; + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + return errno; + oit_args->num_scanned ++; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Scanned "DF_U64" files/directories (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + /** open the entry name and get the oid */ rc = dfs_lookup_rel(dfs, parent, name, O_RDONLY, &obj, NULL, NULL); if (rc) { @@ -6702,12 +6717,24 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * d_iov_t marker; bool mark_data = true; daos_epoch_range_t epr; - struct timespec now; + struct timespec now, current_time; uid_t uid = geteuid(); gid_t gid = getegid(); unsigned int co_flags = DAOS_COO_EX; + char now_name[24]; + struct tm *now_tm; + daos_size_t len; int rc, rc2; + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + return errno; + now_tm = localtime(&now.tv_sec); + len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); + if (len == 0) + return EINVAL; + D_PRINT("DFS checker: Start (%s)\n", now_name); + if (flags & DFS_CHECK_RELINK && flags & DFS_CHECK_REMOVE) { D_ERROR("can't request remove and link to l+f at the same time\n"); return EINVAL; @@ -6728,6 +6755,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_cont, rc); } + D_PRINT("DFS checker: Create OIT table\n"); /** create snapshot for OIT */ rc = daos_cont_create_snap_opt(coh, &snap_epoch, NULL, DAOS_SNAP_OPT_CR | DAOS_SNAP_OPT_OIT, NULL); @@ -6741,6 +6769,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_dfs, rc = ENOMEM); oit_args->flags = flags; oit_args->snap_epoch = snap_epoch; + oit_args->start_time = now.tv_sec; + oit_args->print_time = now.tv_sec; /** Open OIT table */ rc = daos_oit_open(coh, snap_epoch, &oit_args->oit, NULL); @@ -6757,10 +6787,11 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_oit, rc = daos_der2errno(rc)); } rc = daos_oit_mark(oit_args->oit, dfs->root.oid, &marker, NULL); - if (rc) { + if (rc && rc != -DER_NONEXIST) { D_ERROR("Failed to mark ROOT OID in OIT: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_oit, rc = daos_der2errno(rc)); } + rc = 0; if (flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(coh, dfs->super_oid, snap_epoch); @@ -6790,6 +6821,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } } + D_PRINT("DFS checker: Iterating namespace and marking objects\n"); + oit_args->num_scanned = 2; /** iterate through the namespace and mark OITs starting from the root object */ while (!daos_anchor_is_eof(&anchor)) { rc = dfs_iterate(dfs, &dfs->root, &anchor, &nr_entries, DFS_MAX_NAME * nr_entries, @@ -6802,14 +6835,14 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * nr_entries = DFS_ITER_NR; } - /** Create lost+found directory and properly link unmarked oids there. */ - if (flags & DFS_CHECK_RELINK) { - char now_name[24]; - - rc = clock_gettime(CLOCK_REALTIME, &now); - if (rc) - D_GOTO(out_oit, rc = errno); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_oit, rc = errno); + D_PRINT("DFS checker: marked "DF_U64" files/directories (runtime: "DF_U64" sec))\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + /** Create lost+found directory to link unmarked oids there. */ + if (flags & DFS_CHECK_RELINK) { rc = dfs_open(dfs, NULL, "lost+found", S_IFDIR | 0755, O_CREAT | O_RDWR, 0, 0, NULL, &lf); if (rc) { @@ -6818,21 +6851,15 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } if (name == NULL) { - struct tm *now_tm; - size_t len; /* * Create a directory with current timestamp in l+f where leaked oids will * be linked in this run. */ - now_tm = localtime(&now.tv_sec); - len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); - if (len == 0) { - D_ERROR("Invalid time format\n"); - D_GOTO(out_lf1, rc = EINVAL); - } - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", now_name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + now_name); } else { - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + name); } rc = dfs_open(dfs, lf, name ? name : now_name, S_IFDIR | 0755, @@ -6864,6 +6891,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * * Pass 1: check directories only and descend to mark all oids in the namespace of each dir. * Pass 2: relink remaining oids in the L+F root that are unmarked still after first pass. */ + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 1)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); /** Start Pass 1 */ while (!daos_anchor_is_eof(&anchor)) { @@ -6874,6 +6903,16 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { if (flags & DFS_CHECK_RELINK) { enum daos_otype_t otype = daos_obj_id2type(oids[i]); @@ -6934,6 +6973,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * if (!(flags & DFS_CHECK_RELINK)) goto done; + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 2)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); while (!daos_anchor_is_eof(&anchor)) { nr_entries = DFS_ITER_NR; @@ -6943,11 +6984,20 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { struct dfs_entry entry = {0}; enum daos_otype_t otype = daos_obj_id2type(oids[i]); char oid_name[DFS_MAX_NAME + 1]; - daos_size_t len; if (flags & DFS_CHECK_PRINT) D_PRINT("oid["DF_U64"]: "DF_OID"\n", unmarked_entries, @@ -7016,8 +7066,12 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } done: - if (flags & DFS_CHECK_PRINT) - D_PRINT("Number of Leaked OIDs in Namespace = "DF_U64"\n", unmarked_entries); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + D_PRINT("DFS checker: Done! (runtime: "DF_U64" sec)\n", + current_time.tv_sec - oit_args->start_time); + D_PRINT("DFS checker: Number of leaked OIDs in namespace = "DF_U64"\n", unmarked_entries); if (flags & DFS_CHECK_VERIFY) { if (oit_args->failed) { D_ERROR(""DF_U64" OIDs failed data consistency check!\n", oit_args->failed); @@ -7057,6 +7111,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * rc2 = daos_cont_close(coh, NULL); if (rc == 0) rc = daos_der2errno(rc2); + return rc; } diff --git a/utils/node_local_test.py b/utils/node_local_test.py index db674bfe36e..f341e837510 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -3548,7 +3548,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 2': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 2': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3558,7 +3558,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse @@ -3613,7 +3613,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 4': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 4': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3623,7 +3623,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse From 8c804affa47bb270498228ed0b4fceffb4f97276 Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Wed, 28 Jun 2023 10:11:47 -0500 Subject: [PATCH 14/21] DAOS-13791 client: free iod->iod_recxs first, then free params (#12475) (#12525) Signed-off-by: Lei Huang --- src/client/array/dc_array.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 42cdf32d420..ed527667791 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -2285,7 +2285,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l daos_key_t *dkey; struct io_params *params = NULL; tse_task_t *io_task; - bool free_iod_recxs = true; + bool free_params = true; int rc; D_ALLOC_PTR(params); @@ -2336,7 +2336,7 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l rc = tse_task_register_comp_cb(io_task, free_io_params_cb, ¶ms, sizeof(params)); if (rc) D_GOTO(err_task, rc); - free_iod_recxs = false; + free_params = false; rc = tse_task_register_deps(props->ptask, 1, &io_task); if (rc) @@ -2351,9 +2351,10 @@ add_record(daos_handle_t oh, daos_handle_t th, struct set_size_props *props, d_l if (io_task) tse_task_complete(io_task, rc); err: - D_FREE(params); - if (free_iod_recxs) + if (free_params) { D_FREE(iod->iod_recxs); + D_FREE(params); + } return rc; } From 6642d9c55c6ff1aca4d3e4fe3847e3aafb3c4730 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 28 Jun 2023 10:27:07 -0500 Subject: [PATCH 15/21] DAOS-13650 object: remove multiple RPC version of object (#12306) (#12492) - old one is out of support (2.0) Signed-off-by: Mohamad Chaarawi --- src/client/array/dc_array.c | 7 +++--- src/object/cli_mod.c | 21 +++++------------ src/object/cli_shard.c | 8 ++++--- src/object/obj_rpc.c | 33 +++++++++------------------ src/object/obj_rpc.h | 45 ++++++++----------------------------- src/object/srv_internal.h | 3 +-- src/object/srv_mod.c | 16 +++++-------- src/object/srv_obj.c | 22 +++++------------- 8 files changed, 45 insertions(+), 110 deletions(-) diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index ed527667791..5dfb94d8501 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -1822,7 +1822,7 @@ struct key_query_props { char akey_val; daos_recx_t recx; daos_size_t *size; - daos_size_t max_epoch; + daos_epoch_t max_epoch; tse_task_t *ptask; }; @@ -1831,8 +1831,7 @@ free_query_cb(tse_task_t *task, void *data) { struct key_query_props *props = *((struct key_query_props **)data); - if (props->array) - array_decref(props->array); + array_decref(props->array); D_FREE(props); return 0; } @@ -1905,7 +1904,7 @@ dc_array_get_size(tse_task_t *task) query_args->dkey = &kqp->dkey; query_args->akey = &kqp->akey; query_args->recx = &kqp->recx; - query_args->max_epoch = NULL; + query_args->max_epoch = &kqp->max_epoch; rc = tse_task_register_comp_cb(task, free_query_cb, &kqp, sizeof(kqp)); if (rc != 0) diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 06257056c36..66fcc0c78e7 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -25,7 +25,7 @@ int dc_obj_proto_version; int dc_obj_init(void) { - uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; + uint32_t ver_array[1] = {DAOS_OBJ_VERSION}; int rc; rc = obj_utils_init(); @@ -37,15 +37,12 @@ dc_obj_init(void) D_GOTO(out_utils, rc); dc_obj_proto_version = 0; - rc = daos_rpc_proto_query(obj_proto_fmt_0.cpf_base, ver_array, 2, &dc_obj_proto_version); + rc = daos_rpc_proto_query(obj_proto_fmt.cpf_base, ver_array, 1, &dc_obj_proto_version); if (rc) D_GOTO(out_class, rc); - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) { - rc = daos_rpc_register(&obj_proto_fmt_0, OBJ_PROTO_CLI_COUNT, NULL, - DAOS_OBJ_MODULE); - } else if (dc_obj_proto_version == DAOS_OBJ_VERSION) { - rc = daos_rpc_register(&obj_proto_fmt_1, OBJ_PROTO_CLI_COUNT, NULL, + if (dc_obj_proto_version == DAOS_OBJ_VERSION) { + rc = daos_rpc_register(&obj_proto_fmt, OBJ_PROTO_CLI_COUNT, NULL, DAOS_OBJ_MODULE); } else { D_ERROR("%d version object RPC not supported.\n", dc_obj_proto_version); @@ -61,10 +58,7 @@ dc_obj_init(void) rc = obj_ec_codec_init(); if (rc) { D_ERROR("failed to obj_ec_codec_init: "DF_RC"\n", DP_RC(rc)); - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) - daos_rpc_unregister(&obj_proto_fmt_0); - else - daos_rpc_unregister(&obj_proto_fmt_1); + daos_rpc_unregister(&obj_proto_fmt); D_GOTO(out_class, rc); } out_class: @@ -82,10 +76,7 @@ dc_obj_init(void) void dc_obj_fini(void) { - if (dc_obj_proto_version == DAOS_OBJ_VERSION - 1) - daos_rpc_unregister(&obj_proto_fmt_0); - else - daos_rpc_unregister(&obj_proto_fmt_1); + daos_rpc_unregister(&obj_proto_fmt); obj_ec_codec_fini(); obj_class_fini(); obj_utils_fini(); diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 3c13f7b83d4..9040559db7b 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -2097,8 +2097,8 @@ static int obj_shard_query_key_cb(tse_task_t *task, void *data) { struct obj_query_key_cb_args *cb_args; - struct obj_query_key_1_in *okqi; - struct obj_query_key_1_out *okqo; + struct obj_query_key_in *okqi; + struct obj_query_key_out *okqo; uint32_t flags; int opc; int ret = task->dt_result; @@ -2247,7 +2247,7 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint struct dtx_id *dti, uint32_t *map_ver, daos_handle_t th, tse_task_t *task) { struct dc_pool *pool = NULL; - struct obj_query_key_1_in *okqi; + struct obj_query_key_in *okqi; crt_rpc_t *req; struct obj_query_key_cb_args cb_args; daos_unit_oid_t oid; @@ -2296,6 +2296,8 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint okqi->okqi_epoch_first = epoch->oe_first; okqi->okqi_api_flags = flags; okqi->okqi_oid = oid; + d_iov_set(&okqi->okqi_dkey, NULL, 0); + d_iov_set(&okqi->okqi_akey, NULL, 0); if (dkey != NULL) okqi->okqi_dkey = *dkey; if (akey != NULL) diff --git a/src/object/obj_rpc.c b/src/object/obj_rpc.c index a8625605f8d..7328ac6d780 100644 --- a/src/object/obj_rpc.c +++ b/src/object/obj_rpc.c @@ -1078,8 +1078,7 @@ crt_proc_struct_daos_cpd_sg(crt_proc_t proc, crt_proc_op_t proc_op, CRT_RPC_DEFINE(obj_rw, DAOS_ISEQ_OBJ_RW, DAOS_OSEQ_OBJ_RW) CRT_RPC_DEFINE(obj_key_enum, DAOS_ISEQ_OBJ_KEY_ENUM, DAOS_OSEQ_OBJ_KEY_ENUM) CRT_RPC_DEFINE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) -CRT_RPC_DEFINE(obj_query_key_0, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_0) -CRT_RPC_DEFINE(obj_query_key_1, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_1) +CRT_RPC_DEFINE(obj_query_key, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY) CRT_RPC_DEFINE(obj_sync, DAOS_ISEQ_OBJ_SYNC, DAOS_OSEQ_OBJ_SYNC) CRT_RPC_DEFINE(obj_migrate, DAOS_ISEQ_OBJ_MIGRATE, DAOS_OSEQ_OBJ_MIGRATE) CRT_RPC_DEFINE(obj_ec_agg, DAOS_ISEQ_OBJ_EC_AGG, DAOS_OSEQ_OBJ_EC_AGG) @@ -1098,29 +1097,17 @@ CRT_RPC_DEFINE(obj_key2anchor, DAOS_ISEQ_OBJ_KEY2ANCHOR, DAOS_OSEQ_OBJ_KEY2ANCHO .prf_co_ops = NULL, \ }, -static struct crt_proto_rpc_format obj_proto_rpc_fmt_0[] = { - OBJ_PROTO_CLI_RPC_LIST(0) -}; - -static struct crt_proto_rpc_format obj_proto_rpc_fmt_1[] = { - OBJ_PROTO_CLI_RPC_LIST(1) +static struct crt_proto_rpc_format obj_proto_rpc_fmt[] = { + OBJ_PROTO_CLI_RPC_LIST }; #undef X -struct crt_proto_format obj_proto_fmt_0 = { - .cpf_name = "daos-object", - .cpf_ver = DAOS_OBJ_VERSION - 1, - .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt_0), - .cpf_prf = obj_proto_rpc_fmt_0, - .cpf_base = DAOS_RPC_OPCODE(0, DAOS_OBJ_MODULE, 0) -}; - -struct crt_proto_format obj_proto_fmt_1 = { +struct crt_proto_format obj_proto_fmt = { .cpf_name = "daos-object", .cpf_ver = DAOS_OBJ_VERSION, - .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt_1), - .cpf_prf = obj_proto_rpc_fmt_1, + .cpf_count = ARRAY_SIZE(obj_proto_rpc_fmt), + .cpf_prf = obj_proto_rpc_fmt, .cpf_base = DAOS_RPC_OPCODE(0, DAOS_OBJ_MODULE, 0) }; @@ -1153,7 +1140,7 @@ obj_reply_set_status(crt_rpc_t *rpc, int status) ((struct obj_punch_out *)reply)->opo_ret = status; break; case DAOS_OBJ_RPC_QUERY_KEY: - ((struct obj_query_key_0_out *)reply)->okqo_ret = status; + ((struct obj_query_key_out *)reply)->okqo_ret = status; break; case DAOS_OBJ_RPC_SYNC: ((struct obj_sync_out *)reply)->oso_ret = status; @@ -1197,7 +1184,7 @@ obj_reply_get_status(crt_rpc_t *rpc) case DAOS_OBJ_RPC_TGT_PUNCH_AKEYS: return ((struct obj_punch_out *)reply)->opo_ret; case DAOS_OBJ_RPC_QUERY_KEY: - return ((struct obj_query_key_0_out *)reply)->okqo_ret; + return ((struct obj_query_key_out *)reply)->okqo_ret; case DAOS_OBJ_RPC_SYNC: return ((struct obj_sync_out *)reply)->oso_ret; case DAOS_OBJ_RPC_EC_AGGREGATE: @@ -1241,7 +1228,7 @@ obj_reply_map_version_set(crt_rpc_t *rpc, uint32_t map_version) ((struct obj_punch_out *)reply)->opo_map_version = map_version; break; case DAOS_OBJ_RPC_QUERY_KEY: - ((struct obj_query_key_0_out *)reply)->okqo_map_version = map_version; + ((struct obj_query_key_out *)reply)->okqo_map_version = map_version; break; case DAOS_OBJ_RPC_SYNC: ((struct obj_sync_out *)reply)->oso_map_version = map_version; @@ -1285,7 +1272,7 @@ obj_reply_map_version_get(crt_rpc_t *rpc) case DAOS_OBJ_RPC_TGT_PUNCH_AKEYS: return ((struct obj_punch_out *)reply)->opo_map_version; case DAOS_OBJ_RPC_QUERY_KEY: - return ((struct obj_query_key_0_out *)reply)->okqo_map_version; + return ((struct obj_query_key_out *)reply)->okqo_map_version; case DAOS_OBJ_RPC_SYNC: return ((struct obj_sync_out *)reply)->oso_map_version; case DAOS_OBJ_RPC_CPD: diff --git a/src/object/obj_rpc.h b/src/object/obj_rpc.h index 7a2e00f2315..3b65c705034 100644 --- a/src/object/obj_rpc.h +++ b/src/object/obj_rpc.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -36,16 +36,7 @@ * OPCODE, flags, FMT, handler, corpc_hdlr and name */ -#define QUERY_KEY_0 \ - X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, &CQF_obj_query_key, \ - ds_obj_query_key_handler_0, NULL, "key_query") -#define QUERY_KEY_1 \ - X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, &CQF_obj_query_key, \ - ds_obj_query_key_handler_1, NULL, "key_query") - -#define OBJ_PROTO_CLI_RPC_LIST(ver) \ +#define OBJ_PROTO_CLI_RPC_LIST \ X(DAOS_OBJ_RPC_UPDATE, \ 0, &CQF_obj_rw, \ ds_obj_rw_handler, NULL, "update") \ @@ -74,9 +65,8 @@ 0, &CQF_obj_punch, \ ds_obj_punch_handler, NULL, "akey_punch") \ X(DAOS_OBJ_RPC_QUERY_KEY, \ - 0, ver == 0 ? &CQF_obj_query_key_0 : &CQF_obj_query_key_1, \ - ver == 0 ? ds_obj_query_key_handler_0 : \ - ds_obj_query_key_handler_1, NULL, "key_query") \ + 0, &CQF_obj_query_key, \ + ds_obj_query_key_handler, NULL, "key_query") \ X(DAOS_OBJ_RPC_SYNC, \ 0, &CQF_obj_sync, \ ds_obj_sync_handler, NULL, "obj_sync") \ @@ -111,14 +101,13 @@ /* Define for RPC enum population below */ #define X(a, b, c, d, e, f) a, enum obj_rpc_opc { - OBJ_PROTO_CLI_RPC_LIST(1) + OBJ_PROTO_CLI_RPC_LIST OBJ_PROTO_CLI_COUNT, OBJ_PROTO_CLI_LAST = OBJ_PROTO_CLI_COUNT - 1, }; #undef X -extern struct crt_proto_format obj_proto_fmt_0; -extern struct crt_proto_format obj_proto_fmt_1; +extern struct crt_proto_format obj_proto_fmt; extern int dc_obj_proto_version; /* Helper function to convert opc to name */ @@ -127,7 +116,7 @@ obj_opc_to_str(crt_opcode_t opc) { switch (opc) { #define X(a, b, c, d, e, f) case a: return f; - OBJ_PROTO_CLI_RPC_LIST(1) + OBJ_PROTO_CLI_RPC_LIST #undef X } return "unknown"; @@ -306,7 +295,7 @@ CRT_RPC_DECLARE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) ((daos_key_t) (okqi_dkey) CRT_VAR) \ ((daos_key_t) (okqi_akey) CRT_VAR) -#define DAOS_OSEQ_OBJ_QUERY_KEY_0 /* output fields */ \ +#define DAOS_OSEQ_OBJ_QUERY_KEY /* output fields */ \ ((int32_t) (okqo_ret) CRT_VAR) \ ((uint32_t) (okqo_map_version) CRT_VAR) \ ((uint64_t) (okqo_epoch) CRT_VAR) \ @@ -320,27 +309,11 @@ CRT_RPC_DECLARE(obj_punch, DAOS_ISEQ_OBJ_PUNCH, DAOS_OSEQ_OBJ_PUNCH) /* recx for EC parity space */ \ ((daos_recx_t) (okqo_recx_parity) CRT_VAR) \ /* recx for punched EC extents */ \ - ((daos_recx_t) (okqo_recx_punched) CRT_VAR) - -#define DAOS_OSEQ_OBJ_QUERY_KEY_1 /* output fields */ \ - ((int32_t) (okqo_ret) CRT_VAR) \ - ((uint32_t) (okqo_map_version) CRT_VAR) \ - ((uint64_t) (okqo_epoch) CRT_VAR) \ - ((uint32_t) (okqo_flags) CRT_VAR) \ - ((uint32_t) (okqo_pad32_1) CRT_VAR) \ - ((daos_key_t) (okqo_dkey) CRT_VAR) \ - ((daos_key_t) (okqo_akey) CRT_VAR) \ - /* recx for visible extent */ \ - ((daos_recx_t) (okqo_recx) CRT_VAR) \ - /* recx for EC parity space */ \ - ((daos_recx_t) (okqo_recx_parity) CRT_VAR) \ - /* recx for punched EC extents */ \ ((daos_recx_t) (okqo_recx_punched) CRT_VAR) \ /* epoch for max write */ \ ((uint64_t) (okqo_max_epoch) CRT_VAR) -CRT_RPC_DECLARE(obj_query_key_0, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_0) -CRT_RPC_DECLARE(obj_query_key_1, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY_1) +CRT_RPC_DECLARE(obj_query_key, DAOS_ISEQ_OBJ_QUERY_KEY, DAOS_OSEQ_OBJ_QUERY_KEY) #define DAOS_ISEQ_OBJ_SYNC /* input fields */ \ ((uuid_t) (osi_co_hdl) CRT_VAR) \ diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 0581a836172..2e417e83629 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -259,8 +259,7 @@ void ds_obj_enum_handler(crt_rpc_t *rpc); void ds_obj_key2anchor_handler(crt_rpc_t *rpc); void ds_obj_punch_handler(crt_rpc_t *rpc); void ds_obj_tgt_punch_handler(crt_rpc_t *rpc); -void ds_obj_query_key_handler_0(crt_rpc_t *rpc); -void ds_obj_query_key_handler_1(crt_rpc_t *rpc); +void ds_obj_query_key_handler(crt_rpc_t *rpc); void ds_obj_sync_handler(crt_rpc_t *rpc); void ds_obj_migrate_handler(crt_rpc_t *rpc); void ds_obj_ec_agg_handler(crt_rpc_t *rpc); diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index f0b05d25656..15d52941c64 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -67,12 +67,8 @@ obj_mod_fini(void) .dr_corpc_ops = e, \ }, -static struct daos_rpc_handler obj_handlers_0[] = { - OBJ_PROTO_CLI_RPC_LIST(0) -}; - -static struct daos_rpc_handler obj_handlers_1[] = { - OBJ_PROTO_CLI_RPC_LIST(1) +static struct daos_rpc_handler obj_handlers[] = { + OBJ_PROTO_CLI_RPC_LIST }; #undef X @@ -348,10 +344,10 @@ struct dss_module obj_module = { .sm_ver = DAOS_OBJ_VERSION, .sm_init = obj_mod_init, .sm_fini = obj_mod_fini, - .sm_proto_count = 2, - .sm_proto_fmt = {&obj_proto_fmt_0, &obj_proto_fmt_1}, - .sm_cli_count = {OBJ_PROTO_CLI_COUNT, OBJ_PROTO_CLI_COUNT}, - .sm_handlers = {obj_handlers_0, obj_handlers_1}, + .sm_proto_count = 1, + .sm_proto_fmt = {&obj_proto_fmt}, + .sm_cli_count = {OBJ_PROTO_CLI_COUNT}, + .sm_handlers = {obj_handlers}, .sm_key = &obj_module_key, .sm_mod_ops = &ds_obj_mod_ops, .sm_metrics = &obj_metrics, diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 05df46ca104..ca767f03266 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -3873,11 +3873,11 @@ ds_obj_punch_handler(crt_rpc_t *rpc) obj_ioc_end(&ioc, rc); } -static void -ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) +void +ds_obj_query_key_handler(crt_rpc_t *rpc) { - struct obj_query_key_1_in *okqi; - struct obj_query_key_1_out *okqo; + struct obj_query_key_in *okqi; + struct obj_query_key_out *okqo; daos_key_t *dkey; daos_key_t *akey; struct dtx_handle *dth = NULL; @@ -3935,7 +3935,7 @@ ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) re_query: rc = vos_obj_query_key(ioc.ioc_vos_coh, okqi->okqi_oid, query_flags, okqi->okqi_epoch, dkey, akey, &okqo->okqo_recx, - return_epoch ? &okqo->okqo_max_epoch : NULL, + &okqo->okqo_max_epoch, cell_size, stripe_size, dth); if (obj_dtx_need_refresh(dth, rc)) { rc = dtx_refresh(dth, ioc.ioc_coc); @@ -3956,18 +3956,6 @@ ds_obj_query_key_handler(crt_rpc_t *rpc, bool return_epoch) D_ERROR("send reply failed: "DF_RC"\n", DP_RC(rc)); } -void -ds_obj_query_key_handler_0(crt_rpc_t *rpc) -{ - ds_obj_query_key_handler(rpc, false); -} - -void -ds_obj_query_key_handler_1(crt_rpc_t *rpc) -{ - ds_obj_query_key_handler(rpc, true); -} - void ds_obj_sync_handler(crt_rpc_t *rpc) { From 5a583af1e5681189a7849f3da7d85cf8eef4a6d1 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 28 Jun 2023 10:27:45 -0500 Subject: [PATCH 16/21] DAOS-13801 array: fix possible double free with FI (#12507) (#12539) Fix issue when truncating arrays. Signed-off-by: Mohamad Chaarawi --- src/client/array/dc_array.c | 22 +++++++++++++--------- src/client/dfs/dfs.c | 8 +++++++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 5dfb94d8501..da44df06060 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -2478,6 +2478,7 @@ dc_array_set_size(tse_task_t *task) daos_obj_list_dkey_t *enum_args; struct set_size_props *set_size_props = NULL; tse_task_t *enum_task; + bool cleanup = true; int rc; args = daos_task_get_args(task); @@ -2537,31 +2538,34 @@ dc_array_set_size(tse_task_t *task) enum_args->sgl = &set_size_props->sgl; enum_args->dkey_anchor = &set_size_props->anchor; - rc = tse_task_register_cbs(enum_task, NULL, NULL, 0, adjust_array_size_cb, &set_size_props, - sizeof(set_size_props)); + rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props, + sizeof(set_size_props)); if (rc) D_GOTO(err_enum_task, rc); + cleanup = false; - rc = tse_task_register_deps(task, 1, &enum_task); + rc = tse_task_register_comp_cb(enum_task, adjust_array_size_cb, &set_size_props, + sizeof(set_size_props)); if (rc) D_GOTO(err_enum_task, rc); - rc = tse_task_register_comp_cb(task, free_set_size_cb, &set_size_props, - sizeof(set_size_props)); + rc = tse_task_register_deps(task, 1, &enum_task); if (rc) D_GOTO(err_enum_task, rc); rc = tse_task_schedule(enum_task, true); if (rc) - D_GOTO(err_enum_task, rc); + D_GOTO(err_task, rc); return 0; err_enum_task: tse_task_complete(enum_task, rc); err_task: - D_FREE(set_size_props); - if (array) - array_decref(array); tse_task_complete(task, rc); + if (cleanup) { + D_FREE(set_size_props); + if (array) + array_decref(array); + } return rc; } /* end daos_array_set_size */ diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index a6a432972cd..280ec52318d 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1246,8 +1246,14 @@ open_file(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, rc = insert_entry(dfs->layout_v, parent->oh, DAOS_TX_NONE, file->name, len, DAOS_COND_DKEY_INSERT, entry); if (rc == EEXIST && !oexcl) { + int rc2; + /** just try fetching entry to open the file */ - daos_array_close(file->oh, NULL); + rc2 = daos_array_close(file->oh, NULL); + if (rc2 == -DER_NOMEM) + rc2 = daos_array_close(file->oh, NULL); + if (rc2) + return daos_der2errno(rc2); } else if (rc) { int rc2; From 462fe7d88bb74bcff6b75c0779035fa009c0f6b3 Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Wed, 28 Jun 2023 10:35:27 -0500 Subject: [PATCH 17/21] DAOS-13759 client: handle "/" as special case when call dfs_access() (#12483) (#12529) also add "mkdir -p" in NLT test. Signed-off-by: Lei Huang --- src/client/dfuse/pil4dfs/int_dfs.c | 13 ++++++++++--- utils/node_local_test.py | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 75156ad9ede..36ab227ab40 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -3789,6 +3789,7 @@ chdir(const char *path) char item_name[DFS_MAX_NAME]; char *parent_dir = NULL; char *full_path = NULL; + bool is_root; if (next_chdir == NULL) { next_chdir = dlsym(RTLD_NEXT, "chdir"); @@ -3811,10 +3812,13 @@ chdir(const char *path) return rc; } - if (!parent && (strncmp(item_name, "/", 2) == 0)) + if (!parent && (strncmp(item_name, "/", 2) == 0)) { + is_root = true; rc = dfs_stat(dfs_mt->dfs, NULL, NULL, &stat_buf); - else + } else { + is_root = false; rc = dfs_stat(dfs_mt->dfs, parent, item_name, &stat_buf); + } if (rc) D_GOTO(out_err, rc); if (!S_ISDIR(stat_buf.st_mode)) { @@ -3822,7 +3826,10 @@ chdir(const char *path) strerror(ENOTDIR)); D_GOTO(out_err, rc = ENOTDIR); } - rc = dfs_access(dfs_mt->dfs, parent, item_name, X_OK); + if (is_root) + rc = dfs_access(dfs_mt->dfs, NULL, NULL, X_OK); + else + rc = dfs_access(dfs_mt->dfs, parent, item_name, X_OK); if (rc) D_GOTO(out_err, rc); len_str = snprintf(cur_dir, DFS_MAX_PATH, "%s%s", dfs_mt->fs_root, full_path); diff --git a/utils/node_local_test.py b/utils/node_local_test.py index f341e837510..28620350452 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -3851,6 +3851,10 @@ def test_pil4dfs(self): dir1 = join(path, 'dir1') self.server.run_daos_client_cmd_pil4dfs(['mkdir', dir1]) + # create multiple levels dirs + dirabcd = join(path, 'dira/dirb/dirc/dird') + self.server.run_daos_client_cmd_pil4dfs(['mkdir', '-p', dirabcd]) + # find to list all files/dirs. self.server.run_daos_client_cmd_pil4dfs(['find', path]) From d1e2e304d514a385ad8fb2ac1a2967f79a9d94bd Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Wed, 28 Jun 2023 10:55:58 -0600 Subject: [PATCH 18/21] DAOS-13113 common: Fixed coverity issue in dav code (#12493) (#12542) Fixed a potential leak in alloc_class_new() CID: 474927 Signed-off-by: sherintg --- src/common/dav/alloc_class.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/dav/alloc_class.c b/src/common/dav/alloc_class.c index a0b2c0f4ae8..3dc5745db6a 100644 --- a/src/common/dav/alloc_class.c +++ b/src/common/dav/alloc_class.c @@ -212,7 +212,7 @@ alloc_class_new(int id, struct alloc_class_collection *ac, if (id < 0 && alloc_class_find_first_free_slot(ac, &slot) != 0) - goto error_class_alloc; + goto error_map_insert; id = slot; size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(c->unit_size, From e4c8c4a2350ec15123d78a2d33e7269138a8a855 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Wed, 28 Jun 2023 11:00:41 -0600 Subject: [PATCH 19/21] DAOS-13783 common: Address a couple of coverity issues (#12481) (#12533) 1444800 - field accessed without a lock 1444804 - unused value Signed-off-by: Jeff Olivier --- src/common/ad_mem.c | 3 +-- src/common/dav/heap.c | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/common/ad_mem.c b/src/common/ad_mem.c index ffd6750fc3c..91c6c373b57 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -2184,8 +2184,7 @@ arena_reserve_addr(struct ad_arena *arena, daos_size_t size, struct ad_reserv_ac if (rc == -DER_ENOENT || /* no arena, no group */ rc == -DER_NOSPACE) { /* no space in this arena */ grp_at = 0; - grp = NULL; - rc = 0; + grp = NULL; /* fall through */ } else if (rc != 0) { D_ERROR("Failed to find group, arena=%d, size=%d, rc=%d\n", diff --git a/src/common/dav/heap.c b/src/common/dav/heap.c index 5d11dff9f4b..4384fe40f8c 100644 --- a/src/common/dav/heap.c +++ b/src/common/dav/heap.c @@ -1371,13 +1371,16 @@ heap_get_arena_buckets(struct palloc_heap *heap, unsigned arena_id) int heap_get_arena_auto(struct palloc_heap *heap, unsigned arena_id) { + int value; + util_mutex_lock(&heap->rt->arenas.lock); struct arena *a = heap_get_arena_by_id(heap, arena_id); + value = a->automatic; util_mutex_unlock(&heap->rt->arenas.lock); - return a->automatic; + return value; } /* From d3cb8fb3988d3f3f318a5dab7af4c719d63d87e3 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Wed, 28 Jun 2023 18:38:41 -0700 Subject: [PATCH 20/21] DAOS-13451 cart: Don't drain queue for cart clients (#12535) (#12553) - Avoid context queue draining for clients. This reduces cart_ctl run by ~5 seconds. Signed-off-by: Alexander A Oganezov --- src/cart/utils/crt_utils.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/cart/utils/crt_utils.c b/src/cart/utils/crt_utils.c index 9a249bfc9f0..c65aa284a08 100644 --- a/src/cart/utils/crt_utils.c +++ b/src/cart/utils/crt_utils.c @@ -134,18 +134,19 @@ crtu_progress_fn(void *data) while (opts.shutdown == 0) crt_progress(*p_ctx, 1000); - if (opts.is_swim_enabled && idx == 0) - crt_swim_disable_all(); + if (opts.is_server) { + if (opts.is_swim_enabled && idx == 0) + crt_swim_disable_all(); - rc = crtu_drain_queue(*p_ctx); - D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc); + rc = crtu_drain_queue(*p_ctx); + D_ASSERTF(rc == 0, "crtu_drain_queue() failed with rc=%d\n", rc); - if (opts.delay_shutdown_sec > 0) - sleep(opts.delay_shutdown_sec); + if (opts.delay_shutdown_sec > 0) + sleep(opts.delay_shutdown_sec); + } rc = crt_context_destroy(*p_ctx, 1); - D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n", - p_ctx, rc); + D_ASSERTF(rc == 0, "Failed to destroy context %p rc=%d\n", p_ctx, rc); pthread_exit(rc ? *p_ctx : NULL); From 776249a50a670917fe9beb6fda27e85c981007e6 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 29 Jun 2023 05:55:23 -0700 Subject: [PATCH 21/21] DAOS-13716 cart: Fix coverity issue cid 1404275 (#12459) (#12491) - Fix cid 1404275 Copy into fixed size buffer Signed-off-by: Alexander A Oganezov --- src/cart/crt_hg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index bd2cc35812d..6dc8f11f7de 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -659,7 +659,7 @@ crt_get_opx_info_string(char *provider, char *domain, char *ip, D_GOTO(out, rc = -DER_INVAL); } - strcpy(domain_name, domain); + strncpy(domain_name, domain, sizeof(domain_name) - 1); strtok_r(domain_name, &domain[delimiter], &hfi_str); hfi = (unsigned int)strtoul(hfi_str, NULL, 10);