From 9d2fa58f57cab9b45191e3edd9b3915f0b29143c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 9 Oct 2023 12:10:24 +0000 Subject: [PATCH 01/13] DAOS-13490 test: Update valgrind suppressions. A go change has introduced some new failure traces, suppress them. Required-githooks: true Signed-off-by: Ashley Pittman --- src/cart/utils/memcheck-cart.supp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index 0dd52c688e2..2b9f1f06815 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -444,6 +444,24 @@ ... fun:indexbytebody } +{ + go-cond-racecall + Memcheck:Cond + ... + fun:racecall +} +{ + go-value8-write_racecall + Memcheck:Value8 + fun:__tsan_write + fun:racecall +} +{ + go-value8-racecall + Memcheck:Value8 + fun:_ZN6__tsan9ShadowSetEPNS_9RawShadowES1_S0_ + fun:racecall +} { FI leak 8 Memcheck:Leak From 2dbb8ab2d550292a631b792869b66bbda9cb54b1 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 9 Oct 2023 12:26:31 +0000 Subject: [PATCH 02/13] Add a test for container handle evict. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/lookup.c | 53 +++++++++++++++++++++++--- src/include/daos/common.h | 2 +- src/object/cli_shard.c | 11 +++--- utils/cq/words.dict | 1 + utils/node_local_test.py | 72 ++++++++++++++++++++++++++++++++++- 5 files changed, 125 insertions(+), 14 deletions(-) diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index 1eef3e69882..8c4051151b9 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -206,12 +206,25 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch D_GOTO(out_dfs, rc); } - rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, &ie->ie_stat); + ie->ie_obj = 0; + + /* Due to DAOS-14476 using the lookup to perform a stat will always succeed, so instead + * call ostat afterwards + */ + rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, NULL); if (rc) { DFUSE_TRA_ERROR(dfs, "dfs_lookup() returned: %d (%s)", rc, strerror(rc)); D_GOTO(out_dfs, rc); } + rc = dfs_ostat(dfs->dfs_ns, ie->ie_obj, &ie->ie_stat); + if (rc) { + ie->ie_stat.st_ino = dfs->dfs_ino; + ie->ie_dfs = NULL; + DHS_ERROR(dfs, rc, "dfs_ostat() failed"); + goto out_dfs; + } + ie->ie_stat.st_ino = dfs->dfs_ino; dfs_obj2id(ie->ie_obj, &ie->ie_oid); @@ -226,7 +239,9 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch out_dfs: d_hash_rec_decref(&dfp->dfp_cont_table, &dfs->dfs_entry); out_dfp: - d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); + /* TODO: This was causing a hash table reference count error, needs fixing on 2.4 + * d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); + */ out_err: duns_destroy_attr(&dattr); @@ -234,8 +249,7 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch } void -dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, - const char *name) +dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *name) { struct dfuse_info *dfuse_info = fuse_req_userdata(req); struct dfuse_inode_entry *ie; @@ -243,6 +257,9 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, char out[DUNS_MAX_XATTR_LEN]; char *outp = &out[0]; daos_size_t attr_len = DUNS_MAX_XATTR_LEN; + bool evict = false; + ino_t pinode = parent->ie_stat.st_ino; + ino_t cinode = 0; DFUSE_TRA_DEBUG(parent, "Parent:%#lx " DF_DE, parent->ie_stat.st_ino, DP_DE(name)); @@ -279,8 +296,16 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, if (S_ISDIR(ie->ie_stat.st_mode) && attr_len) { rc = check_for_uns_ep(dfuse_info, ie, out, attr_len); DFUSE_TRA_DEBUG(ie, "check_for_uns_ep() returned %d", rc); - if (rc != 0) - D_GOTO(out_release, rc); + if (rc != 0) { + /* At this point, we know the dentry exists but there's an error, so try and + * evict the dentry afterwards + */ + if (rc == EINVAL || rc == EIO) { + evict = true; + cinode = ie->ie_stat.st_ino; + } + goto out_release; + } } dfuse_reply_entry(dfuse_info, ie, NULL, false, req); @@ -299,4 +324,20 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, } else { DFUSE_REPLY_ERR_RAW(parent, req, rc); } + + if (evict) { + D_INFO("Calling forget %#lx " DF_DE, pinode, DP_DE(name)); + rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, pinode, name, + strnlen(name, NAME_MAX)); + DS_ERROR(-rc, "inval_entry() replied"); + if (rc && rc != -ENOENT) + DS_ERROR(-rc, "inval_entry() failed"); + + if (cinode != 0) { + rc = fuse_lowlevel_notify_inval_inode(dfuse_info->di_session, cinode, 0, 0); + DS_ERROR(-rc, "inval_inode() replied"); + if (rc && rc != -ENOENT) + DS_ERROR(-rc, "inval_inode() failed"); + } + } } diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 78acb71858c..a13e2ba22bf 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -625,7 +625,6 @@ daos_der2errno(int err) case -DER_NOTYPE: case -DER_NOSCHEMA: case -DER_NOLOCAL: - case -DER_NO_HDL: case -DER_IO_INVAL: return EINVAL; case -DER_KEY2BIG: case -DER_REC2BIG: return E2BIG; @@ -640,6 +639,7 @@ daos_der2errno(int err) case -DER_EQ_BUSY: return EBUSY; case -DER_AGAIN: return EAGAIN; case -DER_PROTO: return EPROTO; + case -DER_NO_HDL: case -DER_IO: return EIO; case -DER_CANCELED: return ECANCELED; case -DER_OVERFLOW: return EOVERFLOW; diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 21787b85151..c5258e6dd19 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -744,10 +744,10 @@ dc_rw_cb(tse_task_t *task, void *arg) DP_UOID(orw->orw_oid), rw_args->rpc, opc, rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); else - D_ERROR(DF_CONT DF_UOID" rpc %p opc %d to rank %d tag %d: "DF_RC"\n", - DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), - DP_UOID(orw->orw_oid), rw_args->rpc, opc, - rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); + DL_ERROR(rc, DF_CONT DF_UOID " rpc %p opc %d to rank %d tag %d", + DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), + DP_UOID(orw->orw_oid), rw_args->rpc, opc, + rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag); if (opc == DAOS_OBJ_RPC_FETCH) { /* For EC obj fetch, set orr_epoch as highest server @@ -1901,8 +1901,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) D_DEBUG(DB_TRACE, "rpc %p RPC %d may need retry: %d\n", cb_args->rpc, opc, rc); else - D_ERROR("rpc %p RPC %d failed: %d\n", - cb_args->rpc, opc, rc); + DL_ERROR(rc, "rpc %p RPC %d failed", cb_args->rpc, opc); D_GOTO(out, rc); } diff --git a/utils/cq/words.dict b/utils/cq/words.dict index 3d6eaffba4e..fab670da250 100644 --- a/utils/cq/words.dict +++ b/utils/cq/words.dict @@ -216,6 +216,7 @@ iface indata infiniband init +inode interoperability io iod diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 51872dca987..dc6a96f865b 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4652,6 +4652,72 @@ def run_dfuse(server, conf): return fatal_errors.errors +def run_evict_test(server): + """Run dfuse, do some I/O and then evict the container + + Create a container which will be persistent, then create a new container within that to + test on. + """ + pool = server.get_test_pool_obj() + + container = create_cont(server.conf, pool=pool, label='evict_cont', ctype='POSIX') + + pool = server.get_test_pool_obj() + dfuse = DFuse(server, server.conf, container=container, caching=False) + dfuse.start() + + cont_path = join(dfuse.dir, 'subcont') + + sub_cont = create_cont(server.conf, pool=pool, path=cont_path) + + # Sample the inode number of the sub-container, and check it's accessible. + sub_stat = os.stat(cont_path) + dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) + print(dfuse_stat) + + # pylint: disable-next=consider-using-with + fd = open(join(cont_path, 'testfile'), 'wb', buffering=0) + fd.write(b'hello') + + run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) + + try: + fd.write(b'world') + assert False + except OSError as error: + if error.errno != errno.EIO: + raise + + subprocess.run(['dd', 'if=/dev/zero', 'bs=16k', 'count=64', # nosec + f'of={join(cont_path, "dd_file")}'], check=False) + + fd.close() + + # Try a lookup again without any open files to trigger an inval_dentry in dfuse. + try: + print(os.stat(cont_path)) + assert False + except OSError as error: + if error.errno != errno.EIO: + raise + + # Re-sample the sub-container to see if it's been evicted. This does not access it but queries + # the mount. + count = 3 + while True: + dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) + print(dfuse_stat) + if dfuse_stat['resident'] is False: + print('Path has been evicted') + break + count -= 1 + if count == 0: + assert False, 'Path should have been evicted' + time.sleep(1) + + dfuse.stop() + + def run_in_fg(server, conf, args): """Run dfuse in the foreground. @@ -5998,6 +6064,7 @@ def run(wf, args): else: with DaosServer(conf, test_class='first', wf=wf_server, fatal_errors=fatal_errors) as server: + if args.mode == 'launch': run_in_fg(server, conf, args) elif args.mode == 'overlay': @@ -6012,6 +6079,8 @@ def run(wf, args): test_pydaos_kv(server, conf) test_pydaos_kv_obj_class(server, conf) fatal_errors.add_result(server.set_fi()) + elif args.mode == 'evict-test': + fatal_errors.add_result(run_evict_test(server)) elif args.test == 'all': fatal_errors.add_result(run_posix_tests(server, conf)) elif args.test: @@ -6148,7 +6217,8 @@ def main(): parser.add_argument('--log-usage-save') parser.add_argument('--dtx', action='store_true') parser.add_argument('--test', help="Use '--test list' for list") - parser.add_argument('mode', nargs='*') + parser.add_argument('mode', nargs='*', choices=['fi', 'all', 'overlay', 'set-fi', 'launch', + 'evict-test']) args = parser.parse_args() if args.server_fi: From 182e37fc999f61989f72f85d9bf96b2c05f23a11 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 10 Oct 2023 09:00:03 +0000 Subject: [PATCH 03/13] Fix merge and spelling. Required-githooks: true Signed-off-by: Ashley Pittman --- src/cart/utils/memcheck-cart.supp | 18 ------------------ src/client/dfuse/ops/read.c | 2 +- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index 2b9f1f06815..0dd52c688e2 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -444,24 +444,6 @@ ... fun:indexbytebody } -{ - go-cond-racecall - Memcheck:Cond - ... - fun:racecall -} -{ - go-value8-write_racecall - Memcheck:Value8 - fun:__tsan_write - fun:racecall -} -{ - go-value8-racecall - Memcheck:Value8 - fun:_ZN6__tsan9ShadowSetEPNS_9RawShadowES1_S0_ - fun:racecall -} { FI leak 8 Memcheck:Leak diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 7a5bbedc0d2..ef119509eb8 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -83,7 +83,7 @@ dfuse_readahead_reply(fuse_req_t req, size_t len, off_t position, struct dfuse_o /* At this point there is a buffer of known length that contains the data, and a read * request. * If the attempted read is bigger than the data then it will be truncated. - * It the atttempted read is smaller than the buffer it will be met in full. + * It the attempted read is smaller than the buffer it will be met in full. */ if (position + len < oh->doh_readahead->dra_ev->de_readahead_len) { From f8dc011e5bbd736ff44fff375818dbe58e48e999 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 14 Nov 2023 10:13:54 +0000 Subject: [PATCH 04/13] Add some more testing. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/opendir.c | 7 +++--- utils/node_local_test.py | 39 +++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/client/dfuse/ops/opendir.c b/src/client/dfuse/ops/opendir.c index 12d6536e3f3..d33a5b1b1b8 100644 --- a/src/client/dfuse/ops/opendir.c +++ b/src/client/dfuse/ops/opendir.c @@ -60,9 +60,9 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1); atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1); - DFUSE_TRA_DEBUG(oh, "Kernel cache flags invalid %d started %d finished %d", + DFUSE_TRA_DEBUG(oh, "Kernel cache flags invalid %d started %d finished %d drop " DF_BOOL, oh->doh_kreaddir_invalid, oh->doh_kreaddir_started, - oh->doh_kreaddir_finished); + oh->doh_kreaddir_finished, DP_BOOL(oh->doh_evict_on_close)); if ((!oh->doh_kreaddir_invalid) && oh->doh_kreaddir_finished) { DFUSE_TRA_DEBUG(oh, "Directory handle may have populated cache, saving"); @@ -73,7 +73,8 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f dfuse_dre_drop(dfuse_info, oh); if (oh->doh_evict_on_close) { int rc; - + D_INFO("Calling inval_entry %#lx " DF_DE, oh->doh_ie->ie_parent, + DP_DE(oh->doh_ie->ie_name)); rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, oh->doh_ie->ie_parent, oh->doh_ie->ie_name, strnlen(oh->doh_ie->ie_name, NAME_MAX)); diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 675fc0451ed..28f56a3dd05 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4668,7 +4668,7 @@ def run_dfuse(server, conf): def run_evict_test(server): - """Run dfuse, do some I/O and then evict the container + """Run dfuse, do some I/O and then evict the container to see what happens. Create a container which will be persistent, then create a new container within that to test on. @@ -4681,17 +4681,23 @@ def run_evict_test(server): dfuse = DFuse(server, server.conf, container=container, caching=False) dfuse.start() - cont_path = join(dfuse.dir, 'subcont') + p_path = join(dfuse.dir, "intermediate_dir") + os.mkdir(p_path) + cont_path = join(p_path, 'subcont') sub_cont = create_cont(server.conf, pool=pool, path=cont_path) - # Sample the inode number of the sub-container, and check it's accessible. - sub_stat = os.stat(cont_path) + test_path = join(cont_path, "test_dir") + + os.mkdir(test_path) + + # Sample the inode number of the test dir in the sub container, and check it's accessible. + sub_stat = os.stat(test_path) dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) print(dfuse_stat) # pylint: disable-next=consider-using-with - fd = open(join(cont_path, 'testfile'), 'wb', buffering=0) + fd = open(join(test_path, 'testfile'), 'wb', buffering=0) fd.write(b'hello') run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) @@ -4704,7 +4710,7 @@ def run_evict_test(server): raise subprocess.run(['dd', 'if=/dev/zero', 'bs=16k', 'count=64', # nosec - f'of={join(cont_path, "dd_file")}'], check=False) + f'of={join(test_path, "dd_file")}'], check=False) fd.close() @@ -4717,7 +4723,8 @@ def run_evict_test(server): raise # Re-sample the sub-container to see if it's been evicted. This does not access it but queries - # the mount. + # the mount. The logic here is that dfuse should have received an I/O error and therefore + # evicted the inode. count = 3 while True: dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) @@ -4727,9 +4734,25 @@ def run_evict_test(server): break count -= 1 if count == 0: - assert False, 'Path should have been evicted' + break + # assert False, 'Path should have been evicted' time.sleep(1) + # Now evict the whole new container. This will cause dfuse to flush everything and then + # unmount the container. + # Any missing refs in dfuse should cause an assertion, any extra refs should cause the number + # of inodes to be incorrect. + # TODO: Evicting a already failed path doesn't work as you can't open it. For that reason use + # another directory under the main container and evict that here. + dfuse.evict_and_wait([p_path]) + + # Now check there is only the root inode, everything else should be disconnected/closed. + dfuse_stat = dfuse.check_usage() + assert dfuse_stat['inodes'] == 1 + assert dfuse_stat['open_files'] == 1 + assert dfuse_stat['pools'] == 1 + assert dfuse_stat['containers'] == 1 + dfuse.stop() From 19e2872f15253ba15f05f2db4f17cd4fd8454ab6 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 30 Nov 2023 12:42:54 +0000 Subject: [PATCH 05/13] Fix warnings. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/opendir.c | 3 +-- utils/cq/words.dict | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/ops/opendir.c b/src/client/dfuse/ops/opendir.c index 1875eb1cbee..20929bbdc4d 100644 --- a/src/client/dfuse/ops/opendir.c +++ b/src/client/dfuse/ops/opendir.c @@ -78,8 +78,7 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f DFUSE_REPLY_ZERO_OH(oh, req); if (ie) { int rc; - D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, - DP_DE(ie->ie_name)); + D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, DP_DE(ie->ie_name)); rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); diff --git a/utils/cq/words.dict b/utils/cq/words.dict index 14c2f8bae25..01f3d461e04 100644 --- a/utils/cq/words.dict +++ b/utils/cq/words.dict @@ -217,6 +217,7 @@ indata infiniband init inode +inodes interoperability io iod From 72f934dbbd048d80836814768f068289fbd27a1a Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 30 Nov 2023 14:40:43 +0000 Subject: [PATCH 06/13] Clean up some logging. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/lookup.c | 4 +--- utils/node_local_test.py | 7 ++----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index 42d88a84880..68b0837f00c 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -291,8 +291,7 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na dfs_obj2id(ie->ie_obj, &ie->ie_oid); - dfuse_compute_inode(ie->ie_dfs, &ie->ie_oid, - &ie->ie_stat.st_ino); + dfuse_compute_inode(ie->ie_dfs, &ie->ie_oid, &ie->ie_stat.st_ino); if (S_ISDIR(ie->ie_stat.st_mode) && attr_len) { rc = check_for_uns_ep(dfuse_info, ie, out, attr_len); @@ -330,7 +329,6 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na D_INFO("Calling forget %#lx " DF_DE, pinode, DP_DE(name)); rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, pinode, name, strnlen(name, NAME_MAX)); - DS_ERROR(-rc, "inval_entry() replied"); if (rc && rc != -ENOENT) DS_ERROR(-rc, "inval_entry() failed"); diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 28f56a3dd05..8dad583fef4 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4700,6 +4700,7 @@ def run_evict_test(server): fd = open(join(test_path, 'testfile'), 'wb', buffering=0) fd.write(b'hello') + # Evict the container. run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) try: @@ -4747,11 +4748,7 @@ def run_evict_test(server): dfuse.evict_and_wait([p_path]) # Now check there is only the root inode, everything else should be disconnected/closed. - dfuse_stat = dfuse.check_usage() - assert dfuse_stat['inodes'] == 1 - assert dfuse_stat['open_files'] == 1 - assert dfuse_stat['pools'] == 1 - assert dfuse_stat['containers'] == 1 + dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1,containers=1) dfuse.stop() From 5f2805d0c4ce232af447ff3758e5117681d79c7a Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 30 Nov 2023 16:47:05 +0000 Subject: [PATCH 07/13] Revise the locking. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 2 -- src/client/dfuse/dfuse_core.c | 5 +++-- src/client/dfuse/ops/ioctl.c | 3 +++ src/client/dfuse/ops/lookup.c | 12 ++++++++---- src/client/dfuse/ops/open.c | 2 ++ src/client/dfuse/ops/opendir.c | 1 + utils/node_local_test.py | 3 +-- 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 810d7f51201..04194f82794 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -799,8 +799,6 @@ struct fuse_lowlevel_ops dfuse_ops; #define DFUSE_REPLY_ENTRY(inode, req, entry) \ do { \ int __rc; \ - DFUSE_TRA_DEBUG(inode, "Returning entry inode %#lx mode %#o size %#zx", \ - (entry).attr.st_ino, (entry).attr.st_mode, (entry).attr.st_size); \ if ((entry).attr_timeout > 0) { \ (inode)->ie_stat = (entry).attr; \ dfuse_mcache_set_time(inode); \ diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 8054b903603..d3bf5dfb40f 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1142,8 +1142,9 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) struct dfuse_cont *dfc = ie->ie_dfs; struct dfuse_pool *dfp = dfc->dfs_dfp; - DFUSE_TRA_INFO(ie, "Closing poh %d coh %d", daos_handle_is_valid(dfp->dfp_poh), - daos_handle_is_valid(dfc->dfs_coh)); + DFUSE_TRA_DEBUG(ie, "Closing poh " DF_BOOL " coh " DF_BOOL, + DP_BOOL(daos_handle_is_valid(dfp->dfp_poh)), + DP_BOOL(daos_handle_is_valid(dfc->dfs_coh))); d_hash_rec_decref(&dfp->dfp_cont_table, &dfc->dfs_entry); } diff --git a/src/client/dfuse/ops/ioctl.c b/src/client/dfuse/ops/ioctl.c index 5f2b06e34e1..ea5630f47fa 100644 --- a/src/client/dfuse/ops/ioctl.c +++ b/src/client/dfuse/ops/ioctl.c @@ -315,6 +315,9 @@ handle_cont_qe_ioctl_helper(struct dfuse_obj_hdl *oh, fuse_req_t req, dfuse_inode_decref(dfuse_info, ie); } D_RWLOCK_UNLOCK(&dfuse_info->di_forget_lock); + + DFUSE_TRA_DEBUG(oh->doh_ie, "Query of %#lx returning " DF_BOOL, in_query->ino, + DP_BOOL(query.found)); } query.inode_count = atomic_load_relaxed(&dfuse_info->di_inode_count); diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index 68b0837f00c..1ef9e677500 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -232,7 +232,7 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch ie->ie_dfs = dfs; - DFUSE_TRA_INFO(dfs, "UNS entry point activated, root %#lx", dfs->dfs_ino); + DFUSE_TRA_DEBUG(dfs, "UNS entry point activated, root %#lx", dfs->dfs_ino); duns_destroy_attr(&dattr); @@ -326,7 +326,8 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na } if (evict) { - D_INFO("Calling forget %#lx " DF_DE, pinode, DP_DE(name)); + D_INFO("Calling inval_entry %#lx " DF_DE " cinode %#lx", pinode, DP_DE(name), + cinode); rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, pinode, name, strnlen(name, NAME_MAX)); if (rc && rc != -ENOENT) @@ -334,9 +335,12 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na if (cinode != 0) { rc = fuse_lowlevel_notify_inval_inode(dfuse_info->di_session, cinode, 0, 0); - DS_ERROR(-rc, "inval_inode() replied"); if (rc && rc != -ENOENT) - DS_ERROR(-rc, "inval_inode() failed"); + DS_ERROR(-rc, "inval_inode() cinode %#lx failed", cinode); } + + rc = fuse_lowlevel_notify_inval_inode(dfuse_info->di_session, pinode, 0, 0); + if (rc && rc != -ENOENT) + DS_ERROR(-rc, "inval_inode() pinode %#lx failed", pinode); } } diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index f5b07f9c970..8e96cb42059 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -222,6 +222,8 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) dfuse_inode_decref(dfuse_info, oh->doh_parent_dir); } if (ie) { + D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, DP_DE(ie->ie_name)); + rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); diff --git a/src/client/dfuse/ops/opendir.c b/src/client/dfuse/ops/opendir.c index 20929bbdc4d..79f59c12ef0 100644 --- a/src/client/dfuse/ops/opendir.c +++ b/src/client/dfuse/ops/opendir.c @@ -78,6 +78,7 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f DFUSE_REPLY_ZERO_OH(oh, req); if (ie) { int rc; + D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, DP_DE(ie->ie_name)); rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 8dad583fef4..f732e7d2122 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4735,8 +4735,7 @@ def run_evict_test(server): break count -= 1 if count == 0: - break - # assert False, 'Path should have been evicted' + assert False, 'Path should have been evicted' time.sleep(1) # Now evict the whole new container. This will cause dfuse to flush everything and then From 052b807956c246be661ee3ba27e7553ec505a16d Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 13 Feb 2024 12:05:49 +0000 Subject: [PATCH 08/13] Back out conflict. Required-githooks: true Signed-off-by: Ashley Pittman --- src/object/cli_shard.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index b03ee33dada..4f5cb220770 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -751,10 +751,10 @@ dc_rw_cb(tse_task_t *task, void *arg) DP_UOID(orw->orw_oid), rw_args->rpc, opc, rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); else - DL_ERROR(rc, DF_CONT DF_UOID " rpc %p opc %d to rank %d tag %d", - DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), - DP_UOID(orw->orw_oid), rw_args->rpc, opc, - rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag); + D_ERROR(DF_CONT DF_UOID " rpc %p opc %d to rank %d tag %d: " DF_RC "\n", + DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), + DP_UOID(orw->orw_oid), rw_args->rpc, opc, + rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); if (opc == DAOS_OBJ_RPC_FETCH) { /* For EC obj fetch, set orr_epoch as highest server @@ -2080,7 +2080,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) D_DEBUG(DB_TRACE, "rpc %p RPC %d may need retry: %d\n", cb_args->rpc, opc, rc); else - DL_ERROR(rc, "rpc %p RPC %d failed", cb_args->rpc, opc); + D_ERROR("rpc %p RPC %d failed: %d\n", cb_args->rpc, opc, rc); if (rc == -DER_OVERLOAD_RETRY) { uint32_t timeout = 0; From 8efd8abfb421147ff7f0e646997629f9b6dc8b0c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 13 Feb 2024 12:07:26 +0000 Subject: [PATCH 09/13] Back out cahnge. Required-githooks: true Signed-off-by: Ashley Pittman --- src/object/cli_shard.c | 47 ++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 5d60775f902..00980c433ee 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -751,7 +751,7 @@ dc_rw_cb(tse_task_t *task, void *arg) DP_UOID(orw->orw_oid), rw_args->rpc, opc, rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); else - D_ERROR(DF_CONT DF_UOID " rpc %p opc %d to rank %d tag %d: " DF_RC "\n", + D_ERROR(DF_CONT DF_UOID" rpc %p opc %d to rank %d tag %d: "DF_RC"\n", DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), DP_UOID(orw->orw_oid), rw_args->rpc, opc, rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); @@ -1999,32 +1999,25 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) } } - if (rc != 0) { - if (rc == -DER_NONEXIST) { - D_SPIN_LOCK(&cb_args->obj->cob_spin); - D_GOTO(set_max_epoch, rc = 0); - } - - if (rc == -DER_INPROGRESS || rc == -DER_TX_BUSY || rc == -DER_OVERLOAD_RETRY) - D_DEBUG(DB_TRACE, "rpc %p RPC %d may need retry: %d\n", - cb_args->rpc, opc, rc); - else - D_ERROR("rpc %p RPC %d failed: %d\n", cb_args->rpc, opc, rc); - - if (rc == -DER_OVERLOAD_RETRY) { - uint32_t timeout = 0; - struct obj_query_key_v10_out *okqo_v10; - - okqo_v10 = crt_reply_get(cb_args->rpc); - if (*cb_args->queue_id == 0) - *cb_args->queue_id = okqo_v10->okqo_comm_out.req_out_enqueue_id; - crt_req_get_timeout(cb_args->rpc, &timeout); - if (timeout > *cb_args->max_delay) - *cb_args->max_delay = timeout; - - } - D_GOTO(out, rc); - } + oqma.oca = &cb_args->obj->cob_oca; + oqma.oid = okqi->okqi_oid; + oqma.src_epoch = okqo->okqo_max_epoch; + oqma.in_dkey = &okqi->okqi_dkey; + oqma.src_dkey = &okqo->okqo_dkey; + oqma.tgt_dkey = cb_args->dkey; + oqma.src_akey = &okqo->okqo_akey; + oqma.tgt_akey = cb_args->akey; + oqma.src_recx = &okqo->okqo_recx; + oqma.tgt_recx = cb_args->recx; + oqma.tgt_epoch = cb_args->max_epoch; + oqma.tgt_map_ver = cb_args->map_ver; + oqma.max_delay = cb_args->max_delay; + oqma.queue_id = cb_args->queue_id; + oqma.rpc = cb_args->rpc; + oqma.flags = okqi->okqi_api_flags; + oqma.opc = DAOS_OBJ_RPC_QUERY_KEY; + oqma.src_map_ver = obj_reply_map_version_get(rpc); + oqma.ret = rc; D_SPIN_LOCK(&cb_args->obj->cob_spin); rc = daos_obj_merge_query_merge(&oqma); From ec1e27d29f4d19a6ad402206382ea20422567d98 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 13 Feb 2024 15:35:32 +0000 Subject: [PATCH 10/13] Fix merge and update test. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/SConscript | 2 +- src/client/dfuse/ops/lookup.c | 16 +--- utils/node_local_test.py | 161 +++++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 18 deletions(-) diff --git a/src/client/dfuse/SConscript b/src/client/dfuse/SConscript index 3df409cd2e0..1071d6f280f 100644 --- a/src/client/dfuse/SConscript +++ b/src/client/dfuse/SConscript @@ -236,7 +236,7 @@ def scons(): else: cenv.require('fuse') static_fuse = [] - extra_libs = [] + extra_libs = ['dl'] configure_fuse(cenv) diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index c3b6253ff80..2880cf41ea8 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -209,21 +209,12 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch ie->ie_obj = 0; - /* Due to DAOS-14476 using the lookup to perform a stat will always succeed, so instead - * call ostat afterwards - */ - rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, NULL); + /* This is where a I/O failure will happen when accessing a idle but evicted container */ + rc = dfs_lookup(dfs->dfs_ns, "/", O_RDWR, &ie->ie_obj, NULL, &ie->ie_stat); if (rc) { DFUSE_TRA_ERROR(dfs, "dfs_lookup() returned: %d (%s)", rc, strerror(rc)); - D_GOTO(out_dfs, rc); - } - - rc = dfs_ostat(dfs->dfs_ns, ie->ie_obj, &ie->ie_stat); - if (rc) { ie->ie_stat.st_ino = dfs->dfs_ino; - ie->ie_dfs = NULL; - DHS_ERROR(dfs, rc, "dfs_ostat() failed"); - goto out_dfs; + D_GOTO(out_dfs, rc); } ie->ie_stat.st_ino = dfs->dfs_ino; @@ -320,7 +311,6 @@ dfuse_cb_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na DFUSE_REPLY_NO_ENTRY(parent, req, parent->ie_dfs->dfc_ndentry_timeout); else DFUSE_REPLY_ERR_RAW(parent, req, rc); - } if (evict) { D_INFO("Calling inval_entry %#lx " DF_DE " cinode %#lx", pinode, DP_DE(name), diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 9007ec9821c..eb1d2e36550 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4744,6 +4744,12 @@ def run_evict_test(server): Create a container which will be persistent, then create a new container within that to test on. + + first container + /intermediate_dir + /intermediate_dir/subcont second (evicted) container + /intermediate_dir/subcont/test_dir + /intermediate_dir/subcont/test_dir/dd_file opened after eviction """ pool = server.get_test_pool_obj() @@ -4771,10 +4777,151 @@ def run_evict_test(server): # pylint: disable-next=consider-using-with fd = open(join(test_path, 'testfile'), 'wb', buffering=0) fd.write(b'hello') + fd.close() # Evict the container. run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) + # The evict can take some time to come into effect. + time.sleep(20) + + subprocess.run(['dd', 'if=/dev/zero', 'bs=16k', 'count=64', # nosec + f'of={join(test_path, "dd_file")}'], check=False) + + time.sleep(2) + + print(os.stat(cont_path)) + + # Now evict the whole new container. This will cause dfuse to flush everything and then + # unmount the container. + # Any missing refs in dfuse should cause an assertion, any extra refs should cause the number + # of inodes to be incorrect. + dfuse.evict_and_wait([p_path]) + + # Now check there is only the root inode, everything else should be disconnected/closed. + dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) + + dfuse.stop() + return False + +def run_evict_test_fd(server): + """Run dfuse, do some I/O and then evict the container to see what happens. + + Create a container which will be persistent, then create a new container within that to + test on. + + first container + /intermediate_dir + /intermediate_dir/subcont second (evicted) container + /intermediate_dir/subcont/test_dir + /intermediate_dir/subcont/test_dir/testfile file with I/O errors + /intermediate_dir/subcont/test_dir/dd_file opened after eviction + """ + pool = server.get_test_pool_obj() + + container = create_cont(server.conf, pool=pool, label='evict_cont', ctype='POSIX') + + pool = server.get_test_pool_obj() + dfuse = DFuse(server, server.conf, container=container, caching=False) + dfuse.start() + + p_path = join(dfuse.dir, "intermediate_dir") + os.mkdir(p_path) + cont_path = join(p_path, 'subcont') + + sub_cont = create_cont(server.conf, pool=pool, path=cont_path) + + test_path = join(cont_path, "test_dir") + + os.mkdir(test_path) + + # Sample the inode number of the test dir in the sub container, and check it's accessible. + sub_stat = os.stat(test_path) + dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) + print(dfuse_stat) + + # pylint: disable-next=consider-using-with + fd = open(join(test_path, 'testfile'), 'wb', buffering=0) + fd.write(b'hello') + + # Evict the container. + run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) + + # The evict can take some time to come into effect. + time.sleep(20) + + try: + fd.write(b'world') + assert False + except OSError as error: + if error.errno != errno.EIO: + raise + + fd.close() + + time.sleep(2) + + # Try a lookup again without any open files to trigger an inval_dentry in dfuse. + print(os.stat(cont_path)) + + # Now evict the whole new container. This will cause dfuse to flush everything and then + # unmount the container. + # Any missing refs in dfuse should cause an assertion, any extra refs should cause the number + # of inodes to be incorrect. + dfuse.evict_and_wait([p_path]) + + # Now check there is only the root inode, everything else should be disconnected/closed. + dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) + + dfuse.stop() + + +def run_evict_test_fd_dd(server): + """Run dfuse, do some I/O and then evict the container to see what happens. + + Create a container which will be persistent, then create a new container within that to + test on. + + first container + /intermediate_dir + /intermediate_dir/subcont second (evicted) container + /intermediate_dir/subcont/test_dir + /intermediate_dir/subcont/test_dir/testfile file with I/O errors + /intermediate_dir/subcont/test_dir/dd_file opened after eviction + """ + pool = server.get_test_pool_obj() + + container = create_cont(server.conf, pool=pool, label='evict_cont', ctype='POSIX') + + pool = server.get_test_pool_obj() + dfuse = DFuse(server, server.conf, container=container, caching=False) + dfuse.start() + + p_path = join(dfuse.dir, "intermediate_dir") + os.mkdir(p_path) + cont_path = join(p_path, 'subcont') + + sub_cont = create_cont(server.conf, pool=pool, path=cont_path) + + test_path = join(cont_path, "test_dir") + + os.mkdir(test_path) + + # Sample the inode number of the test dir in the sub container, and check it's accessible. + sub_stat = os.stat(test_path) + dfuse_stat = dfuse.check_usage(ino=sub_stat.st_ino) + print(dfuse_stat) + + # pylint: disable-next=consider-using-with + fd = open(join(test_path, 'testfile'), 'wb', buffering=0) + fd.write(b'hello') + + # Evict the container. + run_daos_cmd(server.conf, ['container', 'evict', '--all', pool.id(), sub_cont.id()]) + + # The evict can take some time to come into effect. + time.sleep(20) + try: fd.write(b'world') assert False @@ -4787,6 +4934,8 @@ def run_evict_test(server): fd.close() + time.sleep(2) + # Try a lookup again without any open files to trigger an inval_dentry in dfuse. try: print(os.stat(cont_path)) @@ -4807,19 +4956,19 @@ def run_evict_test(server): break count -= 1 if count == 0: - assert False, 'Path should have been evicted' + print(f'Path with inode 0x{sub_stat.st_ino:x} should have been evicted') + break + # assert False, f'Path with inode 0x{sub_stat.st_ino:x} should have been evicted' time.sleep(1) # Now evict the whole new container. This will cause dfuse to flush everything and then # unmount the container. # Any missing refs in dfuse should cause an assertion, any extra refs should cause the number # of inodes to be incorrect. - # TODO: Evicting a already failed path doesn't work as you can't open it. For that reason use - # another directory under the main container and evict that here. dfuse.evict_and_wait([p_path]) # Now check there is only the root inode, everything else should be disconnected/closed. - dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1,containers=1) + dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) dfuse.stop() @@ -6210,7 +6359,11 @@ def run(wf, args): test_pydaos_kv_obj_class(server, conf) fatal_errors.add_result(server.set_fi()) elif args.mode == 'evict-test': + # These two pass. fatal_errors.add_result(run_evict_test(server)) + fatal_errors.add_result(run_evict_test_fd(server)) + # This one fails still. + fatal_errors.add_result(run_evict_test_fd_dd(server)) elif args.test == 'all': fatal_errors.add_result(run_posix_tests(server, conf)) elif args.test: From ae55f24ef61de9db7ef52fcebe21897a0c5c0ff1 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 26 Feb 2024 12:15:55 +0000 Subject: [PATCH 11/13] Change the lookup fix. Required-githooks: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/lookup.c | 10 ++++------ utils/node_local_test.py | 1 + 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index ef7de076a27..37aeb90108c 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -198,9 +198,6 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch if (rc != 0) D_GOTO(out_dfp, rc); - /* The inode has a reference to the dfs, so keep that. */ - d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); - rc = dfs_release(ie->ie_obj); if (rc) { DFUSE_TRA_ERROR(dfs, "dfs_release() failed: %d (%s)", rc, strerror(rc)); @@ -217,6 +214,9 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch D_GOTO(out_dfs, rc); } + /* The inode has a reference to the dfs, so keep that. */ + d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); + ie->ie_stat.st_ino = dfs->dfs_ino; dfs_obj2id(ie->ie_obj, &ie->ie_oid); @@ -231,9 +231,7 @@ check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, ch out_dfs: d_hash_rec_decref(dfp->dfp_cont_table, &dfs->dfs_entry); out_dfp: - /* TODO: This was causing a hash table reference count error, needs fixing on 2.4 - * d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); - */ + d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry); out_err: duns_destroy_attr(&dattr); diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 69b9cd45e41..8af8a8ca891 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4939,6 +4939,7 @@ def run_evict_test(server): dfuse.stop() return False + def run_evict_test_fd(server): """Run dfuse, do some I/O and then evict the container to see what happens. From 9781740e3ae0a6a30262cd702932198cf7868710 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 18 Mar 2024 12:57:20 +0000 Subject: [PATCH 12/13] Improve test. Signed-off-by: Ashley Pittman --- utils/node_local_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index cbdb034ca3b..15dfdb0af5b 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -4833,7 +4833,8 @@ def sizeof_fmt(num, suffix='B'): lto.hide_fi_calls = skip_fi if ignore_einval: - lto.skip_suffixes.append(': 22 (Invalid argument)') + # lto.skip_suffixes.append(': 22 (Invalid argument)') + lto.skip_suffixes.append(': 5 (Input/output error)') lto.skip_suffixes.append(" DER_NO_HDL(-1002): 'Invalid handle'") if ignore_busy: @@ -5049,7 +5050,7 @@ def run_evict_test(server): # Now check there is only the root inode, everything else should be disconnected/closed. dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) - dfuse.stop() + dfuse.stop(ignore_einval=True) return False @@ -5122,7 +5123,8 @@ def run_evict_test_fd(server): # Now check there is only the root inode, everything else should be disconnected/closed. dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) - dfuse.stop() + dfuse.stop(ignore_einval=True) + return False def run_evict_test_fd_dd(server): @@ -5206,8 +5208,7 @@ def run_evict_test_fd_dd(server): count -= 1 if count == 0: print(f'Path with inode 0x{sub_stat.st_ino:x} should have been evicted') - break - # assert False, f'Path with inode 0x{sub_stat.st_ino:x} should have been evicted' + assert False, f'Path with inode 0x{sub_stat.st_ino:x} should have been evicted' time.sleep(1) # Now evict the whole new container. This will cause dfuse to flush everything and then @@ -5219,7 +5220,8 @@ def run_evict_test_fd_dd(server): # Now check there is only the root inode, everything else should be disconnected/closed. dfuse_stat = dfuse.check_usage(inodes=1, open_files=1, pools=1, containers=1) - dfuse.stop() + dfuse.stop(ignore_einval=True) + return False def run_in_fg(server, conf, args): @@ -6618,10 +6620,8 @@ def run(wf, args): test_pydaos_kv_obj_class(server, conf) fatal_errors.add_result(server.set_fi()) elif args.mode == 'evict-test': - # These two pass. fatal_errors.add_result(run_evict_test(server)) fatal_errors.add_result(run_evict_test_fd(server)) - # This one fails still. fatal_errors.add_result(run_evict_test_fd_dd(server)) elif args.test == 'all': fatal_errors.add_result(run_posix_tests(server, conf)) @@ -6760,7 +6760,7 @@ def main(): parser.add_argument('--dtx', action='store_true') parser.add_argument('--test', help="Use '--test list' for list") parser.add_argument('mode', nargs='*', choices=['fi', 'all', 'overlay', 'set-fi', 'launch', - 'evict-test']) + 'evict-test', []]) args = parser.parse_args() if args.server_fi: From 9cfd87644bb4583f215c7475ed101429c59dc2e8 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 11 Apr 2024 13:03:14 +0000 Subject: [PATCH 13/13] Remove debug changes. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/ioctl.c | 3 --- src/client/dfuse/ops/open.c | 4 +--- src/client/dfuse/ops/opendir.c | 8 +++----- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/client/dfuse/ops/ioctl.c b/src/client/dfuse/ops/ioctl.c index 5a59666a6aa..dd801b3ae46 100644 --- a/src/client/dfuse/ops/ioctl.c +++ b/src/client/dfuse/ops/ioctl.c @@ -315,9 +315,6 @@ handle_cont_qe_ioctl_helper(struct dfuse_obj_hdl *oh, fuse_req_t req, dfuse_inode_decref(dfuse_info, ie); } D_RWLOCK_UNLOCK(&dfuse_info->di_forget_lock); - - DFUSE_TRA_DEBUG(oh->doh_ie, "Query of %#lx returning " DF_BOOL, in_query->ino, - DP_BOOL(query.found)); } query.inode_count = atomic_load_relaxed(&dfuse_info->di_inode_count); diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index a45c3c8c83a..16d2d98ced2 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -229,8 +229,6 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) dfuse_inode_decref(dfuse_info, oh->doh_parent_dir); } if (ie) { - D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, DP_DE(ie->ie_name)); - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); diff --git a/src/client/dfuse/ops/opendir.c b/src/client/dfuse/ops/opendir.c index d915f1c82b2..e1c7a432c07 100644 --- a/src/client/dfuse/ops/opendir.c +++ b/src/client/dfuse/ops/opendir.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -59,9 +59,9 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1); atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1); - DFUSE_TRA_DEBUG(oh, "Kernel cache flags invalid %d started %d finished %d drop " DF_BOOL, + DFUSE_TRA_DEBUG(oh, "Kernel cache flags invalid %d started %d finished %d", oh->doh_kreaddir_invalid, oh->doh_kreaddir_started, - oh->doh_kreaddir_finished, DP_BOOL(oh->doh_evict_on_close)); + oh->doh_kreaddir_finished); if ((!oh->doh_kreaddir_invalid) && oh->doh_kreaddir_finished) { DFUSE_TRA_DEBUG(oh, "Directory handle may have populated cache, saving"); @@ -79,8 +79,6 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f if (ie) { int rc; - D_INFO("Calling inval_entry %#lx " DF_DE, ie->ie_parent, DP_DE(ie->ie_name)); - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, ie->ie_name, strnlen(ie->ie_name, NAME_MAX));