diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index b114db75b1e..3bd68e9e5bd 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -6454,6 +6454,7 @@ dfs_dir_anchor_set(dfs_obj_t *obj, const char name[], daos_anchor_t *anchor) #define DFS_ITER_NR 128 #define DFS_ITER_DKEY_BUF (DFS_ITER_NR * sizeof(uint64_t)) #define DFS_ITER_ENTRY_BUF (DFS_ITER_NR * DFS_MAX_NAME) +#define DFS_ELAPSED_TIME 30 struct dfs_oit_args { daos_handle_t oit; @@ -6461,6 +6462,9 @@ struct dfs_oit_args { uint64_t snap_epoch; uint64_t skipped; uint64_t failed; + time_t start_time; + time_t print_time; + uint64_t num_scanned; }; static int @@ -6556,8 +6560,19 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) daos_obj_id_t oid; d_iov_t marker; bool mark_data = true; + struct timespec current_time; int rc; + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + return errno; + oit_args->num_scanned ++; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Scanned "DF_U64" files/directories (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + /** open the entry name and get the oid */ rc = dfs_lookup_rel(dfs, parent, name, O_RDONLY, &obj, NULL, NULL); if (rc) { @@ -6703,12 +6718,24 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * d_iov_t marker; bool mark_data = true; daos_epoch_range_t epr; - struct timespec now; + struct timespec now, current_time; uid_t uid = geteuid(); gid_t gid = getegid(); unsigned int co_flags = DAOS_COO_EX; + char now_name[24]; + struct tm *now_tm; + daos_size_t len; int rc, rc2; + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + return errno; + now_tm = localtime(&now.tv_sec); + len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); + if (len == 0) + return EINVAL; + D_PRINT("DFS checker: Start (%s)\n", now_name); + if (flags & DFS_CHECK_RELINK && flags & DFS_CHECK_REMOVE) { D_ERROR("can't request remove and link to l+f at the same time\n"); return EINVAL; @@ -6729,6 +6756,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_cont, rc); } + D_PRINT("DFS checker: Create OIT table\n"); /** create snapshot for OIT */ rc = daos_cont_create_snap_opt(coh, &snap_epoch, NULL, DAOS_SNAP_OPT_CR | DAOS_SNAP_OPT_OIT, NULL); @@ -6742,6 +6770,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_dfs, rc = ENOMEM); oit_args->flags = flags; oit_args->snap_epoch = snap_epoch; + oit_args->start_time = now.tv_sec; + oit_args->print_time = now.tv_sec; /** Open OIT table */ rc = daos_oit_open(coh, snap_epoch, &oit_args->oit, NULL); @@ -6758,10 +6788,11 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_oit, rc = daos_der2errno(rc)); } rc = daos_oit_mark(oit_args->oit, dfs->root.oid, &marker, NULL); - if (rc) { + if (rc && rc != -DER_NONEXIST) { D_ERROR("Failed to mark ROOT OID in OIT: "DF_RC"\n", DP_RC(rc)); D_GOTO(out_oit, rc = daos_der2errno(rc)); } + rc = 0; if (flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(coh, dfs->super_oid, snap_epoch); @@ -6791,6 +6822,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } } + D_PRINT("DFS checker: Iterating namespace and marking objects\n"); + oit_args->num_scanned = 2; /** iterate through the namespace and mark OITs starting from the root object */ while (!daos_anchor_is_eof(&anchor)) { rc = dfs_iterate(dfs, &dfs->root, &anchor, &nr_entries, DFS_MAX_NAME * nr_entries, @@ -6803,14 +6836,14 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * nr_entries = DFS_ITER_NR; } - /** Create lost+found directory and properly link unmarked oids there. */ - if (flags & DFS_CHECK_RELINK) { - char now_name[24]; - - rc = clock_gettime(CLOCK_REALTIME, &now); - if (rc) - D_GOTO(out_oit, rc = errno); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_oit, rc = errno); + D_PRINT("DFS checker: marked "DF_U64" files/directories (runtime: "DF_U64" sec))\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + /** Create lost+found directory to link unmarked oids there. */ + if (flags & DFS_CHECK_RELINK) { rc = dfs_open(dfs, NULL, "lost+found", S_IFDIR | 0755, O_CREAT | O_RDWR, 0, 0, NULL, &lf); if (rc) { @@ -6819,21 +6852,15 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } if (name == NULL) { - struct tm *now_tm; - size_t len; /* * Create a directory with current timestamp in l+f where leaked oids will * be linked in this run. */ - now_tm = localtime(&now.tv_sec); - len = strftime(now_name, sizeof(now_name), "%Y-%m-%d-%H:%M:%S", now_tm); - if (len == 0) { - D_ERROR("Invalid time format\n"); - D_GOTO(out_lf1, rc = EINVAL); - } - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", now_name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + now_name); } else { - D_PRINT("Leaked OIDs will be inserted in /lost+found/%s\n", name); + D_PRINT("DFS checker: Leaked OIDs will be inserted in /lost+found/%s\n", + name); } rc = dfs_open(dfs, lf, name ? name : now_name, S_IFDIR | 0755, @@ -6865,6 +6892,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * * Pass 1: check directories only and descend to mark all oids in the namespace of each dir. * Pass 2: relink remaining oids in the L+F root that are unmarked still after first pass. */ + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 1)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); /** Start Pass 1 */ while (!daos_anchor_is_eof(&anchor)) { @@ -6875,6 +6904,16 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { if (flags & DFS_CHECK_RELINK) { enum daos_otype_t otype = daos_obj_id2type(oids[i]); @@ -6935,6 +6974,8 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * if (!(flags & DFS_CHECK_RELINK)) goto done; + D_PRINT("DFS checker: Checking unmarked OIDs (Pass 2)\n"); + oit_args->num_scanned = 0; memset(&anchor, 0, sizeof(anchor)); while (!daos_anchor_is_eof(&anchor)) { nr_entries = DFS_ITER_NR; @@ -6944,11 +6985,20 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } + clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + oit_args->num_scanned += nr_entries; + if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { + D_PRINT("DFS checker: Checked "DF_U64" objects (runtime: "DF_U64" sec)\n", + oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); + oit_args->print_time = current_time.tv_sec; + } + for (i = 0; i < nr_entries; i++) { struct dfs_entry entry = {0}; enum daos_otype_t otype = daos_obj_id2type(oids[i]); char oid_name[DFS_MAX_NAME + 1]; - daos_size_t len; if (flags & DFS_CHECK_PRINT) D_PRINT("oid["DF_U64"]: "DF_OID"\n", unmarked_entries, @@ -7017,8 +7067,12 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } done: - if (flags & DFS_CHECK_PRINT) - D_PRINT("Number of Leaked OIDs in Namespace = "DF_U64"\n", unmarked_entries); + rc = clock_gettime(CLOCK_REALTIME, ¤t_time); + if (rc) + D_GOTO(out_lf2, rc = errno); + D_PRINT("DFS checker: Done! (runtime: "DF_U64" sec)\n", + current_time.tv_sec - oit_args->start_time); + D_PRINT("DFS checker: Number of leaked OIDs in namespace = "DF_U64"\n", unmarked_entries); if (flags & DFS_CHECK_VERIFY) { if (oit_args->failed) { D_ERROR(""DF_U64" OIDs failed data consistency check!\n", oit_args->failed); @@ -7058,6 +7112,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * rc2 = daos_cont_close(coh, NULL); if (rc == 0) rc = daos_der2errno(rc2); + return rc; } diff --git a/utils/node_local_test.py b/utils/node_local_test.py index db674bfe36e..f341e837510 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -3548,7 +3548,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 2': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 2': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3558,7 +3558,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse @@ -3613,7 +3613,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 4': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 4': raise NLTestFail('Wrong number of Leaked OIDs') # run again to check nothing is detected @@ -3623,7 +3623,7 @@ def test_daos_fs_check(self): assert rc.returncode == 0 output = rc.stdout.decode('utf-8') line = output.splitlines() - if line[-1] != 'Number of Leaked OIDs in Namespace = 0': + if line[-1] != 'DFS checker: Number of leaked OIDs in namespace = 0': raise NLTestFail('Wrong number of Leaked OIDs') # remount dfuse