Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16686 dfuse: Improve concurrent overlapping read handling #15298

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9aa13e0
DAOS-15682 dfuse: Fail on concurrent read.
ashleypittman Oct 10, 2024
1d4019f
Try and fix issue.
ashleypittman Oct 11, 2024
122faf0
First stab at a fix.
ashleypittman Oct 11, 2024
270360f
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Oct 11, 2024
f74f053
Fix invalid free and leak.
ashleypittman Oct 11, 2024
6ad750b
Fix a logging line.
ashleypittman Oct 11, 2024
578be24
Add some debugging.
ashleypittman Oct 14, 2024
e771912
Track duplicate reads. This avoids a crash but there's still a memor…
ashleypittman Oct 17, 2024
6c407e7
Fix logic.
ashleypittman Oct 17, 2024
7f892f3
Rework to support blocking on network requests.
ashleypittman Oct 17, 2024
6e286d1
Bump array size and add stats.
ashleypittman Oct 18, 2024
b2a21c3
Fix a segv in the stats.
ashleypittman Oct 18, 2024
b685591
Track EOF better in reads.
ashleypittman Oct 18, 2024
33409b3
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Nov 11, 2024
4fcedda
Fixup after merge
ashleypittman Nov 11, 2024
018449e
Move active read list to active.
ashleypittman Nov 11, 2024
52e827c
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Nov 12, 2024
221c849
Rebase and iterate on comments.
ashleypittman Nov 12, 2024
f4956ac
fix: remove an extra list operation.
ashleypittman Nov 12, 2024
33a2ee3
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Nov 13, 2024
0733c0b
Back out test and stat changes.
ashleypittman Nov 13, 2024
46a565b
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Nov 14, 2024
3a2bbd1
Try and solve patchelf problem.
ashleypittman Nov 14, 2024
2308feb
Change failure mode.
ashleypittman Nov 15, 2024
e3e9836
Merge branch 'master' into amd/dfuse-concurrent-read
ashleypittman Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions site_scons/prereq_tools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ def run_commands(self, commands, subdir=None, env=None):
retval = True
else:
print(f"RUN: {' '.join(cmd)}")
if subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV']) != 0:
rc = subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV'])
if rc != 0:
print(f"Command failed with {rc}")
retval = False
break
return retval
Expand Down Expand Up @@ -1418,7 +1420,10 @@ def _patch_rpaths(self):
full_lib = os.path.join(path, lib)
cmd = ['patchelf', '--set-rpath', ':'.join(rpath), full_lib]
if not RUNNER.run_commands([cmd]):
print(f'Skipped patching {full_lib}')
if lib == 'libspdk.so':
print(f'Skipped patching {full_lib}')
else:
raise BuildFailure(f"Failed to patch {lib}")

def build(self, env, needed_libs):
"""Build the component, if necessary
Expand Down
16 changes: 13 additions & 3 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,13 @@ struct dfuse_event {
d_iov_t de_iov;
d_sg_list_t de_sgl;
d_list_t de_list;

/* Position in a list of events; this entry will be linked on either
 * active->open_reads or de->de_read_slaves.
 */
d_list_t de_read_list;
/* List of slave events */
d_list_t de_read_slaves;
struct dfuse_eq *de_eqt;
union {
struct dfuse_obj_hdl *de_oh;
Expand Down Expand Up @@ -1016,8 +1023,11 @@ struct dfuse_inode_entry {
};

struct active_inode {
d_list_t chunks;
pthread_spinlock_t lock;
d_list_t chunks;
size_t file_size;
bool seen_eof;
d_list_t open_reads;
pthread_spinlock_t lock;
struct dfuse_pre_read *readahead;
};

Expand Down Expand Up @@ -1133,7 +1143,7 @@ dfuse_cache_evict_dir(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *i
* Returns true if feature was used.
*/
bool
read_chunk_close(struct dfuse_inode_entry *ie);
read_chunk_close(struct active_inode *active);

/* Metadata caching functions. */

Expand Down
2 changes: 2 additions & 0 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,8 @@ dfuse_read_event_size(void *arg, size_t size)
ev->de_sgl.sg_nr = 1;
}

D_INIT_LIST_HEAD(&ev->de_read_slaves);

rc = daos_event_init(&ev->de_ev, ev->de_eqt->de_eq, NULL);
if (rc != -DER_SUCCESS) {
return false;
Expand Down
5 changes: 3 additions & 2 deletions src/client/dfuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ active_ie_init(struct dfuse_inode_entry *ie, bool *preread)
goto out;
}
D_INIT_LIST_HEAD(&ie->ie_active->chunks);
D_INIT_LIST_HEAD(&ie->ie_active->open_reads);
if (preread && *preread) {
D_ALLOC_PTR(ie->ie_active->readahead);
if (ie->ie_active->readahead) {
Expand Down Expand Up @@ -96,7 +97,7 @@ active_oh_decref(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh)
if (oc != 1)
goto out;

rcb = read_chunk_close(oh->doh_ie);
rcb = read_chunk_close(oh->doh_ie->ie_active);

ah_free(dfuse_info, oh->doh_ie);
out:
Expand All @@ -118,7 +119,7 @@ active_ie_decref(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
if (oc != 1)
goto out;

read_chunk_close(ie);
read_chunk_close(ie->ie_active);

ah_free(dfuse_info, ie);
out:
Expand Down
Loading
Loading