Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-12751 control: Add a daos filesystem evict command. #12331

Merged
merged 16 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/user/filesystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,30 @@ These are two command line options to control the DFuse process itself.

These will affect all containers accessed via DFuse, regardless of any container attributes.

### Managing memory usage and disconnecting from containers

DFuse can be instructed to evict paths from local memory, which drops any open handles on containers
or pools as well as reducing the working set size and memory consumption. This is an asynchronous
operation and there is no automatic way to tell if it has completed. In addition, any lookup of the
path specified in the eviction call will cause a new lookup and prevent the eviction from
completing.
ashleypittman marked this conversation as resolved.
Show resolved Hide resolved

Paths can be requested for eviction from dfuse using the `daos filesystem evict` command. This does
not change any data that is stored in DAOS in any way but rather releases local resources. This
command will return the inode number of the path as well as key dfuse metrics.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Paths can be requested for eviction from dfuse using the `daos filesystem evict` command, this does
not change any data that is stored in DAOS in any way but rather releases local resources. This
command will return the inode number of the path as well as key dfuse metrics.
Paths can be requested for eviction from dfuse using the `daos filesystem evict` command. This does
not change any data that is stored in DAOS in any way but rather releases local resources. This
command will return the inode number of the path as well as key dfuse metrics.


DFuse metrics can be queried with the `daos filesystem query` command which takes an optional
`--inode` parameter. This will return information on the number of inodes held in memory, the
number of open files as well as the number of pools and containers that DFuse is connected to. If
the `--inode` option is given then this command will also report if the inode is in memory or not.

Together these two commands can be used to request eviction of a path and to poll for its release,
although lookups from other processes might block the eviction process.
ashleypittman marked this conversation as resolved.
Show resolved Hide resolved

If `daos filesystem evict` is passed the root of the DFuse mount then the path itself cannot be
evicted. In this case all top-level entries in the directory are evicted instead and no inode
number is returned.
ashleypittman marked this conversation as resolved.
Show resolved Hide resolved

### Permissions

DFuse can serve data from any user's container, but needs appropriate permissions in order to do
Expand Down
5 changes: 3 additions & 2 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ struct dfuse_obj_hdl {
bool doh_kreaddir_started;
/* Set to true if readdir calls reach EOF made on this handle */
bool doh_kreaddir_finished;

bool doh_evict_on_close;
};

/* Readdir support.
Expand Down Expand Up @@ -707,8 +709,7 @@ struct fuse_lowlevel_ops dfuse_ops;
strerror(-__rc)); \
} while (0)

#define DFUSE_REPLY_IOCTL(desc, req, arg) \
DFUSE_REPLY_IOCTL_SIZE(desc, req, &(arg), sizeof(arg))
#define DFUSE_REPLY_IOCTL(desc, req, arg) DFUSE_REPLY_IOCTL_SIZE(desc, req, &(arg), sizeof(arg))

/**
* Inode handle.
Expand Down
54 changes: 40 additions & 14 deletions src/client/dfuse/ops/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,31 @@ handle_dooh_ioctl(struct dfuse_obj_hdl *oh, size_t size, fuse_req_t req)
DFUSE_REPLY_ERR_RAW(oh, req, rc);
}

/*
 * Common reply path shared by the query and evict ioctl handlers.
 *
 * Builds a struct dfuse_mem_query reply holding the current inode, file handle, pool and
 * container counters and sends it back on \a req.
 *
 * \param[in] req	The fuse request to reply to.
 * \param[in] in_query	Optional request payload, may be NULL.  When it is non-NULL and carries a
 *			non-zero inode number the reply's "found" field reports whether that inode
 *			is present in the dpi_iet hash table.
 */
static void
handle_cont_qe_ioctl_helper(fuse_req_t req, const struct dfuse_mem_query *in_query)
{
	struct dfuse_info     *dfuse_info = fuse_req_userdata(req);
	struct dfuse_mem_query reply      = {};
	d_list_t              *entry      = NULL;

	/* An inode lookup is only attempted when the caller supplied a non-zero inode number */
	if (in_query != NULL && in_query->ino != 0)
		entry = d_hash_rec_find(&dfuse_info->dpi_iet, &in_query->ino,
					sizeof(in_query->ino));

	if (entry != NULL) {
		reply.found = true;
		/* Only the presence of the record matters, so balance the find with a decref */
		d_hash_rec_decref(&dfuse_info->dpi_iet, entry);
	}

	reply.inode_count     = atomic_load_relaxed(&dfuse_info->di_inode_count);
	reply.fh_count        = atomic_load_relaxed(&dfuse_info->di_fh_count);
	reply.pool_count      = atomic_load_relaxed(&dfuse_info->di_pool_count);
	reply.container_count = atomic_load_relaxed(&dfuse_info->di_container_count);

	DFUSE_REPLY_IOCTL(dfuse_info, req, reply);
}

static void
handle_cont_query_ioctl(fuse_req_t req, const void *in_buf, size_t in_bufsz)
{
Expand All @@ -308,22 +333,22 @@ handle_cont_query_ioctl(fuse_req_t req, const void *in_buf, size_t in_bufsz)
if (in_bufsz != sizeof(query))
D_GOTO(err, rc = EIO);

/* Not supported yet, future-proofing for DAOS-12751 */
if (in_query->ino != 0)
D_GOTO(err, rc = EIO);

query.inode_count = atomic_load_relaxed(&dfuse_info->di_inode_count);
query.fh_count = atomic_load_relaxed(&dfuse_info->di_fh_count);
query.pool_count = atomic_load_relaxed(&dfuse_info->di_pool_count);
query.container_count = atomic_load_relaxed(&dfuse_info->di_container_count);

DFUSE_REPLY_IOCTL_SIZE(dfuse_info, req, &query, sizeof(query));
handle_cont_qe_ioctl_helper(req, in_query);
return;

err:
DFUSE_REPLY_ERR_RAW(dfuse_info, req, rc);
}

/*
 * Handler for the evict ioctl.
 *
 * Sets doh_evict_on_close on the open handle and then replies with the current dfuse counters.
 * No inode lookup is requested, so the helper is called with a NULL query.
 *
 * \param[in] req	The fuse request to reply to.
 * \param[in] oh	Open handle the ioctl was issued on.
 * \param[in] in_buf	Input buffer, not consulted by this handler.
 * \param[in] in_bufsz	Size of \a in_buf, not consulted by this handler.
 */
static void
handle_cont_evict_ioctl(fuse_req_t req, struct dfuse_obj_hdl *oh, const void *in_buf,
			size_t in_bufsz)
{
	/* The input payload is intentionally ignored */
	(void)in_buf;
	(void)in_bufsz;

	oh->doh_evict_on_close = true;
	handle_cont_qe_ioctl_helper(req, NULL);
}

#ifdef FUSE_IOCTL_USE_INT
void dfuse_cb_ioctl(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg,
struct fuse_file_info *fi, unsigned int flags,
Expand Down Expand Up @@ -363,10 +388,11 @@ void dfuse_cb_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,

DFUSE_TRA_DEBUG(oh, "ioctl cmd=%#x", cmd);

if (cmd == DFUSE_IOCTL_COUNT_QUERY) {
handle_cont_query_ioctl(req, in_buf, in_bufsz);
return;
}
if (cmd == DFUSE_IOCTL_COUNT_QUERY)
return handle_cont_query_ioctl(req, in_buf, in_bufsz);

if (cmd == DFUSE_IOCTL_DFUSE_EVICT)
return handle_cont_evict_ioctl(req, oh, in_buf, in_bufsz);

if (cmd == DFUSE_IOCTL_IL) {
if (out_bufsz < sizeof(struct dfuse_il_reply))
Expand Down
12 changes: 11 additions & 1 deletion src/client/dfuse/ops/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
struct dfuse_projection_info *fs_handle = fuse_req_userdata(req);
struct dfuse_inode_entry *ie;
d_list_t *rlink;
d_list_t *rlink;
struct dfuse_obj_hdl *oh = NULL;
struct fuse_file_info fi_out = {0};
int rc;
Expand Down Expand Up @@ -153,5 +153,15 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
DFUSE_REPLY_ZERO(oh, req);
else
DFUSE_REPLY_ERR_RAW(oh, req, rc);

if (oh->doh_evict_on_close) {
rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, oh->doh_ie->ie_parent,
oh->doh_ie->ie_name,
strnlen(oh->doh_ie->ie_name, NAME_MAX));

if (rc != 0)
DFUSE_TRA_ERROR(oh->doh_ie, "inval_entry() returned: %d (%s)", rc,
strerror(-rc));
}
dfuse_oh_free(dfuse_info, oh);
}
11 changes: 11 additions & 0 deletions src/client/dfuse/ops/opendir.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,16 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f

DFUSE_REPLY_ZERO(oh, req);
dfuse_dre_drop(dfuse_info, oh);
if (oh->doh_evict_on_close) {
int rc;

rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, oh->doh_ie->ie_parent,
oh->doh_ie->ie_name,
strnlen(oh->doh_ie->ie_name, NAME_MAX));

if (rc != 0)
DFUSE_TRA_ERROR(oh->doh_ie, "inval_entry() returned: %d (%s)", rc,
strerror(-rc));
}
dfuse_oh_free(dfuse_info, oh);
};
89 changes: 85 additions & 4 deletions src/control/cmd/daos/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type fsCmd struct {
ResetChunkSize fsResetChunkSizeCmd `command:"reset-chunk-size" description:"reset fs chunk size"`
ResetObjClass fsResetOclassCmd `command:"reset-oclass" description:"reset fs obj class"`
DfuseQuery fsDfuseQueryCmd `command:"query" description:"Query dfuse for memory usage"`
DfuseEvict fsDfuseEvictCmd `command:"evict" description:"Evict object from dfuse"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is ONLY for dfuse - not DFS - maybe evict -> dfuse-evict?
Or maybe it's okay if it only makes sense for dfuse anyway

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only thing all these commands have in common is they're for POSIX containers and therefore DFS at some level. evict doesn't mean anything in dfs directly that I'm aware of.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one thing i was thinking about here is that the daos fs options are on the container (server side), but these dfuse commands apply only on the client side (just the node they are running on). so they are just local operations and only executed on the node where this runs.
i don't know if that counts as grounds that we need to split this off into 2 tools or just have an extension to the dfuse command, or just keep it as it is. what do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discussed this extensively in the working group calls, this was your suggestion.

I'd be fine with making a new subcommand for the daos command other than daos filesystem but I don't want to do it in this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes i know, and i was just rethinking this. im fine to keeping it that way for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should just add a comment that this is a local client operation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree though, there's a clear distinction between commands that have an effect on the container itself vs commands that change local in-memory state.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something else to keep in mind is that we currently don't guarantee backwards compatibility for the command line, but I wonder if that will/should be a requirement in the future, since changing the command line interface later can be disruptive to deployment, job setup/cleanup, etc.

}

type fsCopyCmd struct {
Expand Down Expand Up @@ -433,6 +434,8 @@ func (cmd *fsFixRootCmd) Execute(_ []string) error {
type fsDfuseQueryCmd struct {
daosCmd

Ino uint64 `long:"inode" description:"inode number to query"`

Args struct {
Path string `positional-arg-name:"path" description:"DFuse path to query" required:"1"`
} `positional-args:"yes"`
Expand All @@ -448,31 +451,109 @@ func (cmd *fsDfuseQueryCmd) Execute(_ []string) error {
defer freeString(ap.path)
defer deallocCmdArgs()

if cmd.Ino != 0 {
ap.dfuse_mem.ino = C.ulong(cmd.Ino)
}

rc := C.dfuse_count_query(ap)
if err := daosError(rc); err != nil {
return errors.Wrapf(err, "failed to query %s", cmd.Args.Path)
}

if cmd.JSONOutputEnabled() {
if cmd.Ino == 0 {
jsonAttrs := &struct {
NumInodes uint64 `json:"inodes"`
NumFileHandles uint64 `json:"open_files"`
NumPools uint64 `json:"pools"`
NumContainers uint64 `json:"containers"`
}{
NumInodes: uint64(ap.dfuse_mem.inode_count),
NumFileHandles: uint64(ap.dfuse_mem.fh_count),
NumPools: uint64(ap.dfuse_mem.pool_count),
NumContainers: uint64(ap.dfuse_mem.container_count),
}
return cmd.OutputJSON(jsonAttrs, nil)
} else {
jsonAttrs := &struct {
NumInodes uint64 `json:"inodes"`
NumFileHandles uint64 `json:"open_files"`
NumPools uint64 `json:"pools"`
NumContainers uint64 `json:"containers"`
Found bool `json:"resident"`
}{
NumInodes: uint64(ap.dfuse_mem.inode_count),
NumFileHandles: uint64(ap.dfuse_mem.fh_count),
NumPools: uint64(ap.dfuse_mem.pool_count),
NumContainers: uint64(ap.dfuse_mem.container_count),
Found: bool(ap.dfuse_mem.found),
}
return cmd.OutputJSON(jsonAttrs, nil)
}
}

cmd.Infof("DFuse descriptor usage.")
cmd.Infof(" Pools: %d", ap.dfuse_mem.pool_count)
cmd.Infof(" Containers: %d", ap.dfuse_mem.container_count)
cmd.Infof(" Inodes: %d", ap.dfuse_mem.inode_count)
cmd.Infof(" Open files: %d", ap.dfuse_mem.fh_count)
if cmd.Ino != 0 {
if ap.dfuse_mem.found {
cmd.Infof(" Inode %#lx resident", cmd.Ino)
} else {
cmd.Infof(" Inode %#lx not resident", cmd.Ino)
}
}

return nil
}

// fsDfuseEvictCmd implements the `daos filesystem evict` subcommand.
//
// This is a local client operation: it asks dfuse to evict the given path from memory
// and does not change any data stored in DAOS.
type fsDfuseEvictCmd struct {
daosCmd

// Args holds the single required positional argument: the path to evict.
Args struct {
Path string `positional-arg-name:"path" description:"Path to evict from dfuse" required:"1"`
} `positional-args:"yes"`
}

// Execute runs the evict subcommand.
//
// It passes cmd.Args.Path to C.dfuse_evict and then reports the values left in
// ap.dfuse_mem (pool, container, inode and open-file counts plus the inode number),
// either as a JSON object when JSON output is enabled or as log lines otherwise.
// A non-zero return from C.dfuse_evict is converted by daosError and returned wrapped
// with the path that failed.
func (cmd *fsDfuseEvictCmd) Execute(_ []string) error {
ap, deallocCmdArgs, err := allocCmdArgs(cmd.Logger)
if err != nil {
return err
}

// The argument to freeString is evaluated here, when the defer statement runs, so it
// stays valid even though deferred calls run in reverse order (deallocCmdArgs first).
ap.path = C.CString(cmd.Args.Path)
defer freeString(ap.path)
defer deallocCmdArgs()

rc := C.dfuse_evict(ap)
if err := daosError(rc); err != nil {
return errors.Wrapf(err, "failed to evict %s", cmd.Args.Path)
}

if cmd.JSONOutputEnabled() {
// "inode" carries omitempty, so a zero inode number is left out of the JSON output.
jsonAttrs := &struct {
NumInodes uint64 `json:"inodes"`
NumFileHandles uint64 `json:"open_files"`
NumPools uint64 `json:"pools"`
NumContainers uint64 `json:"containers"`
Inode uint64 `json:"inode,omitempty"`
}{
NumInodes: uint64(ap.dfuse_mem.inode_count),
NumFileHandles: uint64(ap.dfuse_mem.fh_count),
NumPools: uint64(ap.dfuse_mem.pool_count),
NumContainers: uint64(ap.dfuse_mem.container_count),
Inode: uint64(ap.dfuse_mem.ino),
}
return cmd.OutputJSON(jsonAttrs, nil)
}

// NOTE(review): the four counter lines below appear twice in this view. This looks like
// a diff rendering that shows removed and added lines together -- confirm against the
// merged file before relying on the exact text output.
cmd.Infof("DFuse descriptor usage.")
cmd.Infof(" Pools: %d", ap.dfuse_mem.pool_count)
cmd.Infof(" Containers: %d", ap.dfuse_mem.container_count)
cmd.Infof(" Inodes: %d", ap.dfuse_mem.inode_count)
cmd.Infof(" Open files: %d", ap.dfuse_mem.fh_count)
cmd.Infof(" Evicted inode: %d", ap.dfuse_mem.ino)
cmd.Infof(" Pools: %d", ap.dfuse_mem.pool_count)
cmd.Infof(" Containers: %d", ap.dfuse_mem.container_count)
cmd.Infof(" Inodes: %d", ap.dfuse_mem.inode_count)
cmd.Infof(" Open files: %d", ap.dfuse_mem.fh_count)

return nil
}
6 changes: 5 additions & 1 deletion src/include/dfuse_ioctl.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2022 Intel Corporation.
* (C) Copyright 2017-2023 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -27,6 +27,7 @@

#define DFUSE_IOCTL_R_DFUSE_USER (DFUSE_IOCTL_REPLY_BASE + 9)
#define DFUSE_COUNT_QUERY_CMD (DFUSE_IOCTL_REPLY_BASE + 10)
#define DFUSE_IOCTL_EVICT_NR (DFUSE_IOCTL_REPLY_BASE + 11)

/** Metadata caching is enabled for this file */
#define DFUSE_IOCTL_FLAGS_MCACHE (0x1)
Expand Down Expand Up @@ -87,4 +88,7 @@ struct dfuse_mem_query {
#define DFUSE_IOCTL_COUNT_QUERY \
((int)_IOWR(DFUSE_IOCTL_TYPE, DFUSE_COUNT_QUERY_CMD, struct dfuse_mem_query))

/** ioctl command for eviction: a read-direction (_IOR) request using struct dfuse_mem_query */
#define DFUSE_IOCTL_DFUSE_EVICT \
((int)_IOR(DFUSE_IOCTL_TYPE, DFUSE_IOCTL_EVICT_NR, struct dfuse_mem_query))

#endif /* __DFUSE_IOCTL_H__ */
Loading