Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fdp: support scheme placement id selection #1757

Merged
merged 2 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions HOWTO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2529,8 +2529,12 @@ with the caveat that when used on the command line, they must come after the
Round robin over available placement IDs. This is the
default.

The available placement ID (indices) are defined by the option
:option:`plids`.
**scheme**
Choose a placement ID (index) based on the scheme file defined by
the option :option:`dp_scheme`.

The available placement ID (indices) are defined by the option :option:`fdp_pli`
or :option:`plids` except for the case of **scheme**.

.. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme]

Expand All @@ -2541,6 +2545,26 @@ with the caveat that when used on the command line, they must come after the
identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For
streams this should be a comma-separated list of Stream IDs.

.. option:: dp_scheme=str : [io_uring_cmd] [xnvme]

Defines which placement ID (index) is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:

0, 10737418240, 0
10737418240, 21474836480, 1
21474836480, 32212254720, 2
...

Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
(index), separated by commas (,). If the write offset is within the range of a
scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
(index) will be selected. If the write offset falls within multiple scheme entries,
the first matching scheme entry will be applied. If the offset is not within the range
of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: if a
job uses multiple devices, all devices of the job will be affected by the scheme. If
this option is specified, the option :option:`plids` or :option:`fdp_pli` will be
ignored.)

.. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]

Size in bytes for separate metadata buffer per IO. Default: 0.
Expand Down
2 changes: 2 additions & 0 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
string_to_cpu(&o->ioscheduler, top->ioscheduler);
string_to_cpu(&o->profile, top->profile);
string_to_cpu(&o->cgroup, top->cgroup);
string_to_cpu(&o->dp_scheme_file, top->dp_scheme_file);

o->allow_create = le32_to_cpu(top->allow_create);
o->allow_mounted_write = le32_to_cpu(top->allow_mounted_write);
Expand Down Expand Up @@ -398,6 +399,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
string_to_net(top->ioscheduler, o->ioscheduler);
string_to_net(top->profile, o->profile);
string_to_net(top->cgroup, o->cgroup);
string_to_net(top->dp_scheme_file, o->dp_scheme_file);

top->allow_create = cpu_to_le32(o->allow_create);
top->allow_mounted_write = cpu_to_le32(o->allow_mounted_write);
Expand Down
78 changes: 78 additions & 0 deletions dataplacement.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,56 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f)
return ret;
}

static int init_ruh_scheme(struct thread_data *td, struct fio_file *f)
{
struct fio_ruhs_scheme *ruh_scheme;
FILE *scheme_fp;
unsigned long long start, end;
uint16_t pli;
int ret = 0;

if (td->o.dp_id_select != FIO_DP_SCHEME)
return 0;

/* Get the scheme from the file */
scheme_fp = fopen(td->o.dp_scheme_file, "r");

if (!scheme_fp) {
log_err("fio: ruh scheme failed to open scheme file %s\n",
td->o.dp_scheme_file);
ret = -errno;
goto out;
}

ruh_scheme = scalloc(1, sizeof(*ruh_scheme));
if (!ruh_scheme) {
ret = -ENOMEM;
goto out_with_close_fp;
}

for (int i = 0;
i < DP_MAX_SCHEME_ENTRIES && fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3;
i++) {

ruh_scheme->scheme_entries[i].start_offset = start;
ruh_scheme->scheme_entries[i].end_offset = end;
ruh_scheme->scheme_entries[i].pli = pli;
ruh_scheme->nr_schemes++;
}

if (fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3)
log_info("fio: too many scheme entries in %s. Only the first %d scheme entries are applied\n",
td->o.dp_scheme_file,
DP_MAX_SCHEME_ENTRIES);

f->ruhs_scheme = ruh_scheme;

out_with_close_fp:
fclose(scheme_fp);
out:
return ret;
}

int dp_init(struct thread_data *td)
{
struct fio_file *f;
Expand All @@ -109,6 +159,10 @@ int dp_init(struct thread_data *td)
ret = init_ruh_info(td, f);
if (ret)
break;

ret = init_ruh_scheme(td, f);
if (ret)
break;
}
return ret;
}
Expand All @@ -119,6 +173,11 @@ void fdp_free_ruhs_info(struct fio_file *f)
return;
sfree(f->ruhs_info);
f->ruhs_info = NULL;

if (!f->ruhs_scheme)
return;
sfree(f->ruhs_scheme);
f->ruhs_scheme = NULL;
}

void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
Expand All @@ -138,6 +197,25 @@ void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
ruhs->pli_loc = 0;

dspec = ruhs->plis[ruhs->pli_loc++];
} else if (td->o.dp_id_select == FIO_DP_SCHEME) {
struct fio_ruhs_scheme *ruhs_scheme = f->ruhs_scheme;
unsigned long long offset = io_u->offset;
int i;

for (i = 0; i < ruhs_scheme->nr_schemes; i++) {
if (offset >= ruhs_scheme->scheme_entries[i].start_offset &&
offset < ruhs_scheme->scheme_entries[i].end_offset) {
dspec = ruhs_scheme->scheme_entries[i].pli;
break;
}
}

/*
* If the write offset is not affected by any scheme entry,
* 0(default RUH) will be assigned to dspec
*/
if (i == ruhs_scheme->nr_schemes)
dspec = 0;
} else {
ruhs->pli_loc = rand_between(&td->fdp_state, 0, ruhs->nr_ruhs - 1);
dspec = ruhs->plis[ruhs->pli_loc];
Expand Down
14 changes: 13 additions & 1 deletion dataplacement.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define FDP_DIR_DTYPE 2
#define FDP_MAX_RUHS 128
#define FIO_MAX_DP_IDS 16
#define DP_MAX_SCHEME_ENTRIES 32

/*
* How fio chooses what placement identifier to use next. Choice of
Expand All @@ -15,9 +16,9 @@
enum {
/* pick a placement ID at random */
FIO_DP_RANDOM = 0x1,
/* round robin over the available placement IDs */
FIO_DP_RR = 0x2,
/* pick the placement ID from the offset ranges of a scheme file */
FIO_DP_SCHEME = 0x3,
};


enum {
FIO_DP_NONE = 0x0,
FIO_DP_FDP = 0x1,
Expand All @@ -30,6 +31,17 @@ struct fio_ruhs_info {
uint16_t plis[];
};

/*
 * One scheme entry: maps the half-open offset range
 * [start_offset, end_offset) to a placement ID (index).
 */
struct fio_ruhs_scheme_entry {
/* first offset covered by this entry (inclusive) */
unsigned long long start_offset;
/* end of the covered range (exclusive) */
unsigned long long end_offset;
/* placement ID (index) used as dspec for offsets in the range */
uint16_t pli;
};

/*
 * Table of scheme entries; entries are searched in order and the
 * first matching range wins.
 */
struct fio_ruhs_scheme {
/* number of valid entries at the start of scheme_entries[] */
uint16_t nr_schemes;
/* fixed-capacity storage for up to DP_MAX_SCHEME_ENTRIES entries */
struct fio_ruhs_scheme_entry scheme_entries[DP_MAX_SCHEME_ENTRIES];
};

int dp_init(struct thread_data *td);
void fdp_free_ruhs_info(struct fio_file *f);
void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u);
Expand Down
1 change: 1 addition & 0 deletions file.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct fio_file {
uint64_t io_size;

struct fio_ruhs_info *ruhs_info;
struct fio_ruhs_scheme *ruhs_scheme;

/*
* Zoned block device information. See also zonemode=zbd.
Expand Down
32 changes: 31 additions & 1 deletion fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -2294,9 +2294,14 @@ Choose a placement ID at random (uniform).
.TP
.B roundrobin
Round robin over available placement IDs. This is the default.
.TP
.B scheme
Choose a placement ID (index) based on the scheme file defined by
the option \fBdp_scheme\fP.
.RE
.P
The available placement ID (indices) are defined by the \fBplids\fR option.
The available placement ID (indices) are defined by \fBplids\fR or
\fBfdp_pli\fR option except for the case of \fBscheme\fP.
.RE
.TP
.BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr
Expand All @@ -2307,6 +2312,31 @@ jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
specify, you would set `plids=0,2,5`. For streams this should be a
comma-separated list of Stream IDs.
.TP
.BI (io_uring_cmd,xnvme)dp_scheme \fR=\fPstr
Defines which placement ID (index) is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:
.sp
.RS
.RS
0, 10737418240, 0
.br
10737418240, 21474836480, 1
.br
21474836480, 32212254720, 2
.br
\&...
.RE
.sp
Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
(index), separated by commas (,). If the write offset is within the range of a
scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
(index) will be selected. If the write offset falls within multiple scheme entries,
the first matching scheme entry will be applied. If the offset is not within the range
of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: if a
job uses multiple devices, all devices of the job will be affected by the scheme. If
this option is specified, the option \fBplids\fP or \fBfdp_pli\fP will be ignored.)
.RE
.TP
.BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
Expand Down
52 changes: 52 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,43 @@ static int str_fdp_pli_cb(void *data, const char *input)
return 0;
}

/*
 * str_dp_scheme_cb() is the callback for parsing the dp_scheme option.
 *
 * It trims leading and trailing blanks from the stored scheme file name
 * (td->o.dp_scheme_file, updated in place) and verifies that the name refers
 * to a regular file. Nothing is checked during a parse dry run.
 *
 * Returns 0 on success, or a positive errno value on failure.
 */
static int str_dp_scheme_cb(void *data, const char *input)
{
	struct thread_data *td = cb_data_to_td(data);
	struct stat sb;
	char *buf, *filename;
	int ret = 0;

	if (parse_dryrun())
		return 0;

	buf = strdup(td->o.dp_scheme_file);
	if (!buf) {
		ret = ENOMEM;
		td_verror(td, ret, "strdup");
		return ret;
	}

	/*
	 * strip_blank_front() may advance the pointer it is given, so trim
	 * through a second pointer and keep 'buf' intact for free().
	 */
	filename = buf;
	strip_blank_front(&filename);
	strip_blank_end(filename);

	/* The trimmed name is never longer than the stored one. */
	strcpy(td->o.dp_scheme_file, filename);

	if (lstat(filename, &sb) < 0) {
		ret = errno;
		log_err("fio: lstat() error related to %s\n", filename);
		td_verror(td, ret, "lstat");
		goto out;
	}

	if (!S_ISREG(sb.st_mode)) {
		/*
		 * S_ISREG() does not set errno; report an explicit error so
		 * the failure is not returned as success.
		 */
		ret = EINVAL;
		log_err("fio: %s is not a file\n", filename);
		td_verror(td, ret, "S_ISREG");
		goto out;
	}

out:
	free(buf);
	return ret;
}

static int str_bssplit_cb(void *data, const char *input)
{
struct thread_data *td = cb_data_to_td(data);
Expand Down Expand Up @@ -3760,6 +3797,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.oval = FIO_DP_RR,
.help = "Round robin select Placement IDs",
},
{ .ival = "scheme",
.oval = FIO_DP_SCHEME,
.help = "Use a scheme(based on LBA) to select Placement IDs",
},
},
},
{
Expand All @@ -3774,6 +3815,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "dp_scheme",
.lname = "Data Placement Scheme",
.type = FIO_OPT_STR_STORE,
.cb = str_dp_scheme_cb,
.off1 = offsetof(struct thread_options, dp_scheme_file),
.maxlen = PATH_MAX,
.help = "scheme file that specifies offset-RUH mapping",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "lockmem",
.lname = "Lock memory",
Expand Down
2 changes: 1 addition & 1 deletion server.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
};

enum {
FIO_SERVER_VER = 104,
FIO_SERVER_VER = 105,

FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
Expand Down
Loading