Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record job start time to fix time pain points #1621

Merged
merged 2 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions HOWTO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,10 @@ Time related parameters
calls will be excluded from other uses. Fio will manually clear it from the
CPU mask of other jobs.

.. option:: job_start_clock_id=int
The clock_id passed to the call to `clock_gettime` used to record job_start
in the `json` output format. Default is 0, or CLOCK_REALTIME.


Target file/device
~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -3966,6 +3970,13 @@ Measurements and reporting
same reporting group, unless if separated by a :option:`stonewall`, or by
using :option:`new_group`.

NOTE: When :option: `group_reporting` is used along with `json` output,
there are certain per-job properties which can be different between jobs
but do not have a natural group-level equivalent. Examples include
`kb_base`, `unit_base`, `sig_figs`, `thread_number`, `pid`, and
`job_start`. For these properties, the values for the first job are
recorded for the group.

.. option:: new_group

Start a new reporting group. See: :option:`group_reporting`. If not given,
Expand Down Expand Up @@ -4103,9 +4114,7 @@ Measurements and reporting

.. option:: log_unix_epoch=bool

If set, fio will log Unix timestamps to the log files produced by enabling
write_type_log for each log type, instead of the default zero-based
timestamps.
Backwards compatible alias for log_alternate_epoch.

.. option:: log_alternate_epoch=bool

Expand All @@ -4116,9 +4125,9 @@ Measurements and reporting

.. option:: log_alternate_epoch_clock_id=int

Specifies the clock_id to be used by clock_gettime to obtain the alternate epoch
if either log_unix_epoch or log_alternate_epoch are true. Otherwise has no
effect. Default value is 0, or CLOCK_REALTIME.
Specifies the clock_id to be used by clock_gettime to obtain the alternate
epoch if log_alternate_epoch is true. Otherwise has no effect. Default
value is 0, or CLOCK_REALTIME.

.. option:: block_error_percentiles=bool

Expand Down
2 changes: 1 addition & 1 deletion backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1858,7 +1858,7 @@ static void *thread_main(void *data)
if (rate_submit_init(td, sk_out))
goto err;

set_epoch_time(td, o->log_unix_epoch | o->log_alternate_epoch, o->log_alternate_epoch_clock_id);
set_epoch_time(td, o->log_alternate_epoch_clock_id, o->job_start_clock_id);
fio_getrusage(&td->ru_start);
memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch));
memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch));
Expand Down
4 changes: 2 additions & 2 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ int convert_thread_options_to_cpu(struct thread_options *o,
o->log_prio = le32_to_cpu(top->log_prio);
o->log_gz = le32_to_cpu(top->log_gz);
o->log_gz_store = le32_to_cpu(top->log_gz_store);
o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch);
o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
o->log_alternate_epoch_clock_id = le32_to_cpu(top->log_alternate_epoch_clock_id);
o->job_start_clock_id = le32_to_cpu(top->job_start_clock_id);
o->norandommap = le32_to_cpu(top->norandommap);
o->softrandommap = le32_to_cpu(top->softrandommap);
o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
Expand Down Expand Up @@ -455,9 +455,9 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->log_prio = cpu_to_le32(o->log_prio);
top->log_gz = cpu_to_le32(o->log_gz);
top->log_gz_store = cpu_to_le32(o->log_gz_store);
top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch);
top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
top->log_alternate_epoch_clock_id = cpu_to_le32(o->log_alternate_epoch_clock_id);
top->job_start_clock_id = cpu_to_le32(o->job_start_clock_id);
top->norandommap = cpu_to_le32(o->norandommap);
top->softrandommap = cpu_to_le32(o->softrandommap);
top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
Expand Down
1 change: 1 addition & 0 deletions client.c
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
dst->error = le32_to_cpu(src->error);
dst->thread_number = le32_to_cpu(src->thread_number);
dst->groupid = le32_to_cpu(src->groupid);
dst->job_start = le64_to_cpu(src->job_start);
dst->pid = le32_to_cpu(src->pid);
dst->members = le32_to_cpu(src->members);
dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep);
Expand Down
23 changes: 17 additions & 6 deletions fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,10 @@ copy that segment, instead of entering the kernel with a
\fBgettimeofday\fR\|(2) call. The CPU set aside for doing these time
calls will be excluded from other uses. Fio will manually clear it from the
CPU mask of other jobs.
.TP
.BI job_start_clock_id \fR=\fPint
The clock_id passed to the call to \fBclock_gettime\fR used to record job_start
in the \fBjson\fR output format. Default is 0, or CLOCK_REALTIME.
.SS "Target file/device"
.TP
.BI directory \fR=\fPstr
Expand Down Expand Up @@ -3664,6 +3668,15 @@ quickly becomes unwieldy. To see the final report per-group instead of
per-job, use \fBgroup_reporting\fR. Jobs in a file will be part of the
same reporting group, unless if separated by a \fBstonewall\fR, or by
using \fBnew_group\fR.
.RS
.P
NOTE: When \fBgroup_reporting\fR is used along with \fBjson\fR output, there
are certain per-job properties which can be different between jobs but do not
have a natural group-level equivalent. Examples include \fBkb_base\fR,
\fBunit_base\fR, \fBsig_figs\fR, \fBthread_number\fR, \fBpid\fR, and
\fBjob_start\fR. For these properties, the values for the first job are
recorded for the group.
.RE
.TP
.BI new_group
Start a new reporting group. See: \fBgroup_reporting\fR. If not given,
Expand Down Expand Up @@ -3795,9 +3808,7 @@ decompressed with fio, using the \fB\-\-inflate\-log\fR command line
parameter. The files will be stored with a `.fz' suffix.
.TP
.BI log_unix_epoch \fR=\fPbool
If set, fio will log Unix timestamps to the log files produced by enabling
write_type_log for each log type, instead of the default zero-based
timestamps.
Backward-compatible alias for \fBlog_alternate_epoch\fR.
.TP
.BI log_alternate_epoch \fR=\fPbool
If set, fio will log timestamps based on the epoch used by the clock specified
Expand All @@ -3806,9 +3817,9 @@ enabling write_type_log for each log type, instead of the default zero-based
timestamps.
.TP
.BI log_alternate_epoch_clock_id \fR=\fPint
Specifies the clock_id to be used by clock_gettime to obtain the alternate epoch
if either \fBBlog_unix_epoch\fR or \fBlog_alternate_epoch\fR are true. Otherwise has no
effect. Default value is 0, or CLOCK_REALTIME.
Specifies the clock_id to be used by clock_gettime to obtain the alternate
epoch if \fBlog_alternate_epoch\fR is true. Otherwise has no effect. Default
value is 0, or CLOCK_REALTIME.
.TP
.BI block_error_percentiles \fR=\fPbool
If set, record errors in trim block-sized units from writes and trims and
Expand Down
3 changes: 2 additions & 1 deletion fio.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,8 @@ struct thread_data {

struct timespec start; /* start of this loop */
struct timespec epoch; /* time job was started */
unsigned long long alternate_epoch; /* Time job was started, clock_gettime's clock_id epoch based. */
unsigned long long alternate_epoch; /* Time job was started, as clock_gettime(log_alternate_epoch_clock_id) */
unsigned long long job_start; /* Time job was started, as clock_gettime(job_start_clock_id) */
struct timespec last_issue;
long time_offset;
struct timespec ts_cache;
Expand Down
2 changes: 1 addition & 1 deletion fio_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ extern bool ramp_time_over(struct thread_data *);
extern bool in_ramp_time(struct thread_data *);
extern void fio_time_init(void);
extern void timespec_add_msec(struct timespec *, unsigned int);
extern void set_epoch_time(struct thread_data *, int, clockid_t);
extern void set_epoch_time(struct thread_data *, clockid_t, clockid_t);

#endif
2 changes: 1 addition & 1 deletion libfio.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void reset_all_stats(struct thread_data *td)
td->ts.runtime[i] = 0;
}

set_epoch_time(td, td->o.log_unix_epoch | td->o.log_alternate_epoch, td->o.log_alternate_epoch_clock_id);
set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id);
memcpy(&td->start, &td->epoch, sizeof(td->epoch));
memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch));
memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch));
Expand Down
22 changes: 12 additions & 10 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -4600,17 +4600,9 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.help = "Install libz-dev(el) to get compression support",
},
#endif
{
.name = "log_unix_epoch",
.lname = "Log epoch unix",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct thread_options, log_unix_epoch),
.help = "Use Unix time in log files",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
},
{
.name = "log_alternate_epoch",
.alias = "log_unix_epoch",
.lname = "Log epoch alternate",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct thread_options, log_alternate_epoch),
Expand All @@ -4623,7 +4615,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.lname = "Log alternate epoch clock_id",
.type = FIO_OPT_INT,
.off1 = offsetof(struct thread_options, log_alternate_epoch_clock_id),
.help = "If log_alternate_epoch or log_unix_epoch is true, this option specifies the clock_id from clock_gettime whose epoch should be used. If neither of those is true, this option has no effect. Default value is 0, or CLOCK_REALTIME",
.help = "If log_alternate_epoch is true, this option specifies the clock_id from clock_gettime whose epoch should be used. If log_alternate_epoch is false, this option has no effect. Default value is 0, or CLOCK_REALTIME",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
},
Expand Down Expand Up @@ -4952,6 +4944,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CLOCK,
},
{
.name = "job_start_clock_id",
.lname = "Job start clock_id",
.type = FIO_OPT_INT,
.off1 = offsetof(struct thread_options, job_start_clock_id),
.help = "The clock_id passed to the call to clock_gettime used to record job_start in the json output format. Default is 0, or CLOCK_REALTIME",
.verify = gtod_cpu_verify,
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CLOCK,
},
{
.name = "unified_rw_reporting",
.lname = "Unified RW Reporting",
Expand Down
2 changes: 1 addition & 1 deletion rate-submit.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ static int io_workqueue_init_worker_fn(struct submit_worker *sw)
if (td->io_ops->post_init && td->io_ops->post_init(td))
goto err_io_init;

set_epoch_time(td, td->o.log_unix_epoch | td->o.log_alternate_epoch, td->o.log_alternate_epoch_clock_id);
set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id);
fio_getrusage(&td->ru_start);
clear_io_state(td, 1);

Expand Down
1 change: 1 addition & 0 deletions server.c
Original file line number Diff line number Diff line change
Expand Up @@ -1706,6 +1706,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
p.ts.error = cpu_to_le32(ts->error);
p.ts.thread_number = cpu_to_le32(ts->thread_number);
p.ts.groupid = cpu_to_le32(ts->groupid);
p.ts.job_start = cpu_to_le64(ts->job_start);
p.ts.pid = cpu_to_le32(ts->pid);
p.ts.members = cpu_to_le32(ts->members);
p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep);
Expand Down
6 changes: 5 additions & 1 deletion stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -1712,6 +1712,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
root = json_create_object();
json_object_add_value_string(root, "jobname", ts->name);
json_object_add_value_int(root, "groupid", ts->groupid);
json_object_add_value_int(root, "job_start", ts->job_start);
json_object_add_value_int(root, "error", ts->error);

/* ETA Info */
Expand Down Expand Up @@ -2526,6 +2527,7 @@ void __show_run_stats(void)
*/
ts->thread_number = td->thread_number;
ts->groupid = td->groupid;
ts->job_start = td->job_start;

/*
* first pid in group, not very useful...
Expand Down Expand Up @@ -3048,7 +3050,9 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
s = get_sample(iolog, cur_log, cur_log->nr_samples);

s->data = data;
s->time = t + (iolog->td ? iolog->td->alternate_epoch : 0);
s->time = t;
if (iolog->td && iolog->td->o.log_alternate_epoch)
s->time += iolog->td->alternate_epoch;
io_sample_set_ddir(iolog, s, ddir);
s->bs = bs;
s->priority = priority;
Expand Down
1 change: 1 addition & 0 deletions stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ struct thread_stat {
uint32_t error;
uint32_t thread_number;
uint32_t groupid;
uint64_t job_start; /* Time job was started, as clock_gettime(job_start_clock_id) */
uint32_t pid;
char description[FIO_JOBDESC_SIZE];
uint32_t members;
Expand Down
8 changes: 3 additions & 5 deletions thread_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ struct thread_options {
unsigned int log_offset;
unsigned int log_gz;
unsigned int log_gz_store;
unsigned int log_unix_epoch;
unsigned int log_alternate_epoch;
unsigned int log_alternate_epoch_clock_id;
unsigned int norandommap;
Expand Down Expand Up @@ -273,6 +272,7 @@ struct thread_options {
unsigned int unified_rw_rep;
unsigned int gtod_reduce;
unsigned int gtod_cpu;
unsigned int job_start_clock_id;
enum fio_cs clocksource;
unsigned int no_stall;
unsigned int trim_percentage;
Expand Down Expand Up @@ -422,7 +422,6 @@ struct thread_options_pack {
uint32_t iodepth_batch_complete_min;
uint32_t iodepth_batch_complete_max;
uint32_t serialize_overlap;
uint32_t pad;

uint64_t size;
uint64_t io_size;
Expand All @@ -433,13 +432,11 @@ struct thread_options_pack {
uint32_t fill_device;
uint32_t file_append;
uint32_t unique_filename;
uint32_t pad3;
uint64_t file_size_low;
uint64_t file_size_high;
uint64_t start_offset;
uint64_t start_offset_align;
uint32_t start_offset_nz;
uint32_t pad4;

uint64_t bs[DDIR_RWDIR_CNT];
uint64_t ba[DDIR_RWDIR_CNT];
Expand Down Expand Up @@ -494,7 +491,6 @@ struct thread_options_pack {
uint32_t log_offset;
uint32_t log_gz;
uint32_t log_gz_store;
uint32_t log_unix_epoch;
uint32_t log_alternate_epoch;
uint32_t log_alternate_epoch_clock_id;
uint32_t norandommap;
Expand Down Expand Up @@ -593,6 +589,7 @@ struct thread_options_pack {
uint32_t unified_rw_rep;
uint32_t gtod_reduce;
uint32_t gtod_cpu;
uint32_t job_start_clock_id;
uint32_t clocksource;
uint32_t no_stall;
uint32_t trim_percentage;
Expand All @@ -603,6 +600,7 @@ struct thread_options_pack {
uint32_t lat_percentiles;
uint32_t slat_percentiles;
uint32_t percentile_precision;
uint32_t pad;
fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];

uint8_t read_iolog_file[FIO_TOP_STR_MAX];
Expand Down
20 changes: 14 additions & 6 deletions time.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,22 @@ void set_genesis_time(void)
fio_gettime(&genesis, NULL);
}

void set_epoch_time(struct thread_data *td, int log_alternate_epoch, clockid_t clock_id)
void set_epoch_time(struct thread_data *td, clockid_t log_alternate_epoch_clock_id, clockid_t job_start_clock_id)
{
struct timespec ts;
fio_gettime(&td->epoch, NULL);
if (log_alternate_epoch) {
struct timespec ts;
clock_gettime(clock_id, &ts);
td->alternate_epoch = (unsigned long long)(ts.tv_sec) * 1000 +
(unsigned long long)(ts.tv_nsec) / 1000000;
clock_gettime(log_alternate_epoch_clock_id, &ts);
td->alternate_epoch = (unsigned long long)(ts.tv_sec) * 1000 +
(unsigned long long)(ts.tv_nsec) / 1000000;
if (job_start_clock_id == log_alternate_epoch_clock_id)
{
td->job_start = td->alternate_epoch;
}
else
{
clock_gettime(job_start_clock_id, &ts);
td->job_start = (unsigned long long)(ts.tv_sec) * 1000 +
(unsigned long long)(ts.tv_nsec) / 1000000;
}
}

Expand Down