From 12d325cac3a05f2eb1fb0f4026346129d38c023b Mon Sep 17 00:00:00 2001 From: aggieNick02 Date: Fri, 1 Sep 2023 10:50:34 -0500 Subject: [PATCH 1/2] Record job start time to fix time pain points Add a new key in the json per-job output, job_start, that records the job start time obtained via a call to clock_gettime using the clock_id specified by the new job_start_clock_id option. This allows times of fio jobs and log entries to be compared/ordered against each other and against other system events recorded against the same clock_id. Add a note to the documentation for group_reporting about how there are several per-job values for which only the first job's value is recorded in the json output format when group_reporting is enabled. Fixes #1544 Signed-off-by: Nick Neumann nick@pcpartpicker.com --- HOWTO.rst | 11 +++++++++++ backend.c | 2 +- cconv.c | 2 ++ client.c | 1 + fio.1 | 13 +++++++++++++ fio.h | 3 ++- fio_time.h | 2 +- libfio.c | 2 +- options.c | 10 ++++++++++ rate-submit.c | 2 +- server.c | 1 + stat.c | 6 +++++- stat.h | 1 + thread_options.h | 7 ++++--- time.c | 20 ++++++++++++++------ 15 files changed, 68 insertions(+), 15 deletions(-) diff --git a/HOWTO.rst b/HOWTO.rst index 8903294156..9c825cc27d 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -755,6 +755,10 @@ Time related parameters calls will be excluded from other uses. Fio will manually clear it from the CPU mask of other jobs. +.. option:: job_start_clock_id=int + The clock_id passed to the call to `clock_gettime` used to record job_start + in the `json` output format. Default is 0, or CLOCK_REALTIME. + Target file/device ~~~~~~~~~~~~~~~~~~ @@ -3966,6 +3970,13 @@ Measurements and reporting same reporting group, unless if separated by a :option:`stonewall`, or by using :option:`new_group`. + NOTE: When :option: `group_reporting` is used along with `json` output, + there are certain per-job properties which can be different between jobs + but do not have a natural group-level equivalent. Examples include + `kb_base`, `unit_base`, `sig_figs`, `thread_number`, `pid`, and + `job_start`. For these properties, the values for the first job are + recorded for the group. + .. option:: new_group Start a new reporting group. See: :option:`group_reporting`. If not given, diff --git a/backend.c b/backend.c index 5f0740395b..a5895fec4d 100644 --- a/backend.c +++ b/backend.c @@ -1858,7 +1858,7 @@ static void *thread_main(void *data) if (rate_submit_init(td, sk_out)) goto err; - set_epoch_time(td, o->log_unix_epoch | o->log_alternate_epoch, o->log_alternate_epoch_clock_id); + set_epoch_time(td, o->log_alternate_epoch_clock_id, o->job_start_clock_id); fio_getrusage(&td->ru_start); memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch)); memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch)); diff --git a/cconv.c b/cconv.c index ce6acbe6ab..b0127dd870 100644 --- a/cconv.c +++ b/cconv.c @@ -219,6 +219,7 @@ int convert_thread_options_to_cpu(struct thread_options *o, o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch); o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch); o->log_alternate_epoch_clock_id = le32_to_cpu(top->log_alternate_epoch_clock_id); + o->job_start_clock_id = le32_to_cpu(top->job_start_clock_id); o->norandommap = le32_to_cpu(top->norandommap); o->softrandommap = le32_to_cpu(top->softrandommap); o->bs_unaligned = le32_to_cpu(top->bs_unaligned); @@ -458,6 +459,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch); top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch); top->log_alternate_epoch_clock_id = cpu_to_le32(o->log_alternate_epoch_clock_id); + top->job_start_clock_id = cpu_to_le32(o->job_start_clock_id); top->norandommap = cpu_to_le32(o->norandommap); top->softrandommap = cpu_to_le32(o->softrandommap); top->bs_unaligned = cpu_to_le32(o->bs_unaligned); diff --git a/client.c b/client.c index c257036bf5..345fa9101b 100644 --- a/client.c +++ b/client.c @@ -956,6 +956,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->error = le32_to_cpu(src->error); dst->thread_number = le32_to_cpu(src->thread_number); dst->groupid = le32_to_cpu(src->groupid); + dst->job_start = le64_to_cpu(src->job_start); dst->pid = le32_to_cpu(src->pid); dst->members = le32_to_cpu(src->members); dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep); diff --git a/fio.1 b/fio.1 index f0dc49ab00..3e5b840c94 100644 --- a/fio.1 +++ b/fio.1 @@ -537,6 +537,10 @@ copy that segment, instead of entering the kernel with a \fBgettimeofday\fR\|(2) call. The CPU set aside for doing these time calls will be excluded from other uses. Fio will manually clear it from the CPU mask of other jobs. +.TP +.BI job_start_clock_id \fR=\fPint +The clock_id passed to the call to \fBclock_gettime\fR used to record job_start +in the \fBjson\fR output format. Default is 0, or CLOCK_REALTIME. .SS "Target file/device" .TP .BI directory \fR=\fPstr @@ -3664,6 +3668,15 @@ quickly becomes unwieldy. To see the final report per-group instead of per-job, use \fBgroup_reporting\fR. Jobs in a file will be part of the same reporting group, unless if separated by a \fBstonewall\fR, or by using \fBnew_group\fR. +.RS +.P +NOTE: When \fBgroup_reporting\fR is used along with \fBjson\fR output, there +are certain per-job properties which can be different between jobs but do not +have a natural group-level equivalent. Examples include \fBkb_base\fR, +\fBunit_base\fR, \fBsig_figs\fR, \fBthread_number\fR, \fBpid\fR, and +\fBjob_start\fR. For these properties, the values for the first job are +recorded for the group. +.RE .TP .BI new_group Start a new reporting group. See: \fBgroup_reporting\fR. If not given, diff --git a/fio.h b/fio.h index a54f57c93e..1322656fd4 100644 --- a/fio.h +++ b/fio.h @@ -388,7 +388,8 @@ struct thread_data { struct timespec start; /* start of this loop */ struct timespec epoch; /* time job was started */ - unsigned long long alternate_epoch; /* Time job was started, clock_gettime's clock_id epoch based. */ + unsigned long long alternate_epoch; /* Time job was started, as clock_gettime(log_alternate_epoch_clock_id) */ + unsigned long long job_start; /* Time job was started, as clock_gettime(job_start_clock_id) */ struct timespec last_issue; long time_offset; struct timespec ts_cache; diff --git a/fio_time.h b/fio_time.h index 62d92120a5..b20e734c4a 100644 --- a/fio_time.h +++ b/fio_time.h @@ -30,6 +30,6 @@ extern bool ramp_time_over(struct thread_data *); extern bool in_ramp_time(struct thread_data *); extern void fio_time_init(void); extern void timespec_add_msec(struct timespec *, unsigned int); -extern void set_epoch_time(struct thread_data *, int, clockid_t); +extern void set_epoch_time(struct thread_data *, clockid_t, clockid_t); #endif diff --git a/libfio.c b/libfio.c index 237ce34cb3..5c43327769 100644 --- a/libfio.c +++ b/libfio.c @@ -149,7 +149,7 @@ void reset_all_stats(struct thread_data *td) td->ts.runtime[i] = 0; } - set_epoch_time(td, td->o.log_unix_epoch | td->o.log_alternate_epoch, td->o.log_alternate_epoch_clock_id); + set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id); memcpy(&td->start, &td->epoch, sizeof(td->epoch)); memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch)); memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch)); diff --git a/options.c b/options.c index 48aa0d7b1c..281ea609e4 100644 --- a/options.c +++ b/options.c @@ -4952,6 +4952,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_GENERAL, .group = FIO_OPT_G_CLOCK, }, + { + .name = "job_start_clock_id", + .lname = "Job start clock_id", + .type = FIO_OPT_INT, + .off1 = offsetof(struct thread_options, job_start_clock_id), + .help = "The clock_id passed to the call to clock_gettime used to record job_start in the json output format. Default is 0, or CLOCK_REALTIME", + .verify = gtod_cpu_verify, + .category = FIO_OPT_C_GENERAL, + .group = FIO_OPT_G_CLOCK, + }, { .name = "unified_rw_reporting", .lname = "Unified RW Reporting", diff --git a/rate-submit.c b/rate-submit.c index 6f6d15bd66..92be3df75e 100644 --- a/rate-submit.c +++ b/rate-submit.c @@ -185,7 +185,7 @@ static int io_workqueue_init_worker_fn(struct submit_worker *sw) if (td->io_ops->post_init && td->io_ops->post_init(td)) goto err_io_init; - set_epoch_time(td, td->o.log_unix_epoch | td->o.log_alternate_epoch, td->o.log_alternate_epoch_clock_id); + set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id); fio_getrusage(&td->ru_start); clear_io_state(td, 1); diff --git a/server.c b/server.c index bb423702a7..27332e326e 100644 --- a/server.c +++ b/server.c @@ -1706,6 +1706,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) p.ts.error = cpu_to_le32(ts->error); p.ts.thread_number = cpu_to_le32(ts->thread_number); p.ts.groupid = cpu_to_le32(ts->groupid); + p.ts.job_start = cpu_to_le64(ts->job_start); p.ts.pid = cpu_to_le32(ts->pid); p.ts.members = cpu_to_le32(ts->members); p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep); diff --git a/stat.c b/stat.c index 7b791628f4..8c8d69f047 100644 --- a/stat.c +++ b/stat.c @@ -1712,6 +1712,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, root = json_create_object(); json_object_add_value_string(root, "jobname", ts->name); json_object_add_value_int(root, "groupid", ts->groupid); + json_object_add_value_int(root, "job_start", ts->job_start); json_object_add_value_int(root, "error", ts->error); /* ETA Info */ @@ -2526,6 +2527,7 @@ void __show_run_stats(void) */ ts->thread_number = td->thread_number; ts->groupid = td->groupid; + ts->job_start = td->job_start; /* * first pid in group, not very useful... @@ -3048,7 +3050,9 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data, s = get_sample(iolog, cur_log, cur_log->nr_samples); s->data = data; - s->time = t + (iolog->td ? iolog->td->alternate_epoch : 0); + s->time = t; + if (iolog->td && (iolog->td->o.log_unix_epoch || iolog->td->o.log_alternate_epoch)) + s->time += iolog->td->alternate_epoch; io_sample_set_ddir(iolog, s, ddir); s->bs = bs; s->priority = priority; diff --git a/stat.h b/stat.h index 8ceabc48c7..bd986d4e71 100644 --- a/stat.h +++ b/stat.h @@ -169,6 +169,7 @@ struct thread_stat { uint32_t error; uint32_t thread_number; uint32_t groupid; + uint64_t job_start; /* Time job was started, as clock_gettime(job_start_clock_id) */ uint32_t pid; char description[FIO_JOBDESC_SIZE]; uint32_t members; diff --git a/thread_options.h b/thread_options.h index 38a9993d23..91c194cf71 100644 --- a/thread_options.h +++ b/thread_options.h @@ -273,6 +273,7 @@ struct thread_options { unsigned int unified_rw_rep; unsigned int gtod_reduce; unsigned int gtod_cpu; + unsigned int job_start_clock_id; enum fio_cs clocksource; unsigned int no_stall; unsigned int trim_percentage; @@ -422,7 +423,6 @@ struct thread_options_pack { uint32_t iodepth_batch_complete_min; uint32_t iodepth_batch_complete_max; uint32_t serialize_overlap; - uint32_t pad; uint64_t size; uint64_t io_size; @@ -433,13 +433,11 @@ struct thread_options_pack { uint32_t fill_device; uint32_t file_append; uint32_t unique_filename; - uint32_t pad3; uint64_t file_size_low; uint64_t file_size_high; uint64_t start_offset; uint64_t start_offset_align; uint32_t start_offset_nz; - uint32_t pad4; uint64_t bs[DDIR_RWDIR_CNT]; uint64_t ba[DDIR_RWDIR_CNT]; @@ -511,6 +509,7 @@ struct thread_options_pack { struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX]; uint32_t zone_split_nr[DDIR_RWDIR_CNT]; + uint32_t pad; fio_fp64_t zipf_theta; fio_fp64_t pareto_h; fio_fp64_t gauss_dev; @@ -593,6 +592,7 @@ struct thread_options_pack { uint32_t unified_rw_rep; uint32_t gtod_reduce; uint32_t gtod_cpu; + uint32_t job_start_clock_id; uint32_t clocksource; uint32_t no_stall; uint32_t trim_percentage; @@ -603,6 +603,7 @@ struct thread_options_pack { uint32_t lat_percentiles; uint32_t slat_percentiles; uint32_t percentile_precision; + uint32_t pad2; fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN]; uint8_t read_iolog_file[FIO_TOP_STR_MAX]; diff --git a/time.c b/time.c index 5c4d6de039..7cbab6ff4f 100644 --- a/time.c +++ b/time.c @@ -172,14 +172,22 @@ void set_genesis_time(void) fio_gettime(&genesis, NULL); } -void set_epoch_time(struct thread_data *td, int log_alternate_epoch, clockid_t clock_id) +void set_epoch_time(struct thread_data *td, clockid_t log_alternate_epoch_clock_id, clockid_t job_start_clock_id) { + struct timespec ts; fio_gettime(&td->epoch, NULL); - if (log_alternate_epoch) { - struct timespec ts; - clock_gettime(clock_id, &ts); - td->alternate_epoch = (unsigned long long)(ts.tv_sec) * 1000 + - (unsigned long long)(ts.tv_nsec) / 1000000; + clock_gettime(log_alternate_epoch_clock_id, &ts); + td->alternate_epoch = (unsigned long long)(ts.tv_sec) * 1000 + + (unsigned long long)(ts.tv_nsec) / 1000000; + if (job_start_clock_id == log_alternate_epoch_clock_id) + { + td->job_start = td->alternate_epoch; + } + else + { + clock_gettime(job_start_clock_id, &ts); + td->job_start = (unsigned long long)(ts.tv_sec) * 1000 + + (unsigned long long)(ts.tv_nsec) / 1000000; } } From d252275b59feb8ca92078ca8f61bab691e539186 Mon Sep 17 00:00:00 2001 From: aggieNick02 Date: Fri, 8 Sep 2023 15:34:09 -0500 Subject: [PATCH 2/2] Make log_unix_epoch an official alias of log_alternate_epoch log_alternate_epoch was introduced along with log_alternate_epoch_clock_id, and generalized the idea of log_unix_epoch. Both options had the same effect. So we make log_unix_epoch an official alias of log_alternate_epoch, instead of maintaining both redundant options. Signed-off-by: Nick Neumann nick@pcpartpicker.com --- HOWTO.rst | 10 ++++------ cconv.c | 2 -- fio.1 | 10 ++++------ options.c | 12 ++---------- stat.c | 2 +- thread_options.h | 5 +---- 6 files changed, 12 insertions(+), 29 deletions(-) diff --git a/HOWTO.rst b/HOWTO.rst index 9c825cc27d..7f26978a74 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -4114,9 +4114,7 @@ Measurements and reporting .. option:: log_unix_epoch=bool - If set, fio will log Unix timestamps to the log files produced by enabling - write_type_log for each log type, instead of the default zero-based - timestamps. + Backwards compatible alias for log_alternate_epoch. .. option:: log_alternate_epoch=bool @@ -4127,9 +4125,9 @@ Measurements and reporting .. option:: log_alternate_epoch_clock_id=int - Specifies the clock_id to be used by clock_gettime to obtain the alternate epoch - if either log_unix_epoch or log_alternate_epoch are true. Otherwise has no - effect. Default value is 0, or CLOCK_REALTIME. + Specifies the clock_id to be used by clock_gettime to obtain the alternate + epoch if log_alternate_epoch is true. Otherwise has no effect. Default + value is 0, or CLOCK_REALTIME. .. option:: block_error_percentiles=bool diff --git a/cconv.c b/cconv.c index b0127dd870..341388d456 100644 --- a/cconv.c +++ b/cconv.c @@ -216,7 +216,6 @@ int convert_thread_options_to_cpu(struct thread_options *o, o->log_prio = le32_to_cpu(top->log_prio); o->log_gz = le32_to_cpu(top->log_gz); o->log_gz_store = le32_to_cpu(top->log_gz_store); - o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch); o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch); o->log_alternate_epoch_clock_id = le32_to_cpu(top->log_alternate_epoch_clock_id); o->job_start_clock_id = le32_to_cpu(top->job_start_clock_id); @@ -456,7 +455,6 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->log_prio = cpu_to_le32(o->log_prio); top->log_gz = cpu_to_le32(o->log_gz); top->log_gz_store = cpu_to_le32(o->log_gz_store); - top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch); top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch); top->log_alternate_epoch_clock_id = cpu_to_le32(o->log_alternate_epoch_clock_id); top->job_start_clock_id = cpu_to_le32(o->job_start_clock_id); diff --git a/fio.1 b/fio.1 index 3e5b840c94..8159caa47f 100644 --- a/fio.1 +++ b/fio.1 @@ -3808,9 +3808,7 @@ decompressed with fio, using the \fB\-\-inflate\-log\fR command line parameter. The files will be stored with a `.fz' suffix. .TP .BI log_unix_epoch \fR=\fPbool -If set, fio will log Unix timestamps to the log files produced by enabling -write_type_log for each log type, instead of the default zero-based -timestamps. +Backward-compatible alias for \fBlog_alternate_epoch\fR. .TP .BI log_alternate_epoch \fR=\fPbool If set, fio will log timestamps based on the epoch used by the clock specified @@ -3819,9 +3817,9 @@ enabling write_type_log for each log type, instead of the default zero-based timestamps. .TP .BI log_alternate_epoch_clock_id \fR=\fPint -Specifies the clock_id to be used by clock_gettime to obtain the alternate epoch -if either \fBBlog_unix_epoch\fR or \fBlog_alternate_epoch\fR are true. Otherwise has no -effect. Default value is 0, or CLOCK_REALTIME. +Specifies the clock_id to be used by clock_gettime to obtain the alternate +epoch if \fBlog_alternate_epoch\fR is true. Otherwise has no effect. Default +value is 0, or CLOCK_REALTIME. .TP .BI block_error_percentiles \fR=\fPbool If set, record errors in trim block-sized units from writes and trims and diff --git a/options.c b/options.c index 281ea609e4..95001b4aaf 100644 --- a/options.c +++ b/options.c @@ -4600,17 +4600,9 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .help = "Install libz-dev(el) to get compression support", }, #endif - { - .name = "log_unix_epoch", - .lname = "Log epoch unix", - .type = FIO_OPT_BOOL, - .off1 = offsetof(struct thread_options, log_unix_epoch), - .help = "Use Unix time in log files", - .category = FIO_OPT_C_LOG, - .group = FIO_OPT_G_INVALID, - }, { .name = "log_alternate_epoch", + .alias = "log_unix_epoch", .lname = "Log epoch alternate", .type = FIO_OPT_BOOL, .off1 = offsetof(struct thread_options, log_alternate_epoch), @@ -4623,7 +4615,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .lname = "Log alternate epoch clock_id", .type = FIO_OPT_INT, .off1 = offsetof(struct thread_options, log_alternate_epoch_clock_id), - .help = "If log_alternate_epoch or log_unix_epoch is true, this option specifies the clock_id from clock_gettime whose epoch should be used. If neither of those is true, this option has no effect. Default value is 0, or CLOCK_REALTIME", + .help = "If log_alternate_epoch is true, this option specifies the clock_id from clock_gettime whose epoch should be used. If log_alternate_epoch is false, this option has no effect. Default value is 0, or CLOCK_REALTIME", .category = FIO_OPT_C_LOG, .group = FIO_OPT_G_INVALID, }, diff --git a/stat.c b/stat.c index 8c8d69f047..7cf6bee194 100644 --- a/stat.c +++ b/stat.c @@ -3051,7 +3051,7 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data, s->data = data; s->time = t; - if (iolog->td && (iolog->td->o.log_unix_epoch || iolog->td->o.log_alternate_epoch)) + if (iolog->td && iolog->td->o.log_alternate_epoch) s->time += iolog->td->alternate_epoch; io_sample_set_ddir(iolog, s, ddir); s->bs = bs; diff --git a/thread_options.h b/thread_options.h index 91c194cf71..fdde055e7d 100644 --- a/thread_options.h +++ b/thread_options.h @@ -170,7 +170,6 @@ struct thread_options { unsigned int log_offset; unsigned int log_gz; unsigned int log_gz_store; - unsigned int log_unix_epoch; unsigned int log_alternate_epoch; unsigned int log_alternate_epoch_clock_id; unsigned int norandommap; @@ -492,7 +491,6 @@ struct thread_options_pack { uint32_t log_offset; uint32_t log_gz; uint32_t log_gz_store; - uint32_t log_unix_epoch; uint32_t log_alternate_epoch; uint32_t log_alternate_epoch_clock_id; uint32_t norandommap; @@ -509,7 +507,6 @@ struct thread_options_pack { struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX]; uint32_t zone_split_nr[DDIR_RWDIR_CNT]; - uint32_t pad; fio_fp64_t zipf_theta; fio_fp64_t pareto_h; fio_fp64_t gauss_dev; @@ -603,7 +600,7 @@ struct thread_options_pack { uint32_t lat_percentiles; uint32_t slat_percentiles; uint32_t percentile_precision; - uint32_t pad2; + uint32_t pad; fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN]; uint8_t read_iolog_file[FIO_TOP_STR_MAX];