From bd496333eda623e544801e17e4aa3247ce3c8d95 Mon Sep 17 00:00:00 2001 From: Ashmeen Kaur <57195160+ashmeenkaur@users.noreply.github.com> Date: Fri, 20 Oct 2023 15:32:34 +0530 Subject: [PATCH] use fio's job_start parameter to compute start and end time of fio jobs. (#1457) * testing changes testing changes testing changes testing changes testing changes testing changes testing changes testing changes testing changes testing changes testing changes revert seq_rand_read_write.fio clean up PR * revert local testing changes * remove unused files * clean up pr * review comments * review comments * added unit tests --- perfmetrics/scripts/fio/constants.py | 1 + perfmetrics/scripts/fio/fio_metrics.py | 71 ++--- perfmetrics/scripts/fio/fio_metrics_test.py | 56 +++- .../scripts/fio/testdata/good_out_job.json | 1 + .../fio/testdata/missing_metric_key.json | 1 + .../multiple_jobs_global_options.json | 4 +- .../testdata/multiple_jobs_job_options.json | 2 + .../fio/testdata/no_global_ramp_time.json | 299 ++++++++++++++++++ .../scripts/fio/testdata/no_metrics.json | 1 + .../scripts/fio/testdata/partial_metrics.json | 2 + .../run_load_test_and_fetch_metrics.sh | 16 +- 11 files changed, 410 insertions(+), 44 deletions(-) create mode 100644 perfmetrics/scripts/fio/testdata/no_global_ramp_time.json diff --git a/perfmetrics/scripts/fio/constants.py b/perfmetrics/scripts/fio/constants.py index a6cb5e711b..0314f22d24 100644 --- a/perfmetrics/scripts/fio/constants.py +++ b/perfmetrics/scripts/fio/constants.py @@ -16,6 +16,7 @@ START_TIME = 'start_time' END_TIME = 'end_time' RW = 'rw' +JOB_START = 'job_start' READ = 'read' WRITE = 'write' METRICS = 'metrics' diff --git a/perfmetrics/scripts/fio/fio_metrics.py b/perfmetrics/scripts/fio/fio_metrics.py index 5f976dcd44..dc3f259278 100644 --- a/perfmetrics/scripts/fio/fio_metrics.py +++ b/perfmetrics/scripts/fio/fio_metrics.py @@ -189,6 +189,23 @@ def _load_file_dict(self, filepath) -> Dict[str, Any]: raise NoValuesError(f'JSON file 
{filepath} returned empty object') return fio_out + def _get_global_ramp_time(self, out_json): + global_ramptime_ms = 0 + if consts.GLOBAL_OPTS in out_json: + if consts.RAMPTIME in out_json[consts.GLOBAL_OPTS]: + global_ramptime_ms = _convert_value( + out_json[consts.GLOBAL_OPTS][consts.RAMPTIME], + consts.TIME_TO_MS_CONVERSION, 's') + return global_ramptime_ms + + def _get_job_ramp_time(self, job): + ramptime_ms = 0 + if consts.JOB_OPTS in job: + if consts.RAMPTIME in job[consts.JOB_OPTS]: + ramptime_ms = _convert_value(job[consts.JOB_OPTS][consts.RAMPTIME], + consts.TIME_TO_MS_CONVERSION, 's') + return ramptime_ms + def _get_start_end_times(self, out_json, job_params) -> List[Tuple[int]]: """Returns start and end times of each job as a list. @@ -204,54 +221,28 @@ def _get_start_end_times(self, out_json, job_params) -> List[Tuple[int]]: KeyError: If RW is not present in any dict in job_params """ - # Creating a list of just the 'rw' job parameter. Later, we will - # loop through the jobs from the end, therefore we are creating - # reversed rw list for easy access - rw_rev_list = [job_param[consts.RW] for job_param in reversed(job_params)] + # Creating a list of just the 'rw' job parameter. 
+ rw_list = [job_param[consts.RW] for job_param in job_params] - global_ramptime_ms = 0 - global_startdelay_ms = 0 - if consts.GLOBAL_OPTS in out_json: - if consts.RAMPTIME in out_json[consts.GLOBAL_OPTS]: - global_ramptime_ms = _convert_value( - out_json[consts.GLOBAL_OPTS][consts.RAMPTIME], - consts.TIME_TO_MS_CONVERSION, 's') - if consts.STARTDELAY in out_json[consts.GLOBAL_OPTS]: - global_startdelay_ms = _convert_value( - out_json[consts.GLOBAL_OPTS][consts.STARTDELAY], - consts.TIME_TO_MS_CONVERSION, 's') - - next_end_time_ms = 0 - rev_start_end_times = [] - # Looping from end since the given time is the final end time - for i, job in enumerate(list(reversed(out_json[consts.JOBS]))): - rw = rw_rev_list[i] + global_ramptime_ms = self._get_global_ramp_time(out_json) + start_end_times = [] + for i, job in enumerate(list(out_json[consts.JOBS])): + rw = rw_list[i] job_rw = job[_get_rw(rw)] - ramptime_ms = 0 - startdelay_ms = 0 - if consts.JOB_OPTS in job: - if consts.RAMPTIME in job[consts.JOB_OPTS]: - ramptime_ms = _convert_value(job[consts.JOB_OPTS][consts.RAMPTIME], - consts.TIME_TO_MS_CONVERSION, 's') + ramptime_ms = self._get_job_ramp_time(job) if ramptime_ms == 0: ramptime_ms = global_ramptime_ms - if startdelay_ms == 0: - startdelay_ms = global_startdelay_ms - # for multiple jobs, end time of one job = start time of next job - end_time_ms = next_end_time_ms if next_end_time_ms > 0 else out_json[ - consts.TIMESTAMP_MS] - # job start time = job end time - job runtime - ramp time - start_time_ms = end_time_ms - job_rw[consts.RUNTIME] - ramptime_ms - next_end_time_ms = start_time_ms - startdelay_ms + start_time_ms = job[consts.JOB_START] + end_time_ms = start_time_ms + job_rw[consts.RUNTIME] + ramptime_ms # converting start and end time to seconds start_time_s = start_time_ms // 1000 - end_time_s = round(end_time_ms/1000) - rev_start_end_times.append((start_time_s, end_time_s)) + end_time_s = round(end_time_ms / 1000) + start_end_times.append((start_time_s, 
end_time_s)) - return list(reversed(rev_start_end_times)) + return list(start_end_times) def _get_job_params(self, out_json): """Returns parameter values of each job. @@ -435,8 +426,8 @@ def _add_to_gsheet(self, jobs, worksheet_name): gsheet.write_to_google_sheet(worksheet_name, values) def get_metrics(self, - filepath, - worksheet_name=None) -> List[Dict[str, Any]]: + filepath, + worksheet_name=None) -> List[Dict[str, Any]]: """Returns job metrics obtained from given filepath and writes to gsheets. Args: diff --git a/perfmetrics/scripts/fio/fio_metrics_test.py b/perfmetrics/scripts/fio/fio_metrics_test.py index de631d363d..0732aa5e5c 100644 --- a/perfmetrics/scripts/fio/fio_metrics_test.py +++ b/perfmetrics/scripts/fio/fio_metrics_test.py @@ -3,8 +3,10 @@ Usage from perfmetrics/scripts folder: python3 -m fio.fio_metrics_test """ import unittest +import json from unittest import mock from fio import fio_metrics +from fio import constants as consts TEST_PATH = './fio/testdata/' GOOD_FILE = 'good_out_job.json' @@ -16,6 +18,7 @@ BAD_FORMAT_FILE = 'bad_format.json' MULTIPLE_JOBS_GLOBAL_OPTIONS_FILE = 'multiple_jobs_global_options.json' MULTIPLE_JOBS_JOB_OPTIONS_FILE = 'multiple_jobs_job_options.json' +NO_GLOBAL_RAMP_TIME = 'no_global_ramp_time.json' SPREADSHEET_ID = '1kvHv1OBCzr9GnFxRu9RTJC7jjQjc9M4rAiDnhyak2Sg' WORKSHEET_NAME = 'fio_metrics' @@ -63,6 +66,7 @@ def test_load_file_dict_good_file(self): }, 'jobs': [{ 'jobname': '1_thread', + "job_start": 1653027084555, 'groupid': 0, 'error': 0, 'eta': 0, @@ -401,6 +405,56 @@ def test_get_start_end_times_no_rw_raises_key_error(self): with self.assertRaises(KeyError): _ = self.fio_metrics_obj._get_start_end_times({}, extracted_job_params) + def test_get_global_ramp_time_when_global_ramp_time_is_present(self): + fio_out = {} + f = open(get_full_filepath(GOOD_FILE), 'r') + fio_out = json.load(f) + f.close() + expected_global_ramp_time = 10000 + + extracted_global_ramp_time = self.fio_metrics_obj \ + 
._get_global_ramp_time(fio_out) + + self.assertEqual(expected_global_ramp_time, extracted_global_ramp_time) + + def test_get_global_ramp_time_when_global_ramp_time_is_not_present(self): + fio_out = {} + f = open(get_full_filepath(NO_GLOBAL_RAMP_TIME), 'r') + fio_out = json.load(f) + f.close() + expected_global_ramp_time = 0 + + extracted_global_ramp_time = self.fio_metrics_obj._get_global_ramp_time( + fio_out) + + self.assertEqual(expected_global_ramp_time, extracted_global_ramp_time) + + def test_get_job_ramp_time_when_job_ramp_time_is_present(self): + fio_out = {} + f = open(get_full_filepath(NO_GLOBAL_RAMP_TIME), 'r') + fio_out = json.load(f) + f.close() + job = list(fio_out[consts.JOBS])[0] + expected_job_ramp_time = 20000 + + extracted_job_ramp_time = self.fio_metrics_obj \ + ._get_job_ramp_time(job) + + self.assertEqual(expected_job_ramp_time, extracted_job_ramp_time) + + def test_get_job_ramp_time_when_job_ramp_time_is_not_present(self): + fio_out = {} + f = open(get_full_filepath(GOOD_FILE), 'r') + fio_out = json.load(f) + f.close() + job = list(fio_out[consts.JOBS])[0] + expected_job_ramp_time = 0 + + extracted_job_ramp_time = self.fio_metrics_obj._get_job_ramp_time( + job) + + self.assertEqual(expected_job_ramp_time, extracted_job_ramp_time) + def test_extract_metrics_from_good_file(self): json_obj = self.fio_metrics_obj._load_file_dict( get_full_filepath(GOOD_FILE)) @@ -673,7 +727,7 @@ def test_get_metrics_for_multiple_jobs_global_options(self): }, range='{}!A2'.format(WORKSHEET_NAME)) ] - + with mock.patch.object(fio_metrics.gsheet, '_get_sheets_service_client' ) as get_sheets_service_client_mock: get_sheets_service_client_mock.return_value = sheets_service_mock diff --git a/perfmetrics/scripts/fio/testdata/good_out_job.json b/perfmetrics/scripts/fio/testdata/good_out_job.json index e1dd1ff508..52773650a0 100644 --- a/perfmetrics/scripts/fio/testdata/good_out_job.json +++ b/perfmetrics/scripts/fio/testdata/good_out_job.json @@ -26,6 +26,7 @@ "jobs" : [ 
{ "jobname" : "1_thread", + "job_start" : 1653027084555, "groupid" : 0, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/fio/testdata/missing_metric_key.json b/perfmetrics/scripts/fio/testdata/missing_metric_key.json index ebea232f3b..f0f3992a7d 100644 --- a/perfmetrics/scripts/fio/testdata/missing_metric_key.json +++ b/perfmetrics/scripts/fio/testdata/missing_metric_key.json @@ -26,6 +26,7 @@ "jobs" : [ { "jobname" : "1_thread", + "job_start" : 1653027084555, "groupid" : 0, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/fio/testdata/multiple_jobs_global_options.json b/perfmetrics/scripts/fio/testdata/multiple_jobs_global_options.json index 0de9447e36..e0200b5e22 100644 --- a/perfmetrics/scripts/fio/testdata/multiple_jobs_global_options.json +++ b/perfmetrics/scripts/fio/testdata/multiple_jobs_global_options.json @@ -29,6 +29,7 @@ "jobs" : [ { "jobname" : "1_thread", + "job_start" : 1653381667555, "groupid" : 0, "error" : 0, "eta" : 0, @@ -288,9 +289,10 @@ "latency_percentile" : 100.000000, "latency_window" : 0 }, - + { "jobname" : "2_thread", + "job_start" : 1653381757234, "groupid" : 1, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/fio/testdata/multiple_jobs_job_options.json b/perfmetrics/scripts/fio/testdata/multiple_jobs_job_options.json index 41e3e04217..17121840d2 100644 --- a/perfmetrics/scripts/fio/testdata/multiple_jobs_job_options.json +++ b/perfmetrics/scripts/fio/testdata/multiple_jobs_job_options.json @@ -25,6 +25,7 @@ "jobs" : [ { "jobname" : "1_thread", + "job_start" : 1653596980555, "groupid" : 0, "error" : 0, "eta" : 0, @@ -289,6 +290,7 @@ }, { "jobname" : "2_thread", + "job_start" : 1653597076112, "groupid" : 1, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/fio/testdata/no_global_ramp_time.json b/perfmetrics/scripts/fio/testdata/no_global_ramp_time.json new file mode 100644 index 0000000000..99a60f9f4e --- /dev/null +++ b/perfmetrics/scripts/fio/testdata/no_global_ramp_time.json @@ -0,0 +1,299 @@ +{ + "fio 
version" : "fio-3.30", + "timestamp" : 1653381828, + "timestamp_ms" : 1653381828458, + "time" : "Tue May 24 08:43:48 2022", + "global options" : { + "ioengine" : "libaio", + "direct" : "1", + "fadvise_hint" : "0", + "verify" : "0", + "rw" : "read", + "bs" : "1M", + "iodepth" : "64", + "invalidate" : "1", + "startdelay" : "20", + "runtime" : "60s", + "time_based" : "1", + "nrfiles" : "1", + "thread" : "1", + "numjobs" : "10", + "filesize" : "50M", + "openfiles" : "1", + "group_reporting" : "1", + "allrandrepeat" : "1", + "directory" : "gcs/50mb", + "filename_format" : "$jobname.$jobnum.$filenum" + }, + "jobs" : [ + { + "jobname" : "1_thread", + "job_start" : 1653381667555, + "groupid" : 0, + "error" : 0, + "eta" : 0, + "elapsed" : 166, + "job options" : { + "filesize" : "5M", + "directory" : "gcs/5mb", + "numjobs" : "10", + "rw": "write", + "ramp_time" : "20s", + "startdelay" : "100s" + }, + "read" : { + "io_bytes" : 8405385216, + "io_kbytes" : 8208384, + "bw_bytes" : 138911322, + "bw" : 135655, + "iops" : 115.354741, + "runtime" : 60509, + "total_ios" : 6980, + "short_ios" : 0, + "drop_ios" : 0, + "slat_ns" : { + "min" : 202973029, + "max" : 2158004154, + "mean" : 344839778.795726, + "stddev" : 140617761.331012, + "N" : 7020 + }, + "clat_ns" : { + "min" : 8830, + "max" : 27877285691, + "mean" : 18144971743.948357, + "stddev" : 6432692993.333396, + "N" : 8016, + "percentile" : { + "1.000000" : 557842432, + "5.000000" : 3338665984, + "10.000000" : 6677331968, + "20.000000" : 13220446208, + "30.000000" : 17112760320, + "40.000000" : 17112760320, + "50.000000" : 17112760320, + "60.000000" : 17112760320, + "70.000000" : 17112760320, + "80.000000" : 17112760320, + "90.000000" : 17112760320, + "95.000000" : 17112760320, + "99.000000" : 17112760320, + "99.500000" : 17112760320, + "99.900000" : 17112760320, + "99.950000" : 17112760320, + "99.990000" : 17112760320 + } + }, + "lat_ns" : { + "min" : 249737264, + "max" : 28958587178, + "mean" : 18494668007.316742, + "stddev" : 
6446218652.946330, + "N" : 8016, + "percentile" : { + "1.000000" : 375390208, + "5.000000" : 379584512, + "10.000000" : 379584512, + "20.000000" : 379584512, + "30.000000" : 383778816, + "40.000000" : 383778816, + "50.000000" : 387973120, + "60.000000" : 387973120, + "70.000000" : 396361728, + "80.000000" : 408944640, + "90.000000" : 492830720, + "95.000000" : 526385152, + "99.000000" : 893386752, + "99.500000" : 1568669696, + "99.900000" : 1635778560, + "99.950000" : 1652555776, + "99.990000" : 1702887424 + } + }, + "bw_min" : 81731, + "bw_max" : 164160, + "bw_agg" : 92.033068, + "bw_mean" : 124848.354388, + "bw_dev" : 1000.978241, + "bw_samples" : 3573, + "iops_min" : 59, + "iops_max" : 160, + "iops_mean" : 121.396736, + "iops_stddev" : 0.983586, + "iops_samples" : 3573 + }, + "write" : { + "io_bytes" : 0, + "io_kbytes" : 0, + "bw_bytes" : 0, + "bw" : 0, + "iops" : 0.000000, + "runtime" : 0, + "total_ios" : 0, + "short_ios" : 0, + "drop_ios" : 0, + "slat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "clat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "lat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "bw_min" : 0, + "bw_max" : 0, + "bw_agg" : 0.000000, + "bw_mean" : 0.000000, + "bw_dev" : 0.000000, + "bw_samples" : 0, + "iops_min" : 0, + "iops_max" : 0, + "iops_mean" : 0.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 0 + }, + "trim" : { + "io_bytes" : 0, + "io_kbytes" : 0, + "bw_bytes" : 0, + "bw" : 0, + "iops" : 0.000000, + "runtime" : 0, + "total_ios" : 0, + "short_ios" : 0, + "drop_ios" : 0, + "slat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "clat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "lat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + }, + "bw_min" : 0, + 
"bw_max" : 0, + "bw_agg" : 0.000000, + "bw_mean" : 0.000000, + "bw_dev" : 0.000000, + "bw_samples" : 0, + "iops_min" : 0, + "iops_max" : 0, + "iops_mean" : 0.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 0 + }, + "sync" : { + "total_ios" : 0, + "lat_ns" : { + "min" : 0, + "max" : 0, + "mean" : 0.000000, + "stddev" : 0.000000, + "N" : 0 + } + }, + "job_runtime" : 2406109, + "usr_cpu" : 0.007107, + "sys_cpu" : 0.158098, + "ctx" : 7414, + "majf" : 0, + "minf" : 2181, + "iodepth_level" : { + "1" : 0.000000, + "2" : 0.000000, + "4" : 0.000000, + "8" : 0.000000, + "16" : 2.922636, + "32" : 18.338109, + ">=64" : 78.739255 + }, + "iodepth_submit" : { + "0" : 0.000000, + "4" : 100.000000, + "8" : 0.000000, + "16" : 0.000000, + "32" : 0.000000, + "64" : 0.000000, + ">=64" : 0.000000 + }, + "iodepth_complete" : { + "0" : 0.000000, + "4" : 99.277457, + "8" : 0.000000, + "16" : 0.000000, + "32" : 0.000000, + "64" : 0.722543, + ">=64" : 0.000000 + }, + "latency_ns" : { + "2" : 0.000000, + "4" : 0.000000, + "10" : 0.000000, + "20" : 0.000000, + "50" : 0.000000, + "100" : 0.000000, + "250" : 0.000000, + "500" : 0.000000, + "750" : 0.000000, + "1000" : 0.000000 + }, + "latency_us" : { + "2" : 0.000000, + "4" : 0.000000, + "10" : 0.028653, + "20" : 0.372493, + "50" : 0.143266, + "100" : 0.014327, + "250" : 0.000000, + "500" : 0.014327, + "750" : 0.000000, + "1000" : 0.000000 + }, + "latency_ms" : { + "2" : 0.000000, + "4" : 0.000000, + "10" : 0.000000, + "20" : 0.000000, + "50" : 0.000000, + "100" : 0.000000, + "250" : 0.014327, + "500" : 0.501433, + "750" : 0.429799, + "1000" : 0.487106, + "2000" : 1.618911, + ">=2000" : 111.217765 + }, + "latency_depth" : 64, + "latency_target" : 0, + "latency_percentile" : 100.000000, + "latency_window" : 0 + } + ] +} + diff --git a/perfmetrics/scripts/fio/testdata/no_metrics.json b/perfmetrics/scripts/fio/testdata/no_metrics.json index fd19989365..13e394c05c 100644 --- a/perfmetrics/scripts/fio/testdata/no_metrics.json +++ 
b/perfmetrics/scripts/fio/testdata/no_metrics.json @@ -26,6 +26,7 @@ "jobs" : [ { "jobname" : "1_thread", + "job_start" : 1653027084555, "groupid" : 0, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/fio/testdata/partial_metrics.json b/perfmetrics/scripts/fio/testdata/partial_metrics.json index bf1af21a9a..aca99a6eab 100644 --- a/perfmetrics/scripts/fio/testdata/partial_metrics.json +++ b/perfmetrics/scripts/fio/testdata/partial_metrics.json @@ -26,6 +26,7 @@ "jobs" : [ { "jobname" : "1_thread", + "job_start" : 1653027084555, "groupid" : 0, "error" : 0, "eta" : 0, @@ -307,6 +308,7 @@ }, { "jobname" : "2_thread", + "job_start" : 1653027084555, "groupid" : 0, "error" : 0, "eta" : 0, diff --git a/perfmetrics/scripts/run_load_test_and_fetch_metrics.sh b/perfmetrics/scripts/run_load_test_and_fetch_metrics.sh index 7b96268a39..8bc744d4e3 100644 --- a/perfmetrics/scripts/run_load_test_and_fetch_metrics.sh +++ b/perfmetrics/scripts/run_load_test_and_fetch_metrics.sh @@ -14,10 +14,22 @@ # limitations under the License. set -e -echo "Installing fio" -sudo apt-get install fio -y + echo "Installing pip" sudo apt-get install pip -y +echo "Installing fio" +# install libaio as fio has a dependency on libaio +sudo apt-get install libaio-dev -y +# We are building fio from source because of issue: https://github.com/axboe/fio/issues/1640. +# The fix is not currently released in a package as of 20th Oct, 2023. +# TODO: install fio via package when release > 3.35 is available. +sudo rm -rf "${KOKORO_ARTIFACTS_DIR}/github/fio" +git clone https://github.com/axboe/fio.git "${KOKORO_ARTIFACTS_DIR}/github/fio" +cd "${KOKORO_ARTIFACTS_DIR}/github/fio" && \ +git checkout c5d8ce3fc736210ded83b126c71e3225c7ffd7c9 && \ +./configure && make && sudo make install + +cd "${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/perfmetrics/scripts" echo Print the time when FIO tests start date echo Running fio test..