From 940e3beff55e1077e0f8a10792a2ac5eb92da27b Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 5 Sep 2024 23:21:48 -0700 Subject: [PATCH] Complete redo of workflow, switch to compute-benchmarks --- .github/workflows/sycl-linux-run-tests.yml | 2 +- devops/scripts/benchmarking/aggregate.py | 70 ++++++++ devops/scripts/benchmarking/benchmark-ci.conf | 26 +++ devops/scripts/benchmarking/benchmark.sh | 170 ++++++++++++++++++ devops/scripts/benchmarking/common.py | 43 +++++ devops/scripts/benchmarking/compare.py | 43 +++++ .../scripts/benchmarking/enabled_tests.conf | 3 + devops/scripts/sycl-bench.sh | 105 ----------- 8 files changed, 356 insertions(+), 106 deletions(-) create mode 100644 devops/scripts/benchmarking/aggregate.py create mode 100644 devops/scripts/benchmarking/benchmark-ci.conf create mode 100755 devops/scripts/benchmarking/benchmark.sh create mode 100644 devops/scripts/benchmarking/common.py create mode 100644 devops/scripts/benchmarking/compare.py create mode 100644 devops/scripts/benchmarking/enabled_tests.conf delete mode 100755 devops/scripts/sycl-bench.sh diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 48200bfb2748c..089b6020d2577 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -367,7 +367,7 @@ jobs: - name: Run sycl-bench microbenchmarks id: run_benchmarks if: inputs.tests_selector == 'benchmark' - run: ./devops/scripts/sycl-bench.sh https://github.com/ianayl/sycl-bench + run: ./devops/scripts/benchmarking/benchmark.sh - name: Upload sycl-bench microbenchmark results if: inputs.tests_selector == 'benchmark' && steps.run_benchmarks.outcome == 'success' uses: actions/upload-artifact@v4 diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py new file mode 100644 index 0000000000000..95fd21964d896 --- /dev/null +++ b/devops/scripts/benchmarking/aggregate.py @@ -0,0 +1,70 @@ +import csv +import sys +from pathlib import Path +import heapq + +import common + +class StreamingMedian: + + def __init__(self): + self.minheap_larger = [] + self.maxheap_smaller = [] + # Note: numbers on maxheap should be negative, as heapq + # is minheap by default + + def add(self, n: float): + if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: + heapq.heappush(self.maxheap_smaller, -n) + else: + heapq.heappush(self.minheap_larger, n) + + if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: + heapq.heappush(self.minheap_larger, + -heapq.heappop(self.maxheap_smaller)) + elif len(self.maxheap_smaller) < len(self.minheap_larger): + heapq.heappush(self.maxheap_smaller, + -heapq.heappop(self.minheap_larger)) + + def get_median(self) -> float: + if len(self.maxheap_smaller) == len(self.minheap_larger): + return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 + else: + return -self.maxheap_smaller[0] + + +def aggregate_median(benchmark: str): + + def csv_samples() -> list[str]: + # TODO check that the path below is valid directory + with Path(f"{common.PERF_RES_PATH}/{benchmark}") as cache_dir: + # TODO check for time range; What time range do I want? 
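+        # Treat every regular file matching "<benchmark>-*.csv" in the cached
+        # results directory as one historical sample; each sample row feeds
+        # the streaming medians computed below.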
+ return filter(lambda f: f.is_file(), + cache_dir.glob(f"{benchmark}-*.csv")) + + # Calculate median of every desired metric: + aggregate_s = dict() + for sample_path in csv_samples(): + with open(sample_path, mode='r') as sample_file: + for s in csv.DictReader(sample_file): + if s["TestCase"] not in aggregate_s: + aggregate_s[s["TestCase"]] = \ + { metric: StreamingMedian() for metric in common.metrics_variance } + for metric in common.metrics_variance: + aggregate_s[s["TestCase"]][metric].add(common.sanitize(s[metric])) + + with open(f"{common.PERF_RES_PATH}/{benchmark}/{benchmark}-median.csv", 'w') as output_csv: + writer = csv.DictWriter(output_csv, + fieldnames=["TestCase", *common.metrics_variance.keys()]) + writer.writeheader() + for test_case in aggregate_s: + writer.writerow({ "TestCase": test_case } | + { metric: aggregate_s[test_case][metric].get_median() + for metric in common.metrics_variance }) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} ") + exit() + aggregate_median(sys.argv[1]) diff --git a/devops/scripts/benchmarking/benchmark-ci.conf b/devops/scripts/benchmarking/benchmark-ci.conf new file mode 100644 index 0000000000000..679b93604b9d0 --- /dev/null +++ b/devops/scripts/benchmarking/benchmark-ci.conf @@ -0,0 +1,26 @@ +# Git branch settings for llvm-ci-perf-results +PERF_RES_GIT_REPO="https://github.com/intel-sandbox/llvm-ci-perf-results" +PERF_RES_BRANCH="test-compute-bench" +# Path where llvm-ci-perf-results are cloned +PERF_RES_PATH="./llvm-ci-perf-res" + +# Git branch settings for compute-benchmarks +COMPUTE_BENCH_GIT_REPO="https://github.com/ianayl/compute-benchmarks" +COMPUTE_BENCH_BRANCH="update-sycl" + +# Path to compile and build compute-benchmarks +COMPUTE_BENCH_PATH="./compute-benchmarks" + +# Path to temporarily store compute-benchmark results +OUTPUT_PATH="." + +# Metrics to benchmark, and their allowed variance as a Python dictionary +METRICS_VARIANCE='{"Median": 0.5}' +#METRICS_VARIANCE='{"Median": 0.5, "StdDev": 4.0}' + +# Metrics to record using aggregate.py +METRICS_RECORDED='["Median", "StdDev"]' + +# Threshold to store benchmark files before benchmarking +AVERAGE_THRESHOLD=7 +# TODO reconsider this \ No newline at end of file diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh new file mode 100755 index 0000000000000..66c75fcdd8d80 --- /dev/null +++ b/devops/scripts/benchmarking/benchmark.sh @@ -0,0 +1,170 @@ +#!/bin/sh + +# +# benchmark.sh: Benchmark dpcpp using compute-benchmarks +# + +# TODO fix +usage () { + >&2 echo "Usage: $0 [-B ] + -B Path to clone and build compute-benchmarks on + +This script builds and runs benchmarks from compute-benchmarks." + exit 1 +} + +clone_perf_res() { + echo "### Cloning llvm-ci-perf-res ($PERF_RES_GIT_REPO:$PERF_RES_BRANCH) ###" + mkdir -p "$(dirname $PERF_RES_PATH)" + git clone -b $PERF_RES_BRANCH $PERF_RES_GIT_REPO $PERF_RES_PATH + [ "$?" -ne 0 ] && exit $? +} + +clone_compute_bench() { + echo "### Cloning compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###" + mkdir -p "$(dirname $COMPUTE_BENCH_PATH)" + git clone -b $COMPUTE_BENCH_BRANCH \ + --recurse-submodules $COMPUTE_BENCH_GIT_REPO \ + $COMPUTE_BENCH_PATH + [ "$?" -ne 0 ] && exit $? +} + +build_compute_bench() { + echo "### Building compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###" + mkdir $COMPUTE_BENCH_PATH/build && cd $COMPUTE_BENCH_PATH/build && + cmake .. -DBUILD_SYCL=ON && cmake --build . + compute_bench_build_stat=$? 
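+    # Capture the cmake exit status before `cd -`, which would otherwise
+    # overwrite $? and silently mask a failed build.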
+ cd - + [ "$compute_bench_build_stat" -ne 0 ] && exit $compute_bench_build_stat +} + +print_bench_res() { + # Usage: print_bench_res + if [ ! -s $1 ]; then + printf "NO OUTPUT! (Status $2)\n" | tee -a $3 + return # Do not proceed if file is empty + fi + + get_csv_col_index $1 run-time-mean + tmp_run_time_mean_i=$tmp_csv_col_i + get_csv_col_index $1 run-time-median + tmp_run_time_median_i=$tmp_csv_col_i + get_csv_col_index $1 run-time-throughput + tmp_run_time_throughput_i=$tmp_csv_col_i + + # `sycl-bench` output seems to like inserting the header multiple times. + # Here we cache the header to make sure it prints only once: + tmp_header_title="$(cat $1 | head -n 1 | sed 's/^\# Benchmark name/benchmark/')" + tmp_result="$(cat $1 | grep '^[^\#]')" + + printf "%s\n%s" "$tmp_header_title" "$tmp_result" \ + | awk -F',' -v me="$tmp_run_time_mean_i" \ + -v md="$tmp_run_time_median_i" \ + -v th="$tmp_run_time_throughput_i" \ + '{printf "%-57s %-13s %-15s %-20s\n", $1, $me, $md, $th }' \ + | tee -a $3 # Print to summary file +} + +### +STATUS_SUCCESS=0 +STATUS_FAILED=1 +### + +samples_under_threshold () { + mkdir -p $1 + file_count="$(find $1 -maxdepth 1 -type f | wc -l )" + [ "$file_count" -lt "$AVERAGE_THRESHOLD" ] +} + +check_regression() { + if samples_under_threshold "$PERF_RES_PATH/$1"; then + echo "Not enough samples to construct an average, performance check skipped!" + return $STATUS_SUCCESS + fi + BENCHMARKING_ROOT="$BENCHMARKING_ROOT" python "$BENCHMARKING_ROOT/compare.py" "$1" "$2" + return $? + # return $STATUS_FAILED +} + +cache() { + mv "$2" "$PERF_RES_PATH/$1/" +} + +# Check for a regression, and cache if no regression found +check_and_cache() { + echo "Checking $testcase..." + if check_regression $1 $2; then + echo "Caching $testcase..." + cache $1 $2 + else + echo "Not caching!" + fi +} + +process_benchmarks() { + TIMESTAMP="$(date '+%Y%m%d_%H%M%S')" + mkdir -p "$PERF_RES_PATH" + + echo "### Running and processing selected benchmarks ###" + if [ -z "$TESTS_CONFIG" ]; then + echo "Setting tests to run via cli is not currently supported." + exit $STATUS_FAILED + else + # Ignore lines in the test config starting with #'s + grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do + echo "# Running $testcase..." + test_csv_output="$OUTPUT_PATH/$testcase-$TIMESTAMP.csv" + $COMPUTE_BENCH_PATH/build/bin/$testcase --csv | tail +8 > "$test_csv_output" + # The tail +8 filters out initial debug prints not in csv format + if [ "$?" -eq 0 ] && [ -s "$test_csv_output" ]; then + check_and_cache $testcase $test_csv_output + else + echo "ERROR @ $test_case" + fi + done + fi +} + +cleanup() { + rm -r $COMPUTE_BENCH_PATH +} + +load_configs() { + # This script needs to know where the "BENCHMARKING_ROOT" directory is, + # containing all the configuration files and the compare script. + # + # If this is not provided, this function tries to guess where the files + # are based on how the script is called, and verifies that all necessary + # configs and scripts are reachable. + [ -z "$BENCHMARKING_ROOT" ] && BENCHMARKING_ROOT="$(dirname $0)" + + BENCHMARK_CI_CONFIG="$BENCHMARKING_ROOT/benchmark-ci.conf" + TESTS_CONFIG="$BENCHMARKING_ROOT/enabled_tests.conf" + COMPARE_PATH="$BENCHMARKING_ROOT/compare.py" + + for file in "$BENCHMARK_CI_CONFIG" "$TESTS_CONFIG" "$COMPARE_PATH"; do + if [ ! -f "$file" ]; then + echo "$(basename $file) not found, please provide path to BENCHMARKING_ROOT." + exit -1 + fi + done + + . 
$BENCHMARK_CI_CONFIG +} + +load_configs + +# CLI overrides to configuration options +while getopts "p:b:r:" opt; do + case $opt in + p) COMPUTE_BENCH_PATH=$OPTARG ;; + r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;; + b) COMPUTE_BENCH_BRANCH=$OPTARG ;; + \?) usage ;; + esac +done + +[ ! -d "$PERF_RES_PATH" ] && clone_perf_res +[ ! -d "$COMPUTE_BENCH_PATH" ] && clone_compute_bench +[ ! -d "$COMPUTE_BENCH_PATH/build" ] && build_compute_bench +process_benchmarks \ No newline at end of file diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py new file mode 100644 index 0000000000000..61272db6db618 --- /dev/null +++ b/devops/scripts/benchmarking/common.py @@ -0,0 +1,43 @@ +import os +import re +import ast + +PERF_RES_PATH, metrics_variance, metrics_recorded = None, None, None + +def sanitize(stat: str) -> float: + # Get rid of % + if stat[-1] == '%': + stat = stat[:-1] + return float(stat) + + +def load_configs(): + BENCHMARKING_ROOT = os.getenv("BENCHMARKING_ROOT") + if BENCHMARKING_ROOT is None: + # Try to predict where BENCHMARKING_ROOT is based on executable + BENCHMARKING_ROOT = os.path.dirname(os.path.abspath(__file__)) + + benchmarking_ci_conf_path = f"{BENCHMARKING_ROOT}/benchmark-ci.conf" + if not os.path.isfile(benchmarking_ci_conf_path): + raise Exception(f"Please provide path to a valid BENCHMARKING_ROOT.") + + global PERF_RES_PATH, metrics_variance, metrics_recorded + perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M) + m_variance_re = re.compile(r'^METRICS_VARIANCE=(.*)$', re.M) + m_recorded_re = re.compile(r'^METRICS_RECORDED=(.*)$', re.M) + + with open(benchmarking_ci_conf_path, 'r') as configs_file: + configs_str = configs_file.read() + + for m_variance in m_variance_re.findall(configs_str): + metrics_variance = ast.literal_eval(m_variance.strip()[1:-1]) + if not isinstance(metrics_variance, dict): + raise TypeError("Error in benchmark-ci.conf: METRICS_VARIANCE is not a python dict.") + + for m_recorded in m_recorded_re.findall(configs_str): + metrics_recorded = ast.literal_eval(m_recorded.strip()[1:-1]) + if not isinstance(metrics_recorded, list): + raise TypeError("Error in benchmark-ci.conf: METRICS_RECORDED is not a python list.") + + for perf_res in perf_res_re.findall(configs_str): + PERF_RES_PATH = str(perf_res[1:-1]) \ No newline at end of file diff --git a/devops/scripts/benchmarking/compare.py b/devops/scripts/benchmarking/compare.py new file mode 100644 index 0000000000000..9987938256330 --- /dev/null +++ b/devops/scripts/benchmarking/compare.py @@ -0,0 +1,43 @@ +import csv +import sys +from pathlib import Path + +import common + +# TODO compare_to(metric) instead? 
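+# compare_to_median(): check a fresh run of <test_name> against the cached
+# per-test-case medians in <PERF_RES_PATH>/<test_name>/<test_name>-median.csv.
+# A sample fails a metric when it exceeds median * (1 + allowed variance);
+# the returned status (0 = pass, 1 = regression) becomes this script's exit
+# code, which benchmark.sh uses to decide whether to cache the new results.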
+def compare_to_median(test_name: str, test_csv_path: str): + median = dict() + with open(f"{common.PERF_RES_PATH}/{test_name}/{test_name}-median.csv", mode='r') as median_csv: + for stat in csv.DictReader(median_csv): + median[stat["TestCase"]] = \ + { metric: float(stat[metric]) for metric in common.metrics_variance } + + # TODO read status codes from a config file + status = 0 + failure_counts = { metric: 0 for metric in common.metrics_variance } + with open(test_csv_path, mode='r') as sample_csv: + for sample in csv.DictReader(sample_csv): + # Ignore test cases we haven't profiled before + if sample["TestCase"] not in median: + continue + test_median = median[sample["TestCase"]] + for metric, threshold in common.metrics_variance.items(): + max_tolerated = test_median[metric] * (1 + threshold) + if common.sanitize(sample[metric]) > max_tolerated: + print("vvv FAILED vvv") + print(sample['TestCase']) + print(f"{metric}: {metric} {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}% -- {max_tolerated})") + print("^^^^^^^^^^^^^^") + status = 1 + failure_counts[metric] += 1 + if status != 0: + print(f"Failure counts: {failure_counts}") + return status + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} ") + exit(-1) + common.load_configs() + exit(compare_to_median(sys.argv[1], sys.argv[2])) diff --git a/devops/scripts/benchmarking/enabled_tests.conf b/devops/scripts/benchmarking/enabled_tests.conf new file mode 100644 index 0000000000000..7aaec4919a416 --- /dev/null +++ b/devops/scripts/benchmarking/enabled_tests.conf @@ -0,0 +1,3 @@ +# Test cases to be enabled: +api_overhead_benchmark_sycl +memory_benchmark_sycl diff --git a/devops/scripts/sycl-bench.sh b/devops/scripts/sycl-bench.sh deleted file mode 100755 index 4b00a60f178aa..0000000000000 --- a/devops/scripts/sycl-bench.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/sh - -# sycl-bench.sh: Benchmark dpcpp using sycl-bench - -usage () { - >&2 echo "Usage: $0 [-B ] - -B Path to clone and build sycl-bench on - -This script builds and runs benchmarks from sycl-bench." - exit 1 -} - -clone() { - mkdir -p $SYCL_BENCH_PATH - git clone $SYCL_BENCH_GIT_REPO $SYCL_BENCH_PATH || return $? -} - -build() { - cd $SYCL_BENCH_PATH - cmake -DSYCL_IMPL=dpcpp -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin -S . -B ./build && - cmake --build ./build || return $? - cd - -} - -get_csv_col_index() { - # Determine the index of a column in a CSV given its title - # Usage: get_csv_col_index - tmp_csv_col_i="$(cat "$1" | head -n 1 | grep -o "^.*$2," | grep -o ',' | wc -l)" -} - -print_bench_res() { - # Usage: print_bench_res - if [ ! -s $1 ]; then - printf "NO OUTPUT! (Status $2)\n" | tee -a $3 - return # Do not proceed if file is empty - fi - - get_csv_col_index $1 run-time-mean - tmp_run_time_mean_i=$tmp_csv_col_i - get_csv_col_index $1 run-time-median - tmp_run_time_median_i=$tmp_csv_col_i - get_csv_col_index $1 run-time-throughput - tmp_run_time_throughput_i=$tmp_csv_col_i - - # `sycl-bench` output seems to like inserting the header multiple times. 
- # Here we cache the header to make sure it prints only once: - tmp_header_title="$(cat $1 | head -n 1 | sed 's/^\# Benchmark name/benchmark/')" - tmp_result="$(cat $1 | grep '^[^\#]')" - - printf "%s\n%s" "$tmp_header_title" "$tmp_result" \ - | awk -F',' -v me="$tmp_run_time_mean_i" \ - -v md="$tmp_run_time_median_i" \ - -v th="$tmp_run_time_throughput_i" \ - '{printf "%-57s %-13s %-15s %-20s\n", $1, $me, $md, $th }' \ - | tee -a $3 # Print to summary file -} - -# run sycl bench step -run() { - TIMESTAMP="$(date '+%Y%m%d_%H%M%S')" - mkdir "$SYCL_BENCH_PATH/build/bench-$TIMESTAMP/" - tmp_summary_file="$SYCL_BENCH_PATH/build/bench-$TIMESTAMP/summary.txt" - - for file in $SYCL_BENCH_PATH/build/bin/*; do - # TODO -size should not be always 256, caution - tmp_bench_output="$SYCL_BENCH_PATH/build/bench-$TIMESTAMP/$(basename $file).csv" - tmp_bench_log="$SYCL_BENCH_PATH/build/bench-$TIMESTAMP/$(basename $file).log" - - tmp_err="0" - printf "\n### Results for $(basename $file) ###\n" | tee -a $tmp_summary_file - # The pipe here suppresses errors in a way that doesn't stop github actions: - $file --output=$tmp_bench_output --no-verification --size=256 2> "$tmp_bench_log" || tmp_err=$? - print_bench_res $tmp_bench_output $tmp_err $tmp_summary_file - # Remove log if nothing logged - [ ! -s "$tmp_bench_log" ] && rm "$tmp_bench_log" || cat "$tmp_bench_log" | tee -a $tmp_summary_file - done - - # Export timestamp for later use - [ -f "$GITHUB_OUTPUT" ] && echo TIMESTAMP=$TIMESTAMP >> $GITHUB_OUTPUT -} - -compress() { - tar -I gzip -cf "$SYCL_BENCH_PATH/build/bench-$TIMESTAMP.tar.gz" -C "$SYCL_BENCH_PATH/build/bench-$TIMESTAMP" . - if [ -f "$SYCL_BENCH_PATH/build/bench-$TIMESTAMP.tar.gz" ] && [ -f "$GITHUB_OUTPUT" ]; then - echo BENCHMARK_RESULTS="$SYCL_BENCH_PATH/build/bench-$TIMESTAMP.tar.gz" >> $GITHUB_OUTPUT - fi -} - -cleanup() { - rm -r $SYCL_BENCH_PATH -} - - -[ "$#" -lt "1" ] && usage - -SYCL_BENCH_GIT_REPO="$1"; shift -SYCL_BENCH_PATH="./sycl-bench" -while getopts "B:" opt; do - case $opt in - B) SYCL_BENCH_PATH=$OPTARG ;; - \?) usage ;; - esac -done - -clone && build && run && compress
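
A minimal sketch of how the new entry point could be exercised locally, outside CI. It assumes a SYCL-enabled compiler is already on PATH and that the command is run from the repository root; the -p/-r/-b flags are the getopts overrides defined in benchmark.sh, and exporting BENCHMARKING_ROOT is optional since the script falls back to its own directory:

    export BENCHMARKING_ROOT=./devops/scripts/benchmarking
    ./devops/scripts/benchmarking/benchmark.sh \
        -p ./compute-benchmarks \
        -r https://github.com/ianayl/compute-benchmarks \
        -b update-sycl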