Complete redo of workflow, switch to compute-benchmarks
ianayl committed Sep 6, 2024
1 parent 6d14a32 commit 940e3be
Showing 8 changed files with 356 additions and 106 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sycl-linux-run-tests.yml
@@ -367,7 +367,7 @@ jobs:
- name: Run sycl-bench microbenchmarks
id: run_benchmarks
if: inputs.tests_selector == 'benchmark'
run: ./devops/scripts/sycl-bench.sh https://github.com/ianayl/sycl-bench
run: ./devops/scripts/benchmarking/benchmark.sh
- name: Upload sycl-bench microbenchmark results
if: inputs.tests_selector == 'benchmark' && steps.run_benchmarks.outcome == 'success'
uses: actions/upload-artifact@v4
70 changes: 70 additions & 0 deletions devops/scripts/benchmarking/aggregate.py
@@ -0,0 +1,70 @@
import csv
import sys
from pathlib import Path
import heapq

import common

class StreamingMedian:

    def __init__(self):
        # Values larger than the current median live in a min-heap
        self.minheap_larger = []
        # Values smaller than the current median live here, stored negated,
        # since heapq only provides a min-heap
        self.maxheap_smaller = []

def add(self, n: float):
if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n:
heapq.heappush(self.maxheap_smaller, -n)
else:
heapq.heappush(self.minheap_larger, n)

if len(self.maxheap_smaller) > len(self.minheap_larger) + 1:
heapq.heappush(self.minheap_larger,
-heapq.heappop(self.maxheap_smaller))
elif len(self.maxheap_smaller) < len(self.minheap_larger):
heapq.heappush(self.maxheap_smaller,
-heapq.heappop(self.minheap_larger))

def get_median(self) -> float:
if len(self.maxheap_smaller) == len(self.minheap_larger):
return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0
else:
return -self.maxheap_smaller[0]


def aggregate_median(benchmark: str):

    def csv_samples() -> list[Path]:
        # TODO check that the path below is a valid directory
        cache_dir = Path(f"{common.PERF_RES_PATH}/{benchmark}")
        # TODO check for time range; What time range do I want?
        return [f for f in cache_dir.glob(f"{benchmark}-*.csv") if f.is_file()]

# Calculate median of every desired metric:
aggregate_s = dict()
for sample_path in csv_samples():
with open(sample_path, mode='r') as sample_file:
for s in csv.DictReader(sample_file):
if s["TestCase"] not in aggregate_s:
aggregate_s[s["TestCase"]] = \
{ metric: StreamingMedian() for metric in common.metrics_variance }
for metric in common.metrics_variance:
aggregate_s[s["TestCase"]][metric].add(common.sanitize(s[metric]))

with open(f"{common.PERF_RES_PATH}/{benchmark}/{benchmark}-median.csv", 'w') as output_csv:
writer = csv.DictWriter(output_csv,
fieldnames=["TestCase", *common.metrics_variance.keys()])
writer.writeheader()
for test_case in aggregate_s:
writer.writerow({ "TestCase": test_case } |
{ metric: aggregate_s[test_case][metric].get_median()
for metric in common.metrics_variance })


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <benchmark name>")
        exit(1)
    common.load_configs()
    aggregate_median(sys.argv[1])
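A quick standalone sketch (not part of this commit) of how the StreamingMedian helper above behaves, assuming aggregate.py is importable:

    from aggregate import StreamingMedian

    sm = StreamingMedian()
    for x in [3.0, 1.0, 4.0, 1.5, 9.0]:
        sm.add(x)
    print(sm.get_median())  # 3.0 -- median of the five samples seen so far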
26 changes: 26 additions & 0 deletions devops/scripts/benchmarking/benchmark-ci.conf
@@ -0,0 +1,26 @@
# Git branch settings for llvm-ci-perf-results
PERF_RES_GIT_REPO="https://github.com/intel-sandbox/llvm-ci-perf-results"
PERF_RES_BRANCH="test-compute-bench"
# Path where llvm-ci-perf-results are cloned
PERF_RES_PATH="./llvm-ci-perf-res"

# Git branch settings for compute-benchmarks
COMPUTE_BENCH_GIT_REPO="https://github.com/ianayl/compute-benchmarks"
COMPUTE_BENCH_BRANCH="update-sycl"

# Path to compile and build compute-benchmarks
COMPUTE_BENCH_PATH="./compute-benchmarks"

# Path to temporarily store compute-benchmark results
OUTPUT_PATH="."

# Metrics to check for regressions, and the allowed variance for each, as a Python dict
METRICS_VARIANCE='{"Median": 0.5}'
#METRICS_VARIANCE='{"Median": 0.5, "StdDev": 4.0}'

# Metrics to record using aggregate.py
METRICS_RECORDED='["Median", "StdDev"]'

# Minimum number of cached result files required before regression checking is performed
AVERAGE_THRESHOLD=7
# TODO reconsider this
170 changes: 170 additions & 0 deletions devops/scripts/benchmarking/benchmark.sh
@@ -0,0 +1,170 @@
#!/bin/sh

#
# benchmark.sh: Benchmark dpcpp using compute-benchmarks
#

usage () {
    >&2 echo "Usage: $0 [-p <compute-benchmarks path>] [-r <compute-benchmarks git repo>] [-b <compute-benchmarks branch>]
	-p  Path to clone and build compute-benchmarks in
	-r  Git repository to clone compute-benchmarks from
	-b  Branch of compute-benchmarks to check out
This script builds and runs benchmarks from compute-benchmarks."
    exit 1
}

clone_perf_res() {
echo "### Cloning llvm-ci-perf-res ($PERF_RES_GIT_REPO:$PERF_RES_BRANCH) ###"
mkdir -p "$(dirname $PERF_RES_PATH)"
    git clone -b "$PERF_RES_BRANCH" "$PERF_RES_GIT_REPO" "$PERF_RES_PATH" || exit $?
}

clone_compute_bench() {
echo "### Cloning compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###"
mkdir -p "$(dirname $COMPUTE_BENCH_PATH)"
    git clone -b "$COMPUTE_BENCH_BRANCH" \
        --recurse-submodules "$COMPUTE_BENCH_GIT_REPO" \
        "$COMPUTE_BENCH_PATH" || exit $?
}

build_compute_bench() {
echo "### Building compute-benchmarks ($COMPUTE_BENCH_GIT_REPO:$COMPUTE_BENCH_BRANCH) ###"
mkdir $COMPUTE_BENCH_PATH/build && cd $COMPUTE_BENCH_PATH/build &&
cmake .. -DBUILD_SYCL=ON && cmake --build .
compute_bench_build_stat=$?
cd -
[ "$compute_bench_build_stat" -ne 0 ] && exit $compute_bench_build_stat
}
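
# Note: print_bench_res below relies on a get_csv_col_index helper that is not
# defined anywhere in this commit; a minimal sketch of what it is assumed to
# look like (sets tmp_csv_col_i to the 1-based index of a named CSV column):
get_csv_col_index() {
    # Usage: get_csv_col_index <csv file> <column name>
    tmp_csv_col_i="$(head -n 1 "$1" | tr ',' '\n' | grep -nx "$2" | cut -d: -f1)"
}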

print_bench_res() {
# Usage: print_bench_res <benchmark output .csv file> <benchmark status code> <summary file>
if [ ! -s $1 ]; then
printf "NO OUTPUT! (Status $2)\n" | tee -a $3
return # Do not proceed if file is empty
fi

get_csv_col_index $1 run-time-mean
tmp_run_time_mean_i=$tmp_csv_col_i
get_csv_col_index $1 run-time-median
tmp_run_time_median_i=$tmp_csv_col_i
get_csv_col_index $1 run-time-throughput
tmp_run_time_throughput_i=$tmp_csv_col_i

# `sycl-bench` output seems to like inserting the header multiple times.
# Here we cache the header to make sure it prints only once:
tmp_header_title="$(cat $1 | head -n 1 | sed 's/^\# Benchmark name/benchmark/')"
tmp_result="$(cat $1 | grep '^[^\#]')"

printf "%s\n%s" "$tmp_header_title" "$tmp_result" \
| awk -F',' -v me="$tmp_run_time_mean_i" \
-v md="$tmp_run_time_median_i" \
-v th="$tmp_run_time_throughput_i" \
'{printf "%-57s %-13s %-15s %-20s\n", $1, $me, $md, $th }' \
| tee -a $3 # Print to summary file
}

###
STATUS_SUCCESS=0
STATUS_FAILED=1
###

samples_under_threshold () {
mkdir -p $1
file_count="$(find $1 -maxdepth 1 -type f | wc -l )"
[ "$file_count" -lt "$AVERAGE_THRESHOLD" ]
}

check_regression() {
if samples_under_threshold "$PERF_RES_PATH/$1"; then
echo "Not enough samples to construct an average, performance check skipped!"
return $STATUS_SUCCESS
fi
BENCHMARKING_ROOT="$BENCHMARKING_ROOT" python "$BENCHMARKING_ROOT/compare.py" "$1" "$2"
return $?
# return $STATUS_FAILED
}

cache() {
mv "$2" "$PERF_RES_PATH/$1/"
}

# Check for a regression, and cache if no regression found
check_and_cache() {
echo "Checking $testcase..."
if check_regression $1 $2; then
echo "Caching $testcase..."
cache $1 $2
else
echo "Not caching!"
fi
}

process_benchmarks() {
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
mkdir -p "$PERF_RES_PATH"

echo "### Running and processing selected benchmarks ###"
if [ -z "$TESTS_CONFIG" ]; then
echo "Setting tests to run via cli is not currently supported."
exit $STATUS_FAILED
else
# Ignore lines in the test config starting with #'s
grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
echo "# Running $testcase..."
test_csv_output="$OUTPUT_PATH/$testcase-$TIMESTAMP.csv"
            # tail -n +8 filters out initial debug prints that are not in csv format
            "$COMPUTE_BENCH_PATH/build/bin/$testcase" --csv | tail -n +8 > "$test_csv_output"
            if [ "$?" -eq 0 ] && [ -s "$test_csv_output" ]; then
                check_and_cache "$testcase" "$test_csv_output"
            else
                echo "ERROR @ $testcase"
fi
done
fi
}

cleanup() {
rm -r $COMPUTE_BENCH_PATH
}

load_configs() {
# This script needs to know where the "BENCHMARKING_ROOT" directory is,
# containing all the configuration files and the compare script.
#
# If this is not provided, this function tries to guess where the files
# are based on how the script is called, and verifies that all necessary
# configs and scripts are reachable.
[ -z "$BENCHMARKING_ROOT" ] && BENCHMARKING_ROOT="$(dirname $0)"

BENCHMARK_CI_CONFIG="$BENCHMARKING_ROOT/benchmark-ci.conf"
TESTS_CONFIG="$BENCHMARKING_ROOT/enabled_tests.conf"
COMPARE_PATH="$BENCHMARKING_ROOT/compare.py"

for file in "$BENCHMARK_CI_CONFIG" "$TESTS_CONFIG" "$COMPARE_PATH"; do
if [ ! -f "$file" ]; then
echo "$(basename $file) not found, please provide path to BENCHMARKING_ROOT."
exit -1
fi
done

. $BENCHMARK_CI_CONFIG
}

load_configs

# CLI overrides for configuration options
while getopts "p:b:r:" opt; do
case $opt in
p) COMPUTE_BENCH_PATH=$OPTARG ;;
r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;;
b) COMPUTE_BENCH_BRANCH=$OPTARG ;;
\?) usage ;;
esac
done

[ ! -d "$PERF_RES_PATH" ] && clone_perf_res
[ ! -d "$COMPUTE_BENCH_PATH" ] && clone_compute_bench
[ ! -d "$COMPUTE_BENCH_PATH/build" ] && build_compute_bench
process_benchmarks
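For reference, a hypothetical invocation of the script above using the CLI overrides (the paths and branch name are illustrative, mirroring the defaults in benchmark-ci.conf):

    BENCHMARKING_ROOT=./devops/scripts/benchmarking \
        ./devops/scripts/benchmarking/benchmark.sh -p ./compute-benchmarks -b update-sycl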
43 changes: 43 additions & 0 deletions devops/scripts/benchmarking/common.py
@@ -0,0 +1,43 @@
import os
import re
import ast

PERF_RES_PATH, metrics_variance, metrics_recorded = None, None, None

def sanitize(stat: str) -> float:
# Get rid of %
if stat[-1] == '%':
stat = stat[:-1]
return float(stat)


def load_configs():
BENCHMARKING_ROOT = os.getenv("BENCHMARKING_ROOT")
if BENCHMARKING_ROOT is None:
        # Fall back to guessing BENCHMARKING_ROOT from this script's location
BENCHMARKING_ROOT = os.path.dirname(os.path.abspath(__file__))

benchmarking_ci_conf_path = f"{BENCHMARKING_ROOT}/benchmark-ci.conf"
if not os.path.isfile(benchmarking_ci_conf_path):
        raise Exception("Please provide a path to a valid BENCHMARKING_ROOT.")

global PERF_RES_PATH, metrics_variance, metrics_recorded
perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
m_variance_re = re.compile(r'^METRICS_VARIANCE=(.*)$', re.M)
m_recorded_re = re.compile(r'^METRICS_RECORDED=(.*)$', re.M)

with open(benchmarking_ci_conf_path, 'r') as configs_file:
configs_str = configs_file.read()

for m_variance in m_variance_re.findall(configs_str):
metrics_variance = ast.literal_eval(m_variance.strip()[1:-1])
if not isinstance(metrics_variance, dict):
raise TypeError("Error in benchmark-ci.conf: METRICS_VARIANCE is not a python dict.")

for m_recorded in m_recorded_re.findall(configs_str):
metrics_recorded = ast.literal_eval(m_recorded.strip()[1:-1])
if not isinstance(metrics_recorded, list):
raise TypeError("Error in benchmark-ci.conf: METRICS_RECORDED is not a python list.")

for perf_res in perf_res_re.findall(configs_str):
PERF_RES_PATH = str(perf_res[1:-1])
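As a rough sanity check of the parsing above (not part of the commit): with the benchmark-ci.conf shown earlier, calling load_configs() would be expected to leave the module globals as

    import common
    common.load_configs()
    # common.PERF_RES_PATH    == "./llvm-ci-perf-res"
    # common.metrics_variance == {"Median": 0.5}
    # common.metrics_recorded == ["Median", "StdDev"]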
43 changes: 43 additions & 0 deletions devops/scripts/benchmarking/compare.py
@@ -0,0 +1,43 @@
import csv
import sys
from pathlib import Path

import common

# TODO compare_to(metric) instead?
def compare_to_median(test_name: str, test_csv_path: str):
median = dict()
with open(f"{common.PERF_RES_PATH}/{test_name}/{test_name}-median.csv", mode='r') as median_csv:
for stat in csv.DictReader(median_csv):
median[stat["TestCase"]] = \
{ metric: float(stat[metric]) for metric in common.metrics_variance }

# TODO read status codes from a config file
status = 0
failure_counts = { metric: 0 for metric in common.metrics_variance }
with open(test_csv_path, mode='r') as sample_csv:
for sample in csv.DictReader(sample_csv):
# Ignore test cases we haven't profiled before
if sample["TestCase"] not in median:
continue
test_median = median[sample["TestCase"]]
for metric, threshold in common.metrics_variance.items():
max_tolerated = test_median[metric] * (1 + threshold)
if common.sanitize(sample[metric]) > max_tolerated:
print("vvv FAILED vvv")
print(sample['TestCase'])
print(f"{metric}: {metric} {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}% -- {max_tolerated})")
print("^^^^^^^^^^^^^^")
status = 1
failure_counts[metric] += 1
if status != 0:
print(f"Failure counts: {failure_counts}")
return status


if __name__ == "__main__":
if len(sys.argv) < 3:
print(f"Usage: {sys.argv[0]} <test name> <test csv path>")
exit(-1)
common.load_configs()
exit(compare_to_median(sys.argv[1], sys.argv[2]))
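A worked example of the tolerance check above, with made-up numbers (a sketch, not part of the commit):

    threshold = 0.5            # from METRICS_VARIANCE='{"Median": 0.5}'
    historic_median = 100.0    # hypothetical cached median for some test case
    max_tolerated = historic_median * (1 + threshold)   # 150.0
    # A new sample of 140.0 passes; 160.0 would be flagged as a regression
    assert not 140.0 > max_tolerated
    assert 160.0 > max_tolerated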
3 changes: 3 additions & 0 deletions devops/scripts/benchmarking/enabled_tests.conf
@@ -0,0 +1,3 @@
# Test cases to be enabled:
api_overhead_benchmark_sycl
memory_benchmark_sycl