Fixed aggregate, improved logging
ianayl committed Sep 18, 2024
1 parent cc29c23 commit 3b5454f
Showing 7 changed files with 66 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sycl-benchmark-aggregate.yml
@@ -57,7 +57,7 @@ jobs:
with:
# TODO populate default values before loading configuration
path: ${{ env.PERF_RES_PATH }}
repository: ${{ env.PERF_RES_GIT_REPO }}
repository: intel-sandbox/llvm-ci-perf-results #${{ env.PERF_RES_GIT_REPO }}
branch: ${{ env.PERF_RES_BRANCH }}
- name: Run aggregator on cloned data
run: |
7 changes: 3 additions & 4 deletions .github/workflows/sycl-linux-benchmark.yml
@@ -51,7 +51,7 @@ on:
cache_results:
type: boolean
default: false
required: False
required: false

workflow_dispatch:
inputs:
@@ -109,7 +109,7 @@ on:
cache_results:
type: boolean
default: false
required: False
required: false

permissions:
contents: write
@@ -242,8 +242,7 @@ jobs:
clang++ --version
ls
export CMPLR_ROOT=$PWD/toolchain
./devops/scripts/benchmarking/benchmark.sh
exit $?
./devops/scripts/benchmarking/benchmark.sh ${{ inputs.cache_results == true && '-s' }}
- name: debug -- delete after
env:
SSH_KEY: ${{secrets.ACTIONS_DEPLOY_KEY}}
6 changes: 3 additions & 3 deletions devops/scripts/benchmarking/aggregate.py
@@ -40,13 +40,13 @@ def csv_samples() -> list[str]:
with Path(f"{common.PERF_RES_PATH}/{benchmark}") as cache_dir:
# TODO check for time range; What time range do I want?
return filter(lambda f: f.is_file() and
common.valid_timestamp(str(f)[-13:]) and str(f)[-13:] > cutoff,
common.valid_timestamp(str(f)[-17:-4]) and str(f)[-17:-4] > cutoff,
cache_dir.glob(f"{benchmark}-*_*.csv"))

# Calculate median of every desired metric:
aggregate_s = dict()
for sample_path in csv_samples():
with open(sample_path, mode='r') as sample_file:
with open(sample_path, 'r') as sample_file:
for s in csv.DictReader(sample_file):
if s["TestCase"] not in aggregate_s:
aggregate_s[s["TestCase"]] = \
@@ -71,5 +71,5 @@ def csv_samples() -> list[str]:
if not common.valid_timestamp(sys.argv[2]):
print(f"Bad cutoff timestamp, please use YYMMDD_HHMMSS.")
exit(1)

common.load_configs()
aggregate_median(sys.argv[1], sys.argv[2])
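The slice change above is the "fixed aggregate" part of this commit: str(f)[-13:] still captured part of the .csv extension, so the extracted text never looked like a timestamp, while str(f)[-17:-4] drops the 4-character ".csv" suffix and keeps the 13-character stamp in front of it. A minimal sketch of the difference, using a hypothetical cached-result filename (the real naming scheme comes from benchmark.sh, so the name below is only illustrative):

    # Hypothetical result file; assumes a 13-character YYMMDD_HHMMSS stamp,
    # matching the format aggregate.py asks for in its usage message.
    filename = "memory_benchmark_sycl-240918_134501.csv"

    old_stamp = filename[-13:]    # "18_134501.csv" -- still contains ".csv",
                                  # which valid_timestamp() would reject
    new_stamp = filename[-17:-4]  # "240918_134501" -- the timestamp alone,
                                  # directly comparable against the cutoff string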
13 changes: 9 additions & 4 deletions devops/scripts/benchmarking/benchmark-ci.conf
@@ -1,5 +1,5 @@
# Git branch settings for llvm-ci-perf-results
PERF_RES_GIT_REPO="https://github.com/ianayl/llvm-ci-perf-results"
PERF_RES_GIT_REPO="https://github.com/intel-sandbox/llvm-ci-perf-results"
PERF_RES_BRANCH="test-compute-bench"
# Path where llvm-ci-perf-results are cloned
PERF_RES_PATH="./llvm-ci-perf-res"
@@ -17,17 +17,22 @@ COMPUTE_BENCH_COMPILE_FLAGS="-j2"
OUTPUT_PATH="."

# Metrics to benchmark, and their allowed variance as a Python dictionary
METRICS_VARIANCE='{"Median": 0.5}'
METRICS_VARIANCE='{"Median": 0.15}'
#METRICS_VARIANCE='{"Median": 0.5, "StdDev": 4.0}'

# Metrics to record using aggregate.py
METRICS_RECORDED='["Median", "StdDev"]'

# Threshold to store benchmark files before benchmarking
# TODO reconsider this
AVERAGE_THRESHOLD=7
AVERAGE_THRESHOLD=3
# Default period of time to aggregate for the average
AVERAGE_CUTOFF_RANGE="7 days ago"

# Format of timestamps used (linux `date` format string)
TIMESTAMP_FORMAT='%Y%m%d_%H%M%S'
TIMESTAMP_FORMAT='%Y%m%d_%H%M%S'

# Log file for test cases that perform over the allowed variance
BENCHMARK_SLOW_LOG="./benchmarks-over_tolerance.log"
# Log file for test cases that errored / failed to build
BENCHMARK_ERROR_LOG="./benchmarks-errored.log"
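METRICS_VARIANCE and METRICS_RECORDED are stored as Python literals inside this shell-style config so the Python helpers can evaluate them directly. A small sketch of how such values can be turned into Python objects, assuming the ast module that common.py already imports (the parsing below is illustrative, not a copy of common.py):

    import ast

    # Values as they appear above, after the loader strips the surrounding quotes
    metrics_variance = ast.literal_eval('{"Median": 0.15}')
    metrics_recorded = ast.literal_eval('["Median", "StdDev"]')

    # literal_eval only accepts Python literals (dicts, lists, strings, numbers),
    # so a config typo cannot execute arbitrary code the way eval() could
    assert metrics_variance["Median"] == 0.15
    assert "StdDev" in metrics_recorded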
41 changes: 32 additions & 9 deletions devops/scripts/benchmarking/benchmark.sh
@@ -73,7 +73,7 @@ build_compute_bench() {

###
STATUS_SUCCESS=0
STATUS_FAILED=1
STATUS_ERROR=1
###

samples_under_threshold () {
@@ -89,7 +89,6 @@ check_regression() {
fi
BENCHMARKING_ROOT="$BENCHMARKING_ROOT" python "$BENCHMARKING_ROOT/compare.py" "$1" "$2"
return $?
# return $STATUS_FAILED
}

cache() {
@@ -100,10 +99,14 @@
check_and_cache() {
echo "Checking $testcase..."
if check_regression $1 $2; then
echo "Caching $testcase..."
cache $1 $2
if [ "$CACHE_RESULTS" -eq "1" ]; then
echo "Caching $testcase..."
cache $1 $2
fi
else
echo "Not caching!"
if [ "$CACHE_RESULTS" -eq "1" ]; then
echo "Not caching!"
fi
fi
}

@@ -113,8 +116,9 @@
echo "### Running and processing selected benchmarks ###"
if [ -z "$TESTS_CONFIG" ]; then
echo "Setting tests to run via cli is not currently supported."
exit $STATUS_FAILED
exit $STATUS_ERROR
else
rm "$BENCHMARK_ERROR_LOG" "$BENCHMARK_SLOW_LOG" 2> /dev/null
# Ignore lines in the test config starting with #'s
grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
echo "# Running $testcase..."
@@ -124,16 +128,32 @@
if [ "$?" -eq 0 ] && [ -s "$test_csv_output" ]; then
check_and_cache $testcase $test_csv_output
else
# TODO consider capturing error for logging
echo "ERROR @ $test_case"
echo "-- $testcase: error $?" >> "$BENCHMARK_ERROR_LOG"
fi
done
fi
}

process_results() {
if [ -s "$BENCHMARK_SLOW_LOG" ]; then
printf "\n### Tests performing over acceptable range of average: ###\n"
cat "$BENCHMARK_SLOW_LOG"
echo ""
fi
if [ -s "$BENCHMARK_ERROR_LOG" ]; then
printf "\n### Tests that failed to run: ###\n"
cat "$BENCHMARK_ERROR_LOG"
echo ""
fi
[ ! -s "$BENCHMARK_SLOW_LOG" ] && [ ! -s "$BENCHMARK_ERROR_LOG" ]
}

cleanup() {
echo "### Cleaning up compute-benchmark builds from prior runs ###"
rm -rf $COMPUTE_BENCH_PATH
#rm -rf $PERF_RES_PATH
rm -rf $PERF_RES_PATH
[ ! -z "$_exit_after_cleanup" ] && exit
}

@@ -163,10 +183,11 @@ load_configs() {
load_configs

COMPUTE_BENCH_COMPILE_FLAGS=""
CACHE_RESULTS="0"
TIMESTAMP="$(date +"$TIMESTAMP_FORMAT")"

# CLI overrides to configuration options
while getopts "p:b:r:f:cC" opt; do
while getopts "p:b:r:f:cCs" opt; do
case $opt in
p) COMPUTE_BENCH_PATH=$OPTARG ;;
r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;;
@@ -176,6 +197,7 @@ while getopts "p:b:r:f:cC" opt; do
# performing cleanup
c) _cleanup=1 ;;
C) _cleanup=1 && _exit_after_cleanup=1 ;;
s) CACHE_RESULTS="1";;
\?) usage ;;
esac
done
@@ -189,4 +211,5 @@
[ ! -d "$PERF_RES_PATH" ] && clone_perf_res
[ ! -d "$COMPUTE_BENCH_PATH" ] && clone_compute_bench
[ ! -d "$COMPUTE_BENCH_PATH/build" ] && build_compute_bench
process_benchmarks
process_benchmarks
process_results
10 changes: 10 additions & 0 deletions devops/scripts/benchmarking/common.py
@@ -3,6 +3,7 @@
import ast

PERF_RES_PATH, metrics_variance, metrics_recorded = None, None, None
BENCHMARK_SLOW_LOG, BENCHMARK_ERROR_LOG = None, None

def sanitize(stat: str) -> float:
# Get rid of %
@@ -22,9 +23,12 @@ def load_configs():
raise Exception(f"Please provide path to a valid BENCHMARKING_ROOT.")

global PERF_RES_PATH, metrics_variance, metrics_recorded
global BENCHMARK_ERROR_LOG, BENCHMARK_SLOW_LOG
perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
m_variance_re = re.compile(r'^METRICS_VARIANCE=(.*)$', re.M)
m_recorded_re = re.compile(r'^METRICS_RECORDED=(.*)$', re.M)
b_slow_re = re.compile(r'^BENCHMARK_SLOW_LOG=(.*)$', re.M)
b_error_re = re.compile(r'^BENCHMARK_ERROR_LOG=(.*)$', re.M)

with open(benchmarking_ci_conf_path, 'r') as configs_file:
configs_str = configs_file.read()
@@ -41,6 +45,12 @@

for perf_res in perf_res_re.findall(configs_str):
PERF_RES_PATH = str(perf_res[1:-1])

for b_slow_log in b_slow_re.findall(configs_str):
BENCHMARK_SLOW_LOG = str(b_slow_log[1:-1])

for b_error_log in b_error_re.findall(configs_str):
BENCHMARK_ERROR_LOG = str(b_error_log[1:-1])


def valid_timestamp(timestamp: str) -> bool:
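The two new log-file settings are read the same way as the existing ones: a multiline regex pulls the assignment out of benchmark-ci.conf and the [1:-1] slice strips the surrounding quotes. A minimal, self-contained illustration of that pattern (the config text is inlined here purely for the example):

    import re

    # Stand-in for the text of benchmark-ci.conf
    configs_str = 'BENCHMARK_SLOW_LOG="./benchmarks-over_tolerance.log"\n' \
                  'BENCHMARK_ERROR_LOG="./benchmarks-errored.log"\n'

    b_slow_re = re.compile(r'^BENCHMARK_SLOW_LOG=(.*)$', re.M)

    for b_slow_log in b_slow_re.findall(configs_str):
        # findall() yields the captured group, quotes included;
        # [1:-1] removes the leading and trailing quote characters
        BENCHMARK_SLOW_LOG = str(b_slow_log[1:-1])

    print(BENCHMARK_SLOW_LOG)  # ./benchmarks-over_tolerance.log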
11 changes: 8 additions & 3 deletions devops/scripts/benchmarking/compare.py
@@ -18,15 +18,15 @@ def compare_to_median(test_name: str, test_csv_path: str):
exit(-1)

median = dict()
with open(median_path, mode='r') as median_csv:
with open(median_path, 'r') as median_csv:
for stat in csv.DictReader(median_csv):
median[stat["TestCase"]] = \
{ metric: float(stat[metric]) for metric in common.metrics_variance }

# TODO read status codes from a config file
status = 0
failure_counts = { metric: 0 for metric in common.metrics_variance }
with open(test_csv_path, mode='r') as sample_csv:
with open(test_csv_path, 'r') as sample_csv:
for sample in csv.DictReader(sample_csv):
# Ignore test cases we haven't profiled before
if sample["TestCase"] not in median:
@@ -37,8 +37,13 @@
if common.sanitize(sample[metric]) > max_tolerated:
print("vvv FAILED vvv")
print(sample['TestCase'])
print(f"{metric}: {metric} {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}% -- {max_tolerated})")
print(f"{metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}%: {max_tolerated})")
print("^^^^^^^^^^^^^^")
with open(common.BENCHMARK_SLOW_LOG, 'a') as slow_log:
slow_log.write(
f"-- {test_name}::{sample['TestCase']}\n"
f" {metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tol. {threshold*100}%: {max_tolerated})\n"
)
status = 1
failure_counts[metric] += 1
if status != 0:
