From 06bdfd547f53f76b707e0fa9f3eb72e4c434fd56 Mon Sep 17 00:00:00 2001
From: "Mateusz P. Nowak"
Date: Mon, 30 Sep 2024 05:08:22 -0700
Subject: [PATCH] final updates

---
 .github/workflows/bench_publish.yml     | 41 ------------
 scripts/benchmarks/benches/compute.py   |  3 -
 scripts/benchmarks/benches/syclbench.py | 50 ++++----------
 scripts/benchmarks/main.py              |  4 +-
 scripts/benchmarks/output.py            | 88 ++++++++++++-------------
 5 files changed, 60 insertions(+), 126 deletions(-)
 delete mode 100644 .github/workflows/bench_publish.yml

diff --git a/.github/workflows/bench_publish.yml b/.github/workflows/bench_publish.yml
deleted file mode 100644
index f1283d4d11..0000000000
--- a/.github/workflows/bench_publish.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-name: Benchmarks Publish
-
-on:
-  # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab)
-  workflow_dispatch:
-
-  push:
-    branches:
-      - main # Trigger on push to the main branch
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-
-    - name: Upload PNG artifact
-      uses: actions/upload-artifact@v3
-      with:
-        name: benchmark-artifact
-        path: benchmarks_result.png # Path to the PNG file in your repository
-
-  deploy:
-    runs-on: ubuntu-latest
-
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v2
-
-    - name: Download PNG artifact
-      uses: actions/download-artifact@v3
-      with:
-        name: benchmark-artifact
-
-    - name: Deploy to GitHub Pages
-      uses: peaceiris/actions-gh-pages@v3
-      with:
-        github_token: ${{ secrets.GITHUB_TOKEN }}
-        publish_dir: . # Directory where the PNG file should be published
\ No newline at end of file
diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py
index 31efd748e6..98be6c0df0 100644
--- a/scripts/benchmarks/benches/compute.py
+++ b/scripts/benchmarks/benches/compute.py
@@ -18,8 +18,6 @@ def __init__(self, directory):
         return
 
     def setup(self):
-        # self.bins = os.path.join(self.directory, 'compute-benchmarks-build', 'bin')
-
         if self.built:
             return
 
@@ -53,7 +51,6 @@ def __init__(self, bench, name, test):
         self.bench_name = name
         self.test = test
         super().__init__(bench.directory)
-        # self.setup()
 
     def bin_args(self) -> list[str]:
         return []
diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py
index ccb63a3f6a..f52c68c2dd 100644
--- a/scripts/benchmarks/benches/syclbench.py
+++ b/scripts/benchmarks/benches/syclbench.py
@@ -39,13 +39,13 @@ def setup(self):
 
         print(f"Run {configure_command}")
         run(configure_command, add_sycl=True)
-    
+
         print(f"Run cmake --build {build_path}")
         run(f"cmake --build {build_path} -j", add_sycl=True)
 
         self.built = True
        self.bins = build_path
-    
+
 class SyclBenchmark(Benchmark):
     def __init__(self, bench, name, test):
         self.bench = bench
@@ -81,27 +81,19 @@ def run(self, env_vars) -> list[Result]:
 
         # no output to stdout, all in outputfile
         self.run_bench(command, env_vars)
-        
+
         with open(outputfile, 'r') as f:
             reader = csv.reader(f)
             res_list = []
             for row in reader:
                 if not row[0].startswith('#'):
                     res_list.append(
-                    Result(label=row[0], 
-                        value=float(row[12]) * 1000, # convert to ms 
-                        command=command, 
-                        env=env_vars, 
+                    Result(label=row[0],
+                        value=float(row[12]) * 1000, # convert to ms
+                        command=command,
+                        env=env_vars,
                         stdout=row))
-                        # stdout=result))
-
-            # median_list = []
-            # for label in set(result.label for result in res_list):
-            #     values = [result.value for result in res_list if result.label == label]
-            #     median_value = sorted(values)[len(values) // 2]
-            #     median_list.append(Result(label=label, value=median_value, command=command, env=env_vars, stdout=result))
-
-            # return median_list
+
             return res_list
 
     def teardown(self):
@@ -162,7 +154,7 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=20480",
         ]
-    
+
 class Correlation(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "correlation", "Correlation")
@@ -189,7 +181,7 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=8192",
         ]
-    
+
 class Gesumv(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "gesummv", "Gesummv")
@@ -244,17 +236,6 @@ def bin_args(self) -> list[str]:
             f"--size=2048",
         ]
 
-# ** bad input file path **
-#
-# class Median(SyclBenchmark):
-#     def __init__(self, bench):
-#         super().__init__(bench, "median", "MedianFilter")
-
-#     def bin_args(self) -> list[str]:
-#         return [
-#             f"--size=512",
-#         ]
-
 class MolDyn(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "mol_dyn", "MolecularDynamics")
@@ -272,7 +253,7 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=32767",
         ]
-    
+
 class NBody(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "nbody", "NBody_")
@@ -290,11 +271,6 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=--size=100000000",
         ]
-# ../../share/Brommy.bmp could not be opened
-#
-# class SobelX(SyclBenchmark):
-#     def __init__(self, bench):
-#         super().__init__(bench, "sobel", "SobelFilter")
 
 class Syr2k(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "syr2k", "Syr2k")
@@ -304,7 +280,7 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=6144",
         ]
-    
+
 class Syrk(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "syrk", "Syrk")
@@ -333,7 +309,7 @@ def bin_args(self) -> list[str]:
         return [
             f"--size=32768",
         ]
-    
+
 class DagTaskS(SyclBenchmark):
     def __init__(self, bench):
         super().__init__(bench, "dag_task_throughput_sequential", "DAGTaskThroughput_multi")
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index c35e4f4670..546ed36164 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -43,7 +43,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024),
         ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024),
         VectorSum(cb),
-        
+
         # *** Velocity benchmarks
         Hashtable(vb),
         Bitcracker(vb),
@@ -186,6 +186,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5)
     parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
     parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)
+    parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005)
     parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true")
     parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true")
 
@@ -197,6 +198,7 @@ def validate_and_parse_env_args(env_args):
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
+    options.epsilon = args.epsilon
     options.ur_dir = args.ur_dir
     options.ur_adapter_name = args.ur_adapter_name
     options.exit_on_failure = args.exit_on_failure
diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py
index 0895fe17d7..eec8957fe7 100644
--- a/scripts/benchmarks/output.py
+++ b/scripts/benchmarks/output.py
@@ -71,10 +71,10 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
     improved = 0
     regressed = 0
     no_change = 0
-    
+
     for bname, results in benchmark_results.items():
-        l = OutputLine(bname)
-        l.row = f"| {bname} |"
+        oln = OutputLine(bname)
+        oln.row = f"| {bname} |"
 
         best_value = None
         best_key = None
@@ -90,11 +90,11 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
             if key in results:
                 intv = results[key].value
                 if key == best_key:
-                    l.row += f" {intv:3f} {results[key].unit} |" # Highlight the best value
+                    oln.row += f" {intv:3f} {results[key].unit} |" # Highlight the best value
                 else:
-                    l.row += f" {intv:.3f} {results[key].unit} |"
+                    oln.row += f" {intv:.3f} {results[key].unit} |"
             else:
-                l.row += " - |"
+                oln.row += " - |"
 
         if len(chart_data.keys()) == 2:
             key0 = list(chart_data.keys())[0]
@@ -103,71 +103,72 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
                 v0 = results[key0].value
                 v1 = results[key1].value
                 diff = None
-                if v0 != 0 and results[key0].lower_is_better: 
+                if v0 != 0 and results[key0].lower_is_better:
                     diff = v1/v0
                 elif v1 != 0 and not results[key0].lower_is_better:
                     diff = v0/v1
-                
+
                 if diff != None:
-                    l.row += f"{(diff * 100):.2f}%"
-                    l.diff = diff
+                    oln.row += f"{(diff * 100):.2f}%"
+                    oln.diff = diff
 
-            output_detailed_list.append(l)
+            output_detailed_list.append(oln)
 
     sorted_detailed_list = sorted(output_detailed_list, key=lambda x: (x.diff is not None, x.diff), reverse=True)
 
-    diff_values = [l.diff for l in sorted_detailed_list if l.diff is not None]
+    diff_values = [oln.diff for oln in sorted_detailed_list if oln.diff is not None]
 
     if len(diff_values) > 0:
         max_diff = max(max(diff_values) - 1, 1 - min(diff_values))
 
-        for l in sorted_detailed_list:
-            if l.diff != None:
-                l.row += f" | {(l.diff - 1)*100:.2f}%"
-                epsilon = 0.005
-                delta = l.diff - 1
-                l.bars = round(10*(l.diff - 1)/max_diff)
-                if l.bars == 0 or abs(delta) < epsilon:
-                    l.row += " | . |"
-                elif l.bars > 0:
-                    l.row += f" | {'+' * l.bars} |"
+        for oln in sorted_detailed_list:
+            if oln.diff != None:
+                oln.row += f" | {(oln.diff - 1)*100:.2f}%"
+                delta = oln.diff - 1
+                oln.bars = round(10*(oln.diff - 1)/max_diff)
+                if oln.bars == 0 or abs(delta) < options.epsilon:
+                    oln.row += " | . |"
+                elif oln.bars > 0:
+                    oln.row += f" | {'+' * oln.bars} |"
                 else:
-                    l.row += f" | {'-' * (-l.bars)} |"
-                if options.verbose: print(l.row)
-                
+                    oln.row += f" | {'-' * (-oln.bars)} |"
+
                 mean_cnt += 1
-                if abs(delta) > epsilon:
+                if abs(delta) > options.epsilon:
                     if delta > 0: improved+=1
                     else: regressed+=1
                 else:
                     no_change+=1
-                
-                global_product *= l.diff
+
+                global_product *= oln.diff
             else:
-                l.row += " | |"
-            summary_table += l.row + "\n"
+                oln.row += " | |"
+
+            if options.verbose: print(oln.row)
+            summary_table += oln.row + "\n"
     else:
-        for l in sorted_detailed_list:
-            l.row += " | |"
-            summary_table += l.row + "\n"
+        for oln in sorted_detailed_list:
+            oln.row += " | |"
+            if options.verbose: print(oln.row)
+            summary_table += oln.row + "\n"
 
     grouped_objects = collections.defaultdict(list)
 
-    for l in output_detailed_list:
-        s = l.label
+    for oln in output_detailed_list:
+        s = oln.label
         prefix = re.match(r'^[^_\s]+', s)[0]
-        grouped_objects[prefix].append(l)
+        grouped_objects[prefix].append(oln)
 
     grouped_objects = dict(grouped_objects)
 
     if mean_cnt > 0:
-        global_mean = global_product ** (1/mean_cnt)  
+        global_mean = global_product ** (1/mean_cnt)
         summary_line = f"Total {mean_cnt} benchmarks in mean. "
-        summary_line += "\n" + f"Geomean {global_mean*100:.3f}%. \nImproved {improved} Regressed {regressed} (treshold {epsilon*100:.2f}%)"
+        summary_line += "\n" + f"Geomean {global_mean*100:.3f}%. \nImproved {improved} Regressed {regressed} (threshold {options.epsilon*100:.2f}%)"
     else:
         summary_line = f"No diffs to calculate performance change"
 
     for name, outgroup in grouped_objects.items():
         outgroup_s = sorted(outgroup, key=lambda x: (x.diff is not None, x.diff), reverse=True)
@@ -181,9 +182,9 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
         product = 1.0
         n = len(outgroup_s)
         r = 0
-        for l in outgroup_s:
-            if l.diff != None:
-                product *= l.diff
+        for oln in outgroup_s:
+            if oln.diff != None:
+                product *= oln.diff
                 r += 1
         if r > 0:
             summary_table += f"""
@@ -200,8 +201,8 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
         summary_table += "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n"
         summary_table += "|---" * (len(chart_data) + 4) + "|\n"
 
-        for l in outgroup_s:
-            summary_table += f"{l.row}\n"
+        for oln in outgroup_s:
+            summary_table += f"{oln.row}\n"
 
         summary_table += f"""
 
@@ -211,7 +212,6 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
     return summary_line, summary_table
 
 def generate_markdown(chart_data: dict[str, list[Result]]):
-    # mermaid_script = generate_mermaid_script(chart_data)
     (summary_line, summary_table) = generate_summary_table_and_chart(chart_data)
 
     return f"""