Organize benchmarks into suites

This change closes bytecodealliance#202 by adding a way to group benchmarks into `*.suite` files. This incorporates @jameysharp's comments, e.g., to calculate the benchmark path relative to the suite path and to detect suite files based on their `.suite` extension. Though more suites could be added in the future, the result of this is that suite files can be built (see `benchmarks/shootout.suite`, e.g.) and then run like:

```console
$ sightglass-cli benchmark benchmarks/shootout.suite -e ...
```
abrown · Oct 10, 2022 · 4b900aa · 4b900aa
1 parent b4971ae
commit 4b900aa
Show file tree

Hide file tree

Showing 5 changed files with 190 additions and 11 deletions.
diff --git a/benchmarks/default.suite b/benchmarks/default.suite
@@ -0,0 +1,8 @@
+# These benchmarks are the default Sightglass benchmarks to run when no others
+# are specified. Other benchmarks in the repository are valuable, but running
+# all benchmarks could take quite some time. This list is a compromise between
+# picking representative real-world benchmarks and measurement time.
+
+bz2/benchmark.wasm
+pulldown-cmark/benchmark.wasm
+spidermonkey/benchmark.wasm
diff --git a/benchmarks/shootout.suite b/benchmarks/shootout.suite
@@ -0,0 +1,25 @@
+# These benchmarks are adapted from the "The Computer Language Benchmarks Game"
+# for use within Sightglass. Sightglass only includes a subset of those
+# benchmarks which are typically smaller kernels (in some cases, true
+# micro-benchmarks) extracted from larger programs. More information is
+# available at https://benchmarksgame-team.pages.debian.net/benchmarksgame.
+
+shootout-ackermann/benchmark.wasm
+shootout-base64/benchmark.wasm
+shootout-ctype/benchmark.wasm
+shootout-ed25519/benchmark.wasm
+shootout-fib2/benchmark.wasm
+shootout-gimli/benchmark.wasm
+shootout-heapsort/benchmark.wasm
+shootout-keccak/benchmark.wasm
+shootout-matrix/benchmark.wasm
+shootout-memmove/benchmark.wasm
+shootout-minicsv/benchmark.wasm
+shootout-nestedloop/benchmark.wasm
+shootout-random/benchmark.wasm
+shootout-ratelimit/benchmark.wasm
+shootout-seqhash/benchmark.wasm
+shootout-sieve/benchmark.wasm
+shootout-switch/benchmark.wasm
+shootout-xblabla20/benchmark.wasm
+shootout-xchacha20/benchmark.wasm
diff --git a/crates/cli/src/benchmark.rs b/crates/cli/src/benchmark.rs
@@ -1,3 +1,4 @@
+use crate::suite::BenchmarkOrSuite;
 use anyhow::{anyhow, Context, Result};
 use rand::{rngs::SmallRng, Rng, SeedableRng};
 use sightglass_data::{Format, Measurement, Phase};
@@ -19,14 +20,23 @@ use structopt::StructOpt;
 /// NUMBER_OF_ITERATIONS_PER_PROCESS`.
 #[derive(StructOpt, Debug)]
 pub struct BenchmarkCommand {
-    /// The path to the Wasm file(s) to benchmark.
+    /// The path to the file(s) to benchmark. This accepts one or more:
+    ///
+    /// - `*.wasm` files: individual benchmarks that meet the requirements
+    ///   outlined in `benchmarks/README.md`
+    ///
+    /// - `*.suite` files: a file containing a newline-delimited list of
+    ///   benchmarks to execute. A `*.suite` file may contain `#`-prefixed line
+    ///   comments. Relative paths are resolved against the parent directory of
+    ///   the `*.suite` file.
+    ///
+    /// By default, this will use `benchmarks/default.suite`.
     #[structopt(
         index = 1,
-        required = true,
-        value_name = "WASMFILE",
-        parse(from_os_str)
+        default_value = "benchmarks/default.suite",
+        value_name = "FILE"
     )]
-    wasm_files: Vec<PathBuf>,
+    benchmarks: Vec<BenchmarkOrSuite>,
 
     /// The benchmark engine(s) with which to run the benchmark.
     ///
@@ -143,11 +153,7 @@ impl BenchmarkCommand {
             bind_to_single_core().context("attempting to pin execution to a single core")?;
         }
 
-        let wasm_files: Vec<_> = self
-            .wasm_files
-            .iter()
-            .map(|f| f.display().to_string())
-            .collect();
+        let wasm_files: Vec<_> = self.benchmarks.iter().flat_map(|f| f.paths()).collect();
         let mut all_measurements = vec![];
 
         for engine in &self.engines {
@@ -308,7 +314,7 @@ impl BenchmarkCommand {
             // and therefore potentially invalidating relative paths used here).
             let engine = check_engine_path(engine)?;
 
-            for wasm in &self.wasm_files {
+            for wasm in &self.benchmarks {
                 choices.push((engine.clone(), wasm, self.processes));
             }
         }

diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
@@ -1,6 +1,7 @@
 mod benchmark;
 mod effect_size;
 mod fingerprint;
+mod suite;
 mod summarize;
 mod upload;
 mod validate;

diff --git a/crates/cli/src/suite.rs b/crates/cli/src/suite.rs
@@ -0,0 +1,139 @@
+//! Group benchmarks into suites.
+//!
+//! [Suite]s are files that contain a newline-separated list of benchmark files
+//! to run; see [Suite::parse] for more details on the syntax of these files. To
+//! distinguish between a [Suite] file and a regular benchmark file, we use
+//! [BenchmarkOrSuite].
+
+use anyhow::Result;
+use std::{
+    ffi::OsStr,
+    fs, io,
+    path::{Path, PathBuf},
+    str::FromStr,
+};
+
+/// Decide between a suite of benchmarks or an individual benchmark file.
+#[derive(Debug)]
+pub enum BenchmarkOrSuite {
+    Suite(Suite),
+    Benchmark(PathBuf),
+}
+
+impl BenchmarkOrSuite {
+    /// List all of the paths to the benchmarks to run.
+    pub fn paths(&self) -> Vec<String> {
+        match self {
+            BenchmarkOrSuite::Suite(suite) => suite
+                .benchmarks
+                .iter()
+                .map(|p| p.display().to_string())
+                .collect(),
+            BenchmarkOrSuite::Benchmark(path) => vec![path.display().to_string()],
+        }
+    }
+}
+
+/// It is helpful to reference the original path.
+impl AsRef<OsStr> for BenchmarkOrSuite {
+    fn as_ref(&self) -> &OsStr {
+        match self {
+            BenchmarkOrSuite::Suite(suite) => suite.path.as_os_str(),
+            BenchmarkOrSuite::Benchmark(path) => path.as_os_str(),
+        }
+    }
+}
+
+/// Parse a [BenchmarkOrSuite] from a string path; files ending in `.suite` are
+/// assumed to be suite files.
+impl FromStr for BenchmarkOrSuite {
+    type Err = anyhow::Error;
+    fn from_str(path: &str) -> Result<Self, Self::Err> {
+        Ok(if Suite::has_extension(path) {
+            Self::Suite(Suite::parse(path)?)
+        } else {
+            Self::Benchmark(PathBuf::from(path))
+        })
+    }
+}
+
+#[derive(Debug)]
+pub struct Suite {
+    path: PathBuf,
+    benchmarks: Vec<PathBuf>,
+}
+
+impl Suite {
+    /// Parse the contents of a suite file:
+    /// - empty lines are ignored
+    /// - `#`-prefixed lines are ignored
+    /// - relative paths are resolved using the parent directory of the
+    ///   `suite_path`.
+    pub fn parse<P: AsRef<Path>>(suite_path: P) -> Result<Self> {
+        Self::parse_contents(suite_path.as_ref(), &fs::read(suite_path.as_ref())?)
+    }
+
+    /// Parse suite `file_contents` as if they had been read from `suite_path`;
+    /// kept separate from [Suite::parse] so tests need no file on disk.
+    ///
+    /// Lines are trimmed; empty lines and lines starting with `#` are skipped.
+    /// Each remaining line is joined onto the parent directory of
+    /// `suite_path`, so an absolute entry replaces that directory outright.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a line cannot be read as UTF-8, instead of silently
+    /// dropping that benchmark from the suite.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `suite_path` has no parent directory.
+    fn parse_contents<P: AsRef<Path>>(suite_path: P, file_contents: &[u8]) -> Result<Self> {
+        use io::BufRead;
+        let suite_path = suite_path.as_ref().to_path_buf();
+        let parent_dir = suite_path
+            .parent()
+            .expect("the suite path must have a parent directory");
+        let mut benchmarks = vec![];
+        for line in io::BufReader::new(file_contents).lines() {
+            // Propagate the read error rather than skipping the line: a
+            // benchmark missing from the run with no diagnostic is worse than
+            // a failed parse.
+            let line = line?;
+            let line = line.trim();
+            if !line.starts_with('#') && !line.is_empty() {
+                benchmarks.push(parent_dir.join(line));
+            }
+        }
+        Ok(Self {
+            path: suite_path,
+            benchmarks,
+        })
+    }
+
+    /// Check if a path has the `suite` extension.
+    fn has_extension<P: AsRef<Path>>(path: P) -> bool {
+        match path.as_ref().extension() {
+            Some(ext) => ext == "suite",
+            None => false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const SUITE_PATH: &str = "/home/bench.suite";
+    const CONTENTS: &str = "
+        # These benchmarks are...
+        a.wasm
+        /b.wasm
+        ../c.wasm
+    ";
+
+    #[test]
+    fn parse() {
+        let suite = Suite::parse_contents(SUITE_PATH, CONTENTS.as_bytes()).unwrap();
+
+        // The suite path should be preserved.
+        assert_eq!(suite.path, PathBuf::from(SUITE_PATH));
+
+        assert_eq!(
+            suite.benchmarks,
+            vec![
+                // Relative paths are appended.
+                PathBuf::from("/home/a.wasm"),
+                // Absolute paths are preserved.
+                PathBuf::from("/b.wasm"),
+                // Canonicalization happens later.
+                PathBuf::from("/home/../c.wasm")
+            ]
+        )
+    }
+}