Add an upload command (bytecodealliance#200)

* Add an `upload` command

This change adds the ability to upload some Sightglass measurement data to an ElasticSearch server. To do so, the parts of the data that are environment-specific (machine, engine, benchmark) are "fingerprinted," or given a unique ID to differentiate them from other data points. The "fingerprinting" also serves to unify data points more safely. E.g., if the same benchmark is run on two different machines with the same engine, the database will contain two machine entries and a single entry each for the engine and benchmark; the measurement entries will reference the de-duplicated ID.

For example, to upload some measurements to the (default) localhost server:

```console
$ sightglass-cli upload -f measurements.json
```

The mechanism for uploading the various data points to an HTTP endpoint is specific to ElasticSearch in expectation of a future commit defining how to run such a database. With some work, this `upload` functionality could be made more generic or adapted to other database types.

This commit also contains the ability to upload measurement data at a later time. This is helpful if the ElasticSearch endpoint is not available from wherever the measurements are collected. In this scenario (which I have often been in), the fingerprinted, timestamped measurements can be "packaged up" into a JSON file using the `--dry-run` flag and moved to where they can subsequently be uploaded. For example:

```console
$ sightglass-cli upload --dry-run -f measurements.json > package.json
[move package.json to some other place]
$ sightglass-cli upload --from-package package.json
```

* fix: engine suffix in test fixtures only matches on Linux

* review: rename command to 'upload-elastic'
abrown · Sep 1, 2022 · b4971ae · b4971ae
1 parent 0717df0
commit b4971ae
Show file tree

Hide file tree

Showing 12 changed files with 7,256 additions and 18 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -6,7 +6,8 @@ members = [
     "crates/cli",
     "crates/data",
     "crates/fingerprint",
-    "crates/recorder"
+    "crates/recorder",
+    "crates/upload",
 ]
 default-members = [
     "crates/cli"

diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
@@ -15,6 +15,7 @@ sightglass-build = { path = "../build" }
 sightglass-data = { path = "../data" }
 sightglass-fingerprint = { path = "../fingerprint" }
 sightglass-recorder = { path = "../recorder" }
+sightglass-upload = { path = "../upload" }
 structopt = { version = "0.3", features = ["color", "suggestions"] }
 thiserror = "1.0"
 rand = { version = "0.7.3", features = ["small_rng"] }

diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
@@ -2,6 +2,7 @@ mod benchmark;
 mod effect_size;
 mod fingerprint;
 mod summarize;
+mod upload;
 mod validate;
 
 use anyhow::Result;
@@ -11,6 +12,7 @@ use fingerprint::FingerprintCommand;
 use log::trace;
 use structopt::{clap::AppSettings, StructOpt};
 use summarize::SummarizeCommand;
+use upload::UploadCommand;
 use validate::ValidateCommand;
 
 /// Main entry point for CLI.
@@ -32,21 +34,23 @@ fn main() -> Result<()> {
 )]
 enum SightglassCommand {
     Benchmark(BenchmarkCommand),
-    Validate(ValidateCommand),
-    Summarize(SummarizeCommand),
     EffectSize(EffectSizeCommand),
     Fingerprint(FingerprintCommand),
+    Summarize(SummarizeCommand),
+    UploadElastic(UploadCommand),
+    Validate(ValidateCommand),
 }
 
 impl SightglassCommand {
     fn execute(&self) -> Result<()> {
         trace!("Executing command: {:?}", &self);
         match self {
             SightglassCommand::Benchmark(benchmark) => benchmark.execute(),
-            SightglassCommand::Validate(validate) => validate.execute(),
-            SightglassCommand::Summarize(summarize) => summarize.execute(),
             SightglassCommand::EffectSize(effect_size) => effect_size.execute(),
             SightglassCommand::Fingerprint(fingerprint) => fingerprint.execute(),
+            SightglassCommand::Summarize(summarize) => summarize.execute(),
+            SightglassCommand::UploadElastic(upload) => upload.execute(),
+            SightglassCommand::Validate(validate) => validate.execute(),
         }
     }
 }
diff --git a/crates/cli/src/upload.rs b/crates/cli/src/upload.rs
@@ -0,0 +1,68 @@
+use anyhow::{Context, Result};
+use sightglass_data::{Format, Measurement};
+use sightglass_upload::{upload, upload_package, MeasurementPackage};
+use std::{
+    fs::File,
+    io::{self, BufReader, Read},
+};
+use structopt::StructOpt;
+
+/// Upload benchmark output to an ElasticSearch server; accepts raw benchmark
+/// results in `stdin` (i.e., from `sightglass-cli benchmark ...`).
+#[derive(Debug, StructOpt)]
+#[structopt(name = "upload-elastic")]
+pub struct UploadCommand {
+    /// The format of the input data. Either 'json' or 'csv'.
+    #[structopt(short = "i", long = "input-format", default_value = "json")]
+    input_format: Format,
+
+    /// Path to the file that will be read from, or none to indicate stdin
+    /// (default).
+    #[structopt(short = "f", long = "input-file")]
+    input_file: Option<String>,
+
+    /// The URL of a server receiving results; this command only understands how
+    /// to upload results to an ElasticSearch server; e.g.,
+    /// `http://localhost:9200`.
+    #[structopt(index = 1, default_value = "http://localhost:9200", value_name = "URL")]
+    server: String,
+
+    /// Setting this flag will prevent any uploading to the server. Instead,
+    /// the command will emit a JSON "package" to stdout that can be used to
+    /// upload at a later time, see `--from-package`.
+    #[structopt(short = "d", long = "dry-run")]
+    dry_run: bool,
+
+    /// Path to a file containing a package of measurements and fingerprint data
+    /// to be uploaded. If this is set, `--input-file` and `--input-format` are
+    /// ignored.
+    #[structopt(short = "p", long = "from-package")]
+    from_package: Option<String>,
+
+    /// The number of measurements to upload together; this can speed up the
+    /// upload. Defaults to `2000`.
+    #[structopt(short = "b", long = "batch-size", default_value = "2000")]
+    batch_size: usize,
+}
+
+impl UploadCommand {
+    pub fn execute(&self) -> Result<()> {
+        if let Some(file) = &self.from_package {
+            let reader =
+                BufReader::new(File::open(file).context("unable to open --from-package path")?);
+            let package: MeasurementPackage =
+                serde_json::from_reader(reader).context("unable to parse --from-package JSON")?;
+            upload_package(&self.server, self.batch_size, self.dry_run, package)
+        } else {
+            let file: Box<dyn Read> = if let Some(file) = self.input_file.as_ref() {
+                Box::new(BufReader::new(
+                    File::open(file).context("unable to open --input-file")?,
+                ))
+            } else {
+                Box::new(io::stdin())
+            };
+            let measurements: Vec<Measurement> = self.input_format.read(file)?;
+            upload(&self.server, self.batch_size, self.dry_run, measurements)
+        }
+    }
+}
diff --git a/crates/cli/tests/all/main.rs b/crates/cli/tests/all/main.rs
@@ -1,6 +1,7 @@
 mod benchmark;
 mod fingerprint;
 mod help;
+mod upload;
 mod util;
 
 fn main() {}
diff --git a/crates/cli/tests/all/upload.rs b/crates/cli/tests/all/upload.rs
@@ -0,0 +1,56 @@
+//! Test `sightglass-cli upload-elastic`.
+
+use super::util::{benchmark, sightglass_cli, test_engine};
+use assert_cmd::prelude::*;
+use predicates::prelude::*;
+
+// Because the `results.json` contains `*.so` suffixes for the engine, this test
+// can only run where the fingerprinted engine will have a matching suffix,
+// i.e., Linux.
+#[cfg(target_os = "linux")]
+#[test]
+fn upload_dryrun() {
+    let assert = sightglass_cli()
+        .arg("upload-elastic")
+        .arg("--dry-run")
+        .arg("--input-file")
+        .arg("tests/results.json")
+        .arg("--batch-size")
+        .arg("200")
+        .env("RUST_LOG", "debug")
+        .assert();
+
+    // Gather up the logged output from stderr.
+    let stderr = std::str::from_utf8(&assert.get_output().stderr).unwrap();
+    eprintln!("=== stderr ===\n{}\n===========", stderr);
+
+    // Gather the fingerprints of the system under test.
+    let engine = sightglass_fingerprint::Engine::fingerprint(test_engine()).unwrap();
+    let benchmark = sightglass_fingerprint::Benchmark::fingerprint(benchmark("noop")).unwrap();
+    let machine = sightglass_fingerprint::Machine::fingerprint().unwrap();
+
+    // Check that we upload measurement records for each of the measurements in the file.
+    let num_uploaded_batches = stderr
+        .matches("Batching up 200 records to index 'measurements'")
+        .count();
+    assert_eq!(num_uploaded_batches, 3);
+
+    // Also, check that we create records for the engine/machine/benchmark.
+    use predicate::str::*;
+    assert
+        .stderr(
+            contains(format!(
+                r#"Creating record in 'engines' with ID Some("{}")"#,
+                engine.id
+            ))
+            .and(contains(format!(
+                r#"Creating record in 'machines' with ID Some("{}")"#,
+                machine.id
+            )))
+            .and(contains(format!(
+                r#"Creating record in 'benchmarks' with ID Some("{}")"#,
+                benchmark.id
+            ))),
+        )
+        .success();
+}