Skip to content

Commit

Permalink
Add an upload command (bytecodealliance#200)
Browse files Browse the repository at this point in the history
* Add an `upload` command

This change adds the ability to upload some Sightglass measurement data
to an ElasticSearch server. To do so, the parts of the data that are
environment-specific (machine, engine, benchmark) are "fingerprinted,"
or given a unique ID to differentiate them from other data points. The
"fingerprinting" also serves to unify data points more safely. E.g., if
the same benchmark is run on two different machines with the same
engine, the database will contain two machine entries and a single entry
each for the engine and benchmark; the measurement entries will
reference the de-duplicated ID.

For example, to upload some measurements to the (default) localhost server:

```console
$ sightglass-cli upload -f measurements.json
```

The mechanism for uploading the various data points to an HTTP endpoint
is specific to ElasticSearch in expectation of a future commit defining
how to run such a database. With some work, this `upload` functionality
could be made more generic or adapted to other database types.

This commit also contains the ability to upload measurement data at a
later time. This is helpful if the ElasticSearch endpoint is not
available from wherever the measurements are collected. In this scenario
(which I have often been in), the fingerprinted, timestamped
measurements can be "packaged up" into a JSON file using the `--dry-run`
flag and moved to where they can subsequently be uploaded. For example:

```console
$ sightglass-cli upload --dry-run -f measurements.json > package.json
[move package.json to some other place]
$ sightglass-cli upload --from-package package.json
```

* fix: engine suffix in test fixtures only matches on Linux

* review: rename command to 'upload-elastic'
  • Loading branch information
abrown authored Sep 1, 2022
1 parent 0717df0 commit b4971ae
Show file tree
Hide file tree
Showing 12 changed files with 7,256 additions and 18 deletions.
703 changes: 690 additions & 13 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ members = [
"crates/cli",
"crates/data",
"crates/fingerprint",
"crates/recorder"
"crates/recorder",
"crates/upload",
]
default-members = [
"crates/cli"
Expand Down
1 change: 1 addition & 0 deletions crates/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ sightglass-build = { path = "../build" }
sightglass-data = { path = "../data" }
sightglass-fingerprint = { path = "../fingerprint" }
sightglass-recorder = { path = "../recorder" }
sightglass-upload = { path = "../upload" }
structopt = { version = "0.3", features = ["color", "suggestions"] }
thiserror = "1.0"
rand = { version = "0.7.3", features = ["small_rng"] }
Expand Down
12 changes: 8 additions & 4 deletions crates/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod benchmark;
mod effect_size;
mod fingerprint;
mod summarize;
mod upload;
mod validate;

use anyhow::Result;
Expand All @@ -11,6 +12,7 @@ use fingerprint::FingerprintCommand;
use log::trace;
use structopt::{clap::AppSettings, StructOpt};
use summarize::SummarizeCommand;
use upload::UploadCommand;
use validate::ValidateCommand;

/// Main entry point for CLI.
Expand All @@ -32,21 +34,23 @@ fn main() -> Result<()> {
)]
enum SightglassCommand {
Benchmark(BenchmarkCommand),
Validate(ValidateCommand),
Summarize(SummarizeCommand),
EffectSize(EffectSizeCommand),
Fingerprint(FingerprintCommand),
Summarize(SummarizeCommand),
UploadElastic(UploadCommand),
Validate(ValidateCommand),
}

impl SightglassCommand {
/// Run the parsed subcommand by delegating to that command's own `execute`
/// method and returning its result unchanged.
fn execute(&self) -> Result<()> {
// Log the whole parsed command (via its `Debug` output) at trace level.
trace!("Executing command: {:?}", &self);
// NOTE(review): the `Validate` and `Summarize` arms appear twice in this
// view; this looks like a diff rendering that shows both the removed lines
// and the re-added (alphabetized) lines -- confirm against the real file.
match self {
SightglassCommand::Benchmark(benchmark) => benchmark.execute(),
SightglassCommand::Validate(validate) => validate.execute(),
SightglassCommand::Summarize(summarize) => summarize.execute(),
SightglassCommand::EffectSize(effect_size) => effect_size.execute(),
SightglassCommand::Fingerprint(fingerprint) => fingerprint.execute(),
SightglassCommand::Summarize(summarize) => summarize.execute(),
SightglassCommand::UploadElastic(upload) => upload.execute(),
SightglassCommand::Validate(validate) => validate.execute(),
}
}
}
68 changes: 68 additions & 0 deletions crates/cli/src/upload.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use anyhow::{Context, Result};
use sightglass_data::{Format, Measurement};
use sightglass_upload::{upload, upload_package, MeasurementPackage};
use std::{
fs::File,
io::{self, BufReader, Read},
};
use structopt::StructOpt;

// NOTE: structopt renders the `///` comments on this struct and its fields as
// the `--help` text for the subcommand, so they are user-facing strings;
// maintainer-only notes below use plain `//` comments instead.
/// Upload benchmark output to an ElasticSearch server; accepts raw benchmark
/// results in `stdin` (i.e., from `sightglass-cli benchmark ...`).
#[derive(Debug, StructOpt)]
#[structopt(name = "upload-elastic")]
pub struct UploadCommand {
/// The format of the input data. Either 'json' or 'csv'.
#[structopt(short = "i", long = "input-format", default_value = "json")]
input_format: Format,

/// Path to the file that will be read from, or none to indicate stdin
/// (default).
#[structopt(short = "f", long = "input-file")]
input_file: Option<String>,

/// The URL of a server receiving results; this command only understands how
/// to upload results to an ElasticSearch server; e.g.,
/// `http://localhost:9200`.
// This is the only positional argument (`index = 1`); the default URL is
// used when it is omitted.
#[structopt(index = 1, default_value = "http://localhost:9200", value_name = "URL")]
server: String,

/// Setting this flag will prevent any uploading to the server. Instead,
/// the command will emit a JSON "package" to stdout that can be used to
/// upload at a later time, see `--from-package`.
#[structopt(short = "d", long = "dry-run")]
dry_run: bool,

/// Path to a file containing a package of measurements and fingerprint data
/// to be uploaded. If this is set, `--input-file` and `--input-format` are
/// ignored.
// When this is `Some`, `execute` parses the package and never opens
// `--input-file` or stdin.
#[structopt(short = "p", long = "from-package")]
from_package: Option<String>,

/// The number of measurements to upload together; this can speed up the
/// upload. Defaults to `2000`.
// NOTE(review): `0` parses as a valid `usize` here; confirm that the upload
// code tolerates a zero batch size or reject it at parse time.
#[structopt(short = "b", long = "batch-size", default_value = "2000")]
batch_size: usize,
}

impl UploadCommand {
pub fn execute(&self) -> Result<()> {
if let Some(file) = &self.from_package {
let reader =
BufReader::new(File::open(file).context("unable to open --from-package path")?);
let package: MeasurementPackage =
serde_json::from_reader(reader).context("unable to parse --from-package JSON")?;
upload_package(&self.server, self.batch_size, self.dry_run, package)
} else {
let file: Box<dyn Read> = if let Some(file) = self.input_file.as_ref() {
Box::new(BufReader::new(
File::open(file).context("unable to open --input-file")?,
))
} else {
Box::new(io::stdin())
};
let measurements: Vec<Measurement> = self.input_format.read(file)?;
upload(&self.server, self.batch_size, self.dry_run, measurements)
}
}
}
1 change: 1 addition & 0 deletions crates/cli/tests/all/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod benchmark;
mod fingerprint;
mod help;
mod upload;
mod util;

// Intentionally empty. NOTE(review): presumably this file is only the root of
// the integration-test crate (it pulls in the `mod` declarations above) and the
// test harness supplies the real entry point -- confirm against the crate's
// Cargo.toml.
fn main() {}
56 changes: 56 additions & 0 deletions crates/cli/tests/all/upload.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//! Test `sightglass-cli upload-elastic`.
use super::util::{benchmark, sightglass_cli, test_engine};
use assert_cmd::prelude::*;
use predicates::prelude::*;

// The engine entries in `results.json` carry a `*.so` suffix, so the engine
// fingerprint computed here only lines up with the fixture on platforms that
// use that suffix, i.e., Linux.
#[cfg(target_os = "linux")]
#[test]
fn upload_dryrun() {
    use predicate::str::contains;

    // Run a dry-run upload of the fixture with debug logging enabled so that
    // the record-creation messages show up on stderr.
    let assert = sightglass_cli()
        .args(&[
            "upload-elastic",
            "--dry-run",
            "--input-file",
            "tests/results.json",
            "--batch-size",
            "200",
        ])
        .env("RUST_LOG", "debug")
        .assert();

    // Capture the log output and echo it to make failures easier to debug.
    let output = assert.get_output();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    eprintln!("=== stderr ===\n{}\n===========", stderr);

    // Fingerprint the system under test to learn the IDs we expect to see.
    let engine_fp = sightglass_fingerprint::Engine::fingerprint(test_engine()).unwrap();
    let benchmark_fp = sightglass_fingerprint::Benchmark::fingerprint(benchmark("noop")).unwrap();
    let machine_fp = sightglass_fingerprint::Machine::fingerprint().unwrap();

    // Every measurement in the file should be covered by a batch message.
    let batch_messages = stderr.matches("Batching up 200 records to index 'measurements'");
    assert_eq!(batch_messages.count(), 3);

    // Also, check that a record is created for each of the engine, machine
    // and benchmark fingerprints.
    let engine_record = contains(format!(
        r#"Creating record in 'engines' with ID Some("{}")"#,
        engine_fp.id
    ));
    let machine_record = contains(format!(
        r#"Creating record in 'machines' with ID Some("{}")"#,
        machine_fp.id
    ));
    let benchmark_record = contains(format!(
        r#"Creating record in 'benchmarks' with ID Some("{}")"#,
        benchmark_fp.id
    ));
    assert
        .stderr(engine_record.and(machine_record).and(benchmark_record))
        .success();
}
Loading

0 comments on commit b4971ae

Please sign in to comment.