Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Add a link to documentation on OOM task failure #3602

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions src/agent/onefuzz-task/src/managed/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,19 @@ pub async fn run(args: &clap::ArgMatches) -> Result<()> {

let min_available_memory_bytes = 1_000_000 * config.common().min_available_memory_mb;

let mut error_documentation: Option<&str> = None;
let result = match min_available_memory_bytes {
0 => {
log::info!("memory watchdog is disabled: this task may fail suddenly if it runs out of memory.");
config.run().await

tokio::select! {
result = config.run() => result,

_shutdown = shutdown_listener => Ok(()),
}
}
_ => {
// If the memory limit is 0, this will never return.
let check_oom = out_of_memory(min_available_memory_bytes);
min_bytes => {
let check_oom = out_of_memory(min_bytes);

tokio::select! {
result = config.run() => result,
Expand All @@ -112,18 +117,23 @@ pub async fn run(args: &clap::ArgMatches) -> Result<()> {
Ok(oom) = check_oom => {
// Convert the OOM notification to an error, so we can log it below.
let err = anyhow::format_err!("out of memory: {} bytes available, {} required", oom.available_bytes, oom.min_bytes);
error_documentation = Some("https://eng.ms/docs/cloud-ai-platform/azure-edge-platform-aep/aep-security/epsf-edge-and-platform-security-fundamentals/the-onefuzz-service/onefuzz/howto/understanding-task-oom");
Err(err)
},

_shutdown = shutdown_listener => {
Ok(())
}
_shutdown = shutdown_listener => Ok(()),
}
}
};

if let Err(err) = &result {
error!("error running task: {:?}", err);
if let Some(doc_url) = error_documentation {
info!(
"check out the documentation for more info about the error above: {}",
doc_url
);
}
}

onefuzz_telemetry::try_flush_and_close().await;
Expand Down
Loading