From 35805f3307f94ac2dd526cb937180fa25d3baa98 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 10:38:05 +0200 Subject: [PATCH 01/62] wip --- Cargo.lock | 1 + relay-server/Cargo.toml | 1 + .../src/services/buffer/envelopebuffer.rs | 129 ++++++++++++++++++ .../src/services/buffer/envelopestack.rs | 9 ++ relay-server/src/services/buffer/mod.rs | 2 + relay-server/src/services/mod.rs | 1 + 6 files changed, 143 insertions(+) create mode 100644 relay-server/src/services/buffer/envelopebuffer.rs create mode 100644 relay-server/src/services/buffer/envelopestack.rs create mode 100644 relay-server/src/services/buffer/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 5e6b42f148..24f4c63a2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4078,6 +4078,7 @@ dependencies = [ "multer", "once_cell", "pin-project-lite", + "priority-queue", "rand", "rayon", "regex", diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 7914a7b088..92eca9cde0 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -64,6 +64,7 @@ minidump = { workspace = true, optional = true } multer = { workspace = true } once_cell = { workspace = true } pin-project-lite = { workspace = true } +priority-queue = { workspace = true } rand = { workspace = true } rayon = { workspace = true } regex = { workspace = true } diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs new file mode 100644 index 0000000000..9a1a1e0547 --- /dev/null +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -0,0 +1,129 @@ +use std::cmp::Ordering; +use std::time::Instant; + +use relay_base_schema::project::ProjectKey; + +use crate::envelope::Envelope; +use crate::services::buffer::envelopestack::EnvelopeStack; + +pub trait EnvelopeBuffer { + fn push(&mut self, envelope: Box); + fn pop(&mut self) -> Option>; + fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); +} + +#[derive(Hash, PartialEq, Eq)] +struct StackKey { + 
own_key: ProjectKey, + sampling_key: ProjectKey, +} + +impl StackKey { + fn from_envelope(envelope: &Envelope) -> Self {} +} + +struct PriorityEnvelopeBuffer { + own_keys: hashbrown::HashMap>, + sampling_keys: hashbrown::HashMap>, + stacks: priority_queue::PriorityQueue>, +} + +impl EnvelopeBuffer for PriorityEnvelopeBuffer { + fn push(&mut self, envelope: Box) { + let stack_key = StackKey::from_envelope(&envelope); + let updated = self.stacks.change_priority_by(&stack_key, |stack| {}); + if !updated { + let old = self.stacks.push(stack_key, PrioritizedStack::new(envelope)); + debug_assert!(old.is_none()); + } + self.own_keys + .entry(stack_key.own_key) + .or_default() + .push(stack_key); + self.sampling_keys + .entry(stack_key.sampling_key) + .or_default() + .push(stack_key); + } + + fn pop(&mut self) -> Option> { + let (stack_key, stack) = self.stacks.peek_mut()?; + let entry = self + .own_keys + .entry(stack_key.own_key) + .or_default() + .push(stack_key); + self.sampling_keys + .entry(stack_key.sampling_key) + .or_default() + .push(stack_key); + } + + fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { + if let Some(stack_keys) = self.own_keys.get(project) { + for stack_key in stack_keys { + self.stacks.change_priority_by(stack_key, |stack| { + stack.own_ready = is_ready; + }); + } + } + if let Some(stack_keys) = self.sampling_keys.get(project) { + for stack_key in stack_keys { + self.stacks.change_priority_by(stack_key, |stack| { + stack.sampling_ready = is_ready; + }); + } + } + } +} + +struct PrioritizedStack { + own_ready: bool, + sampling_ready: bool, + received_at: Instant, + stack: S, +} + +impl PrioritizedStack { + fn ready(&self) -> bool { + self.own_ready && self.sampling_ready + } +} + +impl PrioritizedStack { + fn new(received_at: Instant) -> Self { + Self { + own_ready: false, + sampling_ready: false, + received_at, + stack: S::default(), + } + } +} + +impl PartialEq for PrioritizedStack { + fn eq(&self, other: &Self) -> bool { + 
self.ready() == other.ready() && self.received_at == other.received_at + } +} + +impl PartialOrd for PrioritizedStack { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for PrioritizedStack {} + +impl Ord for PrioritizedStack { + fn cmp(&self, other: &Self) -> Ordering { + match (self.ready(), other.ready()) { + (true, true) => self.received_at.cmp(&other.received_at), + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + // For non-ready stacks, we invert the priority, such that projects that are not + // ready and did not receive envelopes recently can be evicted. + (false, false) => self.received_at.cmp(&other.received_at).reverse(), + } + } +} diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs new file mode 100644 index 0000000000..a75c9047de --- /dev/null +++ b/relay-server/src/services/buffer/envelopestack.rs @@ -0,0 +1,9 @@ +use crate::envelope::Envelope; + +pub trait EnvelopeStack { + fn push(&mut self, envelope: Box); + + fn pop(&mut self) -> Option>; + + fn peek(&self) -> Option<&Envelope>; +} diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs new file mode 100644 index 0000000000..e1c03e125e --- /dev/null +++ b/relay-server/src/services/buffer/mod.rs @@ -0,0 +1,2 @@ +mod envelopebuffer; +mod envelopestack; diff --git a/relay-server/src/services/mod.rs b/relay-server/src/services/mod.rs index b4291be02e..128314fc77 100644 --- a/relay-server/src/services/mod.rs +++ b/relay-server/src/services/mod.rs @@ -27,6 +27,7 @@ //! Controller::run(|| Server::start()) //! .expect("failed to start relay"); //! 
``` +mod buffer; pub mod cogs; pub mod global_config; pub mod health_check; From 06ff0ac6dadf26ece3d501881ba2c9cc62e80269 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 11:21:42 +0200 Subject: [PATCH 02/62] wip: Borrow --- .../src/services/buffer/envelopebuffer.rs | 122 +++++++++++++----- 1 file changed, 87 insertions(+), 35 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index 9a1a1e0547..ba62cf352a 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -1,4 +1,5 @@ use std::cmp::Ordering; +use std::collections::BTreeSet; use std::time::Instant; use relay_base_schema::project::ProjectKey; @@ -12,7 +13,7 @@ pub trait EnvelopeBuffer { fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } -#[derive(Hash, PartialEq, Eq)] +#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { own_key: ProjectKey, sampling_key: ProjectKey, @@ -23,40 +24,68 @@ impl StackKey { } struct PriorityEnvelopeBuffer { - own_keys: hashbrown::HashMap>, - sampling_keys: hashbrown::HashMap>, - stacks: priority_queue::PriorityQueue>, + own_keys: hashbrown::HashMap>, + sampling_keys: hashbrown::HashMap>, + stacks: priority_queue::PriorityQueue, Priority>, } impl EnvelopeBuffer for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { + let received_at = envelope.received_at(); let stack_key = StackKey::from_envelope(&envelope); - let updated = self.stacks.change_priority_by(&stack_key, |stack| {}); - if !updated { - let old = self.stacks.push(stack_key, PrioritizedStack::new(envelope)); - debug_assert!(old.is_none()); + if let Some(qi) = self.stacks.get_mut(&stack_key) { + qi.0.value.push(envelope); + } else { + self.stacks.push( + QueueItem { + key: stack_key, + value: S::new(envelope), + }, + Priority::new(received_at), + ); + self.own_keys + .entry(stack_key.own_key) + .or_default() + 
.insert(stack_key); + self.sampling_keys + .entry(stack_key.sampling_key) + .or_default() + .insert(stack_key); } - self.own_keys - .entry(stack_key.own_key) - .or_default() - .push(stack_key); - self.sampling_keys - .entry(stack_key.sampling_key) - .or_default() - .push(stack_key); + self.stacks.change_priority_by(stack_key, |prio| { + prio.received_at = received_at; + }); } fn pop(&mut self) -> Option> { - let (stack_key, stack) = self.stacks.peek_mut()?; - let entry = self - .own_keys - .entry(stack_key.own_key) - .or_default() - .push(stack_key); - self.sampling_keys - .entry(stack_key.sampling_key) - .or_default() - .push(stack_key); + let ( + QueueItem { + key: stack_key, + value: stack, + }, + priority, + ) = self.stacks.peek_mut()?; + let envelope = stack.pop(); + debug_assert!(envelope.is_some()); + match stack.peek() { + None => { + self.own_keys + .get_mut(&stack_key.own_key) + .expect("own_keys") + .remove(&stack_key); + self.sampling_keys + .get_mut(&stack_key.sampling_key) + .expect("sampling_keys") + .remove(&stack_key); + self.stacks.remove(stack_key); + } + Some(next_envelope) => { + self.stacks.change_priority_by(stack_key, |prio| { + prio.received_at = next_envelope.received_at(); + }); + } + } + envelope } fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { @@ -77,45 +106,68 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { } } -struct PrioritizedStack { +struct QueueItem { + key: K, + value: V, +} + +impl std::borrow::Borrow for QueueItem { + fn borrow(&self) -> &K { + &self.key + } +} + +impl std::hash::Hash for QueueItem { + fn hash(&self, state: &mut H) { + self.key.hash(state); + } +} + +impl PartialEq for QueueItem { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} + +impl Eq for QueueItem {} + +struct Priority { own_ready: bool, sampling_ready: bool, received_at: Instant, - stack: S, } -impl PrioritizedStack { +impl Priority { fn ready(&self) -> bool { self.own_ready && self.sampling_ready } } -impl 
PrioritizedStack { +impl Priority { fn new(received_at: Instant) -> Self { Self { own_ready: false, sampling_ready: false, received_at, - stack: S::default(), } } } -impl PartialEq for PrioritizedStack { +impl PartialEq for Priority { fn eq(&self, other: &Self) -> bool { self.ready() == other.ready() && self.received_at == other.received_at } } -impl PartialOrd for PrioritizedStack { +impl PartialOrd for Priority { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Eq for PrioritizedStack {} +impl Eq for Priority {} -impl Ord for PrioritizedStack { +impl Ord for Priority { fn cmp(&self, other: &Self) -> Ordering { match (self.ready(), other.ready()) { (true, true) => self.received_at.cmp(&other.received_at), From cb71cce16ee2a55ac70c242f14b0b3bfe8296c63 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 11:36:31 +0200 Subject: [PATCH 03/62] it compiles --- .../src/services/buffer/envelopebuffer.rs | 39 +++++++++++-------- .../src/services/buffer/envelopestack.rs | 2 + 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index ba62cf352a..a28a7302b6 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -13,14 +13,20 @@ pub trait EnvelopeBuffer { fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } -#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { own_key: ProjectKey, sampling_key: ProjectKey, } impl StackKey { - fn from_envelope(envelope: &Envelope) -> Self {} + fn from_envelope(envelope: &Envelope) -> Self { + let own_key = envelope.meta().public_key(); + Self { + own_key, + sampling_key: envelope.sampling_key().unwrap_or(own_key), + } + } } struct PriorityEnvelopeBuffer { @@ -31,7 +37,7 @@ struct PriorityEnvelopeBuffer { impl EnvelopeBuffer 
for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { - let received_at = envelope.received_at(); + let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); if let Some(qi) = self.stacks.get_mut(&stack_key) { qi.0.value.push(envelope); @@ -46,28 +52,27 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { self.own_keys .entry(stack_key.own_key) .or_default() - .insert(stack_key); + .insert(stack_key.clone()); self.sampling_keys .entry(stack_key.sampling_key) .or_default() .insert(stack_key); } - self.stacks.change_priority_by(stack_key, |prio| { + self.stacks.change_priority_by(&stack_key, |prio| { prio.received_at = received_at; }); } fn pop(&mut self) -> Option> { - let ( - QueueItem { - key: stack_key, - value: stack, - }, - priority, - ) = self.stacks.peek_mut()?; + let (QueueItem { key, value: stack }, _) = self.stacks.peek_mut()?; + let stack_key = *key; let envelope = stack.pop(); debug_assert!(envelope.is_some()); - match stack.peek() { + + let next_received_at = stack + .peek() + .map(|next_envelope| next_envelope.meta().start_time()); + match next_received_at { None => { self.own_keys .get_mut(&stack_key.own_key) @@ -77,11 +82,11 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { .get_mut(&stack_key.sampling_key) .expect("sampling_keys") .remove(&stack_key); - self.stacks.remove(stack_key); + self.stacks.remove(&stack_key); } - Some(next_envelope) => { - self.stacks.change_priority_by(stack_key, |prio| { - prio.received_at = next_envelope.received_at(); + Some(next_received_at) => { + self.stacks.change_priority_by(&stack_key, |prio| { + prio.received_at = next_received_at; }); } } diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs index a75c9047de..f5f58db230 100644 --- a/relay-server/src/services/buffer/envelopestack.rs +++ b/relay-server/src/services/buffer/envelopestack.rs @@ -1,6 +1,8 @@ use crate::envelope::Envelope; pub trait 
EnvelopeStack { + fn new(envelope: Box) -> Self; + fn push(&mut self, envelope: Box); fn pop(&mut self) -> Option>; From b60cb8bc5c72a45d6111b6943fbbc4f89eaa1e07 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 13:20:01 +0200 Subject: [PATCH 04/62] some tests --- .../src/services/buffer/envelopebuffer.rs | 116 +++++++++++++++++- .../src/services/buffer/envelopestack.rs | 20 +++ 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index a28a7302b6..436723789d 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -9,6 +9,7 @@ use crate::services::buffer::envelopestack::EnvelopeStack; pub trait EnvelopeBuffer { fn push(&mut self, envelope: Box); + fn peek(&mut self) -> Option<&Envelope>; fn pop(&mut self) -> Option>; fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } @@ -35,6 +36,16 @@ struct PriorityEnvelopeBuffer { stacks: priority_queue::PriorityQueue, Priority>, } +impl PriorityEnvelopeBuffer { + fn new() -> Self { + Self { + own_keys: Default::default(), + sampling_keys: Default::default(), + stacks: Default::default(), + } + } +} + impl EnvelopeBuffer for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); @@ -52,7 +63,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { self.own_keys .entry(stack_key.own_key) .or_default() - .insert(stack_key.clone()); + .insert(stack_key); self.sampling_keys .entry(stack_key.sampling_key) .or_default() @@ -63,6 +74,17 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { }); } + fn peek(&mut self) -> Option<&Envelope> { + let ( + QueueItem { + key: _, + value: stack, + }, + _, + ) = self.stacks.peek_mut()?; + stack.peek() + } + fn pop(&mut self) -> Option> { let (QueueItem { key, value: stack }, _) = self.stacks.peek_mut()?; let stack_key = *key; @@ -184,3 
+206,95 @@ impl Ord for Priority { } } } + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use relay_common::Dsn; + + use crate::extractors::RequestMeta; + use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; + + use super::*; + + fn new_envelope(project_key: ProjectKey, sampling_key: Option) -> Box { + let envelope = Envelope::from_request( + None, + RequestMeta::new(Dsn::from_str(&format!("http://{project_key}@localhost/1")).unwrap()), + ); + // TODO: sampling key + envelope + } + + #[test] + fn insert_pop() { + let mut buffer = PriorityEnvelopeBuffer::::new(); + + let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let project_key3 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); + + assert!(buffer.pop().is_none()); + assert!(buffer.peek().is_none()); + + buffer.push(new_envelope(project_key1, None)); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + + buffer.push(new_envelope(project_key2, None)); + // Both projects are not ready, so project 1 is on top (has the oldest envelopes): + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + + buffer.push(new_envelope(project_key3, None)); + // All projects are not ready, so project 1 is on top (has the oldest envelopes): + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + + // After marking a project ready, it goes to the top: + buffer.mark_ready(&project_key3, true); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key3); + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key3); + + // After popping, project 1 is on top again: + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + + // Mark project 1 as ready (still on top): + buffer.mark_ready(&project_key1, true); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + + // Mark 
project 2 as ready as well (now on top because most recent): + buffer.mark_ready(&project_key2, true); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key2); + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key2); + + // Pop last element: + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key1); + assert!(buffer.pop().is_none()); + assert!(buffer.peek().is_none()); + } + + #[test] + fn project_internal_order() { + let mut buffer = PriorityEnvelopeBuffer::::new(); + + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + + let envelope1 = new_envelope(project_key, None); + let instant1 = envelope1.meta().start_time(); + let envelope2 = new_envelope(project_key, None); + let instant2 = envelope2.meta().start_time(); + + assert!(instant2 > instant1); + + buffer.push(envelope1); + buffer.push(envelope2); + + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); + assert!(buffer.pop().is_none()); + } + + #[test] + fn sampling_projects() { + todo!() + } +} diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs index f5f58db230..417ea7b78c 100644 --- a/relay-server/src/services/buffer/envelopestack.rs +++ b/relay-server/src/services/buffer/envelopestack.rs @@ -9,3 +9,23 @@ pub trait EnvelopeStack { fn peek(&self) -> Option<&Envelope>; } + +pub struct InMemoryEnvelopeStack(Vec>); + +impl EnvelopeStack for InMemoryEnvelopeStack { + fn new(envelope: Box) -> Self { + Self(vec![envelope]) + } + + fn push(&mut self, envelope: Box) { + self.0.push(envelope) + } + + fn pop(&mut self) -> Option> { + self.0.pop() + } + + fn peek(&self) -> Option<&Envelope> { + self.0.last().map(Box::as_ref) + } +} From ed56465acf98e743d91d4523b1ec03d117ea1ffb Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 13:59:13 +0200 Subject: [PATCH 05/62] test again --- 
.../src/services/buffer/envelopebuffer.rs | 64 ++++++++++++++++++- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index 436723789d..d9d9b8a02e 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -212,18 +212,35 @@ mod tests { use std::str::FromStr; use relay_common::Dsn; + use relay_sampling::DynamicSamplingContext; + use uuid::Uuid; + use crate::envelope::{Item, ItemType}; use crate::extractors::RequestMeta; use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; use super::*; fn new_envelope(project_key: ProjectKey, sampling_key: Option) -> Box { - let envelope = Envelope::from_request( + let mut envelope = Envelope::from_request( None, RequestMeta::new(Dsn::from_str(&format!("http://{project_key}@localhost/1")).unwrap()), ); - // TODO: sampling key + if let Some(sampling_key) = sampling_key { + envelope.set_dsc(DynamicSamplingContext { + public_key: sampling_key, + trace_id: Uuid::new_v4(), + release: None, + user: Default::default(), + replay_id: None, + environment: None, + transaction: None, + sample_rate: None, + sampled: None, + other: Default::default(), + }); + envelope.add_item(Item::new(ItemType::Transaction)); + } envelope } @@ -295,6 +312,47 @@ mod tests { #[test] fn sampling_projects() { - todo!() + let mut buffer = PriorityEnvelopeBuffer::::new(); + + let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); + + let envelope1 = new_envelope(project_key1, None); + let instant1 = envelope1.meta().start_time(); + buffer.push(envelope1); + + let envelope2 = new_envelope(project_key2, None); + let instant2 = envelope2.meta().start_time(); + buffer.push(envelope2); + + let envelope3 = new_envelope(project_key1, Some(project_key2)); + let instant3 = 
envelope3.meta().start_time(); + buffer.push(envelope3); + + // Nothing is ready, instant1 is on top: + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + + // Mark project 2 ready, gets on top: + buffer.mark_ready(&project_key2, true); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + + // Revert + buffer.mark_ready(&project_key2, false); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + + // Project 1 ready: + buffer.mark_ready(&project_key1, true); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + + // when both projects are ready, event no 3 ends up on top: + buffer.mark_ready(&project_key2, true); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant3); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + + buffer.mark_ready(&project_key2, false); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); + + assert!(buffer.pop().is_none()); } } From 005c0935b5606c793e4c6f3683f498b27393778b Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 15:43:12 +0200 Subject: [PATCH 06/62] Push to envelope buffer --- relay-config/src/config.rs | 21 +++++ relay-server/src/endpoints/common.rs | 4 +- relay-server/src/service.rs | 24 ++++- .../src/services/buffer/envelopebuffer.rs | 90 +++++++++++++------ .../src/services/buffer/envelopestack.rs | 3 +- relay-server/src/services/buffer/mod.rs | 2 + relay-server/src/services/mod.rs | 2 +- 7 files changed, 110 insertions(+), 36 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 292d318399..2fbbf4e190 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -867,6 +867,19 @@ pub struct EnvelopeSpool { /// The interval in milliseconds to trigger unspool. 
#[serde(default = "spool_envelopes_unspool_interval")] unspool_interval: u64, + + /// Version of the spooler + version: EnvelopeSpoolVersion, +} + +// TODO(docs) +#[derive(Debug, Default, Deserialize, Serialize)] +pub enum EnvelopeSpoolVersion { + #[default] + #[serde(rename = "1")] + V1, + #[serde(rename = "2")] + V2, } impl Default for EnvelopeSpool { @@ -878,6 +891,7 @@ impl Default for EnvelopeSpool { max_disk_size: spool_envelopes_max_disk_size(), max_memory_size: spool_envelopes_max_memory_size(), unspool_interval: spool_envelopes_unspool_interval(), // 100ms + version: EnvelopeSpoolVersion::V2, } } } @@ -2076,6 +2090,13 @@ impl Config { self.values.spool.envelopes.max_memory_size.as_bytes() } + pub fn spool_v2(&self) -> bool { + matches!( + self.values.spool.envelopes.version, + EnvelopeSpoolVersion::V2 + ) + } + /// Returns the maximum size of an event payload in bytes. pub fn max_event_size(&self) -> usize { self.values.limits.max_event_size.as_bytes() diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 798867f6e7..0d10874d00 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -305,7 +305,8 @@ fn queue_envelope( ); envelope.scope(scoping); - state.project_cache().send(ValidateEnvelope::new(envelope)); + // TODO(jjbayer): schedule prefetch on project state here. + state.enqueue(envelope); } // The entire envelope is taken for a split above, and it's empty at this point, we can just // accept it without additional checks. @@ -333,6 +334,7 @@ pub async fn handle_envelope( ) } + // TODO(jjbayer): Move this check to spool impl if state.memory_checker().check_memory().is_exceeded() { // NOTE: Long-term, we should not reject the envelope here, but spool it to disk instead. // This will be fixed with the new spool implementation. 
diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 4b45fcdf67..0d481aa53c 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,9 +1,10 @@ use std::convert::Infallible; use std::fmt; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use std::time::Duration; use crate::metrics::{MetricOutcomes, MetricStats}; +use crate::services::buffer::{create_envelope_buffer, EnvelopeBuffer}; use crate::services::stats::RelayStats; use anyhow::{Context, Result}; use axum::extract::FromRequestParts; @@ -22,13 +23,15 @@ use crate::services::metrics::{Aggregator, RouterService}; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::project_cache::{ProjectCache, ProjectCacheService, Services}; +use crate::services::project_cache::{ + ProjectCache, ProjectCacheService, Services, ValidateEnvelope, +}; use crate::services::relays::{RelayCache, RelayCacheService}; #[cfg(feature = "processing")] use crate::services::store::StoreService; use crate::services::test_store::{TestStore, TestStoreService}; use crate::services::upstream::{UpstreamRelay, UpstreamRelayService}; -use crate::utils::{MemoryChecker, MemoryStat}; +use crate::utils::{ManagedEnvelope, MemoryChecker, MemoryStat}; /// Indicates the type of failure of the server. 
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, thiserror::Error)] @@ -138,6 +141,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, + envelope_buffer: Arc>, registry: Registry, } @@ -295,7 +299,8 @@ impl ServiceState { let state = StateInner { config: config.clone(), - memory_checker: MemoryChecker::new(memory_stat, config), + memory_checker: MemoryChecker::new(memory_stat, config.clone()), + envelope_buffer: create_envelope_buffer(&config), registry, }; @@ -304,6 +309,17 @@ impl ServiceState { }) } + pub fn enqueue(&self, mut envelope: ManagedEnvelope) { + if self.config().spool_v2() { + // TODO(jjbayer): What do we lose by dropping the rest of the managed envelope? + // How does the old spooler handle this? + let mut guard = self.inner.envelope_buffer.lock().expect("poisoned lock"); + guard.push(envelope.take_envelope()); + } else { + self.project_cache().send(ValidateEnvelope::new(envelope)); + } + } + /// Returns a reference to the Relay configuration. 
pub fn config(&self) -> &Config { &self.inner.config diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index d9d9b8a02e..187e3d9dd5 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -1,19 +1,29 @@ use std::cmp::Ordering; use std::collections::BTreeSet; +use std::sync::{Arc, Mutex}; use std::time::Instant; use relay_base_schema::project::ProjectKey; +use relay_config::Config; use crate::envelope::Envelope; -use crate::services::buffer::envelopestack::EnvelopeStack; +use crate::services::buffer::envelopestack::{EnvelopeStack, InMemoryEnvelopeStack}; -pub trait EnvelopeBuffer { +pub trait EnvelopeBuffer: std::fmt::Debug + Send { fn push(&mut self, envelope: Box); fn peek(&mut self) -> Option<&Envelope>; fn pop(&mut self) -> Option>; fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } +// TODO: docs +pub fn create_envelope_buffer(config: &Config) -> Arc> { + // TODO: create a DiskMemoryStack + Arc::new(Mutex::new( + PriorityEnvelopeBuffer::::new(), + )) +} + #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { own_key: ProjectKey, @@ -30,6 +40,7 @@ impl StackKey { } } +#[derive(Debug)] struct PriorityEnvelopeBuffer { own_keys: hashbrown::HashMap>, sampling_keys: hashbrown::HashMap>, @@ -46,28 +57,55 @@ impl PriorityEnvelopeBuffer { } } -impl EnvelopeBuffer for PriorityEnvelopeBuffer { +impl PriorityEnvelopeBuffer { + fn push_stack(&mut self, envelope: Box) { + let received_at = envelope.meta().start_time(); + let stack_key = StackKey::from_envelope(&envelope); + self.stacks.push( + QueueItem { + key: stack_key, + value: S::new(envelope), + }, + Priority::new(received_at), + ); + self.own_keys + .entry(stack_key.own_key) + .or_default() + .insert(stack_key); + self.sampling_keys + .entry(stack_key.sampling_key) + .or_default() + .insert(stack_key); + } + + fn pop_stack(&mut self, 
stack_key: StackKey) { + self.own_keys + .get_mut(&stack_key.own_key) + .expect("own_keys") + .remove(&stack_key); + self.sampling_keys + .get_mut(&stack_key.sampling_key) + .expect("sampling_keys") + .remove(&stack_key); + self.stacks.remove(&stack_key); + } +} + +impl EnvelopeBuffer for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); - if let Some(qi) = self.stacks.get_mut(&stack_key) { - qi.0.value.push(envelope); + if let Some(( + QueueItem { + key: _, + value: stack, + }, + _, + )) = self.stacks.get_mut(&stack_key) + { + stack.push(envelope); } else { - self.stacks.push( - QueueItem { - key: stack_key, - value: S::new(envelope), - }, - Priority::new(received_at), - ); - self.own_keys - .entry(stack_key.own_key) - .or_default() - .insert(stack_key); - self.sampling_keys - .entry(stack_key.sampling_key) - .or_default() - .insert(stack_key); + self.push_stack(envelope); } self.stacks.change_priority_by(&stack_key, |prio| { prio.received_at = received_at; @@ -96,15 +134,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { .map(|next_envelope| next_envelope.meta().start_time()); match next_received_at { None => { - self.own_keys - .get_mut(&stack_key.own_key) - .expect("own_keys") - .remove(&stack_key); - self.sampling_keys - .get_mut(&stack_key.sampling_key) - .expect("sampling_keys") - .remove(&stack_key); - self.stacks.remove(&stack_key); + self.pop_stack(stack_key); } Some(next_received_at) => { self.stacks.change_priority_by(&stack_key, |prio| { @@ -133,6 +163,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuffer { } } +#[derive(Debug)] struct QueueItem { key: K, value: V, @@ -158,6 +189,7 @@ impl PartialEq for QueueItem { impl Eq for QueueItem {} +#[derive(Debug)] struct Priority { own_ready: bool, sampling_ready: bool, diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs index 
417ea7b78c..e307769caf 100644 --- a/relay-server/src/services/buffer/envelopestack.rs +++ b/relay-server/src/services/buffer/envelopestack.rs @@ -1,6 +1,6 @@ use crate::envelope::Envelope; -pub trait EnvelopeStack { +pub trait EnvelopeStack: Send { fn new(envelope: Box) -> Self; fn push(&mut self, envelope: Box); @@ -10,6 +10,7 @@ pub trait EnvelopeStack { fn peek(&self) -> Option<&Envelope>; } +#[derive(Debug)] pub struct InMemoryEnvelopeStack(Vec>); impl EnvelopeStack for InMemoryEnvelopeStack { diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index e1c03e125e..fb6b0d92c4 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,2 +1,4 @@ mod envelopebuffer; mod envelopestack; + +pub use envelopebuffer::{create_envelope_buffer, EnvelopeBuffer}; diff --git a/relay-server/src/services/mod.rs b/relay-server/src/services/mod.rs index 128314fc77..be7b542b06 100644 --- a/relay-server/src/services/mod.rs +++ b/relay-server/src/services/mod.rs @@ -27,7 +27,7 @@ //! Controller::run(|| Server::start()) //! .expect("failed to start relay"); //! 
``` -mod buffer; +pub mod buffer; pub mod cogs; pub mod global_config; pub mod health_check; From 9350674dc2e322cdab16c476787ba86100a68d42 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Jul 2024 16:59:27 +0200 Subject: [PATCH 07/62] async interface --- relay-server/src/endpoints/common.rs | 13 ++++-- relay-server/src/service.rs | 23 ++++------ .../src/services/buffer/envelopebuffer.rs | 5 +- .../src/services/buffer/envelopestack.rs | 2 +- relay-server/src/services/buffer/mod.rs | 46 ++++++++++++++++++- relay-server/src/services/project_cache.rs | 1 + tests/integration/fixtures/relay.py | 2 +- 7 files changed, 69 insertions(+), 23 deletions(-) diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 0d10874d00..70b1b6b613 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -259,7 +259,7 @@ pub fn event_id_from_items(items: &Items) -> Result, BadStoreReq /// /// Queueing can fail if the queue exceeds `envelope_buffer_size`. In this case, `Err` is /// returned and the envelope is not queued. -fn queue_envelope( +async fn queue_envelope( state: &ServiceState, mut managed_envelope: ManagedEnvelope, ) -> Result<(), BadStoreRequest> { @@ -306,7 +306,14 @@ fn queue_envelope( envelope.scope(scoping); // TODO(jjbayer): schedule prefetch on project state here. - state.enqueue(envelope); + if state.config().spool_v2() { + relay_log::trace!("Pushing envelope to V2 buffer"); + // TODO(jjbayer): What do we lose by dropping the rest of the managed envelope? + // How does the old spooler handle this? + state.envelope_buffer().push(envelope.into_envelope()).await; + } else { + state.project_cache().send(ValidateEnvelope::new(envelope)); + } } // The entire envelope is taken for a split above, and it's empty at this point, we can just // accept it without additional checks. 
@@ -380,7 +387,7 @@ pub async fn handle_envelope( return Err(BadStoreRequest::Overflow(offender)); } - queue_envelope(state, managed_envelope)?; + queue_envelope(state, managed_envelope).await?; if checked.rate_limits.is_limited() { // Even if some envelope items have been queued, there might be active rate limits on diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 0d481aa53c..c19f2b335b 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,10 +1,10 @@ use std::convert::Infallible; use std::fmt; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::time::Duration; use crate::metrics::{MetricOutcomes, MetricStats}; -use crate::services::buffer::{create_envelope_buffer, EnvelopeBuffer}; +use crate::services::buffer::EnvelopeBuffer; use crate::services::stats::RelayStats; use anyhow::{Context, Result}; use axum::extract::FromRequestParts; @@ -141,7 +141,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Arc>, + envelope_buffer: EnvelopeBuffer, registry: Registry, } @@ -300,7 +300,7 @@ impl ServiceState { let state = StateInner { config: config.clone(), memory_checker: MemoryChecker::new(memory_stat, config.clone()), - envelope_buffer: create_envelope_buffer(&config), + envelope_buffer: EnvelopeBuffer::from_config(&config), registry, }; @@ -309,17 +309,6 @@ impl ServiceState { }) } - pub fn enqueue(&self, mut envelope: ManagedEnvelope) { - if self.config().spool_v2() { - // TODO(jjbayer): What do we lose by dropping the rest of the managed envelope? - // How does the old spooler handle this? - let mut guard = self.inner.envelope_buffer.lock().expect("poisoned lock"); - guard.push(envelope.take_envelope()); - } else { - self.project_cache().send(ValidateEnvelope::new(envelope)); - } - } - /// Returns a reference to the Relay configuration. 
pub fn config(&self) -> &Config { &self.inner.config @@ -332,6 +321,10 @@ impl ServiceState { &self.inner.memory_checker } + pub fn envelope_buffer(&self) -> &EnvelopeBuffer { + &self.inner.envelope_buffer + } + /// Returns the address of the [`ProjectCache`] service. pub fn project_cache(&self) -> &Addr { &self.inner.registry.project_cache diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer.rs index 187e3d9dd5..0dc90b8954 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer.rs @@ -1,10 +1,11 @@ use std::cmp::Ordering; use std::collections::BTreeSet; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::time::Instant; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use tokio::sync::Mutex; use crate::envelope::Envelope; use crate::services::buffer::envelopestack::{EnvelopeStack, InMemoryEnvelopeStack}; @@ -17,7 +18,7 @@ pub trait EnvelopeBuffer: std::fmt::Debug + Send { } // TODO: docs -pub fn create_envelope_buffer(config: &Config) -> Arc> { +pub fn create(config: &Config) -> Arc> { // TODO: create a DiskMemoryStack Arc::new(Mutex::new( PriorityEnvelopeBuffer::::new(), diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs index e307769caf..b720fb8875 100644 --- a/relay-server/src/services/buffer/envelopestack.rs +++ b/relay-server/src/services/buffer/envelopestack.rs @@ -11,7 +11,7 @@ pub trait EnvelopeStack: Send { } #[derive(Debug)] -pub struct InMemoryEnvelopeStack(Vec>); +pub struct InMemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); impl EnvelopeStack for InMemoryEnvelopeStack { fn new(envelope: Box) -> Self { diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index fb6b0d92c4..8ad7a8ced7 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,4 +1,48 
@@ +#![deny(missing_docs)] +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use tokio::sync::{Mutex, MutexGuard}; + +use crate::envelope::Envelope; + mod envelopebuffer; mod envelopestack; -pub use envelopebuffer::{create_envelope_buffer, EnvelopeBuffer}; +/// Wrapper for the EnvelopeBuffer implementation. +#[derive(Debug)] +pub struct EnvelopeBuffer(Arc>); + +impl EnvelopeBuffer { + pub fn from_config(config: &Config) -> Self { + // TODO: create a DiskMemoryStack if db config is given. + Self(envelopebuffer::create(config)) + } + + pub async fn push(&self, envelope: Box) { + let mut guard = self.0.lock().await; + guard.push(envelope); + } + + pub async fn peek(&self) -> Peek { + Peek(self.0.lock().await) + } + + pub async fn mark_ready(&self, project: &ProjectKey, is_ready: bool) { + let mut guard = self.0.lock().await; + guard.mark_ready(project, is_ready) + } +} + +pub struct Peek<'a>(MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>); + +impl Peek<'_> { + pub fn get(&mut self) -> Option<&Envelope> { + self.0.peek() + } + + pub fn remove(&mut self) -> Option> { + self.0.pop() + } +} diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 724642396a..f541eac6e3 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,6 +5,7 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; +use crate::services::buffer::EnvelopeBuffer; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; diff --git a/tests/integration/fixtures/relay.py b/tests/integration/fixtures/relay.py index 28f21d45e6..905117ef49 100644 --- a/tests/integration/fixtures/relay.py +++ b/tests/integration/fixtures/relay.py @@ -148,7 +148,7 @@ def inner( }, "spool": { # Unspool as quickly as possible - "envelopes": {"unspool_interval": 1}, + "envelopes": 
{"unspool_interval": 1, "version": "2"}, }, } From 9428e5cae079fbe58a9db2d85beaba08a53a7bff Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 08:47:52 +0200 Subject: [PATCH 08/62] files --- relay-config/src/config.rs | 7 +++++- relay-server/src/service.rs | 6 ++--- .../src/services/buffer/envelopebuffer/mod.rs | 25 +++++++++++++++++++ .../priority.rs} | 25 +++---------------- relay-server/src/services/buffer/mod.rs | 17 +++++++++++-- relay-server/src/services/project_cache.rs | 1 - 6 files changed, 52 insertions(+), 29 deletions(-) create mode 100644 relay-server/src/services/buffer/envelopebuffer/mod.rs rename relay-server/src/services/buffer/{envelopebuffer.rs => envelopebuffer/priority.rs} (94%) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index ce517468db..be2fb762b5 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -873,12 +873,16 @@ pub struct EnvelopeSpool { version: EnvelopeSpoolVersion, } -// TODO(docs) +/// Version of the envelope buffering mechanism. #[derive(Debug, Default, Deserialize, Serialize)] pub enum EnvelopeSpoolVersion { + /// Use the spooler service, which only buffers envelopes for unloaded projects and + /// switches between an in-memory mode and a disk mode on-demand. #[default] #[serde(rename = "1")] V1, + /// Use the envelope buffer, through which all envelopes pass before getting unspooled. + /// Can be either disk based or memory based. #[serde(rename = "2")] V2, } @@ -2091,6 +2095,7 @@ impl Config { self.values.spool.envelopes.max_memory_size.as_bytes() } + /// Returns `true` if version 2 of the spooling mechanism is used. 
pub fn spool_v2(&self) -> bool { matches!( self.values.spool.envelopes.version, diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 86b12ec29d..2ec2537eec 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -23,15 +23,13 @@ use crate::services::metrics::{Aggregator, RouterService}; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::project_cache::{ - ProjectCache, ProjectCacheService, Services, ValidateEnvelope, -}; +use crate::services::project_cache::{ProjectCache, ProjectCacheService, Services}; use crate::services::relays::{RelayCache, RelayCacheService}; #[cfg(feature = "processing")] use crate::services::store::StoreService; use crate::services::test_store::{TestStore, TestStoreService}; use crate::services::upstream::{UpstreamRelay, UpstreamRelayService}; -use crate::utils::{ManagedEnvelope, MemoryChecker, MemoryStat}; +use crate::utils::{MemoryChecker, MemoryStat}; /// Indicates the type of failure of the server. 
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, thiserror::Error)] diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs new file mode 100644 index 0000000000..cc8ac0c96c --- /dev/null +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -0,0 +1,25 @@ +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use tokio::sync::Mutex; + +use crate::envelope::Envelope; +use crate::services::buffer::envelopebuffer::priority::PriorityEnvelopeBuffer; +use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; + +mod priority; + +pub trait EnvelopeBuffer: std::fmt::Debug + Send { + fn push(&mut self, envelope: Box); + fn peek(&mut self) -> Option<&Envelope>; + fn pop(&mut self) -> Option>; + fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); +} + +pub fn create(config: &Config) -> Arc> { + // TODO: create a DiskMemoryStack + Arc::new(Mutex::new( + PriorityEnvelopeBuffer::::new(), + )) +} diff --git a/relay-server/src/services/buffer/envelopebuffer.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs similarity index 94% rename from relay-server/src/services/buffer/envelopebuffer.rs rename to relay-server/src/services/buffer/envelopebuffer/priority.rs index 0dc90b8954..bfcae95227 100644 --- a/relay-server/src/services/buffer/envelopebuffer.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -1,29 +1,12 @@ use std::cmp::Ordering; use std::collections::BTreeSet; -use std::sync::Arc; use std::time::Instant; use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use tokio::sync::Mutex; use crate::envelope::Envelope; -use crate::services::buffer::envelopestack::{EnvelopeStack, InMemoryEnvelopeStack}; - -pub trait EnvelopeBuffer: std::fmt::Debug + Send { - fn push(&mut self, envelope: Box); - fn peek(&mut self) -> Option<&Envelope>; - fn pop(&mut self) -> Option>; - fn mark_ready(&mut self, 
project: &ProjectKey, is_ready: bool); -} - -// TODO: docs -pub fn create(config: &Config) -> Arc> { - // TODO: create a DiskMemoryStack - Arc::new(Mutex::new( - PriorityEnvelopeBuffer::::new(), - )) -} +use crate::services::buffer::envelopebuffer::EnvelopeBuffer; +use crate::services::buffer::envelopestack::EnvelopeStack; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { @@ -42,14 +25,14 @@ impl StackKey { } #[derive(Debug)] -struct PriorityEnvelopeBuffer { +pub struct PriorityEnvelopeBuffer { own_keys: hashbrown::HashMap>, sampling_keys: hashbrown::HashMap>, stacks: priority_queue::PriorityQueue, Priority>, } impl PriorityEnvelopeBuffer { - fn new() -> Self { + pub fn new() -> Self { Self { own_keys: Default::default(), sampling_keys: Default::default(), diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 8ad7a8ced7..7f758d960c 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use tokio::sync::{Mutex, MutexGuard}; +use tokio::sync::MutexGuard; use crate::envelope::Envelope; @@ -12,7 +12,20 @@ mod envelopestack; /// Wrapper for the EnvelopeBuffer implementation. #[derive(Debug)] -pub struct EnvelopeBuffer(Arc>); +pub struct EnvelopeBuffer( + /// TODO: Reconsider synchronization mechanism. + /// We can either + /// - keep the interface sync and use a std Mutex. In this case, we create a queue of threads. + /// - use an async interface with a tokio mutex. In this case, we create a queue of futures. + /// - use message passing (service or channel). In this case, we create a queue of messages. + /// + /// From the tokio docs: + /// + /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. + /// > [...] 
when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, + /// > and to use message passing to communicate with that task. + Arc>, +); impl EnvelopeBuffer { pub fn from_config(config: &Config) -> Self { diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 69b5e24814..2b28a878ef 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,7 +5,6 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; -use crate::services::buffer::EnvelopeBuffer; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; From 26eb70b471487dd542d15de72a98ed80cdd36663 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 09:05:48 +0200 Subject: [PATCH 09/62] return ready --- .../src/services/buffer/envelopebuffer/mod.rs | 4 +- .../buffer/envelopebuffer/priority.rs | 58 +++++++++---------- relay-server/src/services/buffer/mod.rs | 4 +- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index cc8ac0c96c..2d63e838c0 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -12,8 +12,8 @@ mod priority; pub trait EnvelopeBuffer: std::fmt::Debug + Send { fn push(&mut self, envelope: Box); - fn peek(&mut self) -> Option<&Envelope>; - fn pop(&mut self) -> Option>; + fn peek(&mut self) -> Option<(&Envelope, bool)>; + fn pop(&mut self) -> Option<(Box, bool)>; fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index bfcae95227..4af9412080 100644 --- 
a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -5,7 +5,7 @@ use std::time::Instant; use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; -use crate::services::buffer::envelopebuffer::EnvelopeBuffer; +use crate::services::buffer::envelopebuffer::{priority, EnvelopeBuffer}; use crate::services::buffer::envelopestack::EnvelopeStack; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] @@ -96,22 +96,22 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }); } - fn peek(&mut self) -> Option<&Envelope> { + fn peek(&mut self) -> Option<(&Envelope, bool)> { let ( QueueItem { key: _, value: stack, }, - _, + priority, ) = self.stacks.peek_mut()?; - stack.peek() + stack.peek().map(|envelope| (envelope, priority.ready())) } - fn pop(&mut self) -> Option> { - let (QueueItem { key, value: stack }, _) = self.stacks.peek_mut()?; + fn pop(&mut self) -> Option<(Box, bool)> { + let (QueueItem { key, value: stack }, priority) = self.stacks.peek_mut()?; let stack_key = *key; - let envelope = stack.pop(); - debug_assert!(envelope.is_some()); + let is_ready = priority.ready(); + let envelope = stack.pop().expect("found an empty stack"); let next_received_at = stack .peek() @@ -126,7 +126,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }); } } - envelope + Some((envelope, is_ready)) } fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { @@ -272,35 +272,35 @@ mod tests { assert!(buffer.peek().is_none()); buffer.push(new_envelope(project_key1, None)); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); buffer.push(new_envelope(project_key2, None)); // Both projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), 
project_key1); buffer.push(new_envelope(project_key3, None)); // All projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); // After marking a project ready, it goes to the top: buffer.mark_ready(&project_key3, true); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key3); - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key3); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key3); + assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key3); // After popping, project 1 is on top again: - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); // Mark project 1 as ready (still on top): buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); // Mark project 2 as ready as well (now on top because most recent): buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key2); - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key2); + assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key2); + assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key2); // Pop last element: - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key1); + assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key1); assert!(buffer.pop().is_none()); assert!(buffer.peek().is_none()); } @@ -321,8 +321,8 @@ mod tests { buffer.push(envelope1); buffer.push(envelope2); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.pop().unwrap().0.meta().start_time(), 
instant2); + assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant1); assert!(buffer.pop().is_none()); } @@ -346,28 +346,28 @@ mod tests { buffer.push(envelope3); // Nothing is ready, instant1 is on top: - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); // Mark project 2 ready, gets on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant2); // Revert buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); // Project 1 ready: buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); // when both projects are ready, event no 3 ends up on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant3); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant3); + assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant2); buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant1); + assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant2); assert!(buffer.pop().is_none()); } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 7f758d960c..062252e70b 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -51,11 +51,11 @@ impl EnvelopeBuffer { pub struct Peek<'a>(MutexGuard<'a, dyn 
envelopebuffer::EnvelopeBuffer>); impl Peek<'_> { - pub fn get(&mut self) -> Option<&Envelope> { + pub fn get(&mut self) -> Option<(&Envelope, bool)> { self.0.peek() } - pub fn remove(&mut self) -> Option> { + pub fn remove(&mut self) -> Option<(Box, bool)> { self.0.pop() } } From b7abdadd06562c0fa534afed5559b4c5874516f5 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 09:17:08 +0200 Subject: [PATCH 10/62] prefetch --- relay-server/src/endpoints/common.rs | 5 ++--- relay-server/src/services/project_cache.rs | 14 +++++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 70b1b6b613..25aaa8072d 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -305,11 +305,10 @@ async fn queue_envelope( ); envelope.scope(scoping); - // TODO(jjbayer): schedule prefetch on project state here. if state.config().spool_v2() { + // NOTE: This assumes that a `prefetch` has already been scheduled for both the + // envelope's projects. See `handle_check_envelope`. relay_log::trace!("Pushing envelope to V2 buffer"); - // TODO(jjbayer): What do we lose by dropping the rest of the managed envelope? - // How does the old spooler handle this? 
state.envelope_buffer().push(envelope.into_envelope()).await; } else { state.project_cache().send(ValidateEnvelope::new(envelope)); diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 2b28a878ef..701bde8d10 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -22,7 +22,7 @@ use crate::services::global_config::{self, GlobalConfigManager, Subscribe}; use crate::services::metrics::{Aggregator, FlushBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProjectMetrics, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProjectMetrics, Sampling, }; use crate::services::project::{ CheckedBuckets, Project, ProjectFetchState, ProjectSender, ProjectState, @@ -762,12 +762,20 @@ impl ProjectCacheBroker { ) -> Result { let CheckEnvelope { envelope: context } = message; let project_cache = self.services.project_cache.clone(); - let project = self.get_or_create_project(context.envelope().meta().public_key()); + let project_key = context.envelope().meta().public_key(); + let project = self.get_or_create_project(project_key); // Preload the project cache so that it arrives a little earlier in processing. However, // do not pass `no_cache`. In case the project is rate limited, we do not want to force // a full reload. Fetching must not block the store request. 
- project.prefetch(project_cache, false); + project.prefetch(project_cache.clone(), false); + if let Some(sampling_key) = context.envelope().sampling_key() { + if sampling_key != project_key { + let sampling_project = self.get_or_create_project(sampling_key); + sampling_project.prefetch(project_cache, false); + } + } + project.check_envelope(context) } From d1b8beae5e44838d7d51501c4611d4c85696339d Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 11:52:14 +0200 Subject: [PATCH 11/62] wip project cache --- relay-server/src/service.rs | 1 + .../buffer/envelopebuffer/priority.rs | 2 +- relay-server/src/services/buffer/mod.rs | 2 +- relay-server/src/services/project_cache.rs | 120 ++++++++++++++++-- 4 files changed, 115 insertions(+), 10 deletions(-) diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 2ec2537eec..d967b71342 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -260,6 +260,7 @@ impl ServiceState { ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), + EnvelopeBuffer::from_config(&config), project_cache_services, metric_outcomes, redis_pool.clone(), diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 4af9412080..289a9ff400 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -6,7 +6,7 @@ use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; use crate::services::buffer::envelopebuffer::{priority, EnvelopeBuffer}; -use crate::services::buffer::envelopestack::EnvelopeStack; +use crate::services::buffer::envelopestack::{EnvelopeStack, InMemoryEnvelopeStack}; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 
062252e70b..0b7db0de3a 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -55,7 +55,7 @@ impl Peek<'_> { self.0.peek() } - pub fn remove(&mut self) -> Option<(Box, bool)> { + pub fn remove(mut self) -> Option<(Box, bool)> { self.0.pop() } } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 701bde8d10..265bf6cc2e 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,6 +5,7 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; +use crate::services::buffer::{EnvelopeBuffer, Peek}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; @@ -22,7 +23,7 @@ use crate::services::global_config::{self, GlobalConfigManager, Subscribe}; use crate::services::metrics::{Aggregator, FlushBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProjectMetrics, Sampling, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, Sampling, }; use crate::services::project::{ CheckedBuckets, Project, ProjectFetchState, ProjectSender, ProjectState, @@ -41,6 +42,8 @@ use crate::services::upstream::UpstreamRelay; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; +const MAX_ENVELOPE_AGE: std::time::Duration = std::time::Duration::from_secs(24 * 60 * 60); + /// Requests a refresh of a project state from one of the available sources. 
/// /// The project state is resolved in the following precedence: @@ -763,18 +766,18 @@ impl ProjectCacheBroker { let CheckEnvelope { envelope: context } = message; let project_cache = self.services.project_cache.clone(); let project_key = context.envelope().meta().public_key(); - let project = self.get_or_create_project(project_key); - - // Preload the project cache so that it arrives a little earlier in processing. However, - // do not pass `no_cache`. In case the project is rate limited, we do not want to force - // a full reload. Fetching must not block the store request. - project.prefetch(project_cache.clone(), false); if let Some(sampling_key) = context.envelope().sampling_key() { if sampling_key != project_key { let sampling_project = self.get_or_create_project(sampling_key); - sampling_project.prefetch(project_cache, false); + sampling_project.prefetch(project_cache.clone(), false); } } + let project = self.get_or_create_project(project_key); + + // Preload the project cache so that it arrives a little earlier in processing. However, + // do not pass `no_cache`. In case the project is rate limited, we do not want to force + // a full reload. Fetching must not block the store request. + project.prefetch(project_cache, false); project.check_envelope(context) } @@ -1018,6 +1021,98 @@ impl ProjectCacheBroker { } } + fn peek_at_envelope(&self, mut peek: Peek<'_>) { + relay_log::trace!("Peeking into the envelope buffer"); + let Some((envelope, should_be_ready)) = peek.get() else { + return; + }; + + relay_log::trace!("Found an envelope"); + + // TODO: make envelope age configurable. + if envelope.meta().start_time().elapsed() > MAX_ENVELOPE_AGE { + let envelope = ManagedEnvelope::new( + peek.remove(), + self.services.outcome_aggregator, + self.services.test_store, + ProcessingGroup::Ungrouped, + ); + envelope.reject(Outcome::Invalid(DiscardReason::Expired)); + // TODO: metrics in all branches. 
+ return; + } + let project_key = envelope.meta().public_key(); + let project = &mut self.get_or_create_project(project_key); + let reservoir_counters = project.reservoir_counters(); + + let project_state = project.get_cached_state(self.services.project_cache.clone(), false); + + let project_info = match project_state { + ProjectState::Enabled(info) => { + peek.mark_ready(project_key, true); + info + } + ProjectState::Disabled => { + let envelope = ManagedEnvelope::new( + peek.remove(), + self.services.outcome_aggregator, + self.services.test_store, + ProcessingGroup::Ungrouped, + ); + envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); + return; + } + ProjectState::Pending => { + peek.mark_ready(project_key, false); + return; + } + }; + + let sampling_project_info = match envelope.sampling_key().map(|sampling_key| { + ( + sampling_key, + self.get_or_create_project(sampling_key) + .get_cached_state(self.services.project_cache.clone(), false), + ) + }) { + Some((sampling_key, ProjectState::Enabled(info))) => { + peek.mark_ready(sampling_key, true); + Some(info) + } + Some((_, ProjectState::Disabled)) => { + // Accept envelope even if its sampling state is disabled: + None + } + Some((sampling_key, ProjectState::Pending)) => { + peek.mark_ready(sampling_key, false); + return; + } + None => None, + }; + + let managed_envelope = ManagedEnvelope::new( + peek.remove(), + self.services.outcome_aggregator, + self.services.test_store, + ProcessingGroup::Ungrouped, // TODO: ungrouped correct? + ); + + let Ok(CheckedEnvelope { + envelope: Some(managed_envelope), + .. + }) = project.check_envelope(managed_envelope) + else { + return; // Outcomes are emitted by check_envelope + }; + + self.services.envelope_processor.send(ProcessEnvelope { + envelope: managed_envelope, + project_info, + sampling_project_info, + reservoir_counters, + }); + } + /// Returns backoff timeout for an unspool attempt. 
fn next_unspool_attempt(&mut self) -> Duration { self.config.spool_envelopes_unspool_interval() + self.buffer_unspool_backoff.next_backoff() @@ -1138,6 +1233,7 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, + envelope_buffer: EnvelopeBuffer, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1148,6 +1244,7 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, + envelope_buffer: EnvelopeBuffer, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1155,6 +1252,7 @@ impl ProjectCacheService { Self { config, memory_checker, + envelope_buffer, services, metric_outcomes, redis, @@ -1169,6 +1267,7 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, + envelope_buffer, services, metric_outcomes, redis, @@ -1293,6 +1392,11 @@ impl Service for ProjectCacheService { broker.handle_message(message) }) } + peek = envelope_buffer.peek() => { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { + broker.peek_at_envelope(peek) + }) + } else => break, } } From faf72feb46e7728fde983207b34e573a6cf201a1 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 14:44:33 +0200 Subject: [PATCH 12/62] borrow checker --- .../src/services/buffer/envelopebuffer/mod.rs | 4 +- .../buffer/envelopebuffer/priority.rs | 57 +++++++++---------- relay-server/src/services/buffer/mod.rs | 31 ++++++---- relay-server/src/services/project_cache.rs | 57 ++++++++++--------- 4 files changed, 81 insertions(+), 68 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index 2d63e838c0..cc8ac0c96c 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -12,8 +12,8 @@ mod priority; pub trait EnvelopeBuffer: std::fmt::Debug + Send { fn push(&mut self, envelope: Box); 
- fn peek(&mut self) -> Option<(&Envelope, bool)>; - fn pop(&mut self) -> Option<(Box, bool)>; + fn peek(&mut self) -> Option<&Envelope>; + fn pop(&mut self) -> Option>; fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); } diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 289a9ff400..524853dec2 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -5,8 +5,8 @@ use std::time::Instant; use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; -use crate::services::buffer::envelopebuffer::{priority, EnvelopeBuffer}; -use crate::services::buffer::envelopestack::{EnvelopeStack, InMemoryEnvelopeStack}; +use crate::services::buffer::envelopebuffer::EnvelopeBuffer; +use crate::services::buffer::envelopestack::EnvelopeStack; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StackKey { @@ -96,21 +96,20 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }); } - fn peek(&mut self) -> Option<(&Envelope, bool)> { + fn peek(&mut self) -> Option<&Envelope> { let ( QueueItem { key: _, value: stack, }, - priority, + _, ) = self.stacks.peek_mut()?; - stack.peek().map(|envelope| (envelope, priority.ready())) + stack.peek() } - fn pop(&mut self) -> Option<(Box, bool)> { - let (QueueItem { key, value: stack }, priority) = self.stacks.peek_mut()?; + fn pop(&mut self) -> Option> { + let (QueueItem { key, value: stack }, _) = self.stacks.peek_mut()?; let stack_key = *key; - let is_ready = priority.ready(); let envelope = stack.pop().expect("found an empty stack"); let next_received_at = stack @@ -126,7 +125,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }); } } - Some((envelope, is_ready)) + Some(envelope) } fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { @@ -272,35 +271,35 @@ mod tests { assert!(buffer.peek().is_none()); 
buffer.push(new_envelope(project_key1, None)); - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); buffer.push(new_envelope(project_key2, None)); // Both projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); buffer.push(new_envelope(project_key3, None)); // All projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); // After marking a project ready, it goes to the top: buffer.mark_ready(&project_key3, true); - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key3); - assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key3); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key3); + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key3); // After popping, project 1 is on top again: - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); // Mark project 1 as ready (still on top): buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); // Mark project 2 as ready as well (now on top because most recent): buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().0.meta().public_key(), project_key2); - assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key2); + assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key2); + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key2); // Pop last element: - 
assert_eq!(buffer.pop().unwrap().0.meta().public_key(), project_key1); + assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key1); assert!(buffer.pop().is_none()); assert!(buffer.peek().is_none()); } @@ -321,8 +320,8 @@ mod tests { buffer.push(envelope1); buffer.push(envelope2); - assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant2); - assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant1); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); assert!(buffer.pop().is_none()); } @@ -346,28 +345,28 @@ mod tests { buffer.push(envelope3); // Nothing is ready, instant1 is on top: - assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); // Mark project 2 ready, gets on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant2); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); // Revert buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); // Project 1 ready: buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant1); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); // when both projects are ready, event no 3 ends up on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant3); - assert_eq!(buffer.peek().unwrap().0.meta().start_time(), instant2); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant3); + assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant1); - assert_eq!(buffer.pop().unwrap().0.meta().start_time(), instant2); + 
assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); assert!(buffer.pop().is_none()); } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 0b7db0de3a..ad9e912c91 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -38,24 +38,35 @@ impl EnvelopeBuffer { guard.push(envelope); } - pub async fn peek(&self) -> Peek { - Peek(self.0.lock().await) - } - - pub async fn mark_ready(&self, project: &ProjectKey, is_ready: bool) { + pub async fn peek(&self) -> Option { let mut guard = self.0.lock().await; - guard.mark_ready(project, is_ready) + guard.peek()?; + + Some(Peek(guard)) } + + // pub async fn mark_ready(&self, project: &ProjectKey, is_ready: bool) { + // let mut guard = self.0.lock().await; + // guard.mark_ready(project, is_ready) + // } } pub struct Peek<'a>(MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>); impl Peek<'_> { - pub fn get(&mut self) -> Option<(&Envelope, bool)> { - self.0.peek() + pub fn get(&mut self) -> &Envelope { + self.0 + .peek() + .expect("element disappeared while holding lock") + } + + pub fn remove(mut self) -> Box { + self.0 + .pop() + .expect("element disappeared while holding lock") } - pub fn remove(mut self) -> Option<(Box, bool)> { - self.0.pop() + pub fn mark_ready(&mut self, project_key: &ProjectKey, ready: bool) { + self.0.mark_ready(project_key, ready); } } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 265bf6cc2e..c2c6743519 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -23,7 +23,7 @@ use crate::services::global_config::{self, GlobalConfigManager, Subscribe}; use crate::services::metrics::{Aggregator, FlushBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::{ - EncodeMetrics, 
EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, Sampling, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; use crate::services::project::{ CheckedBuckets, Project, ProjectFetchState, ProjectSender, ProjectState, @@ -1021,62 +1021,59 @@ impl ProjectCacheBroker { } } - fn peek_at_envelope(&self, mut peek: Peek<'_>) { - relay_log::trace!("Peeking into the envelope buffer"); - let Some((envelope, should_be_ready)) = peek.get() else { - return; - }; - - relay_log::trace!("Found an envelope"); - + fn peek_at_envelope(&mut self, mut peek: Peek<'_>) { + let envelope = peek.get(); // TODO: make envelope age configurable. if envelope.meta().start_time().elapsed() > MAX_ENVELOPE_AGE { - let envelope = ManagedEnvelope::new( + let mut managed_envelope = ManagedEnvelope::new( peek.remove(), - self.services.outcome_aggregator, - self.services.test_store, + self.services.outcome_aggregator.clone(), + self.services.test_store.clone(), ProcessingGroup::Ungrouped, ); - envelope.reject(Outcome::Invalid(DiscardReason::Expired)); + managed_envelope.reject(Outcome::Invalid(DiscardReason::Timestamp)); // TODO: metrics in all branches. 
return; } + let sampling_key = envelope.sampling_key(); + let project_cache = self.services.project_cache.clone(); + let project_key = envelope.meta().public_key(); let project = &mut self.get_or_create_project(project_key); let reservoir_counters = project.reservoir_counters(); - let project_state = project.get_cached_state(self.services.project_cache.clone(), false); + let project_state = project.get_cached_state(project_cache.clone(), false); let project_info = match project_state { ProjectState::Enabled(info) => { - peek.mark_ready(project_key, true); + peek.mark_ready(&project_key, true); info } ProjectState::Disabled => { - let envelope = ManagedEnvelope::new( + let mut managed_envelope = ManagedEnvelope::new( peek.remove(), - self.services.outcome_aggregator, - self.services.test_store, + self.services.outcome_aggregator.clone(), + self.services.test_store.clone(), ProcessingGroup::Ungrouped, ); - envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); + managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); return; } ProjectState::Pending => { - peek.mark_ready(project_key, false); + peek.mark_ready(&project_key, false); return; } }; - let sampling_project_info = match envelope.sampling_key().map(|sampling_key| { + let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, self.get_or_create_project(sampling_key) - .get_cached_state(self.services.project_cache.clone(), false), + .get_cached_state(project_cache, false), ) }) { Some((sampling_key, ProjectState::Enabled(info))) => { - peek.mark_ready(sampling_key, true); + peek.mark_ready(&sampling_key, true); Some(info) } Some((_, ProjectState::Disabled)) => { @@ -1084,7 +1081,7 @@ impl ProjectCacheBroker { None } Some((sampling_key, ProjectState::Pending)) => { - peek.mark_ready(sampling_key, false); + peek.mark_ready(&sampling_key, false); return; } None => None, @@ -1092,11 +1089,12 @@ impl ProjectCacheBroker { let managed_envelope = ManagedEnvelope::new( 
peek.remove(), - self.services.outcome_aggregator, - self.services.test_store, + self.services.outcome_aggregator.clone(), + self.services.test_store.clone(), ProcessingGroup::Ungrouped, // TODO: ungrouped correct? ); + let project = &mut self.get_or_create_project(project_key); let Ok(CheckedEnvelope { envelope: Some(managed_envelope), .. @@ -1393,10 +1391,15 @@ impl Service for ProjectCacheService { }) } peek = envelope_buffer.peek() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { + relay_log::trace!("Peeking at envelope"); + if let Some(peek) = peek { + relay_log::trace!("Found an envelope"); + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { broker.peek_at_envelope(peek) }) } + + } else => break, } } From d0dae2995cf9745b47596f2ac872c2dfea845695 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 15:29:09 +0200 Subject: [PATCH 13/62] v1 vs v2 --- relay-server/src/services/project_cache.rs | 176 ++++++++++++--------- 1 file changed, 105 insertions(+), 71 deletions(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index c2c6743519..d5f87ab5eb 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -578,19 +578,25 @@ struct ProjectCacheBroker { source: ProjectSource, /// Tx channel used to send the updated project state whenever requested. state_tx: mpsc::UnboundedSender, + + /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). + spool_v1_unspool_handle: SleepHandle, + spool_v1: Option, + /// Status of the global configuration, used to determine readiness for processing. + global_config: GlobalConfigStatus, +} + +#[derive(Debug)] +struct SpoolV1 { /// Tx channel used by the [`BufferService`] to send back the requested dequeued elements. 
buffer_tx: mpsc::UnboundedSender, /// Index containing all the [`QueueKey`] that have been enqueued in the [`BufferService`]. index: HashSet, - /// Handle to schedule periodic unspooling of buffered envelopes. - buffer_unspool_handle: SleepHandle, /// Backoff strategy for retrying unspool attempts. buffer_unspool_backoff: RetryBackoff, /// Address of the [`BufferService`] used for enqueuing and dequeuing envelopes that can't be /// immediately processed. buffer: Addr, - /// Status of the global configuration, used to determine readiness for processing. - global_config: GlobalConfigStatus, } /// Describes the current status of the `GlobalConfig`. @@ -617,18 +623,21 @@ impl ProjectCacheBroker { } /// Adds the value to the queue for the provided key. - pub fn enqueue(&mut self, key: QueueKey, value: ManagedEnvelope) { - self.index.insert(key); - self.buffer.send(Enqueue::new(key, value)); + fn enqueue(&mut self, key: QueueKey, value: ManagedEnvelope) { + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + spool_v1.index.insert(key); + spool_v1.buffer.send(Enqueue::new(key, value)); } /// Sends the message to the buffer service to dequeue the envelopes. /// /// All the found envelopes will be send back through the `buffer_tx` channel and directly /// forwarded to `handle_processing`. - pub fn dequeue(&self, keys: HashSet) { - self.buffer - .send(DequeueMany::new(keys, self.buffer_tx.clone())) + fn dequeue(&self, keys: HashSet) { + let spool_v1 = self.spool_v1.as_ref().expect("no V1 spool configured"); + spool_v1 + .buffer + .send(DequeueMany::new(keys, spool_v1.buffer_tx.clone())) } /// Evict projects that are over its expiry date. 
@@ -648,13 +657,15 @@ impl ProjectCacheBroker { // Defer dropping the projects to a dedicated thread: let mut count = 0; for (project_key, project) in expired { - let keys = self - .index - .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) - .collect::>(); - - if !keys.is_empty() { - self.buffer.send(RemoveMany::new(project_key, keys)) + if let Some(spool_v1) = self.spool_v1.as_mut() { + let keys = spool_v1 + .index + .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) + .collect::>(); + + if !keys.is_empty() { + spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + } } self.garbage_disposal.dispose(project); @@ -999,11 +1010,13 @@ impl ProjectCacheBroker { } fn handle_buffer_index(&mut self, message: UpdateSpoolIndex) { - self.index.extend(message.0); + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + spool_v1.index.extend(message.0); } fn handle_spool_health(&mut self, sender: Sender) { - self.buffer.send(spooler::Health(sender)) + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + spool_v1.buffer.send(spooler::Health(sender)) } fn handle_refresh_index_cache(&mut self, message: RefreshIndexCache) { @@ -1011,7 +1024,8 @@ impl ProjectCacheBroker { let project_cache = self.services.project_cache.clone(); for key in index { - self.index.insert(key); + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + spool_v1.index.insert(key); self.get_or_create_project(key.own_key) .prefetch(project_cache.clone(), false); if key.own_key != key.sampling_key { @@ -1113,15 +1127,19 @@ impl ProjectCacheBroker { /// Returns backoff timeout for an unspool attempt. 
fn next_unspool_attempt(&mut self) -> Duration { - self.config.spool_envelopes_unspool_interval() + self.buffer_unspool_backoff.next_backoff() + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + self.config.spool_envelopes_unspool_interval() + + spool_v1.buffer_unspool_backoff.next_backoff() } fn schedule_unspool(&mut self) { - if self.buffer_unspool_handle.is_idle() { - // Set the time for the next attempt. - let wait = self.next_unspool_attempt(); - self.buffer_unspool_handle.set(wait); + if self.spool_v1.is_some() { + return; } + + // Set the time for the next attempt. + let wait = self.next_unspool_attempt(); + self.spool_v1_unspool_handle.set(wait); } /// Returns `true` if the project state valid for the [`QueueKey`]. @@ -1154,21 +1172,22 @@ impl ProjectCacheBroker { } fn handle_periodic_unspool_inner(&mut self) -> (usize, &str) { - self.buffer_unspool_handle.reset(); + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); + self.spool_v1_unspool_handle.reset(); // If we don't yet have the global config, we will defer dequeuing until we do. if let GlobalConfigStatus::Pending = self.global_config { - self.buffer_unspool_backoff.reset(); + spool_v1.buffer_unspool_backoff.reset(); self.schedule_unspool(); return (0, "no_global_config"); } // If there is nothing spooled, schedule the next check a little bit later. - if self.index.is_empty() { + if spool_v1.index.is_empty() { self.schedule_unspool(); return (0, "index_empty"); } - let mut index = std::mem::take(&mut self.index); + let mut index = std::mem::take(&mut spool_v1.index); let keys = index .extract_if(|key| self.is_state_cached(key)) .take(BATCH_KEY_COUNT) @@ -1180,12 +1199,13 @@ impl ProjectCacheBroker { } // Return all the un-used items to the index. + let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); if !index.is_empty() { - self.index.extend(index); + spool_v1.index.extend(index); } // Schedule unspool once we are done. 
- self.buffer_unspool_backoff.reset(); + spool_v1.buffer_unspool_backoff.reset(); self.schedule_unspool(); (num_keys, "found_keys") @@ -1281,29 +1301,6 @@ impl Service for ProjectCacheService { // Channel for async project state responses back into the project cache. let (state_tx, mut state_rx) = mpsc::unbounded_channel(); - // Channel for envelope buffering. - let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); - let buffer_services = spooler::Services { - outcome_aggregator, - project_cache, - test_store, - }; - let buffer = match BufferService::create( - memory_checker.clone(), - buffer_services, - config.clone(), - ) - .await - { - Ok(buffer) => buffer.start(), - Err(err) => { - relay_log::error!(error = &err as &dyn Error, "failed to start buffer service",); - // NOTE: The process will exit with error if the buffer file could not be - // opened or the migrations could not be run. - std::process::exit(1); - } - }; - let Ok(mut subscription) = services.global_config.send(Subscribe).await else { // TODO(iker): we accept this sub-optimal error handling. TBD // the approach to deal with failures on the subscription @@ -1323,8 +1320,46 @@ impl Service for ProjectCacheService { } }; - // Request the existing index from the spooler. - buffer.send(RestoreIndex); + let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); + let spool_v1 = match config.spool_v2() { + true => None, + false => Some({ + // Channel for envelope buffering. + let buffer_services = spooler::Services { + outcome_aggregator, + project_cache, + test_store, + }; + let buffer = match BufferService::create( + memory_checker.clone(), + buffer_services, + config.clone(), + ) + .await + { + Ok(buffer) => buffer.start(), + Err(err) => { + relay_log::error!( + error = &err as &dyn Error, + "failed to start buffer service", + ); + // NOTE: The process will exit with error if the buffer file could not be + // opened or the migrations could not be run. 
+ std::process::exit(1); + } + }; + + // Request the existing index from the spooler. + buffer.send(RestoreIndex); + + SpoolV1 { + buffer_tx, + index: HashSet::new(), + buffer_unspool_backoff: RetryBackoff::new(config.http_max_retry_interval()), + buffer, + } + }), + }; // Main broker that serializes public and internal messages, and triggers project state // fetches via the project source. @@ -1340,11 +1375,8 @@ impl Service for ProjectCacheService { ), services, state_tx, - buffer_tx, - index: HashSet::new(), - buffer_unspool_handle: SleepHandle::idle(), - buffer_unspool_backoff: RetryBackoff::new(config.http_max_retry_interval()), - buffer, + spool_v1_unspool_handle: SleepHandle::idle(), + spool_v1, global_config, metric_outcomes, }; @@ -1380,7 +1412,7 @@ impl Service for ProjectCacheService { broker.evict_stale_project_caches() }) } - () = &mut broker.buffer_unspool_handle => { + () = &mut broker.spool_v1_unspool_handle => { metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() }) @@ -1518,12 +1550,14 @@ mod tests { source: ProjectSource::start(config, services.upstream_relay.clone(), None), services, state_tx, - buffer_tx, - index: HashSet::new(), - buffer: buffer.clone(), + spool_v1_unspool_handle: SleepHandle::idle(), + spool_v1: Some(SpoolV1 { + buffer_tx, + index: HashSet::new(), + buffer: buffer.clone(), + buffer_unspool_backoff: RetryBackoff::new(Duration::from_millis(100)), + }), global_config: GlobalConfigStatus::Pending, - buffer_unspool_handle: SleepHandle::idle(), - buffer_unspool_backoff: RetryBackoff::new(Duration::from_millis(100)), metric_outcomes, }, buffer, @@ -1570,10 +1604,10 @@ mod tests { select! 
{ Some(assert) = rx_assert.recv() => { - assert_eq!(broker.index.len(), assert); + assert_eq!(broker.spool_v1.as_ref().unwrap().index.len(), assert); }, Some(update) = rx_update.recv() => broker.merge_state(update), - () = &mut broker.buffer_unspool_handle => broker.handle_periodic_unspool(), + () = &mut broker.spool_v1_unspool_handle => broker.handle_periodic_unspool(), } } }); @@ -1632,7 +1666,7 @@ mod tests { // Index and projects are empty. assert!(broker.projects.is_empty()); - assert!(broker.index.is_empty()); + assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); // Since there is no project we should not process anything but create a project and spool // the envelope. @@ -1640,7 +1674,7 @@ mod tests { // Assert that we have a new project and also added an index. assert!(broker.projects.get(&project_key).is_some()); - assert!(broker.index.contains(&key)); + assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); // Check is we actually spooled anything. buffer_svc.send(DequeueMany::new([key].into(), buffer_tx.clone())); From 0fb88302a0b456ec0ceeb402c6cef230c6c6c4a1 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 15:43:57 +0200 Subject: [PATCH 14/62] make envelope buffer optional for now --- relay-config/src/config.rs | 2 +- relay-server/src/endpoints/common.rs | 17 ++++++++------- relay-server/src/service.rs | 6 +++--- relay-server/src/services/buffer/mod.rs | 6 ++++-- relay-server/src/services/project_cache.rs | 24 +++++++++++++++------- tests/integration/fixtures/relay.py | 2 +- 6 files changed, 36 insertions(+), 21 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index be2fb762b5..cda759a33c 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -868,8 +868,8 @@ pub struct EnvelopeSpool { /// The interval in milliseconds to trigger unspool. 
#[serde(default = "spool_envelopes_unspool_interval")] unspool_interval: u64, - /// Version of the spooler + #[serde(default = "EnvelopeSpoolVersion::default")] version: EnvelopeSpoolVersion, } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 25aaa8072d..00868184af 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -305,13 +305,16 @@ async fn queue_envelope( ); envelope.scope(scoping); - if state.config().spool_v2() { - // NOTE: This assumes that a `prefetch` has already been scheduled for both the - // envelope's projects. See `handle_check_envelope`. - relay_log::trace!("Pushing envelope to V2 buffer"); - state.envelope_buffer().push(envelope.into_envelope()).await; - } else { - state.project_cache().send(ValidateEnvelope::new(envelope)); + match state.envelope_buffer() { + Some(buffer) => { + // NOTE: This assumes that a `prefetch` has already been scheduled for both the + // envelope's projects. See `handle_check_envelope`. 
+ relay_log::trace!("Pushing envelope to V2 buffer"); + buffer.push(envelope.into_envelope()).await; + } + None => { + state.project_cache().send(ValidateEnvelope::new(envelope)); + } } } // The entire envelope is taken for a split above, and it's empty at this point, we can just diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index d967b71342..0e010bc8de 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -139,7 +139,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: EnvelopeBuffer, + envelope_buffer: Option, registry: Registry, } @@ -323,8 +323,8 @@ impl ServiceState { &self.inner.memory_checker } - pub fn envelope_buffer(&self) -> &EnvelopeBuffer { - &self.inner.envelope_buffer + pub fn envelope_buffer(&self) -> Option<&EnvelopeBuffer> { + self.inner.envelope_buffer.as_ref() } /// Returns the address of the [`ProjectCache`] service. diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index ad9e912c91..bcd54657ef 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -28,9 +28,11 @@ pub struct EnvelopeBuffer( ); impl EnvelopeBuffer { - pub fn from_config(config: &Config) -> Self { + pub fn from_config(config: &Config) -> Option { // TODO: create a DiskMemoryStack if db config is given. 
- Self(envelopebuffer::create(config)) + config + .spool_v2() + .then(|| Self(envelopebuffer::create(config))) } pub async fn push(&self, envelope: Box) { diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index d5f87ab5eb..721731e8fe 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1133,13 +1133,15 @@ impl ProjectCacheBroker { } fn schedule_unspool(&mut self) { - if self.spool_v1.is_some() { + if self.spool_v1.is_none() { return; } - // Set the time for the next attempt. - let wait = self.next_unspool_attempt(); - self.spool_v1_unspool_handle.set(wait); + if self.spool_v1_unspool_handle.is_idle() { + // Set the time for the next attempt. + let wait = self.next_unspool_attempt(); + self.spool_v1_unspool_handle.set(wait); + } } /// Returns `true` if the project state valid for the [`QueueKey`]. @@ -1164,6 +1166,7 @@ impl ProjectCacheBroker { /// This makes sure we always moving the unspool forward, even if we do not fetch the project /// states updates, but still can process data based on the existing cache. 
fn handle_periodic_unspool(&mut self) { + relay_log::trace!("handle_periodic_unspool"); let (num_keys, reason) = self.handle_periodic_unspool_inner(); relay_statsd::metric!( gauge(RelayGauges::BufferPeriodicUnspool) = num_keys as u64, @@ -1251,7 +1254,7 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: EnvelopeBuffer, + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1262,7 +1265,7 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, - envelope_buffer: EnvelopeBuffer, + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1422,7 +1425,7 @@ impl Service for ProjectCacheService { broker.handle_message(message) }) } - peek = envelope_buffer.peek() => { + peek = peek_buffer(&envelope_buffer) => { relay_log::trace!("Peeking at envelope"); if let Some(peek) = peek { relay_log::trace!("Found an envelope"); @@ -1441,6 +1444,13 @@ impl Service for ProjectCacheService { } } +async fn peek_buffer(buffer: &Option) -> Option { + match buffer { + Some(buffer) => buffer.peek().await, + None => std::future::pending().await, + } +} + #[derive(Clone, Debug)] pub struct FetchProjectState { /// The public key to fetch the project by. 
diff --git a/tests/integration/fixtures/relay.py b/tests/integration/fixtures/relay.py index 905117ef49..e07576c8d9 100644 --- a/tests/integration/fixtures/relay.py +++ b/tests/integration/fixtures/relay.py @@ -148,7 +148,7 @@ def inner( }, "spool": { # Unspool as quickly as possible - "envelopes": {"unspool_interval": 1, "version": "2"}, + "envelopes": {"unspool_interval": 1, "version": "1"}, }, } From a7d1cf87cdd867eb04ed2464e8f57c18b04556cf Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Jul 2024 16:08:35 +0200 Subject: [PATCH 15/62] fix: health check --- relay-server/src/services/buffer/mod.rs | 5 ----- relay-server/src/services/project_cache.rs | 9 ++++++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index bcd54657ef..3c74fa54b6 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -46,11 +46,6 @@ impl EnvelopeBuffer { Some(Peek(guard)) } - - // pub async fn mark_ready(&self, project: &ProjectKey, is_ready: bool) { - // let mut guard = self.0.lock().await; - // guard.mark_ready(project, is_ready) - // } } pub struct Peek<'a>(MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>); diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 721731e8fe..28f200202f 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1014,9 +1014,11 @@ impl ProjectCacheBroker { spool_v1.index.extend(message.0); } - fn handle_spool_health(&mut self, sender: Sender) { - let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); - spool_v1.buffer.send(spooler::Health(sender)) + fn handle_spool_health(&self, sender: Sender) { + match &self.spool_v1 { + Some(spool_v1) => spool_v1.buffer.send(spooler::Health(sender)), + None => sender.send(true), // TODO + } } fn handle_refresh_index_cache(&mut self, message: 
RefreshIndexCache) { @@ -1444,6 +1446,7 @@ impl Service for ProjectCacheService { } } +/// Temporary helper function while V1 spool exists. async fn peek_buffer(buffer: &Option) -> Option { match buffer { Some(buffer) => buffer.peek().await, From 82f3a95849f642c54280f2fe880afacfcd6921c4 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sat, 27 Jul 2024 14:30:37 +0200 Subject: [PATCH 16/62] awaiting pop --- relay-server/src/services/buffer/mod.rs | 35 ++++++++++++++-------- relay-server/src/services/project_cache.rs | 10 ++----- tests/integration/test_projectconfigs.py | 4 ++- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs index 3c74fa54b6..fa11cca91e 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -12,7 +12,7 @@ mod envelopestack; /// Wrapper for the EnvelopeBuffer implementation. #[derive(Debug)] -pub struct EnvelopeBuffer( +pub struct EnvelopeBuffer { /// TODO: Reconsider synchronization mechanism. /// We can either /// - keep the interface sync and use a std Mutex. In this case, we create a queue of threads. @@ -24,27 +24,38 @@ pub struct EnvelopeBuffer( /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. - Arc>, -); + backend: Arc>, + notify: tokio::sync::Notify, } impl EnvelopeBuffer { pub fn from_config(config: &Config) -> Option { // TODO: create a DiskMemoryStack if db config is given. 
- config - .spool_v2() - .then(|| Self(envelopebuffer::create(config))) + config.spool_v2().then(|| Self { + backend: envelopebuffer::create(config), + notify: tokio::sync::Notify::new(), + }) } pub async fn push(&self, envelope: Box) { - let mut guard = self.0.lock().await; + let mut guard = self.backend.lock().await; guard.push(envelope); + relay_log::trace!("Notifying"); + self.notify.notify_waiters(); } - pub async fn peek(&self) -> Option { - let mut guard = self.0.lock().await; - guard.peek()?; - - Some(Peek(guard)) + pub async fn peek(&self) -> Peek { + relay_log::trace!("Calling peek"); + loop { + let mut guard = self.backend.lock().await; + if guard.peek().is_none() { + drop(guard); + relay_log::trace!("No envelope found, awaiting"); + self.notify.notified().await; + } else { + return Peek(guard); + } + } } } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 28f200202f..8ebdada491 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1429,14 +1429,10 @@ impl Service for ProjectCacheService { } peek = peek_buffer(&envelope_buffer) => { relay_log::trace!("Peeking at envelope"); - if let Some(peek) = peek { - relay_log::trace!("Found an envelope"); - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { - broker.peek_at_envelope(peek) + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { + broker.peek_at_envelope(peek); }) } - - } else => break, } } @@ -1447,7 +1443,7 @@ impl Service for ProjectCacheService { } /// Temporary helper function while V1 spool exists. 
-async fn peek_buffer(buffer: &Option) -> Option { +async fn peek_buffer(buffer: &Option) -> Peek { match buffer { Some(buffer) => buffer.peek().await, None => std::future::pending().await, diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index baf2a03466..32a712e22a 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -253,7 +253,9 @@ def test_unparsable_project_config(buffer_config, mini_sentry, relay): temp = tempfile.mkdtemp() dbfile = os.path.join(temp, "buffer.db") # set the buffer to something low to force the spooling - relay_config["spool"] = {"envelopes": {"path": dbfile, "max_memory_size": 1000}} + relay_config["spool"] = { + "envelopes": {"path": dbfile, "max_memory_size": 1000, "version": "2"} + } relay = relay(mini_sentry, relay_config) mini_sentry.add_full_project_config(project_key) From db1ec703f74c3d9537f79ed47abc089a4aec1132 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sat, 27 Jul 2024 20:01:04 +0200 Subject: [PATCH 17/62] mark_ready on updated state --- relay-server/src/services/buffer/mod.rs | 17 +++++++++++++---- relay-server/src/services/project_cache.rs | 10 ++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index fa11cca91e..80fef0dc7c 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -11,7 +11,7 @@ mod envelopebuffer; mod envelopestack; /// Wrapper for the EnvelopeBuffer implementation. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EnvelopeBuffer { /// TODO: Reconsider synchronization mechanism. /// We can either @@ -25,15 +25,15 @@ pub struct EnvelopeBuffer { /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. 
backend: Arc>, - notify: tokio::sync::Notify, + notify: Arc, } impl EnvelopeBuffer { pub fn from_config(config: &Config) -> Option { - // TODO: create a DiskMemoryStack if db config is given. + // TODO: create a disk-based backend if db config is given (loads stacks from db). config.spool_v2().then(|| Self { backend: envelopebuffer::create(config), - notify: tokio::sync::Notify::new(), + notify: Arc::new(tokio::sync::Notify::new()), }) } @@ -57,6 +57,11 @@ impl EnvelopeBuffer { } } } + + pub async fn mark_ready(&self, project_key: &ProjectKey, ready: bool) { + let mut guard = self.backend.lock().await; + guard.mark_ready(project_key, ready) + } } pub struct Peek<'a>(MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>); @@ -74,6 +79,10 @@ impl Peek<'_> { .expect("element disappeared while holding lock") } + /// Sync version of [`EnvelopeBuffer::mark_ready`]. + /// + /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready + /// without awaiting the lock. pub fn mark_ready(&mut self, project_key: &ProjectKey, ready: bool) { self.0.mark_ready(project_key, ready); } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 8ebdada491..1a598bdfb8 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -568,6 +568,8 @@ impl Services { struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, + // TODO: Make non-optional when spool_v1 is removed. + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, // Need hashbrown because extract_if is not stable in std yet. @@ -719,6 +721,11 @@ impl ProjectCacheBroker { // Try to schedule unspool if it's not scheduled yet. self.schedule_unspool(); + + // TODO: write test that shows envelope can overtake when project becomes ready. 
+ if let Some(buffer) = self.envelope_buffer.clone() { + tokio::spawn(async move { buffer.mark_ready(&project_key, true).await }); + } } fn handle_request_update(&mut self, message: RequestUpdate) { @@ -1371,6 +1378,7 @@ impl Service for ProjectCacheService { let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, + envelope_buffer: envelope_buffer.clone(), projects: hashbrown::HashMap::new(), garbage_disposal: GarbageDisposal::new(), source: ProjectSource::start( @@ -1524,6 +1532,7 @@ mod tests { .unwrap() .into(); let memory_checker = MemoryChecker::new(MemoryStat::default(), config.clone()); + let envelope_buffer = EnvelopeBuffer::from_config(&config); let buffer_services = spooler::Services { outcome_aggregator: services.outcome_aggregator.clone(), project_cache: services.project_cache.clone(), @@ -1554,6 +1563,7 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, + envelope_buffer, projects: hashbrown::HashMap::new(), garbage_disposal: GarbageDisposal::new(), source: ProjectSource::start(config, services.upstream_relay.clone(), None), From 8c5167ef75a401f47266a45694d60dbc88ee2f8d Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 12:16:43 +0200 Subject: [PATCH 18/62] test: peek --- relay-server/src/services/buffer/mod.rs | 108 ++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 80fef0dc7c..67404bbe51 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -87,3 +87,111 @@ impl Peek<'_> { self.0.mark_ready(project_key, ready); } } + +#[cfg(test)] +mod tests { + use std::str::FromStr; + use std::sync::atomic::AtomicUsize; + use std::sync::atomic::Ordering; + use std::time::Duration; + + use relay_common::Dsn; + + use crate::extractors::RequestMeta; + + use super::*; + + #[tokio::test] + async fn no_busy_loop_when_empty() { + let buffer = new_buffer(); 
+ let call_count = Arc::new(AtomicUsize::new(0)); + + tokio::time::pause(); + + let cloned_buffer = buffer.clone(); + let cloned_call_count = call_count.clone(); + tokio::spawn(async move { + cloned_buffer.peek().await.remove(); + cloned_call_count.fetch_add(1, Ordering::Relaxed); + cloned_buffer.peek().await.remove(); + cloned_call_count.fetch_add(1, Ordering::Relaxed); + }); + + // Initial state: no calls + assert_eq!(call_count.load(Ordering::Relaxed), 0); + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 0); + + // State after push: one call + buffer.push(new_envelope()).await; + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 1); + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 1); + + // State after second push: two calls + buffer.push(new_envelope()).await; + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 2); + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 2); + } + + #[tokio::test] + async fn no_busy_loop_when_unchanged() { + let buffer = new_buffer(); + let call_count = Arc::new(AtomicUsize::new(0)); + + tokio::time::pause(); + + let cloned_buffer = buffer.clone(); + let cloned_call_count = call_count.clone(); + tokio::spawn(async move { + cloned_buffer.peek().await; + cloned_call_count.fetch_add(1, Ordering::Relaxed); + cloned_buffer.peek().await; + cloned_call_count.fetch_add(1, Ordering::Relaxed); + }); + + buffer.push(new_envelope()).await; + + // Initial state: no calls + assert_eq!(call_count.load(Ordering::Relaxed), 0); + + // After first advance: got one call + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 1); + + // After second advance: still only one call (no change) + tokio::time::advance(Duration::from_nanos(1)).await; 
+ assert_eq!(call_count.load(Ordering::Relaxed), 1); + + // State after second push: two calls + buffer.push(new_envelope()).await; + tokio::time::advance(Duration::from_nanos(1)).await; + assert_eq!(call_count.load(Ordering::Relaxed), 2); + } + + fn new_buffer() -> EnvelopeBuffer { + EnvelopeBuffer::from_config( + &Config::from_json_value(serde_json::json!({ + "spool": { + "envelopes": { + "version": "2" + } + } + })) + .unwrap(), + ) + .unwrap() + } + + fn new_envelope() -> Box { + Envelope::from_request( + None, + RequestMeta::new( + Dsn::from_str("http://a94ae32be2584e0bbd7a4cbb95971fed@localhost/1").unwrap(), + ), + ) + } +} From 472cecd6fbfb8d96ad2683491f8424719688c347 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 13:17:44 +0200 Subject: [PATCH 19/62] notify on change --- relay-server/src/services/buffer/mod.rs | 46 +++++++++++++++---------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 67404bbe51..8c74603078 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,4 +1,5 @@ #![deny(missing_docs)] +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use relay_base_schema::project::ProjectKey; @@ -26,6 +27,7 @@ pub struct EnvelopeBuffer { /// > and to use message passing to communicate with that task. 
backend: Arc>, notify: Arc, + changed: Arc, } impl EnvelopeBuffer { @@ -34,33 +36,41 @@ impl EnvelopeBuffer { config.spool_v2().then(|| Self { backend: envelopebuffer::create(config), notify: Arc::new(tokio::sync::Notify::new()), + changed: Arc::new(AtomicBool::new(true)), }) } pub async fn push(&self, envelope: Box) { let mut guard = self.backend.lock().await; guard.push(envelope); - relay_log::trace!("Notifying"); - self.notify.notify_waiters(); + self.notify(); } pub async fn peek(&self) -> Peek { relay_log::trace!("Calling peek"); loop { - let mut guard = self.backend.lock().await; - if guard.peek().is_none() { - drop(guard); - relay_log::trace!("No envelope found, awaiting"); - self.notify.notified().await; - } else { - return Peek(guard); + { + let mut guard = self.backend.lock().await; + if self.changed.load(Ordering::Relaxed) && guard.peek().is_some() { + self.changed.store(false, Ordering::Relaxed); + return Peek(guard); + } } + relay_log::trace!("No envelope found, awaiting"); + self.notify.notified().await; } } pub async fn mark_ready(&self, project_key: &ProjectKey, ready: bool) { let mut guard = self.backend.lock().await; - guard.mark_ready(project_key, ready) + guard.mark_ready(project_key, ready); + self.notify(); + } + + fn notify(&self) { + relay_log::trace!("Notifying"); + self.changed.store(true, Ordering::Relaxed); + self.notify.notify_waiters(); } } @@ -111,10 +121,10 @@ mod tests { let cloned_buffer = buffer.clone(); let cloned_call_count = call_count.clone(); tokio::spawn(async move { - cloned_buffer.peek().await.remove(); - cloned_call_count.fetch_add(1, Ordering::Relaxed); - cloned_buffer.peek().await.remove(); - cloned_call_count.fetch_add(1, Ordering::Relaxed); + loop { + cloned_buffer.peek().await.remove(); + cloned_call_count.fetch_add(1, Ordering::Relaxed); + } }); // Initial state: no calls @@ -147,10 +157,10 @@ mod tests { let cloned_buffer = buffer.clone(); let cloned_call_count = call_count.clone(); tokio::spawn(async move { - 
cloned_buffer.peek().await; - cloned_call_count.fetch_add(1, Ordering::Relaxed); - cloned_buffer.peek().await; - cloned_call_count.fetch_add(1, Ordering::Relaxed); + loop { + cloned_buffer.peek().await; + cloned_call_count.fetch_add(1, Ordering::Relaxed); + } }); buffer.push(new_envelope()).await; From 6abed1227ed0447ead219fd74eecc098b9ad9226 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 13:28:33 +0200 Subject: [PATCH 20/62] Notify on ready --- relay-server/src/services/buffer/mod.rs | 28 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 8c74603078..dc5e959f7f 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -53,10 +53,14 @@ impl EnvelopeBuffer { let mut guard = self.backend.lock().await; if self.changed.load(Ordering::Relaxed) && guard.peek().is_some() { self.changed.store(false, Ordering::Relaxed); - return Peek(guard); + return Peek { + guard, + changed: &self.changed, + notify: &self.notify, + }; } } - relay_log::trace!("No envelope found, awaiting"); + relay_log::trace!("Awaiting"); self.notify.notified().await; } } @@ -74,17 +78,22 @@ impl EnvelopeBuffer { } } -pub struct Peek<'a>(MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>); +pub struct Peek<'a> { + guard: MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>, + notify: &'a tokio::sync::Notify, + changed: &'a AtomicBool, +} impl Peek<'_> { pub fn get(&mut self) -> &Envelope { - self.0 + self.guard .peek() .expect("element disappeared while holding lock") } pub fn remove(mut self) -> Box { - self.0 + self.notify(); + self.guard .pop() .expect("element disappeared while holding lock") } @@ -94,7 +103,14 @@ impl Peek<'_> { /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready /// without awaiting the lock. 
pub fn mark_ready(&mut self, project_key: &ProjectKey, ready: bool) { - self.0.mark_ready(project_key, ready); + self.notify(); + self.guard.mark_ready(project_key, ready); + } + + fn notify(&self) { + relay_log::trace!("Notifying"); + self.changed.store(true, Ordering::Relaxed); + self.notify.notify_waiters(); } } From 9eb1fcc7418c67eef6e0d2d6a922ad9a215dadd6 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 14:08:00 +0200 Subject: [PATCH 21/62] fix: only one envelope buffer --- relay-server/src/endpoints/common.rs | 1 + relay-server/src/service.rs | 5 +-- .../buffer/envelopebuffer/priority.rs | 31 ++++++++++++------- relay-server/src/services/buffer/mod.rs | 3 ++ tests/integration/fixtures/relay.py | 2 +- 5 files changed, 28 insertions(+), 14 deletions(-) diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 00868184af..9d66669e1a 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -313,6 +313,7 @@ async fn queue_envelope( buffer.push(envelope.into_envelope()).await; } None => { + relay_log::trace!("Sending envelope to project cache for V1 buffer"); state.project_cache().send(ValidateEnvelope::new(envelope)); } } diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 0e010bc8de..078aa5f3af 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -257,10 +257,11 @@ impl ServiceState { upstream_relay.clone(), global_config.clone(), ); + let envelope_buffer = EnvelopeBuffer::from_config(&config); ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), - EnvelopeBuffer::from_config(&config), + envelope_buffer.clone(), project_cache_services, metric_outcomes, redis_pool.clone(), @@ -302,7 +303,7 @@ impl ServiceState { let state = StateInner { config: config.clone(), memory_checker: MemoryChecker::new(memory_stat, config.clone()), - envelope_buffer: 
EnvelopeBuffer::from_config(&config), + envelope_buffer, registry, }; diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 524853dec2..fe0db42631 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -28,7 +28,7 @@ impl StackKey { pub struct PriorityEnvelopeBuffer { own_keys: hashbrown::HashMap>, sampling_keys: hashbrown::HashMap>, - stacks: priority_queue::PriorityQueue, Priority>, + priority_queue: priority_queue::PriorityQueue, Priority>, } impl PriorityEnvelopeBuffer { @@ -36,22 +36,24 @@ impl PriorityEnvelopeBuffer { Self { own_keys: Default::default(), sampling_keys: Default::default(), - stacks: Default::default(), + priority_queue: Default::default(), } } } impl PriorityEnvelopeBuffer { fn push_stack(&mut self, envelope: Box) { + relay_log::trace!("PriorityEnvelopeBuffer: push_stack"); let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); - self.stacks.push( + let previous_entry = self.priority_queue.push( QueueItem { key: stack_key, value: S::new(envelope), }, Priority::new(received_at), ); + debug_assert!(previous_entry.is_none()); self.own_keys .entry(stack_key.own_key) .or_default() @@ -63,6 +65,7 @@ impl PriorityEnvelopeBuffer { } fn pop_stack(&mut self, stack_key: StackKey) { + relay_log::trace!("PriorityEnvelopeBuffer: pop_stack"); self.own_keys .get_mut(&stack_key.own_key) .expect("own_keys") @@ -71,12 +74,13 @@ impl PriorityEnvelopeBuffer { .get_mut(&stack_key.sampling_key) .expect("sampling_keys") .remove(&stack_key); - self.stacks.remove(&stack_key); + self.priority_queue.remove(&stack_key); } } impl EnvelopeBuffer for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { + relay_log::trace!("PriorityEnvelopeBuffer: push"); let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); 
if let Some(( @@ -85,30 +89,34 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff value: stack, }, _, - )) = self.stacks.get_mut(&stack_key) + )) = self.priority_queue.get_mut(&stack_key) { + relay_log::trace!("PriorityEnvelopeBuffer: pushing to existing stack"); stack.push(envelope); } else { + relay_log::trace!("PriorityEnvelopeBuffer: pushing new stack with one element"); self.push_stack(envelope); } - self.stacks.change_priority_by(&stack_key, |prio| { + self.priority_queue.change_priority_by(&stack_key, |prio| { prio.received_at = received_at; }); } fn peek(&mut self) -> Option<&Envelope> { + relay_log::trace!("PriorityEnvelopeBuffer: peek"); let ( QueueItem { key: _, value: stack, }, _, - ) = self.stacks.peek_mut()?; + ) = self.priority_queue.peek_mut()?; stack.peek() } fn pop(&mut self) -> Option> { - let (QueueItem { key, value: stack }, _) = self.stacks.peek_mut()?; + relay_log::trace!("PriorityEnvelopeBuffer: pop"); + let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; let stack_key = *key; let envelope = stack.pop().expect("found an empty stack"); @@ -120,7 +128,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff self.pop_stack(stack_key); } Some(next_received_at) => { - self.stacks.change_priority_by(&stack_key, |prio| { + self.priority_queue.change_priority_by(&stack_key, |prio| { prio.received_at = next_received_at; }); } @@ -129,16 +137,17 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff } fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { + relay_log::trace!("PriorityEnvelopeBuffer: mark_ready"); if let Some(stack_keys) = self.own_keys.get(project) { for stack_key in stack_keys { - self.stacks.change_priority_by(stack_key, |stack| { + self.priority_queue.change_priority_by(stack_key, |stack| { stack.own_ready = is_ready; }); } } if let Some(stack_keys) = self.sampling_keys.get(project) { for stack_key in stack_keys { - self.stacks.change_priority_by(stack_key, |stack| { + 
self.priority_queue.change_priority_by(stack_key, |stack| { stack.sampling_ready = is_ready; }); } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index dc5e959f7f..24a216f130 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -41,6 +41,7 @@ impl EnvelopeBuffer { } pub async fn push(&self, envelope: Box) { + relay_log::trace!("Calling push"); let mut guard = self.backend.lock().await; guard.push(envelope); self.notify(); @@ -86,12 +87,14 @@ pub struct Peek<'a> { impl Peek<'_> { pub fn get(&mut self) -> &Envelope { + relay_log::trace!("Getting reference to peeked element"); self.guard .peek() .expect("element disappeared while holding lock") } pub fn remove(mut self) -> Box { + relay_log::trace!("Popping peeked element"); self.notify(); self.guard .pop() diff --git a/tests/integration/fixtures/relay.py b/tests/integration/fixtures/relay.py index e07576c8d9..905117ef49 100644 --- a/tests/integration/fixtures/relay.py +++ b/tests/integration/fixtures/relay.py @@ -148,7 +148,7 @@ def inner( }, "spool": { # Unspool as quickly as possible - "envelopes": {"unspool_interval": 1, "version": "1"}, + "envelopes": {"unspool_interval": 1, "version": "2"}, }, } From 3d2186156b7a28a96872a1c505a3144459cc68ac Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 14:40:53 +0200 Subject: [PATCH 22/62] fix: derive envelope group --- relay-server/src/services/project_cache.rs | 54 +++++++++++----------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 1a598bdfb8..9e6b87fddb 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1059,13 +1059,11 @@ impl ProjectCacheBroker { return; } let sampling_key = envelope.sampling_key(); - let project_cache = self.services.project_cache.clone(); + let services = 
self.services.clone(); let project_key = envelope.meta().public_key(); - let project = &mut self.get_or_create_project(project_key); - let reservoir_counters = project.reservoir_counters(); - - let project_state = project.get_cached_state(project_cache.clone(), false); + let project = self.get_or_create_project(project_key); + let project_state = project.get_cached_state(services.project_cache.clone(), false); let project_info = match project_state { ProjectState::Enabled(info) => { @@ -1092,7 +1090,7 @@ impl ProjectCacheBroker { ( sampling_key, self.get_or_create_project(sampling_key) - .get_cached_state(project_cache, false), + .get_cached_state(services.project_cache, false), ) }) { Some((sampling_key, ProjectState::Enabled(info))) => { @@ -1110,28 +1108,32 @@ impl ProjectCacheBroker { None => None, }; - let managed_envelope = ManagedEnvelope::new( - peek.remove(), - self.services.outcome_aggregator.clone(), - self.services.test_store.clone(), - ProcessingGroup::Ungrouped, // TODO: ungrouped correct? - ); + let project = self.get_or_create_project(project_key); - let project = &mut self.get_or_create_project(project_key); - let Ok(CheckedEnvelope { - envelope: Some(managed_envelope), - .. - }) = project.check_envelope(managed_envelope) - else { - return; // Outcomes are emitted by check_envelope - }; + for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove()) { + let managed_envelope = ManagedEnvelope::new( + envelope, + services.outcome_aggregator.clone(), + services.test_store.clone(), + group, + ); - self.services.envelope_processor.send(ProcessEnvelope { - envelope: managed_envelope, - project_info, - sampling_project_info, - reservoir_counters, - }); + let Ok(CheckedEnvelope { + envelope: Some(managed_envelope), + .. 
+ }) = project.check_envelope(managed_envelope) + else { + continue; // Outcomes are emitted by check_envelope + }; + + let reservoir_counters = project.reservoir_counters(); + services.envelope_processor.send(ProcessEnvelope { + envelope: managed_envelope, + project_info: project_info.clone(), + sampling_project_info: sampling_project_info.clone(), + reservoir_counters, + }); + } } /// Returns backoff timeout for an unspool attempt. From 29fe24275083386a99f8eb86863ec04425e32dcd Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 14:54:39 +0200 Subject: [PATCH 23/62] Notify only if changed --- .../src/services/buffer/envelopebuffer/mod.rs | 2 +- .../src/services/buffer/envelopebuffer/priority.rs | 14 +++++++++++--- relay-server/src/services/buffer/mod.rs | 12 ++++++++---- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index cc8ac0c96c..ff9c7f3450 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -14,7 +14,7 @@ pub trait EnvelopeBuffer: std::fmt::Debug + Send { fn push(&mut self, envelope: Box); fn peek(&mut self) -> Option<&Envelope>; fn pop(&mut self) -> Option>; - fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool); + fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool; } pub fn create(config: &Config) -> Arc> { diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index fe0db42631..11201f4bd2 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -136,22 +136,30 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff Some(envelope) } - fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) { + fn mark_ready(&mut self, project: &ProjectKey, 
is_ready: bool) -> bool { relay_log::trace!("PriorityEnvelopeBuffer: mark_ready"); + let mut changed = false; if let Some(stack_keys) = self.own_keys.get(project) { for stack_key in stack_keys { self.priority_queue.change_priority_by(stack_key, |stack| { - stack.own_ready = is_ready; + if is_ready != stack.own_ready { + stack.own_ready = is_ready; + changed = true; + } }); } } if let Some(stack_keys) = self.sampling_keys.get(project) { for stack_key in stack_keys { self.priority_queue.change_priority_by(stack_key, |stack| { - stack.sampling_ready = is_ready; + if is_ready != stack.sampling_ready { + stack.sampling_ready = is_ready; + changed = true; + } }); } } + changed } } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 24a216f130..c352ecc568 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -68,8 +68,10 @@ impl EnvelopeBuffer { pub async fn mark_ready(&self, project_key: &ProjectKey, ready: bool) { let mut guard = self.backend.lock().await; - guard.mark_ready(project_key, ready); - self.notify(); + let changed = guard.mark_ready(project_key, ready); + if changed { + self.notify(); + } } fn notify(&self) { @@ -106,8 +108,10 @@ impl Peek<'_> { /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready /// without awaiting the lock. 
pub fn mark_ready(&mut self, project_key: &ProjectKey, ready: bool) { - self.notify(); - self.guard.mark_ready(project_key, ready); + let changed = self.guard.mark_ready(project_key, ready); + if changed { + self.notify(); + } } fn notify(&self) { From c686d9304d62859738c32680b9a1bdf15706e7e8 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Sun, 28 Jul 2024 15:58:53 +0200 Subject: [PATCH 24/62] ref: Eliminate duplicate stack keys --- .../src/services/buffer/envelopebuffer/mod.rs | 15 ++ .../buffer/envelopebuffer/priority.rs | 144 +++++++++--------- relay-server/src/services/buffer/mod.rs | 32 ++-- 3 files changed, 112 insertions(+), 79 deletions(-) diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index ff9c7f3450..1ff17c3472 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -10,13 +10,28 @@ use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; mod priority; +/// A buffer that stores & prioritizes envelopes. pub trait EnvelopeBuffer: std::fmt::Debug + Send { + /// Adds an envelope to the buffer. fn push(&mut self, envelope: Box); + + /// Returns a reference to the next envelope. + /// + /// Returns `None` if the buffer is empty. fn peek(&mut self) -> Option<&Envelope>; + + /// Returns and removes the next envelope. + /// + /// Returns `None` if the buffer is empty. fn pop(&mut self) -> Option>; + + /// Marks a project as ready or not ready. + /// + /// The buffer reprioritizes its envelopes based on this information. fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool; } +/// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. 
pub fn create(config: &Config) -> Arc> { // TODO: create a DiskMemoryStack Arc::new(Mutex::new( diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 11201f4bd2..9a87313798 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -8,34 +8,23 @@ use crate::envelope::Envelope; use crate::services::buffer::envelopebuffer::EnvelopeBuffer; use crate::services::buffer::envelopestack::EnvelopeStack; -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -struct StackKey { - own_key: ProjectKey, - sampling_key: ProjectKey, -} - -impl StackKey { - fn from_envelope(envelope: &Envelope) -> Self { - let own_key = envelope.meta().public_key(); - Self { - own_key, - sampling_key: envelope.sampling_key().unwrap_or(own_key), - } - } -} - +/// An envelope buffer that holds an individual stack for each project/sampling project combination. +/// +/// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope +/// is pushed, popped, or when a project becomes ready. #[derive(Debug)] pub struct PriorityEnvelopeBuffer { - own_keys: hashbrown::HashMap>, - sampling_keys: hashbrown::HashMap>, + /// The central priority queue. priority_queue: priority_queue::PriorityQueue, Priority>, + /// A lookup table to find all stacks involving a project. + stacks_by_project: hashbrown::HashMap>, } impl PriorityEnvelopeBuffer { + /// Creates an empty buffer. 
pub fn new() -> Self { Self { - own_keys: Default::default(), - sampling_keys: Default::default(), + stacks_by_project: Default::default(), priority_queue: Default::default(), } } @@ -43,7 +32,6 @@ impl PriorityEnvelopeBuffer { impl PriorityEnvelopeBuffer { fn push_stack(&mut self, envelope: Box) { - relay_log::trace!("PriorityEnvelopeBuffer: push_stack"); let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); let previous_entry = self.priority_queue.push( @@ -54,33 +42,27 @@ impl PriorityEnvelopeBuffer { Priority::new(received_at), ); debug_assert!(previous_entry.is_none()); - self.own_keys - .entry(stack_key.own_key) - .or_default() - .insert(stack_key); - self.sampling_keys - .entry(stack_key.sampling_key) - .or_default() - .insert(stack_key); + for project_key in stack_key.iter() { + self.stacks_by_project + .entry(project_key) + .or_default() + .insert(stack_key); + } } fn pop_stack(&mut self, stack_key: StackKey) { - relay_log::trace!("PriorityEnvelopeBuffer: pop_stack"); - self.own_keys - .get_mut(&stack_key.own_key) - .expect("own_keys") - .remove(&stack_key); - self.sampling_keys - .get_mut(&stack_key.sampling_key) - .expect("sampling_keys") - .remove(&stack_key); + for project_key in stack_key.iter() { + self.stacks_by_project + .get_mut(&project_key) + .expect("project_key is missing from lookup") + .remove(&stack_key); + } self.priority_queue.remove(&stack_key); } } impl EnvelopeBuffer for PriorityEnvelopeBuffer { fn push(&mut self, envelope: Box) { - relay_log::trace!("PriorityEnvelopeBuffer: push"); let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); if let Some(( @@ -91,10 +73,8 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff _, )) = self.priority_queue.get_mut(&stack_key) { - relay_log::trace!("PriorityEnvelopeBuffer: pushing to existing stack"); stack.push(envelope); } else { - relay_log::trace!("PriorityEnvelopeBuffer: pushing new stack with one element"); 
self.push_stack(envelope); } self.priority_queue.change_priority_by(&stack_key, |prio| { @@ -103,7 +83,6 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff } fn peek(&mut self) -> Option<&Envelope> { - relay_log::trace!("PriorityEnvelopeBuffer: peek"); let ( QueueItem { key: _, @@ -115,7 +94,6 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff } fn pop(&mut self) -> Option> { - relay_log::trace!("PriorityEnvelopeBuffer: pop"); let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; let stack_key = *key; let envelope = stack.pop().expect("found an empty stack"); @@ -137,25 +115,24 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff } fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { - relay_log::trace!("PriorityEnvelopeBuffer: mark_ready"); let mut changed = false; - if let Some(stack_keys) = self.own_keys.get(project) { + if let Some(stack_keys) = self.stacks_by_project.get(project) { for stack_key in stack_keys { self.priority_queue.change_priority_by(stack_key, |stack| { - if is_ready != stack.own_ready { - stack.own_ready = is_ready; - changed = true; - } - }); - } - } - if let Some(stack_keys) = self.sampling_keys.get(project) { - for stack_key in stack_keys { - self.priority_queue.change_priority_by(stack_key, |stack| { - if is_ready != stack.sampling_ready { - stack.sampling_ready = is_ready; - changed = true; + let mut found = false; + for (subkey, readiness) in [ + (stack_key.0, &mut stack.readiness.0), + (stack_key.1, &mut stack.readiness.1), + ] { + if &subkey == project { + found = true; + if *readiness != is_ready { + changed = true; + *readiness = is_ready; + } + } } + debug_assert!(found); }); } } @@ -163,6 +140,28 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff } } +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +struct StackKey(ProjectKey, ProjectKey); + +impl StackKey { + fn new(mut key1: ProjectKey, mut key2: ProjectKey) -> Self { + if key2 < key1 { + std::mem::swap(&mut key1, &mut key2); + } + 
Self(key1, key2) + } + + fn from_envelope(envelope: &Envelope) -> Self { + let own_key = envelope.meta().public_key(); + let sampling_key = envelope.sampling_key().unwrap_or(own_key); + StackKey::new(own_key, sampling_key) + } + + fn iter(&self) -> impl Iterator { + std::iter::once(self.0).chain((self.0 != self.1).then_some(self.1)) + } +} + #[derive(Debug)] struct QueueItem { key: K, @@ -191,22 +190,14 @@ impl Eq for QueueItem {} #[derive(Debug)] struct Priority { - own_ready: bool, - sampling_ready: bool, + readiness: Readiness, received_at: Instant, } -impl Priority { - fn ready(&self) -> bool { - self.own_ready && self.sampling_ready - } -} - impl Priority { fn new(received_at: Instant) -> Self { Self { - own_ready: false, - sampling_ready: false, + readiness: Readiness::new(), received_at, } } @@ -214,7 +205,7 @@ impl Priority { impl PartialEq for Priority { fn eq(&self, other: &Self) -> bool { - self.ready() == other.ready() && self.received_at == other.received_at + self.readiness.ready() == other.readiness.ready() && self.received_at == other.received_at } } @@ -228,7 +219,7 @@ impl Eq for Priority {} impl Ord for Priority { fn cmp(&self, other: &Self) -> Ordering { - match (self.ready(), other.ready()) { + match (self.readiness.ready(), other.readiness.ready()) { (true, true) => self.received_at.cmp(&other.received_at), (true, false) => Ordering::Greater, (false, true) => Ordering::Less, @@ -239,6 +230,19 @@ impl Ord for Priority { } } +#[derive(Debug)] +struct Readiness(bool, bool); + +impl Readiness { + fn new() -> Self { + Self(false, false) + } + + fn ready(&self) -> bool { + self.0 && self.1 + } +} + #[cfg(test)] mod tests { use std::str::FromStr; diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index c352ecc568..dd138d196b 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,4 +1,4 @@ -#![deny(missing_docs)] +//! Types for buffering envelopes. 
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -11,7 +11,9 @@ use crate::envelope::Envelope; mod envelopebuffer; mod envelopestack; -/// Wrapper for the EnvelopeBuffer implementation. +/// Async envelope buffering interface. +/// +/// Access to the buffer is synchronized by a tokio lock. #[derive(Debug, Clone)] pub struct EnvelopeBuffer { /// TODO: Reconsider synchronization mechanism. @@ -31,6 +33,10 @@ pub struct EnvelopeBuffer { } impl EnvelopeBuffer { + /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. + /// + /// NOTE: until the V1 spooler implementation is removed, this function returns `None` + /// if V2 spooling is not configured. pub fn from_config(config: &Config) -> Option { // TODO: create a disk-based backend if db config is given (loads stacks from db). config.spool_v2().then(|| Self { @@ -40,15 +46,18 @@ impl EnvelopeBuffer { }) } + /// Adds an envelope to the buffer and wakes any waiting consumers. pub async fn push(&self, envelope: Box) { - relay_log::trace!("Calling push"); let mut guard = self.backend.lock().await; guard.push(envelope); self.notify(); } + /// Returns a reference to the next-in-line envelope. + /// + /// If the buffer is empty or has not changed since the last peek, this function will sleep + /// until something changes in the buffer. pub async fn peek(&self) -> Peek { - relay_log::trace!("Calling peek"); loop { { let mut guard = self.backend.lock().await; @@ -61,11 +70,13 @@ impl EnvelopeBuffer { }; } } - relay_log::trace!("Awaiting"); self.notify.notified().await; } } + /// Marks a project as ready or not ready. + /// + /// The buffer reprioritizes its envelopes based on this information. 
pub async fn mark_ready(&self, project_key: &ProjectKey, ready: bool) { let mut guard = self.backend.lock().await; let changed = guard.mark_ready(project_key, ready); @@ -75,12 +86,14 @@ impl EnvelopeBuffer { } fn notify(&self) { - relay_log::trace!("Notifying"); self.changed.store(true, Ordering::Relaxed); self.notify.notify_waiters(); } } +/// A view onto the next envelope in the buffer. +/// +/// Objects of this type can only exist if the buffer is not empty. pub struct Peek<'a> { guard: MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>, notify: &'a tokio::sync::Notify, @@ -88,15 +101,17 @@ pub struct Peek<'a> { } impl Peek<'_> { + /// Returns a reference to the next envelope. pub fn get(&mut self) -> &Envelope { - relay_log::trace!("Getting reference to peeked element"); self.guard .peek() .expect("element disappeared while holding lock") } + /// Pops the next envelope from the buffer. + /// + /// This functions consumes the [`Peek`]. pub fn remove(mut self) -> Box { - relay_log::trace!("Popping peeked element"); self.notify(); self.guard .pop() @@ -115,7 +130,6 @@ impl Peek<'_> { } fn notify(&self) { - relay_log::trace!("Notifying"); self.changed.store(true, Ordering::Relaxed); self.notify.notify_waiters(); } From b50e19d71c4b04972bb74ea28d79969d46dda1f5 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 29 Jul 2024 13:27:39 +0200 Subject: [PATCH 25/62] Merge --- relay-server/src/lib.rs | 2 - .../src/services/buffer/envelopebuffer/mod.rs | 29 +--- .../buffer/envelopebuffer/priority.rs | 133 +++++++++++------- .../src/services/buffer/envelopestack.rs | 32 ----- .../envelopestack}/mod.rs | 15 +- .../envelopestack}/sqlite.rs | 61 ++++---- relay-server/src/services/buffer/mod.rs | 18 ++- relay-server/src/services/project_cache.rs | 12 +- relay-server/src/services/spooler/mod.rs | 1 - 9 files changed, 138 insertions(+), 165 deletions(-) delete mode 100644 relay-server/src/services/buffer/envelopestack.rs rename relay-server/src/services/{spooler/envelope_stack 
=> buffer/envelopestack}/mod.rs (64%) rename relay-server/src/services/{spooler/envelope_stack => buffer/envelopestack}/sqlite.rs (94%) diff --git a/relay-server/src/lib.rs b/relay-server/src/lib.rs index 4b1c1670ae..8322220935 100644 --- a/relay-server/src/lib.rs +++ b/relay-server/src/lib.rs @@ -269,8 +269,6 @@ mod utils; pub use self::services::spooler::spool_utils; // Public just for benchmarks. pub use self::envelope::Envelope; -pub use self::services::spooler::envelope_stack::sqlite::SQLiteEnvelopeStack; -pub use self::services::spooler::envelope_stack::EnvelopeStack; #[cfg(test)] mod testutils; diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index 1ff17c3472..b36023c26e 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -1,38 +1,15 @@ use std::sync::Arc; -use relay_base_schema::project::ProjectKey; use relay_config::Config; use tokio::sync::Mutex; -use crate::envelope::Envelope; use crate::services::buffer::envelopebuffer::priority::PriorityEnvelopeBuffer; -use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; +use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; -mod priority; - -/// A buffer that stores & prioritizes envelopes. -pub trait EnvelopeBuffer: std::fmt::Debug + Send { - /// Adds an envelope to the buffer. - fn push(&mut self, envelope: Box); - - /// Returns a reference to the next envelope. - /// - /// Returns `None` if the buffer is empty. - fn peek(&mut self) -> Option<&Envelope>; - - /// Returns and removes the next envelope. - /// - /// Returns `None` if the buffer is empty. - fn pop(&mut self) -> Option>; - - /// Marks a project as ready or not ready. - /// - /// The buffer reprioritizes its envelopes based on this information. 
- fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool; -} +pub mod priority; // TODO /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. -pub fn create(config: &Config) -> Arc> { +pub fn create(config: &Config) -> Arc>> { // TODO: create a DiskMemoryStack Arc::new(Mutex::new( PriorityEnvelopeBuffer::::new(), diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 9a87313798..4e34fb993d 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -5,7 +5,6 @@ use std::time::Instant; use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; -use crate::services::buffer::envelopebuffer::EnvelopeBuffer; use crate::services::buffer::envelopestack::EnvelopeStack; /// An envelope buffer that holds an individual stack for each project/sampling project combination. 
@@ -28,9 +27,7 @@ impl PriorityEnvelopeBuffer { priority_queue: Default::default(), } } -} -impl PriorityEnvelopeBuffer { fn push_stack(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -59,10 +56,8 @@ impl PriorityEnvelopeBuffer { } self.priority_queue.remove(&stack_key); } -} -impl EnvelopeBuffer for PriorityEnvelopeBuffer { - fn push(&mut self, envelope: Box) { + pub async fn push(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); if let Some(( @@ -73,7 +68,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff _, )) = self.priority_queue.get_mut(&stack_key) { - stack.push(envelope); + stack.push(envelope).await.unwrap(); // TODO: handle errors } else { self.push_stack(envelope); } @@ -82,7 +77,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }); } - fn peek(&mut self) -> Option<&Envelope> { + pub async fn peek(&mut self) -> Option<&Envelope> { let ( QueueItem { key: _, @@ -90,16 +85,18 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff }, _, ) = self.priority_queue.peek_mut()?; - stack.peek() + stack.peek().await.unwrap() // TODO: handle errors } - fn pop(&mut self) -> Option> { + pub async fn pop(&mut self) -> Option> { let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; let stack_key = *key; - let envelope = stack.pop().expect("found an empty stack"); + let envelope = stack.pop().await.unwrap().expect("found an empty stack"); let next_received_at = stack .peek() + .await + .unwrap() // TODO: handle error .map(|next_envelope| next_envelope.meta().start_time()); match next_received_at { None => { @@ -114,7 +111,7 @@ impl EnvelopeBuffer for PriorityEnvelopeBuff Some(envelope) } - fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { + pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { let mut changed = false; if let Some(stack_keys) = 
self.stacks_by_project.get(project) { for stack_key in stack_keys { @@ -253,7 +250,7 @@ mod tests { use crate::envelope::{Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::envelopestack::InMemoryEnvelopeStack; + use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; use super::*; @@ -280,53 +277,83 @@ mod tests { envelope } - #[test] - fn insert_pop() { + #[tokio::test] + async fn insert_pop() { let mut buffer = PriorityEnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); let project_key3 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); - assert!(buffer.pop().is_none()); - assert!(buffer.peek().is_none()); + assert!(buffer.pop().await.is_none()); + assert!(buffer.peek().await.is_none()); - buffer.push(new_envelope(project_key1, None)); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + buffer.push(new_envelope(project_key1, None)).await; + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); - buffer.push(new_envelope(project_key2, None)); + buffer.push(new_envelope(project_key2, None)).await; // Both projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); - buffer.push(new_envelope(project_key3, None)); + buffer.push(new_envelope(project_key3, None)).await; // All projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); // After marking a project ready, it goes to the top: buffer.mark_ready(&project_key3, true); - 
assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key3); - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key3); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key3 + ); + assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key3 + ); // After popping, project 1 is on top again: - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); // Mark project 1 as ready (still on top): buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key1); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); // Mark project 2 as ready as well (now on top because most recent): buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().meta().public_key(), project_key2); - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key2); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key2 + ); + assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key2 + ); // Pop last element: - assert_eq!(buffer.pop().unwrap().meta().public_key(), project_key1); - assert!(buffer.pop().is_none()); - assert!(buffer.peek().is_none()); + assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key1 + ); + assert!(buffer.pop().await.is_none()); + assert!(buffer.peek().await.is_none()); } - #[test] - fn project_internal_order() { + #[tokio::test] + async fn project_internal_order() { let mut buffer = PriorityEnvelopeBuffer::::new(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -338,16 +365,16 @@ mod tests { assert!(instant2 > instant1); - buffer.push(envelope1); - buffer.push(envelope2); + buffer.push(envelope1).await; + buffer.push(envelope2).await; - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); - 
assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); - assert!(buffer.pop().is_none()); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); + assert!(buffer.pop().await.is_none()); } - #[test] - fn sampling_projects() { + #[tokio::test] + async fn sampling_projects() { let mut buffer = PriorityEnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -355,40 +382,40 @@ mod tests { let envelope1 = new_envelope(project_key1, None); let instant1 = envelope1.meta().start_time(); - buffer.push(envelope1); + buffer.push(envelope1).await; let envelope2 = new_envelope(project_key2, None); let instant2 = envelope2.meta().start_time(); - buffer.push(envelope2); + buffer.push(envelope2).await; let envelope3 = new_envelope(project_key1, Some(project_key2)); let instant3 = envelope3.meta().start_time(); - buffer.push(envelope3); + buffer.push(envelope3).await; // Nothing is ready, instant1 is on top: - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); // Mark project 2 ready, gets on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); // Revert buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); // Project 1 ready: buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().unwrap().meta().start_time(), instant1); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); // when both projects are ready, event no 3 ends up on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant3); - 
assert_eq!(buffer.peek().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant3); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant1); - assert_eq!(buffer.pop().unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); - assert!(buffer.pop().is_none()); + assert!(buffer.pop().await.is_none()); } } diff --git a/relay-server/src/services/buffer/envelopestack.rs b/relay-server/src/services/buffer/envelopestack.rs deleted file mode 100644 index b720fb8875..0000000000 --- a/relay-server/src/services/buffer/envelopestack.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::envelope::Envelope; - -pub trait EnvelopeStack: Send { - fn new(envelope: Box) -> Self; - - fn push(&mut self, envelope: Box); - - fn pop(&mut self) -> Option>; - - fn peek(&self) -> Option<&Envelope>; -} - -#[derive(Debug)] -pub struct InMemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); - -impl EnvelopeStack for InMemoryEnvelopeStack { - fn new(envelope: Box) -> Self { - Self(vec![envelope]) - } - - fn push(&mut self, envelope: Box) { - self.0.push(envelope) - } - - fn pop(&mut self) -> Option> { - self.0.pop() - } - - fn peek(&self) -> Option<&Envelope> { - self.0.last().map(Box::as_ref) - } -} diff --git a/relay-server/src/services/spooler/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelopestack/mod.rs similarity index 64% rename from relay-server/src/services/spooler/envelope_stack/mod.rs rename to relay-server/src/services/buffer/envelopestack/mod.rs index d6e9e0b9bb..aa81cd4d5f 100644 --- a/relay-server/src/services/spooler/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelopestack/mod.rs @@ -1,27 +1,28 @@ use crate::envelope::Envelope; use std::future::Future; 
+pub mod memory; pub mod sqlite; /// A stack-like data structure that holds [`Envelope`]s. -pub trait EnvelopeStack { +pub trait EnvelopeStack: Send { /// The error type that is returned when an error is encountered during reading or writing the /// [`EnvelopeStack`]. - type Error; + type Error: std::fmt::Debug; + + /// Creates a new stack with the given element. + fn new(envelope: Box) -> Self; /// Pushes an [`Envelope`] on top of the stack. - #[allow(dead_code)] fn push(&mut self, envelope: Box) -> impl Future>; /// Peeks the [`Envelope`] on top of the stack. /// /// If the stack is empty, an error is returned. - #[allow(dead_code)] - fn peek(&mut self) -> impl Future, Self::Error>>; + fn peek(&mut self) -> impl Future, Self::Error>>; /// Pops the [`Envelope`] on top of the stack. /// /// If the stack is empty, an error is returned. - #[allow(dead_code)] - fn pop(&mut self) -> impl Future, Self::Error>>; + fn pop(&mut self) -> impl Future>, Self::Error>>; } diff --git a/relay-server/src/services/spooler/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelopestack/sqlite.rs similarity index 94% rename from relay-server/src/services/spooler/envelope_stack/sqlite.rs rename to relay-server/src/services/buffer/envelopestack/sqlite.rs index c3fbae8676..0b35d97aa2 100644 --- a/relay-server/src/services/spooler/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelopestack/sqlite.rs @@ -1,6 +1,6 @@ use crate::envelope::Envelope; use crate::extractors::StartTime; -use crate::services::spooler::envelope_stack::EnvelopeStack; +use crate::services::buffer::envelopestack::EnvelopeStack; use futures::StreamExt; use relay_base_schema::project::ProjectKey; use sqlx::query::Query; @@ -237,6 +237,10 @@ impl SQLiteEnvelopeStack { impl EnvelopeStack for SQLiteEnvelopeStack { type Error = SQLiteEnvelopeStackError; + fn new(envelope: Box) -> Self { + todo!() + } + async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { 
debug_assert!(self.validate_envelope(&envelope)); @@ -263,30 +267,27 @@ impl EnvelopeStack for SQLiteEnvelopeStack { Ok(()) } - async fn peek(&mut self) -> Result<&Box, Self::Error> { + async fn peek(&mut self) -> Result, Self::Error> { if self.below_unspool_threshold() && self.check_disk { self.unspool_from_disk().await? } - self.batches_buffer + Ok(self + .batches_buffer .back() .and_then(|last_batch| last_batch.last()) - .ok_or(Self::Error::Empty) + .map(|boxed| boxed.as_ref())) } - async fn pop(&mut self) -> Result, Self::Error> { + async fn pop(&mut self) -> Result>, Self::Error> { if self.below_unspool_threshold() && self.check_disk { self.unspool_from_disk().await? } - let result = self - .batches_buffer - .back_mut() - .and_then(|last_batch| { - self.batches_buffer_size -= 1; - last_batch.pop() - }) - .ok_or(Self::Error::Empty); + let result = self.batches_buffer.back_mut().and_then(|last_batch| { + self.batches_buffer_size -= 1; + last_batch.pop() + }); // Since we might leave a batch without elements, we want to pop it from the buffer. if self @@ -297,7 +298,7 @@ impl EnvelopeStack for SQLiteEnvelopeStack { self.batches_buffer.pop_back(); } - result + Ok(result) } } @@ -336,7 +337,7 @@ pub fn build_delete_and_fetch_many_envelopes<'a>( sqlx::query( "DELETE FROM envelopes - WHERE id IN (SELECT id FROM envelopes WHERE own_key = ? AND sampling_key = ? + WHERE id IN (SELECT id FROM envelopes WHERE own_key = ? AND sampling_key = ? ORDER BY received_at DESC LIMIT ?) 
RETURNING received_at, own_key, sampling_key, envelope", @@ -355,12 +356,6 @@ fn received_at(envelope: &Envelope) -> i64 { #[cfg(test)] mod tests { - use crate::envelope::{Envelope, Item, ItemType}; - use crate::extractors::RequestMeta; - use crate::services::spooler::envelope_stack::sqlite::{ - SQLiteEnvelopeStack, SQLiteEnvelopeStackError, - }; - use crate::services::spooler::envelope_stack::EnvelopeStack; use relay_base_schema::project::ProjectKey; use relay_event_schema::protocol::EventId; use relay_sampling::DynamicSamplingContext; @@ -372,6 +367,10 @@ mod tests { use tokio::fs::DirBuilder; use uuid::Uuid; + use super::*; + use crate::envelope::{Envelope, Item, ItemType}; + use crate::extractors::RequestMeta; + fn request_meta() -> RequestMeta { let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" .parse() @@ -499,9 +498,9 @@ mod tests { assert_eq!(stack.batches_buffer_size, 3); // We pop the remaining elements, expecting the last added envelope to be on top. - let popped_envelope_1 = stack.pop().await.unwrap(); - let popped_envelope_2 = stack.pop().await.unwrap(); - let popped_envelope_3 = stack.pop().await.unwrap(); + let popped_envelope_1 = stack.pop().await.unwrap().unwrap(); + let popped_envelope_2 = stack.pop().await.unwrap().unwrap(); + let popped_envelope_3 = stack.pop().await.unwrap().unwrap(); assert_eq!( popped_envelope_1.event_id().unwrap(), envelope.event_id().unwrap() @@ -573,7 +572,7 @@ mod tests { assert_eq!(stack.batches_buffer_size, 5); // We peek the top element. - let peeked_envelope = stack.peek().await.unwrap(); + let peeked_envelope = stack.peek().await.unwrap().unwrap(); assert_eq!( peeked_envelope.event_id().unwrap(), envelopes.clone()[4].event_id().unwrap() @@ -581,7 +580,7 @@ mod tests { // We pop 5 envelopes. 
for envelope in envelopes.iter().rev() { - let popped_envelope = stack.pop().await.unwrap(); + let popped_envelope = stack.pop().await.unwrap().unwrap(); assert_eq!( popped_envelope.event_id().unwrap(), envelope.event_id().unwrap() @@ -609,7 +608,7 @@ mod tests { assert_eq!(stack.batches_buffer_size, 10); // We peek the top element. - let peeked_envelope = stack.peek().await.unwrap(); + let peeked_envelope = stack.peek().await.unwrap().unwrap(); assert_eq!( peeked_envelope.event_id().unwrap(), envelopes.clone()[14].event_id().unwrap() @@ -618,7 +617,7 @@ mod tests { // We pop 10 envelopes, and we expect that the last 10 are in memory, since the first 5 // should have been spooled to disk. for envelope in envelopes[5..15].iter().rev() { - let popped_envelope = stack.pop().await.unwrap(); + let popped_envelope = stack.pop().await.unwrap().unwrap(); assert_eq!( popped_envelope.event_id().unwrap(), envelope.event_id().unwrap() @@ -627,7 +626,7 @@ mod tests { assert_eq!(stack.batches_buffer_size, 0); // We peek the top element, which since the buffer is empty should result in a disk load. - let peeked_envelope = stack.peek().await.unwrap(); + let peeked_envelope = stack.peek().await.unwrap().unwrap(); assert_eq!( peeked_envelope.event_id().unwrap(), envelopes.clone()[4].event_id().unwrap() @@ -639,7 +638,7 @@ mod tests { assert!(stack.push(envelope.clone()).await.is_ok()); // We pop and expect the newly inserted element. - let popped_envelope = stack.pop().await.unwrap(); + let popped_envelope = stack.pop().await.unwrap().unwrap(); assert_eq!( popped_envelope.event_id().unwrap(), envelope.event_id().unwrap() @@ -648,7 +647,7 @@ mod tests { // We pop 5 envelopes, which should not result in a disk load since `peek()` already should // have caused it. 
for envelope in envelopes[0..5].iter().rev() { - let popped_envelope = stack.pop().await.unwrap(); + let popped_envelope = stack.pop().await.unwrap().unwrap(); assert_eq!( popped_envelope.event_id().unwrap(), envelope.event_id().unwrap() diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index dd138d196b..7673a78dd6 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -7,6 +7,8 @@ use relay_config::Config; use tokio::sync::MutexGuard; use crate::envelope::Envelope; +use crate::services::buffer::envelopebuffer::priority::PriorityEnvelopeBuffer; +use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; mod envelopebuffer; mod envelopestack; @@ -27,7 +29,7 @@ pub struct EnvelopeBuffer { /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. - backend: Arc>, + backend: Arc>>, notify: Arc, changed: Arc, } @@ -49,7 +51,7 @@ impl EnvelopeBuffer { /// Adds an envelope to the buffer and wakes any waiting consumers. pub async fn push(&self, envelope: Box) { let mut guard = self.backend.lock().await; - guard.push(envelope); + guard.push(envelope).await; self.notify(); } @@ -61,7 +63,7 @@ impl EnvelopeBuffer { loop { { let mut guard = self.backend.lock().await; - if self.changed.load(Ordering::Relaxed) && guard.peek().is_some() { + if self.changed.load(Ordering::Relaxed) && guard.peek().await.is_some() { self.changed.store(false, Ordering::Relaxed); return Peek { guard, @@ -95,26 +97,28 @@ impl EnvelopeBuffer { /// /// Objects of this type can only exist if the buffer is not empty. 
pub struct Peek<'a> { - guard: MutexGuard<'a, dyn envelopebuffer::EnvelopeBuffer>, + guard: MutexGuard<'a, PriorityEnvelopeBuffer>, notify: &'a tokio::sync::Notify, changed: &'a AtomicBool, } impl Peek<'_> { /// Returns a reference to the next envelope. - pub fn get(&mut self) -> &Envelope { + pub async fn get(&mut self) -> &Envelope { self.guard .peek() + .await .expect("element disappeared while holding lock") } /// Pops the next envelope from the buffer. /// /// This functions consumes the [`Peek`]. - pub fn remove(mut self) -> Box { + pub async fn remove(mut self) -> Box { self.notify(); self.guard .pop() + .await .expect("element disappeared while holding lock") } @@ -159,7 +163,7 @@ mod tests { let cloned_call_count = call_count.clone(); tokio::spawn(async move { loop { - cloned_buffer.peek().await.remove(); + cloned_buffer.peek().await.remove().await; cloned_call_count.fetch_add(1, Ordering::Relaxed); } }); diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 9e6b87fddb..ad77752941 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1044,12 +1044,12 @@ impl ProjectCacheBroker { } } - fn peek_at_envelope(&mut self, mut peek: Peek<'_>) { - let envelope = peek.get(); + async fn peek_at_envelope(&mut self, mut peek: Peek<'_>) { + let envelope = peek.get().await; // TODO: make envelope age configurable. 
if envelope.meta().start_time().elapsed() > MAX_ENVELOPE_AGE { let mut managed_envelope = ManagedEnvelope::new( - peek.remove(), + peek.remove().await, self.services.outcome_aggregator.clone(), self.services.test_store.clone(), ProcessingGroup::Ungrouped, @@ -1072,7 +1072,7 @@ impl ProjectCacheBroker { } ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( - peek.remove(), + peek.remove().await, self.services.outcome_aggregator.clone(), self.services.test_store.clone(), ProcessingGroup::Ungrouped, @@ -1110,7 +1110,7 @@ impl ProjectCacheBroker { let project = self.get_or_create_project(project_key); - for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove()) { + for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove().await) { let managed_envelope = ManagedEnvelope::new( envelope, services.outcome_aggregator.clone(), @@ -1440,7 +1440,7 @@ impl Service for ProjectCacheService { peek = peek_buffer(&envelope_buffer) => { relay_log::trace!("Peeking at envelope"); metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { - broker.peek_at_envelope(peek); + broker.peek_at_envelope(peek).await; // TODO: make sync again? 
}) } else => break, diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 13e86421d0..e2492c8241 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -60,7 +60,6 @@ use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{ManagedEnvelope, MemoryChecker}; -pub mod envelope_stack; pub mod spool_utils; mod sql; From 054778a873a70c57c8ece67a9b696c408b9babd2 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 29 Jul 2024 16:02:40 +0200 Subject: [PATCH 26/62] wip: On-disk creation --- relay-server/benches/benches.rs | 8 +- relay-server/src/lib.rs | 1 + .../src/services/buffer/envelopebuffer/mod.rs | 469 +++++++++++++++++- .../buffer/envelopebuffer/priority.rs | 420 ---------------- .../services/buffer/envelopestack/memory.rs | 42 ++ .../src/services/buffer/envelopestack/mod.rs | 7 + .../services/buffer/envelopestack/sqlite.rs | 174 ++++++- relay-server/src/services/buffer/mod.rs | 3 + 8 files changed, 668 insertions(+), 456 deletions(-) create mode 100644 relay-server/src/services/buffer/envelopestack/memory.rs diff --git a/relay-server/benches/benches.rs b/relay-server/benches/benches.rs index 381f48936d..0befe7857d 100644 --- a/relay-server/benches/benches.rs +++ b/relay-server/benches/benches.rs @@ -8,7 +8,7 @@ use tempfile::TempDir; use tokio::runtime::Runtime; use relay_base_schema::project::ProjectKey; -use relay_server::{Envelope, EnvelopeStack, SQLiteEnvelopeStack}; +use relay_server::{Envelope, EnvelopeStack, SqliteEnvelopeStack}; fn setup_db(path: &PathBuf) -> Pool { let options = SqliteConnectOptions::new() @@ -83,7 +83,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { reset_db(db.clone()).await; }); - let stack = SQLiteEnvelopeStack::new( + let stack = SqliteEnvelopeStack::new( db.clone(), disk_batch_size, 2, @@ -119,7 +119,7 @@ fn 
benchmark_sqlite_envelope_stack(c: &mut Criterion) { runtime.block_on(async { reset_db(db.clone()).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db.clone(), disk_batch_size, 2, @@ -159,7 +159,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { reset_db(db.clone()).await; }); - let stack = SQLiteEnvelopeStack::new( + let stack = SqliteEnvelopeStack::new( db.clone(), disk_batch_size, 2, diff --git a/relay-server/src/lib.rs b/relay-server/src/lib.rs index 8322220935..c3959f8fdc 100644 --- a/relay-server/src/lib.rs +++ b/relay-server/src/lib.rs @@ -269,6 +269,7 @@ mod utils; pub use self::services::spooler::spool_utils; // Public just for benchmarks. pub use self::envelope::Envelope; +pub use services::buffer::{EnvelopeStack, SqliteEnvelopeStack}; #[cfg(test)] mod testutils; diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelopebuffer/mod.rs index b36023c26e..8e66ce0958 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelopebuffer/mod.rs @@ -1,17 +1,472 @@ +use std::path::PathBuf; use std::sync::Arc; use relay_config::Config; use tokio::sync::Mutex; -use crate::services::buffer::envelopebuffer::priority::PriorityEnvelopeBuffer; -use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; - -pub mod priority; // TODO +use crate::services::buffer::envelopestack::memory::{DummyProvider, InMemoryEnvelopeStack}; +use crate::services::buffer::envelopestack::sqlite::SqliteStackProvider; +use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. 
-pub fn create(config: &Config) -> Arc>> { - // TODO: create a DiskMemoryStack +pub fn create(config: &Config) -> Arc>> { Arc::new(Mutex::new( - PriorityEnvelopeBuffer::::new(), + InnerEnvelopeBuffer::::new(), )) } + +use std::cmp::Ordering; +use std::collections::BTreeSet; +use std::time::Instant; + +use relay_base_schema::project::ProjectKey; + +use crate::envelope::Envelope; +use crate::services::buffer::envelopestack::EnvelopeStack; + +pub enum EnvelopeBuffer { + InMemory(InnerEnvelopeBuffer), + Sqlite(InnerEnvelopeBuffer), +} + +impl EnvelopeBuffer { + pub fn from_config(config: &Config) -> Self { + match config.spool_envelopes_path() { + Some(path) => Self::Sqlite(InnerEnvelopeBuffer::::new(path)), + None => Self::InMemory(InnerEnvelopeBuffer::::new()), + } + } + + // TODO: add push, pop, peek +} + +/// An envelope buffer that holds an individual stack for each project/sampling project combination. +/// +/// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope +/// is pushed, popped, or when a project becomes ready. +#[derive(Debug)] +struct InnerEnvelopeBuffer { + /// The central priority queue. + priority_queue: priority_queue::PriorityQueue, Priority>, + /// A lookup table to find all stacks involving a project. + stacks_by_project: hashbrown::HashMap>, + stack_provider: S::Provider, +} + +impl InnerEnvelopeBuffer { + /// Creates an empty buffer. + pub fn new() -> Self { + Self { + stacks_by_project: Default::default(), + priority_queue: Default::default(), + stack_provider: DummyProvider, + } + } +} +impl InnerEnvelopeBuffer { + /// Creates an empty buffer. + pub fn new(path: PathBuf) -> Self { + // TODO: Populate state from db. 
+ Self { + stacks_by_project: Default::default(), + priority_queue: Default::default(), + stack_provider: SqliteStackProvider::new( + path, 100, // TODO: put in config + 2, // TODO: put in config + ), + } + } +} + +impl InnerEnvelopeBuffer { + fn push_stack(&mut self, envelope: Box) { + let received_at = envelope.meta().start_time(); + let stack_key = StackKey::from_envelope(&envelope); + let previous_entry = self.priority_queue.push( + QueueItem { + key: stack_key, + value: S::new(envelope), + }, + Priority::new(received_at), + ); + debug_assert!(previous_entry.is_none()); + for project_key in stack_key.iter() { + self.stacks_by_project + .entry(project_key) + .or_default() + .insert(stack_key); + } + } + + fn pop_stack(&mut self, stack_key: StackKey) { + for project_key in stack_key.iter() { + self.stacks_by_project + .get_mut(&project_key) + .expect("project_key is missing from lookup") + .remove(&stack_key); + } + self.priority_queue.remove(&stack_key); + } + + pub async fn push(&mut self, envelope: Box) { + let received_at = envelope.meta().start_time(); + let stack_key = StackKey::from_envelope(&envelope); + if let Some(( + QueueItem { + key: _, + value: stack, + }, + _, + )) = self.priority_queue.get_mut(&stack_key) + { + stack.push(envelope).await.unwrap(); // TODO: handle errors + } else { + self.push_stack(envelope); + } + self.priority_queue.change_priority_by(&stack_key, |prio| { + prio.received_at = received_at; + }); + } + + pub async fn peek(&mut self) -> Option<&Envelope> { + let ( + QueueItem { + key: _, + value: stack, + }, + _, + ) = self.priority_queue.peek_mut()?; + stack.peek().await.unwrap() // TODO: handle errors + } + + pub async fn pop(&mut self) -> Option> { + let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; + let stack_key = *key; + let envelope = stack.pop().await.unwrap().expect("found an empty stack"); + + let next_received_at = stack + .peek() + .await + .unwrap() // TODO: handle error + 
.map(|next_envelope| next_envelope.meta().start_time()); + match next_received_at { + None => { + self.pop_stack(stack_key); + } + Some(next_received_at) => { + self.priority_queue.change_priority_by(&stack_key, |prio| { + prio.received_at = next_received_at; + }); + } + } + Some(envelope) + } + + pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { + let mut changed = false; + if let Some(stack_keys) = self.stacks_by_project.get(project) { + for stack_key in stack_keys { + self.priority_queue.change_priority_by(stack_key, |stack| { + let mut found = false; + for (subkey, readiness) in [ + (stack_key.0, &mut stack.readiness.0), + (stack_key.1, &mut stack.readiness.1), + ] { + if &subkey == project { + found = true; + if *readiness != is_ready { + changed = true; + *readiness = is_ready; + } + } + } + debug_assert!(found); + }); + } + } + changed + } +} + +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +struct StackKey(ProjectKey, ProjectKey); + +impl StackKey { + fn new(mut key1: ProjectKey, mut key2: ProjectKey) -> Self { + if key2 < key1 { + std::mem::swap(&mut key1, &mut key2); + } + Self(key1, key2) + } + + fn from_envelope(envelope: &Envelope) -> Self { + let own_key = envelope.meta().public_key(); + let sampling_key = envelope.sampling_key().unwrap_or(own_key); + StackKey::new(own_key, sampling_key) + } + + fn iter(&self) -> impl Iterator { + std::iter::once(self.0).chain((self.0 != self.1).then_some(self.1)) + } +} + +#[derive(Debug)] +struct QueueItem { + key: K, + value: V, +} + +impl std::borrow::Borrow for QueueItem { + fn borrow(&self) -> &K { + &self.key + } +} + +impl std::hash::Hash for QueueItem { + fn hash(&self, state: &mut H) { + self.key.hash(state); + } +} + +impl PartialEq for QueueItem { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} + +impl Eq for QueueItem {} + +#[derive(Debug)] +struct Priority { + readiness: Readiness, + received_at: Instant, +} + +impl Priority { + fn 
new(received_at: Instant) -> Self { + Self { + readiness: Readiness::new(), + received_at, + } + } +} + +impl PartialEq for Priority { + fn eq(&self, other: &Self) -> bool { + self.readiness.ready() == other.readiness.ready() && self.received_at == other.received_at + } +} + +impl PartialOrd for Priority { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for Priority {} + +impl Ord for Priority { + fn cmp(&self, other: &Self) -> Ordering { + match (self.readiness.ready(), other.readiness.ready()) { + (true, true) => self.received_at.cmp(&other.received_at), + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + // For non-ready stacks, we invert the priority, such that projects that are not + // ready and did not receive envelopes recently can be evicted. + (false, false) => self.received_at.cmp(&other.received_at).reverse(), + } + } +} + +#[derive(Debug)] +struct Readiness(bool, bool); + +impl Readiness { + fn new() -> Self { + Self(false, false) + } + + fn ready(&self) -> bool { + self.0 && self.1 + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use relay_common::Dsn; + use relay_sampling::DynamicSamplingContext; + use uuid::Uuid; + + use crate::envelope::{Item, ItemType}; + use crate::extractors::RequestMeta; + use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; + + use super::*; + + fn new_envelope(project_key: ProjectKey, sampling_key: Option) -> Box { + let mut envelope = Envelope::from_request( + None, + RequestMeta::new(Dsn::from_str(&format!("http://{project_key}@localhost/1")).unwrap()), + ); + if let Some(sampling_key) = sampling_key { + envelope.set_dsc(DynamicSamplingContext { + public_key: sampling_key, + trace_id: Uuid::new_v4(), + release: None, + user: Default::default(), + replay_id: None, + environment: None, + transaction: None, + sample_rate: None, + sampled: None, + other: Default::default(), + }); + 
envelope.add_item(Item::new(ItemType::Transaction)); + } + envelope + } + + #[tokio::test] + async fn insert_pop() { + let mut buffer = InnerEnvelopeBuffer::::new(); + + let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let project_key3 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); + + assert!(buffer.pop().await.is_none()); + assert!(buffer.peek().await.is_none()); + + buffer.push(new_envelope(project_key1, None)).await; + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); + + buffer.push(new_envelope(project_key2, None)).await; + // Both projects are not ready, so project 1 is on top (has the oldest envelopes): + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); + + buffer.push(new_envelope(project_key3, None)).await; + // All projects are not ready, so project 1 is on top (has the oldest envelopes): + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); + + // After marking a project ready, it goes to the top: + buffer.mark_ready(&project_key3, true); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key3 + ); + assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key3 + ); + + // After popping, project 1 is on top again: + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); + + // Mark project 1 as ready (still on top): + buffer.mark_ready(&project_key1, true); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key1 + ); + + // Mark project 2 as ready as well (now on top because most recent): + buffer.mark_ready(&project_key2, true); + assert_eq!( + buffer.peek().await.unwrap().meta().public_key(), + project_key2 + ); + assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key2 + ); + + // Pop last element: + 
assert_eq!( + buffer.pop().await.unwrap().meta().public_key(), + project_key1 + ); + assert!(buffer.pop().await.is_none()); + assert!(buffer.peek().await.is_none()); + } + + #[tokio::test] + async fn project_internal_order() { + let mut buffer = InnerEnvelopeBuffer::::new(); + + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + + let envelope1 = new_envelope(project_key, None); + let instant1 = envelope1.meta().start_time(); + let envelope2 = new_envelope(project_key, None); + let instant2 = envelope2.meta().start_time(); + + assert!(instant2 > instant1); + + buffer.push(envelope1).await; + buffer.push(envelope2).await; + + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); + assert!(buffer.pop().await.is_none()); + } + + #[tokio::test] + async fn sampling_projects() { + let mut buffer = InnerEnvelopeBuffer::::new(); + + let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); + let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); + + let envelope1 = new_envelope(project_key1, None); + let instant1 = envelope1.meta().start_time(); + buffer.push(envelope1).await; + + let envelope2 = new_envelope(project_key2, None); + let instant2 = envelope2.meta().start_time(); + buffer.push(envelope2).await; + + let envelope3 = new_envelope(project_key1, Some(project_key2)); + let instant3 = envelope3.meta().start_time(); + buffer.push(envelope3).await; + + // Nothing is ready, instant1 is on top: + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + + // Mark project 2 ready, gets on top: + buffer.mark_ready(&project_key2, true); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); + + // Revert + buffer.mark_ready(&project_key2, false); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + + // Project 1 ready: + 
buffer.mark_ready(&project_key1, true); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + + // when both projects are ready, event no 3 ends up on top: + buffer.mark_ready(&project_key2, true); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant3); + assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); + + buffer.mark_ready(&project_key2, false); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); + assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); + + assert!(buffer.pop().await.is_none()); + } +} diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs index 4e34fb993d..8b13789179 100644 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ b/relay-server/src/services/buffer/envelopebuffer/priority.rs @@ -1,421 +1 @@ -use std::cmp::Ordering; -use std::collections::BTreeSet; -use std::time::Instant; -use relay_base_schema::project::ProjectKey; - -use crate::envelope::Envelope; -use crate::services::buffer::envelopestack::EnvelopeStack; - -/// An envelope buffer that holds an individual stack for each project/sampling project combination. -/// -/// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope -/// is pushed, popped, or when a project becomes ready. -#[derive(Debug)] -pub struct PriorityEnvelopeBuffer { - /// The central priority queue. - priority_queue: priority_queue::PriorityQueue, Priority>, - /// A lookup table to find all stacks involving a project. - stacks_by_project: hashbrown::HashMap>, -} - -impl PriorityEnvelopeBuffer { - /// Creates an empty buffer. 
- pub fn new() -> Self { - Self { - stacks_by_project: Default::default(), - priority_queue: Default::default(), - } - } - - fn push_stack(&mut self, envelope: Box) { - let received_at = envelope.meta().start_time(); - let stack_key = StackKey::from_envelope(&envelope); - let previous_entry = self.priority_queue.push( - QueueItem { - key: stack_key, - value: S::new(envelope), - }, - Priority::new(received_at), - ); - debug_assert!(previous_entry.is_none()); - for project_key in stack_key.iter() { - self.stacks_by_project - .entry(project_key) - .or_default() - .insert(stack_key); - } - } - - fn pop_stack(&mut self, stack_key: StackKey) { - for project_key in stack_key.iter() { - self.stacks_by_project - .get_mut(&project_key) - .expect("project_key is missing from lookup") - .remove(&stack_key); - } - self.priority_queue.remove(&stack_key); - } - - pub async fn push(&mut self, envelope: Box) { - let received_at = envelope.meta().start_time(); - let stack_key = StackKey::from_envelope(&envelope); - if let Some(( - QueueItem { - key: _, - value: stack, - }, - _, - )) = self.priority_queue.get_mut(&stack_key) - { - stack.push(envelope).await.unwrap(); // TODO: handle errors - } else { - self.push_stack(envelope); - } - self.priority_queue.change_priority_by(&stack_key, |prio| { - prio.received_at = received_at; - }); - } - - pub async fn peek(&mut self) -> Option<&Envelope> { - let ( - QueueItem { - key: _, - value: stack, - }, - _, - ) = self.priority_queue.peek_mut()?; - stack.peek().await.unwrap() // TODO: handle errors - } - - pub async fn pop(&mut self) -> Option> { - let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; - let stack_key = *key; - let envelope = stack.pop().await.unwrap().expect("found an empty stack"); - - let next_received_at = stack - .peek() - .await - .unwrap() // TODO: handle error - .map(|next_envelope| next_envelope.meta().start_time()); - match next_received_at { - None => { - self.pop_stack(stack_key); - } - 
Some(next_received_at) => { - self.priority_queue.change_priority_by(&stack_key, |prio| { - prio.received_at = next_received_at; - }); - } - } - Some(envelope) - } - - pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { - let mut changed = false; - if let Some(stack_keys) = self.stacks_by_project.get(project) { - for stack_key in stack_keys { - self.priority_queue.change_priority_by(stack_key, |stack| { - let mut found = false; - for (subkey, readiness) in [ - (stack_key.0, &mut stack.readiness.0), - (stack_key.1, &mut stack.readiness.1), - ] { - if &subkey == project { - found = true; - if *readiness != is_ready { - changed = true; - *readiness = is_ready; - } - } - } - debug_assert!(found); - }); - } - } - changed - } -} - -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -struct StackKey(ProjectKey, ProjectKey); - -impl StackKey { - fn new(mut key1: ProjectKey, mut key2: ProjectKey) -> Self { - if key2 < key1 { - std::mem::swap(&mut key1, &mut key2); - } - Self(key1, key2) - } - - fn from_envelope(envelope: &Envelope) -> Self { - let own_key = envelope.meta().public_key(); - let sampling_key = envelope.sampling_key().unwrap_or(own_key); - StackKey::new(own_key, sampling_key) - } - - fn iter(&self) -> impl Iterator { - std::iter::once(self.0).chain((self.0 != self.1).then_some(self.1)) - } -} - -#[derive(Debug)] -struct QueueItem { - key: K, - value: V, -} - -impl std::borrow::Borrow for QueueItem { - fn borrow(&self) -> &K { - &self.key - } -} - -impl std::hash::Hash for QueueItem { - fn hash(&self, state: &mut H) { - self.key.hash(state); - } -} - -impl PartialEq for QueueItem { - fn eq(&self, other: &Self) -> bool { - self.key == other.key - } -} - -impl Eq for QueueItem {} - -#[derive(Debug)] -struct Priority { - readiness: Readiness, - received_at: Instant, -} - -impl Priority { - fn new(received_at: Instant) -> Self { - Self { - readiness: Readiness::new(), - received_at, - } - } -} - -impl PartialEq for Priority 
{ - fn eq(&self, other: &Self) -> bool { - self.readiness.ready() == other.readiness.ready() && self.received_at == other.received_at - } -} - -impl PartialOrd for Priority { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Eq for Priority {} - -impl Ord for Priority { - fn cmp(&self, other: &Self) -> Ordering { - match (self.readiness.ready(), other.readiness.ready()) { - (true, true) => self.received_at.cmp(&other.received_at), - (true, false) => Ordering::Greater, - (false, true) => Ordering::Less, - // For non-ready stacks, we invert the priority, such that projects that are not - // ready and did not receive envelopes recently can be evicted. - (false, false) => self.received_at.cmp(&other.received_at).reverse(), - } - } -} - -#[derive(Debug)] -struct Readiness(bool, bool); - -impl Readiness { - fn new() -> Self { - Self(false, false) - } - - fn ready(&self) -> bool { - self.0 && self.1 - } -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use relay_common::Dsn; - use relay_sampling::DynamicSamplingContext; - use uuid::Uuid; - - use crate::envelope::{Item, ItemType}; - use crate::extractors::RequestMeta; - use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; - - use super::*; - - fn new_envelope(project_key: ProjectKey, sampling_key: Option) -> Box { - let mut envelope = Envelope::from_request( - None, - RequestMeta::new(Dsn::from_str(&format!("http://{project_key}@localhost/1")).unwrap()), - ); - if let Some(sampling_key) = sampling_key { - envelope.set_dsc(DynamicSamplingContext { - public_key: sampling_key, - trace_id: Uuid::new_v4(), - release: None, - user: Default::default(), - replay_id: None, - environment: None, - transaction: None, - sample_rate: None, - sampled: None, - other: Default::default(), - }); - envelope.add_item(Item::new(ItemType::Transaction)); - } - envelope - } - - #[tokio::test] - async fn insert_pop() { - let mut buffer = PriorityEnvelopeBuffer::::new(); - - let 
project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let project_key3 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); - - assert!(buffer.pop().await.is_none()); - assert!(buffer.peek().await.is_none()); - - buffer.push(new_envelope(project_key1, None)).await; - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key1 - ); - - buffer.push(new_envelope(project_key2, None)).await; - // Both projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key1 - ); - - buffer.push(new_envelope(project_key3, None)).await; - // All projects are not ready, so project 1 is on top (has the oldest envelopes): - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key1 - ); - - // After marking a project ready, it goes to the top: - buffer.mark_ready(&project_key3, true); - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key3 - ); - assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), - project_key3 - ); - - // After popping, project 1 is on top again: - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key1 - ); - - // Mark project 1 as ready (still on top): - buffer.mark_ready(&project_key1, true); - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key1 - ); - - // Mark project 2 as ready as well (now on top because most recent): - buffer.mark_ready(&project_key2, true); - assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), - project_key2 - ); - assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), - project_key2 - ); - - // Pop last element: - assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), - project_key1 - ); - assert!(buffer.pop().await.is_none()); - assert!(buffer.peek().await.is_none()); - } - - 
#[tokio::test] - async fn project_internal_order() { - let mut buffer = PriorityEnvelopeBuffer::::new(); - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - - let envelope1 = new_envelope(project_key, None); - let instant1 = envelope1.meta().start_time(); - let envelope2 = new_envelope(project_key, None); - let instant2 = envelope2.meta().start_time(); - - assert!(instant2 > instant1); - - buffer.push(envelope1).await; - buffer.push(envelope2).await; - - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); - assert!(buffer.pop().await.is_none()); - } - - #[tokio::test] - async fn sampling_projects() { - let mut buffer = PriorityEnvelopeBuffer::::new(); - - let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); - - let envelope1 = new_envelope(project_key1, None); - let instant1 = envelope1.meta().start_time(); - buffer.push(envelope1).await; - - let envelope2 = new_envelope(project_key2, None); - let instant2 = envelope2.meta().start_time(); - buffer.push(envelope2).await; - - let envelope3 = new_envelope(project_key1, Some(project_key2)); - let instant3 = envelope3.meta().start_time(); - buffer.push(envelope3).await; - - // Nothing is ready, instant1 is on top: - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); - - // Mark project 2 ready, gets on top: - buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); - - // Revert - buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); - - // Project 1 ready: - buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); - - // when both projects are ready, event no 3 ends up on top: - 
buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant3); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); - - buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); - - assert!(buffer.pop().await.is_none()); - } -} diff --git a/relay-server/src/services/buffer/envelopestack/memory.rs b/relay-server/src/services/buffer/envelopestack/memory.rs new file mode 100644 index 0000000000..98feea4af1 --- /dev/null +++ b/relay-server/src/services/buffer/envelopestack/memory.rs @@ -0,0 +1,42 @@ +use std::convert::Infallible; + +use crate::services::buffer::envelopestack::StackProvider; +use crate::Envelope; + +use super::EnvelopeStack; + +#[derive(Debug)] +pub struct InMemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); + +impl EnvelopeStack for InMemoryEnvelopeStack { + type Error = Infallible; + type Provider = DummyProvider; + + fn new(envelope: Box) -> Self { + Self(vec![envelope]) + } + + async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { + self.0.push(envelope); + Ok(()) + } + + async fn pop(&mut self) -> Result>, Self::Error> { + Ok(self.0.pop()) + } + + async fn peek(&mut self) -> Result, Self::Error> { + Ok(self.0.last().map(Box::as_ref)) + } +} + +pub struct DummyProvider; // TODO: needs pub? 
+ +impl StackProvider for DummyProvider { + type Stack = InMemoryEnvelopeStack; + + // TODO: create empty stack instead + fn create_stack(&self, envelope: Box) -> Self::Stack { + InMemoryEnvelopeStack::new(envelope) + } +} diff --git a/relay-server/src/services/buffer/envelopestack/mod.rs b/relay-server/src/services/buffer/envelopestack/mod.rs index aa81cd4d5f..cc400cf13c 100644 --- a/relay-server/src/services/buffer/envelopestack/mod.rs +++ b/relay-server/src/services/buffer/envelopestack/mod.rs @@ -10,6 +10,8 @@ pub trait EnvelopeStack: Send { /// [`EnvelopeStack`]. type Error: std::fmt::Debug; + type Provider: StackProvider; + /// Creates a new stack with the given element. fn new(envelope: Box) -> Self; @@ -26,3 +28,8 @@ pub trait EnvelopeStack: Send { /// If the stack is empty, an error is returned. fn pop(&mut self) -> impl Future>, Self::Error>>; } + +pub trait StackProvider { + type Stack: EnvelopeStack; + fn create_stack(&self, envelope: Box) -> Self::Stack; +} diff --git a/relay-server/src/services/buffer/envelopestack/sqlite.rs b/relay-server/src/services/buffer/envelopestack/sqlite.rs index 0b35d97aa2..22e1c3e533 100644 --- a/relay-server/src/services/buffer/envelopestack/sqlite.rs +++ b/relay-server/src/services/buffer/envelopestack/sqlite.rs @@ -1,20 +1,27 @@ use crate::envelope::Envelope; use crate::extractors::StartTime; -use crate::services::buffer::envelopestack::EnvelopeStack; +use crate::services::buffer::envelopestack::{EnvelopeStack, StackProvider}; use futures::StreamExt; use relay_base_schema::project::ProjectKey; +use relay_config::Config; use sqlx::query::Query; -use sqlx::sqlite::{SqliteArguments, SqliteRow}; +use sqlx::sqlite::{ + SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, + SqliteRow, SqliteSynchronous, +}; use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use std::collections::VecDeque; use std::error::Error; use std::fmt::Debug; use std::num::NonZeroUsize; +use std::path::{Path, PathBuf}; 
use std::pin::pin; +use std::sync::Arc; +use tokio::fs::DirBuilder; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] -pub enum SQLiteEnvelopeStackError { +pub enum SqliteEnvelopeStackError { /// The stack is empty. #[error("the stack is empty")] Empty, @@ -28,7 +35,7 @@ pub enum SQLiteEnvelopeStackError { /// /// For efficiency reasons, the implementation has an in-memory buffer that is periodically spooled /// to disk in a batched way. -pub struct SQLiteEnvelopeStack { +pub struct SqliteEnvelopeStack { /// Shared SQLite database pool which will be used to read and write from disk. db: Pool, /// Threshold defining the maximum number of envelopes in the `batches_buffer` before spooling @@ -50,7 +57,7 @@ pub struct SQLiteEnvelopeStack { check_disk: bool, } -impl SQLiteEnvelopeStack { +impl SqliteEnvelopeStack { /// Creates a new empty [`SQLiteEnvelopeStack`]. #[allow(dead_code)] pub fn new( @@ -90,7 +97,7 @@ impl SQLiteEnvelopeStack { /// In case there is a failure while writing envelopes, all the envelopes that were enqueued /// to be written to disk are lost. The explanation for this behavior can be found in the body /// of the method. - async fn spool_to_disk(&mut self) -> Result<(), SQLiteEnvelopeStackError> { + async fn spool_to_disk(&mut self) -> Result<(), SqliteEnvelopeStackError> { let Some(envelopes) = self.batches_buffer.pop_front() else { return Ok(()); }; @@ -118,7 +125,7 @@ impl SQLiteEnvelopeStack { // the buffer are lost. We are doing this on purposes, since if we were to have a // database corruption during runtime, and we were to put the values back into the buffer // we will end up with an infinite cycle. - return Err(SQLiteEnvelopeStackError::DatabaseError(err)); + return Err(SqliteEnvelopeStackError::DatabaseError(err)); } // If we successfully spooled to disk, we know that data should be there. 
@@ -134,7 +141,7 @@ impl SQLiteEnvelopeStack { /// /// In case an envelope fails deserialization due to malformed data in the database, the affected /// envelope will not be unspooled and unspooling will continue with the remaining envelopes. - async fn unspool_from_disk(&mut self) -> Result<(), SQLiteEnvelopeStackError> { + async fn unspool_from_disk(&mut self) -> Result<(), SqliteEnvelopeStackError> { let envelopes = build_delete_and_fetch_many_envelopes( self.own_key, self.sampling_key, @@ -184,7 +191,7 @@ impl SQLiteEnvelopeStack { // If there was a database error and no envelopes have been returned, we assume that we are // in a critical state, so we return an error. if let Some(db_error) = db_error { - return Err(SQLiteEnvelopeStackError::DatabaseError(db_error)); + return Err(SqliteEnvelopeStackError::DatabaseError(db_error)); } // In case no envelopes were unspool, we will mark the disk as empty until another round @@ -206,17 +213,17 @@ impl SQLiteEnvelopeStack { } /// Deserializes an [`Envelope`] from a database row. 
- fn extract_envelope(&self, row: SqliteRow) -> Result, SQLiteEnvelopeStackError> { + fn extract_envelope(&self, row: SqliteRow) -> Result, SqliteEnvelopeStackError> { let envelope_row: Vec = row .try_get("envelope") - .map_err(|_| SQLiteEnvelopeStackError::Empty)?; + .map_err(|_| SqliteEnvelopeStackError::Empty)?; let envelope_bytes = bytes::Bytes::from(envelope_row); let mut envelope = - Envelope::parse_bytes(envelope_bytes).map_err(|_| SQLiteEnvelopeStackError::Empty)?; + Envelope::parse_bytes(envelope_bytes).map_err(|_| SqliteEnvelopeStackError::Empty)?; let received_at: i64 = row .try_get("received_at") - .map_err(|_| SQLiteEnvelopeStackError::Empty)?; + .map_err(|_| SqliteEnvelopeStackError::Empty)?; let start_time = StartTime::from_timestamp_millis(received_at as u64); envelope.set_start_time(start_time.into_inner()); @@ -234,11 +241,13 @@ impl SQLiteEnvelopeStack { } } -impl EnvelopeStack for SQLiteEnvelopeStack { - type Error = SQLiteEnvelopeStackError; +impl EnvelopeStack for SqliteEnvelopeStack { + type Error = SqliteEnvelopeStackError; + type Provider = SqliteStackProvider; + #[allow(unused)] fn new(envelope: Box) -> Self { - todo!() + todo!() // TODO: pass a `StackManager` into `new`. } async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { @@ -302,6 +311,121 @@ impl EnvelopeStack for SQLiteEnvelopeStack { } } +enum BufferError {} + +pub struct SqliteStackProvider { + db: Pool, + disk_batch_size: usize, + max_batches: usize, +} + +impl SqliteStackProvider { + /// Creates a new [`BufferService`] from the provided path to the SQLite database file. + pub async fn create(config: Arc) -> Result { + // TODO: error handling + let db = Self::prepare_disk_state(config.clone()).await?; + Ok(Self { + db, + disk_batch_size: 100, // TODO: put in config + max_batches: 2, // TODO: put in config + }) + } + + /// Set up the database and return the current number of envelopes. 
+ /// + /// The directories and spool file will be created if they don't already + /// exist. + async fn setup(path: &Path) -> Result<(), BufferError> { + Self::create_spool_directory(path).await?; + + let options = SqliteConnectOptions::new() + .filename(path) + .journal_mode(SqliteJournalMode::Wal) + .create_if_missing(true); + + let db = SqlitePoolOptions::new() + .connect_with(options) + .await + .map_err(BufferError::SqlxSetupFailed)?; + + sqlx::migrate!("../migrations").run(&db).await?; + Ok(()) + } + + /// Creates the directories for the spool file. + async fn create_spool_directory(path: &Path) -> Result<(), BufferError> { + let Some(parent) = path.parent() else { + return Ok(()); + }; + if !parent.as_os_str().is_empty() && !parent.exists() { + relay_log::debug!("creating directory for spooling file: {}", parent.display()); + DirBuilder::new() + .recursive(true) + .create(&parent) + .await + .map_err(BufferError::FileSetupError)?; + } + Ok(()) + } + + /// Prepares the disk state. + async fn prepare_disk_state(config: Arc) -> Result, BufferError> { + let Some(path) = config.spool_envelopes_path() else { + return BufferError::MissingPath; + }; + + let options = SqliteConnectOptions::new() + .filename(&path) + // The WAL journaling mode uses a write-ahead log instead of a rollback journal to implement transactions. + // The WAL journaling mode is persistent; after being set it stays in effect + // across multiple database connections and after closing and reopening the database. + // + // 1. WAL is significantly faster in most scenarios. + // 2. WAL provides more concurrency as readers do not block writers and a writer does not block readers. Reading and writing can proceed concurrently. + // 3. Disk I/O operations tends to be more sequential using WAL. + // 4. WAL uses many fewer fsync() operations and is thus less vulnerable to problems on systems where the fsync() system call is broken. 
+ .journal_mode(SqliteJournalMode::Wal) + // WAL mode is safe from corruption with synchronous=NORMAL. + // When synchronous is NORMAL, the SQLite database engine will still sync at the most critical moments, but less often than in FULL mode. + // Which guarantees good balance between safety and speed. + .synchronous(SqliteSynchronous::Normal) + // The freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every + // transaction commit. Note, however, that auto-vacuum only truncates the freelist pages from the file. + // Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. + // + // This will helps us to keep the file size under some control. + .auto_vacuum(SqliteAutoVacuum::Full) + // If shared-cache mode is enabled and a thread establishes multiple + // connections to the same database, the connections share a single data and schema cache. + // This can significantly reduce the quantity of memory and IO required by the system. + .shared_cache(true); + + SqlitePoolOptions::new() + .max_connections(config.spool_envelopes_max_connections()) + .min_connections(config.spool_envelopes_min_connections()) + .connect_with(options) + .await + .map_err(BufferError::SqlxSetupFailed) + + // TODO: populate priority queue from disk. + } +} + +impl StackProvider for SqliteStackProvider { + type Stack = SqliteEnvelopeStack; + + fn create_stack(&self, envelope: Box) -> Self::Stack { + let own_key = envelope.meta().public_key(); + SqliteEnvelopeStack::new( + self.db.clone(), + self.disk_batch_size, + self.max_batches, + own_key, + envelope.sampling_key().unwrap_or(own_key), + ) + } +} + /// Struct which contains all the rows that have to be inserted in the database when storing an /// [`Envelope`]. 
struct InsertEnvelope { @@ -453,7 +577,7 @@ mod tests { #[should_panic] async fn test_push_with_mismatching_project_keys() { let db = setup_db(false).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 2, 2, @@ -468,7 +592,7 @@ mod tests { #[tokio::test] async fn test_push_when_db_is_not_valid() { let db = setup_db(false).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 2, 2, @@ -488,7 +612,7 @@ mod tests { let envelope = mock_envelope(Instant::now()); assert!(matches!( stack.push(envelope).await, - Err(SQLiteEnvelopeStackError::DatabaseError(_)) + Err(SqliteEnvelopeStackError::DatabaseError(_)) )); // The stack now contains the last of the 3 elements that were added. If we add a new one @@ -519,7 +643,7 @@ mod tests { #[tokio::test] async fn test_pop_when_db_is_not_valid() { let db = setup_db(false).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 2, 2, @@ -530,14 +654,14 @@ mod tests { // We pop with an invalid db. assert!(matches!( stack.pop().await, - Err(SQLiteEnvelopeStackError::DatabaseError(_)) + Err(SqliteEnvelopeStackError::DatabaseError(_)) )); } #[tokio::test] async fn test_pop_when_stack_is_empty() { let db = setup_db(true).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 2, 2, @@ -548,14 +672,14 @@ mod tests { // We pop with no elements. 
assert!(matches!( stack.pop().await, - Err(SQLiteEnvelopeStackError::Empty) + Err(SqliteEnvelopeStackError::Empty) )); } #[tokio::test] async fn test_push_below_threshold_and_pop() { let db = setup_db(true).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 5, 2, @@ -591,7 +715,7 @@ mod tests { #[tokio::test] async fn test_push_above_threshold_and_pop() { let db = setup_db(true).await; - let mut stack = SQLiteEnvelopeStack::new( + let mut stack = SqliteEnvelopeStack::new( db, 5, 2, diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 7673a78dd6..185c79a9c3 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -13,6 +13,9 @@ use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; mod envelopebuffer; mod envelopestack; +pub use envelopestack::sqlite::SqliteEnvelopeStack; +pub use envelopestack::EnvelopeStack; + /// Async envelope buffering interface. /// /// Access to the buffer is synchronized by a tokio lock. 
From 761b6c17410555d55ee10e130a460f6c7681f360 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 29 Jul 2024 16:46:58 +0200 Subject: [PATCH 27/62] Improve --- .../mod.rs | 27 ++- .../memory.rs | 2 +- .../{envelopestack => envelope_stack}/mod.rs | 6 +- .../sqlite.rs | 159 +++--------------- .../buffer/envelopebuffer/priority.rs | 1 - relay-server/src/services/buffer/mod.rs | 19 ++- .../src/services/buffer/stack_provider/mod.rs | 2 + .../services/buffer/stack_provider/simple.rs | 13 ++ .../services/buffer/stack_provider/sqlite.rs | 137 +++++++++++++++ 9 files changed, 204 insertions(+), 162 deletions(-) rename relay-server/src/services/buffer/{envelopebuffer => envelope_buffer}/mod.rs (98%) rename relay-server/src/services/buffer/{envelopestack => envelope_stack}/memory.rs (94%) rename relay-server/src/services/buffer/{envelopestack => envelope_stack}/mod.rs (97%) rename relay-server/src/services/buffer/{envelopestack => envelope_stack}/sqlite.rs (82%) delete mode 100644 relay-server/src/services/buffer/envelopebuffer/priority.rs create mode 100644 relay-server/src/services/buffer/stack_provider/mod.rs create mode 100644 relay-server/src/services/buffer/stack_provider/simple.rs create mode 100644 relay-server/src/services/buffer/stack_provider/sqlite.rs diff --git a/relay-server/src/services/buffer/envelopebuffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs similarity index 98% rename from relay-server/src/services/buffer/envelopebuffer/mod.rs rename to relay-server/src/services/buffer/envelope_buffer/mod.rs index 8e66ce0958..5d0656e2ad 100644 --- a/relay-server/src/services/buffer/envelopebuffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -1,11 +1,18 @@ +use std::cmp::Ordering; +use std::collections::BTreeSet; use std::path::PathBuf; use std::sync::Arc; +use std::time::Instant; -use relay_config::Config; use tokio::sync::Mutex; -use crate::services::buffer::envelopestack::memory::{DummyProvider, 
InMemoryEnvelopeStack}; -use crate::services::buffer::envelopestack::sqlite::SqliteStackProvider; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; + +use crate::envelope::Envelope; +use crate::services::buffer::envelope_stack::memory::{DummyProvider, InMemoryEnvelopeStack}; +use crate::services::buffer::envelope_stack::sqlite::SqliteStackProvider; +use crate::services::buffer::envelope_stack::EnvelopeStack; use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. @@ -15,15 +22,6 @@ pub fn create(config: &Config) -> Arc), Sqlite(InnerEnvelopeBuffer), @@ -295,13 +293,14 @@ impl Readiness { mod tests { use std::str::FromStr; + use uuid::Uuid; + use relay_common::Dsn; use relay_sampling::DynamicSamplingContext; - use uuid::Uuid; use crate::envelope::{Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; + use crate::services::buffer::envelope_stack::memory::InMemoryEnvelopeStack; use super::*; diff --git a/relay-server/src/services/buffer/envelopestack/memory.rs b/relay-server/src/services/buffer/envelope_stack/memory.rs similarity index 94% rename from relay-server/src/services/buffer/envelopestack/memory.rs rename to relay-server/src/services/buffer/envelope_stack/memory.rs index 98feea4af1..4e980ef2d6 100644 --- a/relay-server/src/services/buffer/envelopestack/memory.rs +++ b/relay-server/src/services/buffer/envelope_stack/memory.rs @@ -1,6 +1,6 @@ use std::convert::Infallible; -use crate::services::buffer::envelopestack::StackProvider; +use crate::services::buffer::envelope_stack::StackProvider; use crate::Envelope; use super::EnvelopeStack; diff --git a/relay-server/src/services/buffer/envelopestack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs similarity index 97% rename from relay-server/src/services/buffer/envelopestack/mod.rs rename to 
relay-server/src/services/buffer/envelope_stack/mod.rs index cc400cf13c..54d9753f9e 100644 --- a/relay-server/src/services/buffer/envelopestack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -1,6 +1,7 @@ -use crate::envelope::Envelope; use std::future::Future; +use crate::envelope::Envelope; + pub mod memory; pub mod sqlite; @@ -10,8 +11,6 @@ pub trait EnvelopeStack: Send { /// [`EnvelopeStack`]. type Error: std::fmt::Debug; - type Provider: StackProvider; - /// Creates a new stack with the given element. fn new(envelope: Box) -> Self; @@ -31,5 +30,6 @@ pub trait EnvelopeStack: Send { pub trait StackProvider { type Stack: EnvelopeStack; + fn create_stack(&self, envelope: Box) -> Self::Stack; } diff --git a/relay-server/src/services/buffer/envelopestack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs similarity index 82% rename from relay-server/src/services/buffer/envelopestack/sqlite.rs rename to relay-server/src/services/buffer/envelope_stack/sqlite.rs index 22e1c3e533..da4fde18fd 100644 --- a/relay-server/src/services/buffer/envelopestack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -1,15 +1,3 @@ -use crate::envelope::Envelope; -use crate::extractors::StartTime; -use crate::services::buffer::envelopestack::{EnvelopeStack, StackProvider}; -use futures::StreamExt; -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use sqlx::query::Query; -use sqlx::sqlite::{ - SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, - SqliteRow, SqliteSynchronous, -}; -use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use std::collections::VecDeque; use std::error::Error; use std::fmt::Debug; @@ -17,8 +5,24 @@ use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use std::pin::pin; use std::sync::Arc; + +use futures::StreamExt; +use sqlx::query::Query; +use sqlx::sqlite::{ + SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, 
SqliteJournalMode, SqlitePoolOptions, + SqliteRow, SqliteSynchronous, +}; +use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use tokio::fs::DirBuilder; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; + +use crate::envelope::Envelope; +use crate::extractors::StartTime; +use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; +use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; + /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] pub enum SqliteEnvelopeStackError { @@ -243,7 +247,6 @@ impl SqliteEnvelopeStack { impl EnvelopeStack for SqliteEnvelopeStack { type Error = SqliteEnvelopeStackError; - type Provider = SqliteStackProvider; #[allow(unused)] fn new(envelope: Box) -> Self { @@ -311,121 +314,6 @@ impl EnvelopeStack for SqliteEnvelopeStack { } } -enum BufferError {} - -pub struct SqliteStackProvider { - db: Pool, - disk_batch_size: usize, - max_batches: usize, -} - -impl SqliteStackProvider { - /// Creates a new [`BufferService`] from the provided path to the SQLite database file. - pub async fn create(config: Arc) -> Result { - // TODO: error handling - let db = Self::prepare_disk_state(config.clone()).await?; - Ok(Self { - db, - disk_batch_size: 100, // TODO: put in config - max_batches: 2, // TODO: put in config - }) - } - - /// Set up the database and return the current number of envelopes. - /// - /// The directories and spool file will be created if they don't already - /// exist. 
- async fn setup(path: &Path) -> Result<(), BufferError> { - Self::create_spool_directory(path).await?; - - let options = SqliteConnectOptions::new() - .filename(path) - .journal_mode(SqliteJournalMode::Wal) - .create_if_missing(true); - - let db = SqlitePoolOptions::new() - .connect_with(options) - .await - .map_err(BufferError::SqlxSetupFailed)?; - - sqlx::migrate!("../migrations").run(&db).await?; - Ok(()) - } - - /// Creates the directories for the spool file. - async fn create_spool_directory(path: &Path) -> Result<(), BufferError> { - let Some(parent) = path.parent() else { - return Ok(()); - }; - if !parent.as_os_str().is_empty() && !parent.exists() { - relay_log::debug!("creating directory for spooling file: {}", parent.display()); - DirBuilder::new() - .recursive(true) - .create(&parent) - .await - .map_err(BufferError::FileSetupError)?; - } - Ok(()) - } - - /// Prepares the disk state. - async fn prepare_disk_state(config: Arc) -> Result, BufferError> { - let Some(path) = config.spool_envelopes_path() else { - return BufferError::MissingPath; - }; - - let options = SqliteConnectOptions::new() - .filename(&path) - // The WAL journaling mode uses a write-ahead log instead of a rollback journal to implement transactions. - // The WAL journaling mode is persistent; after being set it stays in effect - // across multiple database connections and after closing and reopening the database. - // - // 1. WAL is significantly faster in most scenarios. - // 2. WAL provides more concurrency as readers do not block writers and a writer does not block readers. Reading and writing can proceed concurrently. - // 3. Disk I/O operations tends to be more sequential using WAL. - // 4. WAL uses many fewer fsync() operations and is thus less vulnerable to problems on systems where the fsync() system call is broken. - .journal_mode(SqliteJournalMode::Wal) - // WAL mode is safe from corruption with synchronous=NORMAL. 
- // When synchronous is NORMAL, the SQLite database engine will still sync at the most critical moments, but less often than in FULL mode. - // Which guarantees good balance between safety and speed. - .synchronous(SqliteSynchronous::Normal) - // The freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every - // transaction commit. Note, however, that auto-vacuum only truncates the freelist pages from the file. - // Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. - // - // This will helps us to keep the file size under some control. - .auto_vacuum(SqliteAutoVacuum::Full) - // If shared-cache mode is enabled and a thread establishes multiple - // connections to the same database, the connections share a single data and schema cache. - // This can significantly reduce the quantity of memory and IO required by the system. - .shared_cache(true); - - SqlitePoolOptions::new() - .max_connections(config.spool_envelopes_max_connections()) - .min_connections(config.spool_envelopes_min_connections()) - .connect_with(options) - .await - .map_err(BufferError::SqlxSetupFailed) - - // TODO: populate priority queue from disk. - } -} - -impl StackProvider for SqliteStackProvider { - type Stack = SqliteEnvelopeStack; - - fn create_stack(&self, envelope: Box) -> Self::Stack { - let own_key = envelope.meta().public_key(); - SqliteEnvelopeStack::new( - self.db.clone(), - self.disk_batch_size, - self.max_batches, - own_key, - envelope.sampling_key().unwrap_or(own_key), - ) - } -} - /// Struct which contains all the rows that have to be inserted in the database when storing an /// [`Envelope`]. 
struct InsertEnvelope { @@ -480,21 +368,24 @@ fn received_at(envelope: &Envelope) -> i64 { #[cfg(test)] mod tests { - use relay_base_schema::project::ProjectKey; - use relay_event_schema::protocol::EventId; - use relay_sampling::DynamicSamplingContext; - use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; - use sqlx::{Pool, Sqlite}; use std::collections::BTreeMap; use std::path::Path; use std::time::{Duration, Instant}; + + use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; + use sqlx::{Pool, Sqlite}; use tokio::fs::DirBuilder; use uuid::Uuid; - use super::*; + use relay_base_schema::project::ProjectKey; + use relay_event_schema::protocol::EventId; + use relay_sampling::DynamicSamplingContext; + use crate::envelope::{Envelope, Item, ItemType}; use crate::extractors::RequestMeta; + use super::*; + fn request_meta() -> RequestMeta { let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" .parse() diff --git a/relay-server/src/services/buffer/envelopebuffer/priority.rs b/relay-server/src/services/buffer/envelopebuffer/priority.rs deleted file mode 100644 index 8b13789179..0000000000 --- a/relay-server/src/services/buffer/envelopebuffer/priority.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 185c79a9c3..0d22c3a086 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -2,19 +2,20 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; +use tokio::sync::MutexGuard; + +pub use envelope_stack::sqlite::SqliteEnvelopeStack; +pub use envelope_stack::EnvelopeStack; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use tokio::sync::MutexGuard; use crate::envelope::Envelope; -use crate::services::buffer::envelopebuffer::priority::PriorityEnvelopeBuffer; -use crate::services::buffer::envelopestack::memory::InMemoryEnvelopeStack; - -mod 
envelopebuffer; -mod envelopestack; +use crate::services::buffer::envelope_buffer::priority::PriorityEnvelopeBuffer; +use crate::services::buffer::envelope_stack::memory::InMemoryEnvelopeStack; -pub use envelopestack::sqlite::SqliteEnvelopeStack; -pub use envelopestack::EnvelopeStack; +mod envelope_buffer; +mod envelope_stack; +mod stack_provider; /// Async envelope buffering interface. /// @@ -45,7 +46,7 @@ impl EnvelopeBuffer { pub fn from_config(config: &Config) -> Option { // TODO: create a disk-based backend if db config is given (loads stacks from db). config.spool_v2().then(|| Self { - backend: envelopebuffer::create(config), + backend: envelope_buffer::create(config), notify: Arc::new(tokio::sync::Notify::new()), changed: Arc::new(AtomicBool::new(true)), }) diff --git a/relay-server/src/services/buffer/stack_provider/mod.rs b/relay-server/src/services/buffer/stack_provider/mod.rs new file mode 100644 index 0000000000..79e36200bf --- /dev/null +++ b/relay-server/src/services/buffer/stack_provider/mod.rs @@ -0,0 +1,2 @@ +pub mod sqlite; +pub mod simple; diff --git a/relay-server/src/services/buffer/stack_provider/simple.rs b/relay-server/src/services/buffer/stack_provider/simple.rs new file mode 100644 index 0000000000..65e6b9b45d --- /dev/null +++ b/relay-server/src/services/buffer/stack_provider/simple.rs @@ -0,0 +1,13 @@ +use std::marker::PhantomData; +use crate::{Envelope, EnvelopeStack}; +use crate::services::buffer::envelope_stack::StackProvider; + +pub struct SimpleStackProvider(PhantomData); + +impl StackProvider for SimpleStackProvider { + type Stack = S; + + fn create_stack(&self, envelope: Box) -> Self::Stack { + S::new(envelope) + } +} diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs new file mode 100644 index 0000000000..adc15fc427 --- /dev/null +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -0,0 +1,137 @@ +use 
crate::services::buffer::envelope_stack::StackProvider; +use crate::{Envelope, EnvelopeStack, SqliteEnvelopeStack}; +use relay_config::Config; +use sqlx::sqlite::{ + SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous, +}; +use sqlx::{Pool, Sqlite}; +use std::path::Path; +use std::sync::Arc; +use tokio::fs::DirBuilder; + +#[derive(Debug, thiserror::Error)] +pub enum SqliteStackProviderError { + #[error("failed to setup the database: {0}")] + SqlxSetupFailed(sqlx::Error), + + #[error("failed to create the spool file: {0}")] + FileSetupError(std::io::Error), + + #[error("the path to which the database is configured doesn't exist")] + MissingPath, +} + +pub struct SqliteStackProvider { + db: Pool, + disk_batch_size: usize, + max_batches: usize, +} + +impl SqliteStackProvider { + /// Creates a new [`SqliteStackProvider`] from the provided path to the SQLite database file. + pub async fn create(config: Arc) -> Result { + // TODO: error handling + let db = Self::prepare_disk(config.clone()).await?; + Ok(Self { + db, + disk_batch_size: 100, // TODO: put in config + max_batches: 2, // TODO: put in config + }) + } + + /// Set up the database and return the current number of envelopes. + /// + /// The directories and spool file will be created if they don't already + /// exist. + async fn setup(path: &Path) -> Result<(), SqliteStackProviderError> { + Self::create_spool_directory(path).await?; + + let options = SqliteConnectOptions::new() + .filename(path) + .journal_mode(SqliteJournalMode::Wal) + .create_if_missing(true); + + let db = SqlitePoolOptions::new() + .connect_with(options) + .await + .map_err(SqliteStackProviderError::SqlxSetupFailed)?; + + sqlx::migrate!("../migrations").run(&db).await.unwrap(); + + Ok(()) + } + + /// Creates the directories for the spool file. 
+ async fn create_spool_directory(path: &Path) -> Result<(), SqliteStackProviderError> { + let Some(parent) = path.parent() else { + return Ok(()); + }; + + if !parent.as_os_str().is_empty() && !parent.exists() { + relay_log::debug!("creating directory for spooling file: {}", parent.display()); + DirBuilder::new() + .recursive(true) + .create(&parent) + .await + .map_err(SqliteStackProviderError::FileSetupError)?; + } + + Ok(()) + } + + /// Prepares the disk for reading and writing data. + async fn prepare_disk(config: Arc) -> Result, SqliteStackProviderError> { + let Some(path) = config.spool_envelopes_path() else { + return Err(SqliteStackProviderError::MissingPath); + }; + + let options = SqliteConnectOptions::new() + .filename(&path) + // The WAL journaling mode uses a write-ahead log instead of a rollback journal to implement transactions. + // The WAL journaling mode is persistent; after being set it stays in effect + // across multiple database connections and after closing and reopening the database. + // + // 1. WAL is significantly faster in most scenarios. + // 2. WAL provides more concurrency as readers do not block writers and a writer does not block readers. Reading and writing can proceed concurrently. + // 3. Disk I/O operations tends to be more sequential using WAL. + // 4. WAL uses many fewer fsync() operations and is thus less vulnerable to problems on systems where the fsync() system call is broken. + .journal_mode(SqliteJournalMode::Wal) + // WAL mode is safe from corruption with synchronous=NORMAL. + // When synchronous is NORMAL, the SQLite database engine will still sync at the most critical moments, but less often than in FULL mode. + // Which guarantees good balance between safety and speed. + .synchronous(SqliteSynchronous::Normal) + // The freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every + // transaction commit. 
Note, however, that auto-vacuum only truncates the freelist pages from the file. + // Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. + // + // This will helps us to keep the file size under some control. + .auto_vacuum(SqliteAutoVacuum::Full) + // If shared-cache mode is enabled and a thread establishes multiple + // connections to the same database, the connections share a single data and schema cache. + // This can significantly reduce the quantity of memory and IO required by the system. + .shared_cache(true); + + SqlitePoolOptions::new() + .max_connections(config.spool_envelopes_max_connections()) + .min_connections(config.spool_envelopes_min_connections()) + .connect_with(options) + .await + .map_err(SqliteStackProviderError::SqlxSetupFailed) + } +} + +impl StackProvider for SqliteStackProvider { + type Stack = SqliteEnvelopeStack; + + fn create_stack(&self, envelope: Box) -> Self::Stack { + let own_key = envelope.meta().public_key(); + // TODO: start loading from disk the initial batch of envelopes. 
+ SqliteEnvelopeStack::new( + self.db.clone(), + self.disk_batch_size, + self.max_batches, + own_key, + envelope.sampling_key().unwrap_or(own_key), + ) + } +} From 956b24954c9e6f87de8e982892c19a0453c02a32 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 29 Jul 2024 17:17:59 +0200 Subject: [PATCH 28/62] Improve --- relay-server/src/envelope.rs | 7 + .../services/buffer/envelope_buffer/mod.rs | 34 ++- .../services/buffer/envelope_stack/memory.rs | 27 +-- .../src/services/buffer/envelope_stack/mod.rs | 4 +- .../services/buffer/envelope_stack/sqlite.rs | 10 +- .../src/services/buffer/envelope_store/mod.rs | 23 ++ .../services/buffer/envelope_store/sqlite.rs | 228 ++++++++++++++++++ relay-server/src/services/buffer/mod.rs | 7 +- .../services/buffer/stack_provider/memory.rs | 13 + .../src/services/buffer/stack_provider/mod.rs | 2 +- .../services/buffer/stack_provider/simple.rs | 13 - .../services/buffer/stack_provider/sqlite.rs | 111 +-------- relay-server/src/utils/managed_envelope.rs | 2 +- 13 files changed, 317 insertions(+), 164 deletions(-) create mode 100644 relay-server/src/services/buffer/envelope_store/mod.rs create mode 100644 relay-server/src/services/buffer/envelope_store/sqlite.rs create mode 100644 relay-server/src/services/buffer/stack_provider/memory.rs delete mode 100644 relay-server/src/services/buffer/stack_provider/simple.rs diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index b95366d3d1..adcb2d0073 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -1238,6 +1238,13 @@ impl Envelope { self.dsc().map(|dsc| dsc.public_key) } + /// Returns the time at which the envelope was received at this Relay. + /// + /// This is the date time equivalent to [`start_time`](Self::start_time). + pub fn received_at(&self) -> DateTime { + relay_common::time::instant_to_date_time(self.meta().start_time()) + } + /// Sets the event id on the envelope. 
pub fn set_event_id(&mut self, event_id: EventId) { self.headers.event_id = Some(event_id); diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 5d0656e2ad..fa032f14a3 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -1,6 +1,5 @@ use std::cmp::Ordering; use std::collections::BTreeSet; -use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; @@ -10,20 +9,19 @@ use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::envelope::Envelope; -use crate::services::buffer::envelope_stack::memory::{DummyProvider, InMemoryEnvelopeStack}; +use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; use crate::services::buffer::envelope_stack::sqlite::SqliteStackProvider; use crate::services::buffer::envelope_stack::EnvelopeStack; +use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. -pub fn create(config: &Config) -> Arc>> { - Arc::new(Mutex::new( - InnerEnvelopeBuffer::::new(), - )) +pub fn create(config: &Config) -> Arc>> { + Arc::new(Mutex::new(InnerEnvelopeBuffer::::new())) } pub enum EnvelopeBuffer { - InMemory(InnerEnvelopeBuffer), + InMemory(InnerEnvelopeBuffer), Sqlite(InnerEnvelopeBuffer), } @@ -31,7 +29,7 @@ impl EnvelopeBuffer { pub fn from_config(config: &Config) -> Self { match config.spool_envelopes_path() { Some(path) => Self::Sqlite(InnerEnvelopeBuffer::::new(path)), - None => Self::InMemory(InnerEnvelopeBuffer::::new()), + None => Self::InMemory(InnerEnvelopeBuffer::::new()), } } @@ -51,27 +49,23 @@ struct InnerEnvelopeBuffer { stack_provider: S::Provider, } -impl InnerEnvelopeBuffer { +impl InnerEnvelopeBuffer { /// Creates an empty buffer. 
pub fn new() -> Self { Self { stacks_by_project: Default::default(), priority_queue: Default::default(), - stack_provider: DummyProvider, + stack_provider: MemoryStackProvider, } } } impl InnerEnvelopeBuffer { /// Creates an empty buffer. - pub fn new(path: PathBuf) -> Self { - // TODO: Populate state from db. + pub fn new(config: Arc) -> Self { Self { stacks_by_project: Default::default(), priority_queue: Default::default(), - stack_provider: SqliteStackProvider::new( - path, 100, // TODO: put in config - 2, // TODO: put in config - ), + stack_provider: SqliteStackProvider::new(config), } } } @@ -300,7 +294,7 @@ mod tests { use crate::envelope::{Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::envelope_stack::memory::InMemoryEnvelopeStack; + use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; use super::*; @@ -329,7 +323,7 @@ mod tests { #[tokio::test] async fn insert_pop() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); @@ -404,7 +398,7 @@ mod tests { #[tokio::test] async fn project_internal_order() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopeBuffer::::new(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -425,7 +419,7 @@ mod tests { #[tokio::test] async fn sampling_projects() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); diff --git a/relay-server/src/services/buffer/envelope_stack/memory.rs b/relay-server/src/services/buffer/envelope_stack/memory.rs index 4e980ef2d6..01a1e4a301 100644 --- 
a/relay-server/src/services/buffer/envelope_stack/memory.rs +++ b/relay-server/src/services/buffer/envelope_stack/memory.rs @@ -1,16 +1,16 @@ use std::convert::Infallible; -use crate::services::buffer::envelope_stack::StackProvider; -use crate::Envelope; - use super::EnvelopeStack; +use crate::services::buffer::stack_provider::memory::MemoryStackProvider; +use crate::Envelope; #[derive(Debug)] -pub struct InMemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); +pub struct MemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); -impl EnvelopeStack for InMemoryEnvelopeStack { +impl EnvelopeStack for MemoryEnvelopeStack { type Error = Infallible; - type Provider = DummyProvider; + + type Provider = MemoryStackProvider; fn new(envelope: Box) -> Self { Self(vec![envelope]) @@ -21,22 +21,11 @@ impl EnvelopeStack for InMemoryEnvelopeStack { Ok(()) } - async fn pop(&mut self) -> Result>, Self::Error> { - Ok(self.0.pop()) - } - async fn peek(&mut self) -> Result, Self::Error> { Ok(self.0.last().map(Box::as_ref)) } -} - -pub struct DummyProvider; // TODO: needs pub? -impl StackProvider for DummyProvider { - type Stack = InMemoryEnvelopeStack; - - // TODO: create empty stack instead - fn create_stack(&self, envelope: Box) -> Self::Stack { - InMemoryEnvelopeStack::new(envelope) + async fn pop(&mut self) -> Result>, Self::Error> { + Ok(self.0.pop()) } } diff --git a/relay-server/src/services/buffer/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs index 54d9753f9e..631a4f7fea 100644 --- a/relay-server/src/services/buffer/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -11,6 +11,8 @@ pub trait EnvelopeStack: Send { /// [`EnvelopeStack`]. type Error: std::fmt::Debug; + type Provider: StackProvider; + /// Creates a new stack with the given element. 
fn new(envelope: Box) -> Self; @@ -30,6 +32,6 @@ pub trait EnvelopeStack: Send { pub trait StackProvider { type Stack: EnvelopeStack; - + fn create_stack(&self, envelope: Box) -> Self::Stack; } diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index da4fde18fd..226b714491 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -21,6 +21,8 @@ use relay_config::Config; use crate::envelope::Envelope; use crate::extractors::StartTime; use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; +use crate::services::buffer::envelope_store::sqlite::SqliteEnvelopeStore; +use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. @@ -41,7 +43,7 @@ pub enum SqliteEnvelopeStackError { /// to disk in a batched way. pub struct SqliteEnvelopeStack { /// Shared SQLite database pool which will be used to read and write from disk. - db: Pool, + envelope_store: SqliteEnvelopeStore, /// Threshold defining the maximum number of envelopes in the `batches_buffer` before spooling /// to disk will take place. spool_threshold: NonZeroUsize, @@ -65,14 +67,14 @@ impl SqliteEnvelopeStack { /// Creates a new empty [`SQLiteEnvelopeStack`]. 
#[allow(dead_code)] pub fn new( - db: Pool, + envelope_store: SqliteEnvelopeStore, disk_batch_size: usize, max_batches: usize, own_key: ProjectKey, sampling_key: ProjectKey, ) -> Self { Self { - db, + envelope_store, spool_threshold: NonZeroUsize::new(disk_batch_size * max_batches) .expect("the spool threshold must be > 0"), batch_size: NonZeroUsize::new(disk_batch_size) @@ -248,6 +250,8 @@ impl SqliteEnvelopeStack { impl EnvelopeStack for SqliteEnvelopeStack { type Error = SqliteEnvelopeStackError; + type Provider = SqliteStackProvider; + #[allow(unused)] fn new(envelope: Box) -> Self { todo!() // TODO: pass a `StackManager` into `new`. diff --git a/relay-server/src/services/buffer/envelope_store/mod.rs b/relay-server/src/services/buffer/envelope_store/mod.rs new file mode 100644 index 0000000000..97131c2a52 --- /dev/null +++ b/relay-server/src/services/buffer/envelope_store/mod.rs @@ -0,0 +1,23 @@ +pub mod sqlite; + +use crate::Envelope; +use std::future::Future; + +pub trait EnvelopeStore { + type Envelope; + + type Error; + + fn insert_many( + &mut self, + envelopes: impl Iterator, + ) -> impl Future>; + + fn delete_many(&mut self) -> impl Future, Self::Error>>; + + fn project_keys_pairs( + &self, + ) -> impl Future, Self::Error>>; + + fn used_size(&self) -> impl Future>; +} diff --git a/relay-server/src/services/buffer/envelope_store/sqlite.rs b/relay-server/src/services/buffer/envelope_store/sqlite.rs new file mode 100644 index 0000000000..580bdd7446 --- /dev/null +++ b/relay-server/src/services/buffer/envelope_store/sqlite.rs @@ -0,0 +1,228 @@ +use crate::services::buffer::envelope_store::EnvelopeStore; +use crate::Envelope; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use sqlx::query::Query; +use sqlx::sqlite::{ + SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, + SqliteSynchronous, +}; +use sqlx::{Pool, QueryBuilder, Sqlite}; +use std::future::Future; +use std::iter; +use 
std::path::Path; +use std::sync::Arc; +use tokio::fs::DirBuilder; + +struct InsertEnvelope { + received_at: i64, + own_key: ProjectKey, + sampling_key: ProjectKey, + encoded_envelope: Vec, +} + +impl<'a> From<&'a Envelope> for InsertEnvelope { + fn from(value: &'a Envelope) -> Self { + let own_key = value.meta().public_key(); + let sampling_key = value.sampling_key().unwrap_or(own_key); + + InsertEnvelope { + received_at: value.received_at().timestamp_millis(), + own_key, + sampling_key, + encoded_envelope: value.to_vec().unwrap(), + } + } +} + +/// An error returned when doing an operation on [`SqliteEnvelopeStore`]. +#[derive(Debug, thiserror::Error)] +pub enum SqliteEnvelopeStoreError { + #[error("failed to setup the database: {0}")] + SqlxSetupFailed(sqlx::Error), + + #[error("failed to create the spool file: {0}")] + FileSetupError(std::io::Error), + + #[error("no file path for the spool was provided")] + NoFilePath, +} + +#[derive(Clone)] +pub struct SqliteEnvelopeStore { + db: Pool, + max_disk_size: usize, +} + +impl SqliteEnvelopeStore { + /// Prepares the [`SqliteEnvelopeStore`] by running all the necessary migrations and preparing + /// the folders where data will be stored. + pub async fn prepare( + config: Arc, + ) -> Result { + // If no path is provided, we can't do disk spooling. + let Some(path) = config.spool_envelopes_path() else { + return Err(SqliteEnvelopeStoreError::NoFilePath); + }; + + relay_log::info!("buffer file {}", path.to_string_lossy()); + relay_log::info!( + "max memory size {}", + config.spool_envelopes_max_memory_size() + ); + relay_log::info!("max disk size {}", config.spool_envelopes_max_disk_size()); + + Self::setup(&path).await?; + + let options = SqliteConnectOptions::new() + .filename(&path) + // The WAL journaling mode uses a write-ahead log instead of a rollback journal to implement transactions. 
+ // The WAL journaling mode is persistent; after being set it stays in effect + // across multiple database connections and after closing and reopening the database. + // + // 1. WAL is significantly faster in most scenarios. + // 2. WAL provides more concurrency as readers do not block writers and a writer does not block readers. Reading and writing can proceed concurrently. + // 3. Disk I/O operations tends to be more sequential using WAL. + // 4. WAL uses many fewer fsync() operations and is thus less vulnerable to problems on systems where the fsync() system call is broken. + .journal_mode(SqliteJournalMode::Wal) + // WAL mode is safe from corruption with synchronous=NORMAL. + // When synchronous is NORMAL, the SQLite database engine will still sync at the most critical moments, but less often than in FULL mode. + // Which guarantees good balance between safety and speed. + .synchronous(SqliteSynchronous::Normal) + // The freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every + // transaction commit. Note, however, that auto-vacuum only truncates the freelist pages from the file. + // Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. + // + // This will helps us to keep the file size under some control. + .auto_vacuum(SqliteAutoVacuum::Full) + // If shared-cache mode is enabled and a thread establishes multiple + // connections to the same database, the connections share a single data and schema cache. + // This can significantly reduce the quantity of memory and IO required by the system. 
+ .shared_cache(true); + + let db = SqlitePoolOptions::new() + .max_connections(config.spool_envelopes_max_connections()) + .min_connections(config.spool_envelopes_min_connections()) + .connect_with(options) + .await + .map_err(SqliteEnvelopeStoreError::SqlxSetupFailed)?; + + Ok(SqliteEnvelopeStore { + db, + max_disk_size: config.spool_envelopes_max_disk_size(), + }) + } + + /// Set up the database and return the current number of envelopes. + /// + /// The directories and spool file will be created if they don't already + /// exist. + async fn setup(path: &Path) -> Result<(), SqliteEnvelopeStoreError> { + Self::create_spool_directory(path).await?; + + let options = SqliteConnectOptions::new() + .filename(path) + .journal_mode(SqliteJournalMode::Wal) + .create_if_missing(true); + + let db = SqlitePoolOptions::new() + .connect_with(options) + .await + .map_err(SqliteEnvelopeStoreError::SqlxSetupFailed)?; + + sqlx::migrate!("../migrations").run(&db).await?; + Ok(()) + } + + /// Creates the directories for the spool file. 
+ async fn create_spool_directory(path: &Path) -> Result<(), SqliteEnvelopeStoreError> { + let Some(parent) = path.parent() else { + return Ok(()); + }; + if !parent.as_os_str().is_empty() && !parent.exists() { + relay_log::debug!("creating directory for spooling file: {}", parent.display()); + DirBuilder::new() + .recursive(true) + .create(&parent) + .await + .map_err(SqliteEnvelopeStoreError::FileSetupError)?; + } + Ok(()) + } +} + +impl EnvelopeStore for SqliteEnvelopeStore { + type Envelope = InsertEnvelope; + type Error = SqliteEnvelopeStoreError; + + async fn insert_many( + &mut self, + envelopes: impl Iterator, + ) -> Result<(), Self::Error> { + todo!() + } + + async fn delete_many(&mut self) -> Result, Self::Error> { + todo!() + } + + async fn project_keys_pairs( + &self, + ) -> Result, Self::Error> { + iter::empty() + } + + async fn used_size(&self) -> Result { + todo!() + } +} + +/// Builds a query that inserts many [`Envelope`]s in the database. +fn build_insert_many_envelopes<'a>( + envelopes: impl Iterator, +) -> QueryBuilder<'a, Sqlite> { + let mut builder: QueryBuilder = + QueryBuilder::new("INSERT INTO envelopes (received_at, own_key, sampling_key, envelope) "); + + builder.push_values(envelopes, |mut b, envelope| { + b.push_bind(envelope.received_at) + .push_bind(envelope.own_key.to_string()) + .push_bind(envelope.sampling_key.to_string()) + .push_bind(envelope.encoded_envelope); + }); + + builder +} + +/// Builds a query that deletes many [`Envelope`] from the database. +pub fn build_delete_and_fetch_many_envelopes<'a>( + own_key: ProjectKey, + project_key: ProjectKey, + batch_size: i64, +) -> Query<'a, Sqlite, SqliteArguments<'a>> { + sqlx::query( + "DELETE FROM + envelopes + WHERE id IN (SELECT id FROM envelopes WHERE own_key = ? AND sampling_key = ? + ORDER BY received_at DESC LIMIT ?) 
+ RETURNING + received_at, own_key, sampling_key, envelope", + ) + .bind(own_key.to_string()) + .bind(project_key.to_string()) + .bind(batch_size) +} + +/// Creates a query which fetches the number of used database pages multiplied by the page size. +/// +/// This info used to estimate the current allocated database size. +pub fn estimate_size<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { + sqlx::query( + r#"SELECT (page_count - freelist_count) * page_size as size FROM pragma_page_count(), pragma_freelist_count(), pragma_page_size();"#, + ) +} + +/// Returns the query to select all the unique combinations of own and sampling keys. +pub fn get_keys<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { + sqlx::query("SELECT DISTINCT own_key, sampling_key FROM envelopes;") +} diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 0d22c3a086..b05da89515 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -11,10 +11,11 @@ use relay_config::Config; use crate::envelope::Envelope; use crate::services::buffer::envelope_buffer::priority::PriorityEnvelopeBuffer; -use crate::services::buffer::envelope_stack::memory::InMemoryEnvelopeStack; +use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; mod envelope_buffer; mod envelope_stack; +mod envelope_store; mod stack_provider; /// Async envelope buffering interface. @@ -33,7 +34,7 @@ pub struct EnvelopeBuffer { /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. - backend: Arc>>, + backend: Arc>>, notify: Arc, changed: Arc, } @@ -101,7 +102,7 @@ impl EnvelopeBuffer { /// /// Objects of this type can only exist if the buffer is not empty. 
pub struct Peek<'a> { - guard: MutexGuard<'a, PriorityEnvelopeBuffer>, + guard: MutexGuard<'a, PriorityEnvelopeBuffer>, notify: &'a tokio::sync::Notify, changed: &'a AtomicBool, } diff --git a/relay-server/src/services/buffer/stack_provider/memory.rs b/relay-server/src/services/buffer/stack_provider/memory.rs new file mode 100644 index 0000000000..d4911ea504 --- /dev/null +++ b/relay-server/src/services/buffer/stack_provider/memory.rs @@ -0,0 +1,13 @@ +use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; +use crate::services::buffer::envelope_stack::StackProvider; +use crate::{Envelope, EnvelopeStack}; + +pub struct MemoryStackProvider; + +impl StackProvider for MemoryStackProvider { + type Stack = MemoryEnvelopeStack; + + fn create_stack(&self, envelope: Box) -> Self::Stack { + MemoryEnvelopeStack::new(envelope) + } +} diff --git a/relay-server/src/services/buffer/stack_provider/mod.rs b/relay-server/src/services/buffer/stack_provider/mod.rs index 79e36200bf..ae663f641d 100644 --- a/relay-server/src/services/buffer/stack_provider/mod.rs +++ b/relay-server/src/services/buffer/stack_provider/mod.rs @@ -1,2 +1,2 @@ +pub mod memory; pub mod sqlite; -pub mod simple; diff --git a/relay-server/src/services/buffer/stack_provider/simple.rs b/relay-server/src/services/buffer/stack_provider/simple.rs deleted file mode 100644 index 65e6b9b45d..0000000000 --- a/relay-server/src/services/buffer/stack_provider/simple.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::marker::PhantomData; -use crate::{Envelope, EnvelopeStack}; -use crate::services::buffer::envelope_stack::StackProvider; - -pub struct SimpleStackProvider(PhantomData); - -impl StackProvider for SimpleStackProvider { - type Stack = S; - - fn create_stack(&self, envelope: Box) -> Self::Stack { - S::new(envelope) - } -} diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index adc15fc427..c13f03ea5f 100644 --- 
a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -1,123 +1,28 @@ use crate::services::buffer::envelope_stack::StackProvider; +use crate::services::buffer::envelope_store::sqlite::{ + SqliteEnvelopeStore, SqliteEnvelopeStoreError, +}; use crate::{Envelope, EnvelopeStack, SqliteEnvelopeStack}; use relay_config::Config; -use sqlx::sqlite::{ - SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous, -}; -use sqlx::{Pool, Sqlite}; -use std::path::Path; use std::sync::Arc; -use tokio::fs::DirBuilder; - -#[derive(Debug, thiserror::Error)] -pub enum SqliteStackProviderError { - #[error("failed to setup the database: {0}")] - SqlxSetupFailed(sqlx::Error), - - #[error("failed to create the spool file: {0}")] - FileSetupError(std::io::Error), - - #[error("the path to which the database is configured doesn't exist")] - MissingPath, -} pub struct SqliteStackProvider { - db: Pool, + envelope_store: SqliteEnvelopeStore, disk_batch_size: usize, max_batches: usize, } impl SqliteStackProvider { /// Creates a new [`SqliteStackProvider`] from the provided path to the SQLite database file. - pub async fn create(config: Arc) -> Result { + pub async fn new(config: Arc) -> Result { // TODO: error handling - let db = Self::prepare_disk(config.clone()).await?; + let envelope_store = SqliteEnvelopeStore::prepare(config).await?; Ok(Self { - db, + envelope_store, disk_batch_size: 100, // TODO: put in config max_batches: 2, // TODO: put in config }) } - - /// Set up the database and return the current number of envelopes. - /// - /// The directories and spool file will be created if they don't already - /// exist. 
- async fn setup(path: &Path) -> Result<(), SqliteStackProviderError> { - Self::create_spool_directory(path).await?; - - let options = SqliteConnectOptions::new() - .filename(path) - .journal_mode(SqliteJournalMode::Wal) - .create_if_missing(true); - - let db = SqlitePoolOptions::new() - .connect_with(options) - .await - .map_err(SqliteStackProviderError::SqlxSetupFailed)?; - - sqlx::migrate!("../migrations").run(&db).await.unwrap(); - - Ok(()) - } - - /// Creates the directories for the spool file. - async fn create_spool_directory(path: &Path) -> Result<(), SqliteStackProviderError> { - let Some(parent) = path.parent() else { - return Ok(()); - }; - - if !parent.as_os_str().is_empty() && !parent.exists() { - relay_log::debug!("creating directory for spooling file: {}", parent.display()); - DirBuilder::new() - .recursive(true) - .create(&parent) - .await - .map_err(SqliteStackProviderError::FileSetupError)?; - } - - Ok(()) - } - - /// Prepares the disk for reading and writing data. - async fn prepare_disk(config: Arc) -> Result, SqliteStackProviderError> { - let Some(path) = config.spool_envelopes_path() else { - return Err(SqliteStackProviderError::MissingPath); - }; - - let options = SqliteConnectOptions::new() - .filename(&path) - // The WAL journaling mode uses a write-ahead log instead of a rollback journal to implement transactions. - // The WAL journaling mode is persistent; after being set it stays in effect - // across multiple database connections and after closing and reopening the database. - // - // 1. WAL is significantly faster in most scenarios. - // 2. WAL provides more concurrency as readers do not block writers and a writer does not block readers. Reading and writing can proceed concurrently. - // 3. Disk I/O operations tends to be more sequential using WAL. - // 4. WAL uses many fewer fsync() operations and is thus less vulnerable to problems on systems where the fsync() system call is broken. 
- .journal_mode(SqliteJournalMode::Wal) - // WAL mode is safe from corruption with synchronous=NORMAL. - // When synchronous is NORMAL, the SQLite database engine will still sync at the most critical moments, but less often than in FULL mode. - // Which guarantees good balance between safety and speed. - .synchronous(SqliteSynchronous::Normal) - // The freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every - // transaction commit. Note, however, that auto-vacuum only truncates the freelist pages from the file. - // Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. - // - // This will helps us to keep the file size under some control. - .auto_vacuum(SqliteAutoVacuum::Full) - // If shared-cache mode is enabled and a thread establishes multiple - // connections to the same database, the connections share a single data and schema cache. - // This can significantly reduce the quantity of memory and IO required by the system. - .shared_cache(true); - - SqlitePoolOptions::new() - .max_connections(config.spool_envelopes_max_connections()) - .min_connections(config.spool_envelopes_min_connections()) - .connect_with(options) - .await - .map_err(SqliteStackProviderError::SqlxSetupFailed) - } } impl StackProvider for SqliteStackProvider { @@ -127,7 +32,7 @@ impl StackProvider for SqliteStackProvider { let own_key = envelope.meta().public_key(); // TODO: start loading from disk the initial batch of envelopes. 
SqliteEnvelopeStack::new( - self.db.clone(), + self.envelope_store.clone(), self.disk_batch_size, self.max_batches, own_key, diff --git a/relay-server/src/utils/managed_envelope.rs b/relay-server/src/utils/managed_envelope.rs index f737f27a81..a31b6d6e4a 100644 --- a/relay-server/src/utils/managed_envelope.rs +++ b/relay-server/src/utils/managed_envelope.rs @@ -503,7 +503,7 @@ impl ManagedEnvelope { /// /// This is the date time equivalent to [`start_time`](Self::start_time). pub fn received_at(&self) -> DateTime { - relay_common::time::instant_to_date_time(self.envelope().meta().start_time()) + self.envelope.received_at() } /// Resets inner state to ensure there's no more logging. From 8864e61560dec7160a4c826cf0dc1d86299f92f4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 29 Jul 2024 17:56:30 +0200 Subject: [PATCH 29/62] Improve --- .../services/buffer/envelope_buffer/mod.rs | 2 +- .../services/buffer/envelope_stack/memory.rs | 10 +- .../src/services/buffer/envelope_stack/mod.rs | 3 - .../services/buffer/envelope_stack/sqlite.rs | 134 ++++-------------- .../src/services/buffer/envelope_store/mod.rs | 8 +- .../services/buffer/envelope_store/sqlite.rs | 116 +++++++++++++-- 6 files changed, 151 insertions(+), 122 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index fa032f14a3..c327e49b16 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -16,7 +16,7 @@ use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. 
-pub fn create(config: &Config) -> Arc>> { +pub fn create(_config: Arc) -> Arc>> { Arc::new(Mutex::new(InnerEnvelopeBuffer::::new())) } diff --git a/relay-server/src/services/buffer/envelope_stack/memory.rs b/relay-server/src/services/buffer/envelope_stack/memory.rs index 01a1e4a301..2e1f2b9429 100644 --- a/relay-server/src/services/buffer/envelope_stack/memory.rs +++ b/relay-server/src/services/buffer/envelope_stack/memory.rs @@ -7,15 +7,17 @@ use crate::Envelope; #[derive(Debug)] pub struct MemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); +impl MemoryEnvelopeStack { + fn new(envelope: Box) -> Self { + Self(vec![envelope]) + } +} + impl EnvelopeStack for MemoryEnvelopeStack { type Error = Infallible; type Provider = MemoryStackProvider; - fn new(envelope: Box) -> Self { - Self(vec![envelope]) - } - async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { self.0.push(envelope); Ok(()) diff --git a/relay-server/src/services/buffer/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs index 631a4f7fea..574b5e0bd3 100644 --- a/relay-server/src/services/buffer/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -13,9 +13,6 @@ pub trait EnvelopeStack: Send { type Provider: StackProvider; - /// Creates a new stack with the given element. - fn new(envelope: Box) -> Self; - /// Pushes an [`Envelope`] on top of the stack. 
fn push(&mut self, envelope: Box) -> impl Future>; diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 226b714491..262a702f7b 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -22,6 +22,7 @@ use crate::envelope::Envelope; use crate::extractors::StartTime; use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::envelope_store::sqlite::SqliteEnvelopeStore; +use crate::services::buffer::envelope_store::EnvelopeStore; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; @@ -64,8 +65,8 @@ pub struct SqliteEnvelopeStack { } impl SqliteEnvelopeStack { + // TODO: implement method for initializing the stack given disk contents. /// Creates a new empty [`SQLiteEnvelopeStack`]. - #[allow(dead_code)] pub fn new( envelope_store: SqliteEnvelopeStore, disk_batch_size: usize, @@ -109,30 +110,14 @@ impl SqliteEnvelopeStack { }; self.batches_buffer_size -= envelopes.len(); - let insert_envelopes = envelopes.iter().map(|e| InsertEnvelope { - received_at: received_at(e), - own_key: self.own_key, - sampling_key: self.sampling_key, - encoded_envelope: e.to_vec().unwrap(), - }); - - // TODO: check how we can do this in a background tokio task in a non-blocking way. - if let Err(err) = build_insert_many_envelopes(insert_envelopes) - .build() - .execute(&self.db) - .await - { - relay_log::error!( - error = &err as &dyn Error, - "failed to spool envelopes to disk", - ); + let envelopes = envelopes.iter().map(|e| e.as_ref().into()); - // When early return here, we are acknowledging that the elements that we popped from - // the buffer are lost. 
We are doing this on purposes, since if we were to have a - // database corruption during runtime, and we were to put the values back into the buffer - // we will end up with an infinite cycle. - return Err(SqliteEnvelopeStackError::DatabaseError(err)); - } + // When early return here, we are acknowledging that the elements that we popped from + // the buffer are lost. We are doing this on purposes, since if we were to have a + // database corruption during runtime, and we were to put the values back into the buffer + // we will end up with an infinite cycle. + // TODO: handle error. + self.envelope_store.insert_many(envelopes).await.unwrap(); // If we successfully spooled to disk, we know that data should be there. self.check_disk = true; @@ -148,95 +133,39 @@ impl SqliteEnvelopeStack { /// In case an envelope fails deserialization due to malformed data in the database, the affected /// envelope will not be unspooled and unspooling will continue with the remaining envelopes. async fn unspool_from_disk(&mut self) -> Result<(), SqliteEnvelopeStackError> { - let envelopes = build_delete_and_fetch_many_envelopes( - self.own_key, - self.sampling_key, - self.batch_size.get() as i64, - ) - .fetch(&self.db) - .peekable(); - - let mut envelopes = pin!(envelopes); - if envelopes.as_mut().peek().await.is_none() { - return Ok(()); - } - - // We use a sorted vector to order envelopes that are deleted from the database. - // Unfortunately we have to do this because SQLite `DELETE` with `RETURNING` doesn't - // return deleted rows in a specific order. 
- let mut extracted_envelopes = Vec::with_capacity(self.batch_size.get()); - let mut db_error = None; - while let Some(envelope) = envelopes.as_mut().next().await { - let envelope = match envelope { - Ok(envelope) => envelope, - Err(err) => { - relay_log::error!( - error = &err as &dyn Error, - "failed to unspool the envelopes from the disk", - ); - db_error = Some(err); - - continue; - } - }; - - match self.extract_envelope(envelope) { - Ok(envelope) => { - extracted_envelopes.push(envelope); - } - Err(err) => { - relay_log::error!( - error = &err as &dyn Error, - "failed to extract the envelope unspooled from disk", - ) - } - } - } + // TODO: handle error. + let envelopes = self + .envelope_store + .delete_many( + self.own_key, + self.sampling_key, + self.batch_size.get() as i64, + ) + .await + .unwrap(); - if extracted_envelopes.is_empty() { + if envelopes.is_empty() { // If there was a database error and no envelopes have been returned, we assume that we are // in a critical state, so we return an error. - if let Some(db_error) = db_error { - return Err(SqliteEnvelopeStackError::DatabaseError(db_error)); - } + // if let Some(db_error) = db_error { + // return Err(SqliteEnvelopeStackError::DatabaseError(db_error)); + // } - // In case no envelopes were unspool, we will mark the disk as empty until another round - // of spooling takes place. + // In case no envelopes were unspooled, we will mark the disk as empty until another + // round of spooling takes place. self.check_disk = false; return Ok(()); } - // We sort envelopes by `received_at`. - extracted_envelopes.sort_by_key(|a| received_at(a)); - // We push in the back of the buffer, since we still want to give priority to // incoming envelopes that have a more recent timestamp. 
- self.batches_buffer_size += extracted_envelopes.len(); - self.batches_buffer.push_front(extracted_envelopes); + self.batches_buffer_size += envelopes.len(); + self.batches_buffer.push_front(envelopes); Ok(()) } - /// Deserializes an [`Envelope`] from a database row. - fn extract_envelope(&self, row: SqliteRow) -> Result, SqliteEnvelopeStackError> { - let envelope_row: Vec = row - .try_get("envelope") - .map_err(|_| SqliteEnvelopeStackError::Empty)?; - let envelope_bytes = bytes::Bytes::from(envelope_row); - let mut envelope = - Envelope::parse_bytes(envelope_bytes).map_err(|_| SqliteEnvelopeStackError::Empty)?; - - let received_at: i64 = row - .try_get("received_at") - .map_err(|_| SqliteEnvelopeStackError::Empty)?; - let start_time = StartTime::from_timestamp_millis(received_at as u64); - - envelope.set_start_time(start_time.into_inner()); - - Ok(envelope) - } - /// Validates that the incoming [`Envelope`] has the same project keys at the /// [`SQLiteEnvelopeStack`]. fn validate_envelope(&self, envelope: &Envelope) -> bool { @@ -252,11 +181,6 @@ impl EnvelopeStack for SqliteEnvelopeStack { type Provider = SqliteStackProvider; - #[allow(unused)] - fn new(envelope: Box) -> Self { - todo!() // TODO: pass a `StackManager` into `new`. - } - async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { debug_assert!(self.validate_envelope(&envelope)); @@ -348,7 +272,7 @@ fn build_insert_many_envelopes<'a>( pub fn build_delete_and_fetch_many_envelopes<'a>( own_key: ProjectKey, project_key: ProjectKey, - batch_size: i64, + limit: i64, ) -> Query<'a, Sqlite, SqliteArguments<'a>> { sqlx::query( "DELETE FROM @@ -360,7 +284,7 @@ pub fn build_delete_and_fetch_many_envelopes<'a>( ) .bind(own_key.to_string()) .bind(project_key.to_string()) - .bind(batch_size) + .bind(limit) } /// Computes the `received_at` timestamps of an [`Envelope`] based on the `start_time` header. 
diff --git a/relay-server/src/services/buffer/envelope_store/mod.rs b/relay-server/src/services/buffer/envelope_store/mod.rs index 97131c2a52..97fa04cd27 100644 --- a/relay-server/src/services/buffer/envelope_store/mod.rs +++ b/relay-server/src/services/buffer/envelope_store/mod.rs @@ -1,6 +1,7 @@ pub mod sqlite; use crate::Envelope; +use relay_base_schema::project::ProjectKey; use std::future::Future; pub trait EnvelopeStore { @@ -13,7 +14,12 @@ pub trait EnvelopeStore { envelopes: impl Iterator, ) -> impl Future>; - fn delete_many(&mut self) -> impl Future, Self::Error>>; + fn delete_many( + &mut self, + own_key: ProjectKey, + sampling_key: ProjectKey, + limit: i64, + ) -> impl Future>, Self::Error>>; fn project_keys_pairs( &self, diff --git a/relay-server/src/services/buffer/envelope_store/sqlite.rs b/relay-server/src/services/buffer/envelope_store/sqlite.rs index 580bdd7446..ebc15dc4c8 100644 --- a/relay-server/src/services/buffer/envelope_store/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_store/sqlite.rs @@ -1,16 +1,20 @@ +use crate::extractors::StartTime; +use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; use crate::services::buffer::envelope_store::EnvelopeStore; use crate::Envelope; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use sqlx::migrate::MigrateError; use sqlx::query::Query; use sqlx::sqlite::{ SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, - SqliteSynchronous, + SqliteRow, SqliteSynchronous, }; use sqlx::{Pool, QueryBuilder, Sqlite}; -use std::future::Future; +use std::error::Error; use std::iter; use std::path::Path; +use std::pin::pin; use std::sync::Arc; use tokio::fs::DirBuilder; @@ -44,8 +48,14 @@ pub enum SqliteEnvelopeStoreError { #[error("failed to create the spool file: {0}")] FileSetupError(std::io::Error), + #[error("an error occurred while spooling envelopes: {0}")] + SpoolingError(sqlx::Error), + #[error("no file path for 
the spool was provided")] NoFilePath, + + #[error("error during the migration of the database: {0}")] + MigrationError(MigrateError), } #[derive(Clone)] @@ -130,7 +140,11 @@ impl SqliteEnvelopeStore { .await .map_err(SqliteEnvelopeStoreError::SqlxSetupFailed)?; - sqlx::migrate!("../migrations").run(&db).await?; + sqlx::migrate!("../migrations") + .run(&db) + .await + .map_err(SqliteEnvelopeStoreError::MigrationError)?; + Ok(()) } @@ -139,6 +153,7 @@ impl SqliteEnvelopeStore { let Some(parent) = path.parent() else { return Ok(()); }; + if !parent.as_os_str().is_empty() && !parent.exists() { relay_log::debug!("creating directory for spooling file: {}", parent.display()); DirBuilder::new() @@ -147,36 +162,121 @@ impl SqliteEnvelopeStore { .await .map_err(SqliteEnvelopeStoreError::FileSetupError)?; } + Ok(()) } } impl EnvelopeStore for SqliteEnvelopeStore { type Envelope = InsertEnvelope; + type Error = SqliteEnvelopeStoreError; async fn insert_many( &mut self, envelopes: impl Iterator, ) -> Result<(), Self::Error> { - todo!() + if let Err(err) = build_insert_many_envelopes(envelopes) + .build() + .execute(&self.db) + .await + { + relay_log::error!( + error = &err as &dyn Error, + "failed to spool envelopes to disk", + ); + + return Err(SqliteEnvelopeStoreError::SpoolingError(err)); + } + + Ok(()) } - async fn delete_many(&mut self) -> Result, Self::Error> { - todo!() + async fn delete_many( + &mut self, + own_key: ProjectKey, + sampling_key: ProjectKey, + limit: i64, + ) -> Result>, Self::Error> { + let envelopes = build_delete_and_fetch_many_envelopes(own_key, sampling_key, limit) + .fetch(&self.db) + .peekable(); + + let mut envelopes = pin!(envelopes); + if envelopes.as_mut().peek().await.is_none() { + return Ok(vec![]); + } + + // We use a sorted vector to order envelopes that are deleted from the database. + // Unfortunately we have to do this because SQLite `DELETE` with `RETURNING` doesn't + // return deleted rows in a specific order. 
+ let mut extracted_envelopes = Vec::with_capacity(limit as usize); + let mut db_error = None; + while let Some(envelope) = envelopes.as_mut().next().await { + let envelope = match envelope { + Ok(envelope) => envelope, + Err(err) => { + relay_log::error!( + error = &err as &dyn Error, + "failed to unspool the envelopes from the disk", + ); + db_error = Some(err); + + continue; + } + }; + + match extract_envelope(envelope) { + Ok(envelope) => { + extracted_envelopes.push(envelope); + } + Err(err) => { + relay_log::error!( + error = &err as &dyn Error, + "failed to extract the envelope unspooled from disk", + ) + } + } + } + + // We sort envelopes by `received_at`. + extracted_envelopes.sort_by_key(|a| a.received_at()); + + Ok(extracted_envelopes) } async fn project_keys_pairs( &self, ) -> Result, Self::Error> { - iter::empty() + // TODO: implement. + Ok(iter::empty()) } async fn used_size(&self) -> Result { - todo!() + // TODO: implement. + Ok(10) } } +/// Deserializes an [`Envelope`] from a database row. +fn extract_envelope(row: SqliteRow) -> Result, SqliteEnvelopeStackError> { + let envelope_row: Vec = row + .try_get("envelope") + .map_err(|_| SqliteEnvelopeStackError::Empty)?; + let envelope_bytes = bytes::Bytes::from(envelope_row); + let mut envelope = + Envelope::parse_bytes(envelope_bytes).map_err(|_| SqliteEnvelopeStackError::Empty)?; + + let received_at: i64 = row + .try_get("received_at") + .map_err(|_| SqliteEnvelopeStackError::Empty)?; + let start_time = StartTime::from_timestamp_millis(received_at as u64); + + envelope.set_start_time(start_time.into_inner()); + + Ok(envelope) +} + /// Builds a query that inserts many [`Envelope`]s in the database. 
fn build_insert_many_envelopes<'a>( envelopes: impl Iterator, From 990bb4e13ebd8c3c7f80cace736ebed20d5d98e9 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 29 Jul 2024 18:07:43 +0200 Subject: [PATCH 30/62] Improve --- relay-server/src/service.rs | 8 ++++---- .../services/buffer/envelope_buffer/mod.rs | 9 +++++---- .../services/buffer/envelope_stack/sqlite.rs | 19 +++++++++++++------ .../services/buffer/envelope_store/sqlite.rs | 6 +++++- relay-server/src/services/buffer/mod.rs | 19 +++++++++---------- relay-server/src/services/project_cache.rs | 12 ++++++------ 6 files changed, 42 insertions(+), 31 deletions(-) diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 078aa5f3af..4288347094 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use std::time::Duration; use crate::metrics::{MetricOutcomes, MetricStats}; -use crate::services::buffer::EnvelopeBuffer; +use crate::services::buffer::EnvelopesBufferManager; use crate::services::stats::RelayStats; use anyhow::{Context, Result}; use axum::extract::FromRequestParts; @@ -139,7 +139,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option, registry: Registry, } @@ -257,7 +257,7 @@ impl ServiceState { upstream_relay.clone(), global_config.clone(), ); - let envelope_buffer = EnvelopeBuffer::from_config(&config); + let envelope_buffer = EnvelopesBufferManager::from_config(&config); ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), @@ -324,7 +324,7 @@ impl ServiceState { &self.inner.memory_checker } - pub fn envelope_buffer(&self) -> Option<&EnvelopeBuffer> { + pub fn envelope_buffer(&self) -> Option<&EnvelopesBufferManager> { self.inner.envelope_buffer.as_ref() } diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs 
b/relay-server/src/services/buffer/envelope_buffer/mod.rs index c327e49b16..3652b0b2bf 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -15,17 +15,18 @@ use crate::services::buffer::envelope_stack::EnvelopeStack; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::SqliteEnvelopeStack; -/// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. -pub fn create(_config: Arc) -> Arc>> { +/// Creates a memory or disk based [`EnvelopesBuffer`], depending on the given config. +pub fn create(_config: &Config) -> Arc>> { Arc::new(Mutex::new(InnerEnvelopeBuffer::::new())) } -pub enum EnvelopeBuffer { +#[derive(Debug)] +pub enum EnvelopesBuffer { InMemory(InnerEnvelopeBuffer), Sqlite(InnerEnvelopeBuffer), } -impl EnvelopeBuffer { +impl EnvelopesBuffer { pub fn from_config(config: &Config) -> Self { match config.spool_envelopes_path() { Some(path) => Self::Sqlite(InnerEnvelopeBuffer::::new(path)), diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 262a702f7b..ab7ea5ea4e 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -38,6 +38,7 @@ pub enum SqliteEnvelopeStackError { DatabaseError(#[from] sqlx::Error), } +#[derive(Debug)] /// An [`EnvelopeStack`] that is implemented on an SQLite database. 
/// /// For efficiency reasons, the implementation has an in-memory buffer that is periodically spooled @@ -396,8 +397,9 @@ mod tests { #[should_panic] async fn test_push_with_mismatching_project_keys() { let db = setup_db(false).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 2, 2, ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), @@ -411,8 +413,9 @@ mod tests { #[tokio::test] async fn test_push_when_db_is_not_valid() { let db = setup_db(false).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 2, 2, ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), @@ -462,8 +465,9 @@ mod tests { #[tokio::test] async fn test_pop_when_db_is_not_valid() { let db = setup_db(false).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 2, 2, ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), @@ -480,8 +484,9 @@ mod tests { #[tokio::test] async fn test_pop_when_stack_is_empty() { let db = setup_db(true).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 2, 2, ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), @@ -498,8 +503,9 @@ mod tests { #[tokio::test] async fn test_push_below_threshold_and_pop() { let db = setup_db(true).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 5, 2, ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), @@ -534,8 +540,9 @@ mod tests { #[tokio::test] async fn test_push_above_threshold_and_pop() { let db = setup_db(true).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); let mut stack = SqliteEnvelopeStack::new( - db, + envelope_store, 5, 2, 
ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), diff --git a/relay-server/src/services/buffer/envelope_store/sqlite.rs b/relay-server/src/services/buffer/envelope_store/sqlite.rs index ebc15dc4c8..40f9f8a9ba 100644 --- a/relay-server/src/services/buffer/envelope_store/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_store/sqlite.rs @@ -58,13 +58,17 @@ pub enum SqliteEnvelopeStoreError { MigrationError(MigrateError), } -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct SqliteEnvelopeStore { db: Pool, max_disk_size: usize, } impl SqliteEnvelopeStore { + pub fn new(db: Pool, max_disk_size: usize) -> Self { + Self { db, max_disk_size } + } + /// Prepares the [`SqliteEnvelopeStore`] by running all the necessary migrations and preparing /// the folders where data will be stored. pub async fn prepare( diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index b05da89515..b7f4d53501 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -10,8 +10,7 @@ use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::envelope::Envelope; -use crate::services::buffer::envelope_buffer::priority::PriorityEnvelopeBuffer; -use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; +use crate::services::buffer::envelope_buffer::EnvelopesBuffer; mod envelope_buffer; mod envelope_stack; @@ -22,7 +21,7 @@ mod stack_provider; /// /// Access to the buffer is synchronized by a tokio lock. #[derive(Debug, Clone)] -pub struct EnvelopeBuffer { +pub struct EnvelopesBufferManager { /// TODO: Reconsider synchronization mechanism. /// We can either /// - keep the interface sync and use a std Mutex. In this case, we create a queue of threads. @@ -34,13 +33,13 @@ pub struct EnvelopeBuffer { /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] 
when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. - backend: Arc>>, + backend: Arc>, notify: Arc, changed: Arc, } -impl EnvelopeBuffer { - /// Creates a memory or disk based [`EnvelopeBuffer`], depending on the given config. +impl EnvelopesBufferManager { + /// Creates a memory or disk based [`EnvelopesBufferManager`], depending on the given config. /// /// NOTE: until the V1 spooler implementation is removed, this function returns `None` /// if V2 spooling is not configured. @@ -102,7 +101,7 @@ impl EnvelopeBuffer { /// /// Objects of this type can only exist if the buffer is not empty. pub struct Peek<'a> { - guard: MutexGuard<'a, PriorityEnvelopeBuffer>, + guard: MutexGuard<'a, EnvelopesBuffer>, notify: &'a tokio::sync::Notify, changed: &'a AtomicBool, } @@ -127,7 +126,7 @@ impl Peek<'_> { .expect("element disappeared while holding lock") } - /// Sync version of [`EnvelopeBuffer::mark_ready`]. + /// Sync version of [`EnvelopesBufferManager::mark_ready`]. /// /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready /// without awaiting the lock. 
@@ -228,8 +227,8 @@ mod tests { assert_eq!(call_count.load(Ordering::Relaxed), 2); } - fn new_buffer() -> EnvelopeBuffer { - EnvelopeBuffer::from_config( + fn new_buffer() -> EnvelopesBufferManager { + EnvelopesBufferManager::from_config( &Config::from_json_value(serde_json::json!({ "spool": { "envelopes": { diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index ad77752941..c184e975d2 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,7 +5,7 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; -use crate::services::buffer::{EnvelopeBuffer, Peek}; +use crate::services::buffer::{EnvelopesBufferManager, Peek}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; @@ -569,7 +569,7 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, // TODO: Make non-optional when spool_v1 is removed. - envelope_buffer: Option, + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, // Need hashbrown because extract_if is not stable in std yet. @@ -1265,7 +1265,7 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1276,7 +1276,7 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1453,7 +1453,7 @@ impl Service for ProjectCacheService { } /// Temporary helper function while V1 spool eixsts. 
-async fn peek_buffer(buffer: &Option) -> Peek { +async fn peek_buffer(buffer: &Option) -> Peek { match buffer { Some(buffer) => buffer.peek().await, None => std::future::pending().await, @@ -1534,7 +1534,7 @@ mod tests { .unwrap() .into(); let memory_checker = MemoryChecker::new(MemoryStat::default(), config.clone()); - let envelope_buffer = EnvelopeBuffer::from_config(&config); + let envelope_buffer = EnvelopesBufferManager::from_config(&config); let buffer_services = spooler::Services { outcome_aggregator: services.outcome_aggregator.clone(), project_cache: services.project_cache.clone(), From c5cd261640136b94c4e82b259b4ccdbdfad42241 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 29 Jul 2024 18:09:28 +0200 Subject: [PATCH 31/62] Improve --- .../services/buffer/envelope_buffer/mod.rs | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 3652b0b2bf..2262fcac01 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -16,21 +16,23 @@ use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopesBuffer`], depending on the given config. 
-pub fn create(_config: &Config) -> Arc>> { - Arc::new(Mutex::new(InnerEnvelopeBuffer::::new())) +pub fn create(_config: &Config) -> Arc> { + Arc::new(Mutex::new(EnvelopesBuffer::InMemory( + InnerEnvelopesBuffer::::new(), + ))) } #[derive(Debug)] pub enum EnvelopesBuffer { - InMemory(InnerEnvelopeBuffer), - Sqlite(InnerEnvelopeBuffer), + InMemory(InnerEnvelopesBuffer), + Sqlite(InnerEnvelopesBuffer), } impl EnvelopesBuffer { pub fn from_config(config: &Config) -> Self { match config.spool_envelopes_path() { - Some(path) => Self::Sqlite(InnerEnvelopeBuffer::::new(path)), - None => Self::InMemory(InnerEnvelopeBuffer::::new()), + Some(path) => Self::Sqlite(InnerEnvelopesBuffer::::new(path)), + None => Self::InMemory(InnerEnvelopesBuffer::::new()), } } @@ -42,7 +44,7 @@ impl EnvelopesBuffer { /// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope /// is pushed, popped, or when a project becomes ready. #[derive(Debug)] -struct InnerEnvelopeBuffer { +struct InnerEnvelopesBuffer { /// The central priority queue. priority_queue: priority_queue::PriorityQueue, Priority>, /// A lookup table to find all stacks involving a project. @@ -50,7 +52,7 @@ struct InnerEnvelopeBuffer { stack_provider: S::Provider, } -impl InnerEnvelopeBuffer { +impl InnerEnvelopesBuffer { /// Creates an empty buffer. pub fn new() -> Self { Self { @@ -60,7 +62,7 @@ impl InnerEnvelopeBuffer { } } } -impl InnerEnvelopeBuffer { +impl InnerEnvelopesBuffer { /// Creates an empty buffer. 
pub fn new(config: Arc) -> Self { Self { @@ -71,7 +73,7 @@ impl InnerEnvelopeBuffer { } } -impl InnerEnvelopeBuffer { +impl InnerEnvelopesBuffer { fn push_stack(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -324,7 +326,7 @@ mod tests { #[tokio::test] async fn insert_pop() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); @@ -399,7 +401,7 @@ mod tests { #[tokio::test] async fn project_internal_order() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -420,7 +422,7 @@ mod tests { #[tokio::test] async fn sampling_projects() { - let mut buffer = InnerEnvelopeBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); From 89724440cff49b6bf1850ecf4b48fb8d56455033 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 09:05:58 +0200 Subject: [PATCH 32/62] Improve --- .../services/buffer/envelope_buffer/mod.rs | 15 ++++--- .../services/buffer/envelope_stack/memory.rs | 5 ++- .../services/buffer/envelope_stack/sqlite.rs | 15 ++----- .../src/services/buffer/envelope_store/mod.rs | 29 ------------- relay-server/src/services/buffer/mod.rs | 2 +- .../sqlite.rs => sqlite_envelope_store.rs} | 43 ++++++++----------- .../services/buffer/stack_provider/memory.rs | 3 +- .../services/buffer/stack_provider/sqlite.rs | 9 ++-- relay-server/src/services/cogs.rs | 3 +- .../src/services/metrics/aggregator.rs | 5 ++- relay-server/src/services/metrics/mod.rs | 6 +-- 
relay-server/src/services/metrics/router.rs | 1 + relay-server/src/services/project/state.rs | 7 +-- .../src/services/project/state/fetch_state.rs | 3 +- .../src/services/project/state/info.rs | 7 +-- 15 files changed, 60 insertions(+), 93 deletions(-) delete mode 100644 relay-server/src/services/buffer/envelope_store/mod.rs rename relay-server/src/services/buffer/{envelope_store/sqlite.rs => sqlite_envelope_store.rs} (96%) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 2262fcac01..9926bcca5f 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -10,9 +10,9 @@ use relay_config::Config; use crate::envelope::Envelope; use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; -use crate::services::buffer::envelope_stack::sqlite::SqliteStackProvider; -use crate::services::buffer::envelope_stack::EnvelopeStack; +use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; +use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopesBuffer`], depending on the given config. @@ -29,9 +29,9 @@ pub enum EnvelopesBuffer { } impl EnvelopesBuffer { - pub fn from_config(config: &Config) -> Self { + pub async fn from_config(config: &Config) -> Self { match config.spool_envelopes_path() { - Some(path) => Self::Sqlite(InnerEnvelopesBuffer::::new(path)), + Some(_) => Self::Sqlite(InnerEnvelopesBuffer::::new(config).await), None => Self::InMemory(InnerEnvelopesBuffer::::new()), } } @@ -64,11 +64,12 @@ impl InnerEnvelopesBuffer { } impl InnerEnvelopesBuffer { /// Creates an empty buffer. 
- pub fn new(config: Arc) -> Self { + pub async fn new(config: &Config) -> Self { Self { stacks_by_project: Default::default(), priority_queue: Default::default(), - stack_provider: SqliteStackProvider::new(config), + // TODO: handle error. + stack_provider: SqliteStackProvider::new(config).await.unwrap(), } } } @@ -80,7 +81,7 @@ impl InnerEnvelopesBuffer { let previous_entry = self.priority_queue.push( QueueItem { key: stack_key, - value: S::new(envelope), + value: self.stack_provider.create_stack(envelope), }, Priority::new(received_at), ); diff --git a/relay-server/src/services/buffer/envelope_stack/memory.rs b/relay-server/src/services/buffer/envelope_stack/memory.rs index 2e1f2b9429..15dbaaee7e 100644 --- a/relay-server/src/services/buffer/envelope_stack/memory.rs +++ b/relay-server/src/services/buffer/envelope_stack/memory.rs @@ -1,14 +1,15 @@ use std::convert::Infallible; -use super::EnvelopeStack; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::Envelope; +use super::EnvelopeStack; + #[derive(Debug)] pub struct MemoryEnvelopeStack(#[allow(clippy::vec_box)] Vec>); impl MemoryEnvelopeStack { - fn new(envelope: Box) -> Self { + pub fn new(envelope: Box) -> Self { Self(vec![envelope]) } } diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index ab7ea5ea4e..878398deff 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -2,28 +2,19 @@ use std::collections::VecDeque; use std::error::Error; use std::fmt::Debug; use std::num::NonZeroUsize; -use std::path::{Path, PathBuf}; -use std::pin::pin; -use std::sync::Arc; +use std::path::Path; use futures::StreamExt; use sqlx::query::Query; -use sqlx::sqlite::{ - SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, - SqliteRow, SqliteSynchronous, -}; +use sqlx::sqlite::{SqliteArguments, 
SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use tokio::fs::DirBuilder; use relay_base_schema::project::ProjectKey; -use relay_config::Config; use crate::envelope::Envelope; -use crate::extractors::StartTime; use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; -use crate::services::buffer::envelope_store::sqlite::SqliteEnvelopeStore; -use crate::services::buffer::envelope_store::EnvelopeStore; -use crate::services::buffer::stack_provider::memory::MemoryStackProvider; +use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStore; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. diff --git a/relay-server/src/services/buffer/envelope_store/mod.rs b/relay-server/src/services/buffer/envelope_store/mod.rs deleted file mode 100644 index 97fa04cd27..0000000000 --- a/relay-server/src/services/buffer/envelope_store/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -pub mod sqlite; - -use crate::Envelope; -use relay_base_schema::project::ProjectKey; -use std::future::Future; - -pub trait EnvelopeStore { - type Envelope; - - type Error; - - fn insert_many( - &mut self, - envelopes: impl Iterator, - ) -> impl Future>; - - fn delete_many( - &mut self, - own_key: ProjectKey, - sampling_key: ProjectKey, - limit: i64, - ) -> impl Future>, Self::Error>>; - - fn project_keys_pairs( - &self, - ) -> impl Future, Self::Error>>; - - fn used_size(&self) -> impl Future>; -} diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index b7f4d53501..c0e9433d8c 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -14,7 +14,7 @@ use crate::services::buffer::envelope_buffer::EnvelopesBuffer; mod envelope_buffer; mod envelope_stack; -mod envelope_store; +mod sqlite_envelope_store; mod stack_provider; /// Async envelope buffering 
interface. diff --git a/relay-server/src/services/buffer/envelope_store/sqlite.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs similarity index 96% rename from relay-server/src/services/buffer/envelope_store/sqlite.rs rename to relay-server/src/services/buffer/sqlite_envelope_store.rs index 40f9f8a9ba..e4240160a5 100644 --- a/relay-server/src/services/buffer/envelope_store/sqlite.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -1,9 +1,9 @@ -use crate::extractors::StartTime; -use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; -use crate::services::buffer::envelope_store::EnvelopeStore; -use crate::Envelope; -use relay_base_schema::project::ProjectKey; -use relay_config::Config; +use std::error::Error; +use std::iter; +use std::path::Path; +use std::pin::pin; +use std::sync::Arc; + use sqlx::migrate::MigrateError; use sqlx::query::Query; use sqlx::sqlite::{ @@ -11,13 +11,16 @@ use sqlx::sqlite::{ SqliteRow, SqliteSynchronous, }; use sqlx::{Pool, QueryBuilder, Sqlite}; -use std::error::Error; -use std::iter; -use std::path::Path; -use std::pin::pin; -use std::sync::Arc; use tokio::fs::DirBuilder; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; + +use crate::extractors::StartTime; +use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; +use crate::services::buffer::envelope_store::EnvelopeStore; +use crate::Envelope; + struct InsertEnvelope { received_at: i64, own_key: ProjectKey, @@ -71,9 +74,7 @@ impl SqliteEnvelopeStore { /// Prepares the [`SqliteEnvelopeStore`] by running all the necessary migrations and preparing /// the folders where data will be stored. - pub async fn prepare( - config: Arc, - ) -> Result { + pub async fn prepare(config: &Config) -> Result { // If no path is provided, we can't do disk spooling. 
let Some(path) = config.spool_envelopes_path() else { return Err(SqliteEnvelopeStoreError::NoFilePath); @@ -169,16 +170,10 @@ impl SqliteEnvelopeStore { Ok(()) } -} - -impl EnvelopeStore for SqliteEnvelopeStore { - type Envelope = InsertEnvelope; - - type Error = SqliteEnvelopeStoreError; async fn insert_many( &mut self, - envelopes: impl Iterator, + envelopes: impl Iterator, ) -> Result<(), Self::Error> { if let Err(err) = build_insert_many_envelopes(envelopes) .build() @@ -201,7 +196,7 @@ impl EnvelopeStore for SqliteEnvelopeStore { own_key: ProjectKey, sampling_key: ProjectKey, limit: i64, - ) -> Result>, Self::Error> { + ) -> Result>, SqliteEnvelopeStoreError> { let envelopes = build_delete_and_fetch_many_envelopes(own_key, sampling_key, limit) .fetch(&self.db) .peekable(); @@ -251,12 +246,12 @@ impl EnvelopeStore for SqliteEnvelopeStore { async fn project_keys_pairs( &self, - ) -> Result, Self::Error> { + ) -> Result, SqliteEnvelopeStoreError> { // TODO: implement. Ok(iter::empty()) } - async fn used_size(&self) -> Result { + async fn used_size(&self) -> Result { // TODO: implement. 
Ok(10) } diff --git a/relay-server/src/services/buffer/stack_provider/memory.rs b/relay-server/src/services/buffer/stack_provider/memory.rs index d4911ea504..b3fe5c3bb3 100644 --- a/relay-server/src/services/buffer/stack_provider/memory.rs +++ b/relay-server/src/services/buffer/stack_provider/memory.rs @@ -1,7 +1,8 @@ use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; use crate::services::buffer::envelope_stack::StackProvider; -use crate::{Envelope, EnvelopeStack}; +use crate::Envelope; +#[derive(Debug)] pub struct MemoryStackProvider; impl StackProvider for MemoryStackProvider { diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index c13f03ea5f..3fa7c382a5 100644 --- a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -1,11 +1,12 @@ +use relay_config::Config; + use crate::services::buffer::envelope_stack::StackProvider; use crate::services::buffer::envelope_store::sqlite::{ SqliteEnvelopeStore, SqliteEnvelopeStoreError, }; -use crate::{Envelope, EnvelopeStack, SqliteEnvelopeStack}; -use relay_config::Config; -use std::sync::Arc; +use crate::{Envelope, SqliteEnvelopeStack}; +#[derive(Debug)] pub struct SqliteStackProvider { envelope_store: SqliteEnvelopeStore, disk_batch_size: usize, @@ -14,7 +15,7 @@ pub struct SqliteStackProvider { impl SqliteStackProvider { /// Creates a new [`SqliteStackProvider`] from the provided path to the SQLite database file. 
- pub async fn new(config: Arc) -> Result { + pub async fn new(config: &Config) -> Result { // TODO: error handling let envelope_store = SqliteEnvelopeStore::prepare(config).await?; Ok(Self { diff --git a/relay-server/src/services/cogs.rs b/relay-server/src/services/cogs.rs index acbeb790ab..385d2201c5 100644 --- a/relay-server/src/services/cogs.rs +++ b/relay-server/src/services/cogs.rs @@ -1,9 +1,10 @@ use std::sync::atomic::{AtomicBool, Ordering}; +use sentry_usage_accountant::{Producer, UsageAccountant, UsageUnit}; + use relay_cogs::{CogsMeasurement, CogsRecorder, ResourceId}; use relay_config::Config; use relay_system::{Addr, Controller, FromMessage, Interface, Service}; -use sentry_usage_accountant::{Producer, UsageAccountant, UsageUnit}; #[cfg(feature = "processing")] use crate::services::store::{Store, StoreCogs}; diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index e76bf78bda..96fbd653cb 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ b/relay-server/src/services/metrics/aggregator.rs @@ -1,15 +1,16 @@ -use hashbrown::HashMap; use std::time::Duration; +use hashbrown::HashMap; + use relay_base_schema::project::ProjectKey; use relay_config::AggregatorServiceConfig; +use relay_metrics::{aggregator, Bucket}; use relay_system::{ AsyncResponse, Controller, FromMessage, Interface, NoResponse, Recipient, Sender, Service, Shutdown, }; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; -use relay_metrics::{aggregator, Bucket}; /// Aggregator for metric buckets. 
/// diff --git a/relay-server/src/services/metrics/mod.rs b/relay-server/src/services/metrics/mod.rs index cbf5a94af6..5a1ed2cf8f 100644 --- a/relay-server/src/services/metrics/mod.rs +++ b/relay-server/src/services/metrics/mod.rs @@ -1,5 +1,5 @@ -mod aggregator; -mod router; - pub use self::aggregator::*; pub use self::router::*; + +mod aggregator; +mod router; diff --git a/relay-server/src/services/metrics/router.rs b/relay-server/src/services/metrics/router.rs index 76b063e752..6b7de838f3 100644 --- a/relay-server/src/services/metrics/router.rs +++ b/relay-server/src/services/metrics/router.rs @@ -3,6 +3,7 @@ use futures::stream::FuturesUnordered; use futures::StreamExt; + use relay_config::{AggregatorServiceConfig, ScopedAggregatorConfig}; use relay_metrics::MetricNamespace; use relay_system::{Addr, NoResponse, Recipient, Service}; diff --git a/relay-server/src/services/project/state.rs b/relay-server/src/services/project/state.rs index 4788f51e10..9c7ea90acc 100644 --- a/relay-server/src/services/project/state.rs +++ b/relay-server/src/services/project/state.rs @@ -1,12 +1,13 @@ //! Types that represent the current project state. use std::sync::Arc; -mod fetch_state; -mod info; +use serde::{Deserialize, Serialize}; pub use fetch_state::{ExpiryState, ProjectFetchState}; pub use info::{LimitedProjectInfo, ProjectInfo}; -use serde::{Deserialize, Serialize}; + +mod fetch_state; +mod info; /// Representation of a project's current state. 
#[derive(Clone, Debug)] diff --git a/relay-server/src/services/project/state/fetch_state.rs b/relay-server/src/services/project/state/fetch_state.rs index 3499c9184a..a9cbbdf31a 100644 --- a/relay-server/src/services/project/state/fetch_state.rs +++ b/relay-server/src/services/project/state/fetch_state.rs @@ -1,8 +1,9 @@ use std::sync::Arc; +use tokio::time::Instant; + use relay_config::Config; use relay_dynamic_config::ProjectConfig; -use tokio::time::Instant; use crate::services::project::state::info::ProjectInfo; use crate::services::project::ProjectState; diff --git a/relay-server/src/services/project/state/info.rs b/relay-server/src/services/project/state/info.rs index 0f21d2e015..9652cb348c 100644 --- a/relay-server/src/services/project/state/info.rs +++ b/relay-server/src/services/project/state/info.rs @@ -1,4 +1,8 @@ use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use url::Url; + use relay_base_schema::project::{ProjectId, ProjectKey}; #[cfg(feature = "processing")] use relay_cardinality::CardinalityLimit; @@ -8,9 +12,6 @@ use relay_dynamic_config::ErrorBoundary; use relay_dynamic_config::{Feature, LimitedProjectConfig, ProjectConfig}; use relay_filter::matches_any_origin; use relay_quotas::{Quota, Scoping}; -use serde::{Deserialize, Serialize}; -use smallvec::SmallVec; -use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; From a8228327ffc7a015be7891927ee2dcdab6d7d279 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 09:17:43 +0200 Subject: [PATCH 33/62] Improve --- .../services/buffer/envelope_buffer/mod.rs | 28 ++++++++- .../services/buffer/envelope_stack/sqlite.rs | 57 +------------------ .../services/buffer/sqlite_envelope_store.rs | 23 +++----- .../services/buffer/stack_provider/sqlite.rs | 2 +- 4 files changed, 39 insertions(+), 71 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs 
b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 9926bcca5f..8843ac3730 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -36,7 +36,33 @@ impl EnvelopesBuffer { } } - // TODO: add push, pop, peek + pub async fn push(&mut self, envelope: Box) { + match self { + Self::Sqlite(buffer) => buffer.push(envelope).await, + Self::InMemory(buffer) => buffer.push(envelope).await, + } + } + + pub async fn peek(&mut self) -> Option<&Envelope> { + match self { + Self::Sqlite(buffer) => buffer.peek().await, + Self::InMemory(buffer) => buffer.peek().await, + } + } + + pub async fn pop(&mut self) -> Option> { + match self { + Self::Sqlite(buffer) => buffer.pop().await, + Self::InMemory(buffer) => buffer.pop().await, + } + } + + pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { + match self { + Self::Sqlite(buffer) => buffer.mark_ready(project, is_ready), + Self::InMemory(buffer) => buffer.mark_ready(project, is_ready), + } + } } /// An envelope buffer that holds an individual stack for each project/sampling project combination. 
diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 878398deff..894ea5f9fc 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -5,9 +5,8 @@ use std::num::NonZeroUsize; use std::path::Path; use futures::StreamExt; -use sqlx::query::Query; -use sqlx::sqlite::{SqliteArguments, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; -use sqlx::{Pool, QueryBuilder, Row, Sqlite}; +use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; +use sqlx::{Pool, Row, Sqlite}; use tokio::fs::DirBuilder; use relay_base_schema::project::ProjectKey; @@ -234,58 +233,6 @@ impl EnvelopeStack for SqliteEnvelopeStack { } } -/// Struct which contains all the rows that have to be inserted in the database when storing an -/// [`Envelope`]. -struct InsertEnvelope { - received_at: i64, - own_key: ProjectKey, - sampling_key: ProjectKey, - encoded_envelope: Vec, -} - -/// Builds a query that inserts many [`Envelope`]s in the database. -fn build_insert_many_envelopes<'a>( - envelopes: impl Iterator, -) -> QueryBuilder<'a, Sqlite> { - let mut builder: QueryBuilder = - QueryBuilder::new("INSERT INTO envelopes (received_at, own_key, sampling_key, envelope) "); - - builder.push_values(envelopes, |mut b, envelope| { - b.push_bind(envelope.received_at) - .push_bind(envelope.own_key.to_string()) - .push_bind(envelope.sampling_key.to_string()) - .push_bind(envelope.encoded_envelope); - }); - - builder -} - -/// Builds a query that deletes many [`Envelope`] from the database. -pub fn build_delete_and_fetch_many_envelopes<'a>( - own_key: ProjectKey, - project_key: ProjectKey, - limit: i64, -) -> Query<'a, Sqlite, SqliteArguments<'a>> { - sqlx::query( - "DELETE FROM - envelopes - WHERE id IN (SELECT id FROM envelopes WHERE own_key = ? AND sampling_key = ? - ORDER BY received_at DESC LIMIT ?) 
- RETURNING - received_at, own_key, sampling_key, envelope", - ) - .bind(own_key.to_string()) - .bind(project_key.to_string()) - .bind(limit) -} - -/// Computes the `received_at` timestamps of an [`Envelope`] based on the `start_time` header. -/// -/// This method has been copied from the `ManagedEnvelope.received_at()` method. -fn received_at(envelope: &Envelope) -> i64 { - relay_common::time::instant_to_date_time(envelope.meta().start_time()).timestamp_millis() -} - #[cfg(test)] mod tests { use std::collections::BTreeMap; diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index e4240160a5..62a8a6a6b8 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -2,15 +2,15 @@ use std::error::Error; use std::iter; use std::path::Path; use std::pin::pin; -use std::sync::Arc; +use futures::stream::StreamExt; use sqlx::migrate::MigrateError; use sqlx::query::Query; use sqlx::sqlite::{ SqliteArguments, SqliteAutoVacuum, SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteRow, SqliteSynchronous, }; -use sqlx::{Pool, QueryBuilder, Sqlite}; +use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use tokio::fs::DirBuilder; use relay_base_schema::project::ProjectKey; @@ -18,7 +18,6 @@ use relay_config::Config; use crate::extractors::StartTime; use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; -use crate::services::buffer::envelope_store::EnvelopeStore; use crate::Envelope; struct InsertEnvelope { @@ -171,10 +170,10 @@ impl SqliteEnvelopeStore { Ok(()) } - async fn insert_many( + pub async fn insert_many( &mut self, envelopes: impl Iterator, - ) -> Result<(), Self::Error> { + ) -> Result<(), SqliteEnvelopeStoreError> { if let Err(err) = build_insert_many_envelopes(envelopes) .build() .execute(&self.db) @@ -191,7 +190,7 @@ impl SqliteEnvelopeStore { Ok(()) } - async fn delete_many( + pub 
async fn delete_many( &mut self, own_key: ProjectKey, sampling_key: ProjectKey, @@ -206,11 +205,7 @@ impl SqliteEnvelopeStore { return Ok(vec![]); } - // We use a sorted vector to order envelopes that are deleted from the database. - // Unfortunately we have to do this because SQLite `DELETE` with `RETURNING` doesn't - // return deleted rows in a specific order. let mut extracted_envelopes = Vec::with_capacity(limit as usize); - let mut db_error = None; while let Some(envelope) = envelopes.as_mut().next().await { let envelope = match envelope { Ok(envelope) => envelope, @@ -219,8 +214,6 @@ impl SqliteEnvelopeStore { error = &err as &dyn Error, "failed to unspool the envelopes from the disk", ); - db_error = Some(err); - continue; } }; @@ -239,19 +232,21 @@ impl SqliteEnvelopeStore { } // We sort envelopes by `received_at`. + // Unfortunately we have to do this because SQLite `DELETE` with `RETURNING` doesn't + // return deleted rows in a specific order. extracted_envelopes.sort_by_key(|a| a.received_at()); Ok(extracted_envelopes) } - async fn project_keys_pairs( + pub async fn project_keys_pairs( &self, ) -> Result, SqliteEnvelopeStoreError> { // TODO: implement. Ok(iter::empty()) } - async fn used_size(&self) -> Result { + pub async fn used_size(&self) -> Result { // TODO: implement. 
Ok(10) } diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index 3fa7c382a5..c88238029a 100644 --- a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -1,7 +1,7 @@ use relay_config::Config; use crate::services::buffer::envelope_stack::StackProvider; -use crate::services::buffer::envelope_store::sqlite::{ +use crate::services::buffer::sqlite_envelope_store::{ SqliteEnvelopeStore, SqliteEnvelopeStoreError, }; use crate::{Envelope, SqliteEnvelopeStack}; From 8325990945f83ba3b448254b73984ba4a94d3e73 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 09:35:34 +0200 Subject: [PATCH 34/62] Improve --- relay-server/src/services/buffer/envelope_buffer/mod.rs | 5 ++++- relay-server/src/services/buffer/envelope_stack/mod.rs | 4 ++-- .../src/services/buffer/envelope_stack/sqlite.rs | 9 +-------- .../src/services/buffer/sqlite_envelope_store.rs | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 8843ac3730..ad2d65e772 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -72,7 +72,10 @@ impl EnvelopesBuffer { #[derive(Debug)] struct InnerEnvelopesBuffer { /// The central priority queue. - priority_queue: priority_queue::PriorityQueue, Priority>, + priority_queue: priority_queue::PriorityQueue< + QueueItem::Provider as StackProvider>::Stack>, + Priority, + >, /// A lookup table to find all stacks involving a project. 
stacks_by_project: hashbrown::HashMap>, stack_provider: S::Provider, diff --git a/relay-server/src/services/buffer/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs index 574b5e0bd3..fb8b1114a2 100644 --- a/relay-server/src/services/buffer/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -6,7 +6,7 @@ pub mod memory; pub mod sqlite; /// A stack-like data structure that holds [`Envelope`]s. -pub trait EnvelopeStack: Send { +pub trait EnvelopeStack: Send + std::fmt::Debug { /// The error type that is returned when an error is encountered during reading or writing the /// [`EnvelopeStack`]. type Error: std::fmt::Debug; @@ -27,7 +27,7 @@ pub trait EnvelopeStack: Send { fn pop(&mut self) -> impl Future>, Self::Error>>; } -pub trait StackProvider { +pub trait StackProvider: std::fmt::Debug { type Stack: EnvelopeStack; fn create_stack(&self, envelope: Box) -> Self::Stack; diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 894ea5f9fc..ecfd1213ad 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -1,18 +1,11 @@ use std::collections::VecDeque; -use std::error::Error; use std::fmt::Debug; use std::num::NonZeroUsize; -use std::path::Path; - -use futures::StreamExt; -use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; -use sqlx::{Pool, Row, Sqlite}; -use tokio::fs::DirBuilder; use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; -use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; +use crate::services::buffer::envelope_stack::EnvelopeStack; use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStore; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs 
b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 62a8a6a6b8..0fe7a6c0ae 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -20,7 +20,7 @@ use crate::extractors::StartTime; use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; use crate::Envelope; -struct InsertEnvelope { +pub struct InsertEnvelope { received_at: i64, own_key: ProjectKey, sampling_key: ProjectKey, From 8de10105ffa3d6695fd8fa955ae03fc90d6bef08 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 09:42:06 +0200 Subject: [PATCH 35/62] Improve --- .../services/buffer/envelope_buffer/mod.rs | 34 ++++++++----------- .../services/buffer/envelope_stack/memory.rs | 3 -- .../src/services/buffer/envelope_stack/mod.rs | 2 -- .../services/buffer/envelope_stack/sqlite.rs | 3 -- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index ad2d65e772..74f1d43a21 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -9,30 +9,28 @@ use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::envelope::Envelope; -use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; -use crate::SqliteEnvelopeStack; /// Creates a memory or disk based [`EnvelopesBuffer`], depending on the given config. 
pub fn create(_config: &Config) -> Arc> { Arc::new(Mutex::new(EnvelopesBuffer::InMemory( - InnerEnvelopesBuffer::::new(), + InnerEnvelopesBuffer::::new(), ))) } #[derive(Debug)] pub enum EnvelopesBuffer { - InMemory(InnerEnvelopesBuffer), - Sqlite(InnerEnvelopesBuffer), + InMemory(InnerEnvelopesBuffer), + Sqlite(InnerEnvelopesBuffer), } impl EnvelopesBuffer { pub async fn from_config(config: &Config) -> Self { match config.spool_envelopes_path() { - Some(_) => Self::Sqlite(InnerEnvelopesBuffer::::new(config).await), - None => Self::InMemory(InnerEnvelopesBuffer::::new()), + Some(_) => Self::Sqlite(InnerEnvelopesBuffer::::new(config).await), + None => Self::InMemory(InnerEnvelopesBuffer::::new()), } } @@ -70,18 +68,15 @@ impl EnvelopesBuffer { /// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope /// is pushed, popped, or when a project becomes ready. #[derive(Debug)] -struct InnerEnvelopesBuffer { +struct InnerEnvelopesBuffer { /// The central priority queue. - priority_queue: priority_queue::PriorityQueue< - QueueItem::Provider as StackProvider>::Stack>, - Priority, - >, + priority_queue: priority_queue::PriorityQueue, Priority>, /// A lookup table to find all stacks involving a project. stacks_by_project: hashbrown::HashMap>, - stack_provider: S::Provider, + stack_provider: P, } -impl InnerEnvelopesBuffer { +impl InnerEnvelopesBuffer { /// Creates an empty buffer. pub fn new() -> Self { Self { @@ -91,7 +86,7 @@ impl InnerEnvelopesBuffer { } } } -impl InnerEnvelopesBuffer { +impl InnerEnvelopesBuffer { /// Creates an empty buffer. pub async fn new(config: &Config) -> Self { Self { @@ -103,7 +98,7 @@ impl InnerEnvelopesBuffer { } } -impl InnerEnvelopesBuffer { +impl InnerEnvelopesBuffer

{ fn push_stack(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -327,7 +322,6 @@ mod tests { use crate::envelope::{Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::envelope_stack::memory::MemoryEnvelopeStack; use super::*; @@ -356,7 +350,7 @@ mod tests { #[tokio::test] async fn insert_pop() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); @@ -431,7 +425,7 @@ mod tests { #[tokio::test] async fn project_internal_order() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -452,7 +446,7 @@ mod tests { #[tokio::test] async fn sampling_projects() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = InnerEnvelopesBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); diff --git a/relay-server/src/services/buffer/envelope_stack/memory.rs b/relay-server/src/services/buffer/envelope_stack/memory.rs index 15dbaaee7e..5e8087010f 100644 --- a/relay-server/src/services/buffer/envelope_stack/memory.rs +++ b/relay-server/src/services/buffer/envelope_stack/memory.rs @@ -1,6 +1,5 @@ use std::convert::Infallible; -use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::Envelope; use super::EnvelopeStack; @@ -17,8 +16,6 @@ impl MemoryEnvelopeStack { impl EnvelopeStack for MemoryEnvelopeStack { type Error = Infallible; - type Provider = MemoryStackProvider; - async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { self.0.push(envelope); 
Ok(()) diff --git a/relay-server/src/services/buffer/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs index fb8b1114a2..1db4d3c82a 100644 --- a/relay-server/src/services/buffer/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -11,8 +11,6 @@ pub trait EnvelopeStack: Send + std::fmt::Debug { /// [`EnvelopeStack`]. type Error: std::fmt::Debug; - type Provider: StackProvider; - /// Pushes an [`Envelope`] on top of the stack. fn push(&mut self, envelope: Box) -> impl Future>; diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index ecfd1213ad..022670c589 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -7,7 +7,6 @@ use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; use crate::services::buffer::envelope_stack::EnvelopeStack; use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStore; -use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. 
#[derive(Debug, thiserror::Error)] @@ -163,8 +162,6 @@ impl SqliteEnvelopeStack { impl EnvelopeStack for SqliteEnvelopeStack { type Error = SqliteEnvelopeStackError; - type Provider = SqliteStackProvider; - async fn push(&mut self, envelope: Box) -> Result<(), Self::Error> { debug_assert!(self.validate_envelope(&envelope)); From fc5a0e9c22dfa3750cf81dab9c47fe70cc609035 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 10:10:46 +0200 Subject: [PATCH 36/62] Improve --- .../services/buffer/envelope_stack/sqlite.rs | 48 +++++++++---------- .../services/buffer/sqlite_envelope_store.rs | 29 ++++++++--- 2 files changed, 45 insertions(+), 32 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 022670c589..803cd9dc2a 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -6,18 +6,16 @@ use relay_base_schema::project::ProjectKey; use crate::envelope::Envelope; use crate::services::buffer::envelope_stack::EnvelopeStack; -use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStore; +use crate::services::buffer::sqlite_envelope_store::{ + SqliteEnvelopeStore, SqliteEnvelopeStoreError, +}; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] pub enum SqliteEnvelopeStackError { - /// The stack is empty. - #[error("the stack is empty")] - Empty, - - /// The database encountered an unexpected error. - #[error("a database error occurred")] - DatabaseError(#[from] sqlx::Error), + /// The envelope store encountered an error. + #[error("an error occurred in the envelope store: {0}")] + EnvelopeStoreError(#[from] SqliteEnvelopeStoreError), } #[derive(Debug)] @@ -99,8 +97,10 @@ impl SqliteEnvelopeStack { // the buffer are lost. 
We are doing this on purposes, since if we were to have a // database corruption during runtime, and we were to put the values back into the buffer // we will end up with an infinite cycle. - // TODO: handle error. - self.envelope_store.insert_many(envelopes).await.unwrap(); + self.envelope_store + .insert_many(envelopes) + .await + .map_err(SqliteEnvelopeStackError::EnvelopeStoreError)?; // If we successfully spooled to disk, we know that data should be there. self.check_disk = true; @@ -116,7 +116,6 @@ impl SqliteEnvelopeStack { /// In case an envelope fails deserialization due to malformed data in the database, the affected /// envelope will not be unspooled and unspooling will continue with the remaining envelopes. async fn unspool_from_disk(&mut self) -> Result<(), SqliteEnvelopeStackError> { - // TODO: handle error. let envelopes = self .envelope_store .delete_many( @@ -125,15 +124,9 @@ impl SqliteEnvelopeStack { self.batch_size.get() as i64, ) .await - .unwrap(); + .map_err(SqliteEnvelopeStackError::EnvelopeStoreError)?; if envelopes.is_empty() { - // If there was a database error and no envelopes have been returned, we assume that we are - // in a critical state, so we return an error. - // if let Some(db_error) = db_error { - // return Err(SqliteEnvelopeStackError::DatabaseError(db_error)); - // } - // In case no envelopes were unspooled, we will mark the disk as empty until another // round of spooling takes place. self.check_disk = false; @@ -193,11 +186,13 @@ impl EnvelopeStack for SqliteEnvelopeStack { self.unspool_from_disk().await? 
} - Ok(self + let last = self .batches_buffer .back() .and_then(|last_batch| last_batch.last()) - .map(|boxed| boxed.as_ref())) + .map(|last_batch| last_batch.as_ref()); + + Ok(last) } async fn pop(&mut self) -> Result>, Self::Error> { @@ -209,6 +204,9 @@ impl EnvelopeStack for SqliteEnvelopeStack { self.batches_buffer_size -= 1; last_batch.pop() }); + if result.is_none() { + return Ok(None); + } // Since we might leave a batch without elements, we want to pop it from the buffer. if self @@ -362,7 +360,7 @@ mod tests { let envelope = mock_envelope(Instant::now()); assert!(matches!( stack.push(envelope).await, - Err(SqliteEnvelopeStackError::DatabaseError(_)) + Err(SqliteEnvelopeStackError::EnvelopeStoreError(_)) )); // The stack now contains the last of the 3 elements that were added. If we add a new one @@ -405,7 +403,7 @@ mod tests { // We pop with an invalid db. assert!(matches!( stack.pop().await, - Err(SqliteEnvelopeStackError::DatabaseError(_)) + Err(SqliteEnvelopeStackError::EnvelopeStoreError(_)) )); } @@ -422,10 +420,8 @@ mod tests { ); // We pop with no elements. - assert!(matches!( - stack.pop().await, - Err(SqliteEnvelopeStackError::Empty) - )); + // We pop with no elements. 
+ assert!(stack.pop().await.unwrap().is_none()); } #[tokio::test] diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 0fe7a6c0ae..7504293832 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -17,7 +17,6 @@ use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::extractors::StartTime; -use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; use crate::Envelope; pub struct InsertEnvelope { @@ -53,11 +52,17 @@ pub enum SqliteEnvelopeStoreError { #[error("an error occurred while spooling envelopes: {0}")] SpoolingError(sqlx::Error), + #[error("an error occurred while unspooling envelopes: {0}")] + UnspoolingError(sqlx::Error), + #[error("no file path for the spool was provided")] NoFilePath, #[error("error during the migration of the database: {0}")] MigrationError(MigrateError), + + #[error("error while extracting the envelope from the database")] + EnvelopeExtractionError, } #[derive(Debug, Clone)] @@ -206,6 +211,7 @@ impl SqliteEnvelopeStore { } let mut extracted_envelopes = Vec::with_capacity(limit as usize); + let mut db_error = None; while let Some(envelope) = envelopes.as_mut().next().await { let envelope = match envelope { Ok(envelope) => envelope, @@ -214,6 +220,8 @@ impl SqliteEnvelopeStore { error = &err as &dyn Error, "failed to unspool the envelopes from the disk", ); + db_error = Some(err); + continue; } }; @@ -231,6 +239,15 @@ impl SqliteEnvelopeStore { } } + // If we have no envelopes and there was at least one error, we signal total failure to the + // caller. We do this under the assumption that if there are envelopes and failures, we are + // fine with just logging the failure and not failing completely. 
+ if extracted_envelopes.is_empty() { + if let Some(db_error) = db_error { + return Err(SqliteEnvelopeStoreError::UnspoolingError(db_error)); + } + } + // We sort envelopes by `received_at`. // Unfortunately we have to do this because SQLite `DELETE` with `RETURNING` doesn't // return deleted rows in a specific order. @@ -253,17 +270,17 @@ impl SqliteEnvelopeStore { } /// Deserializes an [`Envelope`] from a database row. -fn extract_envelope(row: SqliteRow) -> Result, SqliteEnvelopeStackError> { +fn extract_envelope(row: SqliteRow) -> Result, SqliteEnvelopeStoreError> { let envelope_row: Vec = row .try_get("envelope") - .map_err(|_| SqliteEnvelopeStackError::Empty)?; + .map_err(SqliteEnvelopeStoreError::UnspoolingError)?; let envelope_bytes = bytes::Bytes::from(envelope_row); - let mut envelope = - Envelope::parse_bytes(envelope_bytes).map_err(|_| SqliteEnvelopeStackError::Empty)?; + let mut envelope = Envelope::parse_bytes(envelope_bytes) + .map_err(|_| SqliteEnvelopeStoreError::EnvelopeExtractionError)?; let received_at: i64 = row .try_get("received_at") - .map_err(|_| SqliteEnvelopeStackError::Empty)?; + .map_err(SqliteEnvelopeStoreError::UnspoolingError)?; let start_time = StartTime::from_timestamp_millis(received_at as u64); envelope.set_start_time(start_time.into_inner()); From 17290516aeefe7a9510bd59fa66a5798d998ddec Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 11:11:59 +0200 Subject: [PATCH 37/62] Improve --- .../services/buffer/envelope_stack/sqlite.rs | 53 +---- relay-server/src/services/buffer/mod.rs | 1 + .../services/buffer/sqlite_envelope_store.rs | 200 ++++++++++++++++-- .../services/buffer/stack_provider/sqlite.rs | 6 +- relay-server/src/services/buffer/testutils.rs | 36 ++++ 5 files changed, 223 insertions(+), 73 deletions(-) create mode 100644 relay-server/src/services/buffer/testutils.rs diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs 
index 803cd9dc2a..e5e5bee84e 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -46,7 +46,6 @@ pub struct SqliteEnvelopeStack { } impl SqliteEnvelopeStack { - // TODO: implement method for initializing the stack given disk contents. /// Creates a new empty [`SQLiteEnvelopeStack`]. pub fn new( envelope_store: SqliteEnvelopeStore, @@ -91,12 +90,13 @@ impl SqliteEnvelopeStack { }; self.batches_buffer_size -= envelopes.len(); + // We convert envelopes into a format which simplifies insertion in the store. let envelopes = envelopes.iter().map(|e| e.as_ref().into()); // When early return here, we are acknowledging that the elements that we popped from - // the buffer are lost. We are doing this on purposes, since if we were to have a - // database corruption during runtime, and we were to put the values back into the buffer - // we will end up with an infinite cycle. + // the buffer are lost in case of failure. We are doing this on purposes, since if we were + // to have a database corruption during runtime, and we were to put the values back into + // the buffer we will end up with an infinite cycle. 
self.envelope_store .insert_many(envelopes) .await @@ -224,22 +224,18 @@ impl EnvelopeStack for SqliteEnvelopeStack { #[cfg(test)] mod tests { use std::collections::BTreeMap; - use std::path::Path; use std::time::{Duration, Instant}; - use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; - use sqlx::{Pool, Sqlite}; - use tokio::fs::DirBuilder; use uuid::Uuid; use relay_base_schema::project::ProjectKey; use relay_event_schema::protocol::EventId; use relay_sampling::DynamicSamplingContext; + use super::*; use crate::envelope::{Envelope, Item, ItemType}; use crate::extractors::RequestMeta; - - use super::*; + use crate::services::buffer::testutils::setup_db; fn request_meta() -> RequestMeta { let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" @@ -282,43 +278,6 @@ mod tests { .collect() } - async fn setup_db(run_migrations: bool) -> Pool { - let path = std::env::temp_dir().join(Uuid::new_v4().to_string()); - - create_spool_directory(&path).await; - - let options = SqliteConnectOptions::new() - .filename(&path) - .journal_mode(SqliteJournalMode::Wal) - .create_if_missing(true); - - let db = SqlitePoolOptions::new() - .connect_with(options) - .await - .unwrap(); - - if run_migrations { - sqlx::migrate!("../migrations").run(&db).await.unwrap(); - } - - db - } - - async fn create_spool_directory(path: &Path) { - let Some(parent) = path.parent() else { - return; - }; - - if !parent.as_os_str().is_empty() && !parent.exists() { - relay_log::debug!("creating directory for spooling file: {}", parent.display()); - DirBuilder::new() - .recursive(true) - .create(&parent) - .await - .unwrap(); - } - } - #[tokio::test] #[should_panic] async fn test_push_with_mismatching_project_keys() { diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index c0e9433d8c..181dcf87b1 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -16,6 +16,7 @@ mod 
envelope_buffer; mod envelope_stack; mod sqlite_envelope_store; mod stack_provider; +mod testutils; /// Async envelope buffering interface. /// diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 7504293832..1e2dacc4bb 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -1,5 +1,4 @@ use std::error::Error; -use std::iter; use std::path::Path; use std::pin::pin; @@ -13,7 +12,7 @@ use sqlx::sqlite::{ use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use tokio::fs::DirBuilder; -use relay_base_schema::project::ProjectKey; +use relay_base_schema::project::{ParseProjectKeyError, ProjectKey}; use relay_config::Config; use crate::extractors::StartTime; @@ -49,11 +48,11 @@ pub enum SqliteEnvelopeStoreError { #[error("failed to create the spool file: {0}")] FileSetupError(std::io::Error), - #[error("an error occurred while spooling envelopes: {0}")] - SpoolingError(sqlx::Error), + #[error("an error occurred while writing to disk: {0}")] + WriteError(sqlx::Error), - #[error("an error occurred while unspooling envelopes: {0}")] - UnspoolingError(sqlx::Error), + #[error("an error occurred while reading from disk: {0}")] + FetchError(sqlx::Error), #[error("no file path for the spool was provided")] NoFilePath, @@ -63,6 +62,12 @@ pub enum SqliteEnvelopeStoreError { #[error("error while extracting the envelope from the database")] EnvelopeExtractionError, + + #[error("error while extracting the project key from the database")] + ProjectKeyExtractionError(#[from] ParseProjectKeyError), + + #[error("failed to get database file size: {0}")] + FileSizeReadFailed(sqlx::Error), } #[derive(Debug, Clone)] @@ -112,7 +117,7 @@ impl SqliteEnvelopeStore { // transaction commit. Note, however, that auto-vacuum only truncates the freelist pages from the file. 
// Auto-vacuum does not defragment the database nor repack individual database pages the way that the VACUUM command does. // - // This will helps us to keep the file size under some control. + // This will help us to keep the file size under some control. .auto_vacuum(SqliteAutoVacuum::Full) // If shared-cache mode is enabled and a thread establishes multiple // connections to the same database, the connections share a single data and schema cache. @@ -176,10 +181,10 @@ impl SqliteEnvelopeStore { } pub async fn insert_many( - &mut self, - envelopes: impl Iterator, + &self, + envelopes: impl IntoIterator, ) -> Result<(), SqliteEnvelopeStoreError> { - if let Err(err) = build_insert_many_envelopes(envelopes) + if let Err(err) = build_insert_many_envelopes(envelopes.into_iter()) .build() .execute(&self.db) .await @@ -189,14 +194,14 @@ impl SqliteEnvelopeStore { "failed to spool envelopes to disk", ); - return Err(SqliteEnvelopeStoreError::SpoolingError(err)); + return Err(SqliteEnvelopeStoreError::WriteError(err)); } Ok(()) } pub async fn delete_many( - &mut self, + &self, own_key: ProjectKey, sampling_key: ProjectKey, limit: i64, @@ -244,7 +249,7 @@ impl SqliteEnvelopeStore { // fine with just logging the failure and not failing completely. if extracted_envelopes.is_empty() { if let Some(db_error) = db_error { - return Err(SqliteEnvelopeStoreError::UnspoolingError(db_error)); + return Err(SqliteEnvelopeStoreError::FetchError(db_error)); } } @@ -256,16 +261,29 @@ impl SqliteEnvelopeStore { Ok(extracted_envelopes) } - pub async fn project_keys_pairs( + pub async fn project_key_pairs( &self, - ) -> Result, SqliteEnvelopeStoreError> { - // TODO: implement. - Ok(iter::empty()) + ) -> Result, SqliteEnvelopeStoreError> { + let project_key_pairs = build_get_project_key_pairs() + .fetch_all(&self.db) + .await + .map_err(SqliteEnvelopeStoreError::FetchError)?; + + let project_key_pairs = project_key_pairs + .into_iter() + // Collect only keys we can extract. 
+ .filter_map(|project_key_pair| extract_project_key_pair(project_key_pair).ok()) + .collect(); + + Ok(project_key_pairs) } pub async fn used_size(&self) -> Result { - // TODO: implement. - Ok(10) + build_estimate_size() + .fetch_one(&self.db) + .await + .and_then(|r| r.try_get(0)) + .map_err(SqliteEnvelopeStoreError::FileSizeReadFailed) } } @@ -273,14 +291,14 @@ impl SqliteEnvelopeStore { fn extract_envelope(row: SqliteRow) -> Result, SqliteEnvelopeStoreError> { let envelope_row: Vec = row .try_get("envelope") - .map_err(SqliteEnvelopeStoreError::UnspoolingError)?; + .map_err(SqliteEnvelopeStoreError::FetchError)?; let envelope_bytes = bytes::Bytes::from(envelope_row); let mut envelope = Envelope::parse_bytes(envelope_bytes) .map_err(|_| SqliteEnvelopeStoreError::EnvelopeExtractionError)?; let received_at: i64 = row .try_get("received_at") - .map_err(SqliteEnvelopeStoreError::UnspoolingError)?; + .map_err(SqliteEnvelopeStoreError::FetchError)?; let start_time = StartTime::from_timestamp_millis(received_at as u64); envelope.set_start_time(start_time.into_inner()); @@ -288,6 +306,33 @@ fn extract_envelope(row: SqliteRow) -> Result, SqliteEnvelopeStore Ok(envelope) } +/// Deserializes a pair of [`ProjectKey`] from the database. +fn extract_project_key_pair( + row: SqliteRow, +) -> Result<(ProjectKey, ProjectKey), SqliteEnvelopeStoreError> { + let own_key = row + .try_get("own_key") + .map_err(SqliteEnvelopeStoreError::FetchError) + .and_then(|key| { + ProjectKey::parse(key).map_err(SqliteEnvelopeStoreError::ProjectKeyExtractionError) + }); + let sampling_key = row + .try_get("sampling_key") + .map_err(SqliteEnvelopeStoreError::FetchError) + .and_then(|key| { + ProjectKey::parse(key).map_err(SqliteEnvelopeStoreError::ProjectKeyExtractionError) + }); + + match (own_key, sampling_key) { + (Ok(own_key), Ok(sampling_key)) => Ok((own_key, sampling_key)), + // Report the first found error. 
+ (Err(err), _) | (_, Err(err)) => { + relay_log::error!("Failed to extract a queue key from the spool record: {err}"); + Err(err) + } + } +} + /// Builds a query that inserts many [`Envelope`]s in the database. fn build_insert_many_envelopes<'a>( envelopes: impl Iterator, @@ -327,13 +372,122 @@ pub fn build_delete_and_fetch_many_envelopes<'a>( /// Creates a query which fetches the number of used database pages multiplied by the page size. /// /// This info used to estimate the current allocated database size. -pub fn estimate_size<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { +pub fn build_estimate_size<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { sqlx::query( r#"SELECT (page_count - freelist_count) * page_size as size FROM pragma_page_count(), pragma_freelist_count(), pragma_page_size();"#, ) } /// Returns the query to select all the unique combinations of own and sampling keys. -pub fn get_keys<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { +pub fn build_get_project_key_pairs<'a>() -> Query<'a, Sqlite, SqliteArguments<'a>> { sqlx::query("SELECT DISTINCT own_key, sampling_key FROM envelopes;") } + +#[cfg(test)] +mod tests { + + use hashbrown::HashSet; + use std::collections::BTreeMap; + use std::time::{Duration, Instant}; + use uuid::Uuid; + + use relay_base_schema::project::ProjectKey; + use relay_event_schema::protocol::EventId; + use relay_sampling::DynamicSamplingContext; + + use super::*; + use crate::envelope::{Envelope, Item, ItemType}; + use crate::extractors::RequestMeta; + use crate::services::buffer::testutils::setup_db; + + fn request_meta() -> RequestMeta { + let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" + .parse() + .unwrap(); + + RequestMeta::new(dsn) + } + + fn mock_envelope(instant: Instant) -> Box { + let event_id = EventId::new(); + let mut envelope = Envelope::from_request(Some(event_id), request_meta()); + + let dsc = DynamicSamplingContext { + trace_id: Uuid::new_v4(), + public_key: 
ProjectKey::parse("b81ae32be2584e0bbd7a4cbb95971fe1").unwrap(), + release: Some("1.1.1".to_string()), + user: Default::default(), + replay_id: None, + environment: None, + transaction: Some("transaction1".into()), + sample_rate: None, + sampled: Some(true), + other: BTreeMap::new(), + }; + + envelope.set_dsc(dsc); + envelope.set_start_time(instant); + + envelope.add_item(Item::new(ItemType::Transaction)); + + envelope + } + + #[allow(clippy::vec_box)] + fn mock_envelopes(count: usize) -> Vec> { + let instant = Instant::now(); + (0..count) + .map(|i| mock_envelope(instant - Duration::from_secs((count - i) as u64))) + .collect() + } + + #[tokio::test] + async fn test_insert_and_delete_envelopes() { + let db = setup_db(true).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); + + let own_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let sampling_key = ProjectKey::parse("b81ae32be2584e0bbd7a4cbb95971fe1").unwrap(); + + // We insert 10 envelopes. + let envelopes = mock_envelopes(10); + let envelope_ids: HashSet = + envelopes.iter().filter_map(|e| e.event_id()).collect(); + assert!(envelope_store + .insert_many(envelopes.iter().map(|e| e.as_ref().into())) + .await + .is_ok()); + + // We check that if we load more than the limit, we still get back at most 10. + let extracted_envelopes = envelope_store + .delete_many(own_key, sampling_key, 15) + .await + .unwrap(); + assert_eq!(envelopes.len(), 10); + for envelope in extracted_envelopes { + assert!(envelope_ids.contains(&envelope.event_id().unwrap())); + } + } + + #[tokio::test] + async fn test_insert_and_get_project_keys_pairs() { + let db = setup_db(true).await; + let envelope_store = SqliteEnvelopeStore::new(db, 0); + + let own_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let sampling_key = ProjectKey::parse("b81ae32be2584e0bbd7a4cbb95971fe1").unwrap(); + + // We insert 10 envelopes. 
+ let envelopes = mock_envelopes(2); + assert!(envelope_store + .insert_many(envelopes.iter().map(|e| e.as_ref().into())) + .await + .is_ok()); + + // We check that we get back only one pair of project keys, since all envelopes have the + // same pair. + let project_key_pairs = envelope_store.project_key_pairs().await.unwrap(); + assert_eq!(project_key_pairs.len(), 1); + assert_eq!(project_key_pairs[0], (own_key, sampling_key)); + } +} diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index c88238029a..9ba7d4ddee 100644 --- a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -16,7 +16,6 @@ pub struct SqliteStackProvider { impl SqliteStackProvider { /// Creates a new [`SqliteStackProvider`] from the provided path to the SQLite database file. pub async fn new(config: &Config) -> Result { - // TODO: error handling let envelope_store = SqliteEnvelopeStore::prepare(config).await?; Ok(Self { envelope_store, @@ -31,13 +30,14 @@ impl StackProvider for SqliteStackProvider { fn create_stack(&self, envelope: Box) -> Self::Stack { let own_key = envelope.meta().public_key(); - // TODO: start loading from disk the initial batch of envelopes. + let sampling_key = envelope.sampling_key().unwrap_or(own_key); + SqliteEnvelopeStack::new( self.envelope_store.clone(), self.disk_batch_size, self.max_batches, own_key, - envelope.sampling_key().unwrap_or(own_key), + sampling_key, ) } } diff --git a/relay-server/src/services/buffer/testutils.rs b/relay-server/src/services/buffer/testutils.rs new file mode 100644 index 0000000000..ae67da0e17 --- /dev/null +++ b/relay-server/src/services/buffer/testutils.rs @@ -0,0 +1,36 @@ +use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; +use sqlx::{Pool, Sqlite}; +use tokio::fs::DirBuilder; +use uuid::Uuid; + +/// Sets up a temporary SQLite database for testing purposes. 
+pub async fn setup_db(run_migrations: bool) -> Pool { + let path = std::env::temp_dir().join(Uuid::new_v4().to_string()); + + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() && !parent.exists() { + relay_log::debug!("creating directory for spooling file: {}", parent.display()); + DirBuilder::new() + .recursive(true) + .create(&parent) + .await + .unwrap(); + } + } + + let options = SqliteConnectOptions::new() + .filename(&path) + .journal_mode(SqliteJournalMode::Wal) + .create_if_missing(true); + + let db = SqlitePoolOptions::new() + .connect_with(options) + .await + .unwrap(); + + if run_migrations { + sqlx::migrate!("../migrations").run(&db).await.unwrap(); + } + + db +} From af36538a5f1c829c8c566a57ab59757551306b31 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 11:20:04 +0200 Subject: [PATCH 38/62] Improve --- .../services/buffer/envelope_stack/sqlite.rs | 12 +++---- .../services/buffer/sqlite_envelope_store.rs | 36 ++++++++++--------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index e5e5bee84e..4fd4cb99d9 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -282,7 +282,7 @@ mod tests { #[should_panic] async fn test_push_with_mismatching_project_keys() { let db = setup_db(false).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 2, @@ -298,7 +298,7 @@ mod tests { #[tokio::test] async fn test_push_when_db_is_not_valid() { let db = setup_db(false).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 2, @@ -350,7 +350,7 @@ mod tests { #[tokio::test] async 
fn test_pop_when_db_is_not_valid() { let db = setup_db(false).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 2, @@ -369,7 +369,7 @@ mod tests { #[tokio::test] async fn test_pop_when_stack_is_empty() { let db = setup_db(true).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 2, @@ -386,7 +386,7 @@ mod tests { #[tokio::test] async fn test_push_below_threshold_and_pop() { let db = setup_db(true).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 5, @@ -423,7 +423,7 @@ mod tests { #[tokio::test] async fn test_push_above_threshold_and_pop() { let db = setup_db(true).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let mut stack = SqliteEnvelopeStack::new( envelope_store, 5, diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 1e2dacc4bb..e25df0adb7 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -3,6 +3,7 @@ use std::path::Path; use std::pin::pin; use futures::stream::StreamExt; +use hashbrown::HashSet; use sqlx::migrate::MigrateError; use sqlx::query::Query; use sqlx::sqlite::{ @@ -73,12 +74,12 @@ pub enum SqliteEnvelopeStoreError { #[derive(Debug, Clone)] pub struct SqliteEnvelopeStore { db: Pool, - max_disk_size: usize, } impl SqliteEnvelopeStore { - pub fn new(db: Pool, max_disk_size: usize) -> Self { - Self { db, max_disk_size } + /// Initializes the [`SqliteEnvelopeStore`] with a supplied [`Pool`]. 
+ pub fn new(db: Pool) -> Self { + Self { db } } /// Prepares the [`SqliteEnvelopeStore`] by running all the necessary migrations and preparing @@ -90,11 +91,6 @@ impl SqliteEnvelopeStore { }; relay_log::info!("buffer file {}", path.to_string_lossy()); - relay_log::info!( - "max memory size {}", - config.spool_envelopes_max_memory_size() - ); - relay_log::info!("max disk size {}", config.spool_envelopes_max_disk_size()); Self::setup(&path).await?; @@ -131,10 +127,7 @@ impl SqliteEnvelopeStore { .await .map_err(SqliteEnvelopeStoreError::SqlxSetupFailed)?; - Ok(SqliteEnvelopeStore { - db, - max_disk_size: config.spool_envelopes_max_disk_size(), - }) + Ok(SqliteEnvelopeStore { db }) } /// Set up the database and return the current number of envelopes. @@ -180,6 +173,7 @@ impl SqliteEnvelopeStore { Ok(()) } + /// Inserts one or more [`InsertEnvelope`] into the database. pub async fn insert_many( &self, envelopes: impl IntoIterator, @@ -200,6 +194,7 @@ impl SqliteEnvelopeStore { Ok(()) } + /// Deletes and returns at most `limit` [`Envelope`]s from the database. pub async fn delete_many( &self, own_key: ProjectKey, @@ -261,9 +256,11 @@ impl SqliteEnvelopeStore { Ok(extracted_envelopes) } + /// Returns a set of project key pairs, representing all the unique combinations of + /// `own_key` and `project_key` that are found in the database. pub async fn project_key_pairs( &self, - ) -> Result, SqliteEnvelopeStoreError> { + ) -> Result, SqliteEnvelopeStoreError> { let project_key_pairs = build_get_project_key_pairs() .fetch_all(&self.db) .await @@ -278,6 +275,7 @@ impl SqliteEnvelopeStore { Ok(project_key_pairs) } + /// Returns an approximate measure of the size of the database. pub async fn used_size(&self) -> Result { build_estimate_size() .fetch_one(&self.db) @@ -327,7 +325,8 @@ fn extract_project_key_pair( (Ok(own_key), Ok(sampling_key)) => Ok((own_key, sampling_key)), // Report the first found error. 
(Err(err), _) | (_, Err(err)) => { - relay_log::error!("Failed to extract a queue key from the spool record: {err}"); + relay_log::error!("failed to extract a queue key from the spool record: {err}"); + Err(err) } } @@ -444,7 +443,7 @@ mod tests { #[tokio::test] async fn test_insert_and_delete_envelopes() { let db = setup_db(true).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let own_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); let sampling_key = ProjectKey::parse("b81ae32be2584e0bbd7a4cbb95971fe1").unwrap(); @@ -472,7 +471,7 @@ mod tests { #[tokio::test] async fn test_insert_and_get_project_keys_pairs() { let db = setup_db(true).await; - let envelope_store = SqliteEnvelopeStore::new(db, 0); + let envelope_store = SqliteEnvelopeStore::new(db); let own_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); let sampling_key = ProjectKey::parse("b81ae32be2584e0bbd7a4cbb95971fe1").unwrap(); @@ -488,6 +487,9 @@ mod tests { // same pair. 
let project_key_pairs = envelope_store.project_key_pairs().await.unwrap(); assert_eq!(project_key_pairs.len(), 1); - assert_eq!(project_key_pairs[0], (own_key, sampling_key)); + assert_eq!( + project_key_pairs.into_iter().last().unwrap(), + (own_key, sampling_key) + ); } } From 5f5228aba085b715ff45ca63e2839aafa36588f9 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 11:42:31 +0200 Subject: [PATCH 39/62] Improve --- relay-config/src/config.rs | 31 ++++++++++++++++++- .../src/services/buffer/envelope_stack/mod.rs | 4 --- .../services/buffer/envelope_stack/sqlite.rs | 1 - .../services/buffer/sqlite_envelope_store.rs | 11 ++++--- .../services/buffer/stack_provider/sqlite.rs | 4 +-- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index cda759a33c..81f3e754dc 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -842,6 +842,16 @@ fn spool_envelopes_unspool_interval() -> u64 { 100 } +/// Default batch size for the stack. +fn spool_envelopes_stack_disk_batch_size() -> usize { + 1000 +} + +/// Default maximum number of batches for the stack. +fn spool_envelopes_stack_max_batches() -> usize { + 2 +} + /// Persistent buffering configuration for incoming envelopes. #[derive(Debug, Serialize, Deserialize)] pub struct EnvelopeSpool { @@ -868,7 +878,12 @@ pub struct EnvelopeSpool { /// The interval in milliseconds to trigger unspool. #[serde(default = "spool_envelopes_unspool_interval")] unspool_interval: u64, - /// Version of the spooler + /// Number of elements of the envelope stack that are flushed to disk. + stack_disk_batch_size: usize, + /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before + /// flushing one batch to disk. + stack_max_batches: usize, + /// Version of the spooler. 
#[serde(default = "EnvelopeSpoolVersion::default")] version: EnvelopeSpoolVersion, } @@ -896,6 +911,8 @@ impl Default for EnvelopeSpool { max_disk_size: spool_envelopes_max_disk_size(), max_memory_size: spool_envelopes_max_memory_size(), unspool_interval: spool_envelopes_unspool_interval(), // 100ms + stack_disk_batch_size: spool_envelopes_stack_disk_batch_size(), + stack_max_batches: spool_envelopes_stack_max_batches(), version: EnvelopeSpoolVersion::V2, } } @@ -2095,6 +2112,18 @@ impl Config { self.values.spool.envelopes.max_memory_size.as_bytes() } + /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before + /// flushing one batch to disk. + pub fn spool_envelopes_stack_disk_batch_size(&self) -> usize { + self.values.spool.envelopes.stack_disk_batch_size + } + + /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before + /// flushing one batch to disk. + pub fn spool_envelopes_stack_max_batches(&self) -> usize { + self.values.spool.envelopes.stack_max_batches + } + /// Returns `true` if version 2 of the spooling mechanism is used. pub fn spool_v2(&self) -> bool { matches!( diff --git a/relay-server/src/services/buffer/envelope_stack/mod.rs b/relay-server/src/services/buffer/envelope_stack/mod.rs index 1db4d3c82a..ee48016f09 100644 --- a/relay-server/src/services/buffer/envelope_stack/mod.rs +++ b/relay-server/src/services/buffer/envelope_stack/mod.rs @@ -15,13 +15,9 @@ pub trait EnvelopeStack: Send + std::fmt::Debug { fn push(&mut self, envelope: Box) -> impl Future>; /// Peeks the [`Envelope`] on top of the stack. - /// - /// If the stack is empty, an error is returned. fn peek(&mut self) -> impl Future, Self::Error>>; /// Pops the [`Envelope`] on top of the stack. - /// - /// If the stack is empty, an error is returned. 
fn pop(&mut self) -> impl Future>, Self::Error>>; } diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 4fd4cb99d9..1b57a0e580 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -13,7 +13,6 @@ use crate::services::buffer::sqlite_envelope_store::{ /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] pub enum SqliteEnvelopeStackError { - /// The envelope store encountered an error. #[error("an error occurred in the envelope store: {0}")] EnvelopeStoreError(#[from] SqliteEnvelopeStoreError), } diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index e25df0adb7..af1b48d036 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -19,6 +19,7 @@ use relay_config::Config; use crate::extractors::StartTime; use crate::Envelope; +/// Struct that contains all the fields of an [`Envelope`] that are mapped to the database columns. 
pub struct InsertEnvelope { received_at: i64, own_key: ProjectKey, @@ -49,22 +50,22 @@ pub enum SqliteEnvelopeStoreError { #[error("failed to create the spool file: {0}")] FileSetupError(std::io::Error), - #[error("an error occurred while writing to disk: {0}")] + #[error("failed to write to disk: {0}")] WriteError(sqlx::Error), - #[error("an error occurred while reading from disk: {0}")] + #[error("failed to read from disk: {0}")] FetchError(sqlx::Error), #[error("no file path for the spool was provided")] NoFilePath, - #[error("error during the migration of the database: {0}")] + #[error("failed to migrate the database: {0}")] MigrationError(MigrateError), - #[error("error while extracting the envelope from the database")] + #[error("failed to extract the envelope from the database")] EnvelopeExtractionError, - #[error("error while extracting the project key from the database")] + #[error("failed to extract a project key from the database")] ProjectKeyExtractionError(#[from] ParseProjectKeyError), #[error("failed to get database file size: {0}")] diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index 9ba7d4ddee..895f9f279d 100644 --- a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -19,8 +19,8 @@ impl SqliteStackProvider { let envelope_store = SqliteEnvelopeStore::prepare(config).await?; Ok(Self { envelope_store, - disk_batch_size: 100, // TODO: put in config - max_batches: 2, // TODO: put in config + disk_batch_size: config.spool_envelopes_stack_disk_batch_size(), + max_batches: config.spool_envelopes_stack_max_batches(), }) } } From 6c7cd9fccdacc2ad26ee239e51cf8ddc800fb7f0 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 11:32:30 +0200 Subject: [PATCH 40/62] little fixes --- relay-config/src/config.rs | 6 +- relay-server/src/endpoints/common.rs | 9 ++- relay-server/src/service.rs | 13 
+++-- .../services/buffer/envelope_buffer/mod.rs | 55 ++++++++++--------- .../services/buffer/envelope_stack/sqlite.rs | 2 + relay-server/src/services/buffer/mod.rs | 46 +++++++++------- relay-server/src/services/project_cache.rs | 13 ++--- tests/integration/fixtures/relay.py | 2 +- tests/integration/test_projectconfigs.py | 6 +- 9 files changed, 88 insertions(+), 64 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 81f3e754dc..0b6a2e9e8e 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -893,12 +893,16 @@ pub struct EnvelopeSpool { pub enum EnvelopeSpoolVersion { /// Use the spooler service, which only buffers envelopes for unloaded projects and /// switches between an in-memory mode and a disk mode on-demand. + /// + /// This mode will be removed soon. #[default] #[serde(rename = "1")] V1, /// Use the envelope buffer, through which all envelopes pass before getting unspooled. /// Can be either disk based or memory based. - #[serde(rename = "2")] + /// + /// This mode has not been fully stress-tested, do not use in production environments. + #[serde(rename = "experimental")] V2, } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 9d66669e1a..75f0453b31 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -259,7 +259,7 @@ pub fn event_id_from_items(items: &Items) -> Result, BadStoreReq /// /// Queueing can fail if the queue exceeds `envelope_buffer_size`. In this case, `Err` is /// returned and the envelope is not queued. -async fn queue_envelope( +fn queue_envelope( state: &ServiceState, mut managed_envelope: ManagedEnvelope, ) -> Result<(), BadStoreRequest> { @@ -310,7 +310,10 @@ async fn queue_envelope( // NOTE: This assumes that a `prefetch` has already been scheduled for both the // envelope's projects. See `handle_check_envelope`. 
relay_log::trace!("Pushing envelope to V2 buffer"); - buffer.push(envelope.into_envelope()).await; + // buffer.check_space_available()?; + tokio::spawn(async move { + buffer.push(envelope.into_envelope()).await; + }); } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); @@ -390,7 +393,7 @@ pub async fn handle_envelope( return Err(BadStoreRequest::Overflow(offender)); } - queue_envelope(state, managed_envelope).await?; + queue_envelope(state, managed_envelope)?; if checked.rate_limits.is_limited() { // Even if some envelope items have been queued, there might be active rate limits on diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 4288347094..2925ee92b8 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use std::time::Duration; use crate::metrics::{MetricOutcomes, MetricStats}; -use crate::services::buffer::EnvelopesBufferManager; +use crate::services::buffer::GuardedEnvelopeBuffer; use crate::services::stats::RelayStats; use anyhow::{Context, Result}; use axum::extract::FromRequestParts; @@ -139,7 +139,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option>, registry: Registry, } @@ -257,7 +257,7 @@ impl ServiceState { upstream_relay.clone(), global_config.clone(), ); - let envelope_buffer = EnvelopesBufferManager::from_config(&config); + let envelope_buffer = GuardedEnvelopeBuffer::from_config(&config).map(Arc::new); ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), @@ -324,8 +324,11 @@ impl ServiceState { &self.inner.memory_checker } - pub fn envelope_buffer(&self) -> Option<&EnvelopesBufferManager> { - self.inner.envelope_buffer.as_ref() + /// Returns the V2 envelope buffer, if present. + /// + /// Clones the inner Arc. 
+ pub fn envelope_buffer(&self) -> Option> { + self.inner.envelope_buffer.clone() } /// Returns the address of the [`ProjectCache`] service. diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 74f1d43a21..e8b4a6d77a 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -1,10 +1,7 @@ use std::cmp::Ordering; use std::collections::BTreeSet; -use std::sync::Arc; use std::time::Instant; -use tokio::sync::Mutex; - use relay_base_schema::project::ProjectKey; use relay_config::Config; @@ -13,25 +10,29 @@ use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; -/// Creates a memory or disk based [`EnvelopesBuffer`], depending on the given config. -pub fn create(_config: &Config) -> Arc> { - Arc::new(Mutex::new(EnvelopesBuffer::InMemory( - InnerEnvelopesBuffer::::new(), - ))) -} - +/// Polymorphic envelope buffering interface. +/// +/// The underlying buffer can either be disk-based or memory-based, +/// depending on the given configuration. +/// +/// NOTE: This is implemented as an enum because a trait object with async methods would not be +/// object safe. 
#[derive(Debug)] -pub enum EnvelopesBuffer { - InMemory(InnerEnvelopesBuffer), - Sqlite(InnerEnvelopesBuffer), +#[allow(private_interfaces)] +pub enum PolymorphicEnvelopeBuffer { + InMemory(EnvelopeBuffer), + #[allow(dead_code)] + Sqlite(EnvelopeBuffer), } -impl EnvelopesBuffer { - pub async fn from_config(config: &Config) -> Self { - match config.spool_envelopes_path() { - Some(_) => Self::Sqlite(InnerEnvelopesBuffer::::new(config).await), - None => Self::InMemory(InnerEnvelopesBuffer::::new()), +impl PolymorphicEnvelopeBuffer { + /// Creates either a memory-based or a disk-based envelope buffer, + /// depending on the given configuration. + pub fn from_config(config: &Config) -> Self { + if config.spool_envelopes_path().is_some() { + panic!("Disk backend not yet supported for spool V2"); } + Self::InMemory(EnvelopeBuffer::::new()) } pub async fn push(&mut self, envelope: Box) { @@ -68,15 +69,19 @@ impl EnvelopesBuffer { /// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope /// is pushed, popped, or when a project becomes ready. #[derive(Debug)] -struct InnerEnvelopesBuffer { +struct EnvelopeBuffer { /// The central priority queue. priority_queue: priority_queue::PriorityQueue, Priority>, /// A lookup table to find all stacks involving a project. stacks_by_project: hashbrown::HashMap>, + /// A helper to create new stacks. + /// + /// This indirection is needed because different stack implementations might need different + /// initialization (e.g. a database connection). stack_provider: P, } -impl InnerEnvelopesBuffer { +impl EnvelopeBuffer { /// Creates an empty buffer. pub fn new() -> Self { Self { @@ -86,7 +91,7 @@ impl InnerEnvelopesBuffer { } } } -impl InnerEnvelopesBuffer { +impl EnvelopeBuffer { /// Creates an empty buffer. pub async fn new(config: &Config) -> Self { Self { @@ -98,7 +103,7 @@ impl InnerEnvelopesBuffer { } } -impl InnerEnvelopesBuffer

{ +impl EnvelopeBuffer

{ fn push_stack(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -350,7 +355,7 @@ mod tests { #[tokio::test] async fn insert_pop() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = EnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); @@ -425,7 +430,7 @@ mod tests { #[tokio::test] async fn project_internal_order() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = EnvelopeBuffer::::new(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); @@ -446,7 +451,7 @@ mod tests { #[tokio::test] async fn sampling_projects() { - let mut buffer = InnerEnvelopesBuffer::::new(); + let mut buffer = EnvelopeBuffer::::new(); let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 1b57a0e580..9a6af29470 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -196,11 +196,13 @@ impl EnvelopeStack for SqliteEnvelopeStack { async fn pop(&mut self) -> Result>, Self::Error> { if self.below_unspool_threshold() && self.check_disk { + relay_log::trace!("Unspool from disk"); self.unspool_from_disk().await? 
} let result = self.batches_buffer.back_mut().and_then(|last_batch| { self.batches_buffer_size -= 1; + relay_log::trace!("Popping from memory"); last_batch.pop() }); if result.is_none() { diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 181dcf87b1..2e299f03fd 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,16 +1,15 @@ //! Types for buffering envelopes. use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; use tokio::sync::MutexGuard; -pub use envelope_stack::sqlite::SqliteEnvelopeStack; -pub use envelope_stack::EnvelopeStack; +pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks +pub use envelope_stack::EnvelopeStack; // pub for benchmarks use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::envelope::Envelope; -use crate::services::buffer::envelope_buffer::EnvelopesBuffer; +use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; mod envelope_buffer; mod envelope_stack; @@ -21,11 +20,11 @@ mod testutils; /// Async envelope buffering interface. /// /// Access to the buffer is synchronized by a tokio lock. -#[derive(Debug, Clone)] -pub struct EnvelopesBufferManager { +#[derive(Debug)] +pub struct GuardedEnvelopeBuffer { /// TODO: Reconsider synchronization mechanism. /// We can either - /// - keep the interface sync and use a std Mutex. In this case, we create a queue of threads. + /// - make the interface sync and use a std Mutex. In this case, we create a queue of threads. /// - use an async interface with a tokio mutex. In this case, we create a queue of futures. /// - use message passing (service or channel). In this case, we create a queue of messages. /// @@ -34,23 +33,26 @@ pub struct EnvelopesBufferManager { /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] 
when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. - backend: Arc>, - notify: Arc, - changed: Arc, + backend: tokio::sync::Mutex, + notify: tokio::sync::Notify, + changed: AtomicBool, } -impl EnvelopesBufferManager { +impl GuardedEnvelopeBuffer { /// Creates a memory or disk based [`EnvelopesBufferManager`], depending on the given config. /// /// NOTE: until the V1 spooler implementation is removed, this function returns `None` /// if V2 spooling is not configured. pub fn from_config(config: &Config) -> Option { - // TODO: create a disk-based backend if db config is given (loads stacks from db). - config.spool_v2().then(|| Self { - backend: envelope_buffer::create(config), - notify: Arc::new(tokio::sync::Notify::new()), - changed: Arc::new(AtomicBool::new(true)), - }) + if config.spool_v2() { + Some(Self { + backend: tokio::sync::Mutex::new(PolymorphicEnvelopeBuffer::from_config(config)), + notify: tokio::sync::Notify::new(), + changed: AtomicBool::new(true), + }) + } else { + None + } } /// Adds an envelope to the buffer and wakes any waiting consumers. @@ -102,7 +104,7 @@ impl EnvelopesBufferManager { /// /// Objects of this type can only exist if the buffer is not empty. 
pub struct Peek<'a> { - guard: MutexGuard<'a, EnvelopesBuffer>, + guard: MutexGuard<'a, PolymorphicEnvelopeBuffer>, notify: &'a tokio::sync::Notify, changed: &'a AtomicBool, } @@ -149,6 +151,7 @@ mod tests { use std::str::FromStr; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; + use std::sync::Arc; use std::time::Duration; use relay_common::Dsn; @@ -228,18 +231,19 @@ mod tests { assert_eq!(call_count.load(Ordering::Relaxed), 2); } - fn new_buffer() -> EnvelopesBufferManager { - EnvelopesBufferManager::from_config( + fn new_buffer() -> Arc { + GuardedEnvelopeBuffer::from_config( &Config::from_json_value(serde_json::json!({ "spool": { "envelopes": { - "version": "2" + "version": "experimental" } } })) .unwrap(), ) .unwrap() + .into() } fn new_envelope() -> Box { diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index c184e975d2..cc139f4af3 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,7 +5,7 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; -use crate::services::buffer::{EnvelopesBufferManager, Peek}; +use crate::services::buffer::{GuardedEnvelopeBuffer, Peek}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; @@ -569,7 +569,7 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, // TODO: Make non-optional when spool_v1 is removed. - envelope_buffer: Option, + envelope_buffer: Option>, services: Services, metric_outcomes: MetricOutcomes, // Need hashbrown because extract_if is not stable in std yet. 
@@ -1265,7 +1265,7 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option>, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1276,7 +1276,7 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, - envelope_buffer: Option, + envelope_buffer: Option>, services: Services, metric_outcomes: MetricOutcomes, redis: Option, @@ -1438,7 +1438,6 @@ impl Service for ProjectCacheService { }) } peek = peek_buffer(&envelope_buffer) => { - relay_log::trace!("Peeking at envelope"); metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { broker.peek_at_envelope(peek).await; // TODO: make sync again? }) @@ -1453,7 +1452,7 @@ impl Service for ProjectCacheService { } /// Temporary helper function while V1 spool eixsts. -async fn peek_buffer(buffer: &Option) -> Peek { +async fn peek_buffer(buffer: &Option>) -> Peek { match buffer { Some(buffer) => buffer.peek().await, None => std::future::pending().await, @@ -1534,7 +1533,7 @@ mod tests { .unwrap() .into(); let memory_checker = MemoryChecker::new(MemoryStat::default(), config.clone()); - let envelope_buffer = EnvelopesBufferManager::from_config(&config); + let envelope_buffer = GuardedEnvelopeBuffer::from_config(&config).map(Arc::new); let buffer_services = spooler::Services { outcome_aggregator: services.outcome_aggregator.clone(), project_cache: services.project_cache.clone(), diff --git a/tests/integration/fixtures/relay.py b/tests/integration/fixtures/relay.py index 905117ef49..fc55f0d031 100644 --- a/tests/integration/fixtures/relay.py +++ b/tests/integration/fixtures/relay.py @@ -148,7 +148,7 @@ def inner( }, "spool": { # Unspool as quickly as possible - "envelopes": {"unspool_interval": 1, "version": "2"}, + "envelopes": {"unspool_interval": 1, "version": "experimental"}, }, } diff --git a/tests/integration/test_projectconfigs.py 
b/tests/integration/test_projectconfigs.py index 32a712e22a..37bf51c2ce 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -254,7 +254,11 @@ def test_unparsable_project_config(buffer_config, mini_sentry, relay): dbfile = os.path.join(temp, "buffer.db") # set the buffer to something low to force the spooling relay_config["spool"] = { - "envelopes": {"path": dbfile, "max_memory_size": 1000, "version": "2"} + "envelopes": { + "path": dbfile, + "max_memory_size": 1000, + "version": "experimental", + } } relay = relay(mini_sentry, relay_config) From 2ccd34d19fd2c88a80c747816d855a36df9b56ce Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 12:39:57 +0200 Subject: [PATCH 41/62] error handling --- relay-server/src/endpoints/common.rs | 7 +- .../services/buffer/envelope_buffer/mod.rs | 158 ++++++++++++------ relay-server/src/services/buffer/mod.rs | 59 ++++--- relay-server/src/services/project_cache.rs | 26 +-- 4 files changed, 164 insertions(+), 86 deletions(-) diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 75f0453b31..57f5e11a4f 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -312,7 +312,12 @@ fn queue_envelope( relay_log::trace!("Pushing envelope to V2 buffer"); // buffer.check_space_available()?; tokio::spawn(async move { - buffer.push(envelope.into_envelope()).await; + if let Err(e) = buffer.push(envelope.into_envelope()).await { + relay_log::error!( + error = &e as &dyn std::error::Error, + "failed to push envelope" + ); + } }); } None => { diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index e8b4a6d77a..c1af57e5d6 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -1,12 +1,15 @@ use std::cmp::Ordering; use std::collections::BTreeSet; +use 
std::convert::Infallible; use std::time::Instant; use relay_base_schema::project::ProjectKey; use relay_config::Config; use crate::envelope::Envelope; +use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; +use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStoreError; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; @@ -35,21 +38,21 @@ impl PolymorphicEnvelopeBuffer { Self::InMemory(EnvelopeBuffer::::new()) } - pub async fn push(&mut self, envelope: Box) { + pub async fn push(&mut self, envelope: Box) -> Result<(), EnvelopeBufferError> { match self { Self::Sqlite(buffer) => buffer.push(envelope).await, Self::InMemory(buffer) => buffer.push(envelope).await, } } - pub async fn peek(&mut self) -> Option<&Envelope> { + pub async fn peek(&mut self) -> Result, EnvelopeBufferError> { match self { Self::Sqlite(buffer) => buffer.peek().await, Self::InMemory(buffer) => buffer.peek().await, } } - pub async fn pop(&mut self) -> Option> { + pub async fn pop(&mut self) -> Result>, EnvelopeBufferError> { match self { Self::Sqlite(buffer) => buffer.pop().await, Self::InMemory(buffer) => buffer.pop().await, @@ -64,6 +67,15 @@ impl PolymorphicEnvelopeBuffer { } } +/// Error that occurs while interacting with the envelope buffer. +#[derive(Debug, thiserror::Error)] +pub enum EnvelopeBufferError { + #[error("sqlite")] + Sqlite(#[from] SqliteEnvelopeStackError), + #[error("impossible")] + Impossible(#[from] Infallible), +} + /// An envelope buffer that holds an individual stack for each project/sampling project combination. /// /// Envelope stacks are organized in a priority queue, and are reprioritized every time an envelope @@ -93,17 +105,20 @@ impl EnvelopeBuffer { } impl EnvelopeBuffer { /// Creates an empty buffer. 
- pub async fn new(config: &Config) -> Self { - Self { + pub async fn new(config: &Config) -> Result { + Ok(Self { stacks_by_project: Default::default(), priority_queue: Default::default(), // TODO: handle error. - stack_provider: SqliteStackProvider::new(config).await.unwrap(), - } + stack_provider: SqliteStackProvider::new(config).await?, + }) } } -impl EnvelopeBuffer

{ +impl EnvelopeBuffer

+where + EnvelopeBufferError: std::convert::From<::Error>, +{ fn push_stack(&mut self, envelope: Box) { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -133,7 +148,7 @@ impl EnvelopeBuffer

{ self.priority_queue.remove(&stack_key); } - pub async fn push(&mut self, envelope: Box) { + pub async fn push(&mut self, envelope: Box) -> Result<(), EnvelopeBufferError> { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); if let Some(( @@ -144,35 +159,42 @@ impl EnvelopeBuffer

{ _, )) = self.priority_queue.get_mut(&stack_key) { - stack.push(envelope).await.unwrap(); // TODO: handle errors + stack.push(envelope).await?; } else { self.push_stack(envelope); } self.priority_queue.change_priority_by(&stack_key, |prio| { prio.received_at = received_at; }); + + Ok(()) } - pub async fn peek(&mut self) -> Option<&Envelope> { - let ( + pub async fn peek(&mut self) -> Result, EnvelopeBufferError> { + let Some(( QueueItem { key: _, value: stack, }, _, - ) = self.priority_queue.peek_mut()?; - stack.peek().await.unwrap() // TODO: handle errors + )) = self.priority_queue.peek_mut() + else { + return Ok(None); + }; + + Ok(stack.peek().await?) } - pub async fn pop(&mut self) -> Option> { - let (QueueItem { key, value: stack }, _) = self.priority_queue.peek_mut()?; + pub async fn pop(&mut self) -> Result>, EnvelopeBufferError> { + let Some((QueueItem { key, value: stack }, _)) = self.priority_queue.peek_mut() else { + return Ok(None); + }; let stack_key = *key; let envelope = stack.pop().await.unwrap().expect("found an empty stack"); let next_received_at = stack .peek() - .await - .unwrap() // TODO: handle error + .await? .map(|next_envelope| next_envelope.meta().start_time()); match next_received_at { None => { @@ -184,7 +206,7 @@ impl EnvelopeBuffer

{ }); } } - Some(envelope) + Ok(Some(envelope)) } pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { @@ -361,71 +383,71 @@ mod tests { let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); let project_key3 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fef").unwrap(); - assert!(buffer.pop().await.is_none()); - assert!(buffer.peek().await.is_none()); + assert!(buffer.pop().await.unwrap().is_none()); + assert!(buffer.peek().await.unwrap().is_none()); - buffer.push(new_envelope(project_key1, None)).await; + buffer.push(new_envelope(project_key1, None)).await.unwrap(); assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key1 ); - buffer.push(new_envelope(project_key2, None)).await; + buffer.push(new_envelope(project_key2, None)).await.unwrap(); // Both projects are not ready, so project 1 is on top (has the oldest envelopes): assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key1 ); - buffer.push(new_envelope(project_key3, None)).await; + buffer.push(new_envelope(project_key3, None)).await.unwrap(); // All projects are not ready, so project 1 is on top (has the oldest envelopes): assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key1 ); // After marking a project ready, it goes to the top: buffer.mark_ready(&project_key3, true); assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key3 ); assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), + buffer.pop().await.unwrap().unwrap().meta().public_key(), project_key3 ); // After popping, project 1 is on top again: assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), 
project_key1 ); // Mark project 1 as ready (still on top): buffer.mark_ready(&project_key1, true); assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key1 ); // Mark project 2 as ready as well (now on top because most recent): buffer.mark_ready(&project_key2, true); assert_eq!( - buffer.peek().await.unwrap().meta().public_key(), + buffer.peek().await.unwrap().unwrap().meta().public_key(), project_key2 ); assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), + buffer.pop().await.unwrap().unwrap().meta().public_key(), project_key2 ); // Pop last element: assert_eq!( - buffer.pop().await.unwrap().meta().public_key(), + buffer.pop().await.unwrap().unwrap().meta().public_key(), project_key1 ); - assert!(buffer.pop().await.is_none()); - assert!(buffer.peek().await.is_none()); + assert!(buffer.pop().await.unwrap().is_none()); + assert!(buffer.peek().await.unwrap().is_none()); } #[tokio::test] @@ -441,12 +463,18 @@ mod tests { assert!(instant2 > instant1); - buffer.push(envelope1).await; - buffer.push(envelope2).await; + buffer.push(envelope1).await.unwrap(); + buffer.push(envelope2).await.unwrap(); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); - assert!(buffer.pop().await.is_none()); + assert_eq!( + buffer.pop().await.unwrap().unwrap().meta().start_time(), + instant2 + ); + assert_eq!( + buffer.pop().await.unwrap().unwrap().meta().start_time(), + instant1 + ); + assert!(buffer.pop().await.unwrap().is_none()); } #[tokio::test] @@ -458,40 +486,64 @@ mod tests { let envelope1 = new_envelope(project_key1, None); let instant1 = envelope1.meta().start_time(); - buffer.push(envelope1).await; + buffer.push(envelope1).await.unwrap(); let envelope2 = new_envelope(project_key2, None); let instant2 = envelope2.meta().start_time(); - buffer.push(envelope2).await; + 
buffer.push(envelope2).await.unwrap(); let envelope3 = new_envelope(project_key1, Some(project_key2)); let instant3 = envelope3.meta().start_time(); - buffer.push(envelope3).await; + buffer.push(envelope3).await.unwrap(); // Nothing is ready, instant1 is on top: - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + assert_eq!( + buffer.peek().await.unwrap().unwrap().meta().start_time(), + instant1 + ); // Mark project 2 ready, gets on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); + assert_eq!( + buffer.peek().await.unwrap().unwrap().meta().start_time(), + instant2 + ); // Revert buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + assert_eq!( + buffer.peek().await.unwrap().unwrap().meta().start_time(), + instant1 + ); // Project 1 ready: buffer.mark_ready(&project_key1, true); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant1); + assert_eq!( + buffer.peek().await.unwrap().unwrap().meta().start_time(), + instant1 + ); // when both projects are ready, event no 3 ends up on top: buffer.mark_ready(&project_key2, true); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant3); - assert_eq!(buffer.peek().await.unwrap().meta().start_time(), instant2); + assert_eq!( + buffer.pop().await.unwrap().unwrap().meta().start_time(), + instant3 + ); + assert_eq!( + buffer.peek().await.unwrap().unwrap().meta().start_time(), + instant2 + ); buffer.mark_ready(&project_key2, false); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant1); - assert_eq!(buffer.pop().await.unwrap().meta().start_time(), instant2); + assert_eq!( + buffer.pop().await.unwrap().unwrap().meta().start_time(), + instant1 + ); + assert_eq!( + buffer.pop().await.unwrap().unwrap().meta().start_time(), + instant2 + ); - assert!(buffer.pop().await.is_none()); + assert!(buffer.pop().await.unwrap().is_none()); } 
} diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 2e299f03fd..65e92f6c32 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -17,6 +17,8 @@ mod sqlite_envelope_store; mod stack_provider; mod testutils; +pub use envelope_buffer::EnvelopeBufferError; + /// Async envelope buffering interface. /// /// Access to the buffer is synchronized by a tokio lock. @@ -56,10 +58,11 @@ impl GuardedEnvelopeBuffer { } /// Adds an envelope to the buffer and wakes any waiting consumers. - pub async fn push(&self, envelope: Box) { + pub async fn push(&self, envelope: Box) -> Result<(), EnvelopeBufferError> { let mut guard = self.backend.lock().await; - guard.push(envelope).await; + guard.push(envelope).await?; self.notify(); + Ok(()) } /// Returns a reference to the next-in-line envelope. @@ -70,12 +73,24 @@ impl GuardedEnvelopeBuffer { loop { { let mut guard = self.backend.lock().await; - if self.changed.load(Ordering::Relaxed) && guard.peek().await.is_some() { - self.changed.store(false, Ordering::Relaxed); - return Peek { - guard, - changed: &self.changed, - notify: &self.notify, + if self.changed.load(Ordering::Relaxed) { + match guard.peek().await { + Ok(envelope) => { + if envelope.is_some() { + self.changed.store(false, Ordering::Relaxed); + return Peek { + guard, + changed: &self.changed, + notify: &self.notify, + }; + } + } + Err(error) => { + relay_log::error!( + error = &error as &dyn std::error::Error, + "failed to peek envelope" + ); + } }; } } @@ -111,22 +126,24 @@ pub struct Peek<'a> { impl Peek<'_> { /// Returns a reference to the next envelope. - pub async fn get(&mut self) -> &Envelope { - self.guard + pub async fn get(&mut self) -> Result<&Envelope, EnvelopeBufferError> { + Ok(self + .guard .peek() - .await - .expect("element disappeared while holding lock") + .await? + .expect("element disappeared while holding lock")) } /// Pops the next envelope from the buffer. 
/// /// This functions consumes the [`Peek`]. - pub async fn remove(mut self) -> Box { + pub async fn remove(mut self) -> Result, EnvelopeBufferError> { self.notify(); - self.guard + Ok(self + .guard .pop() - .await - .expect("element disappeared while holding lock") + .await? + .expect("element disappeared while holding lock")) } /// Sync version of [`EnvelopesBufferManager::mark_ready`]. @@ -171,7 +188,7 @@ mod tests { let cloned_call_count = call_count.clone(); tokio::spawn(async move { loop { - cloned_buffer.peek().await.remove().await; + cloned_buffer.peek().await.remove().await.unwrap(); cloned_call_count.fetch_add(1, Ordering::Relaxed); } }); @@ -182,14 +199,14 @@ mod tests { assert_eq!(call_count.load(Ordering::Relaxed), 0); // State after push: one call - buffer.push(new_envelope()).await; + buffer.push(new_envelope()).await.unwrap(); tokio::time::advance(Duration::from_nanos(1)).await; assert_eq!(call_count.load(Ordering::Relaxed), 1); tokio::time::advance(Duration::from_nanos(1)).await; assert_eq!(call_count.load(Ordering::Relaxed), 1); // State after second push: two calls - buffer.push(new_envelope()).await; + buffer.push(new_envelope()).await.unwrap(); tokio::time::advance(Duration::from_nanos(1)).await; assert_eq!(call_count.load(Ordering::Relaxed), 2); tokio::time::advance(Duration::from_nanos(1)).await; @@ -212,7 +229,7 @@ mod tests { } }); - buffer.push(new_envelope()).await; + buffer.push(new_envelope()).await.unwrap(); // Initial state: no calls assert_eq!(call_count.load(Ordering::Relaxed), 0); @@ -226,7 +243,7 @@ mod tests { assert_eq!(call_count.load(Ordering::Relaxed), 1); // State after second push: two calls - buffer.push(new_envelope()).await; + buffer.push(new_envelope()).await.unwrap(); tokio::time::advance(Duration::from_nanos(1)).await; assert_eq!(call_count.load(Ordering::Relaxed), 2); } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index cc139f4af3..90b7b1f90f 100644 --- 
a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -5,7 +5,7 @@ use std::time::Duration; use crate::extractors::RequestMeta; use crate::metrics::MetricOutcomes; -use crate::services::buffer::{GuardedEnvelopeBuffer, Peek}; +use crate::services::buffer::{EnvelopeBufferError, GuardedEnvelopeBuffer, Peek}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::{Config, RelayMode}; @@ -1044,19 +1044,19 @@ impl ProjectCacheBroker { } } - async fn peek_at_envelope(&mut self, mut peek: Peek<'_>) { - let envelope = peek.get().await; + async fn peek_at_envelope(&mut self, mut peek: Peek<'_>) -> Result<(), EnvelopeBufferError> { + let envelope = peek.get().await?; // TODO: make envelope age configurable. if envelope.meta().start_time().elapsed() > MAX_ENVELOPE_AGE { let mut managed_envelope = ManagedEnvelope::new( - peek.remove().await, + peek.remove().await?, self.services.outcome_aggregator.clone(), self.services.test_store.clone(), ProcessingGroup::Ungrouped, ); managed_envelope.reject(Outcome::Invalid(DiscardReason::Timestamp)); // TODO: metrics in all branches. 
- return; + return Ok(()); } let sampling_key = envelope.sampling_key(); let services = self.services.clone(); @@ -1072,17 +1072,17 @@ impl ProjectCacheBroker { } ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( - peek.remove().await, + peek.remove().await?, self.services.outcome_aggregator.clone(), self.services.test_store.clone(), ProcessingGroup::Ungrouped, ); managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); - return; + return Ok(()); } ProjectState::Pending => { peek.mark_ready(&project_key, false); - return; + return Ok(()); } }; @@ -1103,14 +1103,14 @@ impl ProjectCacheBroker { } Some((sampling_key, ProjectState::Pending)) => { peek.mark_ready(&sampling_key, false); - return; + return Ok(()); } None => None, }; let project = self.get_or_create_project(project_key); - for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove().await) { + for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove().await?) { let managed_envelope = ManagedEnvelope::new( envelope, services.outcome_aggregator.clone(), @@ -1134,6 +1134,8 @@ impl ProjectCacheBroker { reservoir_counters, }); } + + Ok(()) } /// Returns backoff timeout for an unspool attempt. @@ -1439,7 +1441,9 @@ impl Service for ProjectCacheService { } peek = peek_buffer(&envelope_buffer) => { metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "peek_at_envelope", { - broker.peek_at_envelope(peek).await; // TODO: make sync again? 
+ if let Err(e) = broker.peek_at_envelope(peek).await { + relay_log::error!(error = &e as &dyn std::error::Error, "Failed to peek envelope"); + } }) } else => break, From b6262f27dec65ca5df21f6d3b8764796819904a5 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 12:52:33 +0200 Subject: [PATCH 42/62] metrics --- .../src/services/buffer/envelope_buffer/mod.rs | 11 ++++++++--- .../src/services/buffer/envelope_stack/sqlite.rs | 5 +++++ relay-server/src/services/spooler/mod.rs | 6 +++--- relay-server/src/services/spooler/sql.rs | 2 +- relay-server/src/statsd.rs | 16 ++++++++-------- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index c1af57e5d6..a759f515bb 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -12,6 +12,7 @@ use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStoreError; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; +use crate::statsd::RelayCounters; /// Polymorphic envelope buffering interface. 
/// @@ -42,7 +43,9 @@ impl PolymorphicEnvelopeBuffer { match self { Self::Sqlite(buffer) => buffer.push(envelope).await, Self::InMemory(buffer) => buffer.push(envelope).await, - } + }?; + relay_statsd::metric!(counter(RelayCounters::BufferEnvelopesWritten) += 1); + Ok(()) } pub async fn peek(&mut self) -> Result, EnvelopeBufferError> { @@ -53,10 +56,12 @@ impl PolymorphicEnvelopeBuffer { } pub async fn pop(&mut self) -> Result>, EnvelopeBufferError> { - match self { + let envelope = match self { Self::Sqlite(buffer) => buffer.pop().await, Self::InMemory(buffer) => buffer.pop().await, - } + }?; + relay_statsd::metric!(counter(RelayCounters::BufferEnvelopesRead) += 1); + Ok(envelope) } pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 9a6af29470..3b17c3d088 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -9,6 +9,7 @@ use crate::services::buffer::envelope_stack::EnvelopeStack; use crate::services::buffer::sqlite_envelope_store::{ SqliteEnvelopeStore, SqliteEnvelopeStoreError, }; +use crate::statsd::RelayCounters; /// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] @@ -101,6 +102,8 @@ impl SqliteEnvelopeStack { .await .map_err(SqliteEnvelopeStackError::EnvelopeStoreError)?; + relay_statsd::metric!(counter(RelayCounters::BufferWritesDisk) += 1); + // If we successfully spooled to disk, we know that data should be there. 
self.check_disk = true; @@ -125,6 +128,8 @@ impl SqliteEnvelopeStack { .await .map_err(SqliteEnvelopeStackError::EnvelopeStoreError)?; + relay_statsd::metric!(counter(RelayCounters::BufferReadsDisk) += 1); + if envelopes.is_empty() { // In case no envelopes were unspooled, we will mark the disk as empty until another // round of spooling takes place. diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index e2492c8241..00265b0e92 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -563,7 +563,7 @@ impl OnDisk { .fetch(&self.db) .peekable(); let mut envelopes = pin!(envelopes); - relay_statsd::metric!(counter(RelayCounters::BufferReads) += 1); + relay_statsd::metric!(counter(RelayCounters::BufferReadsDisk) += 1); // Stream is empty, we can break the loop, since we read everything by now. if envelopes.as_mut().peek().await.is_none() { @@ -630,7 +630,7 @@ impl OnDisk { .fetch(&self.db) .peekable(); let mut envelopes = pin!(envelopes); - relay_statsd::metric!(counter(RelayCounters::BufferReads) += 1); + relay_statsd::metric!(counter(RelayCounters::BufferReadsDisk) += 1); // Stream is empty, we can break the loop, since we read everything by now. 
if envelopes.as_mut().peek().await.is_none() { break; @@ -754,7 +754,7 @@ impl OnDisk { .map_err(BufferError::InsertFailed)?; self.track_count(1); - relay_statsd::metric!(counter(RelayCounters::BufferWrites) += 1); + relay_statsd::metric!(counter(RelayCounters::BufferWritesDisk) += 1); Ok(()) } diff --git a/relay-server/src/services/spooler/sql.rs b/relay-server/src/services/spooler/sql.rs index aab3450d4e..02d0b66d3c 100644 --- a/relay-server/src/services/spooler/sql.rs +++ b/relay-server/src/services/spooler/sql.rs @@ -148,7 +148,7 @@ pub async fn do_insert( while let Some(chunk) = envelopes.next().await { let result = build_insert(&mut query_builder, chunk).execute(db).await?; count += result.rows_affected(); - relay_statsd::metric!(counter(RelayCounters::BufferWrites) += 1); + relay_statsd::metric!(counter(RelayCounters::BufferWritesDisk) += 1); // Reset the builder to initial state set by `QueryBuilder::new` function, // so it can be reused for another chunk. diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 0629ac9d47..c166a2bc4e 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -576,13 +576,13 @@ pub enum RelayCounters { /// - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if /// there was an error or bug. EnvelopeRejected, - /// Number times the envelope buffer spools to disk. - BufferWrites, - /// Number times the envelope buffer reads back from disk. - BufferReads, - /// Number of _envelopes_ the envelope buffer spools to disk. + /// Number of times the envelope buffer spools to disk. + BufferWritesDisk, + /// Number of times the envelope buffer reads back from disk. + BufferReadsDisk, + /// Number of _envelopes_ the envelope buffer ingests. BufferEnvelopesWritten, - /// Number of _envelopes_ the envelope buffer reads back from disk. + /// Number of _envelopes_ the envelope buffer produces. BufferEnvelopesRead, /// Number of state changes in the envelope buffer. 
/// This metric is tagged with: @@ -794,8 +794,8 @@ impl CounterMetric for RelayCounters { RelayCounters::EventCorrupted => "event.corrupted", RelayCounters::EnvelopeAccepted => "event.accepted", RelayCounters::EnvelopeRejected => "event.rejected", - RelayCounters::BufferWrites => "buffer.writes", - RelayCounters::BufferReads => "buffer.reads", + RelayCounters::BufferWritesDisk => "buffer.writes", + RelayCounters::BufferReadsDisk => "buffer.reads", RelayCounters::BufferEnvelopesWritten => "buffer.envelopes_written", RelayCounters::BufferEnvelopesRead => "buffer.envelopes_read", RelayCounters::BufferStateTransition => "buffer.state.transition", From 3a1258995b227d513474c1e3f342c9bc8fd30b6d Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 13:20:55 +0200 Subject: [PATCH 43/62] Improve --- .../services/buffer/envelope_stack/sqlite.rs | 5 ++-- .../services/buffer/sqlite_envelope_store.rs | 26 ++++++++++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 3b17c3d088..2353bf7be8 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -90,8 +90,9 @@ impl SqliteEnvelopeStack { }; self.batches_buffer_size -= envelopes.len(); - // We convert envelopes into a format which simplifies insertion in the store. - let envelopes = envelopes.iter().map(|e| e.as_ref().into()); + // We convert envelopes into a format which simplifies insertion in the store. If an + // envelope can't be serialized, we will not insert it. + let envelopes = envelopes.iter().filter_map(|e| e.as_ref().try_into().ok()); // When early return here, we are acknowledging that the elements that we popped from // the buffer are lost in case of failure. 
We are doing this on purposes, since if we were diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index af1b48d036..ee0d3285e8 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -16,6 +16,7 @@ use tokio::fs::DirBuilder; use relay_base_schema::project::{ParseProjectKeyError, ProjectKey}; use relay_config::Config; +use crate::envelope::EnvelopeError; use crate::extractors::StartTime; use crate::Envelope; @@ -27,17 +28,28 @@ pub struct InsertEnvelope { encoded_envelope: Vec, } -impl<'a> From<&'a Envelope> for InsertEnvelope { - fn from(value: &'a Envelope) -> Self { +impl<'a> TryFrom<&'a Envelope> for InsertEnvelope { + type Error = EnvelopeError; + + fn try_from(value: &'a Envelope) -> Result { let own_key = value.meta().public_key(); let sampling_key = value.sampling_key().unwrap_or(own_key); - InsertEnvelope { + let encoded_envelope = match value.to_vec() { + Ok(encoded_envelope) => encoded_envelope, + Err(err) => { + relay_log::error!(error = &err as &dyn Error, "failed to serialize envelope",); + + return Err(err); + } + }; + + Ok(InsertEnvelope { received_at: value.received_at().timestamp_millis(), own_key, sampling_key, - encoded_envelope: value.to_vec().unwrap(), - } + encoded_envelope, + }) } } @@ -454,7 +466,7 @@ mod tests { let envelope_ids: HashSet = envelopes.iter().filter_map(|e| e.event_id()).collect(); assert!(envelope_store - .insert_many(envelopes.iter().map(|e| e.as_ref().into())) + .insert_many(envelopes.iter().map(|e| e.as_ref().try_into().unwrap())) .await .is_ok()); @@ -480,7 +492,7 @@ mod tests { // We insert 10 envelopes. 
let envelopes = mock_envelopes(2); assert!(envelope_store - .insert_many(envelopes.iter().map(|e| e.as_ref().into())) + .insert_many(envelopes.iter().map(|e| e.as_ref().try_into().unwrap())) .await .is_ok()); From be9e4e5b032dd0c17014ae1dd276745bdf4e461b Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 13:21:29 +0200 Subject: [PATCH 44/62] Improve --- relay-server/src/services/buffer/sqlite_envelope_store.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index ee0d3285e8..8104410d34 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -38,7 +38,12 @@ impl<'a> TryFrom<&'a Envelope> for InsertEnvelope { let encoded_envelope = match value.to_vec() { Ok(encoded_envelope) => encoded_envelope, Err(err) => { - relay_log::error!(error = &err as &dyn Error, "failed to serialize envelope",); + relay_log::error!( + error = &err as &dyn Error, + own_key = own_key.to_string(), + sampling_key = sampling_key.to_string(), + "failed to serialize envelope", + ); return Err(err); } From 4f9030d2938f2fe129a5733b6fa6b1a541e4c849 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 13:25:36 +0200 Subject: [PATCH 45/62] doc & config --- relay-config/src/config.rs | 34 +++-- relay-server/src/endpoints/common.rs | 4 +- .../services/buffer/envelope_buffer/mod.rs | 118 +++++++++++------- relay-server/src/services/project_cache.rs | 8 +- 4 files changed, 104 insertions(+), 60 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 0b6a2e9e8e..67ef2ee054 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -852,6 +852,10 @@ fn spool_envelopes_stack_max_batches() -> usize { 2 } +fn spool_envelopes_max_envelope_delay_secs() -> u64 { + 24 * 60 * 60 +} + /// Persistent buffering 
configuration for incoming envelopes. #[derive(Debug, Serialize, Deserialize)] pub struct EnvelopeSpool { @@ -879,10 +883,18 @@ pub struct EnvelopeSpool { #[serde(default = "spool_envelopes_unspool_interval")] unspool_interval: u64, /// Number of elements of the envelope stack that are flushed to disk. - stack_disk_batch_size: usize, - /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before + #[serde(default = "spool_envelopes_stack_disk_batch_size")] + disk_batch_size: usize, + /// Number of batches of size [`Self::disk_batch_size`] that need to be accumulated before /// flushing one batch to disk. - stack_max_batches: usize, + #[serde(default = "spool_envelopes_stack_max_batches")] + max_batches: usize, + /// Maximum time between receiving the envelope and processing it. + /// + /// When envelopes spend too much time in the buffer (e.g. because their project cannot be loaded), + /// they are dropped. Defaults to 24h. + #[serde(default = "spool_envelopes_max_envelope_delay_secs")] + max_envelope_delay_secs: u64, /// Version of the spooler. #[serde(default = "EnvelopeSpoolVersion::default")] version: EnvelopeSpoolVersion, @@ -901,7 +913,7 @@ pub enum EnvelopeSpoolVersion { /// Use the envelope buffer, through which all envelopes pass before getting unspooled. /// Can be either disk based or memory based. /// - /// This mode has not been fully stress-tested, do not use in production environments. + /// This mode has not yet been stress-tested, do not use in production environments. 
#[serde(rename = "experimental")] V2, } @@ -915,8 +927,9 @@ impl Default for EnvelopeSpool { max_disk_size: spool_envelopes_max_disk_size(), max_memory_size: spool_envelopes_max_memory_size(), unspool_interval: spool_envelopes_unspool_interval(), // 100ms - stack_disk_batch_size: spool_envelopes_stack_disk_batch_size(), - stack_max_batches: spool_envelopes_stack_max_batches(), + disk_batch_size: spool_envelopes_stack_disk_batch_size(), + max_batches: spool_envelopes_stack_max_batches(), + max_envelope_delay_secs: spool_envelopes_max_envelope_delay_secs(), version: EnvelopeSpoolVersion::V2, } } @@ -2119,13 +2132,13 @@ impl Config { /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before /// flushing one batch to disk. pub fn spool_envelopes_stack_disk_batch_size(&self) -> usize { - self.values.spool.envelopes.stack_disk_batch_size + self.values.spool.envelopes.disk_batch_size } /// Number of batches of size `stack_disk_batch_size` that need to be accumulated before /// flushing one batch to disk. pub fn spool_envelopes_stack_max_batches(&self) -> usize { - self.values.spool.envelopes.stack_max_batches + self.values.spool.envelopes.max_batches } /// Returns `true` if version 2 of the spooling mechanism is used. @@ -2136,6 +2149,11 @@ impl Config { ) } + /// Returns the time after which we drop envelopes as a [`Duration`] object. + pub fn spool_envelopes_max_age(&self) -> Duration { + Duration::from_secs(self.values.spool.envelopes.max_envelope_delay_secs) + } + /// Returns the maximum size of an event payload in bytes. 
pub fn max_event_size(&self) -> usize { self.values.limits.max_event_size.as_bytes() diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 57f5e11a4f..2fd6c41716 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -310,7 +310,9 @@ fn queue_envelope( // NOTE: This assumes that a `prefetch` has already been scheduled for both the // envelope's projects. See `handle_check_envelope`. relay_log::trace!("Pushing envelope to V2 buffer"); - // buffer.check_space_available()?; + + // TODO: Sync-check whether the buffer has capacity. + // Otherwise return `QueueFailed`. tokio::spawn(async move { if let Err(e) = buffer.push(envelope.into_envelope()).await { relay_log::error!( diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index a759f515bb..ca12bce1fe 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -14,6 +14,8 @@ use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; use crate::statsd::RelayCounters; +use stack_key::StackKey; + /// Polymorphic envelope buffering interface. /// /// The underlying buffer can either be disk-based or memory-based, @@ -39,6 +41,7 @@ impl PolymorphicEnvelopeBuffer { Self::InMemory(EnvelopeBuffer::::new()) } + /// Adds an envelope to the buffer. pub async fn push(&mut self, envelope: Box) -> Result<(), EnvelopeBufferError> { match self { Self::Sqlite(buffer) => buffer.push(envelope).await, @@ -48,6 +51,7 @@ impl PolymorphicEnvelopeBuffer { Ok(()) } + /// Returns a reference to the next-in-line envelope. pub async fn peek(&mut self) -> Result, EnvelopeBufferError> { match self { Self::Sqlite(buffer) => buffer.peek().await, @@ -55,6 +59,7 @@ impl PolymorphicEnvelopeBuffer { } } + /// Pops the next-in-line envelope. 
pub async fn pop(&mut self) -> Result>, EnvelopeBufferError> { let envelope = match self { Self::Sqlite(buffer) => buffer.pop().await, @@ -64,6 +69,9 @@ impl PolymorphicEnvelopeBuffer { Ok(envelope) } + /// Marks a project as ready or not ready. + /// + /// The buffer reprioritizes its envelopes based on this information. pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { match self { Self::Sqlite(buffer) => buffer.mark_ready(project, is_ready), @@ -124,35 +132,6 @@ impl EnvelopeBuffer

where EnvelopeBufferError: std::convert::From<::Error>, { - fn push_stack(&mut self, envelope: Box) { - let received_at = envelope.meta().start_time(); - let stack_key = StackKey::from_envelope(&envelope); - let previous_entry = self.priority_queue.push( - QueueItem { - key: stack_key, - value: self.stack_provider.create_stack(envelope), - }, - Priority::new(received_at), - ); - debug_assert!(previous_entry.is_none()); - for project_key in stack_key.iter() { - self.stacks_by_project - .entry(project_key) - .or_default() - .insert(stack_key); - } - } - - fn pop_stack(&mut self, stack_key: StackKey) { - for project_key in stack_key.iter() { - self.stacks_by_project - .get_mut(&project_key) - .expect("project_key is missing from lookup") - .remove(&stack_key); - } - self.priority_queue.remove(&stack_key); - } - pub async fn push(&mut self, envelope: Box) -> Result<(), EnvelopeBufferError> { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -221,10 +200,10 @@ where self.priority_queue.change_priority_by(stack_key, |stack| { let mut found = false; for (subkey, readiness) in [ - (stack_key.0, &mut stack.readiness.0), - (stack_key.1, &mut stack.readiness.1), + (stack_key.lesser(), &mut stack.readiness.0), + (stack_key.greater(), &mut stack.readiness.1), ] { - if &subkey == project { + if subkey == project { found = true; if *readiness != is_ready { changed = true; @@ -238,27 +217,72 @@ where } changed } -} - -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -struct StackKey(ProjectKey, ProjectKey); -impl StackKey { - fn new(mut key1: ProjectKey, mut key2: ProjectKey) -> Self { - if key2 < key1 { - std::mem::swap(&mut key1, &mut key2); + fn push_stack(&mut self, envelope: Box) { + let received_at = envelope.meta().start_time(); + let stack_key = StackKey::from_envelope(&envelope); + let previous_entry = self.priority_queue.push( + QueueItem { + key: stack_key, + value: 
self.stack_provider.create_stack(envelope), + }, + Priority::new(received_at), + ); + debug_assert!(previous_entry.is_none()); + for project_key in stack_key.iter() { + self.stacks_by_project + .entry(project_key) + .or_default() + .insert(stack_key); } - Self(key1, key2) } - fn from_envelope(envelope: &Envelope) -> Self { - let own_key = envelope.meta().public_key(); - let sampling_key = envelope.sampling_key().unwrap_or(own_key); - StackKey::new(own_key, sampling_key) + fn pop_stack(&mut self, stack_key: StackKey) { + for project_key in stack_key.iter() { + self.stacks_by_project + .get_mut(&project_key) + .expect("project_key is missing from lookup") + .remove(&stack_key); + } + self.priority_queue.remove(&stack_key); } +} + +mod stack_key { + use super::*; + /// Sorted stack key. + /// + /// Contains a pair of project keys. The lower key is always the first + /// element in the pair, such that `(k1, k2)` and `(k2, k1)` map to the same + /// stack key. + #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] + pub struct StackKey(ProjectKey, ProjectKey); + + impl StackKey { + pub fn from_envelope(envelope: &Envelope) -> Self { + let own_key = envelope.meta().public_key(); + let sampling_key = envelope.sampling_key().unwrap_or(own_key); + Self::new(own_key, sampling_key) + } - fn iter(&self) -> impl Iterator { - std::iter::once(self.0).chain((self.0 != self.1).then_some(self.1)) + pub fn lesser(&self) -> &ProjectKey { + &self.0 + } + + pub fn greater(&self) -> &ProjectKey { + &self.1 + } + + pub fn iter(&self) -> impl Iterator { + std::iter::once(self.0).chain((self.0 != self.1).then_some(self.1)) + } + + fn new(mut key1: ProjectKey, mut key2: ProjectKey) -> Self { + if key2 < key1 { + std::mem::swap(&mut key1, &mut key2); + } + Self(key1, key2) + } } } diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 90b7b1f90f..57c1d0d4d8 100644 --- a/relay-server/src/services/project_cache.rs +++ 
b/relay-server/src/services/project_cache.rs @@ -42,8 +42,6 @@ use crate::services::upstream::UpstreamRelay; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; -const MAX_ENVELOPE_AGE: std::time::Duration = std::time::Duration::from_secs(24 * 60 * 60); - /// Requests a refresh of a project state from one of the available sources. /// /// The project state is resolved in the following precedence: @@ -1046,8 +1044,7 @@ impl ProjectCacheBroker { async fn peek_at_envelope(&mut self, mut peek: Peek<'_>) -> Result<(), EnvelopeBufferError> { let envelope = peek.get().await?; - // TODO: make envelope age configurable. - if envelope.meta().start_time().elapsed() > MAX_ENVELOPE_AGE { + if envelope.meta().start_time().elapsed() > self.config.spool_envelopes_max_age() { let mut managed_envelope = ManagedEnvelope::new( peek.remove().await?, self.services.outcome_aggregator.clone(), @@ -1065,6 +1062,7 @@ impl ProjectCacheBroker { let project = self.get_or_create_project(project_key); let project_state = project.get_cached_state(services.project_cache.clone(), false); + // Check if project config is enabled. let project_info = match project_state { ProjectState::Enabled(info) => { peek.mark_ready(&project_key, true); @@ -1086,6 +1084,7 @@ impl ProjectCacheBroker { } }; + // Check if sampling config is enabled. let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, @@ -1110,6 +1109,7 @@ impl ProjectCacheBroker { let project = self.get_or_create_project(project_key); + // Reassign processing groups and proceed to processing. for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove().await?) 
{ let managed_envelope = ManagedEnvelope::new( envelope, From 0e0ab80c203cae1f5651651e2341eb0e53fb050a Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 14:43:05 +0200 Subject: [PATCH 46/62] Improve --- relay-server/benches/benches.rs | 9 +++++---- relay-server/src/lib.rs | 5 ++--- relay-server/src/services/buffer/mod.rs | 14 +++++--------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/relay-server/benches/benches.rs b/relay-server/benches/benches.rs index 0befe7857d..f1776bd7ed 100644 --- a/relay-server/benches/benches.rs +++ b/relay-server/benches/benches.rs @@ -8,7 +8,7 @@ use tempfile::TempDir; use tokio::runtime::Runtime; use relay_base_schema::project::ProjectKey; -use relay_server::{Envelope, EnvelopeStack, SqliteEnvelopeStack}; +use relay_server::{Envelope, EnvelopeStack, SqliteEnvelopeStack, SqliteEnvelopeStore}; fn setup_db(path: &PathBuf) -> Pool { let options = SqliteConnectOptions::new() @@ -61,6 +61,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { let temp_dir = TempDir::new().unwrap(); let db_path = temp_dir.path().join("test.db"); let db = setup_db(&db_path); + let envelope_store = SqliteEnvelopeStore::new(db); let runtime = Runtime::new().unwrap(); @@ -84,7 +85,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { }); let stack = SqliteEnvelopeStack::new( - db.clone(), + envelope_store.clone(), disk_batch_size, 2, ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), @@ -120,7 +121,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { reset_db(db.clone()).await; let mut stack = SqliteEnvelopeStack::new( - db.clone(), + envelope_store.clone(), disk_batch_size, 2, ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), @@ -160,7 +161,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { }); let stack = SqliteEnvelopeStack::new( - db.clone(), + envelope_store.clone(), disk_batch_size, 2, ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), diff --git 
a/relay-server/src/lib.rs b/relay-server/src/lib.rs index c3959f8fdc..7049016775 100644 --- a/relay-server/src/lib.rs +++ b/relay-server/src/lib.rs @@ -266,10 +266,9 @@ mod services; mod statsd; mod utils; +pub use self::envelope::Envelope; // pub for benchmarks +pub use self::services::buffer::{EnvelopeStack, SqliteEnvelopeStack, SqliteEnvelopeStore}; // pub for benchmarks pub use self::services::spooler::spool_utils; -// Public just for benchmarks. -pub use self::envelope::Envelope; -pub use services::buffer::{EnvelopeStack, SqliteEnvelopeStack}; #[cfg(test)] mod testutils; diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 65e92f6c32..c0268997d1 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,15 +1,13 @@ -//! Types for buffering envelopes. -use std::sync::atomic::{AtomicBool, Ordering}; - -use tokio::sync::MutexGuard; - pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks pub use envelope_stack::EnvelopeStack; // pub for benchmarks -use relay_base_schema::project::ProjectKey; -use relay_config::Config; +pub use sqlite_envelope_store::SqliteEnvelopeStore; // pub for benchmarks use crate::envelope::Envelope; use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use std::sync::atomic::{AtomicBool, Ordering}; +use tokio::sync::MutexGuard; mod envelope_buffer; mod envelope_stack; @@ -17,8 +15,6 @@ mod sqlite_envelope_store; mod stack_provider; mod testutils; -pub use envelope_buffer::EnvelopeBufferError; - /// Async envelope buffering interface. /// /// Access to the buffer is synchronized by a tokio lock. 
From 1d57809649d67c41b64e9794e3f44e3ccd9747a5 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 14:57:57 +0200 Subject: [PATCH 47/62] Improve --- .../src/services/buffer/envelope_buffer/mod.rs | 4 ++-- relay-server/src/services/buffer/mod.rs | 15 +++++++++------ .../src/services/buffer/sqlite_envelope_store.rs | 5 ++--- relay-server/src/services/cogs.rs | 3 +-- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index ca12bce1fe..167bd0bf16 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -5,6 +5,7 @@ use std::time::Instant; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use stack_key::StackKey; use crate::envelope::Envelope; use crate::services::buffer::envelope_stack::sqlite::SqliteEnvelopeStackError; @@ -14,8 +15,6 @@ use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; use crate::statsd::RelayCounters; -use stack_key::StackKey; - /// Polymorphic envelope buffering interface. 
/// /// The underlying buffer can either be disk-based or memory-based, @@ -85,6 +84,7 @@ impl PolymorphicEnvelopeBuffer { pub enum EnvelopeBufferError { #[error("sqlite")] Sqlite(#[from] SqliteEnvelopeStackError), + #[error("impossible")] Impossible(#[from] Infallible), } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index c0268997d1..8a07edab51 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,14 +1,17 @@ -pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks -pub use envelope_stack::EnvelopeStack; // pub for benchmarks -pub use sqlite_envelope_store::SqliteEnvelopeStore; // pub for benchmarks +use std::sync::atomic::{AtomicBool, Ordering}; -use crate::envelope::Envelope; -use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use std::sync::atomic::{AtomicBool, Ordering}; use tokio::sync::MutexGuard; +use crate::envelope::Envelope; +use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; + +pub use envelope_buffer::EnvelopeBufferError; +pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks +pub use envelope_stack::EnvelopeStack; // pub for benchmarks +pub use sqlite_envelope_store::SqliteEnvelopeStore; // pub for benchmarks + mod envelope_buffer; mod envelope_stack; mod sqlite_envelope_store; diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 8104410d34..9fcf61d14a 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -4,6 +4,8 @@ use std::pin::pin; use futures::stream::StreamExt; use hashbrown::HashSet; +use relay_base_schema::project::{ParseProjectKeyError, ProjectKey}; +use relay_config::Config; use sqlx::migrate::MigrateError; use 
sqlx::query::Query; use sqlx::sqlite::{ @@ -13,9 +15,6 @@ use sqlx::sqlite::{ use sqlx::{Pool, QueryBuilder, Row, Sqlite}; use tokio::fs::DirBuilder; -use relay_base_schema::project::{ParseProjectKeyError, ProjectKey}; -use relay_config::Config; - use crate::envelope::EnvelopeError; use crate::extractors::StartTime; use crate::Envelope; diff --git a/relay-server/src/services/cogs.rs b/relay-server/src/services/cogs.rs index 385d2201c5..acbeb790ab 100644 --- a/relay-server/src/services/cogs.rs +++ b/relay-server/src/services/cogs.rs @@ -1,10 +1,9 @@ use std::sync::atomic::{AtomicBool, Ordering}; -use sentry_usage_accountant::{Producer, UsageAccountant, UsageUnit}; - use relay_cogs::{CogsMeasurement, CogsRecorder, ResourceId}; use relay_config::Config; use relay_system::{Addr, Controller, FromMessage, Interface, Service}; +use sentry_usage_accountant::{Producer, UsageAccountant, UsageUnit}; #[cfg(feature = "processing")] use crate::services::store::{Store, StoreCogs}; From 07ec7e914a80b6be0164ddced23cad4cc83a08a6 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 15:11:27 +0200 Subject: [PATCH 48/62] Improve --- relay-server/benches/benches.rs | 2 +- .../services/buffer/envelope_buffer/mod.rs | 3 +- .../services/buffer/envelope_stack/sqlite.rs | 4 +- .../services/buffer/sqlite_envelope_store.rs | 8 ++- .../services/buffer/stack_provider/sqlite.rs | 1 + relay-server/src/services/buffer/testutils.rs | 59 ++++++++++--------- 6 files changed, 43 insertions(+), 34 deletions(-) diff --git a/relay-server/benches/benches.rs b/relay-server/benches/benches.rs index f1776bd7ed..515e3c1f58 100644 --- a/relay-server/benches/benches.rs +++ b/relay-server/benches/benches.rs @@ -61,7 +61,7 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { let temp_dir = TempDir::new().unwrap(); let db_path = temp_dir.path().join("test.db"); let db = setup_db(&db_path); - let envelope_store = SqliteEnvelopeStore::new(db); + let envelope_store = 
SqliteEnvelopeStore::new(db.clone()); let runtime = Runtime::new().unwrap(); diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 167bd0bf16..64e4efd392 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -116,13 +116,14 @@ impl EnvelopeBuffer { } } } + +#[allow(dead_code)] impl EnvelopeBuffer { /// Creates an empty buffer. pub async fn new(config: &Config) -> Result { Ok(Self { stacks_by_project: Default::default(), priority_queue: Default::default(), - // TODO: handle error. stack_provider: SqliteStackProvider::new(config).await?, }) } diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index 2353bf7be8..d2098e85d3 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ -46,7 +46,7 @@ pub struct SqliteEnvelopeStack { } impl SqliteEnvelopeStack { - /// Creates a new empty [`SQLiteEnvelopeStack`]. + /// Creates a new empty [`SqliteEnvelopeStack`]. 
pub fn new( envelope_store: SqliteEnvelopeStore, disk_batch_size: usize, @@ -242,7 +242,7 @@ mod tests { use super::*; use crate::envelope::{Envelope, Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::testutils::setup_db; + use crate::services::buffer::testutils::utils::setup_db; fn request_meta() -> RequestMeta { let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" diff --git a/relay-server/src/services/buffer/sqlite_envelope_store.rs b/relay-server/src/services/buffer/sqlite_envelope_store.rs index 9fcf61d14a..83616bf55a 100644 --- a/relay-server/src/services/buffer/sqlite_envelope_store.rs +++ b/relay-server/src/services/buffer/sqlite_envelope_store.rs @@ -88,6 +88,10 @@ pub enum SqliteEnvelopeStoreError { FileSizeReadFailed(sqlx::Error), } +/// Struct that offers access to a SQLite-based store of [`Envelope`]s. +/// +/// The goal of this struct is to hide away all the complexity of dealing with the database for +/// reading and writing envelopes. #[derive(Debug, Clone)] pub struct SqliteEnvelopeStore { db: Pool, @@ -190,7 +194,7 @@ impl SqliteEnvelopeStore { Ok(()) } - /// Inserts one or more [`InsertEnvelope`] into the database. + /// Inserts one or more envelopes into the database. 
pub async fn insert_many( &self, envelopes: impl IntoIterator, @@ -414,7 +418,7 @@ mod tests { use super::*; use crate::envelope::{Envelope, Item, ItemType}; use crate::extractors::RequestMeta; - use crate::services::buffer::testutils::setup_db; + use crate::services::buffer::testutils::utils::setup_db; fn request_meta() -> RequestMeta { let dsn = "https://a94ae32be2584e0bbd7a4cbb95971fee:@sentry.io/42" diff --git a/relay-server/src/services/buffer/stack_provider/sqlite.rs b/relay-server/src/services/buffer/stack_provider/sqlite.rs index 895f9f279d..9716585606 100644 --- a/relay-server/src/services/buffer/stack_provider/sqlite.rs +++ b/relay-server/src/services/buffer/stack_provider/sqlite.rs @@ -13,6 +13,7 @@ pub struct SqliteStackProvider { max_batches: usize, } +#[warn(dead_code)] impl SqliteStackProvider { /// Creates a new [`SqliteStackProvider`] from the provided path to the SQLite database file. pub async fn new(config: &Config) -> Result { diff --git a/relay-server/src/services/buffer/testutils.rs b/relay-server/src/services/buffer/testutils.rs index ae67da0e17..0cb2ed75d6 100644 --- a/relay-server/src/services/buffer/testutils.rs +++ b/relay-server/src/services/buffer/testutils.rs @@ -1,36 +1,39 @@ -use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; -use sqlx::{Pool, Sqlite}; -use tokio::fs::DirBuilder; -use uuid::Uuid; +#[cfg(test)] +pub mod utils { + use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; + use sqlx::{Pool, Sqlite}; + use tokio::fs::DirBuilder; + use uuid::Uuid; -/// Sets up a temporary SQLite database for testing purposes. -pub async fn setup_db(run_migrations: bool) -> Pool { - let path = std::env::temp_dir().join(Uuid::new_v4().to_string()); + /// Sets up a temporary SQLite database for testing purposes. 
+ pub async fn setup_db(run_migrations: bool) -> Pool { + let path = std::env::temp_dir().join(Uuid::new_v4().to_string()); - if let Some(parent) = path.parent() { - if !parent.as_os_str().is_empty() && !parent.exists() { - relay_log::debug!("creating directory for spooling file: {}", parent.display()); - DirBuilder::new() - .recursive(true) - .create(&parent) - .await - .unwrap(); + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() && !parent.exists() { + relay_log::debug!("creating directory for spooling file: {}", parent.display()); + DirBuilder::new() + .recursive(true) + .create(&parent) + .await + .unwrap(); + } } - } - let options = SqliteConnectOptions::new() - .filename(&path) - .journal_mode(SqliteJournalMode::Wal) - .create_if_missing(true); + let options = SqliteConnectOptions::new() + .filename(&path) + .journal_mode(SqliteJournalMode::Wal) + .create_if_missing(true); - let db = SqlitePoolOptions::new() - .connect_with(options) - .await - .unwrap(); + let db = SqlitePoolOptions::new() + .connect_with(options) + .await + .unwrap(); - if run_migrations { - sqlx::migrate!("../migrations").run(&db).await.unwrap(); - } + if run_migrations { + sqlx::migrate!("../migrations").run(&db).await.unwrap(); + } - db + db + } } From e3e62eccb2a4f6211655e1ac6f7e84e163bace67 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 15:20:51 +0200 Subject: [PATCH 49/62] Improve --- .../services/buffer/envelope_stack/sqlite.rs | 8 +-- relay-server/src/services/buffer/mod.rs | 54 ++++++++++--------- .../src/services/metrics/aggregator.rs | 1 - .../src/services/project/state/info.rs | 6 +-- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/relay-server/src/services/buffer/envelope_stack/sqlite.rs b/relay-server/src/services/buffer/envelope_stack/sqlite.rs index d2098e85d3..9ea8d16a66 100644 --- a/relay-server/src/services/buffer/envelope_stack/sqlite.rs +++ b/relay-server/src/services/buffer/envelope_stack/sqlite.rs @@ 
-11,7 +11,7 @@ use crate::services::buffer::sqlite_envelope_store::{ }; use crate::statsd::RelayCounters; -/// An error returned when doing an operation on [`SQLiteEnvelopeStack`]. +/// An error returned when doing an operation on [`SqliteEnvelopeStack`]. #[derive(Debug, thiserror::Error)] pub enum SqliteEnvelopeStackError { #[error("an error occurred in the envelope store: {0}")] @@ -68,12 +68,12 @@ impl SqliteEnvelopeStack { } } - /// Threshold above which the [`SQLiteEnvelopeStack`] will spool data from the `buffer` to disk. + /// Threshold above which the [`SqliteEnvelopeStack`] will spool data from the `buffer` to disk. fn above_spool_threshold(&self) -> bool { self.batches_buffer_size >= self.spool_threshold.get() } - /// Threshold below which the [`SQLiteEnvelopeStack`] will unspool data from disk to the + /// Threshold below which the [`SqliteEnvelopeStack`] will unspool data from disk to the /// `buffer`. fn below_unspool_threshold(&self) -> bool { self.batches_buffer_size == 0 @@ -148,7 +148,7 @@ impl SqliteEnvelopeStack { } /// Validates that the incoming [`Envelope`] has the same project keys at the - /// [`SQLiteEnvelopeStack`]. + /// [`SqliteEnvelopeStack`]. fn validate_envelope(&self, envelope: &Envelope) -> bool { let own_key = envelope.meta().public_key(); let sampling_key = envelope.sampling_key().unwrap_or(own_key); diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 8a07edab51..e55acc9825 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,3 +1,5 @@ +//! Types for buffering envelopes. + use std::sync::atomic::{AtomicBool, Ordering}; use relay_base_schema::project::ProjectKey; @@ -40,7 +42,7 @@ pub struct GuardedEnvelopeBuffer { } impl GuardedEnvelopeBuffer { - /// Creates a memory or disk based [`EnvelopesBufferManager`], depending on the given config. + /// Creates a memory or disk based [`GuardedEnvelopeBuffer`], depending on the given config. 
/// /// NOTE: until the V1 spooler implementation is removed, this function returns `None` /// if V2 spooling is not configured. @@ -145,7 +147,7 @@ impl Peek<'_> { .expect("element disappeared while holding lock")) } - /// Sync version of [`EnvelopesBufferManager::mark_ready`]. + /// Sync version of [`GuardedEnvelopeBuffer::mark_ready`]. /// /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready /// without awaiting the lock. @@ -176,6 +178,30 @@ mod tests { use super::*; + fn new_buffer() -> Arc { + GuardedEnvelopeBuffer::from_config( + &Config::from_json_value(serde_json::json!({ + "spool": { + "envelopes": { + "version": "expermental" + } + } + })) + .unwrap(), + ) + .unwrap() + .into() + } + + fn new_envelope() -> Box { + Envelope::from_request( + None, + RequestMeta::new( + Dsn::from_str("http://a94ae32be2584e0bbd7a4cbb95971fed@localhost/1").unwrap(), + ), + ) + } + #[tokio::test] async fn no_busy_loop_when_empty() { let buffer = new_buffer(); @@ -246,28 +272,4 @@ mod tests { tokio::time::advance(Duration::from_nanos(1)).await; assert_eq!(call_count.load(Ordering::Relaxed), 2); } - - fn new_buffer() -> Arc { - GuardedEnvelopeBuffer::from_config( - &Config::from_json_value(serde_json::json!({ - "spool": { - "envelopes": { - "version": "expermental" - } - } - })) - .unwrap(), - ) - .unwrap() - .into() - } - - fn new_envelope() -> Box { - Envelope::from_request( - None, - RequestMeta::new( - Dsn::from_str("http://a94ae32be2584e0bbd7a4cbb95971fed@localhost/1").unwrap(), - ), - ) - } } diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index 96fbd653cb..6d916d73f0 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ b/relay-server/src/services/metrics/aggregator.rs @@ -1,7 +1,6 @@ use std::time::Duration; use hashbrown::HashMap; - use relay_base_schema::project::ProjectKey; use relay_config::AggregatorServiceConfig; use relay_metrics::{aggregator, Bucket}; 
diff --git a/relay-server/src/services/project/state/info.rs b/relay-server/src/services/project/state/info.rs index 9652cb348c..9e97a73aaf 100644 --- a/relay-server/src/services/project/state/info.rs +++ b/relay-server/src/services/project/state/info.rs @@ -1,7 +1,4 @@ use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use smallvec::SmallVec; -use url::Url; use relay_base_schema::project::{ProjectId, ProjectKey}; #[cfg(feature = "processing")] @@ -12,6 +9,9 @@ use relay_dynamic_config::ErrorBoundary; use relay_dynamic_config::{Feature, LimitedProjectConfig, ProjectConfig}; use relay_filter::matches_any_origin; use relay_quotas::{Quota, Scoping}; +use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; From f5c8df05d7b52f4bba43bc6b2a12bda9f07d82e0 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 30 Jul 2024 15:21:54 +0200 Subject: [PATCH 50/62] Improve --- relay-server/src/services/buffer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index e55acc9825..4703397bfb 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -183,7 +183,7 @@ mod tests { &Config::from_json_value(serde_json::json!({ "spool": { "envelopes": { - "version": "expermental" + "version": "experimental" } } })) From 71da42607ae670ffd3e51890c2aa50c7901da19f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 16:19:36 +0200 Subject: [PATCH 51/62] fix test_query --- relay-config/src/config.rs | 2 +- tests/integration/test_projectconfigs.py | 18 +----------------- tests/integration/test_query.py | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 67ef2ee054..d8185189c9 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ 
-844,7 +844,7 @@ fn spool_envelopes_unspool_interval() -> u64 { /// Default batch size for the stack. fn spool_envelopes_stack_disk_batch_size() -> usize { - 1000 + 200 } /// Default maximum number of batches for the stack. diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index 37bf51c2ce..eba46e3884 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -231,11 +231,7 @@ def get_response(relay, packed, signature, version="3"): return data -@pytest.mark.parametrize( - "buffer_config", - [False, True], -) -def test_unparsable_project_config(buffer_config, mini_sentry, relay): +def test_unparsable_project_config(mini_sentry, relay): project_key = 42 relay_config = { "cache": { @@ -249,18 +245,6 @@ def test_unparsable_project_config(buffer_config, mini_sentry, relay): }, } - if buffer_config: - temp = tempfile.mkdtemp() - dbfile = os.path.join(temp, "buffer.db") - # set the buffer to something low to force the spooling - relay_config["spool"] = { - "envelopes": { - "path": dbfile, - "max_memory_size": 1000, - "version": "experimental", - } - } - relay = relay(mini_sentry, relay_config) mini_sentry.add_full_project_config(project_key) public_key = mini_sentry.get_dsn_public_key(project_key) diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index 1783cc64e8..d9bcc14556 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -124,7 +124,7 @@ def get_project_config(): retry_count += 1 print("RETRY", retry_count) - if retry_count < 2: + if retry_count < 3: if failure_type == "timeout": time.sleep(50) # ensure timeout elif failure_type == "socketerror": From 8e57db925e1839dfbc4c5c0d89fd87dd8ed497fb Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 16:46:58 +0200 Subject: [PATCH 52/62] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md 
index 482b8084d5..43513bba62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ **Internal**: -- Add `EnvelopeStack` and `SQLiteEnvelopeStack` to manage envelopes on disk. ([#3855](https://github.com/getsentry/relay/pull/3855)) +- Add experimental support for V2 envelope buffering. ([#3855](https://github.com/getsentry/relay/pull/3855), [#3863](https://github.com/getsentry/relay/pull/3863)) ## 24.7.1 From 675b29f7607f85efb0c56cdf4418ddb1e7475b7c Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 17:43:57 +0200 Subject: [PATCH 53/62] count inflight --- relay-server/src/endpoints/common.rs | 9 +----- relay-server/src/services/buffer/mod.rs | 35 +++++++++++++++++----- relay-server/src/utils/managed_envelope.rs | 2 +- tests/integration/test_projectconfigs.py | 2 -- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 2fd6c41716..3309b7a0e4 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -313,14 +313,7 @@ fn queue_envelope( // TODO: Sync-check whether the buffer has capacity. // Otherwise return `QueueFailed`. - tokio::spawn(async move { - if let Err(e) = buffer.push(envelope.into_envelope()).await { - relay_log::error!( - error = &e as &dyn std::error::Error, - "failed to push envelope" - ); - } - }); + buffer.defer_push(envelope); } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 4703397bfb..e4fdaf0059 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,6 +1,7 @@ //! Types for buffering envelopes. 
-use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::Arc; use relay_base_schema::project::ProjectKey; use relay_config::Config; @@ -8,6 +9,7 @@ use tokio::sync::MutexGuard; use crate::envelope::Envelope; use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; +use crate::utils::ManagedEnvelope; pub use envelope_buffer::EnvelopeBufferError; pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks @@ -39,6 +41,7 @@ pub struct GuardedEnvelopeBuffer { backend: tokio::sync::Mutex, notify: tokio::sync::Notify, changed: AtomicBool, + inflight_push_count: AtomicUsize, } impl GuardedEnvelopeBuffer { @@ -52,18 +55,28 @@ impl GuardedEnvelopeBuffer { backend: tokio::sync::Mutex::new(PolymorphicEnvelopeBuffer::from_config(config)), notify: tokio::sync::Notify::new(), changed: AtomicBool::new(true), + inflight_push_count: AtomicUsize::new(0), }) } else { None } } - /// Adds an envelope to the buffer and wakes any waiting consumers. - pub async fn push(&self, envelope: Box) -> Result<(), EnvelopeBufferError> { - let mut guard = self.backend.lock().await; - guard.push(envelope).await?; - self.notify(); - Ok(()) + /// Schedules a task to push an envelope to the buffer. + /// + /// Once the envelope is pushed, waiters will be notified. + pub fn defer_push(self: Arc, envelope: ManagedEnvelope) { + self.inflight_push_count.fetch_add(1, Ordering::Relaxed); + let this = self.clone(); + tokio::spawn(async move { + if let Err(e) = this.push(envelope.into_envelope()).await { + relay_log::error!( + error = &e as &dyn std::error::Error, + "failed to push envelope" + ); + } + this.inflight_push_count.fetch_sub(1, Ordering::Relaxed); + }); } /// Returns a reference to the next-in-line envelope. @@ -110,6 +123,14 @@ impl GuardedEnvelopeBuffer { } } + /// Adds an envelope to the buffer and wakes any waiting consumers. 
+ async fn push(&self, envelope: Box) -> Result<(), EnvelopeBufferError> { + let mut guard = self.backend.lock().await; + guard.push(envelope).await?; + self.notify(); + Ok(()) + } + fn notify(&self) { self.changed.store(true, Ordering::Relaxed); self.notify.notify_waiters(); diff --git a/relay-server/src/utils/managed_envelope.rs b/relay-server/src/utils/managed_envelope.rs index a31b6d6e4a..ba52f7c02e 100644 --- a/relay-server/src/utils/managed_envelope.rs +++ b/relay-server/src/utils/managed_envelope.rs @@ -225,7 +225,7 @@ impl ManagedEnvelope { /// Consumes itself returning the managed envelope. pub fn into_envelope(mut self) -> Box { self.context.done = true; - Box::new(self.envelope.take_items()) + self.take_envelope() } /// Converts current managed envelope into processed envelope. diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index eba46e3884..0c4f6735e4 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -6,8 +6,6 @@ import pytest import time from collections import namedtuple -import tempfile -import os from sentry_relay.auth import PublicKey, SecretKey, generate_key_pair From 107ec5b514d64e550acc7197420f8fce5c7ae93f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 18:00:13 +0200 Subject: [PATCH 54/62] metric for in flight pushes --- relay-server/src/services/buffer/mod.rs | 13 ++++++++++--- relay-server/src/services/project_cache.rs | 6 ++++++ relay-server/src/statsd.rs | 5 +++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index e4fdaf0059..3ef037f2c9 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,6 +1,6 @@ //! Types for buffering envelopes. 
-use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::Arc; use relay_base_schema::project::ProjectKey; @@ -39,9 +39,12 @@ pub struct GuardedEnvelopeBuffer { /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. backend: tokio::sync::Mutex, + /// Used to notify callers of `peek()` of any changes in the buffer. notify: tokio::sync::Notify, + /// Used to notify callers of `peek()` of any changes in the buffer. changed: AtomicBool, - inflight_push_count: AtomicUsize, + /// Metric that counts how many push operations are waiting. + inflight_push_count: AtomicU64, } impl GuardedEnvelopeBuffer { @@ -55,7 +58,7 @@ impl GuardedEnvelopeBuffer { backend: tokio::sync::Mutex::new(PolymorphicEnvelopeBuffer::from_config(config)), notify: tokio::sync::Notify::new(), changed: AtomicBool::new(true), - inflight_push_count: AtomicUsize::new(0), + inflight_push_count: AtomicU64::new(0), }) } else { None @@ -79,6 +82,10 @@ impl GuardedEnvelopeBuffer { }); } + pub fn inflight_push_count(&self) -> u64 { + self.inflight_push_count.load(Ordering::Relaxed) + } + /// Returns a reference to the next-in-line envelope. 
/// /// If the buffer is empty or has not changed since the last peek, this function will sleep diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 57c1d0d4d8..9c15f8140d 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1312,6 +1312,7 @@ impl Service for ProjectCacheService { tokio::spawn(async move { let mut ticker = tokio::time::interval(config.cache_eviction_interval()); + let mut report_ticker = tokio::time::interval(Duration::from_secs(1)); relay_log::info!("project cache started"); // Channel for async project state responses back into the project cache. @@ -1446,6 +1447,11 @@ impl Service for ProjectCacheService { } }) } + _ = report_ticker.tick() => { + if let Some(envelope_buffer) = &envelope_buffer { + relay_statsd::metric!(gauge(RelayGauges::BufferPushInFlight) = envelope_buffer.inflight_push_count()); + } + } else => break, } } diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index c166a2bc4e..93b97c4be0 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -26,6 +26,10 @@ pub enum RelayGauges { /// This metric is tagged with: /// - `reason`: Why keys are / are not unspooled. BufferPeriodicUnspool, + /// Number of envelopes currently waiting to be buffered. + /// + /// This corresponds to the number of corresponding tokio tasks currently scheduled or running. + BufferPushInFlight, /// The currently used memory by the entire system. /// /// Relay uses the same value for its memory health check. 
@@ -50,6 +54,7 @@ impl GaugeMetric for RelayGauges { RelayGauges::BufferEnvelopesMemoryCount => "buffer.envelopes_mem_count", RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", + RelayGauges::BufferPushInFlight => "buffer.push_inflight", RelayGauges::SystemMemoryUsed => "health.system_memory.used", RelayGauges::SystemMemoryTotal => "health.system_memory.total", #[cfg(feature = "processing")] From 007bd36dbc9dc2f6c0309513368fbf1047e25b20 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Jul 2024 21:11:17 +0200 Subject: [PATCH 55/62] bench --- relay-server/benches/benches.rs | 102 +++++++++++++++++- relay-server/src/lib.rs | 4 +- .../services/buffer/envelope_buffer/mod.rs | 2 + relay-server/src/services/buffer/mod.rs | 4 +- 4 files changed, 105 insertions(+), 7 deletions(-) diff --git a/relay-server/benches/benches.rs b/relay-server/benches/benches.rs index 515e3c1f58..38583cb4b1 100644 --- a/relay-server/benches/benches.rs +++ b/relay-server/benches/benches.rs @@ -1,5 +1,6 @@ use bytes::Bytes; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use relay_config::Config; use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; use sqlx::{Pool, Sqlite}; use std::path::PathBuf; @@ -8,7 +9,9 @@ use tempfile::TempDir; use tokio::runtime::Runtime; use relay_base_schema::project::ProjectKey; -use relay_server::{Envelope, EnvelopeStack, SqliteEnvelopeStack, SqliteEnvelopeStore}; +use relay_server::{ + Envelope, EnvelopeStack, PolymorphicEnvelopeBuffer, SqliteEnvelopeStack, SqliteEnvelopeStore, +}; fn setup_db(path: &PathBuf) -> Pool { let options = SqliteConnectOptions::new() @@ -37,6 +40,11 @@ async fn reset_db(db: Pool) { } fn mock_envelope(size: &str) -> Box { + let project_key = "e12d836b15bb49d7bbf99e64295d995b"; + mock_envelope_with_project_key(&ProjectKey::parse(project_key).unwrap(), size) +} + +fn 
mock_envelope_with_project_key(project_key: &ProjectKey, size: &str) -> Box { let payload = match size { "small" => "small_payload".to_string(), "medium" => "medium_payload".repeat(100), @@ -47,10 +55,11 @@ fn mock_envelope(size: &str) -> Box { let bytes = Bytes::from(format!( "\ - {{\"event_id\":\"9ec79c33ec9942ab8353589fcb2e04dc\",\"dsn\":\"https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42\"}}\n\ + {{\"event_id\":\"9ec79c33ec9942ab8353589fcb2e04dc\",\"dsn\":\"https://{}:@sentry.io/42\"}}\n\ {{\"type\":\"attachment\"}}\n\ {}\n\ ", + project_key, payload )); @@ -200,5 +209,90 @@ fn benchmark_sqlite_envelope_stack(c: &mut Criterion) { group.finish(); } -criterion_group!(benches, benchmark_sqlite_envelope_stack); -criterion_main!(benches); +fn benchmark_envelope_buffer(c: &mut Criterion) { + use rand::seq::SliceRandom; + let mut group = c.benchmark_group("envelope_buffer"); + group.sample_size(10); + + let runtime = Runtime::new().unwrap(); + + let num_projects = 1000; + let envelopes_per_project = 100; + + group.throughput(Throughput::Elements( + num_projects * envelopes_per_project as u64, + )); + + group.bench_function("push_only", |b| { + b.iter_with_setup( + || { + let project_keys: Vec<_> = (0..num_projects) + .map(|i| ProjectKey::parse(&format!("{:#032x}", i)).unwrap()) + .collect(); + + let mut envelopes = vec![]; + for project_key in &project_keys { + for _ in 0..envelopes_per_project { + envelopes.push(mock_envelope_with_project_key(project_key, "big")) + } + } + + envelopes.shuffle(&mut rand::thread_rng()); + + envelopes + }, + |envelopes| { + runtime.block_on(async { + let mut buffer = PolymorphicEnvelopeBuffer::from_config(&Config::default()); + for envelope in envelopes.into_iter() { + buffer.push(envelope).await.unwrap(); + } + }) + }, + ); + }); + + group.bench_function("push_pop", |b| { + b.iter_with_setup( + || { + let project_keys: Vec<_> = (0..num_projects) + .map(|i| ProjectKey::parse(&format!("{:#032x}", i)).unwrap()) + .collect(); + + let 
mut envelopes = vec![]; + for project_key in &project_keys { + for _ in 0..envelopes_per_project { + envelopes.push(mock_envelope_with_project_key(project_key, "big")) + } + } + + envelopes.shuffle(&mut rand::thread_rng()); + + envelopes + }, + |envelopes| { + runtime.block_on(async { + let mut buffer = PolymorphicEnvelopeBuffer::from_config(&Config::default()); + let n = envelopes.len(); + for envelope in envelopes.into_iter() { + let public_key = envelope.meta().public_key(); + buffer.push(envelope).await.unwrap(); + // Mark as ready: + buffer.mark_ready(&public_key, true); + } + for _ in 0..n { + let envelope = buffer.pop().await.unwrap().unwrap(); + // Send back to end of queue to get worse-case behavior: + buffer.mark_ready(&envelope.meta().public_key(), false); + } + }) + }, + ); + }); + + group.finish(); +} + +criterion_group!(sqlite, benchmark_sqlite_envelope_stack); +criterion_group!(buffer, benchmark_envelope_buffer); +criterion_main!(sqlite, buffer); diff --git a/relay-server/src/lib.rs b/relay-server/src/lib.rs index 7049016775..66ba5bc6d6 100644 --- a/relay-server/src/lib.rs +++ b/relay-server/src/lib.rs @@ -267,7 +267,9 @@ mod statsd; mod utils; pub use self::envelope::Envelope; // pub for benchmarks -pub use self::services::buffer::{EnvelopeStack, SqliteEnvelopeStack, SqliteEnvelopeStore}; // pub for benchmarks +pub use self::services::buffer::{ + EnvelopeStack, PolymorphicEnvelopeBuffer, SqliteEnvelopeStack, SqliteEnvelopeStore, +}; // pub for benchmarks pub use self::services::spooler::spool_utils; #[cfg(test)] diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 64e4efd392..158f1b3bda 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -25,7 +25,9 @@ use crate::statsd::RelayCounters; #[derive(Debug)] #[allow(private_interfaces)] pub enum PolymorphicEnvelopeBuffer { + /// An enveloper 
buffer that uses in-memory envelopes stacks. InMemory(EnvelopeBuffer), + /// An enveloper buffer that uses sqlite envelopes stacks. #[allow(dead_code)] Sqlite(EnvelopeBuffer), } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 3ef037f2c9..fe3e2a4bbb 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -8,13 +8,13 @@ use relay_config::Config; use tokio::sync::MutexGuard; use crate::envelope::Envelope; -use crate::services::buffer::envelope_buffer::PolymorphicEnvelopeBuffer; use crate::utils::ManagedEnvelope; pub use envelope_buffer::EnvelopeBufferError; +pub use envelope_buffer::PolymorphicEnvelopeBuffer; pub use envelope_stack::sqlite::SqliteEnvelopeStack; // pub for benchmarks pub use envelope_stack::EnvelopeStack; // pub for benchmarks -pub use sqlite_envelope_store::SqliteEnvelopeStore; // pub for benchmarks +pub use sqlite_envelope_store::SqliteEnvelopeStore; // pub for benchmarks // pub for benchmarks mod envelope_buffer; mod envelope_stack; From 358630369145d76c3215a5dbbec6fd0223c87599 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 09:14:50 +0200 Subject: [PATCH 56/62] bench: More projects --- relay-server/benches/benches.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/relay-server/benches/benches.rs b/relay-server/benches/benches.rs index 38583cb4b1..ac6ff48257 100644 --- a/relay-server/benches/benches.rs +++ b/relay-server/benches/benches.rs @@ -4,7 +4,7 @@ use relay_config::Config; use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}; use sqlx::{Pool, Sqlite}; use std::path::PathBuf; -use std::time::Duration; +use std::time::{Duration, Instant}; use tempfile::TempDir; use tokio::runtime::Runtime; @@ -63,7 +63,9 @@ fn mock_envelope_with_project_key(project_key: &ProjectKey, size: &str) -> Box Date: Wed, 31 Jul 2024 11:08:51 +0200 Subject: [PATCH 57/62] fix: org_id check --- 
relay-server/src/services/project_cache.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 9c15f8140d..14c06d565e 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1085,6 +1085,7 @@ impl ProjectCacheBroker { }; // Check if sampling config is enabled. + // TODO: .filter(|state| state.organization_id == project_info.organization_id); let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, @@ -1094,7 +1095,9 @@ impl ProjectCacheBroker { }) { Some((sampling_key, ProjectState::Enabled(info))) => { peek.mark_ready(&sampling_key, true); - Some(info) + // Only set if it matches the organization ID. Otherwise treat as if there is + // no sampling project. + (info.organization_id == project_info.organization_id).then_some(info) } Some((_, ProjectState::Disabled)) => { // Accept envelope even if its sampling state is disabled: From 2de15b07f4b9adce312bf510b84487784af5a6bd Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 12:22:21 +0200 Subject: [PATCH 58/62] ref: Variables are your friend --- relay-server/src/services/project_cache.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 14c06d565e..3b3bbe7262 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1045,8 +1045,9 @@ impl ProjectCacheBroker { async fn peek_at_envelope(&mut self, mut peek: Peek<'_>) -> Result<(), EnvelopeBufferError> { let envelope = peek.get().await?; if envelope.meta().start_time().elapsed() > self.config.spool_envelopes_max_age() { + let popped_envelope = peek.remove().await?; let mut managed_envelope = ManagedEnvelope::new( - peek.remove().await?, + popped_envelope, self.services.outcome_aggregator.clone(), 
self.services.test_store.clone(), ProcessingGroup::Ungrouped, @@ -1069,8 +1070,9 @@ impl ProjectCacheBroker { info } ProjectState::Disabled => { + let popped_envelope = peek.remove().await?; let mut managed_envelope = ManagedEnvelope::new( - peek.remove().await?, + popped_envelope, self.services.outcome_aggregator.clone(), self.services.test_store.clone(), ProcessingGroup::Ungrouped, @@ -1113,7 +1115,8 @@ impl ProjectCacheBroker { let project = self.get_or_create_project(project_key); // Reassign processing groups and proceed to processing. - for (group, envelope) in ProcessingGroup::split_envelope(*peek.remove().await?) { + let popped_envelope = peek.remove().await?; + for (group, envelope) in ProcessingGroup::split_envelope(*popped_envelope) { let managed_envelope = ManagedEnvelope::new( envelope, services.outcome_aggregator.clone(), From 09d0b247da1b9e6c7bc55e31478ac934cb791dc6 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 12:27:39 +0200 Subject: [PATCH 59/62] ref: review comments --- relay-server/src/services/project_cache.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 3b3bbe7262..8d506063da 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1059,14 +1059,14 @@ impl ProjectCacheBroker { let sampling_key = envelope.sampling_key(); let services = self.services.clone(); - let project_key = envelope.meta().public_key(); - let project = self.get_or_create_project(project_key); + let own_key = envelope.meta().public_key(); + let project = self.get_or_create_project(own_key); let project_state = project.get_cached_state(services.project_cache.clone(), false); // Check if project config is enabled. 
let project_info = match project_state { ProjectState::Enabled(info) => { - peek.mark_ready(&project_key, true); + peek.mark_ready(&own_key, true); info } ProjectState::Disabled => { @@ -1081,13 +1081,12 @@ impl ProjectCacheBroker { return Ok(()); } ProjectState::Pending => { - peek.mark_ready(&project_key, false); + peek.mark_ready(&own_key, false); return Ok(()); } }; // Check if sampling config is enabled. - // TODO: .filter(|state| state.organization_id == project_info.organization_id); let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, @@ -1112,7 +1111,7 @@ impl ProjectCacheBroker { None => None, }; - let project = self.get_or_create_project(project_key); + let project = self.get_or_create_project(own_key); // Reassign processing groups and proceed to processing. let popped_envelope = peek.remove().await?; From d58c64dc418b825f8d513a02b22c44ea40617304 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 12:42:42 +0200 Subject: [PATCH 60/62] fix: guard changed --- relay-server/src/services/buffer/mod.rs | 87 +++++++++++++------------ 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index fe3e2a4bbb..d5a862136e 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -1,6 +1,6 @@ //! Types for buffering envelopes. -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use relay_base_schema::project::ProjectKey; @@ -38,11 +38,10 @@ pub struct GuardedEnvelopeBuffer { /// > The primary use case for the async mutex is to provide shared mutable access to IO resources such as a database connection. /// > [...] when you do want shared access to an IO resource, it is often better to spawn a task to manage the IO resource, /// > and to use message passing to communicate with that task. 
- backend: tokio::sync::Mutex, + inner: tokio::sync::Mutex, /// Used to notify callers of `peek()` of any changes in the buffer. notify: tokio::sync::Notify, - /// Used to notify callers of `peek()` of any changes in the buffer. - changed: AtomicBool, + /// Metric that counts how many push operations are waiting. inflight_push_count: AtomicU64, } @@ -55,9 +54,11 @@ impl GuardedEnvelopeBuffer { pub fn from_config(config: &Config) -> Option { if config.spool_v2() { Some(Self { - backend: tokio::sync::Mutex::new(PolymorphicEnvelopeBuffer::from_config(config)), + inner: tokio::sync::Mutex::new(Inner { + backend: PolymorphicEnvelopeBuffer::from_config(config), + changed: true, + }), notify: tokio::sync::Notify::new(), - changed: AtomicBool::new(true), inflight_push_count: AtomicU64::new(0), }) } else { @@ -92,29 +93,27 @@ impl GuardedEnvelopeBuffer { /// until something changes in the buffer. pub async fn peek(&self) -> Peek { loop { - { - let mut guard = self.backend.lock().await; - if self.changed.load(Ordering::Relaxed) { - match guard.peek().await { - Ok(envelope) => { - if envelope.is_some() { - self.changed.store(false, Ordering::Relaxed); - return Peek { - guard, - changed: &self.changed, - notify: &self.notify, - }; - } - } - Err(error) => { - relay_log::error!( - error = &error as &dyn std::error::Error, - "failed to peek envelope" - ); + let mut guard = self.inner.lock().await; + if guard.changed { + match guard.backend.peek().await { + Ok(envelope) => { + if envelope.is_some() { + guard.changed = false; + return Peek { + guard, + notify: &self.notify, + }; } - }; - } + } + Err(error) => { + relay_log::error!( + error = &error as &dyn std::error::Error, + "failed to peek envelope" + ); + } + }; } + drop(guard); // release the lock self.notify.notified().await; } } @@ -123,23 +122,23 @@ impl GuardedEnvelopeBuffer { /// /// The buffer reprioritizes its envelopes based on this information. 
pub async fn mark_ready(&self, project_key: &ProjectKey, ready: bool) { - let mut guard = self.backend.lock().await; - let changed = guard.mark_ready(project_key, ready); + let mut guard = self.inner.lock().await; + let changed = guard.backend.mark_ready(project_key, ready); if changed { - self.notify(); + self.notify(&mut guard); } } /// Adds an envelope to the buffer and wakes any waiting consumers. async fn push(&self, envelope: Box) -> Result<(), EnvelopeBufferError> { - let mut guard = self.backend.lock().await; - guard.push(envelope).await?; - self.notify(); + let mut guard = self.inner.lock().await; + guard.backend.push(envelope).await?; + self.notify(&mut guard); Ok(()) } - fn notify(&self) { - self.changed.store(true, Ordering::Relaxed); + fn notify(&self, guard: &mut MutexGuard) { + guard.changed = true; self.notify.notify_waiters(); } } @@ -148,9 +147,8 @@ impl GuardedEnvelopeBuffer { /// /// Objects of this type can only exist if the buffer is not empty. pub struct Peek<'a> { - guard: MutexGuard<'a, PolymorphicEnvelopeBuffer>, + guard: MutexGuard<'a, Inner>, notify: &'a tokio::sync::Notify, - changed: &'a AtomicBool, } impl Peek<'_> { @@ -158,6 +156,7 @@ impl Peek<'_> { pub async fn get(&mut self) -> Result<&Envelope, EnvelopeBufferError> { Ok(self .guard + .backend .peek() .await? .expect("element disappeared while holding lock")) @@ -170,6 +169,7 @@ impl Peek<'_> { self.notify(); Ok(self .guard + .backend .pop() .await? .expect("element disappeared while holding lock")) @@ -180,18 +180,25 @@ impl Peek<'_> { /// Since [`Peek`] already has exclusive access to the buffer, it can mark projects as ready /// without awaiting the lock. 
pub fn mark_ready(&mut self, project_key: &ProjectKey, ready: bool) { - let changed = self.guard.mark_ready(project_key, ready); + let changed = self.guard.backend.mark_ready(project_key, ready); if changed { self.notify(); } } - fn notify(&self) { - self.changed.store(true, Ordering::Relaxed); + fn notify(&mut self) { + self.guard.changed = true; self.notify.notify_waiters(); } } +#[derive(Debug)] +struct Inner { + backend: PolymorphicEnvelopeBuffer, + /// Used to notify callers of `peek()` of any changes in the buffer. + changed: bool, +} + #[cfg(test)] mod tests { use std::str::FromStr; From d59d2b116b4f7f3c2bbf41687f227b31c2b86b69 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 13:55:46 +0200 Subject: [PATCH 61/62] Update relay-config/src/config.rs Co-authored-by: Sebastian Zivota --- relay-config/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index d8185189c9..108799e195 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -896,7 +896,7 @@ pub struct EnvelopeSpool { #[serde(default = "spool_envelopes_max_envelope_delay_secs")] max_envelope_delay_secs: u64, /// Version of the spooler. - #[serde(default = "EnvelopeSpoolVersion::default")] + #[serde(default)] version: EnvelopeSpoolVersion, } From 76db84dc537a970f9eaffca5f3eb29c49553b44a Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 31 Jul 2024 15:48:28 +0200 Subject: [PATCH 62/62] instr(buffer): Metric for number of stacks (#3878) Add a gauge metric for the total number of stacks in the priority queue. 
--- .../src/services/buffer/envelope_buffer/mod.rs | 18 +++++++++++++++++- relay-server/src/statsd.rs | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/buffer/envelope_buffer/mod.rs b/relay-server/src/services/buffer/envelope_buffer/mod.rs index 158f1b3bda..969d9a3565 100644 --- a/relay-server/src/services/buffer/envelope_buffer/mod.rs +++ b/relay-server/src/services/buffer/envelope_buffer/mod.rs @@ -13,7 +13,7 @@ use crate::services::buffer::envelope_stack::{EnvelopeStack, StackProvider}; use crate::services::buffer::sqlite_envelope_store::SqliteEnvelopeStoreError; use crate::services::buffer::stack_provider::memory::MemoryStackProvider; use crate::services::buffer::stack_provider::sqlite::SqliteStackProvider; -use crate::statsd::RelayCounters; +use crate::statsd::{RelayCounters, RelayGauges}; /// Polymorphic envelope buffering interface. /// @@ -135,6 +135,10 @@ impl EnvelopeBuffer

where EnvelopeBufferError: std::convert::From<::Error>, { + /// Pushes an envelope to the appropriate envelope stack and reprioritizes the stack. + /// + /// If the envelope stack does not exist, a new stack is pushed to the priority queue. + /// The priority of the stack is updated with the envelope's received_at time. pub async fn push(&mut self, envelope: Box) -> Result<(), EnvelopeBufferError> { let received_at = envelope.meta().start_time(); let stack_key = StackKey::from_envelope(&envelope); @@ -157,6 +161,7 @@ where Ok(()) } + /// Returns a reference to the next-in-line envelope, if one exists. pub async fn peek(&mut self) -> Result, EnvelopeBufferError> { let Some(( QueueItem { @@ -172,6 +177,10 @@ where Ok(stack.peek().await?) } + /// Returns the next-in-line envelope, if one exists. + /// + /// The priority of the envelope's stack is updated with the next envelope's received_at + /// time. If the stack is empty after popping, it is removed from the priority queue. pub async fn pop(&mut self) -> Result>, EnvelopeBufferError> { let Some((QueueItem { key, value: stack }, _)) = self.priority_queue.peek_mut() else { return Ok(None); @@ -196,6 +205,7 @@ where Ok(Some(envelope)) } + /// Reprioritizes all stacks that involve the given project key by setting it to "ready". 
pub fn mark_ready(&mut self, project: &ProjectKey, is_ready: bool) -> bool { let mut changed = false; if let Some(stack_keys) = self.stacks_by_project.get(project) { @@ -238,6 +248,9 @@ where .or_default() .insert(stack_key); } + relay_statsd::metric!( + gauge(RelayGauges::BufferStackCount) = self.priority_queue.len() as u64 + ); } fn pop_stack(&mut self, stack_key: StackKey) { @@ -248,6 +261,9 @@ where .remove(&stack_key); } self.priority_queue.remove(&stack_key); + relay_statsd::metric!( + gauge(RelayGauges::BufferStackCount) = self.priority_queue.len() as u64 + ); } } diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 93b97c4be0..489d01d84d 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -30,6 +30,10 @@ pub enum RelayGauges { /// /// This corresponds to the number of corresponding tokio tasks currently scheduled or running. BufferPushInFlight, + /// The number of individual stacks in the priority queue. + /// + /// Per combination of `(own_key, sampling_key)`, a new stack is created. + BufferStackCount, /// The currently used memory by the entire system. /// /// Relay uses the same value for its memory health check. @@ -55,6 +59,7 @@ impl GaugeMetric for RelayGauges { RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", RelayGauges::BufferPushInFlight => "buffer.push_inflight", + RelayGauges::BufferStackCount => "buffer.stack_count", RelayGauges::SystemMemoryUsed => "health.system_memory.used", RelayGauges::SystemMemoryTotal => "health.system_memory.total", #[cfg(feature = "processing")]