raftstore: calculate the slow score by considering individual disk performance factors (tikv#17801) (tikv#17912)

close tikv#17884

This PR introduces an extra, independent inspector that detects I/O hang issues on the KvDB disk when
KvDB is deployed on a separate mount path.
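
At a high level, each factor keeps its own `SlowScore`, and the value the store reports is the maximum across factors (see `UnifiedSlowScore::get_score` in the diff below). A minimal standalone sketch of that combining rule, with illustrative numbers only:

fn unified_score(factor_scores: &[f64]) -> f64 {
    // Start from the neutral score 1.0 and take the maximum across factors
    // (Raft disk I/O, KvDB disk I/O, ...).
    factor_scores.iter().copied().fold(1.0, f64::max)
}

fn main() {
    // Raft disk healthy (1.0) but KvDB disk degraded (12.0): the store still
    // reports 12.0, so the KvDB-side slowness stays visible to PD.
    assert_eq!(unified_score(&[1.0, 12.0]), 12.0);
}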

Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
Signed-off-by: lucasliang <nkcs_lykx@hotmail.com>

Co-authored-by: lucasliang <nkcs_lykx@hotmail.com>
ti-chi-bot and LykxSassinator authored Dec 2, 2024
1 parent eb66485 commit 99279b8
Showing 24 changed files with 712 additions and 167 deletions.
2 changes: 2 additions & 0 deletions components/health_controller/src/lib.rs
@@ -30,6 +30,8 @@
//! that are specific to different modules, increasing the complexity and
//! possibility of misusing `HealthController`.
#![feature(div_duration)]

pub mod reporters;
pub mod slow_score;
pub mod trend;
104 changes: 87 additions & 17 deletions components/health_controller/src/reporters.rs
@@ -12,6 +12,7 @@ use prometheus::IntGauge;
use crate::{
slow_score::{SlowScore, SlowScoreTickResult},
trend::{RequestPerSecRecorder, Trend},
types::InspectFactor,
HealthController, HealthControllerInner, RaftstoreDuration,
};

@@ -27,6 +28,7 @@ pub struct RaftstoreReporterConfig {
/// worker) is expected to tick it. But the interval is necessary in
/// some internal calculations.
pub inspect_interval: Duration,
pub inspect_kvdb_interval: Duration,

pub unsensitive_cause: f64,
pub unsensitive_result: f64,
Expand All @@ -43,9 +45,72 @@ pub struct RaftstoreReporterConfig {
pub result_l2_gap_gauges: IntGauge,
}

/// A unified slow score that combines multiple slow scores.
///
/// It calculates the final slow score of a store by picking the maximum
/// score among multiple factors. Each factor represents a different aspect of
/// the store's performance. Typically, we have two factors: Raft Disk I/O and
/// KvDB Disk I/O. If there are more factors in the future, we can add them
/// here.
#[derive(Default)]
pub struct UnifiedSlowScore {
factors: Vec<SlowScore>,
}

impl UnifiedSlowScore {
pub fn new(cfg: &RaftstoreReporterConfig) -> Self {
let mut unified_slow_score = UnifiedSlowScore::default();
// The first factor is for Raft Disk I/O.
unified_slow_score
.factors
.push(SlowScore::new(cfg.inspect_interval));
// The second factor is for KvDB Disk I/O.
unified_slow_score
.factors
.push(SlowScore::new_with_extra_config(
cfg.inspect_kvdb_interval,
0.6,
));
unified_slow_score
}

#[inline]
pub fn record(
&mut self,
id: u64,
factor: InspectFactor,
duration: &RaftstoreDuration,
not_busy: bool,
) {
self.factors[factor as usize].record(id, duration.delays_on_disk_io(false), not_busy);
}

#[inline]
pub fn get(&self, factor: InspectFactor) -> &SlowScore {
&self.factors[factor as usize]
}

#[inline]
pub fn get_mut(&mut self, factor: InspectFactor) -> &mut SlowScore {
&mut self.factors[factor as usize]
}

// Returns the maximum score of all factors.
pub fn get_score(&self) -> f64 {
self.factors
.iter()
.map(|factor| factor.get())
.fold(1.0, f64::max)
}

pub fn last_tick_finished(&self) -> bool {
self.factors.iter().all(SlowScore::last_tick_finished)
}
}

pub struct RaftstoreReporter {
health_controller_inner: Arc<HealthControllerInner>,
slow_score: SlowScore,
slow_score: UnifiedSlowScore,
slow_trend: SlowTrendStatistics,
is_healthy: bool,
}
@@ -56,18 +121,14 @@ impl RaftstoreReporter {
pub fn new(health_controller: &HealthController, cfg: RaftstoreReporterConfig) -> Self {
Self {
health_controller_inner: health_controller.inner.clone(),
slow_score: SlowScore::new(cfg.inspect_interval),
slow_score: UnifiedSlowScore::new(&cfg),
slow_trend: SlowTrendStatistics::new(cfg),
is_healthy: true,
}
}

pub fn get_tick_interval(&self) -> Duration {
self.slow_score.get_inspect_interval()
}

pub fn get_slow_score(&self) -> f64 {
self.slow_score.get()
self.slow_score.get_score()
}

pub fn get_slow_trend(&self) -> &SlowTrendStatistics {
@@ -77,17 +138,18 @@
pub fn record_raftstore_duration(
&mut self,
id: u64,
factor: InspectFactor,
duration: RaftstoreDuration,
store_not_busy: bool,
) {
// Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account.
self.slow_score
.record(id, duration.delays_on_disk_io(false), store_not_busy);
.record(id, factor, &duration, store_not_busy);
self.slow_trend.record(duration);

// Publish slow score to health controller
self.health_controller_inner
.update_raftstore_slow_score(self.slow_score.get());
.update_raftstore_slow_score(self.slow_score.get_score());
}

fn is_healthy(&self) -> bool {
@@ -109,34 +171,42 @@ impl RaftstoreReporter {
}
}

pub fn tick(&mut self, store_maybe_busy: bool) -> SlowScoreTickResult {
pub fn tick(&mut self, store_maybe_busy: bool, factor: InspectFactor) -> SlowScoreTickResult {
// Record a fairly large value when the inspection times out
self.slow_trend.slow_cause.record(500_000, Instant::now());

// healthy: The health status of the current store.
// all_ticks_finished: The last tick of all factors is finished.
// factor_tick_finished: The last tick of the current factor is finished.
let (healthy, all_ticks_finished, factor_tick_finished) = (
self.is_healthy(),
self.slow_score.last_tick_finished(),
self.slow_score.get(factor).last_tick_finished(),
);
// The health status is restored to serving once the last tick of every
// factor has finished.
if !self.is_healthy() && self.slow_score.last_tick_finished() {
if !healthy && all_ticks_finished {
self.set_is_healthy(true);
}
if !self.slow_score.last_tick_finished() {
if !all_ticks_finished {
// If the last tick has not finished, the store might be busy handling
// requests or delayed on I/O operations. Only when the store is not busy
// should the missed tick be recorded as a timeout for this factor.
if !store_maybe_busy {
self.slow_score.record_timeout();
if !store_maybe_busy && !factor_tick_finished {
self.slow_score.get_mut(factor).record_timeout();
}
}

let slow_score_tick_result = self.slow_score.tick();
let slow_score_tick_result = self.slow_score.get_mut(factor).tick();
if slow_score_tick_result.updated_score.is_some() && !slow_score_tick_result.has_new_record
{
self.set_is_healthy(false);
}

// Publish the slow score to health controller
if let Some(slow_score_value) = slow_score_tick_result.updated_score {
if slow_score_tick_result.updated_score.is_some() {
self.health_controller_inner
.update_raftstore_slow_score(slow_score_value);
.update_raftstore_slow_score(self.slow_score.get_score());
}

slow_score_tick_result
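For callers, the visible change in `RaftstoreReporter` is that `record_raftstore_duration` and `tick` now take an `InspectFactor`, and a missed tick is only counted against the factor whose inspection has not returned. A simplified, standalone restatement of that timeout decision (a placeholder predicate for illustration, not part of the commit):

fn should_record_timeout(
    all_ticks_finished: bool,
    factor_tick_finished: bool,
    store_maybe_busy: bool,
) -> bool {
    // A missed tick counts as a disk timeout only when the store is not busy
    // and this particular factor's last inspection has not come back.
    !all_ticks_finished && !store_maybe_busy && !factor_tick_finished
}

fn main() {
    // KvDB factor still waiting on its inspection while the store is idle:
    // record a timeout for that factor.
    assert!(should_record_timeout(false, false, false));
    // The store is busy: the missed tick is not treated as a disk timeout.
    assert!(!should_record_timeout(false, false, true));
}
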
79 changes: 78 additions & 1 deletion components/health_controller/src/slow_score.rs
@@ -7,6 +7,12 @@ use std::{

use ordered_float::OrderedFloat;

/// Interval for updating the slow score.
const UPDATE_INTERVALS: Duration = Duration::from_secs(10);
/// Recovery interval for the slow score.
/// If the score has reached 100 and no inspecting request times out during
/// this interval, the score goes back to 1 after 5 minutes.
const RECOVERY_INTERVALS: Duration = Duration::from_secs(60 * 5);
// Slow score is a value that represents the speed of a store and ranges in [1,
// 100]. It is maintained in the AIMD way.
// If there are some inspecting requests timeout during a round, by default the
@@ -45,7 +51,7 @@ impl SlowScore {

inspect_interval,
ratio_thresh: OrderedFloat(0.1),
min_ttr: Duration::from_secs(5 * 60),
min_ttr: RECOVERY_INTERVALS,
last_record_time: Instant::now(),
last_update_time: Instant::now(),
round_ticks: 30,
@@ -54,6 +60,29 @@
}
}

// Only for kvdb.
pub fn new_with_extra_config(inspect_interval: Duration, timeout_ratio: f64) -> SlowScore {
SlowScore {
value: OrderedFloat(1.0),

timeout_requests: 0,
total_requests: 0,

inspect_interval,
ratio_thresh: OrderedFloat(timeout_ratio),
min_ttr: RECOVERY_INTERVALS,
last_record_time: Instant::now(),
last_update_time: Instant::now(),
// The minimum number of round ticks is 1 for kvdb.
round_ticks: cmp::max(
UPDATE_INTERVALS.div_duration_f64(inspect_interval) as u64,
1_u64,
),
last_tick_id: 0,
last_tick_finished: true,
}
}

pub fn record(&mut self, id: u64, duration: Duration, not_busy: bool) {
self.last_record_time = Instant::now();
if id != self.last_tick_id {
@@ -207,4 +236,52 @@ mod tests {
slow_score.update_impl(Duration::from_secs(57))
);
}

#[test]
fn test_slow_score_extra() {
let mut slow_score = SlowScore::new_with_extra_config(Duration::from_millis(1000), 0.6);
slow_score.timeout_requests = 1;
slow_score.total_requests = 10;
let score = slow_score.update_impl(Duration::from_secs(10));
assert!(score > OrderedFloat(1.16));
assert!(score < OrderedFloat(1.17));

slow_score.timeout_requests = 2;
slow_score.total_requests = 10;
let score = slow_score.update_impl(Duration::from_secs(10));
assert!(score > OrderedFloat(1.5));
assert!(score < OrderedFloat(1.6));

slow_score.timeout_requests = 0;
slow_score.total_requests = 100;
assert_eq!(
OrderedFloat(1.0),
slow_score.update_impl(Duration::from_secs(57))
);

slow_score.timeout_requests = 3;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(1.5),
slow_score.update_impl(Duration::from_secs(10))
);

slow_score.timeout_requests = 6;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(3.0),
slow_score.update_impl(Duration::from_secs(10))
);

slow_score.timeout_requests = 10;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(6.0),
slow_score.update_impl(Duration::from_secs(10))
);

// Test an overly large inspect interval.
let slow_score = SlowScore::new_with_extra_config(Duration::from_secs(11), 0.1);
assert_eq!(slow_score.round_ticks, 1);
}
}
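
Two details in this file are easy to miss. First, `round_ticks` in `new_with_extra_config` is derived from `UPDATE_INTERVALS` (10s) divided by the inspect interval and clamped to at least 1, which is exactly what the last test asserts. A standalone sketch of the same computation (the crate enables the `div_duration` feature gate above; `div_duration_f64` is also available on recent stable Rust):

use std::{cmp, time::Duration};

fn round_ticks(inspect_interval: Duration) -> u64 {
    let update_interval = Duration::from_secs(10); // mirrors UPDATE_INTERVALS
    cmp::max(update_interval.div_duration_f64(inspect_interval) as u64, 1)
}

fn main() {
    assert_eq!(round_ticks(Duration::from_secs(2)), 5); // score updated every 5 ticks
    assert_eq!(round_ticks(Duration::from_secs(11)), 1); // clamped, as the last test asserts
}

Second, the assertions in `test_slow_score_extra` are consistent with a multiplicative growth step of roughly `score *= 1 + min(timeout_ratio, ratio_thresh) / ratio_thresh`: with `ratio_thresh = 0.6`, 2 timeouts out of 10 multiply the score by about 1.33, and 6 or more out of 10 double it, while a round without timeouts decays the score back toward 1 within `RECOVERY_INTERVALS`. This is a reading of the test values, not a restatement of `update_impl`, which is not shown in this hunk.
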
16 changes: 16 additions & 0 deletions components/health_controller/src/types.rs
@@ -50,6 +50,22 @@ impl RaftstoreDuration {
}
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum InspectFactor {
RaftDisk = 0,
KvDisk,
// TODO: Add more factors, like network I/O.
}

impl InspectFactor {
pub fn as_str(&self) -> &str {
match *self {
InspectFactor::RaftDisk => "raft",
InspectFactor::KvDisk => "kvdb",
}
}
}

/// Used to inspect the latency of all stages of raftstore.
pub struct LatencyInspector {
id: u64,
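Note that the discriminants of `InspectFactor` double as indices into `UnifiedSlowScore::factors`, so the push order in `UnifiedSlowScore::new` (Raft disk first, KvDB disk second) has to stay in sync with `RaftDisk = 0` and `KvDisk = 1`. A small check of that coupling (assumes the `health_controller` crate is a dependency, as in the import shown in the next file):

use health_controller::types::InspectFactor;

fn main() {
    assert_eq!(InspectFactor::RaftDisk as usize, 0); // first factor pushed in UnifiedSlowScore::new
    assert_eq!(InspectFactor::KvDisk as usize, 1); // second factor (KvDB disk I/O)
}
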
5 changes: 3 additions & 2 deletions components/raftstore-v2/src/worker/pd/mod.rs
@@ -9,7 +9,7 @@ use causal_ts::CausalTsProviderImpl;
use collections::HashMap;
use concurrency_manager::ConcurrencyManager;
use engine_traits::{KvEngine, RaftEngine, TabletRegistry};
use health_controller::types::{LatencyInspector, RaftstoreDuration};
use health_controller::types::{InspectFactor, LatencyInspector, RaftstoreDuration};
use kvproto::{metapb, pdpb};
use pd_client::{BucketStat, PdClient};
use raftstore::store::{
@@ -254,6 +254,7 @@
let mut stats_monitor = PdStatsMonitor::new(
store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT,
cfg.value().inspect_interval.0,
std::time::Duration::default(),
PdReporter::new(pd_scheduler, logger.clone()),
);
stats_monitor.start(auto_split_controller, collector_reg_handle)?;
@@ -428,7 +429,7 @@ impl StoreStatsReporter for PdReporter {
}
}

fn update_latency_stats(&self, timer_tick: u64) {
fn update_latency_stats(&self, timer_tick: u64, _factor: InspectFactor) {
// Tick slowness statistics.
{
if let Err(e) = self.scheduler.schedule(Task::TickSlownessStats) {
(The remaining 19 changed files are not shown.)
