Change APIs on tuples (#55)

kampersanda · Nov 2, 2024 · 3d8980e · 3d8980e
1 parent ffca7ad
commit 3d8980e
Show file tree

Hide file tree

Showing 6 changed files with 188 additions and 141 deletions.
diff --git a/elinor-cli/src/bin/compare.rs b/elinor-cli/src/bin/compare.rs
@@ -273,11 +273,11 @@ fn compare_two_systems(
         for df in df_metrics.iter() {
             let values_1 = df.column("system_1")?.f64()?;
             let values_2 = df.column("system_2")?.f64()?;
-            let diff_scores = values_1
+            let paired_samples = values_1
                 .into_iter()
                 .zip(values_2.into_iter())
-                .map(|(x, y)| x.unwrap() - y.unwrap());
-            stats.push(StudentTTest::from_samples(diff_scores)?);
+                .map(|(a, b)| (a.unwrap(), b.unwrap()));
+            stats.push(StudentTTest::from_paired_samples(paired_samples)?);
         }
         let columns = vec![
             Series::new(
@@ -326,11 +326,11 @@ fn compare_two_systems(
         for df in df_metrics.iter() {
             let values_1 = df.column("system_1")?.f64()?;
             let values_2 = df.column("system_2")?.f64()?;
-            let diff_scores = values_1
+            let paired_samples = values_1
                 .into_iter()
                 .zip(values_2.into_iter())
-                .map(|(x, y)| x.unwrap() - y.unwrap());
-            stats.push(tester.test(diff_scores)?);
+                .map(|(a, b)| (a.unwrap(), b.unwrap()));
+            stats.push(tester.test(paired_samples)?);
         }
         let columns = vec![
             Series::new(

diff --git a/examples/paired_bootstrap_test.rs b/examples/paired_bootstrap_test.rs
@@ -12,8 +12,8 @@ fn main() -> Result<()> {
         0.40, 0.40, 0.10, 0.40, 0.20, 0.10, 0.10, 0.60, 0.30, 0.20,
     ];
 
-    let samples = a.into_iter().zip(b.into_iter()).map(|(x, y)| x - y);
-    let result = BootstrapTest::from_samples(samples)?;
+    let samples = a.into_iter().zip(b.into_iter());
+    let result = BootstrapTest::from_paired_samples(samples)?;
     println!("p-value: {:.4}", result.p_value());
 
     Ok(())

diff --git a/src/lib.rs b/src/lib.rs
@@ -89,7 +89,7 @@
 //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! use approx::assert_relative_eq;
 //! use elinor::{TrueRelStoreBuilder, PredRelStoreBuilder, Metric};
-//! use elinor::statistical_tests::StudentTTest;
+//! use elinor::statistical_tests::{StudentTTest, pairs_from_maps};
 //!
 //! // Prepare true relevance scores.
 //! let mut b = TrueRelStoreBuilder::new();
@@ -120,8 +120,8 @@
 //! let result_b = elinor::evaluate(&true_rels, &pred_rels_b, metric)?;
 //!
 //! // Perform two-sided paired Student's t-test.
-//! let tupled_scores = elinor::tupled_scores_from_score_maps([result_a.scores(), result_b.scores()])?;
-//! let stat = StudentTTest::from_samples(tupled_scores.iter().map(|x| x[0] - x[1]))?;
+//! let pairs = pairs_from_maps(result_a.scores(), result_b.scores())?;
+//! let stat = StudentTTest::from_paired_samples(pairs)?;
 //!
 //! // Various statistics can be obtained from the t-test result.
 //! assert!(stat.mean() > 0.0);
@@ -153,7 +153,7 @@
 //! ```
 //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! use elinor::{TrueRelStoreBuilder, PredRelStoreBuilder, Metric};
-//! use elinor::statistical_tests::{RandomizedTukeyHsdTest, TukeyHsdTest};
+//! use elinor::statistical_tests::{RandomizedTukeyHsdTest, TukeyHsdTest, tuples_from_maps};
 //!
 //! // Prepare true relevance scores.
 //! let mut b = TrueRelStoreBuilder::new();
@@ -192,9 +192,7 @@
 //! let result_c = elinor::evaluate(&true_rels, &pred_rels_c, metric)?;
 //!
 //! // Prepare tupled scores for tests.
-//! let tupled_scores = elinor::tupled_scores_from_score_maps(
-//!     [result_a.scores(), result_b.scores(), result_c.scores()]
-//! )?;
+//! let tupled_scores = tuples_from_maps([result_a.scores(), result_b.scores(), result_c.scores()])?;
 //!
 //! // Perform Tukey HSD test with paired observations.
 //! let hsd_stat = TukeyHsdTest::from_tupled_samples(tupled_scores.iter(), 3)?;
@@ -391,56 +389,10 @@ where
     })
 }
 
-/// Converts maps of scores into a vector of tupled scores, where each tuple contains the scores for each key.
-///
-/// This function is expected to be used to prepare data for statistical tests.
-///
-/// # Errors
-///
-/// * [`ElinorError::InvalidArgument`] if score_maps have different sets of keys.
-pub fn tupled_scores_from_score_maps<'a, I, K>(score_maps: I) -> Result<Vec<Vec<f64>>>
-where
-    I: IntoIterator<Item = &'a BTreeMap<K, f64>>,
-    K: Clone + Eq + Ord + std::fmt::Display + 'a,
-{
-    let score_maps = score_maps.into_iter().collect::<Vec<_>>();
-    if score_maps.len() < 2 {
-        return Err(ElinorError::InvalidArgument(format!(
-            "The number of score maps must be at least 2, but got {}.",
-            score_maps.len()
-        )));
-    }
-    for i in 1..score_maps.len() {
-        if score_maps[0].len() != score_maps[i].len() {
-            return Err(ElinorError::InvalidArgument(format!(
-                "The number of keys in score maps must be the same, but got score_maps[0].len()={} and score_maps[{}].len()={}.",
-                score_maps[0].len(),
-                i,
-                score_maps[i].len()
-            )));
-        }
-        if score_maps[0].keys().ne(score_maps[i].keys()) {
-            return Err(ElinorError::InvalidArgument(
-                "The keys in the score maps must be the same.".to_string(),
-            ));
-        }
-    }
-    let mut tupled_scores = vec![];
-    for query_id in score_maps[0].keys() {
-        let mut scores = vec![];
-        for score_map in &score_maps {
-            scores.push(*score_map.get(query_id).unwrap());
-        }
-        tupled_scores.push(scores);
-    }
-    Ok(tupled_scores)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
     use approx::assert_relative_eq;
-    use maplit::btreemap;
 
     #[test]
     fn test_evaluate() {
@@ -475,47 +427,4 @@ mod tests {
         assert_relative_eq!(scores["q_1"], 2. / 3.);
         assert_relative_eq!(scores["q_2"], 1. / 3.);
     }
-
-    #[test]
-    fn test_tupled_scores_from_score_maps() {
-        let scores_a = btreemap! {
-            "q_1" => 2.,
-            "q_2" => 5.,
-        };
-        let scores_b = btreemap! {
-            "q_1" => 1.,
-            "q_2" => 0.,
-        };
-        let scores_c = btreemap! {
-            "q_1" => 2.,
-            "q_2" => 1.,
-        };
-        let tupled_scores =
-            tupled_scores_from_score_maps([&scores_a, &scores_b, &scores_c]).unwrap();
-        assert_eq!(tupled_scores, vec![vec![2., 1., 2.], vec![5., 0., 1.]]);
-    }
-
-    #[test]
-    fn test_tupled_scores_from_score_maps_different_keys() {
-        let scores_a = btreemap! {
-            "q_1" => 2.,
-            "q_2" => 5.,
-        };
-        let scores_b = btreemap! {
-            "q_1" => 1.,
-            "q_3" => 0.,
-        };
-        let tupled_scores = tupled_scores_from_score_maps([&scores_a, &scores_b]);
-        assert!(tupled_scores.is_err());
-    }
-
-    #[test]
-    fn test_tupled_scores_from_score_maps_single_map() {
-        let scores_a = btreemap! {
-            "q_1" => 2.,
-            "q_2" => 5.,
-        };
-        let tupled_scores = tupled_scores_from_score_maps([&scores_a]);
-        assert!(tupled_scores.is_err());
-    }
 }
diff --git a/src/statistical_tests.rs b/src/statistical_tests.rs
@@ -18,3 +18,138 @@ pub use randomized_tukey_hsd_test::RandomizedTukeyHsdTest;
 pub use student_t_test::StudentTTest;
 pub use tukey_hsd_test::TukeyHsdTest;
 pub use two_way_anova_without_replication::TwoWayAnovaWithoutReplication;
+
+use std::collections::BTreeMap;
+
+use crate::errors::ElinorError;
+use crate::errors::Result;
+
+/// Zips two maps of scores, $`A`$ and $`B`$, into a vector of paired scores $`X`$.
+///
+/// Given
+///
+/// - $`A = \{ (k^A_1 \mapsto v^A_1), (k^A_2 \mapsto v^A_2), \dots, (k^A_n \mapsto v^A_n) \}`$ and
+/// - $`B = \{ (k^B_1 \mapsto v^B_1), (k^B_2 \mapsto v^B_2), \dots, (k^B_n \mapsto v^B_n) \}`$
+///
+/// with $`k^A_i = k^B_i`$ for all $`i`$, the result is
+///
+/// - $`X = [(v^A_1, v^B_1), (v^A_2, v^B_2), \dots, (v^A_n, v^B_n)]`$.
+///
+/// The first element of every pair comes from `map_a` and the second from `map_b`.
+/// This is a two-map convenience wrapper around [`tuples_from_maps`].
+///
+/// # Examples
+///
+/// ```
+/// use elinor::statistical_tests::pairs_from_maps;
+///
+/// let map_a = [("a", 0.70), ("b", 0.30), ("c", 0.20)].into();
+/// let map_b = [("a", 0.50), ("b", 0.10), ("c", 0.00)].into();
+/// let pairs = pairs_from_maps(&map_a, &map_b).unwrap();
+/// assert_eq!(pairs, vec![(0.70, 0.50), (0.30, 0.10), (0.20, 0.00)]);
+/// ```
+///
+/// # Errors
+///
+/// * [`ElinorError::InvalidArgument`] if maps have different sets of keys.
+pub fn pairs_from_maps<K>(
+    map_a: &BTreeMap<K, f64>,
+    map_b: &BTreeMap<K, f64>,
+) -> Result<Vec<(f64, f64)>>
+where
+    K: Clone + Eq + Ord + std::fmt::Display,
+{
+    // Key validation is delegated; any mismatch is propagated unchanged.
+    let rows = tuples_from_maps([map_a, map_b])?;
+    let mut pairs = Vec::with_capacity(rows.len());
+    for row in rows {
+        // Exactly two maps were passed, so every row holds exactly two scores.
+        pairs.push((row[0], row[1]));
+    }
+    Ok(pairs)
+}
+
+/// Converts maps of scores, $`A_1, A_2, \dots, A_m`$, into a vector of tupled scores $`X`$:
+///
+/// - $`A_j = \{ (k^j_1 \mapsto v^j_1), (k^j_2 \mapsto v^j_2), \dots, (k^j_n \mapsto v^j_n) \}`$ for all $`j`$,
+/// - $`X = [(v^1_1, v^2_1, \dots, v^m_1), (v^1_2, v^2_2, \dots, v^m_2), \dots, (v^1_n, v^2_n, \dots, v^m_n)]`$,
+///
+/// where $`k^1_i = k^2_i = \dots = k^m_i`$ for all $`i`$.
+///
+/// Tuples follow the key order of the first map, and the $`j`$-th element of each tuple
+/// is taken from the $`j`$-th map. A single map is accepted and yields one-element tuples.
+///
+/// # Examples
+///
+/// ```
+/// use elinor::statistical_tests::tuples_from_maps;
+///
+/// let map_a = [("a", 0.70), ("b", 0.30), ("c", 0.20)].into();
+/// let map_b = [("a", 0.50), ("b", 0.10), ("c", 0.00)].into();
+/// let map_c = [("a", 0.60), ("b", 0.20), ("c", 0.10)].into();
+/// let tuples = tuples_from_maps([&map_a, &map_b, &map_c]).unwrap();
+/// assert_eq!(
+///     tuples,
+///     vec![vec![0.70, 0.50, 0.60], vec![0.30, 0.10, 0.20], vec![0.20, 0.00, 0.10]]
+/// );
+/// ```
+///
+/// # Errors
+///
+/// * [`ElinorError::InvalidArgument`] if no maps are given.
+/// * [`ElinorError::InvalidArgument`] if maps have different sets of keys.
+pub fn tuples_from_maps<'a, I, K>(maps: I) -> Result<Vec<Vec<f64>>>
+where
+    I: IntoIterator<Item = &'a BTreeMap<K, f64>>,
+    K: Clone + Eq + Ord + std::fmt::Display + 'a,
+{
+    let maps = maps.into_iter().collect::<Vec<_>>();
+    // The first map supplies the reference key set. Without it there is nothing to
+    // compare against or iterate over, so report the empty input instead of panicking.
+    let first = match maps.first() {
+        Some(&map) => map,
+        None => {
+            return Err(ElinorError::InvalidArgument(
+                "At least one map must be given, but got none.".to_string(),
+            ))
+        }
+    };
+    for (i, &map) in maps.iter().enumerate().skip(1) {
+        // Implied by the key comparison below, but checked first so that a size
+        // mismatch gets the more specific message.
+        if first.len() != map.len() {
+            return Err(ElinorError::InvalidArgument(format!(
+                "The number of keys in maps must be the same, but got maps[0].len()={} and maps[{}].len()={}.",
+                first.len(),
+                i,
+                map.len()
+            )));
+        }
+        if first.keys().ne(map.keys()) {
+            return Err(ElinorError::InvalidArgument(
+                "The keys in the maps must be the same.".to_string(),
+            ));
+        }
+    }
+    let tuples = first
+        .keys()
+        // Every map was verified above to hold exactly these keys, so indexing cannot panic.
+        .map(|key| maps.iter().map(|&map| map[key]).collect::<Vec<f64>>())
+        .collect();
+    Ok(tuples)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds the error expected whenever the maps disagree on their keys.
+    fn key_mismatch_error() -> ElinorError {
+        ElinorError::InvalidArgument("The keys in the maps must be the same.".to_string())
+    }
+
+    #[test]
+    fn test_pairs_from_maps_different_keys() {
+        // Same number of entries, but the last key differs ("c" vs. "d").
+        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
+        let map_b = BTreeMap::from([("a", 0.50), ("b", 0.10), ("d", 0.00)]);
+        let result = pairs_from_maps(&map_a, &map_b);
+        assert_eq!(result, Err(key_mismatch_error()));
+    }
+
+    #[test]
+    fn test_tuples_from_maps_different_keys() {
+        // Only the middle map deviates from the others.
+        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
+        let map_b = BTreeMap::from([("a", 0.50), ("b", 0.10), ("d", 0.00)]);
+        let map_c = BTreeMap::from([("a", 0.60), ("b", 0.20), ("c", 0.10)]);
+        let result = tuples_from_maps([&map_a, &map_b, &map_c]);
+        assert_eq!(result, Err(key_mismatch_error()));
+    }
+
+    #[test]
+    fn test_tuples_from_maps_single_map() {
+        // A lone map is valid input and produces one-element tuples.
+        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
+        let expected = vec![vec![0.70], vec![0.30], vec![0.20]];
+        assert_eq!(tuples_from_maps([&map_a]), Ok(expected));
+    }
+}