Skip to content

Commit

Permalink
Change APIs on tuples (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
kampersanda authored Nov 2, 2024
1 parent ffca7ad commit 3d8980e
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 141 deletions.
12 changes: 6 additions & 6 deletions elinor-cli/src/bin/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,11 +273,11 @@ fn compare_two_systems(
for df in df_metrics.iter() {
let values_1 = df.column("system_1")?.f64()?;
let values_2 = df.column("system_2")?.f64()?;
let diff_scores = values_1
let paired_samples = values_1
.into_iter()
.zip(values_2.into_iter())
.map(|(x, y)| x.unwrap() - y.unwrap());
stats.push(StudentTTest::from_samples(diff_scores)?);
.map(|(a, b)| (a.unwrap(), b.unwrap()));
stats.push(StudentTTest::from_paired_samples(paired_samples)?);
}
let columns = vec![
Series::new(
Expand Down Expand Up @@ -326,11 +326,11 @@ fn compare_two_systems(
for df in df_metrics.iter() {
let values_1 = df.column("system_1")?.f64()?;
let values_2 = df.column("system_2")?.f64()?;
let diff_scores = values_1
let paired_samples = values_1
.into_iter()
.zip(values_2.into_iter())
.map(|(x, y)| x.unwrap() - y.unwrap());
stats.push(tester.test(diff_scores)?);
.map(|(a, b)| (a.unwrap(), b.unwrap()));
stats.push(tester.test(paired_samples)?);
}
let columns = vec![
Series::new(
Expand Down
4 changes: 2 additions & 2 deletions examples/paired_bootstrap_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ fn main() -> Result<()> {
0.40, 0.40, 0.10, 0.40, 0.20, 0.10, 0.10, 0.60, 0.30, 0.20,
];

let samples = a.into_iter().zip(b.into_iter()).map(|(x, y)| x - y);
let result = BootstrapTest::from_samples(samples)?;
let samples = a.into_iter().zip(b.into_iter());
let result = BootstrapTest::from_paired_samples(samples)?;
println!("p-value: {:.4}", result.p_value());

Ok(())
Expand Down
101 changes: 5 additions & 96 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! use approx::assert_relative_eq;
//! use elinor::{TrueRelStoreBuilder, PredRelStoreBuilder, Metric};
//! use elinor::statistical_tests::StudentTTest;
//! use elinor::statistical_tests::{StudentTTest, pairs_from_maps};
//!
//! // Prepare true relevance scores.
//! let mut b = TrueRelStoreBuilder::new();
Expand Down Expand Up @@ -120,8 +120,8 @@
//! let result_b = elinor::evaluate(&true_rels, &pred_rels_b, metric)?;
//!
//! // Perform two-sided paired Student's t-test.
//! let tupled_scores = elinor::tupled_scores_from_score_maps([result_a.scores(), result_b.scores()])?;
//! let stat = StudentTTest::from_samples(tupled_scores.iter().map(|x| x[0] - x[1]))?;
//! let pairs = pairs_from_maps(result_a.scores(), result_b.scores())?;
//! let stat = StudentTTest::from_paired_samples(pairs)?;
//!
//! // Various statistics can be obtained from the t-test result.
//! assert!(stat.mean() > 0.0);
Expand Down Expand Up @@ -153,7 +153,7 @@
//! ```
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! use elinor::{TrueRelStoreBuilder, PredRelStoreBuilder, Metric};
//! use elinor::statistical_tests::{RandomizedTukeyHsdTest, TukeyHsdTest};
//! use elinor::statistical_tests::{RandomizedTukeyHsdTest, TukeyHsdTest, tuples_from_maps};
//!
//! // Prepare true relevance scores.
//! let mut b = TrueRelStoreBuilder::new();
Expand Down Expand Up @@ -192,9 +192,7 @@
//! let result_c = elinor::evaluate(&true_rels, &pred_rels_c, metric)?;
//!
//! // Prepare tupled scores for tests.
//! let tupled_scores = elinor::tupled_scores_from_score_maps(
//! [result_a.scores(), result_b.scores(), result_c.scores()]
//! )?;
//! let tupled_scores = tuples_from_maps([result_a.scores(), result_b.scores(), result_c.scores()])?;
//!
//! // Perform Tukey HSD test with paired observations.
//! let hsd_stat = TukeyHsdTest::from_tupled_samples(tupled_scores.iter(), 3)?;
Expand Down Expand Up @@ -391,56 +389,10 @@ where
})
}

/// Transposes several score maps into per-key tuples of scores.
///
/// The `i`-th returned tuple holds, in input order, the score that each map
/// assigns to the `i`-th key (keys are visited in ascending order).
/// The result is intended as input for statistical tests.
///
/// # Errors
///
/// * [`ElinorError::InvalidArgument`] if fewer than two maps are given.
/// * [`ElinorError::InvalidArgument`] if the maps do not share the same set of keys.
pub fn tupled_scores_from_score_maps<'a, I, K>(score_maps: I) -> Result<Vec<Vec<f64>>>
where
    I: IntoIterator<Item = &'a BTreeMap<K, f64>>,
    K: Clone + Eq + Ord + std::fmt::Display + 'a,
{
    let score_maps: Vec<&BTreeMap<K, f64>> = score_maps.into_iter().collect();
    let n_maps = score_maps.len();
    if n_maps < 2 {
        return Err(ElinorError::InvalidArgument(format!(
            "The number of score maps must be at least 2, but got {}.",
            n_maps
        )));
    }

    // Every other map is validated against the first one.
    let reference = score_maps[0];
    for (idx, other) in score_maps.iter().enumerate().skip(1) {
        if reference.len() != other.len() {
            return Err(ElinorError::InvalidArgument(format!(
                "The number of keys in score maps must be the same, but got score_maps[0].len()={} and score_maps[{}].len()={}.",
                reference.len(),
                idx,
                other.len()
            )));
        }
        if !reference.keys().eq(other.keys()) {
            return Err(ElinorError::InvalidArgument(
                "The keys in the score maps must be the same.".to_string(),
            ));
        }
    }

    // The key sets were verified to be identical, so indexing cannot fail.
    Ok(reference
        .keys()
        .map(|query_id| {
            score_maps
                .iter()
                .map(|scores| scores[query_id])
                .collect::<Vec<f64>>()
        })
        .collect())
}

#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use maplit::btreemap;

#[test]
fn test_evaluate() {
Expand Down Expand Up @@ -475,47 +427,4 @@ mod tests {
assert_relative_eq!(scores["q_1"], 2. / 3.);
assert_relative_eq!(scores["q_2"], 1. / 3.);
}

#[test]
fn test_tupled_scores_from_score_maps() {
let scores_a = btreemap! {
"q_1" => 2.,
"q_2" => 5.,
};
let scores_b = btreemap! {
"q_1" => 1.,
"q_2" => 0.,
};
let scores_c = btreemap! {
"q_1" => 2.,
"q_2" => 1.,
};
let tupled_scores =
tupled_scores_from_score_maps([&scores_a, &scores_b, &scores_c]).unwrap();
assert_eq!(tupled_scores, vec![vec![2., 1., 2.], vec![5., 0., 1.]]);
}

#[test]
fn test_tupled_scores_from_score_maps_different_keys() {
let scores_a = btreemap! {
"q_1" => 2.,
"q_2" => 5.,
};
let scores_b = btreemap! {
"q_1" => 1.,
"q_3" => 0.,
};
let tupled_scores = tupled_scores_from_score_maps([&scores_a, &scores_b]);
assert!(tupled_scores.is_err());
}

#[test]
fn test_tupled_scores_from_score_maps_single_map() {
let scores_a = btreemap! {
"q_1" => 2.,
"q_2" => 5.,
};
let tupled_scores = tupled_scores_from_score_maps([&scores_a]);
assert!(tupled_scores.is_err());
}
}
135 changes: 135 additions & 0 deletions src/statistical_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,138 @@ pub use randomized_tukey_hsd_test::RandomizedTukeyHsdTest;
pub use student_t_test::StudentTTest;
pub use tukey_hsd_test::TukeyHsdTest;
pub use two_way_anova_without_replication::TwoWayAnovaWithoutReplication;

use std::collections::BTreeMap;

use crate::errors::ElinorError;
use crate::errors::Result;

/// Pairs up the scores of two maps, $`A`$ and $`B`$, key by key:
///
/// - $`A = \{ (k^A_1 \mapsto v^A_1), (k^A_2 \mapsto v^A_2), \dots, (k^A_n \mapsto v^A_n) \}`$,
/// - $`B = \{ (k^B_1 \mapsto v^B_1), (k^B_2 \mapsto v^B_2), \dots, (k^B_n \mapsto v^B_n) \}`$, and
/// - the result is $`X = [(v^A_1, v^B_1), (v^A_2, v^B_2), \dots, (v^A_n, v^B_n)]`$,
///
/// where $`k^A_i = k^B_i`$ must hold for all $`i`$.
///
/// This is the two-map special case of [`tuples_from_maps`], returning
/// `(f64, f64)` pairs instead of two-element vectors.
///
/// # Examples
///
/// ```
/// use elinor::statistical_tests::pairs_from_maps;
///
/// let map_a = [("a", 0.70), ("b", 0.30), ("c", 0.20)].into();
/// let map_b = [("a", 0.50), ("b", 0.10), ("c", 0.00)].into();
/// let pairs = pairs_from_maps(&map_a, &map_b).unwrap();
/// assert_eq!(pairs, vec![(0.70, 0.50), (0.30, 0.10), (0.20, 0.00)]);
/// ```
///
/// # Errors
///
/// * [`ElinorError::InvalidArgument`] if maps have different sets of keys.
pub fn pairs_from_maps<K>(
    map_a: &BTreeMap<K, f64>,
    map_b: &BTreeMap<K, f64>,
) -> Result<Vec<(f64, f64)>>
where
    K: Clone + Eq + Ord + std::fmt::Display,
{
    // Validation and key alignment are delegated to the general routine.
    let rows = tuples_from_maps([map_a, map_b])?;
    // Each row has exactly two entries because exactly two maps were passed.
    let pairs = rows.into_iter().map(|row| (row[0], row[1])).collect();
    Ok(pairs)
}

/// Converts maps of scores, $`A_1, A_2, \dots, A_m`$, into a vector of tupled scores $`X`$:
///
/// - $`A_j = \{ (k^j_1 \mapsto v^j_1), (k^j_2 \mapsto v^j_2), \dots, (k^j_n \mapsto v^j_n) \}`$ for all $`j`$,
/// - $`X = [(v^1_1, v^2_1, \dots, v^m_1), (v^1_2, v^2_2, \dots, v^m_2), \dots, (v^1_n, v^2_n, \dots, v^m_n)]`$,
///
/// where $`k^1_i = k^2_i = \dots = k^m_i`$ for all $`i`$.
///
/// Tuples are returned in ascending key order, and the scores inside each
/// tuple follow the order in which the maps were supplied.
/// A single map is accepted and yields one-element tuples.
///
/// # Examples
///
/// ```
/// use elinor::statistical_tests::tuples_from_maps;
///
/// let map_a = [("a", 0.70), ("b", 0.30), ("c", 0.20)].into();
/// let map_b = [("a", 0.50), ("b", 0.10), ("c", 0.00)].into();
/// let map_c = [("a", 0.60), ("b", 0.20), ("c", 0.10)].into();
/// let tuples = tuples_from_maps([&map_a, &map_b, &map_c]).unwrap();
/// assert_eq!(tuples, vec![vec![0.70, 0.50, 0.60], vec![0.30, 0.10, 0.20], vec![0.20, 0.00, 0.10]]);
/// ```
///
/// # Errors
///
/// * [`ElinorError::InvalidArgument`] if no map is given.
/// * [`ElinorError::InvalidArgument`] if maps have different sets of keys.
pub fn tuples_from_maps<'a, I, K>(maps: I) -> Result<Vec<Vec<f64>>>
where
    I: IntoIterator<Item = &'a BTreeMap<K, f64>>,
    K: Clone + Eq + Ord + std::fmt::Display + 'a,
{
    let maps = maps.into_iter().collect::<Vec<_>>();

    // Without any map there is no key set to iterate over; report this as an
    // invalid argument instead of panicking on `maps[0]`.
    let first = match maps.first() {
        Some(&first) => first,
        None => {
            return Err(ElinorError::InvalidArgument(
                "At least one map is required, but got none.".to_string(),
            ))
        }
    };

    // Every other map must have exactly the same keys as the first one.
    for (i, map) in maps.iter().enumerate().skip(1) {
        // Checked separately from the key comparison to give a more specific message.
        if first.len() != map.len() {
            return Err(ElinorError::InvalidArgument(format!(
                "The number of keys in maps must be the same, but got maps[0].len()={} and maps[{}].len()={}.",
                first.len(),
                i,
                map.len()
            )));
        }
        if first.keys().ne(map.keys()) {
            return Err(ElinorError::InvalidArgument(
                "The keys in the maps must be the same.".to_string(),
            ));
        }
    }

    // All key sets were verified to be identical, so indexing cannot fail.
    let tuples = first
        .keys()
        .map(|key| maps.iter().map(|map| map[key]).collect::<Vec<f64>>())
        .collect();
    Ok(tuples)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The error both converters report when the key sets disagree.
    fn key_mismatch() -> ElinorError {
        ElinorError::InvalidArgument("The keys in the maps must be the same.".to_string())
    }

    // `map_b` contains "d" where `map_a` contains "c".
    #[test]
    fn test_pairs_from_maps_different_keys() {
        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
        let map_b = BTreeMap::from([("a", 0.50), ("b", 0.10), ("d", 0.00)]);
        let outcome = pairs_from_maps(&map_a, &map_b);
        assert_eq!(outcome, Err(key_mismatch()));
    }

    // Only the second of the three maps deviates from the others.
    #[test]
    fn test_tuples_from_maps_different_keys() {
        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
        let map_b = BTreeMap::from([("a", 0.50), ("b", 0.10), ("d", 0.00)]);
        let map_c = BTreeMap::from([("a", 0.60), ("b", 0.20), ("c", 0.10)]);
        let outcome = tuples_from_maps([&map_a, &map_b, &map_c]);
        assert_eq!(outcome, Err(key_mismatch()));
    }

    // A single map is valid input and produces one-element tuples.
    #[test]
    fn test_tuples_from_maps_single_map() {
        let map_a = BTreeMap::from([("a", 0.70), ("b", 0.30), ("c", 0.20)]);
        let expected = vec![vec![0.70], vec![0.30], vec![0.20]];
        assert_eq!(tuples_from_maps([&map_a]), Ok(expected));
    }
}
Loading

0 comments on commit 3d8980e

Please sign in to comment.