Skip to content

Commit

Permalink
Merge pull request #48 from triarius/better-test
Browse files Browse the repository at this point in the history
Update test to use parallelism and increase samples by a factor of 10
  • Loading branch information
triarius authored Jan 9, 2024
2 parents ee00c68 + 16a1b3e commit 061640c
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 24 deletions.
52 changes: 52 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ rand = "0.8.5"
regex = "1.10.2"

[dev-dependencies]
rayon = "1.8.0"
statrs = "0.16.0"
76 changes: 52 additions & 24 deletions src/passphrase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,72 @@ pub fn new(

// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so
// removed and added lines appear side by side (e.g. `let n = 4;` next to `const W: usize = 4;`).
// Confirm against the repository before treating it as compilable code.
mod test {
#[test]
// Uses [Pearson's chi-squared test](https://en.wikipedia.org/wiki/Chi-squared_test#Pearson's_chi-squared_test)
// to test that the passphrases are uniformly distributed.
fn chi_squared() {
use crate::{passphrase, words};
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use statrs::distribution::{ChiSquared, ContinuousCDF};
use std::collections::HashMap;

// Presumably the pre-change bindings, superseded by W / W_FACTORIAL below — verify.
let n = 4;
let n_fact = 24;
// this test file has n = 4 words, which can have 24 permutations
// This test file has W = 4 words, which can have 24 permutations
const W: usize = 4;
const W_FACTORIAL: usize = 24;
const N: usize = 12_000_000; // number of samples

let words = words::list(Some("src/fixtures/test")).unwrap();

// Presumably the pre-change sample count and single shared RNG — verify.
let trials = 1_200_000;
let mut rng = rand::thread_rng();
// Draw N passphrases in parallel. The index vector is split into chunks of
// N / available_parallelism() items; each chunk is folded into its own
// passphrase -> count map, and the per-chunk maps are then merged sequentially
// into a single histogram.
let histogram = Vec::from_iter(0..N)
.par_iter()
.fold_chunks(
N / std::thread::available_parallelism().unwrap(),
|| HashMap::new(),
|mut acc, _| {
// The RNG is obtained via rand::thread_rng() for every sample, and the word
// list is cloned because passphrase::new takes it by mutable reference.
let mut rng = rand::thread_rng();
let mut words = words.clone();
let s = passphrase::new(&mut rng, &mut words, W, " ");
*acc.entry(s).or_insert(0) += 1 as usize;
acc
},
)
.collect::<Vec<HashMap<String, usize>>>()
.iter()
// Merge step: add every per-chunk count into the combined map.
.fold(HashMap::new(), |mut acc, h| {
h.iter().for_each(|(k, v)| {
*acc.entry(k.to_owned()).or_insert(0) += v;
});
acc
});

// Presumably the pre-change sequential sampling loop — verify.
// Note that the range (1..trials) yields trials - 1 samples, not trials.
let mut histogram: HashMap<String, u32> = HashMap::new();
(1..trials).for_each(|_| {
let mut words = words.clone();
let s = passphrase::new(&mut rng, &mut words, n, " ");
*histogram.entry(s).or_insert(0) += 1;
});
// Sanity check: the merged counts must add up to exactly N samples.
assert_eq!(histogram.values().sum::<usize>(), N, "missing samples");

// Presumably the pre-change form of the permutation-count assertion below — verify.
assert_eq!(histogram.len(), n_fact);
// There should be at most W! different passphrases. If, by chance, some of them are not
// generated, then the chi-squared test is highly unlikely to conclude that they are
// uniformly distributed.
assert_eq!(W_FACTORIAL, histogram.len(), "missing a permutation");

// Expected count per permutation under the uniform hypothesis: samples / permutations.
// The first binding is presumably the pre-change line — verify.
let expected_frequency = trials as f64 / n_fact as f64;
let expected_frequency = N as f64 / W_FACTORIAL as f64;
// Pearson statistic: sum over observed counts of (observed - expected)^2 / expected.
// The `.iter().map(|(_, v)| ...)` pair is presumably the pre-change form of
// `.values().map(|v| ...)` — verify.
let chi_squared_stat: f64 = histogram
.iter()
.map(|(_, v)| (*v as f64 - expected_frequency).powi(2) / expected_frequency)
.values()
.map(|v| (*v as f64 - expected_frequency).powi(2) / expected_frequency)
.sum();

// Presumably the pre-change degrees-of-freedom computation — verify.
// (2 - 1) * (24 - 1) evaluates to 23, the same value as W_FACTORIAL - 1 below.
// degrees of freedom = (number of rows - 1) * (number of columns - 1)
let df = ((2 - 1) * (24 - 1)) as f64;
let dist = ChiSquared::new(df).unwrap();
let p = 1.0 - dist.cdf(chi_squared_stat);
// Since the number in any permutation is determined by the number in all the others,
// degrees of freedom = number of permutations - 1
const DF: f64 = (W_FACTORIAL - 1) as f64;
let dist = ChiSquared::new(DF).unwrap();

// Presumably the pre-change diagnostic output; the assert! message at the end of the
// function reports the same two values — verify.
eprintln!("χ^2: {}", chi_squared_stat);
eprintln!("p: {}", p);
// The p-value is the area under the chi-squared pdf to the right of the chi_squared_stat
let p = 1.0 - dist.cdf(chi_squared_stat);

// Presumably the pre-change assertion, replaced by the assert! with a message below — verify.
// the p-value should be greater than 0.05 so that we can't reject the null hypothesis
// if we can reject the null hypothesis, then the passphrase generator is not uniform
assert_eq!(p > 0.05, true);
// The p-value should be greater than 0.05 so that we can't reject the null hypothesis that
// the values are from a uniform distribution.
// If we can reject the null hypothesis, then the passphrase generator may not be uniform.
assert!(
p > 0.05,
"passphrase may not be uniformly random. (p = {} <= 0.05, χ^2 = {}).",
p,
chi_squared_stat,
);
}
}

0 comments on commit 061640c

Please sign in to comment.