Merge pull request #7 from jirigav/automated_tests

Automated tests
jirigav · Jun 26, 2024 · 998fd31 · 998fd31
2 parents 1f21b26 + 9294e72
commit 998fd31
Show file tree

Hide file tree

Showing 7 changed files with 184 additions and 94 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cooltest"
-version = "0.1.1"
+version = "0.1.2"
 edition = "2021"
 
 

diff --git a/src/autotest.rs b/src/autotest.rs
@@ -0,0 +1,69 @@
+use crate::bottomup::bottomup;
+use crate::common::{prepare_data, Args};
+use crate::results::results;
+use std::time::Instant;
+
+const GB: usize = 1000000000;
+const MB: usize = 1000000;
+
+/// Picks the number of histogram bits `k` handed to the bottom-up search.
+///
+/// The thresholds are checked in order: the first pair that matches wins and
+/// anything that matches neither falls back to `2`.
+///
+/// * `block_size` - block length that will be tested.
+/// * `data_size` - size of the training data as supplied by the caller.
+fn choose_k(block_size: usize, data_size: usize) -> usize {
+    match (data_size, block_size) {
+        (size, block) if size <= 10 * MB && block < 128 => 4,
+        (size, block) if size < 2 * GB && block < 256 => 3,
+        _ => 2,
+    }
+}
+
+/// Runs the bottom-up search with automatically chosen parameters and reports
+/// the outcome through `results`.
+///
+/// The search is always run for `args.block`. When `args.block <= 256` it is
+/// run a second time with the block size doubled, and the histogram with the
+/// larger absolute z-score (together with its matching testing data) is kept.
+/// If more than one configuration was tried, `args.alpha` is divided by the
+/// number of tried configurations before the final evaluation.
+///
+/// Panics if `prepare_data` does not return testing data.
+pub(crate) fn autotest(mut args: Args) {
+    let (base_training, base_testing) = prepare_data(&args.data_source, args.block, true);
+    let mut chosen_testing = base_testing.unwrap();
+    let timer = Instant::now();
+    // NOTE(review): this is the length of the training collection, which
+    // `choose_k` compares against MB/GB thresholds - confirm the unit matches.
+    let data_size = base_training.len();
+
+    // The configuration with the requested block size is always tested.
+    let mut tested_cases = 1;
+    let mut best = bottomup(
+        &base_training,
+        args.block,
+        choose_k(args.block, data_size),
+        args.top,
+        args.max_bits,
+        args.threads,
+    );
+
+    if args.block <= 256 {
+        tested_cases += 1;
+        let doubled_block = 2 * args.block;
+        let (doubled_training, doubled_testing) =
+            prepare_data(&args.data_source, doubled_block, true);
+        let doubled_testing = doubled_testing.unwrap();
+        // `k` is re-chosen for the doubled block, still using the size of the
+        // first training set.
+        let candidate = bottomup(
+            &doubled_training,
+            doubled_block,
+            choose_k(doubled_block, data_size),
+            args.top,
+            args.max_bits,
+            args.threads,
+        );
+        // Keep the doubled-block result only when it is strictly stronger.
+        if candidate.z_score.abs() > best.z_score.abs() {
+            best = candidate;
+            chosen_testing = doubled_testing;
+        }
+    }
+    println!("training finished in {:?}", timer.elapsed());
+
+    if tested_cases > 1 {
+        // Several configurations were tried, so the significance level is
+        // split evenly between them.
+        let adjusted_alpha = args.alpha / (tested_cases as f64);
+        println!(
+            "Adjusting significance level based on the number of tests from {} to {}",
+            args.alpha, adjusted_alpha
+        );
+        args.alpha = adjusted_alpha;
+    }
+
+    results(best, &chosen_testing, args)
+}
diff --git a/src/bottomup.rs b/src/bottomup.rs
@@ -9,7 +9,9 @@ pub(crate) struct Histogram {
     pub(crate) bits: Vec<usize>,
     pub(crate) sorted_indices: Vec<usize>,
     pub(crate) best_division: usize,
+    #[serde(skip_serializing, default)]
     pub(crate) z_score: f64,
+    pub(crate) block_size: usize,
 }
 
 impl Histogram {
@@ -42,10 +44,11 @@ impl Histogram {
             sorted_indices: indices,
             best_division: best_i,
             z_score: max_z,
+            block_size: data[0].len(),
         }
     }
 
-    pub(crate) fn from_bins(bits: Vec<usize>, bins: &[usize]) -> Histogram {
+    pub(crate) fn from_bins(bits: Vec<usize>, bins: &[usize], block_size: usize) -> Histogram {
         let mut indices = (0..2_usize.pow(bits.len() as u32)).collect_vec();
         indices.sort_by(|a, b| bins[*b].cmp(&bins[*a]));
 
@@ -69,6 +72,7 @@ impl Histogram {
             sorted_indices: indices,
             best_division: best_i,
             z_score: max_z,
+            block_size,
         }
     }
 
@@ -179,11 +183,11 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
         hists = new_hists;
     }
     if k > 1 {
-        let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1]); top];
+        let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1], block_size); top];
         let mut bins = vec![0; 2_usize.pow(k as u32)];
         for bits in (0..block_size).combinations(k) {
             compute_bins(&bits, data, k, &hists, &mut bins, block_size);
-            let hist = Histogram::from_bins(bits, &bins);
+            let hist = Histogram::from_bins(bits, &bins, block_size);
             best_hists.push(hist);
             best_hists.sort_by(|a, b| b.z_score.abs().partial_cmp(&a.z_score.abs()).unwrap());
             best_hists.pop();
@@ -194,7 +198,7 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
         let mut best: Vec<_> = hists
             .into_iter()
             .enumerate()
-            .map(|(i, bins)| Histogram::from_bins(bits[i].clone(), &bins))
+            .map(|(i, bins)| Histogram::from_bins(bits[i].clone(), &bins, block_size))
             .collect();
 
         best.sort_by(|a, b| b.z_score.partial_cmp(&a.z_score).unwrap());
@@ -203,7 +207,7 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
 }
 
 fn _combine_bins(hists: &[Histogram], n: usize, data: &[Vec<u8>]) -> Histogram {
-    let mut best_hist = Histogram::from_bins(vec![0], &[1, 1]);
+    let mut best_hist = Histogram::from_bins(vec![0], &[1, 1], data[0].len());
     for comb in hists.iter().combinations(n) {
         let mut bits = comb.iter().flat_map(|x| x.bits.clone()).collect_vec();
         bits.sort();
@@ -352,14 +356,14 @@ fn brute_force_threads(
         .map(|i| {
             let combs = (0..block_size).combinations(k).skip(i);
 
-            let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1]); top];
+            let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1], block_size); top];
 
             for bits in combs.step_by(threads) {
                 let mut bins = vec![0; 2_usize.pow(k as u32)];
                 for (i, bin) in bins.iter_mut().enumerate() {
                     *bin = multi_eval_neg(&bits, data, &neg_data, i);
                 }
-                let new_hist = Histogram::from_bins(bits, &bins);
+                let new_hist = Histogram::from_bins(bits, &bins, block_size);
                 best_hists.push(new_hist);
                 best_hists.sort_by(|a, b| b.z_score.abs().partial_cmp(&a.z_score.abs()).unwrap());
                 best_hists.pop();

diff --git a/src/common.rs b/src/common.rs
@@ -14,7 +14,7 @@ pub(crate) struct Args {
     pub(crate) data_source: String,
 
     /// Length of block of data.
-    #[arg(short, long, default_value_t = 128)]
+    #[arg(short, long, default_value_t = 128)] // Changing the default value changes autotest
     pub(crate) block: usize,
 
     /// Number of bits in histograms in brute-force search.
@@ -53,6 +53,7 @@ pub(crate) enum SubCommand {
         #[arg(short, long)]
         dis_path: String,
     },
+    Autotest {},
 }
 pub(crate) fn bits_block_eval(bits: &[usize], block: &[u8]) -> usize {
     let mut result = 0;

diff --git a/src/main.rs b/src/main.rs
@@ -1,94 +1,18 @@
+mod autotest;
 mod bottomup;
 mod common;
+mod results;
 
 use crate::bottomup::bottomup;
-use crate::common::{p_value, z_score, Args};
+use crate::common::Args;
+use autotest::autotest;
 use bottomup::Histogram;
 use clap::Parser;
 use common::{prepare_data, SubCommand};
-use serde_json::json;
-use std::fs::{self, File};
-use std::io::Write;
+use results::results;
+use std::fs;
 use std::time::Instant;
 
-fn print_results(p_value: f64, z_score: f64, alpha: f64, hist: &Histogram, bins: Vec<usize>) {
-    println!("----------------------------------------------------------------------");
-    println!("RESULTS:\n");
-
-    println!("Histogram(the discovered Boolean function returns 1 for values before the separator and 0 for values after the separator.):\n");
-    let m = bins.iter().max().unwrap();
-    let unit = (m / 50).max(1);
-    for (i, ind) in hist.sorted_indices.iter().enumerate() {
-        for x in &hist.bits {
-            print!("x{} ", x);
-        }
-        let mut j = *ind;
-        print!("| [");
-        for _ in 0..hist.bits.len() {
-            print!("{}", j % 2);
-            j /= 2;
-        }
-        print!("] | ");
-        for _ in 0..bins[*ind] / unit {
-            print!("∎");
-        }
-        println!();
-        if i == (hist.best_division - 1) {
-            for _ in 0..80 {
-                print!("—");
-            }
-            println!();
-        }
-    }
-    println!();
-    println!("Z-score: {z_score}");
-    println!("P-value: {p_value:.0e}");
-    if p_value >= alpha {
-        println!(
-            "As the p-value >= alpha {alpha:.0e}, the randomness hypothesis cannot be rejected."
-        );
-        println!("= CoolTest could not find statistically significant non-randomness.");
-    } else {
-        println!("As the p-value < alpha {alpha:.0e}, the randomness hypothesis is REJECTED.");
-        println!("= Data is not random.");
-    }
-}
-
-fn results(hist: Histogram, testing_data: &[Vec<u8>], args: Args) {
-    let (count, bins) = hist.evaluate(testing_data);
-    let prob = 2.0_f64.powf(-(hist.bits.len() as f64));
-    let z = z_score(
-        testing_data.len(),
-        count,
-        prob * (hist.best_division as f64),
-    );
-    let p_val = p_value(
-        count,
-        testing_data.len(),
-        prob * (hist.best_division as f64),
-    );
-    print_results(p_val, z, args.alpha, &hist, bins);
-
-    if let Some(path) = args.json.clone() {
-        let mut file =
-            File::create(&path).unwrap_or_else(|_| panic!("File {} couldn't be created", path));
-
-        let output = json!({
-            "args": args,
-            "dis": hist,
-            "result": if p_val < args.alpha {"random"} else {"non-random"},
-            "p-value": p_val
-        });
-
-        file.write_all(
-            serde_json::to_string_pretty(&output)
-                .expect("Failed to produce json!")
-                .as_bytes(),
-        )
-        .unwrap();
-    }
-}
-
 fn run_bottomup(args: Args) {
     let (training_data, testing_data) = prepare_data(&args.data_source, args.block, true);
     let testing_data = testing_data.unwrap();
@@ -108,18 +32,25 @@ fn run_bottomup(args: Args) {
 }
 
 fn main() {
-    let args = Args::parse();
+    let mut args = Args::parse();
     println!("\n{args:?}\n");
 
+    if args.block > 600 {
+        println!("With block size {}, the computation can take long time, consider using smaller block size.", args.block);
+    }
+
     match args.subcommand.clone() {
         Some(SubCommand::Evaluate { dis_path }) => {
             let contents = fs::read_to_string(&dis_path)
                 .unwrap_or_else(|_| panic!("Failed to read contents of {}", &dis_path));
             let hist: Histogram =
                 serde_json::from_str(&contents).expect("Invalid distinguisher json!");
-            let (testing_data, _) = prepare_data(&args.data_source, args.block, false);
+            args.block = hist.block_size;
+            args.k = hist.bits.len();
+            let (testing_data, _) = prepare_data(&args.data_source, hist.block_size, false);
             results(hist, &testing_data, args)
         }
+        Some(SubCommand::Autotest {}) => autotest(args),
         None => run_bottomup(args),
     }
 }
diff --git a/src/results.rs b/src/results.rs
@@ -0,0 +1,85 @@
+use crate::{
+    bottomup::Histogram,
+    common::{p_value, z_score, Args},
+};
+use serde_json::json;
+use std::fs::File;
+use std::io::Write;
+
+/// Evaluates `hist` on `testing_data`, prints the outcome and, when
+/// `args.json` is set, also writes it to that path as pretty-printed JSON.
+///
+/// The JSON `"result"` field is `"non-random"` when the p-value is below
+/// `args.alpha` (the same condition under which the printed report rejects
+/// the randomness hypothesis) and `"random"` otherwise.
+///
+/// Panics if the JSON file cannot be created or written, or if the output
+/// cannot be serialized.
+pub(crate) fn results(hist: Histogram, testing_data: &[Vec<u8>], args: Args) {
+    let (count, bins) = hist.evaluate(testing_data);
+    // 2^-(number of bits) scaled by `best_division`; presumably the chance
+    // that a uniformly random block lands before the separator - confirm
+    // against `z_score`/`p_value`.
+    let prob = 2.0_f64.powf(-(hist.bits.len() as f64));
+    let expected_prob = prob * (hist.best_division as f64);
+    // Note the differing argument order of the two helpers.
+    let z = z_score(testing_data.len(), count, expected_prob);
+    let p_val = p_value(count, testing_data.len(), expected_prob);
+    print_results(p_val, z, args.alpha, &hist, bins);
+
+    if let Some(path) = args.json.clone() {
+        let mut file =
+            File::create(&path).unwrap_or_else(|_| panic!("File {} couldn't be created", path));
+
+        let output = json!({
+            "args": args,
+            "dis": hist,
+            // A p-value below alpha rejects randomness, so the data is
+            // labelled non-random; this must agree with `print_results`.
+            "result": if p_val < args.alpha {"non-random"} else {"random"},
+            "p-value": p_val
+        });
+
+        file.write_all(
+            serde_json::to_string_pretty(&output)
+                .expect("Failed to produce json!")
+                .as_bytes(),
+        )
+        .unwrap();
+    }
+}
+
+/// Prints the human-readable report: a text histogram of `bins` in the order
+/// given by `hist.sorted_indices`, a separator line after the first
+/// `hist.best_division` rows, the z-score, the p-value and the verdict of
+/// comparing the p-value with `alpha`.
+///
+/// Panics if `bins` is empty or if an index in `hist.sorted_indices` is out
+/// of range for `bins`.
+fn print_results(p_value: f64, z_score: f64, alpha: f64, hist: &Histogram, bins: Vec<usize>) {
+    println!("----------------------------------------------------------------------");
+    println!("RESULTS:\n");
+
+    println!("Histogram(the discovered Boolean function returns 1 for values before the separator and 0 for values after the separator.):\n");
+    let m = bins.iter().max().unwrap();
+    // Scale the bars so the tallest one is roughly 50 characters wide; the
+    // unit never drops below one count per character.
+    let unit = (m / 50).max(1);
+    for (i, ind) in hist.sorted_indices.iter().enumerate() {
+        for x in &hist.bits {
+            print!("x{} ", x);
+        }
+        // Print the bin index bit by bit, least significant bit first.
+        let mut j = *ind;
+        print!("| [");
+        for _ in 0..hist.bits.len() {
+            print!("{}", j % 2);
+            j /= 2;
+        }
+        print!("] | ");
+        print!("{}", "∎".repeat(bins[*ind] / unit));
+        println!();
+        // `i + 1 == best_division` instead of `i == best_division - 1`: the
+        // subtraction underflows `usize` when a distinguisher (e.g. one loaded
+        // from JSON) carries `best_division == 0`.
+        if i + 1 == hist.best_division {
+            println!("{}", "—".repeat(80));
+        }
+    }
+    println!();
+    println!("Z-score: {z_score}");
+    println!("P-value: {p_value:.0e}");
+    if p_value >= alpha {
+        println!(
+            "As the p-value >= alpha {alpha:.0e}, the randomness hypothesis cannot be rejected."
+        );
+        println!("= CoolTest could not find statistically significant non-randomness.");
+    } else {
+        println!("As the p-value < alpha {alpha:.0e}, the randomness hypothesis is REJECTED.");
+        println!("= Data is not random.");
+    }
+}