Skip to content

Commit

Permalink
Merge pull request #7 from jirigav/automated_tests
Browse files Browse the repository at this point in the history
Automated tests
  • Loading branch information
jirigav authored Jun 26, 2024
2 parents 1f21b26 + 9294e72 commit 998fd31
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 94 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cooltest"
version = "0.1.1"
version = "0.1.2"
edition = "2021"


Expand Down
69 changes: 69 additions & 0 deletions src/autotest.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
use crate::bottomup::bottomup;
use crate::common::{prepare_data, Args};
use crate::results::results;
use std::time::Instant;

/// Decimal giga (10^9); used as a data-size threshold in `choose_k`.
const GB: usize = 1_000_000_000;
/// Decimal mega (10^6); used as a data-size threshold in `choose_k`.
const MB: usize = 1_000_000;

/// Picks the `k` value handed to the bottom-up search.
///
/// Smaller inputs get a larger `k`:
/// * `4` when `data_size <= 10 * MB` and `block_size < 128`,
/// * `3` when `data_size < 2 * GB` and `block_size < 256`,
/// * `2` in every other case.
fn choose_k(block_size: usize, data_size: usize) -> usize {
    match (block_size, data_size) {
        (block, size) if block < 128 && size <= 10 * MB => 4,
        (block, size) if block < 256 && size < 2 * GB => 3,
        _ => 2,
    }
}

/// Runs the automated test.
///
/// A distinguisher is trained for the configured block size and, when that block size
/// is at most 256, a second one is trained for twice the block size. Whichever has the
/// larger absolute z-score is then evaluated on the testing data prepared for its own
/// block size.
///
/// When more than one configuration was tried, `args.alpha` is divided by the number of
/// tried configurations before the results are reported.
///
/// # Panics
///
/// Panics if `prepare_data` returns no testing data.
pub(crate) fn autotest(mut args: Args) {
    let (training_data, testing_data) = prepare_data(&args.data_source, args.block, true);
    let mut chosen_testing_data = testing_data.unwrap();
    let start = Instant::now();
    let data_size = training_data.len();

    // The configured block size is always tested.
    let mut tested_cases = 1;
    let mut hist = bottomup(
        &training_data,
        args.block,
        choose_k(args.block, data_size),
        args.top,
        args.max_bits,
        args.threads,
    );

    if args.block <= 256 {
        tested_cases += 1;
        let doubled_block = 2 * args.block;
        let (doubled_training, doubled_testing) =
            prepare_data(&args.data_source, doubled_block, true);
        let doubled_testing = doubled_testing.unwrap();
        // `data_size` is still the length of the training data prepared for the
        // original block size.
        let doubled_k = choose_k(doubled_block, data_size);
        let doubled_hist = bottomup(
            &doubled_training,
            doubled_block,
            doubled_k,
            args.top,
            args.max_bits,
            args.threads,
        );
        // Keep the candidate with the stronger deviation, together with its testing data.
        if doubled_hist.z_score.abs() > hist.z_score.abs() {
            hist = doubled_hist;
            chosen_testing_data = doubled_testing;
        }
    }
    println!("training finished in {:?}", start.elapsed());

    if tested_cases > 1 {
        // Several candidates were tried, so the significance level is split between them.
        let new_alpha = args.alpha / (tested_cases as f64);
        println!(
            "Adjusting significance level based on the number of tests from {} to {}",
            args.alpha, new_alpha
        );
        args.alpha = new_alpha;
    }

    results(hist, &chosen_testing_data, args)
}
18 changes: 11 additions & 7 deletions src/bottomup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ pub(crate) struct Histogram {
pub(crate) bits: Vec<usize>,
pub(crate) sorted_indices: Vec<usize>,
pub(crate) best_division: usize,
#[serde(skip_serializing, default)]
pub(crate) z_score: f64,
pub(crate) block_size: usize,
}

impl Histogram {
Expand Down Expand Up @@ -42,10 +44,11 @@ impl Histogram {
sorted_indices: indices,
best_division: best_i,
z_score: max_z,
block_size: data[0].len(),
}
}

pub(crate) fn from_bins(bits: Vec<usize>, bins: &[usize]) -> Histogram {
pub(crate) fn from_bins(bits: Vec<usize>, bins: &[usize], block_size: usize) -> Histogram {
let mut indices = (0..2_usize.pow(bits.len() as u32)).collect_vec();
indices.sort_by(|a, b| bins[*b].cmp(&bins[*a]));

Expand All @@ -69,6 +72,7 @@ impl Histogram {
sorted_indices: indices,
best_division: best_i,
z_score: max_z,
block_size,
}
}

Expand Down Expand Up @@ -179,11 +183,11 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
hists = new_hists;
}
if k > 1 {
let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1]); top];
let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1], block_size); top];
let mut bins = vec![0; 2_usize.pow(k as u32)];
for bits in (0..block_size).combinations(k) {
compute_bins(&bits, data, k, &hists, &mut bins, block_size);
let hist = Histogram::from_bins(bits, &bins);
let hist = Histogram::from_bins(bits, &bins, block_size);
best_hists.push(hist);
best_hists.sort_by(|a, b| b.z_score.abs().partial_cmp(&a.z_score.abs()).unwrap());
best_hists.pop();
Expand All @@ -194,7 +198,7 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
let mut best: Vec<_> = hists
.into_iter()
.enumerate()
.map(|(i, bins)| Histogram::from_bins(bits[i].clone(), &bins))
.map(|(i, bins)| Histogram::from_bins(bits[i].clone(), &bins, block_size))
.collect();

best.sort_by(|a, b| b.z_score.partial_cmp(&a.z_score).unwrap());
Expand All @@ -203,7 +207,7 @@ fn brute_force(data: &Data, block_size: usize, k: usize, top: usize) -> Vec<Hist
}

fn _combine_bins(hists: &[Histogram], n: usize, data: &[Vec<u8>]) -> Histogram {
let mut best_hist = Histogram::from_bins(vec![0], &[1, 1]);
let mut best_hist = Histogram::from_bins(vec![0], &[1, 1], data[0].len());
for comb in hists.iter().combinations(n) {
let mut bits = comb.iter().flat_map(|x| x.bits.clone()).collect_vec();
bits.sort();
Expand Down Expand Up @@ -352,14 +356,14 @@ fn brute_force_threads(
.map(|i| {
let combs = (0..block_size).combinations(k).skip(i);

let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1]); top];
let mut best_hists = vec![Histogram::from_bins(vec![0], &[1, 1], block_size); top];

for bits in combs.step_by(threads) {
let mut bins = vec![0; 2_usize.pow(k as u32)];
for (i, bin) in bins.iter_mut().enumerate() {
*bin = multi_eval_neg(&bits, data, &neg_data, i);
}
let new_hist = Histogram::from_bins(bits, &bins);
let new_hist = Histogram::from_bins(bits, &bins, block_size);
best_hists.push(new_hist);
best_hists.sort_by(|a, b| b.z_score.abs().partial_cmp(&a.z_score.abs()).unwrap());
best_hists.pop();
Expand Down
3 changes: 2 additions & 1 deletion src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub(crate) struct Args {
pub(crate) data_source: String,

/// Length of block of data.
#[arg(short, long, default_value_t = 128)]
#[arg(short, long, default_value_t = 128)] // Changing the default value changes autotest
pub(crate) block: usize,

/// Number of bits in histograms in brute-force search.
Expand Down Expand Up @@ -53,6 +53,7 @@ pub(crate) enum SubCommand {
#[arg(short, long)]
dis_path: String,
},
Autotest {},
}
pub(crate) fn bits_block_eval(bits: &[usize], block: &[u8]) -> usize {
let mut result = 0;
Expand Down
99 changes: 15 additions & 84 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,94 +1,18 @@
mod autotest;
mod bottomup;
mod common;
mod results;

use crate::bottomup::bottomup;
use crate::common::{p_value, z_score, Args};
use crate::common::Args;
use autotest::autotest;
use bottomup::Histogram;
use clap::Parser;
use common::{prepare_data, SubCommand};
use serde_json::json;
use std::fs::{self, File};
use std::io::Write;
use results::results;
use std::fs;
use std::time::Instant;

/// Prints the results section to stdout.
///
/// The output consists of one histogram row per entry of `hist.sorted_indices`
/// (bit labels, the bit pattern of the index, and a bar sized from `bins`), a
/// separator line after row `hist.best_division - 1`, and finally the z-score,
/// the p-value and a verdict obtained by comparing `p_value` with `alpha`.
///
/// # Panics
///
/// Panics if `bins` is empty or if an entry of `hist.sorted_indices` is not a
/// valid index into `bins`.
fn print_results(p_value: f64, z_score: f64, alpha: f64, hist: &Histogram, bins: Vec<usize>) {
println!("----------------------------------------------------------------------");
println!("RESULTS:\n");

println!("Histogram(the discovered Boolean function returns 1 for values before the separator and 0 for values after the separator.):\n");
// Scale the bars: one mark per `unit` counts, where `unit` is 1/50 of the largest bin (at least 1).
let m = bins.iter().max().unwrap();
let unit = (m / 50).max(1);
for (i, ind) in hist.sorted_indices.iter().enumerate() {
// Label of every bit position used by the histogram.
for x in &hist.bits {
print!("x{} ", x);
}
// Bit pattern of the bin index, least-significant bit first, one digit per entry of `hist.bits`.
let mut j = *ind;
print!("| [");
for _ in 0..hist.bits.len() {
print!("{}", j % 2);
j /= 2;
}
print!("] | ");
// Bar for this bin.
for _ in 0..bins[*ind] / unit {
print!("∎");
}
println!();
// Separator after the last row that belongs to the first `best_division` rows.
if i == (hist.best_division - 1) {
for _ in 0..80 {
print!("—");
}
println!();
}
}
println!();
println!("Z-score: {z_score}");
println!("P-value: {p_value:.0e}");
// Verdict: a p-value below alpha rejects the randomness hypothesis.
if p_value >= alpha {
println!(
"As the p-value >= alpha {alpha:.0e}, the randomness hypothesis cannot be rejected."
);
println!("= CoolTest could not find statistically significant non-randomness.");
} else {
println!("As the p-value < alpha {alpha:.0e}, the randomness hypothesis is REJECTED.");
println!("= Data is not random.");
}
}

/// Evaluates `hist` on `testing_data`, prints the report and, when `args.json`
/// is set, writes a JSON summary to that path.
///
/// # Panics
///
/// Panics if the JSON file cannot be created, serialized or written.
fn results(hist: Histogram, testing_data: &[Vec<u8>], args: Args) {
let (count, bins) = hist.evaluate(testing_data);
// Probability of a single bin over `hist.bits.len()` bits: 2^(-number of bits).
let prob = 2.0_f64.powf(-(hist.bits.len() as f64));
// Both statistics receive `prob * best_division` as their probability argument.
let z = z_score(
testing_data.len(),
count,
prob * (hist.best_division as f64),
);
let p_val = p_value(
count,
testing_data.len(),
prob * (hist.best_division as f64),
);
print_results(p_val, z, args.alpha, &hist, bins);

if let Some(path) = args.json.clone() {
let mut file =
File::create(&path).unwrap_or_else(|_| panic!("File {} couldn't be created", path));

// NOTE(review): the "result" label below looks inverted. `print_results` reports
// `p_val < alpha` as "Data is not random", yet that case is written here as "random".
let output = json!({
"args": args,
"dis": hist,
"result": if p_val < args.alpha {"random"} else {"non-random"},
"p-value": p_val
});

file.write_all(
serde_json::to_string_pretty(&output)
.expect("Failed to produce json!")
.as_bytes(),
)
.unwrap();
}
}

fn run_bottomup(args: Args) {
let (training_data, testing_data) = prepare_data(&args.data_source, args.block, true);
let testing_data = testing_data.unwrap();
Expand All @@ -108,18 +32,25 @@ fn run_bottomup(args: Args) {
}

/// Program entry point: parses the command-line arguments, prints them, warns
/// about block sizes above 600 and dispatches on the selected subcommand.
fn main() {
// NOTE(review): `args` is parsed twice; this binding is immediately shadowed by the
// `let mut` below. The `Evaluate` arm assigns to `args`, so only the mutable form is
// needed. Confirm the immutable line is a leftover.
let args = Args::parse();
let mut args = Args::parse();
println!("\n{args:?}\n");

if args.block > 600 {
println!("With block size {}, the computation can take long time, consider using smaller block size.", args.block);
}

match args.subcommand.clone() {
// Evaluate a previously stored distinguisher read from `dis_path`.
Some(SubCommand::Evaluate { dis_path }) => {
let contents = fs::read_to_string(&dis_path)
.unwrap_or_else(|_| panic!("Failed to read contents of {}", &dis_path));
let hist: Histogram =
serde_json::from_str(&contents).expect("Invalid distinguisher json!");
// NOTE(review): this `prepare_data` call uses `args.block`, and its result is
// shadowed by the call below that uses `hist.block_size`. Confirm it is a leftover.
let (testing_data, _) = prepare_data(&args.data_source, args.block, false);
// Make the arguments agree with the loaded distinguisher.
args.block = hist.block_size;
args.k = hist.bits.len();
let (testing_data, _) = prepare_data(&args.data_source, hist.block_size, false);
results(hist, &testing_data, args)
}
Some(SubCommand::Autotest {}) => autotest(args),
// No subcommand given.
None => run_bottomup(args),
}
}
85 changes: 85 additions & 0 deletions src/results.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use crate::{
bottomup::Histogram,
common::{p_value, z_score, Args},
};
use serde_json::json;
use std::fs::File;
use std::io::Write;

/// Evaluates `hist` on `testing_data`, prints the report and, when `args.json`
/// is set, writes a JSON summary to that path.
///
/// The JSON object has the keys `"args"`, `"dis"` (the histogram), `"p-value"` and
/// `"result"`. `"result"` is `"non-random"` when the p-value is below `args.alpha`
/// (the same condition under which the printed report rejects the randomness
/// hypothesis) and `"random"` otherwise.
///
/// # Panics
///
/// Panics if the JSON file cannot be created, serialized or written.
pub(crate) fn results(hist: Histogram, testing_data: &[Vec<u8>], args: Args) {
    let (count, bins) = hist.evaluate(testing_data);

    // Probability of a single bin over `hist.bits.len()` bits, times the number of
    // bins counted by the distinguisher (`best_division`).
    let bin_prob = 2.0_f64.powf(-(hist.bits.len() as f64));
    let prob = bin_prob * (hist.best_division as f64);

    let z = z_score(testing_data.len(), count, prob);
    let p_val = p_value(count, testing_data.len(), prob);
    print_results(p_val, z, args.alpha, &hist, bins);

    if let Some(path) = &args.json {
        let mut file =
            File::create(path).unwrap_or_else(|_| panic!("File {} couldn't be created", path));

        // A p-value below alpha rejects the randomness hypothesis, so that case must be
        // labelled "non-random" to agree with the printed verdict.
        let output = json!({
            "args": args,
            "dis": hist,
            "result": if p_val < args.alpha {"non-random"} else {"random"},
            "p-value": p_val
        });

        file.write_all(
            serde_json::to_string_pretty(&output)
                .expect("Failed to produce json!")
                .as_bytes(),
        )
        .unwrap();
    }
}

/// Writes the human-readable report to stdout.
///
/// For every entry of `hist.sorted_indices` one row is printed: the labels of the
/// bits in `hist.bits`, the bit pattern of the bin index, and a bar sized from
/// `bins`. A separator line follows row `hist.best_division - 1`. The z-score, the
/// p-value and the verdict (from comparing `p_value` with `alpha`) close the report.
///
/// # Panics
///
/// Panics if `bins` is empty or if an entry of `hist.sorted_indices` is not a
/// valid index into `bins`.
fn print_results(p_value: f64, z_score: f64, alpha: f64, hist: &Histogram, bins: Vec<usize>) {
    println!("----------------------------------------------------------------------");
    println!("RESULTS:\n");
    println!("Histogram(the discovered Boolean function returns 1 for values before the separator and 0 for values after the separator.):\n");

    // One bar mark stands for `scale` counts: 1/50 of the largest bin, but never below 1.
    let tallest = *bins.iter().max().unwrap();
    let scale = (tallest / 50).max(1);

    for (row, &index) in hist.sorted_indices.iter().enumerate() {
        let labels: String = hist.bits.iter().map(|bit| format!("x{} ", bit)).collect();
        print!("{labels}");

        // Bit pattern of `index`, least-significant bit first, one digit per entry
        // of `hist.bits`.
        let mut remaining = index;
        let mut pattern = String::with_capacity(hist.bits.len());
        for _ in 0..hist.bits.len() {
            pattern.push(if remaining % 2 == 0 { '0' } else { '1' });
            remaining /= 2;
        }
        print!("| [{pattern}] | ");

        println!("{}", "∎".repeat(bins[index] / scale));

        // Separator after the last of the first `best_division` rows.
        if row == (hist.best_division - 1) {
            println!("{}", "—".repeat(80));
        }
    }

    println!();
    println!("Z-score: {z_score}");
    println!("P-value: {p_value:.0e}");

    if p_value >= alpha {
        println!(
            "As the p-value >= alpha {alpha:.0e}, the randomness hypothesis cannot be rejected."
        );
        println!("= CoolTest could not find statistically significant non-randomness.");
        return;
    }

    println!("As the p-value < alpha {alpha:.0e}, the randomness hypothesis is REJECTED.");
    println!("= Data is not random.");
}

0 comments on commit 998fd31

Please sign in to comment.