diff --git a/crates/bevy_math/src/lib.rs b/crates/bevy_math/src/lib.rs index 3075d5be3da6e..6ab39433c66f1 100644 --- a/crates/bevy_math/src/lib.rs +++ b/crates/bevy_math/src/lib.rs @@ -23,7 +23,7 @@ mod ray; mod rects; mod rotation2d; #[cfg(feature = "rand")] -mod shape_sampling; +pub mod sampling; pub use affine3::*; pub use aspect_ratio::AspectRatio; @@ -34,13 +34,13 @@ pub use ray::{Ray2d, Ray3d}; pub use rects::*; pub use rotation2d::Rotation2d; #[cfg(feature = "rand")] -pub use shape_sampling::ShapeSample; +pub use sampling::ShapeSample; /// The `bevy_math` prelude. pub mod prelude { #[doc(hidden)] #[cfg(feature = "rand")] - pub use crate::shape_sampling::ShapeSample; + pub use crate::sampling::{BoundaryOf, InteriorOf, ShapeSample}; #[doc(hidden)] pub use crate::{ cubic_splines::{ diff --git a/crates/bevy_math/src/sampling/mod.rs b/crates/bevy_math/src/sampling/mod.rs new file mode 100644 index 0000000000000..a0d298e851548 --- /dev/null +++ b/crates/bevy_math/src/sampling/mod.rs @@ -0,0 +1,8 @@ +//! This module contains tools related to random sampling. +//! +//! To use this, the "rand" feature must be enabled. + +mod shape_sampling; +pub mod statistical_tests; + +pub use shape_sampling::*; diff --git a/crates/bevy_math/src/shape_sampling.rs b/crates/bevy_math/src/sampling/shape_sampling.rs similarity index 87% rename from crates/bevy_math/src/shape_sampling.rs rename to crates/bevy_math/src/sampling/shape_sampling.rs index 3728034413bef..8dd3bd1074931 100644 --- a/crates/bevy_math/src/shape_sampling.rs +++ b/crates/bevy_math/src/sampling/shape_sampling.rs @@ -39,6 +39,70 @@ pub trait ShapeSample { /// println!("{:?}", square.sample_boundary(&mut rand::thread_rng())); /// ``` fn sample_boundary(&self, rng: &mut R) -> Self::Output; + + /// Extract a [`Distribution`] whose samples are points of this shape's interior, taken uniformly. + /// + /// # Example + /// + /// ``` + /// # use bevy_math::prelude::*; + /// # use rand::distributions::Distribution; + /// let square = Rectangle::new(2.0, 2.0); + /// let rng = rand::thread_rng(); + /// + /// // Iterate over points randomly drawn from `square`'s interior: + /// for random_val in square.interior_dist().sample_iter(rng).take(5) { + /// println!("{:?}", random_val); + /// } + /// ``` + fn interior_dist(self) -> InteriorOf + where + Self: Sized, + { + InteriorOf(self) + } + + /// Extract a [`Distribution`] whose samples are points of this shape's boundary, taken uniformly. + /// + /// # Example + /// + /// ``` + /// # use bevy_math::prelude::*; + /// # use rand::distributions::Distribution; + /// let square = Rectangle::new(2.0, 2.0); + /// let rng = rand::thread_rng(); + /// + /// // Iterate over points randomly drawn from `square`'s boundary: + /// for random_val in square.boundary_dist().sample_iter(rng).take(5) { + /// println!("{:?}", random_val); + /// } + /// ``` + fn boundary_dist(self) -> BoundaryOf + where + Self: Sized, + { + BoundaryOf(self) + } +} + +#[derive(Clone, Copy)] +/// A wrapper struct that allows interior sampling from a [`ShapeSample`] type directly as a [`Distribution`]. +pub struct InteriorOf(pub T); + +#[derive(Clone, Copy)] +/// A wrapper struct that allows boundary sampling from a [`ShapeSample`] type directly as a [`Distribution`]. +pub struct BoundaryOf(pub T); + +impl Distribution<::Output> for InteriorOf { + fn sample(&self, rng: &mut R) -> ::Output { + self.0.sample_interior(rng) + } +} + +impl Distribution<::Output> for BoundaryOf { + fn sample(&self, rng: &mut R) -> ::Output { + self.0.sample_boundary(rng) + } } impl ShapeSample for Circle { diff --git a/crates/bevy_math/src/sampling/statistical_tests/impls.rs b/crates/bevy_math/src/sampling/statistical_tests/impls.rs new file mode 100644 index 0000000000000..f77329b54adfd --- /dev/null +++ b/crates/bevy_math/src/sampling/statistical_tests/impls.rs @@ -0,0 +1,408 @@ +use super::traits::{BinDistribution, Binned, WithBinDistributions}; +use crate::{ + primitives::*, + sampling::{BoundaryOf, InteriorOf}, + ShapeSample, Vec2, Vec3, Vec3Swizzles, +}; +use std::f32::consts::PI; + +//------------------// +// Helper Functions // +//------------------// + +/// Given a value `v` between `a` and `b`, return the value `t` for which `v == a.lerp(b, t)`. +#[inline] +fn inverse_lerp(v: f32, a: f32, b: f32) -> f32 { + (v - a) / (b - a) +} + +/// Given a `value` and the `lower` and `upper` bounds of an interval partitioned evenly into `bins` +/// bins, return the index of the bin that `value` falls into. +/// +/// Returns None when `value` is outside the interval. +fn bin_from_range(value: f32, lower: f32, upper: f32, bins: usize) -> Option { + let t = inverse_lerp(value, lower, upper); + if !(0. ..=1.).contains(&t) { + None + } else { + let multiplier: f32 = bins as f32; + Some((t * multiplier).floor() as usize) + } +} + +/// Given the `start` and `end` of an interval to be partitioned into `segments` segments, return a +/// vector of length `segments` enumerating the breakpoints of those intervals. The `end` of the +/// interval is included, but the `start` is not. +fn partition_range(start: f32, end: f32, segments: usize) -> Vec { + let mut output = Vec::with_capacity(segments); + let step = (end - start) / (segments as f32); + for i in 0..segments { + output.push(start + step * ((i + 1) as f32)); + } + output +} + +//--------------------------// +// Concrete Implementations // +//--------------------------// + +/// A discretized distribution for the interior of a [`Circle`]. +#[derive(Clone, Copy)] +pub struct CircleInteriorBins { + circle: InteriorOf, + radial_bins: usize, + angular_bins: usize, +} + +impl CircleInteriorBins { + /// Create a new discretized distribution for the interior of the given `circle`, with `radial_bins` bins + /// distributed radially in concentric annuli and `angular_bins` bins distributed angularly as "pie slices". + pub fn new(circle: Circle, radial_bins: usize, angular_bins: usize) -> Self { + Self { + circle: circle.interior_dist(), + radial_bins, + angular_bins, + } + } +} + +impl Binned<2> for CircleInteriorBins { + type IntermediateValue = Vec2; + type InnerDistribution = InteriorOf; + fn inner_dist(&self) -> Self::InnerDistribution { + self.circle + } + + fn bin(&self, sample: Vec2) -> Option<[usize; 2]> { + let radius = self.circle.0.radius; + let theta = sample.to_angle(); + let r = sample.length(); + + if !r.is_finite() || !theta.is_finite() { + None + } else { + let radial_bin = bin_from_range(r, 0., radius, self.radial_bins)?; + let angular_bin = bin_from_range(theta, -PI, PI, self.angular_bins)?; + Some([radial_bin, angular_bin]) + } + } +} + +impl WithBinDistributions<2> for CircleInteriorBins { + fn get_bins(&self) -> [BinDistribution; 2] { + let radii = partition_range(0., self.circle.0.radius, self.radial_bins); + + // Factor out pi here for simplicity + let radial_areas: Vec<_> = radii.into_iter().map(|r| r * r).collect(); + let bins_radial = BinDistribution::from_cdf(radial_areas); + + let bins_angular = + BinDistribution::from_weights(vec![1. / self.angular_bins as f32; self.angular_bins]); + + [bins_radial, bins_angular] + } + + fn dfs(&self) -> [usize; 2] { + [ + self.radial_bins.saturating_sub(1), + self.angular_bins.saturating_sub(1), + ] + } +} + +/// A discretized distribution for the boundary of a [`Circle`]. +#[derive(Clone, Copy)] +pub struct CircleBoundaryBins { + circle: BoundaryOf, + angular_bins: usize, +} + +impl CircleBoundaryBins { + /// Create a new discretized distribution for the boundary of the given [`Circle`], with `angular_bins` bins + /// distributed evenly around the edge. + pub fn new(circle: Circle, angular_bins: usize) -> Self { + Self { + circle: circle.boundary_dist(), + angular_bins, + } + } +} + +impl Binned<1> for CircleBoundaryBins { + type IntermediateValue = Vec2; + type InnerDistribution = BoundaryOf; + fn inner_dist(&self) -> Self::InnerDistribution { + self.circle + } + + fn bin(&self, sample: Vec2) -> Option<[usize; 1]> { + let theta = sample.to_angle(); + if !theta.is_finite() { + None + } else { + Some([bin_from_range(theta, -PI, PI, self.angular_bins)?]) + } + } +} + +impl WithBinDistributions<1> for CircleBoundaryBins { + fn get_bins(&self) -> [BinDistribution; 1] { + let bins_angular = + BinDistribution::from_weights(vec![1. / self.angular_bins as f32; self.angular_bins]); + [bins_angular] + } + + fn dfs(&self) -> [usize; 1] { + [self.angular_bins - 1] + } +} + +/// A discretized distribution for the interior of a [`Sphere`]. +#[derive(Clone, Copy)] +pub struct SphereInteriorBins { + sphere: InteriorOf, + radial_bins: usize, + azimuthal_bins: usize, + polar_bins: usize, +} + +impl SphereInteriorBins { + /// Create a new discretized distribution for the interior of the given `sphere`, with `radial_bins` bins distributed + /// radially, `azimuthal_bins` bins swept out along the azimuth, and `polar_bins` bins swept out along the polar angle. + pub fn new( + sphere: Sphere, + radial_bins: usize, + azimuthal_bins: usize, + polar_bins: usize, + ) -> Self { + Self { + sphere: sphere.interior_dist(), + radial_bins, + azimuthal_bins, + polar_bins, + } + } +} + +impl Binned<3> for SphereInteriorBins { + type IntermediateValue = Vec3; + type InnerDistribution = InteriorOf; + fn inner_dist(&self) -> Self::InnerDistribution { + self.sphere + } + + fn bin(&self, value: Vec3) -> Option<[usize; 3]> { + let radius = self.sphere.0.radius; + let rho = value.length(); + let theta = value.xy().to_angle(); + let psi = (value.z / rho).acos(); + + if !rho.is_finite() || !theta.is_finite() || !psi.is_finite() { + None + } else { + let radial_bin = bin_from_range(rho, 0., radius, self.radial_bins)?; + let azimuthal_bin = bin_from_range(theta, -PI, PI, self.azimuthal_bins)?; + let polar_bin = bin_from_range(psi, 0., PI, self.polar_bins)?; + Some([radial_bin, azimuthal_bin, polar_bin]) + } + } +} + +impl WithBinDistributions<3> for SphereInteriorBins { + fn get_bins(&self) -> [BinDistribution; 3] { + let radius = self.sphere.0.radius; + + // Factor out constant 4/3 * pi from the volume for simplicity + let bins_radial = BinDistribution::from_cdf( + partition_range(0., radius, self.radial_bins) + .into_iter() + .map(|r| r * r * r) + .collect::>(), + ); + + let bins_azimuthal = BinDistribution::from_weights(vec![ + 1. / self.azimuthal_bins as f32; + self.azimuthal_bins + ]); + + // Factor out constant 2/3 * pi from the volume here too + let bins_polar = BinDistribution::from_cdf( + partition_range(0., PI, self.polar_bins) + .into_iter() + .map(|psi| 1. - psi.cos()) + .collect::>(), + ); + + [bins_radial, bins_azimuthal, bins_polar] + } + + fn dfs(&self) -> [usize; 3] { + [ + self.radial_bins.saturating_sub(1), + self.azimuthal_bins.saturating_sub(1), + self.polar_bins.saturating_sub(1), + ] + } +} + +/// A discretized distribution for the boundary of a [`Sphere`]. +#[derive(Clone, Copy)] +pub struct SphereBoundaryBins { + sphere: BoundaryOf, + azimuthal_bins: usize, + polar_bins: usize, +} + +impl SphereBoundaryBins { + /// Create a new discretized distribution for the boundary of the given `sphere`, with `azimuthal_bins` + /// swept out along the azimuth and `polar_bins` swept out along the polar angle. + pub fn new(sphere: Sphere, azimuthal_bins: usize, polar_bins: usize) -> Self { + Self { + sphere: sphere.boundary_dist(), + azimuthal_bins, + polar_bins, + } + } +} + +impl Binned<2> for SphereBoundaryBins { + type IntermediateValue = Vec3; + type InnerDistribution = BoundaryOf; + fn inner_dist(&self) -> Self::InnerDistribution { + self.sphere + } + + fn bin(&self, value: Vec3) -> Option<[usize; 2]> { + let theta = value.xy().to_angle(); + let psi = value.z.acos(); + + if !theta.is_finite() || !psi.is_finite() { + None + } else { + let azimuthal_bin = bin_from_range(theta, -PI, PI, self.azimuthal_bins)?; + let polar_bin = bin_from_range(psi, 0., PI, self.polar_bins)?; + Some([azimuthal_bin, polar_bin]) + } + } +} + +impl WithBinDistributions<2> for SphereBoundaryBins { + fn get_bins(&self) -> [BinDistribution; 2] { + let bins_azimuthal = BinDistribution::from_weights(vec![ + 1. / self.azimuthal_bins as f32; + self.azimuthal_bins + ]); + + // Factor out 2 * pi from the surface area here for simplicity + let bins_polar = BinDistribution::from_cdf( + partition_range(0., PI, self.polar_bins) + .into_iter() + .map(|psi| 1. - psi.cos()) + .collect::>(), + ); + + [bins_azimuthal, bins_polar] + } + + fn dfs(&self) -> [usize; 2] { + [ + self.azimuthal_bins.saturating_sub(1), + self.polar_bins.saturating_sub(1), + ] + } +} + +#[cfg(test)] +mod tests { + use super::{super::stats::*, *}; + use rand::{distributions::Distribution, rngs::StdRng, SeedableRng}; + + /// Run goodness-of-fit tests for all directional components of this distribution. + fn test_components(binned: T, samples: usize) + where + T: Binned + WithBinDistributions + Copy, + { + let rng = StdRng::from_entropy(); + let histogram: Histogram = binned + .binned_dist() + .sample_iter(rng) + .take(samples) + .collect(); + assert!(histogram.is_clean()); + let bin_dists = binned.get_bins(); + let dfs = binned.dfs(); + for i in 0..N { + let chi_squared = chi_squared_fit(&histogram.project(i).unwrap(), &bin_dists[i]); + assert!( + chi_squared < CHI_SQUARED_CRIT_VALUES_EMINUS3[dfs[i]], + "Goodness of fit test failed at index {i} at 0.001 significance level" + ); + } + } + + /// Run independence tests for each pair of directional components of this distribution. + fn test_independence(binned: T, samples: usize) + where + T: Binned + WithBinDistributions + Copy, + { + let rng = StdRng::from_entropy(); + let histogram: Histogram = binned + .binned_dist() + .sample_iter(rng) + .take(samples) + .collect(); + assert!(histogram.is_clean()); // This is cheap, so we do it here as well + let bin_dists = binned.get_bins(); + let dfs = binned.dfs(); + for i in 0..N { + for j in (i + 1)..N { + let chi_squared = chi_squared_independence( + &histogram.project_two([i, j]).unwrap(), + &[bin_dists[i].clone(), bin_dists[j].clone()], + ); + assert!( + chi_squared < CHI_SQUARED_CRIT_VALUES_EMINUS3[dfs[i] * dfs[j]], + "Independence test failed at indices [{i}, {j}] at 0.001 significance level" + ); + } + } + } + + // Independence tests only get run for sample spaces above dimension 1, and should not be implemented + // universally; geometric properties of spheres and circles guarantee that the chosen directions should + // *actually* be independent (constant mass "under" each angle, for example). + + // The tests are marked with #[ignore] so that they do not get run as part of CI testing. + // They can be run by passing `-- --ignored` to `cargo test`. + + #[ignore] + #[test] + fn circle_interior() { + let circle_binned = CircleInteriorBins::new(Circle::new(1.0), 8, 8); + test_components(circle_binned, 100000); + test_independence(circle_binned, 100000); + } + + #[ignore] + #[test] + fn circle_boundary() { + let circle_binned = CircleBoundaryBins::new(Circle::new(1.0), 20); + test_components(circle_binned, 100000); + } + + #[ignore] + #[test] + fn sphere_interior() { + let sphere_binned = SphereInteriorBins::new(Sphere::new(1.0), 5, 8, 8); + test_components(sphere_binned, 100000); + test_independence(sphere_binned, 100000); + } + + #[ignore] + #[test] + fn sphere_boundary() { + let sphere_binned = SphereBoundaryBins::new(Sphere::new(1.0), 8, 8); + test_components(sphere_binned, 100000); + test_independence(sphere_binned, 100000); + } +} diff --git a/crates/bevy_math/src/sampling/statistical_tests/mod.rs b/crates/bevy_math/src/sampling/statistical_tests/mod.rs new file mode 100644 index 0000000000000..6eab70e9ab68b --- /dev/null +++ b/crates/bevy_math/src/sampling/statistical_tests/mod.rs @@ -0,0 +1,23 @@ +//! A specialized library for performing statistical testing on the quality of +//! spatial distributions. +//! +//! Included are traits for binning statistical distributions in space ([`traits`]), +//! comparing those binned distributions to ideal multinomial distributions across the +//! sets of bins ([`stats`]), and concrete implementations of these for Bevy's shape +//! types ([`impls`]). + +/// Holds traits [`Binned`](traits::Binned) and [`WithBinDistributions`](traits::WithBinDistributions), which form the scaffolding for +/// discretization of spatial distributions (with the former) and comparison of the resulting +/// discrete probability distributions with ideal multinomial distributions (with the latter). +pub mod traits; + +/// Holds the [`Histogram`](stats::Histogram) type, which is an `N`-dimensional histogram that can be accumulated +/// from the distributions constructed with implementations of [`Binned`](traits::Binned). Also holds +/// the chi-squared goodness-of-fit and independence tests that are used to assess the quality +/// of binned distributions. +pub mod stats; + +/// Holds concrete implementations of this library's [`traits`] for spatial distributions derived +/// from Bevy's primitive shapes, along with tests utilizing those to verify the statistical +/// quality of those distributions. +pub mod impls; diff --git a/crates/bevy_math/src/sampling/statistical_tests/stats.rs b/crates/bevy_math/src/sampling/statistical_tests/stats.rs new file mode 100644 index 0000000000000..8f7611478f3d2 --- /dev/null +++ b/crates/bevy_math/src/sampling/statistical_tests/stats.rs @@ -0,0 +1,320 @@ +use super::traits::BinDistribution; +use std::collections::BTreeMap; +use thiserror::Error; + +//------------// +// Histograms // +//------------// + +/// An `N`-dimensional histogram, holding data simultaneously assessed to lie in +/// `N` different families of bins. +/// +/// Constructed via its [`FromIterator`] implementation, hence by calling [`Iterator::collect`] +/// on an iterator whose items are of type `Option<[usize; N]>`. Most notably, the sample iterator +/// of [`BinSampler`](super::traits::BinSampler) where `T` implements [`Binned`](super::traits::Binned) +/// produces values of this type. +pub struct Histogram { + /// The actual histogram, with the invalid items diverted to `invalid` + pub(crate) inner: BTreeMap<[usize; N], usize>, + + /// The total samples present in the histogram — i.e., excluding invalid items. + pub total: usize, + + /// Count of invalid items, separate from the actual histogram. + pub invalid_count: usize, +} + +impl FromIterator> for Histogram { + fn from_iter>>(iter: T) -> Self { + let mut hist = BTreeMap::new(); + let mut total = 0; + let mut invalid_count = 0; + + for sample in iter.into_iter() { + let Some(sample) = sample else { + invalid_count += 1; + continue; + }; + + hist.entry(sample).and_modify(|v| *v += 1).or_insert(1); + total += 1; + } + + Self { + inner: hist, + total, + invalid_count, + } + } +} + +/// An error that is thrown when trying to use one or more invalid indices into a [`Histogram`]. +#[derive(Debug, Error)] +#[error("One or more provided dimensions {dimensions:?} was outside of the range 0..{ambient_dimension}")] +pub struct InvalidDimensionError { + ambient_dimension: usize, + dimensions: Vec, +} + +impl Histogram { + /// Get the height of the histogram at a given index. + pub fn get(&self, index: [usize; N]) -> usize { + self.inner.get(&index).copied().unwrap_or(0) + } + + /// Ascertain whether the [`Histogram`] is free of invalid samples. + pub fn is_clean(&self) -> bool { + self.invalid_count == 0 + } + + /// Project this histogram down to a histogram of dimension one by projecting away the other dimensions. + /// Returns an [`InvalidDimensionError`] if the `dimension` parameter is outside of `0..N`. + pub fn project(&self, dimension: usize) -> Result, InvalidDimensionError> { + if !(0..N).contains(&dimension) { + return Err(InvalidDimensionError { + ambient_dimension: N, + dimensions: vec![dimension], + }); + } + + let mut proj_hist = BTreeMap::new(); + + // The `dimension` is the fixed index; the other indices vary. + for (&index, &height) in self.inner.iter() { + let proj_index = index[dimension]; + proj_hist + .entry([proj_index]) + .and_modify(|v| *v += height) + .or_insert(height); + } + + Ok(Histogram { + inner: proj_hist, + total: self.total, + invalid_count: self.invalid_count, + }) + } + + /// Project this histogram down to a histogram of dimension two by projecting away the other dimensions. + /// Returns an [`InvalidDimensionError`] if any of the given `dimensions` are outside of `0..N`. + pub fn project_two( + &self, + dimensions: [usize; 2], + ) -> Result, InvalidDimensionError> { + if !(0..N).contains(&dimensions[0]) || !(0..N).contains(&dimensions[1]) { + return Err(InvalidDimensionError { + ambient_dimension: N, + dimensions: dimensions.into(), + }); + } + + let mut proj_hist = BTreeMap::new(); + + // The `dimensions` are fixed; the other indices vary. + for (&index, &height) in self.inner.iter() { + let proj_index = [index[dimensions[0]], index[dimensions[1]]]; + proj_hist + .entry(proj_index) + .and_modify(|v| *v += height) + .or_insert(height); + } + + Ok(Histogram { + inner: proj_hist, + total: self.total, + invalid_count: self.invalid_count, + }) + } +} + +//------------// +// Statistics // +//------------// + +/// Compute the chi-squared goodness-of-fit test statistic for the `histogram` relative to the ideal +/// distribution described by `ideal`. Note that this is distinct from the p-value, which must be +/// assessed separately. +pub fn chi_squared_fit(histogram: &Histogram<1>, ideal: &BinDistribution) -> f64 { + // The vector giving the number of hits expected in each bin for this many samples + let expecteds: Vec<_> = ideal + .bins + .iter() + .map(|p| (histogram.total as f64) * (*p as f64)) + .collect(); + + let mut chi_squared: f64 = 0.0; + for (i, expected) in expecteds.into_iter().enumerate() { + let observed = histogram.get([i]) as f64; + let contribution = ((observed - expected) * (observed - expected)) / expected; + chi_squared += contribution; + } + + chi_squared +} + +/// Compute the chi-squared independence test statistic for the `histogram` relative to the ideal +/// distributions described by `ideal`. Note that this is distinct from the p-value, which must be +/// assessed separately. +pub fn chi_squared_independence(histogram: &Histogram<2>, ideal: &[BinDistribution; 2]) -> f64 { + // Compute what the expected number of hits in each bin would be based on the total samples. + let mut expecteds: BTreeMap<[usize; 2], f64> = BTreeMap::new(); + let [dist1, dist2] = ideal; + let (bins1, bins2) = (&dist1.bins, &dist2.bins); + let total_samples = histogram.total as f64; + for (i, &bin1) in bins1.iter().enumerate() { + for (j, &bin2) in bins2.iter().enumerate() { + expecteds.insert([i, j], bin1 as f64 * bin2 as f64 * total_samples); + } + } + + let mut chi_squared: f64 = 0.0; + for (index, expected) in expecteds.into_iter() { + let observed = histogram.get(index) as f64; + let contribution = ((observed - expected) * (observed - expected)) / expected; + chi_squared += contribution; + } + + chi_squared +} + +//-----------------// +// Critical Values // +//-----------------// + +/// Critical values of the chi-squared distribution for ascending degrees of freedom at an alpha +/// value of 0.001. This has been shifted by one so that the index corresponds to the degrees +/// of freedom exactly; as a result, index zero is just a placeholder value. +/// +/// Source: [NIST](https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm) +pub const CHI_SQUARED_CRIT_VALUES_EMINUS3: [f64; 101] = [ + 0.0, 10.828, 13.816, 16.266, 18.467, 20.515, 22.458, 24.322, 26.125, 27.877, 29.588, 31.264, + 32.91, 34.528, 36.123, 37.697, 39.252, 40.79, 42.312, 43.82, 45.315, 46.797, 48.268, 49.728, + 51.179, 52.62, 54.052, 55.476, 56.892, 58.301, 59.703, 61.098, 62.487, 63.87, 65.247, 66.619, + 67.985, 69.347, 70.703, 72.055, 73.402, 74.745, 76.084, 77.419, 78.75, 80.077, 81.4, 82.72, + 84.037, 85.351, 86.661, 87.968, 89.272, 90.573, 91.872, 93.168, 94.461, 95.751, 97.039, 98.324, + 99.607, 100.888, 102.166, 103.442, 104.716, 105.988, 107.258, 108.526, 109.791, 111.055, + 112.317, 113.577, 114.835, 116.092, 117.346, 118.599, 119.85, 121.1, 122.348, 123.594, 124.839, + 126.083, 127.324, 128.565, 129.804, 131.041, 132.277, 133.512, 134.746, 135.978, 137.208, + 138.438, 139.666, 140.893, 142.119, 143.344, 144.567, 145.789, 147.01, 148.23, 149.449, +]; + +#[cfg(test)] +mod tests { + use super::{chi_squared_fit, chi_squared_independence, BinDistribution, Histogram}; + use std::collections::BTreeMap; + + const SAMPLES_2D: [Option<[usize; 2]>; 6] = [ + Some([0, 1]), + Some([4, 2]), + None, + Some([3, 3]), + None, + Some([4, 2]), + ]; + + const SAMPLES_3D: [Option<[usize; 3]>; 7] = [ + Some([0, 3, 5]), + None, + Some([7, 6, 2]), + Some([0, 6, 3]), + Some([1, 3, 5]), + Some([3, 6, 2]), + None, + ]; + + #[test] + fn histogram_data() { + let histogram: Histogram<2> = SAMPLES_2D.into_iter().collect(); + assert_eq!(histogram.get([4, 2]), 2); + assert_eq!(histogram.get([0, 1]), 1); + assert_eq!(histogram.get([3, 3]), 1); + assert_eq!(histogram.get([0, 0]), 0); + assert!(!histogram.is_clean()); + assert_eq!(histogram.invalid_count, 2); + assert_eq!(histogram.total, 4); + } + + #[test] + fn histogram_projection() { + let histogram: Histogram<2> = SAMPLES_2D.into_iter().collect(); + + // Compare the first projection histogram to the histogram formed by the + // collection of projected data. + let hist_proj = histogram.project(0).unwrap(); + let hist_proj_direct: Histogram<1> = SAMPLES_2D + .into_iter() + .map(|opt| opt.map(|[a, _]| [a])) + .collect(); + + assert_eq!(hist_proj.invalid_count, hist_proj_direct.invalid_count); + assert_eq!(hist_proj.total, hist_proj_direct.total); + assert_eq!(hist_proj.inner, hist_proj_direct.inner); + } + + #[test] + fn histogram_project_two() { + let histogram: Histogram<3> = SAMPLES_3D.into_iter().collect(); + + // Compare the [1, 2]-projection histogram to the histogram formed by the + // collection of projected data. + let hist_proj = histogram.project_two([1, 2]).unwrap(); + let hist_proj_direct: Histogram<2> = SAMPLES_3D + .into_iter() + .map(|opt| opt.map(|[_, b, c]| [b, c])) + .collect(); + + assert_eq!(hist_proj.invalid_count, hist_proj_direct.invalid_count); + assert_eq!(hist_proj.total, hist_proj_direct.total); + assert_eq!(hist_proj.inner, hist_proj_direct.inner); + } + + #[test] + fn histogram_project_two_duplicate() { + let histogram: Histogram<2> = SAMPLES_2D.into_iter().collect(); + + // Verify that diagonal projections work how one would expect. + let hist_proj: Histogram<2> = histogram.project_two([1, 1]).unwrap(); + let hist_proj_direct: Histogram<2> = SAMPLES_2D + .into_iter() + .map(|opt| opt.map(|[_, b]| [b, b])) + .collect(); + + assert_eq!(hist_proj.invalid_count, hist_proj_direct.invalid_count); + assert_eq!(hist_proj.total, hist_proj_direct.total); + assert_eq!(hist_proj.inner, hist_proj_direct.inner); + } + + #[test] + fn chi_squared_goodness() { + let histogram = SAMPLES_2D + .into_iter() + .collect::>() + .project(0) + .unwrap(); + + // Uniform distribution on 5 bins 0..5 + let ideal = BinDistribution::from_weights(vec![1.; 5]); + let chi_squared = chi_squared_fit(&histogram, &ideal); + assert!((chi_squared - 3.5).abs() < 1e-7); + } + + #[test] + fn chi_squared_indep() { + let mut dist: BTreeMap<[usize; 2], usize> = BTreeMap::new(); + dist.insert([0, 0], 3); + dist.insert([0, 1], 2); + dist.insert([1, 0], 0); + dist.insert([1, 1], 1); + + let histogram = Histogram { + inner: dist, + total: 6, + invalid_count: 0, + }; + + let ideal_first = BinDistribution::from_weights(vec![1.; 2]); + let ideal_second = BinDistribution::from_weights(vec![1.; 2]); + let chi_squared = chi_squared_independence(&histogram, &[ideal_first, ideal_second]); + assert!((chi_squared - 10. / 3.).abs() < 1e-7); + } +} diff --git a/crates/bevy_math/src/sampling/statistical_tests/traits.rs b/crates/bevy_math/src/sampling/statistical_tests/traits.rs new file mode 100644 index 0000000000000..0d8426a35c0f4 --- /dev/null +++ b/crates/bevy_math/src/sampling/statistical_tests/traits.rs @@ -0,0 +1,130 @@ +use rand::distributions::Distribution; +use rand::Rng; + +/// A trait implemented by a type which discretizes the sample space of a [`Distribution`] simultaneously +/// in `N` dimensions. To sample an implementing type as a [`Distribution`], use [`Binned::binned_dist`]. +pub trait Binned { + /// The type defining the sample space discretized by this type. + type IntermediateValue; + + /// The inner distribution type whose samples are to be discretized. + type InnerDistribution: Distribution; + + /// The concrete inner distribution of this distribution whose samples are later mapped into an `N`-dimensional + /// histogram by [`Binned::bin`]. + fn inner_dist(&self) -> Self::InnerDistribution; + + /// A function that takes output from the inner distribution and maps it to `N` bins. This allows + /// any implementor of `Binned` to be a [`Distribution`] — the output of the distribution is `Option<[usize; N]>` + /// because the mapping to bins is generally fallible, resulting in an error state when a sample misses every bin. + fn bin(&self, value: Self::IntermediateValue) -> Option<[usize; N]>; + + /// Bin-sample the discretized distribution. + fn sample(&self, rng: &mut R) -> Option<[usize; N]> { + let v = self.inner_dist().sample(rng); + self.bin(v) + } + + /// Convert this binned distribution into a [`Distribution`] in order to sample from its bins. + fn binned_dist(self) -> BinSampler + where + Self: Sized, + { + BinSampler(self) + } +} + +#[derive(Debug, Clone)] +/// A collection of bins with weights that define a discrete probability distribution over the indices. +/// That is, a formal description of a multinomial distribution, equipped with methods to aid in its +/// construction. +pub struct BinDistribution { + /// The underlying bin weights of this distribution. + pub bins: Vec, +} + +impl BinDistribution { + /// Construct a new [`BinDistribution`] from its sequence of weights. This function normalizes + /// the resulting distribution, so the only thing that matters for the caller is that the `weights` + /// have the correct proportions relative to one another. + pub fn from_weights(weights: impl Into>) -> Self { + let bins = Self { + bins: weights.into(), + }; + bins.normalized() + } + + /// Construct a new [`BinDistribution`] from a (potentially non-normalized) sequence of cumulative weights. + /// Like [`BinDistribution::from_weights`], the resulting distribution is normalized automatically, so the + /// caller need ensure only that the cumulative weights are correctly proportioned. + pub fn from_cdf(cdf_weights: impl Into>) -> Self { + let cdf: Vec = cdf_weights.into(); + let mut pdf = Vec::with_capacity(cdf.len()); + + // Convert cdf to pdf by subtracting adjacent elements + pdf.push(cdf[0]); + for window in cdf.as_slice().windows(2) { + pdf.push(window[1] - window[0]); + } + + BinDistribution::from_weights(pdf) + } + + /// Normalize the bin data inside this distribution. + fn normalize(&mut self) { + let total: f32 = self.bins.iter().copied().sum(); + if total.is_normal() { + self.bins.iter_mut().for_each(|p| *p /= total); + } + } + + /// This distribution, but with its interior bin data normalized. + fn normalized(mut self) -> Self { + self.normalize(); + self + } +} + +/// A discretized ([`Binned`]) probability distribution that also has extrinsic weights associated to its bins; +/// primarily intended for use in chi-squared analysis of spatial distributions. +pub trait WithBinDistributions: Binned { + /// Get the bin weights to compare with actual samples. + fn get_bins(&self) -> [BinDistribution; N]; + + /// Get the degrees of freedom of each set of bins. + fn dfs(&self) -> [usize; N] { + self.get_bins().map(|b| b.bins.len().saturating_sub(1)) + } +} + +/// A wrapper struct that allows a [`Binned`] distribution type to be used directly as a [`Distribution`]. +pub struct BinSampler>(pub T); + +impl> Distribution> for BinSampler { + fn sample(&self, rng: &mut R) -> Option<[usize; N]> { + Binned::sample(&self.0, rng) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bin_normalization() { + let weights = vec![2.0, 3.5, 1.0]; + let bins = BinDistribution::from_weights(weights); + assert_eq!(bins.bins[0], 2.0 / 6.5); + assert_eq!(bins.bins[1], 3.5 / 6.5); + assert_eq!(bins.bins[2], 1.0 / 6.5); + } + + #[test] + fn bin_cdf() { + let cdf = [1., 2., 3., 4., 5.]; + let bins = BinDistribution::from_cdf(cdf); + for i in 0..cdf.len() { + assert_eq!(bins.bins[i], 0.2); + } + } +}