From 455c1bfbe87efec15c1db54755fca09a552bc8bb Mon Sep 17 00:00:00 2001 From: Rostyslav Toch Date: Tue, 30 Jul 2024 00:48:21 +0100 Subject: [PATCH] Optimize cloning for Access-related structs (#14502) # Objective Optimize the cloning process for Access-related structs in the ECS system, specifically targeting the `clone_from` method. Previously, profiling showed that 1% of CPU time was spent in `FixedBitSet`'s `drop_in_place`, due to the default `clone_from` implementation: ```rust fn clone_from(&mut self, source: &Self) { *self = source.clone() } ``` This implementation causes unnecessary allocations and deallocations. However, [FixedBitSet provides a more optimized clone_from method](https://github.com/petgraph/fixedbitset/blob/master/src/lib.rs#L1445-L1465) that avoids these allocations and utilizes SIMD instructions for better performance. This PR aims to leverage the optimized clone_from method of FixedBitSet and implement custom clone_from methods for Access-related structs to take full advantage of this optimization. By doing so, we expect to significantly reduce CPU time spent on cloning operations and improve overall system performance. ![image](https://github.com/user-attachments/assets/7526a5c5-c75b-4a9a-b8d2-891f64fd553b) ## Solution - Implemented custom `clone` and `clone_from` methods for `Access`, `FilteredAccess`, `AccessFilters`, and `FilteredAccessSet` structs. - Removed `#[derive(Clone)]` and manually implemented `Clone` trait to use optimized `clone_from` method from `FixedBitSet`. - Added unit tests for cloning and `clone_from` methods to ensure correctness. ## Testing - Conducted performance testing comparing the original and optimized versions. - Measured CPU time consumption for the `clone_from` method: - Original version: 1.34% of CPU time - Optimized version: 0.338% of CPU time - Compared FPS before and after the changes (results may vary depending on the run): Before optimization: ``` 2024-07-28T12:49:11.864019Z INFO bevy diagnostic: fps : 213.489463 (avg 214.502488) 2024-07-28T12:49:11.864037Z INFO bevy diagnostic: frame_time : 4.704746ms (avg 4.682251ms) 2024-07-28T12:49:11.864042Z INFO bevy diagnostic: frame_count: 7947.000000 (avg 7887.500000) ``` ![image](https://github.com/user-attachments/assets/7865a365-0569-4b46-814a-964779d90973) After optimization: ``` 2024-07-28T12:29:42.705738Z INFO bevy diagnostic: fps : 220.273721 (avg 220.912227) 2024-07-28T12:29:42.705762Z INFO bevy diagnostic: frame_time : 4.559127ms (avg 4.544905ms) 2024-07-28T12:29:42.705769Z INFO bevy diagnostic: frame_count: 7596.000000 (avg 7536.500000) ``` ![image](https://github.com/user-attachments/assets/8dd96908-86d0-4850-8e29-f80176a005d6) --- Reviewers can test these changes by running `cargo run --release --example ssr` --- crates/bevy_ecs/src/query/access.rs | 208 +++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 4 deletions(-) diff --git a/crates/bevy_ecs/src/query/access.rs b/crates/bevy_ecs/src/query/access.rs index f78283cba0bb9..54a70690a126f 100644 --- a/crates/bevy_ecs/src/query/access.rs +++ b/crates/bevy_ecs/src/query/access.rs @@ -47,7 +47,7 @@ impl<'a, T: SparseSetIndex + fmt::Debug> fmt::Debug for FormattedBitSet<'a, T> { /// /// Used internally to ensure soundness during system initialization and execution. /// See the [`is_compatible`](Access::is_compatible) and [`get_conflicts`](Access::get_conflicts) functions. -#[derive(Clone, Eq, PartialEq)] +#[derive(Eq, PartialEq)] pub struct Access { /// All accessed elements. reads_and_writes: FixedBitSet, @@ -64,6 +64,28 @@ pub struct Access { marker: PhantomData, } +// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`. +impl Clone for Access { + fn clone(&self) -> Self { + Self { + reads_and_writes: self.reads_and_writes.clone(), + writes: self.writes.clone(), + reads_all: self.reads_all, + writes_all: self.writes_all, + archetypal: self.archetypal.clone(), + marker: PhantomData, + } + } + + fn clone_from(&mut self, source: &Self) { + self.reads_and_writes.clone_from(&source.reads_and_writes); + self.writes.clone_from(&source.writes); + self.reads_all = source.reads_all; + self.writes_all = source.writes_all; + self.archetypal.clone_from(&source.archetypal); + } +} + impl fmt::Debug for Access { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Access") @@ -325,7 +347,7 @@ impl Access { /// - `Query>` accesses nothing /// /// See comments the [`WorldQuery`](super::WorldQuery) impls of [`AnyOf`](super::AnyOf)/`Option`/[`Or`](super::Or) for more information. -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq)] pub struct FilteredAccess { pub(crate) access: Access, pub(crate) required: FixedBitSet, @@ -334,6 +356,23 @@ pub struct FilteredAccess { pub(crate) filter_sets: Vec>, } +// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`. +impl Clone for FilteredAccess { + fn clone(&self) -> Self { + Self { + access: self.access.clone(), + required: self.required.clone(), + filter_sets: self.filter_sets.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.access.clone_from(&source.access); + self.required.clone_from(&source.required); + self.filter_sets.clone_from(&source.filter_sets); + } +} + impl Default for FilteredAccess { fn default() -> Self { Self::matches_everything() @@ -526,13 +565,29 @@ impl FilteredAccess { } } -#[derive(Clone, Eq, PartialEq)] +#[derive(Eq, PartialEq)] pub(crate) struct AccessFilters { pub(crate) with: FixedBitSet, pub(crate) without: FixedBitSet, _index_type: PhantomData, } +// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`. +impl Clone for AccessFilters { + fn clone(&self) -> Self { + Self { + with: self.with.clone(), + without: self.without.clone(), + _index_type: PhantomData, + } + } + + fn clone_from(&mut self, source: &Self) { + self.with.clone_from(&source.with); + self.without.clone_from(&source.without); + } +} + impl fmt::Debug for AccessFilters { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("AccessFilters") @@ -570,12 +625,27 @@ impl AccessFilters { /// It stores multiple sets of accesses. /// - A "combined" set, which is the access of all filters in this set combined. /// - The set of access of each individual filters in this set. -#[derive(Debug, Clone)] +#[derive(Debug, PartialEq, Eq)] pub struct FilteredAccessSet { combined_access: Access, filtered_accesses: Vec>, } +// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`. +impl Clone for FilteredAccessSet { + fn clone(&self) -> Self { + Self { + combined_access: self.combined_access.clone(), + filtered_accesses: self.filtered_accesses.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.combined_access.clone_from(&source.combined_access); + self.filtered_accesses.clone_from(&source.filtered_accesses); + } +} + impl FilteredAccessSet { /// Returns a reference to the unfiltered access of the entire set. #[inline] @@ -696,6 +766,136 @@ mod tests { use fixedbitset::FixedBitSet; use std::marker::PhantomData; + fn create_sample_access() -> Access { + let mut access = Access::::default(); + + access.add_read(1); + access.add_read(2); + access.add_write(3); + access.add_archetypal(5); + access.read_all(); + + access + } + + fn create_sample_filtered_access() -> FilteredAccess { + let mut filtered_access = FilteredAccess::::default(); + + filtered_access.add_write(1); + filtered_access.add_read(2); + filtered_access.add_required(3); + filtered_access.and_with(4); + + filtered_access + } + + fn create_sample_access_filters() -> AccessFilters { + let mut access_filters = AccessFilters::::default(); + + access_filters.with.grow_and_insert(3); + access_filters.without.grow_and_insert(5); + + access_filters + } + + fn create_sample_filtered_access_set() -> FilteredAccessSet { + let mut filtered_access_set = FilteredAccessSet::::default(); + + filtered_access_set.add_unfiltered_read(2); + filtered_access_set.add_unfiltered_write(4); + filtered_access_set.read_all(); + + filtered_access_set + } + + #[test] + fn test_access_clone() { + let original: Access = create_sample_access(); + let cloned = original.clone(); + + assert_eq!(original, cloned); + } + + #[test] + fn test_access_clone_from() { + let original: Access = create_sample_access(); + let mut cloned = Access::::default(); + + cloned.add_write(7); + cloned.add_read(4); + cloned.add_archetypal(8); + cloned.write_all(); + + cloned.clone_from(&original); + + assert_eq!(original, cloned); + } + + #[test] + fn test_filtered_access_clone() { + let original: FilteredAccess = create_sample_filtered_access(); + let cloned = original.clone(); + + assert_eq!(original, cloned); + } + + #[test] + fn test_filtered_access_clone_from() { + let original: FilteredAccess = create_sample_filtered_access(); + let mut cloned = FilteredAccess::::default(); + + cloned.add_write(7); + cloned.add_read(4); + cloned.append_or(&FilteredAccess::default()); + + cloned.clone_from(&original); + + assert_eq!(original, cloned); + } + + #[test] + fn test_access_filters_clone() { + let original: AccessFilters = create_sample_access_filters(); + let cloned = original.clone(); + + assert_eq!(original, cloned); + } + + #[test] + fn test_access_filters_clone_from() { + let original: AccessFilters = create_sample_access_filters(); + let mut cloned = AccessFilters::::default(); + + cloned.with.grow_and_insert(1); + cloned.without.grow_and_insert(2); + + cloned.clone_from(&original); + + assert_eq!(original, cloned); + } + + #[test] + fn test_filtered_access_set_clone() { + let original: FilteredAccessSet = create_sample_filtered_access_set(); + let cloned = original.clone(); + + assert_eq!(original, cloned); + } + + #[test] + fn test_filtered_access_set_from() { + let original: FilteredAccessSet = create_sample_filtered_access_set(); + let mut cloned = FilteredAccessSet::::default(); + + cloned.add_unfiltered_read(7); + cloned.add_unfiltered_write(9); + cloned.write_all(); + + cloned.clone_from(&original); + + assert_eq!(original, cloned); + } + #[test] fn read_all_access_conflicts() { // read_all / single write