Skip to content

Commit

Permalink
refactor(allocator): Use & instead of a thread-local (#9235)
Browse files Browse the repository at this point in the history
**Description:**

This is part of #9230. Profiling showed that `thread_local` took too
long to get the address of the thread-local variable, so I inlined the
reference into the allocator.

# Benchmark result

```

Gnuplot not found, using plotters backend
common/allocator/alloc/std/1000000
                        time:   [4.9478 ms 4.9653 ms 4.9922 ms]
Found 17 outliers among 100 measurements (17.00%)
  4 (4.00%) high mild
  13 (13.00%) high severe

common/allocator/alloc/no-scope/1000000
                        time:   [5.4821 ms 5.4938 ms 5.5068 ms]
Found 17 outliers among 100 measurements (17.00%)
  2 (2.00%) high mild
  15 (15.00%) high severe

common/allocator/alloc/scoped/1000000
                        time:   [3.1401 ms 3.1456 ms 3.1518 ms]
Found 12 outliers among 100 measurements (12.00%)
  3 (3.00%) high mild
  9 (9.00%) high severe

common/allocator/alloc/cached-no-scope/1000000
                        time:   [5.0992 ms 5.1090 ms 5.1198 ms]
Found 11 outliers among 100 measurements (11.00%)
  2 (2.00%) high mild
  9 (9.00%) high severe

common/allocator/alloc/cached-scoped/1000000
                        time:   [3.0191 ms 3.0230 ms 3.0273 ms]
Found 11 outliers among 100 measurements (11.00%)
  2 (2.00%) low mild
  1 (1.00%) high mild
  8 (8.00%) high severe
```
  • Loading branch information
kdy1 committed Jul 14, 2024
1 parent 83e75ba commit 8d5670e
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 105 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/swc_allocator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ rkyv = { workspace = true, optional = true }
scoped-tls = { workspace = true }
serde = { workspace = true, optional = true }
serde_derive = { workspace = true, optional = true }
triomphe = "0.1.13"


[dev-dependencies]
Expand Down
6 changes: 3 additions & 3 deletions crates/swc_allocator/benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate swc_malloc;

use codspeed_criterion_compat::{black_box, criterion_group, criterion_main, Bencher, Criterion};
use swc_allocator::{FastAlloc, MemorySpace};
use swc_allocator::{FastAlloc, SwcAllocator};

fn bench_alloc(c: &mut Criterion) {
fn direct_alloc_std(b: &mut Bencher, times: usize) {
Expand Down Expand Up @@ -40,7 +40,7 @@ fn bench_alloc(c: &mut Criterion) {

fn direct_alloc_scoped(b: &mut Bencher, times: usize) {
b.iter(|| {
let allocator = MemorySpace::default();
let allocator = SwcAllocator::default();

allocator.scope(|| {
let mut vec = swc_allocator::vec::Vec::new();
Expand All @@ -56,7 +56,7 @@ fn bench_alloc(c: &mut Criterion) {

fn fast_alloc_scoped(b: &mut Bencher, times: usize) {
b.iter(|| {
MemorySpace::default().scope(|| {
SwcAllocator::default().scope(|| {
let allocator = FastAlloc::default();

let mut vec = allocator.vec();
Expand Down
109 changes: 54 additions & 55 deletions crates/swc_allocator/src/alloc.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,51 @@
use std::{alloc::Layout, ptr::NonNull};
use std::{alloc::Layout, mem::transmute, ptr::NonNull};

use allocator_api2::alloc::Global;
use scoped_tls::scoped_thread_local;

use crate::{FastAlloc, MemorySpace};

scoped_thread_local!(pub(crate) static ALLOC: MemorySpace);
scoped_thread_local!(pub(crate) static ALLOC: &'static SwcAllocator);

#[derive(Debug, Clone, Copy)]
pub struct SwcAlloc {
pub(crate) is_arena_mode: bool,
}
#[derive(Default)]
pub struct SwcAllocator(MemorySpace);

impl Default for FastAlloc {
fn default() -> Self {
Self {
is_arena_mode: ALLOC.is_set(),
}
impl SwcAllocator {
/// Invokes `f` in a scope where the allocations are done in this allocator.
#[inline(always)]
pub fn scope<'a, F, R>(&'a self, f: F) -> R
where
F: FnOnce() -> R,
{
let s = unsafe {
// Safety: We are using a scoped API
transmute::<&'a SwcAllocator, &'static SwcAllocator>(self)
};

ALLOC.set(&s, f)
}
}

impl Default for SwcAlloc {
impl Default for FastAlloc {
fn default() -> Self {
SwcAlloc {
is_arena_mode: ALLOC.is_set(),
Self {
alloc: if ALLOC.is_set() {
Some(ALLOC.with(|v| *v))
} else {
None
},
}
}
}

impl SwcAlloc {
impl FastAlloc {
/// `true` is passed to `f` if the box is allocated with a custom allocator.
fn with_allocator<T>(
&self,
f: impl FnOnce(&dyn allocator_api2::alloc::Allocator, bool) -> T,
) -> T {
if self.is_arena_mode {
ALLOC.with(|a| {
//
f(&&**a as &dyn allocator_api2::alloc::Allocator, true)
})
if let Some(arena) = &self.alloc {
f((&&*arena.0) as &dyn allocator_api2::alloc::Allocator, true)
} else {
f(&allocator_api2::alloc::Global, false)
}
Expand All @@ -49,7 +56,7 @@ fn mark_ptr_as_arena_mode(ptr: NonNull<[u8]>) -> NonNull<[u8]> {
ptr
}

unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
unsafe impl allocator_api2::alloc::Allocator for FastAlloc {
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
self.with_allocator(|a, is_arena_mode| {
let ptr = a.allocate(layout)?;
Expand Down Expand Up @@ -78,18 +85,13 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
}

unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
if self.is_arena_mode {
if self.alloc.is_some() {
debug_assert!(
ALLOC.is_set(),
"Deallocating a pointer allocated with arena mode with a non-arena mode allocator"
);

ALLOC.with(|alloc| {
unsafe {
// Safety: We are in unsafe fn
(&**alloc).deallocate(ptr, layout)
}
})
self.with_allocator(|alloc, _| alloc.deallocate(ptr, layout))
} else {
Global.deallocate(ptr, layout)
}
Expand All @@ -101,16 +103,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
old_layout: Layout,
new_layout: Layout,
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
if self.is_arena_mode {
debug_assert!(
ALLOC.is_set(),
"Growing a pointer allocated with arena mode with a non-arena mode allocator"
);
self.with_allocator(|alloc, is_arena_mode| {
let ptr = alloc.grow(ptr, old_layout, new_layout)?;

ALLOC.with(|alloc| (&**alloc).grow(ptr, old_layout, new_layout))
} else {
Global.grow(ptr, old_layout, new_layout)
}
if is_arena_mode {
Ok(mark_ptr_as_arena_mode(ptr))
} else {
Ok(ptr)
}
})
}

unsafe fn grow_zeroed(
Expand All @@ -119,16 +120,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
old_layout: Layout,
new_layout: Layout,
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
if self.is_arena_mode {
debug_assert!(
ALLOC.is_set(),
"Growing a pointer allocated with arena mode with a non-arena mode allocator"
);
self.with_allocator(|alloc, is_arena_mode| {
let ptr = alloc.grow_zeroed(ptr, old_layout, new_layout)?;

ALLOC.with(|alloc| (&**alloc).grow_zeroed(ptr, old_layout, new_layout))
} else {
Global.grow_zeroed(ptr, old_layout, new_layout)
}
if is_arena_mode {
Ok(mark_ptr_as_arena_mode(ptr))
} else {
Ok(ptr)
}
})
}

unsafe fn shrink(
Expand All @@ -137,16 +137,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
old_layout: Layout,
new_layout: Layout,
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
if self.is_arena_mode {
debug_assert!(
ALLOC.is_set(),
"Shrinking a pointer allocated with arena mode with a non-arena mode allocator"
);
self.with_allocator(|alloc, is_arena_mode| {
let ptr = alloc.shrink(ptr, old_layout, new_layout)?;

ALLOC.with(|alloc| (&**alloc).shrink(ptr, old_layout, new_layout))
} else {
Global.shrink(ptr, old_layout, new_layout)
}
if is_arena_mode {
Ok(mark_ptr_as_arena_mode(ptr))
} else {
Ok(ptr)
}
})
}

fn by_ref(&self) -> &Self
Expand Down
16 changes: 8 additions & 8 deletions crates/swc_allocator/src/boxed/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{
pin::Pin,
};

use crate::{alloc::SwcAlloc, FastAlloc};
use crate::FastAlloc;

#[cfg(feature = "rkyv")]
mod rkyv;
Expand All @@ -23,7 +23,7 @@ mod serde;
/// The last bit is 1 if the box is allocated with a custom allocator.
#[repr(transparent)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Box<T: ?Sized>(pub(crate) allocator_api2::boxed::Box<T, SwcAlloc>);
pub struct Box<T: ?Sized>(pub(crate) allocator_api2::boxed::Box<T, FastAlloc>);

impl<T> From<T> for Box<T> {
#[inline(always)]
Expand All @@ -32,9 +32,9 @@ impl<T> From<T> for Box<T> {
}
}

impl<T: ?Sized> From<allocator_api2::boxed::Box<T, SwcAlloc>> for Box<T> {
impl<T: ?Sized> From<allocator_api2::boxed::Box<T, FastAlloc>> for Box<T> {
#[inline(always)]
fn from(v: allocator_api2::boxed::Box<T, SwcAlloc>) -> Self {
fn from(v: allocator_api2::boxed::Box<T, FastAlloc>) -> Self {
Box(v)
}
}
Expand All @@ -56,7 +56,7 @@ impl<T> Box<T> {
pub fn new(value: T) -> Self {
Self(allocator_api2::boxed::Box::new_in(
value,
SwcAlloc::default(),
FastAlloc::default(),
))
}

Expand Down Expand Up @@ -111,7 +111,7 @@ impl<T: ?Sized> Box<T> {
pub unsafe fn from_raw(raw: *mut T) -> Self {
Self(allocator_api2::boxed::Box::from_raw_in(
raw,
SwcAlloc::default(),
FastAlloc::default(),
))
}

Expand Down Expand Up @@ -629,7 +629,7 @@ where
}

impl FastAlloc {
pub fn alloc<T>(self, t: T) -> Box<T> {
Box(allocator_api2::boxed::Box::new_in(t, self.swc_alloc()))
pub fn alloc<T>(&self, t: T) -> Box<T> {
Box(allocator_api2::boxed::Box::new_in(t, self.clone()))
}
}
28 changes: 4 additions & 24 deletions crates/swc_allocator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,26 @@

#![allow(clippy::needless_doctest_main)]

use alloc::SwcAlloc;
use std::ops::{Deref, DerefMut};

use bumpalo::Bump;

use crate::alloc::ALLOC;
pub use crate::alloc::SwcAllocator;

mod alloc;
pub mod boxed;
pub mod vec;

#[derive(Debug, Clone, Copy)]
#[derive(Clone)]
pub struct FastAlloc {
is_arena_mode: bool,
}

impl FastAlloc {
fn swc_alloc(self) -> SwcAlloc {
SwcAlloc {
is_arena_mode: self.is_arena_mode,
}
}
alloc: Option<&'static SwcAllocator>,
}

#[derive(Default)]
pub struct MemorySpace {
struct MemorySpace {
alloc: Bump,
}

impl MemorySpace {
/// Invokes `f` in a scope where the allocations are done in this allocator.
#[inline(always)]
pub fn scope<F, R>(&self, f: F) -> R
where
F: FnOnce() -> R,
{
ALLOC.set(self, f)
}
}

impl From<Bump> for MemorySpace {
fn from(alloc: Bump) -> Self {
Self { alloc }
Expand Down
Loading

0 comments on commit 8d5670e

Please sign in to comment.