
Commit

WIP
jbachorik committed Aug 21, 2024
1 parent 3a5bfec commit 79d0669
Showing 10 changed files with 173 additions and 48 deletions.
6 changes: 4 additions & 2 deletions src/hotspot/share/gc/shared/allocTracer.cpp
@@ -32,7 +32,6 @@
#endif

void AllocTracer::send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, JavaThread* thread) {
JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, true, thread);)
EventObjectAllocationOutsideTLAB event;
if (event.should_commit()) {
event.set_objectClass(klass);
@@ -42,7 +41,6 @@ void AllocTracer::send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size
}

void AllocTracer::send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, JavaThread* thread) {
JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, false, thread);)
EventObjectAllocationInNewTLAB event;
if (event.should_commit()) {
event.set_objectClass(klass);
@@ -52,6 +50,10 @@ void AllocTracer::send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_
}
}

void AllocTracer::send_allocation_sample(Klass* klass, HeapWord* obj, size_t alloc_size, size_t weight, bool large_allocation, JavaThread* thread) {
JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, large_allocation, thread);)
}

void AllocTracer::send_allocation_requiring_gc_event(size_t size, uint gcId) {
EventAllocationRequiringGC event;
if (event.should_commit()) {
1 change: 1 addition & 0 deletions src/hotspot/share/gc/shared/allocTracer.hpp
@@ -32,6 +32,7 @@ class AllocTracer : AllStatic {
public:
static void send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, JavaThread* thread);
static void send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, JavaThread* thread);
static void send_allocation_sample(Klass* klass, HeapWord* obj, size_t alloc_size, size_t weight, bool large_allocation, JavaThread* thread);
static void send_allocation_requiring_gc_event(size_t size, uint gcId);
};

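The new send_allocation_sample hook decouples the JFR allocation tracer from the unconditional TLAB event paths: send_allocation_outside_tlab and send_allocation_in_new_tlab keep emitting their events, while JfrAllocationTracer now fires only for allocations the sampler actually picked. Since the new signature carries a weight, here is a minimal self-contained sketch of how that weight is chosen at the call site added in memAllocator.cpp below (the helper name is made up for illustration, not HotSpot code):

#include <cstddef>

// Illustrative helper (hypothetical name): the weight passed to
// send_allocation_sample is the number of heap bytes one emitted sample stands for.
static size_t sample_weight(size_t bytes_since_last_sample, size_t object_size_bytes) {
  // Nothing accumulated since the last sample point (e.g. a large allocation that
  // went straight outside the TLAB): the object itself carries the weight.
  return bytes_since_last_sample == 0 ? object_size_bytes : bytes_since_last_sample;
}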
53 changes: 43 additions & 10 deletions src/hotspot/share/gc/shared/memAllocator.cpp
@@ -30,6 +30,7 @@
#include "gc/shared/memAllocator.hpp"
#include "gc/shared/threadLocalAllocBuffer.inline.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "jfr/jfrEvents.hpp"
#include "memory/universe.hpp"
#include "oops/arrayOop.hpp"
#include "oops/oop.inline.hpp"
@@ -41,6 +42,7 @@
#include "services/lowMemoryDetector.hpp"
#include "utilities/align.hpp"
#include "utilities/copy.hpp"
#include "utilities/dtrace.hpp"
#include "utilities/globalDefinitions.hpp"

class MemAllocator::Allocation: StackObj {
@@ -171,7 +173,8 @@ void MemAllocator::Allocation::notify_allocation_jvmti_sampler() {
return;
}

if (!_allocated_outside_tlab && _allocated_tlab_size == 0 && !_tlab_end_reset_for_sample) {
bool hit_mark = _allocated_tlab_size != 0 || _tlab_end_reset_for_sample;
if (!_allocated_outside_tlab && !hit_mark) {
// Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB
// or expands it due to taking a sampler induced slow path.
return;
@@ -181,23 +184,21 @@ void MemAllocator::Allocation::notify_allocation_jvmti_sampler() {
// before doing the callback. The callback is done in the destructor of
// the JvmtiSampledObjectAllocEventCollector.
size_t bytes_since_last = 0;

size_t bytes_since_allocation = 0;
{
PreserveObj obj_h(_thread, _obj_ptr);
JvmtiSampledObjectAllocEventCollector collector;
size_t size_in_bytes = _allocator._word_size * HeapWordSize;
ThreadLocalAllocBuffer& tlab = _thread->tlab();

if (!_allocated_outside_tlab) {
bytes_since_last = tlab.bytes_since_last_sample_point();
if (_thread->heap_samplers().jvmti().check_for_sampling(&bytes_since_allocation, size_in_bytes, !_allocated_outside_tlab)) {
JvmtiExport::sampled_object_alloc_event_collector(obj_h());
}

_thread->heap_sampler().check_for_sampling(obj_h(), size_in_bytes, bytes_since_last);
}

if (_tlab_end_reset_for_sample || _allocated_tlab_size != 0) {
if (hit_mark) {
ThreadHeapSampler& sampler = _thread->heap_samplers().jvmti();
// Tell tlab to forget bytes_since_last if we passed it to the heap sampler.
_thread->tlab().set_sample_end(bytes_since_last != 0);
sampler.update_bytes(_thread->tlab().set_sample_end(sampler.bytes_until_sample()), !_allocated_outside_tlab);
}
}

@@ -217,6 +218,36 @@ void MemAllocator::Allocation::notify_allocation_jfr_sampler() {
AllocTracer::send_allocation_in_new_tlab(obj()->klass(), mem, _allocated_tlab_size * HeapWordSize,
size_in_bytes, _thread);
}

EventObjectAllocationSample event;
if (!event.should_commit()) {
return;
}

bool hit_mark = _allocated_tlab_size != 0 || _tlab_end_reset_for_sample;
if (!_allocated_outside_tlab && !hit_mark) {
// Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB
// or expands it due to taking a sampler induced slow path.
return;
}

ThreadHeapSampler& sampler = _thread->heap_samplers().jfr();
if (sampler.bytes_until_sample() == static_cast<size_t>(-1)) {
return;
}

size_t bytes_since_allocation = 0;
if (sampler.check_for_sampling(&bytes_since_allocation, size_in_bytes, !_allocated_outside_tlab)) {
size_t weight = bytes_since_allocation == 0 ? size_in_bytes : bytes_since_allocation;
AllocTracer::send_allocation_sample(obj()->klass(), mem, size_in_bytes, weight, _allocated_outside_tlab, _thread);
HOTSPOT_GC_ALLOCOBJECT_SAMPLE(obj()->klass()->name()->as_C_string(), size_in_bytes, weight);
}

if (hit_mark) {
// Tell tlab to forget bytes_since_last if we passed it to the heap sampler.
size_t bytes_inc = _thread->tlab().set_sample_end(sampler.bytes_until_sample());
sampler.update_bytes(bytes_inc, !_allocated_outside_tlab);
}
}

void MemAllocator::Allocation::notify_allocation_dtrace_sampler() {
@@ -258,7 +289,9 @@ HeapWord* MemAllocator::mem_allocate_inside_tlab_slow(Allocation& allocation) co
HeapWord* mem = nullptr;
ThreadLocalAllocBuffer& tlab = _thread->tlab();

if (JvmtiExport::should_post_sampled_object_alloc()) {
EventObjectAllocationSample event;

if (JvmtiExport::should_post_sampled_object_alloc() || event.should_commit()) {
tlab.set_back_allocation_end();
mem = tlab.allocate(_word_size);

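Taken together, the memAllocator changes make the JFR path mirror the JVMTI one: the TLAB slow path is taken whenever either consumer is interested (JvmtiExport::should_post_sampled_object_alloc() or EventObjectAllocationSample being enabled), sampling is only attempted when the allocation went outside the TLAB or hit a sampler mark, a JFR interval of -1 keeps that sampler disabled, and each consumer spends its own per-thread byte budget. A simplified, self-contained sketch of that budget check follows (types and names are stand-ins, not the actual ThreadHeapSampler code):

#include <cstddef>

// Simplified per-thread sampling budget, modelled on ThreadHeapSampler::check_for_sampling.
struct SamplingBudget {
  size_t bytes_since_last_sample = 0;          // TLAB bytes handed over via set_sample_end()
  size_t bytes_until_sample      = 512 * 1024; // next threshold; really drawn geometrically

  // Returns true when this allocation crosses the threshold and should be sampled.
  // 'in_tlab' decides whether previously accumulated TLAB bytes count toward it.
  bool check(size_t allocation_bytes, bool in_tlab, size_t* accumulated_out) {
    size_t accumulated = in_tlab ? bytes_since_last_sample : 0;
    *accumulated_out = accumulated;
    size_t total = accumulated + allocation_bytes;
    if (total < bytes_until_sample) {
      bytes_until_sample -= total;             // not yet: just spend the budget
      return false;
    }
    // Crossed the threshold: the caller reports the allocation (AllocTracer::send_allocation_sample,
    // DTrace probe) and then draws the next geometric bytes_until_sample from the overflow.
    return true;
  }
};

Because the JVMTI and JFR paths each run this check against their own sampler instance (_thread->heap_samplers().jvmti() versus .jfr()), the two consumers keep independent intervals and sample points.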
24 changes: 15 additions & 9 deletions src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
@@ -50,7 +50,6 @@ ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() :
_desired_size(0),
_refill_waste_limit(0),
_allocated_before_last_gc(0),
_bytes_since_last_sample_point(0),
_number_of_refills(0),
_refill_waste(0),
_gc_waste(0),
@@ -203,6 +202,7 @@ void ThreadLocalAllocBuffer::initialize(HeapWord* start,
set_end(end);
set_allocation_end(end);
invariants();
_end_backup = nullptr;
}

void ThreadLocalAllocBuffer::initialize() {
@@ -313,21 +313,27 @@ void ThreadLocalAllocBuffer::print_stats(const char* tag) {
_refill_waste * HeapWordSize);
}

void ThreadLocalAllocBuffer::set_sample_end(bool reset_byte_accumulation) {
size_t ThreadLocalAllocBuffer::set_sample_end(size_t bytes_until_sample) {
size_t heap_words_remaining = pointer_delta(_end, _top);
size_t bytes_until_sample = thread()->heap_sampler().bytes_until_sample();
size_t words_until_sample = bytes_until_sample / HeapWordSize;

if (reset_byte_accumulation) {
_bytes_since_last_sample_point = 0;
}

if (heap_words_remaining > words_until_sample) {
HeapWord* new_end = _top + words_until_sample;
if (_end_backup != nullptr) {
if (new_end > _end_backup) {
HeapWord* tmp = new_end;
new_end = _end_backup;
_end_backup = tmp;
}
}
set_end(new_end);
_bytes_since_last_sample_point += bytes_until_sample;
return bytes_until_sample;
} else {
_bytes_since_last_sample_point += heap_words_remaining * HeapWordSize;
if (_end_backup != nullptr) {
set_end(_end_backup);
_end_backup = nullptr;
}
return heap_words_remaining * HeapWordSize;
}
}

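The sampling mark works by shortening the TLAB's visible _end: the compiled fast path allocates only while top < end, so placing _end at the next sample point forces a slow-path call exactly when the budget runs out. set_sample_end now takes the budget and returns the bytes it accounted for (replacing the old _bytes_since_last_sample_point accumulator), and the new _end_backup field parks the farther of two competing marks when the JVMTI and JFR samplers are interleaved. A simplified model of that contract, under the assumption of an 8-byte HeapWord (not the actual TLAB code):

#include <cstddef>

// Given the free space left in the TLAB and the sampler's byte budget, return how
// many bytes this TLAB accounts toward the budget; the real method additionally
// pulls _end back to the sample point (or restores it from _end_backup).
static size_t bytes_accounted_by_tlab(size_t heap_words_remaining,
                                      size_t bytes_until_sample,
                                      size_t heap_word_size = 8 /* assumed */) {
  size_t words_until_sample = bytes_until_sample / heap_word_size;
  if (heap_words_remaining > words_until_sample) {
    // Sample point lies inside this TLAB: _end is shortened to _top + words_until_sample
    // and the whole budget is handed over.
    return bytes_until_sample;
  }
  // Sample point lies beyond this TLAB: only the remaining space counts, and the
  // real TLAB end is kept (or restored) so allocation can use the full buffer.
  return heap_words_remaining * heap_word_size;
}

// Example: with 256 KiB free and a 100 KiB budget this returns 100 KiB and the fast
// path stops at the mark; with a 1 MiB budget it returns 256 KiB and no mark is set.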
5 changes: 2 additions & 3 deletions src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
@@ -51,12 +51,12 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
HeapWord* _top; // address after last allocation
HeapWord* _pf_top; // allocation prefetch watermark
HeapWord* _end; // allocation end (can be the sampling end point or _allocation_end)
HeapWord* _end_backup; // backup of _end for interleaved jvmti and jfr sampling
HeapWord* _allocation_end; // end for allocations (actual TLAB end, excluding alignment_reserve)

size_t _desired_size; // desired size (including alignment_reserve)
size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
size_t _bytes_since_last_sample_point; // bytes since last sample point.

static size_t _max_size; // maximum size of any TLAB
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
@@ -124,7 +124,6 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
size_t free() const { return pointer_delta(end(), top()); }
// Don't discard tlab if remaining space is larger than this.
size_t refill_waste_limit() const { return _refill_waste_limit; }
size_t bytes_since_last_sample_point() const { return _bytes_since_last_sample_point; }

// For external inspection.
const HeapWord* start_relaxed() const;
@@ -168,7 +167,7 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
void initialize();

void set_back_allocation_end();
void set_sample_end(bool reset_byte_accumulation);
size_t set_sample_end(size_t bytes_until_sample);

static size_t refill_waste_limit_increment();

4 changes: 4 additions & 0 deletions src/hotspot/share/jfr/recorder/service/jfrEventThrottler.cpp
@@ -27,6 +27,7 @@
#include "jfr/recorder/service/jfrEventThrottler.hpp"
#include "jfr/utilities/jfrSpinlockHelper.hpp"
#include "logging/log.hpp"
#include "runtime/threadHeapSampler.hpp"

constexpr static const JfrSamplerParams _disabled_params = {
0, // sample points per window
@@ -72,6 +73,9 @@ void JfrEventThrottler::configure(JfrEventId event_id, int64_t sample_size, int6
}
assert(_throttler != nullptr, "JfrEventThrottler has not been properly initialized");
_throttler->configure(sample_size, period_ms);

// TODO: Hack to get the allocation sampler going
ThreadHeapSamplers::set_jfr_sampling_interval(512 * 1024);
}

/*
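The configure() change is explicitly marked as a hack: whenever a throttled event is (re)configured, the JFR heap sampler is switched on with a fixed 512 KiB interval, ignoring the requested sample_size and period entirely (otherwise the interval stays at its -1 default, i.e. disabled). Purely as a hedged sketch and not part of this commit, one direction would be to derive the byte interval from the throttler's budget; the helper name and the allocation-rate heuristic below are assumptions:

#include <cstdint>
#include <climits>

// Hypothetical helper (not in this commit): turn a "samples per window" budget
// into a per-thread byte interval, given an assumed allocation rate.
static int derive_jfr_sampling_interval(int64_t samples_per_window, int64_t window_ms,
                                        int64_t assumed_bytes_allocated_per_sec) {
  if (samples_per_window <= 0 || window_ms <= 0) {
    return -1;  // keep the JFR sampler disabled
  }
  double samples_per_sec = (double)samples_per_window * 1000.0 / (double)window_ms;
  double interval = (double)assumed_bytes_allocated_per_sec / samples_per_sec;
  if (interval < 1.0)             return 0;        // sample every allocation
  if (interval > (double)INT_MAX) return INT_MAX;  // clamp
  return (int)interval;
}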
2 changes: 1 addition & 1 deletion src/hotspot/share/prims/jvmtiEnv.cpp
@@ -3760,7 +3760,7 @@ JvmtiEnv::SetHeapSamplingInterval(jint sampling_interval) {
if (sampling_interval < 0) {
return JVMTI_ERROR_ILLEGAL_ARGUMENT;
}
ThreadHeapSampler::set_sampling_interval(sampling_interval);
ThreadHeapSamplers::set_jvmti_sampling_interval(sampling_interval);
return JVMTI_ERROR_NONE;
} /* end SetHeapSamplingInterval */

4 changes: 2 additions & 2 deletions src/hotspot/share/runtime/thread.hpp
@@ -267,7 +267,7 @@ class Thread: public ThreadShadow {
ThreadLocalAllocBuffer _tlab; // Thread-local eden
jlong _allocated_bytes; // Cumulative number of bytes allocated on
// the Java heap
ThreadHeapSampler _heap_sampler; // For use when sampling the memory.
ThreadHeapSamplers _heap_samplers; // Used by the JVMTI allocation sampler

ThreadStatisticalInfo _statistical_info; // Statistics about the thread

@@ -418,7 +418,7 @@ class Thread: public ThreadShadow {
void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
inline jlong cooked_allocated_bytes();

ThreadHeapSampler& heap_sampler() { return _heap_sampler; }
ThreadHeapSamplers& heap_samplers() { return _heap_samplers; }

ThreadStatisticalInfo& statistical_info() { return _statistical_info; }

59 changes: 44 additions & 15 deletions src/hotspot/share/runtime/threadHeapSampler.cpp
@@ -36,10 +36,8 @@
#include "oops/oop.inline.hpp"
#include "utilities/dtrace.hpp"

// Cheap random number generator.
uint64_t ThreadHeapSampler::_rnd;
// Default is 512kb.
volatile int ThreadHeapSampler::_sampling_interval = 512 * 1024;
volatile int ThreadHeapSamplers::_jvmti_sampling_interval = 512 * 1024; // 512KiB default
volatile int ThreadHeapSamplers::_jfr_sampling_interval = -1; // disabled

// Statics for the fast log
static const int FastLogNumBits = 10;
@@ -397,7 +395,7 @@ void ThreadHeapSampler::pick_next_geometric_sample() {
// negative answer.
double log_val = (fast_log2(q) - 26);
double result =
(0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_interval())) + 1;
(0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_interval())) + 1;
assert(result > 0 && result < static_cast<double>(SIZE_MAX), "Result is not in an acceptable range.");
size_t interval = static_cast<size_t>(result);
_bytes_until_sample = interval;
@@ -411,35 +409,66 @@ void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
#endif
// Explicitly test if the sampling interval is 0, return 0 to sample every
// allocation.
if (get_sampling_interval() == 0) {
int interval = get_interval();
if (interval == 0) {
_bytes_until_sample = 0;
return;
} else if (interval < 0) {
_bytes_until_sample = static_cast<size_t>(-1);
return;
}

pick_next_geometric_sample();
}

void ThreadHeapSampler::check_for_sampling(oop obj, size_t allocation_size, size_t bytes_since_allocation) {
size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
bool ThreadHeapSampler::check_for_sampling(size_t* bytes_since_allocation, size_t allocation_size, bool in_tlab) {
size_t processed_bytes = in_tlab ? _bytes_since_last_sample_point : 0;

size_t total_allocated_bytes = processed_bytes + allocation_size;

*bytes_since_allocation = processed_bytes;
// If not yet time for a sample, skip it.
if (total_allocated_bytes < _bytes_until_sample) {
_bytes_until_sample -= total_allocated_bytes;
return;
return false;
}

HOTSPOT_GC_ALLOCOBJECT_SAMPLE(obj->klass()->name()->as_C_string(), allocation_size, bytes_since_allocation);

JvmtiExport::sampled_object_alloc_event_collector(obj);

size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
pick_next_sample(overflow_bytes);
return true;
}

int ThreadHeapSampler::get_sampling_interval() {
return Atomic::load_acquire(&_sampling_interval);
return ThreadHeapSamplers::get_jvmti_sampling_interval();
}

void ThreadHeapSampler::set_sampling_interval(int sampling_interval) {
Atomic::release_store(&_sampling_interval, sampling_interval);
ThreadHeapSamplers::set_jvmti_sampling_interval(sampling_interval);
}

void ThreadHeapSampler::update_bytes(size_t bytes, bool reset) {
if (reset) {
_bytes_since_last_sample_point = 0;
}
_bytes_since_last_sample_point += bytes;
}

void ThreadHeapSamplers::set_jvmti_sampling_interval(int interval) {
Atomic::release_store(&_jvmti_sampling_interval, interval);
}

int ThreadHeapSamplers::get_jvmti_sampling_interval() {
return Atomic::load_acquire(&_jvmti_sampling_interval);
}

void ThreadHeapSamplers::set_jfr_sampling_interval(int interval) {
Atomic::release_store(&_jfr_sampling_interval, interval);
}

int ThreadHeapSamplers::get_jfr_sampling_interval() {
return Atomic::load_acquire(&_jfr_sampling_interval);
}

int ThreadHeapSampler::get_interval() {
return Atomic::load_acquire(_sampling_interval_ref);
}
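The tenth changed file is not rendered in this view (most likely threadHeapSampler.hpp, given the new ThreadHeapSamplers API), so the exact declarations are not shown. From the usages above (_thread->heap_samplers().jvmti() and .jfr(), the per-purpose interval statics defined in this file, and the _sampling_interval_ref read by get_interval()), the aggregate plausibly looks like the following; treat every detail as an inference, not the actual header:

// Inferred sketch of the new per-thread sampler aggregate (assumed, not the real header).
class ThreadHeapSampler;  // existing per-consumer sampler, now instantiated twice

class ThreadHeapSamplers {
  ThreadHeapSampler _jvmti;   // backs JvmtiEnv::SetHeapSamplingInterval / sampled-alloc events
  ThreadHeapSampler _jfr;     // backs EventObjectAllocationSample

  static volatile int _jvmti_sampling_interval;  // 512 KiB default (defined in this .cpp)
  static volatile int _jfr_sampling_interval;    // -1, i.e. disabled until configured

 public:
  ThreadHeapSampler& jvmti() { return _jvmti; }
  ThreadHeapSampler& jfr()   { return _jfr; }

  static void set_jvmti_sampling_interval(int interval);
  static int  get_jvmti_sampling_interval();
  static void set_jfr_sampling_interval(int interval);
  static int  get_jfr_sampling_interval();
};

// Each ThreadHeapSampler presumably holds a pointer to "its" interval (the
// _sampling_interval_ref read by ThreadHeapSampler::get_interval()), so both
// samplers share one implementation while following independent budgets.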
