From 79d06692ac823e0f46c3078df021a9a99e5033a8 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 20 Aug 2024 15:57:24 +0200 Subject: [PATCH] WIP --- src/hotspot/share/gc/shared/allocTracer.cpp | 6 +- src/hotspot/share/gc/shared/allocTracer.hpp | 1 + src/hotspot/share/gc/shared/memAllocator.cpp | 53 +++++++++++++--- .../gc/shared/threadLocalAllocBuffer.cpp | 24 ++++--- .../gc/shared/threadLocalAllocBuffer.hpp | 5 +- .../recorder/service/jfrEventThrottler.cpp | 4 ++ src/hotspot/share/prims/jvmtiEnv.cpp | 2 +- src/hotspot/share/runtime/thread.hpp | 4 +- .../share/runtime/threadHeapSampler.cpp | 59 ++++++++++++----- .../share/runtime/threadHeapSampler.hpp | 63 +++++++++++++++++-- 10 files changed, 173 insertions(+), 48 deletions(-) diff --git a/src/hotspot/share/gc/shared/allocTracer.cpp b/src/hotspot/share/gc/shared/allocTracer.cpp index 45fa0c7e631bd..e4fda6bd184d8 100644 --- a/src/hotspot/share/gc/shared/allocTracer.cpp +++ b/src/hotspot/share/gc/shared/allocTracer.cpp @@ -32,7 +32,6 @@ #endif void AllocTracer::send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, JavaThread* thread) { - JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, true, thread);) EventObjectAllocationOutsideTLAB event; if (event.should_commit()) { event.set_objectClass(klass); @@ -42,7 +41,6 @@ void AllocTracer::send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size } void AllocTracer::send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, JavaThread* thread) { - JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, false, thread);) EventObjectAllocationInNewTLAB event; if (event.should_commit()) { event.set_objectClass(klass); @@ -52,6 +50,10 @@ void AllocTracer::send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_ } } +void AllocTracer::send_allocation_sample(Klass* klass, HeapWord* obj, size_t alloc_size, size_t weight, bool large_allocation, JavaThread* thread) { + 
JFR_ONLY(JfrAllocationTracer tracer(klass, obj, alloc_size, large_allocation, thread);) +} + void AllocTracer::send_allocation_requiring_gc_event(size_t size, uint gcId) { EventAllocationRequiringGC event; if (event.should_commit()) { diff --git a/src/hotspot/share/gc/shared/allocTracer.hpp b/src/hotspot/share/gc/shared/allocTracer.hpp index 273f20d70ea44..e379afcd9cc23 100644 --- a/src/hotspot/share/gc/shared/allocTracer.hpp +++ b/src/hotspot/share/gc/shared/allocTracer.hpp @@ -32,6 +32,7 @@ class AllocTracer : AllStatic { public: static void send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, JavaThread* thread); static void send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, JavaThread* thread); + static void send_allocation_sample(Klass* klass, HeapWord* obj, size_t alloc_size, size_t weight, bool large_allocation, JavaThread* thread); static void send_allocation_requiring_gc_event(size_t size, uint gcId); }; diff --git a/src/hotspot/share/gc/shared/memAllocator.cpp b/src/hotspot/share/gc/shared/memAllocator.cpp index 318ab00188b3d..cfdfae0a572fa 100644 --- a/src/hotspot/share/gc/shared/memAllocator.cpp +++ b/src/hotspot/share/gc/shared/memAllocator.cpp @@ -30,6 +30,7 @@ #include "gc/shared/memAllocator.hpp" #include "gc/shared/threadLocalAllocBuffer.inline.hpp" #include "gc/shared/tlab_globals.hpp" +#include "jfr/jfrEvents.hpp" #include "memory/universe.hpp" #include "oops/arrayOop.hpp" #include "oops/oop.inline.hpp" @@ -41,6 +42,7 @@ #include "services/lowMemoryDetector.hpp" #include "utilities/align.hpp" #include "utilities/copy.hpp" +#include "utilities/dtrace.hpp" #include "utilities/globalDefinitions.hpp" class MemAllocator::Allocation: StackObj { @@ -171,7 +173,8 @@ void MemAllocator::Allocation::notify_allocation_jvmti_sampler() { return; } - if (!_allocated_outside_tlab && _allocated_tlab_size == 0 && !_tlab_end_reset_for_sample) { + bool hit_mark = _allocated_tlab_size != 0 || 
_tlab_end_reset_for_sample; + if (!_allocated_outside_tlab && !hit_mark) { // Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB // or expands it due to taking a sampler induced slow path. return; @@ -181,23 +184,21 @@ void MemAllocator::Allocation::notify_allocation_jvmti_sampler() { // before doing the callback. The callback is done in the destructor of // the JvmtiSampledObjectAllocEventCollector. size_t bytes_since_last = 0; - + size_t bytes_since_allocation = 0; { PreserveObj obj_h(_thread, _obj_ptr); JvmtiSampledObjectAllocEventCollector collector; size_t size_in_bytes = _allocator._word_size * HeapWordSize; - ThreadLocalAllocBuffer& tlab = _thread->tlab(); - if (!_allocated_outside_tlab) { - bytes_since_last = tlab.bytes_since_last_sample_point(); + if (_thread->heap_samplers().jvmti().check_for_sampling(&bytes_since_allocation, size_in_bytes, !_allocated_outside_tlab)) { + JvmtiExport::sampled_object_alloc_event_collector(obj_h()); } - - _thread->heap_sampler().check_for_sampling(obj_h(), size_in_bytes, bytes_since_last); } - if (_tlab_end_reset_for_sample || _allocated_tlab_size != 0) { + if (hit_mark) { + ThreadHeapSampler& sampler = _thread->heap_samplers().jvmti(); // Tell tlab to forget bytes_since_last if we passed it to the heap sampler. 
- _thread->tlab().set_sample_end(bytes_since_last != 0); + sampler.update_bytes(_thread->tlab().set_sample_end(sampler.bytes_until_sample()), !_allocated_outside_tlab); } } @@ -217,6 +218,36 @@ void MemAllocator::Allocation::notify_allocation_jfr_sampler() { AllocTracer::send_allocation_in_new_tlab(obj()->klass(), mem, _allocated_tlab_size * HeapWordSize, size_in_bytes, _thread); } + + EventObjectAllocationSample event; + if (!event.should_commit()) { + return; + } + + bool hit_mark = _allocated_tlab_size != 0 || _tlab_end_reset_for_sample; + if (!_allocated_outside_tlab && !hit_mark) { + // Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB + // or expands it due to taking a sampler induced slow path. + return; + } + + ThreadHeapSampler& sampler = _thread->heap_samplers().jfr(); + if (sampler.bytes_until_sample() == static_cast<size_t>(-1)) { + return; + } + + size_t bytes_since_allocation = 0; + if (sampler.check_for_sampling(&bytes_since_allocation, size_in_bytes, !_allocated_outside_tlab)) { + size_t weight = bytes_since_allocation == 0 ? size_in_bytes : bytes_since_allocation; + AllocTracer::send_allocation_sample(obj()->klass(), mem, size_in_bytes, weight, _allocated_outside_tlab, _thread); + HOTSPOT_GC_ALLOCOBJECT_SAMPLE(obj()->klass()->name()->as_C_string(), size_in_bytes, weight); + } + + if (hit_mark) { + // Tell tlab to forget bytes_since_last if we passed it to the heap sampler. 
+ size_t bytes_inc = _thread->tlab().set_sample_end(sampler.bytes_until_sample()); + sampler.update_bytes(bytes_inc, !_allocated_outside_tlab); + } } void MemAllocator::Allocation::notify_allocation_dtrace_sampler() { @@ -258,7 +289,9 @@ HeapWord* MemAllocator::mem_allocate_inside_tlab_slow(Allocation& allocation) co HeapWord* mem = nullptr; ThreadLocalAllocBuffer& tlab = _thread->tlab(); - if (JvmtiExport::should_post_sampled_object_alloc()) { + EventObjectAllocationSample event; + + if (JvmtiExport::should_post_sampled_object_alloc() || event.should_commit()) { tlab.set_back_allocation_end(); mem = tlab.allocate(_word_size); diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp index e967a65555a15..29ddb977660e4 100644 --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp @@ -50,7 +50,6 @@ ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() : _desired_size(0), _refill_waste_limit(0), _allocated_before_last_gc(0), - _bytes_since_last_sample_point(0), _number_of_refills(0), _refill_waste(0), _gc_waste(0), @@ -203,6 +202,7 @@ void ThreadLocalAllocBuffer::initialize(HeapWord* start, set_end(end); set_allocation_end(end); invariants(); + _end_backup = nullptr; } void ThreadLocalAllocBuffer::initialize() { @@ -313,21 +313,27 @@ void ThreadLocalAllocBuffer::print_stats(const char* tag) { _refill_waste * HeapWordSize); } -void ThreadLocalAllocBuffer::set_sample_end(bool reset_byte_accumulation) { +size_t ThreadLocalAllocBuffer::set_sample_end(size_t bytes_until_sample) { size_t heap_words_remaining = pointer_delta(_end, _top); - size_t bytes_until_sample = thread()->heap_sampler().bytes_until_sample(); size_t words_until_sample = bytes_until_sample / HeapWordSize; - if (reset_byte_accumulation) { - _bytes_since_last_sample_point = 0; - } - if (heap_words_remaining > words_until_sample) { HeapWord* new_end = _top + words_until_sample; + 
if (_end_backup != nullptr) { + if (new_end > _end_backup) { + HeapWord* tmp = new_end; + new_end = _end_backup; + _end_backup = tmp; + } + } set_end(new_end); - _bytes_since_last_sample_point += bytes_until_sample; + return bytes_until_sample; } else { - _bytes_since_last_sample_point += heap_words_remaining * HeapWordSize; + if (_end_backup != nullptr) { + set_end(_end_backup); + _end_backup = nullptr; + } + return heap_words_remaining * HeapWordSize; } } diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp index 7675957ab8b99..4fdddff29e695 100644 --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp @@ -51,12 +51,12 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> { HeapWord* _top; // address after last allocation HeapWord* _pf_top; // allocation prefetch watermark HeapWord* _end; // allocation end (can be the sampling end point or _allocation_end) + HeapWord* _end_backup; // backup of _end for interleaved jvmti and jfr sampling HeapWord* _allocation_end; // end for allocations (actual TLAB end, excluding alignment_reserve) size_t _desired_size; // desired size (including alignment_reserve) size_t _refill_waste_limit; // hold onto tlab if free() is larger than this size_t _allocated_before_last_gc; // total bytes allocated up until the last gc - size_t _bytes_since_last_sample_point; // bytes since last sample point. static size_t _max_size; // maximum size of any TLAB static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB @@ -124,7 +124,6 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> { size_t free() const { return pointer_delta(end(), top()); } // Don't discard tlab if remaining space is larger than this. size_t refill_waste_limit() const { return _refill_waste_limit; } - size_t bytes_since_last_sample_point() const { return _bytes_since_last_sample_point; } // For external inspection. 
const HeapWord* start_relaxed() const; @@ -168,7 +167,7 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> { void initialize(); void set_back_allocation_end(); - void set_sample_end(bool reset_byte_accumulation); + size_t set_sample_end(size_t bytes_until_sample); static size_t refill_waste_limit_increment(); diff --git a/src/hotspot/share/jfr/recorder/service/jfrEventThrottler.cpp b/src/hotspot/share/jfr/recorder/service/jfrEventThrottler.cpp index af0697b5e28ca..27beb07e046d4 100644 --- a/src/hotspot/share/jfr/recorder/service/jfrEventThrottler.cpp +++ b/src/hotspot/share/jfr/recorder/service/jfrEventThrottler.cpp @@ -27,6 +27,7 @@ #include "jfr/recorder/service/jfrEventThrottler.hpp" #include "jfr/utilities/jfrSpinlockHelper.hpp" #include "logging/log.hpp" +#include "runtime/threadHeapSampler.hpp" constexpr static const JfrSamplerParams _disabled_params = { 0, // sample points per window @@ -72,6 +73,9 @@ void JfrEventThrottler::configure(JfrEventId event_id, int64_t sample_size, int6 } assert(_throttler != nullptr, "JfrEventThrottler has not been properly initialized"); _throttler->configure(sample_size, period_ms); + + // TODO: Hack to get the allocation sampler going + ThreadHeapSamplers::set_jfr_sampling_interval(512 * 1024); } /* diff --git a/src/hotspot/share/prims/jvmtiEnv.cpp b/src/hotspot/share/prims/jvmtiEnv.cpp index ccccb5f1bda78..d958e88a597fa 100644 --- a/src/hotspot/share/prims/jvmtiEnv.cpp +++ b/src/hotspot/share/prims/jvmtiEnv.cpp @@ -3760,7 +3760,7 @@ JvmtiEnv::SetHeapSamplingInterval(jint sampling_interval) { if (sampling_interval < 0) { return JVMTI_ERROR_ILLEGAL_ARGUMENT; } - ThreadHeapSampler::set_sampling_interval(sampling_interval); + ThreadHeapSamplers::set_jvmti_sampling_interval(sampling_interval); return JVMTI_ERROR_NONE; } /* end SetHeapSamplingInterval */ diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp index e9fee4d113aaa..f04e3dee6b6d6 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ 
b/src/hotspot/share/runtime/thread.hpp @@ -267,7 +267,7 @@ class Thread: public ThreadShadow { ThreadLocalAllocBuffer _tlab; // Thread-local eden jlong _allocated_bytes; // Cumulative number of bytes allocated on // the Java heap - ThreadHeapSampler _heap_sampler; // For use when sampling the memory. + ThreadHeapSamplers _heap_samplers; // Per-thread heap samplers (JVMTI and JFR) ThreadStatisticalInfo _statistical_info; // Statistics about the thread @@ -418,7 +418,7 @@ class Thread: public ThreadShadow { void incr_allocated_bytes(jlong size) { _allocated_bytes += size; } inline jlong cooked_allocated_bytes(); - ThreadHeapSampler& heap_sampler() { return _heap_sampler; } + ThreadHeapSamplers& heap_samplers() { return _heap_samplers; } ThreadStatisticalInfo& statistical_info() { return _statistical_info; } diff --git a/src/hotspot/share/runtime/threadHeapSampler.cpp b/src/hotspot/share/runtime/threadHeapSampler.cpp index 26a0aa3793bac..f958988c20ad1 100644 --- a/src/hotspot/share/runtime/threadHeapSampler.cpp +++ b/src/hotspot/share/runtime/threadHeapSampler.cpp @@ -36,10 +36,8 @@ #include "oops/oop.inline.hpp" #include "utilities/dtrace.hpp" -// Cheap random number generator. -uint64_t ThreadHeapSampler::_rnd; -// Default is 512kb. -volatile int ThreadHeapSampler::_sampling_interval = 512 * 1024; +volatile int ThreadHeapSamplers::_jvmti_sampling_interval = 512 * 1024; // 512KiB default +volatile int ThreadHeapSamplers::_jfr_sampling_interval = -1; // disabled // Statics for the fast log static const int FastLogNumBits = 10; @@ -397,7 +395,7 @@ void ThreadHeapSampler::pick_next_geometric_sample() { // negative answer. double log_val = (fast_log2(q) - 26); double result = - (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_interval())) + 1; + (0.0 < log_val ? 
0.0 : log_val) * (-log(2.0) * (get_interval())) + 1; assert(result > 0 && result < static_cast<double>(SIZE_MAX), "Result is not in an acceptable range."); size_t interval = static_cast<size_t>(result); _bytes_until_sample = interval; @@ -411,35 +409,66 @@ void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) { #endif // Explicitly test if the sampling interval is 0, return 0 to sample every // allocation. - if (get_sampling_interval() == 0) { + int interval = get_interval(); + if (interval == 0) { _bytes_until_sample = 0; return; + } else if (interval < 0) { + _bytes_until_sample = static_cast<size_t>(-1); + return; } pick_next_geometric_sample(); } -void ThreadHeapSampler::check_for_sampling(oop obj, size_t allocation_size, size_t bytes_since_allocation) { - size_t total_allocated_bytes = bytes_since_allocation + allocation_size; +bool ThreadHeapSampler::check_for_sampling(size_t* bytes_since_allocation, size_t allocation_size, bool in_tlab) { + size_t processed_bytes = in_tlab ? _bytes_since_last_sample_point : 0; + + size_t total_allocated_bytes = processed_bytes + allocation_size; + *bytes_since_allocation = processed_bytes; // If not yet time for a sample, skip it. 
if (total_allocated_bytes < _bytes_until_sample) { _bytes_until_sample -= total_allocated_bytes; - return; + return false; } - HOTSPOT_GC_ALLOCOBJECT_SAMPLE(obj->klass()->name()->as_C_string(), allocation_size, bytes_since_allocation); - - JvmtiExport::sampled_object_alloc_event_collector(obj); - size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample; pick_next_sample(overflow_bytes); + return true; } int ThreadHeapSampler::get_sampling_interval() { - return Atomic::load_acquire(&_sampling_interval); + return ThreadHeapSamplers::get_jvmti_sampling_interval(); } void ThreadHeapSampler::set_sampling_interval(int sampling_interval) { - Atomic::release_store(&_sampling_interval, sampling_interval); + ThreadHeapSamplers::set_jvmti_sampling_interval(sampling_interval); +} + +void ThreadHeapSampler::update_bytes(size_t bytes, bool reset) { + if (reset) { + _bytes_since_last_sample_point = 0; + } + _bytes_since_last_sample_point += bytes; +} + +void ThreadHeapSamplers::set_jvmti_sampling_interval(int interval) { + Atomic::release_store(&_jvmti_sampling_interval, interval); +} + +int ThreadHeapSamplers::get_jvmti_sampling_interval() { + return Atomic::load_acquire(&_jvmti_sampling_interval); +} + +void ThreadHeapSamplers::set_jfr_sampling_interval(int interval) { + Atomic::release_store(&_jfr_sampling_interval, interval); +} + +int ThreadHeapSamplers::get_jfr_sampling_interval() { + return Atomic::load_acquire(&_jfr_sampling_interval); +} + +int ThreadHeapSampler::get_interval() { + return Atomic::load_acquire(_sampling_interval_ref); } diff --git a/src/hotspot/share/runtime/threadHeapSampler.hpp b/src/hotspot/share/runtime/threadHeapSampler.hpp index 6f77547fc8390..40a2bf39fd24d 100644 --- a/src/hotspot/share/runtime/threadHeapSampler.hpp +++ b/src/hotspot/share/runtime/threadHeapSampler.hpp @@ -26,15 +26,22 @@ #ifndef SHARE_RUNTIME_THREADHEAPSAMPLER_HPP #define SHARE_RUNTIME_THREADHEAPSAMPLER_HPP +#include "gc/shared/threadLocalAllocBuffer.hpp" #include 
"memory/allocation.hpp" +class ThreadHeapSamplers; + class ThreadHeapSampler { + friend ThreadHeapSamplers; + private: - size_t _bytes_until_sample; + volatile size_t _bytes_until_sample; + volatile size_t _bytes_since_last_sample_point; + // Cheap random number generator - static uint64_t _rnd; + uint64_t _rnd; - static volatile int _sampling_interval; + volatile int* _sampling_interval_ref; void pick_next_geometric_sample(); void pick_next_sample(size_t overflowed_bytes = 0); @@ -42,8 +49,11 @@ class ThreadHeapSampler { static double fast_log2(const double& d); uint64_t next_random(uint64_t rnd); + int get_interval(); + + volatile bool _active_flag; public: - ThreadHeapSampler() { + ThreadHeapSampler(volatile int* sampling_interval_ref) : _bytes_until_sample(0), _bytes_since_last_sample_point(0), _sampling_interval_ref(sampling_interval_ref), _active_flag(true) { _rnd = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); if (_rnd == 0) { _rnd = 1; @@ -53,12 +63,53 @@ class ThreadHeapSampler { pick_next_sample(); } - size_t bytes_until_sample() { return _bytes_until_sample; } + inline size_t bytes_until_sample() const { + return _bytes_until_sample; + } + + inline size_t bytes_since_last_sample_point() const { + return _bytes_since_last_sample_point; + } + + void update_bytes(size_t bytes, bool reset); - void check_for_sampling(oop obj, size_t size_in_bytes, size_t bytes_allocated_before); + bool check_for_sampling(size_t* bytes_since_allocation, size_t size_in_bytes, bool in_tlab); + // TODO: For compatibility purposes only static void set_sampling_interval(int sampling_interval); static int get_sampling_interval(); + + inline bool is_active() const { + return _active_flag; + } +}; + +class ThreadHeapSamplers { + private: + static volatile int _jvmti_sampling_interval; + static volatile int _jfr_sampling_interval; + + ThreadHeapSampler _jvmti; + ThreadHeapSampler _jfr; + + + + public: + ThreadHeapSamplers() : _jvmti(&_jvmti_sampling_interval), _jfr(&_jfr_sampling_interval) { + 
} + + ThreadHeapSampler& jvmti() { + return _jvmti; + } + + ThreadHeapSampler& jfr() { + return _jfr; + } + + static void set_jvmti_sampling_interval(int interval); + static int get_jvmti_sampling_interval(); + static void set_jfr_sampling_interval(int interval); + static int get_jfr_sampling_interval(); }; #endif // SHARE_RUNTIME_THREADHEAPSAMPLER_HPP