From c92591373a330fe2dca120c09879f829e76098c2 Mon Sep 17 00:00:00 2001 From: howsohazard <143410553+howsohazard@users.noreply.github.com> Date: Mon, 4 Dec 2023 12:03:32 -0500 Subject: [PATCH] 18578: Improves performance by tightening inner interpreter loops and hash map functions, evens out garbage collection thresholds (#39) Co-authored-by: J. Caleb Wherry <337871+calebwherry@users.noreply.github.com> --- src/3rd_party/skarupke_maps/flat_hash_map.hpp | 146 +++++++++--------- src/Amalgam/ThreadPool.cpp | 20 ++- src/Amalgam/ThreadPool.h | 11 +- .../evaluablenode/EvaluableNodeManagement.cpp | 42 +---- .../evaluablenode/EvaluableNodeManagement.h | 55 ++++++- src/Amalgam/interpreter/Interpreter.cpp | 12 +- src/Amalgam/interpreter/Interpreter.h | 27 ++-- 7 files changed, 176 insertions(+), 137 deletions(-) diff --git a/src/3rd_party/skarupke_maps/flat_hash_map.hpp b/src/3rd_party/skarupke_maps/flat_hash_map.hpp index 222fc901..13d1a38c 100644 --- a/src/3rd_party/skarupke_maps/flat_hash_map.hpp +++ b/src/3rd_party/skarupke_maps/flat_hash_map.hpp @@ -47,12 +47,12 @@ struct functor_storage : Functor { } template - Result operator()(Args &&... args) + inline Result operator()(Args &&... args) { return static_cast(*this)(std::forward(args)...); } template - Result operator()(Args &&... args) const + inline Result operator()(Args &&... args) const { return static_cast(*this)(std::forward(args)...); } @@ -66,15 +66,15 @@ struct functor_storage : function(function) { } - Result operator()(Args... args) const + inline Result operator()(Args... args) const { return function(std::forward(args)...); } - operator function_ptr &() + inline operator function_ptr &() { return function; } - operator const function_ptr &() + inline operator const function_ptr &() { return function; } @@ -88,29 +88,29 @@ struct KeyOrValueHasher : functor_storage : hasher_storage(hash) { } - size_t operator()(const key_type & key) + inline size_t operator()(const key_type & key) { return static_cast(*this)(key); } - size_t operator()(const key_type & key) const + inline size_t operator()(const key_type & key) const { return static_cast(*this)(key); } - size_t operator()(const value_type & value) + inline size_t operator()(const value_type & value) { return static_cast(*this)(value.first); } - size_t operator()(const value_type & value) const + inline size_t operator()(const value_type & value) const { return static_cast(*this)(value.first); } template - size_t operator()(const std::pair & value) + inline size_t operator()(const std::pair & value) { return static_cast(*this)(value.first); } template - size_t operator()(const std::pair & value) const + inline size_t operator()(const std::pair & value) const { return static_cast(*this)(value.first); } @@ -124,44 +124,44 @@ struct KeyOrValueEquality : functor_storage : equality_storage(equality) { } - bool operator()(const key_type & lhs, const key_type & rhs) + inline bool operator()(const key_type & lhs, const key_type & rhs) { return static_cast(*this)(lhs, rhs); } - bool operator()(const key_type & lhs, const value_type & rhs) + inline bool operator()(const key_type & lhs, const value_type & rhs) { return static_cast(*this)(lhs, rhs.first); } - bool operator()(const value_type & lhs, const key_type & rhs) + inline bool operator()(const value_type & lhs, const key_type & rhs) { return static_cast(*this)(lhs.first, rhs); } - bool operator()(const value_type & lhs, const value_type & rhs) + inline bool operator()(const value_type & lhs, const value_type & rhs) { return static_cast(*this)(lhs.first, rhs.first); } template - bool operator()(const key_type & lhs, const std::pair & rhs) + inline bool operator()(const key_type & lhs, const std::pair & rhs) { return static_cast(*this)(lhs, rhs.first); } template - bool operator()(const std::pair & lhs, const key_type & rhs) + inline bool operator()(const std::pair & lhs, const key_type & rhs) { return static_cast(*this)(lhs.first, rhs); } template - bool operator()(const value_type & lhs, const std::pair & rhs) + inline bool operator()(const value_type & lhs, const std::pair & rhs) { return static_cast(*this)(lhs.first, rhs.first); } template - bool operator()(const std::pair & lhs, const value_type & rhs) + inline bool operator()(const std::pair & lhs, const value_type & rhs) { return static_cast(*this)(lhs.first, rhs.first); } template - bool operator()(const std::pair & lhs, const std::pair & rhs) + inline bool operator()(const std::pair & lhs, const std::pair & rhs) { return static_cast(*this)(lhs.first, rhs.first); } @@ -170,10 +170,10 @@ static constexpr int8_t min_lookups = 4; template struct sherwood_v3_entry { - sherwood_v3_entry() + inline sherwood_v3_entry() { } - sherwood_v3_entry(int8_t distance_from_desired) + inline sherwood_v3_entry(int8_t distance_from_desired) : distance_from_desired(distance_from_desired) { } @@ -186,15 +186,15 @@ struct sherwood_v3_entry return result; } - bool has_value() const + inline bool has_value() const { return distance_from_desired >= 0; } - bool is_empty() const + inline bool is_empty() const { return distance_from_desired < 0; } - bool is_at_desired_position() const + inline bool is_at_desired_position() const { return distance_from_desired <= 0; } @@ -205,7 +205,7 @@ struct sherwood_v3_entry distance_from_desired = distance; } - void destroy_value() + inline void destroy_value() { value.~T(); distance_from_desired = -1; @@ -298,7 +298,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal public: #ifdef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION - static constexpr float _max_load_factor = 0.5f; + static constexpr float _max_load_factor = 0.5f; #endif using value_type = T; @@ -370,7 +370,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal sherwood_v3_table(const sherwood_v3_table & other, const ArgumentAlloc & alloc) : EntryAlloc(alloc), Hasher(other), Equal(other) #ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION - , _max_load_factor(other._max_load_factor) + , _max_load_factor(other._max_load_factor) #endif { rehash_for_other_container(other); @@ -454,15 +454,15 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal deallocate_data(entries, num_slots_minus_one, max_lookups); } - const allocator_type & get_allocator() const + inline const allocator_type & get_allocator() const { return static_cast(*this); } - const ArgumentEqual & key_eq() const + inline const ArgumentEqual & key_eq() const { return static_cast(*this); } - const ArgumentHash & hash_function() const + inline const ArgumentHash & hash_function() const { return static_cast(*this); } @@ -483,11 +483,11 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal using pointer = ValueType *; using reference = ValueType &; - friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs) + inline friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs) { return lhs.current == rhs.current; } - friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs) + inline friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs) { return !(lhs == rhs); } @@ -508,16 +508,16 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal return copy; } - ValueType & operator*() const + inline ValueType & operator*() const { return current->value; } - ValueType * operator->() const + inline ValueType * operator->() const { return std::addressof(current->value); } - operator templated_iterator() const + inline operator templated_iterator() const { return { current }; } @@ -541,7 +541,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal return { it }; } } - const_iterator cbegin() const + inline const_iterator cbegin() const { return begin(); } @@ -553,7 +553,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal { return { entries + static_cast(num_slots_minus_one + max_lookups) }; } - const_iterator cend() const + inline const_iterator cend() const { return end(); } @@ -569,11 +569,11 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal } return end(); } - const_iterator find(const FindKey & key) const + inline const_iterator find(const FindKey & key) const { return const_cast(this)->find(key); } - size_t count(const FindKey & key) const + inline size_t count(const FindKey & key) const { return find(key) == end() ? 0 : 1; } @@ -608,37 +608,37 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal return emplace_new_key(distance_from_desired, current_entry, std::forward(key), std::forward(args)...); } - std::pair insert(const value_type & value) + inline std::pair insert(const value_type & value) { return emplace(value); } - std::pair insert(value_type && value) + inline std::pair insert(value_type && value) { return emplace(std::move(value)); } template - iterator emplace_hint(const_iterator, Args &&... args) + inline iterator emplace_hint(const_iterator, Args &&... args) { return emplace(std::forward(args)...).first; } - iterator insert(const_iterator, const value_type & value) + inline iterator insert(const_iterator, const value_type & value) { return emplace(value).first; } - iterator insert(const_iterator, value_type && value) + inline iterator insert(const_iterator, value_type && value) { return emplace(std::move(value)).first; } template - void insert(It begin, It end) + inline void insert(It begin, It end) { for (; begin != end; ++begin) { emplace(*begin); } } - void insert(std::initializer_list il) + inline void insert(std::initializer_list il) { insert(il.begin(), il.end()); } @@ -751,7 +751,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal num_elements = 0; } - void shrink_to_fit() + inline void shrink_to_fit() { rehash_for_other_container(*this); } @@ -766,7 +766,7 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal swap(static_cast(*this), static_cast(other)); } - size_t size() const + inline size_t size() const { return num_elements; } @@ -800,12 +800,12 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal _max_load_factor = value; #endif } - float max_load_factor() const + inline float max_load_factor() const { return _max_load_factor; } - bool empty() const + inline bool empty() const { return num_elements == 0; } @@ -818,9 +818,9 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal float _max_load_factor = 0.5f; #endif size_t num_elements = 0; - int8_t max_lookups = detailv3::min_lookups - 1; + int8_t max_lookups = detailv3::min_lookups - 1; - static int8_t compute_max_lookups(size_t num_buckets) + static int8_t compute_max_lookups(size_t num_buckets) { int8_t desired = detailv3::log2(num_buckets); return std::max(detailv3::min_lookups, desired); @@ -917,17 +917,17 @@ class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal } template - size_t hash_object(const U & key) + inline size_t hash_object(const U & key) { return static_cast(*this)(key); } template - size_t hash_object(const U & key) const + inline size_t hash_object(const U & key) const { return static_cast(*this)(key); } template - bool compares_equal(const L & lhs, const R & rhs) + inline bool compares_equal(const L & lhs, const R & rhs) { return static_cast(*this)(lhs, rhs); } @@ -1271,23 +1271,23 @@ struct prime_number_hash_policy struct power_of_two_hash_policy { - size_t index_for_hash(size_t hash, size_t num_slots_minus_one) const + inline size_t index_for_hash(size_t hash, size_t num_slots_minus_one) const { return hash & num_slots_minus_one; } - size_t keep_in_range(size_t index, size_t num_slots_minus_one) const + inline size_t keep_in_range(size_t index, size_t num_slots_minus_one) const { return index_for_hash(index, num_slots_minus_one); } - int8_t next_size_over(size_t & size) const + inline int8_t next_size_over(size_t & size) const { size = detailv3::next_power_of_two(size); return 0; } - void commit(int8_t) + inline void commit(int8_t) { } - void reset() + inline void reset() { } @@ -1295,11 +1295,11 @@ struct power_of_two_hash_policy struct fibonacci_hash_policy { - size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const + inline size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const { return (11400714819323198485ull * hash) >> shift; } - size_t keep_in_range(size_t index, size_t num_slots_minus_one) const + inline size_t keep_in_range(size_t index, size_t num_slots_minus_one) const { return index & num_slots_minus_one; } @@ -1309,11 +1309,11 @@ struct fibonacci_hash_policy size = std::max(size_t(2), detailv3::next_power_of_two(size)); return 64 - detailv3::log2(size); } - void commit(int8_t shift) + inline void commit(int8_t shift) { this->shift = shift; } - void reset() + inline void reset() { shift = 63; } @@ -1381,7 +1381,7 @@ class flat_hash_map } using Table::emplace; - std::pair emplace() + inline std::pair emplace() { return emplace(key_type(), convertible_to_value()); } @@ -1434,7 +1434,7 @@ class flat_hash_map private: struct convertible_to_value { - operator V() const + inline operator V() const { return V(); } @@ -1476,23 +1476,23 @@ class flat_hash_set } template - std::pair emplace(Args &&... args) + inline std::pair emplace(Args &&... args) { return Table::emplace(T(std::forward(args)...)); } - std::pair emplace(const key_type & arg) + inline std::pair emplace(const key_type & arg) { return Table::emplace(arg); } - std::pair emplace(key_type & arg) + inline std::pair emplace(key_type & arg) { return Table::emplace(arg); } - std::pair emplace(const key_type && arg) + inline std::pair emplace(const key_type && arg) { return Table::emplace(std::move(arg)); } - std::pair emplace(key_type && arg) + inline std::pair emplace(key_type && arg) { return Table::emplace(std::move(arg)); } @@ -1508,7 +1508,7 @@ class flat_hash_set } return true; } - friend bool operator!=(const flat_hash_set & lhs, const flat_hash_set & rhs) + inline friend bool operator!=(const flat_hash_set & lhs, const flat_hash_set & rhs) { return !(lhs == rhs); } diff --git a/src/Amalgam/ThreadPool.cpp b/src/Amalgam/ThreadPool.cpp index d7d66a43..26407e3c 100644 --- a/src/Amalgam/ThreadPool.cpp +++ b/src/Amalgam/ThreadPool.cpp @@ -14,7 +14,8 @@ ThreadPool::ThreadPool(size_t max_num_threads) void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) { - std::unique_lock lock(threadsMutex); + std::unique_lock threads_lock(threadsMutex); + std::unique_lock queue_lock(taskQueueMutex); //don't need to change anything if(new_max_num_threads == threads.size()) @@ -38,6 +39,11 @@ void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) threads.emplace_back( [this] { + //count this thread as active during startup + //this is important, as the inner loop assumes the default state of the thread is to count itself + //so the number of threads doesn't change when switching between a completed task and a new one + numActiveThreads++; + //infinite loop waiting for work for(;;) { @@ -51,6 +57,8 @@ void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) //if no more work, wait until shutdown or more work if(taskQueue.empty()) { + numActiveThreads--; + //wait until either shutting down or more work has been added waitForTask.wait(lock, [this] { return shutdownThreads || !taskQueue.empty(); }); @@ -58,19 +66,18 @@ void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) //only can make it here if shutting down (otherwise taskQueue has something in it) if(shutdownThreads) return; + + //got a task, resuming the thread + numActiveThreads++; } //take ownership of the task so it can be destructed when complete // (won't increment shared_ptr counter) task = std::move(taskQueue.front()); taskQueue.pop(); - - //count the thread as active before releasing the lock - numActiveThreads++; } task(); - numActiveThreads--; } } ); @@ -78,7 +85,8 @@ void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) //notify all just in case a new task was added as the threads were being created // but unlock to allow threads to proceed - lock.unlock(); + threads_lock.unlock(); + queue_lock.unlock(); waitForTask.notify_all(); } diff --git a/src/Amalgam/ThreadPool.h b/src/Amalgam/ThreadPool.h index eeec94ed..6d1da074 100644 --- a/src/Amalgam/ThreadPool.h +++ b/src/Amalgam/ThreadPool.h @@ -26,10 +26,19 @@ class ThreadPool return numActiveThreads; } + //returns the current maximum number of threads that are available + inline size_t GetCurrentMaxNumThreads() + { + std::unique_lock lock(threadsMutex); + return threads.size(); + } + //returns a vector of the thread ids for the thread pool inline std::vector GetThreadIds() { std::vector thread_ids; + + std::unique_lock lock(threadsMutex); thread_ids.reserve(threads.size() + 1); thread_ids.push_back(mainThreadId); for(std::thread &worker : threads) @@ -147,7 +156,7 @@ class ThreadPool //need to make sure there's at least one extra thread available to make sure that this batch of tasks can be run // in case there are any interdependencies, in order to prevent deadlock if(taskQueue.size() + numActiveThreads >= threads.size()) - btel.MarkAsNoThreadsAvailable(); + btel.MarkAsNoThreadsAvailable(); } return btel; diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp index 104b91ab..e9253ee0 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp @@ -13,13 +13,13 @@ Concurrency::ReadWriteMutex EvaluableNodeManager::memoryModificationMutex; #endif const double EvaluableNodeManager::allocExpansionFactor = 1.5; -const ExecutionCycleCountCompactDelta EvaluableNodeManager::minCycleCountBetweenGarbageCollects = 150000; - -EvaluableNodeManager::EvaluableNodeManager() -{ - firstUnusedNodeIndex = 0; - executionCyclesSinceLastGarbageCollection = 0; -} +#ifdef MULTITHREAD_SUPPORT +const ExecutionCycleCountCompactDelta EvaluableNodeManager::minCycleCountBetweenGarbageCollectsPerThread = 150000; +#else +//make the next value constant if no threads +const +#endif +ExecutionCycleCountCompactDelta EvaluableNodeManager::minCycleCountBetweenGarbageCollects = 150000; EvaluableNodeManager::~EvaluableNodeManager() { @@ -163,34 +163,6 @@ EvaluableNode *EvaluableNodeManager::AllocListNodeWithOrderedChildNodes(Evaluabl return retval; } -bool EvaluableNodeManager::RecommendGarbageCollection() -{ - //makes sure to perform garbage collection between every opcode to find memory reference errors -#ifdef PEDANTIC_GARBAGE_COLLECTION - return true; -#endif - -#ifdef MULTITHREAD_SUPPORT - if(executionCyclesSinceLastGarbageCollection > minCycleCountBetweenGarbageCollects * static_cast(Concurrency::threadPool.GetNumActiveThreads())) -#else - if(executionCyclesSinceLastGarbageCollection > minCycleCountBetweenGarbageCollects) -#endif - { - auto cur_size = GetNumberOfUsedNodes(); - - size_t next_expansion_size = static_cast(cur_size * allocExpansionFactor); - if(next_expansion_size < nodes.size()) - { - executionCyclesSinceLastGarbageCollection = 0; - return false; - } - - return true; - } - - return false; -} - #ifdef MULTITHREAD_SUPPORT void EvaluableNodeManager::CollectGarbage(Concurrency::ReadLock *memory_modification_lock) #else diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h index 58ec462f..02704937 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h +++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h @@ -124,7 +124,9 @@ class EvaluableNodeStackStateSaver class EvaluableNodeManager { public: - EvaluableNodeManager(); + EvaluableNodeManager() : + executionCyclesSinceLastGarbageCollection(0), firstUnusedNodeIndex(0) + { } ~EvaluableNodeManager(); @@ -329,7 +331,30 @@ class EvaluableNodeManager } //heuristic used to determine whether unused memory should be collected (e.g., by FreeAllNodesExcept*) - bool RecommendGarbageCollection(); + //force this inline because it occurs in inner loops + __forceinline bool RecommendGarbageCollection() + { + //makes sure to perform garbage collection between every opcode to find memory reference errors + #ifdef PEDANTIC_GARBAGE_COLLECTION + return true; + #endif + + if(executionCyclesSinceLastGarbageCollection > minCycleCountBetweenGarbageCollects) + { + auto cur_size = GetNumberOfUsedNodes(); + + size_t next_expansion_size = static_cast(cur_size * allocExpansionFactor); + if(next_expansion_size < nodes.size()) + { + executionCyclesSinceLastGarbageCollection = 0; + return false; + } + + return true; + } + + return false; + } //moves garbage collection to be more likely to be triggered next time CollectGarbage is called __forceinline void AdvanceGarbageCollectionTrigger() @@ -672,8 +697,21 @@ class EvaluableNodeManager static void ValidateEvaluableNodeTreeMemoryIntegrityRecurse(EvaluableNode *en, EvaluableNode::ReferenceSetType &checked); -#ifdef MULTITHREAD_SUPPORT +#ifdef MULTITHREAD_SUPPORT public: + + //updates garbage collection process based on current number of threads and number of tasks + static inline void UpdateMinCycleCountBetweenGarbageCollectsBasedOnThreads(size_t num_tasks) + { + //can't go above the max number of threads + num_tasks = std::min(num_tasks, Concurrency::threadPool.GetCurrentMaxNumThreads()); + //don't want to go below the number of threads being used by other things + num_tasks = std::max(num_tasks, Concurrency::threadPool.GetNumActiveThreads()); + + minCycleCountBetweenGarbageCollects = minCycleCountBetweenGarbageCollectsPerThread + * static_cast(num_tasks); + } + //mutex to manage attributes of manager, including operations such as // memory allocation, reference management, etc. Concurrency::ReadWriteMutex managerAttributesMutex; @@ -707,6 +745,13 @@ class EvaluableNodeManager //extra space to allocate when allocating static const double allocExpansionFactor; - //minimum number of cycles between collects as to not spend too much time garbage collecting - static const ExecutionCycleCountCompactDelta minCycleCountBetweenGarbageCollects; +#ifdef MULTITHREAD_SUPPORT + //minimum number of cycles between collects per thread + static const ExecutionCycleCountCompactDelta minCycleCountBetweenGarbageCollectsPerThread; +#else + //make the next value constant if no threads + const +#endif + //current number of cycles between collects based on number of threads + static ExecutionCycleCountCompactDelta minCycleCountBetweenGarbageCollects; }; diff --git a/src/Amalgam/interpreter/Interpreter.cpp b/src/Amalgam/interpreter/Interpreter.cpp index c02a2a8c..4f5d7263 100644 --- a/src/Amalgam/interpreter/Interpreter.cpp +++ b/src/Amalgam/interpreter/Interpreter.cpp @@ -775,21 +775,21 @@ bool Interpreter::InterpretEvaluableNodesConcurrently(EvaluableNode *parent_node if(!parent_node->GetConcurrency()) return false; - size_t num_elements = nodes.size(); - if(num_elements < 2) + size_t num_tasks = nodes.size(); + if(num_tasks < 2) return false; auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); if(!enqueue_task_lock.AreThreadsAvailable()) return false; - ConcurrencyManager concurrency_manager(this, num_elements); + ConcurrencyManager concurrency_manager(this, num_tasks); //kick off interpreters - for(size_t element_index = 0; element_index < num_elements; element_index++) + for(size_t task_index = 0; task_index < num_tasks; task_index++) { - auto &interpreter = *concurrency_manager.interpreters[element_index]; - EvaluableNode *node_to_execute = nodes[element_index]; + auto &interpreter = *concurrency_manager.interpreters[task_index]; + EvaluableNode *node_to_execute = nodes[task_index]; concurrency_manager.resultFutures.emplace_back( Concurrency::threadPool.EnqueueBatchTask( diff --git a/src/Amalgam/interpreter/Interpreter.h b/src/Amalgam/interpreter/Interpreter.h index 5347f834..30dc3b1f 100644 --- a/src/Amalgam/interpreter/Interpreter.h +++ b/src/Amalgam/interpreter/Interpreter.h @@ -394,18 +394,18 @@ class Interpreter public: //constructs the concurrency manager. Assumes parent_interpreter is NOT null - ConcurrencyManager(Interpreter *parent_interpreter, size_t num_elements) + ConcurrencyManager(Interpreter *parent_interpreter, size_t num_tasks) { parentInterpreter = parent_interpreter; - numElements = num_elements; + numTasks = num_tasks; //set up data - interpreters.reserve(numElements); - resultFutures.reserve(numElements); + interpreters.reserve(numTasks); + resultFutures.reserve(numTasks); size_t max_execution_steps_per_element = 0; if(parentInterpreter->maxNumExecutionSteps > 0) - max_execution_steps_per_element = (parentInterpreter->maxNumExecutionSteps - parentInterpreter->GetNumStepsExecuted()) / numElements; + max_execution_steps_per_element = (parentInterpreter->maxNumExecutionSteps - parentInterpreter->GetNumStepsExecuted()) / numTasks; //since each thread has a copy of the constructionStackNodes, it's possible that more than one of the threads //obtains previous_results, so they must all be marked as not unique @@ -413,7 +413,7 @@ class Interpreter //set up all the interpreters // do this as its own loop to make sure that the vector memory isn't reallocated once the threads have kicked off - for(size_t element_index = 0; element_index < numElements; element_index++) + for(size_t element_index = 0; element_index < numTasks; element_index++) { //create interpreter interpreters.emplace_back(std::make_unique(parentInterpreter->evaluableNodeManager, max_execution_steps_per_element, parentInterpreter->maxNumExecutionNodes, @@ -421,6 +421,8 @@ class Interpreter parentInterpreter->writeListeners, parentInterpreter->printListener, parentInterpreter->curEntity)); } + EvaluableNodeManager::UpdateMinCycleCountBetweenGarbageCollectsBasedOnThreads(num_tasks); + //begins concurrency over all interpreters parentInterpreter->memoryModificationLock.unlock(); } @@ -482,6 +484,9 @@ class Interpreter Concurrency::threadPool.CountCurrentThreadAsResumed(); + //merged back to one task (this method will attempt to account for other concurrency) + EvaluableNodeManager::UpdateMinCycleCountBetweenGarbageCollectsBasedOnThreads(1); + parentInterpreter->memoryModificationLock.lock(); } @@ -491,11 +496,11 @@ class Interpreter inline std::vector GetResultsAndFreeReferences() { std::vector results; - results.resize(numElements); + results.resize(numTasks); //fill in results from result_futures and free references // note that std::future becomes invalid once get is called - for(size_t i = 0; i < numElements; i++) + for(size_t i = 0; i < numTasks; i++) results[i] = resultFutures[i].get(); parentInterpreter->evaluableNodeManager->FreeNodeReferences(results); @@ -514,10 +519,10 @@ class Interpreter return &callStackWriteMutex; } - //interpreters run concurrently, the size of numElements + //interpreters run concurrently, the size of numTasks std::vector> interpreters; - //where results are placed, the size of numElements + //where results are placed, the size of numTasks std::vector> resultFutures; //mutex to allow only one thread to write to a call stack symbol at once @@ -528,7 +533,7 @@ class Interpreter Interpreter *parentInterpreter; //the number of elements being processed - size_t numElements; + size_t numTasks; }; //computes the nodes concurrently and stores the interpreted values into interpreted_nodes