Skip to content

Commit

Permalink
21198: Fixes bugs that can cause segfault, another bug that could cau…
Browse files Browse the repository at this point in the history
…se false positive corrupt memory diagnosis (#219)
  • Loading branch information
howsohazard authored Aug 10, 2024
1 parent 9226915 commit fd5a9a6
Show file tree
Hide file tree
Showing 24 changed files with 214 additions and 236 deletions.
4 changes: 2 additions & 2 deletions src/Amalgam/AssetManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ class AssetManager
static bool StoreResourcePathFromProcessedResourcePaths(EvaluableNode *code, std::string &complete_resource_path,
std::string &file_type, EvaluableNodeManager *enm, bool escape_filename, bool sort_keys);

//Loads an entity, including contained entites, etc. from the resource path specified
//Loads an entity, including contained entities, etc. from the resource path specified
//if file_type is not an empty string, it will use the specified file_type instead of the filename's extension
// if persistent is true, then it will keep the resource updated based on any calls to UpdateEntity
//if the resource does not have a metadata file, will use default_random_seed as its seed
Entity *LoadEntityFromResourcePath(std::string &resource_path, std::string &file_type, bool persistent,
bool load_contained_entities, bool escape_filename, bool escape_contained_filenames,
std::string default_random_seed, Interpreter *calling_interpreter, EntityExternalInterface::LoadEntityStatus &status);

//Stores an entity, including contained entites, etc. from the resource path specified
//Stores an entity, including contained entities, etc. from the resource path specified
//if file_type is not an empty string, it will use the specified file_type instead of the filename's extension
// if persistent is true, then it will keep the resource updated based on any calls to UpdateEntity (will not make not persistent if was previously loaded as persistent)
// if all_contained_entities is nullptr, then it will be populated, as read locks are necessary for entities in multithreading
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/GeneralizedDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class GeneralizedDistanceEvaluator
static constexpr double s_surprisal_of_gaussian = 1.1283791670955126;
static constexpr double s_surprisal_of_gaussian_approx = 1.128615528679644;

//computes the Lukaszyk–Karmowski metric deviation component for the minkowski distance equation given the feature difference and feature deviation
//computes the Lukaszyk–Karmowski metric deviation component for the Minkowski distance equation given the feature difference and feature deviation
// and adds the deviation to diff. assumes deviation is nonnegative
//if surprisal_transform is true, then it will transform the result into surprisal space and remove the appropriate assumption of uncertainty
// for Laplace, the Laplace distribution has 1 nat worth of information, but additionally, there is a 50/50 chance that the
Expand Down Expand Up @@ -461,7 +461,7 @@ class GeneralizedDistanceEvaluator
return ComputeDistanceTermNominalUniversallySymmetricNonMatch(index, high_accuracy);
}

//exponentiats and weights the difference term contextually based on pValue
//exponentiates and weights the difference term contextually based on pValue
//note that it has extra logic to account for extreme values like infinity, negative infinity, and 0
__forceinline double ContextuallyExponentiateAndWeightDifferenceTerm(double dist_term, size_t index, bool high_accuracy)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/Merger.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ class Merger
virtual bool KeepAllNonMergeableValues() = 0;

//Returns true if the merge should keep some elements that do not have a corresponding element to merge with
//if KeepAllNonMergeableValues retursn true, then this should return true too
//if KeepAllNonMergeableValues returns true, then this should return true too
virtual bool KeepSomeNonMergeableValues() = 0;

//Returns true if the merge should keep one of either particular element, a or b, that does not have a corresponding element
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/Opcodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ void StringInternPool::InitializeStaticStrings()
EmplaceStaticString(ENBISI_json, "json");
EmplaceStaticString(ENBISI_yaml, "yaml");

//formapt opcode params
//format opcode params
EmplaceStaticString(ENBISI_sort_keys, "sort_keys");
EmplaceStaticString(ENBISI_locale, "locale");
EmplaceStaticString(ENBISI_timezone, "timezone");
Expand Down
6 changes: 3 additions & 3 deletions src/Amalgam/SBFDSColumnData.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class SBFDSColumnData
{
codeIndices.insert(index);

//find the entities that have the correspending size; if the size doesn't exist, create it
//find the entities that have the corresponding size; if the size doesn't exist, create it
size_t code_size = EvaluableNode::GetDeepSize(value.code);

auto [size_entry, inserted] = valueCodeSizeToIndices.emplace(code_size, nullptr);
Expand Down Expand Up @@ -532,7 +532,7 @@ class SBFDSColumnData
{
codeIndices.erase(index);

//find the entities that have the correspending size
//find the entities that have the corresponding size
size_t num_indices = EvaluableNode::GetDeepSize(value.code);
auto id_entry = valueCodeSizeToIndices.find(num_indices);
if(id_entry == end(valueCodeSizeToIndices))
Expand Down Expand Up @@ -672,7 +672,7 @@ class SBFDSColumnData
//value_type == ENIVT_CODE
codeIndices.insert(index);

//find the entities that have the correspending size; if the size doesn't exist, create it
//find the entities that have the corresponding size; if the size doesn't exist, create it
size_t code_size = EvaluableNode::GetDeepSize(value.code);

auto [size_entry, inserted] = valueCodeSizeToIndices.emplace(code_size, nullptr);
Expand Down
25 changes: 23 additions & 2 deletions src/Amalgam/amlg_code/test.amlg
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
(seq
(print (tail "abcdef" -10) "\n")
(print (tail "") "\n")

(declare (assoc
mymap
{
".f1_rate_1" {
auto_derive_on_train {
ordered_by_features ["date"]
}

}
".f3_rate_1" {

auto_derive_on_train {
ordered_by_features [ @(get (target 5) [".f1_rate_1" "auto_derive_on_train" "ordered_by_features" 0])]

}

}
}

))

(print mymap)
)
2 changes: 1 addition & 1 deletion src/Amalgam/entity/Entity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ std::pair<bool, bool> Entity::SetValuesAtLabels(EvaluableNodeReference new_label
EntityQueryCaches *container_caches = GetContainerQueryCaches();
if(direct_set)
{
//direct assigments need a rebuild of the index just in case a label collision occurs -- will update node flags if needed
//direct assignments need a rebuild of the index just in case a label collision occurs -- will update node flags if needed
RebuildLabelIndex();
if(container_caches != nullptr)
container_caches->UpdateAllEntityLabels(this, GetEntityIndexOfContainer());
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/entity/Entity.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class PrintListener;
//base class for accessing an entity via a reference
// includes everything that can be accessed via a read operation
// note that this class should not be used directly, which is why it does not yield access to edit entity other than nullptr
//need to templatize EntityType because can't foreward declare a method
//need to templatize EntityType because can't forward declare a method
template<typename EntityType = Entity>
class EntityReference
{
Expand Down
10 changes: 5 additions & 5 deletions src/Amalgam/entity/EntityQueriesStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ class EntityQueriesStatistics
{
const auto &[curr_value, curr_weight] = value_weights[i];

//calculuate cdf term
//calculate cdf term
double cdf_term = 0.0;
accum_weight += value_weights[i].second;
cdf_term += accum_weight - 0.5 * value_weights[i].second;
Expand Down Expand Up @@ -507,7 +507,7 @@ class EntityQueriesStatistics
}
}

//can divide at the end because multiplication is associative and communative
//can divide at the end because multiplication is associative and commutative
mean /= weights_sum;
}
else if(p_value == 2.0) // root mean square (quadratic)
Expand All @@ -530,7 +530,7 @@ class EntityQueriesStatistics
}
}

//can divide at the end because multiplication is associative and communative
//can divide at the end because multiplication is associative and commutative
mean /= weights_sum;
if(!calculate_moment)
mean = std::sqrt(mean);
Expand Down Expand Up @@ -589,7 +589,7 @@ class EntityQueriesStatistics
}
}

//can divide at the end because multiplication is associative and communative
//can divide at the end because multiplication is associative and commutative
mean /= weights_sum;
if(!calculate_moment)
mean = (1.0 / mean);
Expand All @@ -613,7 +613,7 @@ class EntityQueriesStatistics
}
}

//can divide at the end because multiplication is associative and communative
//can divide at the end because multiplication is associative and commutative
mean /= weights_sum;
if(!calculate_moment)
mean = std::pow(mean, 1.0 / p_value);
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/entity/EntityQueryCaches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ void EntityQueryCaches::GetMatchingEntitiesViaSamplingWithReplacement(EntityQuer
EvaluableNodeReference EntityQueryCaches::GetMatchingEntitiesFromQueryCaches(Entity *container,
std::vector<EntityQueryCondition> &conditions, EvaluableNodeManager *enm, bool return_query_value)
{
//get the label existance cache associated with this container
//get the cache associated with this container
// use the first condition as an heuristic for building it if it doesn't exist
EntityQueryCaches *entity_caches = container->GetQueryCaches();

Expand Down
27 changes: 16 additions & 11 deletions src/Amalgam/evaluablenode/EvaluableNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@
#include <cctype>
#include <iomanip>

double EvaluableNode::zeroNumberValue = 0.0;
std::string EvaluableNode::emptyStringValue = "";
EvaluableNode *EvaluableNode::emptyEvaluableNodeNullptr = nullptr;
std::vector<std::string> EvaluableNode::emptyStringVector;
std::vector<StringInternPool::StringID> EvaluableNode::emptyStringIdVector;
std::vector<EvaluableNode *> EvaluableNode::emptyOrderedChildNodes;
EvaluableNode::AssocType EvaluableNode::emptyMappedChildNodes;

//field for watching EvaluableNodes for debugging
FastHashSet<EvaluableNode *> EvaluableNode::debugWatch;
#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
Concurrency::SingleMutex EvaluableNode::debugWatchMutex;
#endif

void EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels)
{
num_common_labels = 0;
Expand Down Expand Up @@ -576,6 +590,7 @@ void EvaluableNode::SetType(EvaluableNodeType new_type, EvaluableNodeManager *en
else //just set up empty assoc
{
InitMappedChildNodes();
SetNeedCycleCheck(false);
}
}
else //ordered pairs
Expand Down Expand Up @@ -603,14 +618,12 @@ void EvaluableNode::SetType(EvaluableNodeType new_type, EvaluableNodeManager *en
else //just set up empty ordered
{
InitOrderedChildNodes();
SetNeedCycleCheck(false);
}
}

type = new_type;

//cleared child nodes, so no cycles
SetNeedCycleCheck(false);

//put the extra label back on if exists (already have the reference)
if(extra_label != StringInternPool::NOT_A_STRING_ID)
AppendLabelStringId(extra_label, true);
Expand Down Expand Up @@ -1826,11 +1839,3 @@ size_t EvaluableNode::GetDeepSizeNoCycleRecurse(EvaluableNode *n)

return size;
}

double EvaluableNode::zeroNumberValue = 0.0;
std::string EvaluableNode::emptyStringValue = "";
EvaluableNode *EvaluableNode::emptyEvaluableNodeNullptr = nullptr;
std::vector<std::string> EvaluableNode::emptyStringVector;
std::vector<StringInternPool::StringID> EvaluableNode::emptyStringIdVector;
std::vector<EvaluableNode *> EvaluableNode::emptyOrderedChildNodes;
EvaluableNode::AssocType EvaluableNode::emptyMappedChildNodes;
41 changes: 38 additions & 3 deletions src/Amalgam/evaluablenode/EvaluableNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class EvaluableNode
}
}

constexpr void InitializeType(double number_value)
inline void InitializeType(double number_value)
{
attributes.allAttributes = 0;
if(FastIsNaN(number_value))
Expand Down Expand Up @@ -345,7 +345,7 @@ class EvaluableNode
return DoesEvaluableNodeTypeUseNumberData(GetType());
}

//Converts a number to a string in a consistent way that should be used for anything dealing with EvaulableNode
//Converts a number to a string in a consistent way that should be used for anything dealing with EvaluableNode
static __forceinline std::string NumberToString(double value)
{
return StringManipulation::NumberToString(value);
Expand Down Expand Up @@ -640,7 +640,7 @@ class EvaluableNode
//using ordered or mapped child nodes as appropriate, transforms into numeric values and passes into store_value
// if node is mapped child nodes, it will use element_names to order populate out and use default_value if any given id is not found
//will use num_expected_elements for immediate values
//store_nomeric_value takes in 3 parameters, the index, a bool if the value was found, and the EvaluableNode of the value
//store_value takes in 3 parameters, the index, a bool if the value was found, and the EvaluableNode of the value
template<typename StoreValueFunction = void(size_t, bool, EvaluableNode *)>
static inline void ConvertChildNodesAndStoreValue(EvaluableNode *node, std::vector<StringInternPool::StringID> &element_names,
size_t num_expected_elements, StoreValueFunction store_value)
Expand Down Expand Up @@ -806,6 +806,35 @@ class EvaluableNode
return value.stringValueContainer.labelStringID;
}

//registers and unregisters an EvaluableNode for debug watching
static inline void RegisterEvaluableNodeForDebugWatch(EvaluableNode *en)
{
#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
Concurrency::SingleLock lock(debugWatchMutex);
#endif
debugWatch.emplace(en);
}

static inline void UnregisterEvaluableNodeForDebugWatch(EvaluableNode *en)
{
#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
Concurrency::SingleLock lock(debugWatchMutex);
#endif
debugWatch.erase(en);
}

//returns true if the EvaluableNode is in the debug watch
static inline void AssertIfInDebugWatch(EvaluableNode *en)
{
#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
Concurrency::SingleLock lock(debugWatchMutex);
#endif
if(debugWatch.find(en) != end(debugWatch))
{
assert(false);
}
}

protected:

//align to the nearest 2-bytes to minimize alignment issues but reduce the overall memory footprint
Expand Down Expand Up @@ -941,6 +970,12 @@ class EvaluableNode
static std::vector<StringInternPool::StringID> emptyStringIdVector;
static std::vector<EvaluableNode *> emptyOrderedChildNodes;
static AssocType emptyMappedChildNodes;

//field for watching EvaluableNodes for debugging
static FastHashSet<EvaluableNode *> debugWatch;
#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
static Concurrency::SingleMutex debugWatchMutex;
#endif
};

//EvaluableNode type upper taxonomy for determining the most generic way
Expand Down
Loading

0 comments on commit fd5a9a6

Please sign in to comment.