Skip to content

Commit

Permalink
21174: Fixes potential segfaults and code cleanup (#215)
Browse files Browse the repository at this point in the history
  • Loading branch information
howsohazard authored Aug 8, 2024
1 parent b905d7f commit a12e827
Show file tree
Hide file tree
Showing 13 changed files with 84 additions and 80 deletions.
4 changes: 3 additions & 1 deletion src/Amalgam/SBFDSColumnData.h
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,7 @@ class SBFDSColumnData

//returns true if the value is equal to notAValue
//note that this is a method due to not-a-number being treated as not equal to itself
constexpr bool IsNotAValue(ValueType value)
__forceinline bool IsNotAValue(ValueType value)
{
if constexpr(std::is_same_v<ValueType, double>)
return FastIsNaN(value);
Expand All @@ -1360,6 +1360,8 @@ class SBFDSColumnData
{
if constexpr(std::is_same_v<ValueType, double>)
return std::numeric_limits<double>::quiet_NaN();
else if constexpr(std::is_same_v<ValueType, StringInternPool::StringID>)
return string_intern_pool.NOT_A_STRING_ID;
else
return ValueType();
}();
Expand Down
23 changes: 12 additions & 11 deletions src/Amalgam/SeparableBoxFilterDataStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ void SeparableBoxFilterDataStore::RemoveColumnIndex(size_t column_index_to_remov
//will replace the values at index_to_remove with the values at index_to_move
size_t column_index_to_move = columnData.size() - 1;

size_t label_id = columnData[column_index_to_remove]->stringId;
StringInternPool::StringID label_id = columnData[column_index_to_remove]->stringId;

size_t num_columns = columnData.size();

Expand All @@ -144,7 +144,7 @@ void SeparableBoxFilterDataStore::RemoveColumnIndex(size_t column_index_to_remov
matrix[i * num_columns + column_index_to_remove] = matrix[i * num_columns + column_index_to_move];

//update column lookup
size_t label_id_to_move = columnData[column_index_to_move]->stringId;
StringInternPool::StringID label_id_to_move = columnData[column_index_to_move]->stringId;
labelIdToColumnIndex[label_id_to_move] = column_index_to_remove;

//rearrange columns
Expand Down Expand Up @@ -369,7 +369,8 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit
// and sets distances_out to the found entities. Infinity is allowed to compute all distances.
//if enabled_indices is not nullptr, it will only find distances to those entities, and it will modify enabled_indices in-place
// removing entities that do not have the corresponding labels
void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistanceEvaluator &dist_eval, std::vector<size_t> &position_label_ids,
void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistanceEvaluator &dist_eval,
std::vector<StringInternPool::StringID> &position_label_sids,
std::vector<EvaluableNodeImmediateValue> &position_values, std::vector<EvaluableNodeImmediateValueType> &position_value_types,
double max_dist, StringInternPool::StringID radius_label,
BitArrayIntegerSet &enabled_indices, std::vector<DistanceReferencePair<size_t>> &distances_out)
Expand All @@ -381,7 +382,7 @@ void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistance
r_dist_eval.distEvaluator = &dist_eval;

//look up these data structures upfront for performance
PopulateTargetValuesAndLabelIndices(r_dist_eval, position_label_ids, position_values, position_value_types);
PopulateTargetValuesAndLabelIndices(r_dist_eval, position_label_sids, position_values, position_value_types);

bool high_accuracy = dist_eval.highAccuracyDistances;
double max_dist_exponentiated = dist_eval.ExponentiateDifferenceTerm(max_dist, high_accuracy);
Expand Down Expand Up @@ -531,7 +532,7 @@ void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistance
}

void SeparableBoxFilterDataStore::FindEntitiesNearestToIndexedEntity(GeneralizedDistanceEvaluator &dist_eval,
std::vector<size_t> &position_label_ids, size_t search_index,
std::vector<StringInternPool::StringID> &position_label_sids, size_t search_index,
size_t top_k, StringInternPool::StringID radius_label, BitArrayIntegerSet &enabled_indices,
bool expand_to_first_nonzero_distance, std::vector<DistanceReferencePair<size_t>> &distances_out, size_t ignore_index, RandomStream rand_stream)
{
Expand All @@ -548,7 +549,7 @@ void SeparableBoxFilterDataStore::FindEntitiesNearestToIndexedEntity(Generalized
r_dist_eval.featureData.resize(num_enabled_features);
for(size_t i = 0; i < num_enabled_features; i++)
{
auto found = labelIdToColumnIndex.find(position_label_ids[i]);
auto found = labelIdToColumnIndex.find(position_label_sids[i]);
if(found == end(labelIdToColumnIndex))
continue;

Expand Down Expand Up @@ -713,7 +714,7 @@ void SeparableBoxFilterDataStore::FindEntitiesNearestToIndexedEntity(Generalized
}

void SeparableBoxFilterDataStore::FindNearestEntities(GeneralizedDistanceEvaluator &dist_eval,
std::vector<size_t> &position_label_ids, std::vector<EvaluableNodeImmediateValue> &position_values,
std::vector<StringInternPool::StringID> &position_label_sids, std::vector<EvaluableNodeImmediateValue> &position_values,
std::vector<EvaluableNodeImmediateValueType> &position_value_types,
size_t top_k, StringInternPool::StringID radius_label, size_t ignore_entity_index,
BitArrayIntegerSet &enabled_indices, std::vector<DistanceReferencePair<size_t>> &distances_out, RandomStream rand_stream)
Expand All @@ -725,7 +726,7 @@ void SeparableBoxFilterDataStore::FindNearestEntities(GeneralizedDistanceEvaluat
r_dist_eval.distEvaluator = &dist_eval;

//look up these data structures upfront for performance
PopulateTargetValuesAndLabelIndices(r_dist_eval, position_label_ids, position_values, position_value_types);
PopulateTargetValuesAndLabelIndices(r_dist_eval, position_label_sids, position_values, position_value_types);

enabled_indices.erase(ignore_entity_index);

Expand Down Expand Up @@ -959,7 +960,7 @@ void SeparableBoxFilterDataStore::VerifyAllEntitiesForColumn(size_t column_index
if(feature_type == ENIVT_STRING_ID_INDIRECTION_INDEX && feature_value.indirectionIndex != 0)
{
auto feature_value_resolved = column_data->GetResolvedValue(feature_type, feature_value);
assert(feature_value_resolved.stringID != string_intern_pool.EMPTY_STRING_ID);
assert(feature_value_resolved.stringID != string_intern_pool.NOT_A_STRING_ID);
}
}
}
Expand All @@ -976,13 +977,13 @@ void SeparableBoxFilterDataStore::DeleteEntityIndexFromColumns(size_t entity_ind
}
}

size_t SeparableBoxFilterDataStore::AddLabelsAsEmptyColumns(std::vector<size_t> &label_ids, size_t num_entities)
size_t SeparableBoxFilterDataStore::AddLabelsAsEmptyColumns(std::vector<StringInternPool::StringID> &label_sids, size_t num_entities)
{
size_t num_existing_columns = columnData.size();
size_t num_inserted_columns = 0;

//create columns for the labels, don't count any that already exist
for(auto label_id : label_ids)
for(auto label_id : label_sids)
{
auto [_, inserted] = labelIdToColumnIndex.insert(std::make_pair(label_id, columnData.size()));
if(inserted)
Expand Down
66 changes: 33 additions & 33 deletions src/Amalgam/SeparableBoxFilterDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class SeparableBoxFilterDataStore
}

//returns true if the structure already has the label
inline bool DoesHaveLabel(size_t label_id)
inline bool DoesHaveLabel(StringInternPool::StringID label_id)
{
return (labelIdToColumnIndex.count(label_id) > 0);
}
Expand All @@ -128,14 +128,14 @@ class SeparableBoxFilterDataStore
}

//expand the structure by adding a new column/label/feature and populating with data from entities
void AddLabels(std::vector<size_t> &label_ids, const std::vector<Entity *> &entities)
void AddLabels(std::vector<StringInternPool::StringID> &label_sids, const std::vector<Entity *> &entities)
{
//make sure have data to add
if(label_ids.size() == 0 || entities.size() == 0)
if(label_sids.size() == 0 || entities.size() == 0)
return;

//resize the matrix and populate column and label_id lookups
size_t num_columns_added = AddLabelsAsEmptyColumns(label_ids, entities.size());
size_t num_columns_added = AddLabelsAsEmptyColumns(label_sids, entities.size());

size_t num_columns = columnData.size();
size_t num_previous_columns = columnData.size() - num_columns_added;
Expand Down Expand Up @@ -223,9 +223,9 @@ class SeparableBoxFilterDataStore
return columnData[column_index]->stringIdIndices;
}

//given a feature_id and a range [low, high], fills out with all the entities with values of feature feature_id within specified range
//given a feature_id and a range [low, high], fills out with all the entities with values of feature feature_sid within specified range
//if the feature value is null, it will NOT be present in the search results, ie "x" != 3 will NOT include elements with x is null, even though null != 3
inline void FindAllEntitiesWithinRange(size_t feature_id, EvaluableNodeImmediateValueType value_type,
inline void FindAllEntitiesWithinRange(StringInternPool::StringID feature_sid, EvaluableNodeImmediateValueType value_type,
EvaluableNodeImmediateValue &low, EvaluableNodeImmediateValue &high, BitArrayIntegerSet &out, bool between_values = true)
{
if(numEntities == 0)
Expand All @@ -234,7 +234,7 @@ class SeparableBoxFilterDataStore
return;
}

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
{
out.clear();
Expand All @@ -245,15 +245,15 @@ class SeparableBoxFilterDataStore
}

//sets out to include only entities that have the given feature
inline void FindAllEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out)
inline void FindAllEntitiesWithFeature(StringInternPool::StringID feature_sid, BitArrayIntegerSet &out)
{
if(numEntities == 0)
{
out.clear();
return;
}

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
{
out.clear();
Expand All @@ -266,15 +266,15 @@ class SeparableBoxFilterDataStore
//filters out to include only entities that have the given feature
//if in_batch is true, will update out in batch for performance,
//meaning its number of elements will need to be updated
inline void IntersectEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out, bool in_batch)
inline void IntersectEntitiesWithFeature(StringInternPool::StringID feature_sid, BitArrayIntegerSet &out, bool in_batch)
{
if(numEntities == 0)
{
out.clear();
return;
}

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
{
out.clear();
Expand All @@ -286,13 +286,13 @@ class SeparableBoxFilterDataStore

//sets out to include only entities that have the given feature and records the values into
// entities and values respectively. enabled_entities is used as a buffer
inline void FindAllEntitiesWithValidNumbers(size_t feature_id, BitArrayIntegerSet &enabled_entities,
inline void FindAllEntitiesWithValidNumbers(StringInternPool::StringID feature_sid, BitArrayIntegerSet &enabled_entities,
std::vector<size_t> &entities, std::vector<double> &values)
{
if(numEntities == 0)
return;

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
return;
size_t column_index = column->second;
Expand All @@ -315,13 +315,13 @@ class SeparableBoxFilterDataStore

//filters enabled_indices to include only entities that have the given feature
// records the entities into entities and values respectively
inline void IntersectEntitiesWithValidNumbers(size_t feature_id, BitArrayIntegerSet &enabled_entities,
inline void IntersectEntitiesWithValidNumbers(StringInternPool::StringID feature_sid, BitArrayIntegerSet &enabled_entities,
std::vector<size_t> &entities, std::vector<double> &values)
{
if(numEntities == 0)
return;

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
return;
size_t column_index = column->second;
Expand All @@ -343,15 +343,15 @@ class SeparableBoxFilterDataStore
}

//sets out to include only entities that don't have the given feature
inline void FindAllEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out)
inline void FindAllEntitiesWithoutFeature(StringInternPool::StringID feature_sid, BitArrayIntegerSet &out)
{
if(numEntities == 0)
{
out.clear();
return;
}

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
{
out.clear();
Expand All @@ -364,23 +364,23 @@ class SeparableBoxFilterDataStore
//filters out to include only entities that don't have the given feature
//if in_batch is true, will update out in batch for performance,
//meaning its number of elements will need to be updated
inline void IntersectEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out, bool in_batch)
inline void IntersectEntitiesWithoutFeature(StringInternPool::StringID feature_sid, BitArrayIntegerSet &out, bool in_batch)
{
if(numEntities == 0)
return;

auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
return;

columnData[column->second]->invalidIndices.IntersectTo(out, in_batch);
}

//given a feature_id, value_type, and value, inserts into out all the entities that have the value
inline void UnionAllEntitiesWithValue(size_t feature_id,
//given a feature_sid, value_type, and value, inserts into out all the entities that have the value
inline void UnionAllEntitiesWithValue(StringInternPool::StringID feature_sid,
EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value, BitArrayIntegerSet &out)
{
auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
return;
size_t column_index = column->second;
Expand All @@ -400,11 +400,11 @@ class SeparableBoxFilterDataStore
}

//Finds the Minimum or Maximum (with respect to feature_id feature value) num_to_find entities in the database; if is_max is true, finds max, else finds min
inline void FindMinMax(size_t feature_id,
inline void FindMinMax(StringInternPool::StringID feature_sid,
EvaluableNodeImmediateValueType value_type, size_t num_to_find, bool is_max,
BitArrayIntegerSet *enabled_indices, BitArrayIntegerSet &out)
{
auto column = labelIdToColumnIndex.find(feature_id);
auto column = labelIdToColumnIndex.find(feature_sid);
if(column == labelIdToColumnIndex.end())
return;

Expand Down Expand Up @@ -485,7 +485,7 @@ class SeparableBoxFilterDataStore
//populates distances_out with all entities and their distances that have a distance to target less than max_dist
//if enabled_indices is not nullptr, intersects with the enabled_indices set.
//assumes that enabled_indices only contains indices that have valid values for all the features
void FindEntitiesWithinDistance(GeneralizedDistanceEvaluator &r_dist_eval, std::vector<size_t> &position_label_ids,
void FindEntitiesWithinDistance(GeneralizedDistanceEvaluator &r_dist_eval, std::vector<StringInternPool::StringID> &position_label_sids,
std::vector<EvaluableNodeImmediateValue> &position_values, std::vector<EvaluableNodeImmediateValueType> &position_value_types,
double max_dist, StringInternPool::StringID radius_label, BitArrayIntegerSet &enabled_indices,
std::vector<DistanceReferencePair<size_t>> &distances_out);
Expand All @@ -494,7 +494,7 @@ class SeparableBoxFilterDataStore
// if expand_to_first_nonzero_distance is set, then it will expand top_k until it it finds the first nonzero distance or until it includes all enabled indices
//will not modify enabled_indices, but instead will make a copy for any modifications
//assumes that enabled_indices only contains indices that have valid values for all the features
void FindEntitiesNearestToIndexedEntity(GeneralizedDistanceEvaluator &dist_eval, std::vector<size_t> &position_label_ids,
void FindEntitiesNearestToIndexedEntity(GeneralizedDistanceEvaluator &dist_eval, std::vector<StringInternPool::StringID> &position_label_sids,
size_t search_index, size_t top_k, StringInternPool::StringID radius_label,
BitArrayIntegerSet &enabled_indices, bool expand_to_first_nonzero_distance,
std::vector<DistanceReferencePair<size_t>> &distances_out,
Expand All @@ -503,7 +503,7 @@ class SeparableBoxFilterDataStore
//Finds the nearest neighbors
//enabled_indices is the set of entities to find from, and will be modified
//assumes that enabled_indices only contains indices that have valid values for all the features
void FindNearestEntities(GeneralizedDistanceEvaluator &dist_eval, std::vector<size_t> &position_label_ids,
void FindNearestEntities(GeneralizedDistanceEvaluator &dist_eval, std::vector<StringInternPool::StringID> &position_label_sids,
std::vector<EvaluableNodeImmediateValue> &position_values, std::vector<EvaluableNodeImmediateValueType> &position_value_types,
size_t top_k, StringInternPool::StringID radius_label, size_t ignore_entity_index, BitArrayIntegerSet &enabled_indices,
std::vector<DistanceReferencePair<size_t>> &distances_out, RandomStream rand_stream = RandomStream());
Expand Down Expand Up @@ -539,7 +539,7 @@ class SeparableBoxFilterDataStore
//adds a new labels to the database
// assumes label_ids is not empty and num_entities is nonzero
//returns the number of new columns inserted
size_t AddLabelsAsEmptyColumns(std::vector<size_t> &label_ids, size_t num_entities);
size_t AddLabelsAsEmptyColumns(std::vector<StringInternPool::StringID> &label_sids, size_t num_entities);

//computes each partial sum and adds the term to the partial sums associated for each id in entity_indices for query_feature_index
//returns the number of entities indices accumulated
Expand Down Expand Up @@ -946,14 +946,14 @@ class SeparableBoxFilterDataStore

//populates all target values given the selected target values for each value in corresponding position* parameters
void PopulateTargetValuesAndLabelIndices(RepeatedGeneralizedDistanceEvaluator &r_dist_eval,
std::vector<size_t> &position_label_ids, std::vector<EvaluableNodeImmediateValue> &position_values,
std::vector<StringInternPool::StringID> &position_label_sids, std::vector<EvaluableNodeImmediateValue> &position_values,
std::vector<EvaluableNodeImmediateValueType> &position_value_types)
{
size_t num_features = position_values.size();
r_dist_eval.featureData.resize(num_features);
for(size_t query_feature_index = 0; query_feature_index < num_features; query_feature_index++)
{
auto column = labelIdToColumnIndex.find(position_label_ids[query_feature_index]);
auto column = labelIdToColumnIndex.find(position_label_sids[query_feature_index]);
if(column != end(labelIdToColumnIndex))
PopulateTargetValueAndLabelIndex(r_dist_eval, query_feature_index,
position_values[query_feature_index], position_value_types[query_feature_index]);
Expand All @@ -962,11 +962,11 @@ class SeparableBoxFilterDataStore

//sets values in dist_eval corresponding to the columns specified by position_label_ids
inline void PopulateGeneralizedDistanceEvaluatorFromColumnData(
GeneralizedDistanceEvaluator &dist_eval, std::vector<size_t> &position_label_ids)
GeneralizedDistanceEvaluator &dist_eval, std::vector<StringInternPool::StringID> &position_label_sids)
{
for(size_t query_feature_index = 0; query_feature_index < position_label_ids.size(); query_feature_index++)
for(size_t query_feature_index = 0; query_feature_index < position_label_sids.size(); query_feature_index++)
{
auto column = labelIdToColumnIndex.find(position_label_ids[query_feature_index]);
auto column = labelIdToColumnIndex.find(position_label_sids[query_feature_index]);
if(column == end(labelIdToColumnIndex))
continue;

Expand Down
Loading

0 comments on commit a12e827

Please sign in to comment.