Skip to content

Commit

Permalink
21949: Fixes issue when emitting and parsing strings that start looki…
Browse files Browse the repository at this point in the history
…ng numeric but are not (e.g., 1.0.0), and fixes issue with conditioned code features (#294)
  • Loading branch information
howsohazard authored Oct 21, 2024
1 parent 5aae786 commit 7d8295c
Show file tree
Hide file tree
Showing 4 changed files with 4,128 additions and 4,013 deletions.
19 changes: 16 additions & 3 deletions src/Amalgam/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,22 @@ std::string Parser::GetNextIdentifier(bool allow_leading_label_marks)
}
}

EvaluableNode *Parser::GetNextToken(EvaluableNode *parent_node, EvaluableNode *new_token)
EvaluableNode *Parser::GetNextToken(EvaluableNode *parent_node, EvaluableNode *reuse_assoc_token_as_value)
{
if(new_token == nullptr)
EvaluableNode *new_token = nullptr;
bool parsing_assoc_key = false;

if(reuse_assoc_token_as_value == nullptr)
{
new_token = evaluableNodeManager->AllocNode(ENT_NULL);
//if parsing an assoc but haven't been passed a value to reuse, it's a key
if(parent_node != nullptr && parent_node->IsAssociativeArray())
parsing_assoc_key = true;
}
else
{
new_token = reuse_assoc_token_as_value;
}

SkipWhitespaceAndAccumulateAttributes(new_token);
if(pos >= code->size())
Expand Down Expand Up @@ -497,7 +509,8 @@ EvaluableNode *Parser::GetNextToken(EvaluableNode *parent_node, EvaluableNode *n
FreeNode(new_token);
return nullptr;
}
else if(StringManipulation::IsUtf8ArabicNumerals(cur_char) || cur_char == '-' || cur_char == '.')
else if(!parsing_assoc_key
&& (StringManipulation::IsUtf8ArabicNumerals(cur_char) || cur_char == '-' || cur_char == '.'))
{
size_t start_pos = pos;
SkipToEndOfIdentifier();
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ class Parser

//Returns an EvaluableNode containing the next token, null if none left in current context
// parent_node is primarily to check for errors or warnings
//if new_token is not nullptr, it will put the token in the EvaluableNode provided, otherwise will return a new one
EvaluableNode *GetNextToken(EvaluableNode *parent_node, EvaluableNode *new_token = nullptr);
//if reuse_assoc_token_as_value is not nullptr, it will put the token in the EvaluableNode provided, otherwise will return a new one
EvaluableNode *GetNextToken(EvaluableNode *parent_node, EvaluableNode *reuse_assoc_token_as_value = nullptr);

//deallocates the current node in case there is an early exit or error
void FreeNode(EvaluableNode *node);
Expand Down
19 changes: 18 additions & 1 deletion src/Amalgam/SeparableBoxFilterDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,15 +543,32 @@ class SeparableBoxFilterDataStore
SortedIntegerSet &entity_indices, size_t query_feature_index, size_t absolute_feature_index, bool high_accuracy)
{
size_t num_entity_indices = entity_indices.size();
size_t max_index = num_entity_indices;

auto &partial_sums = parametersAndBuffers.partialSums;
const auto accum_location = partial_sums.GetAccumLocation(query_feature_index);
size_t max_element = partial_sums.numInstances;

auto &entity_indices_vector = entity_indices.GetIntegerVector();

//it's almost always faster to just accumulate an index than to check if it is a valid index
// and then only accumulate if it is valid
//however, indices beyond the range of partial_sums will cause an issue
//therefore, only trim back the end if needed, and trim back to the largest possible element id (max_element - 1)
if(entity_indices.GetEndInteger() >= max_element)
{
max_index = entity_indices.GetFirstIntegerVectorLocationGreaterThan(max_element - 1);
num_entity_indices = max_index - 1;
}

auto &column_data = columnData[absolute_feature_index];

//for each found element, accumulate associated partial sums
for(size_t entity_index : entity_indices)
#pragma omp parallel for schedule(static) if(max_index > 300)
for(int64_t i = 0; i < static_cast<int64_t>(max_index); i++)
{
const auto entity_index = entity_indices_vector[i];

//get value
auto other_value_type = column_data->GetIndexValueType(entity_index);
auto other_value = column_data->GetResolvedValue(other_value_type, GetValue(entity_index, absolute_feature_index));
Expand Down
Loading

0 comments on commit 7d8295c

Please sign in to comment.