Skip to content

Commit

Permalink
20397: Fixes bug with utf-8 space characters in code which could lead…
Browse files Browse the repository at this point in the history
… to undefined behavior, adds top_k parameter to sort opcode, MINOR (#139)

Co-authored-by: howsoRes <144272317+howsoRes@users.noreply.github.com>
  • Loading branch information
howsohazard and howsoRes authored May 28, 2024
1 parent 0884aae commit c660307
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 171 deletions.
4 changes: 2 additions & 2 deletions docs/language.js
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,11 @@ var data = [
},

{
"parameter" : "sort [* function] list l",
"parameter" : "sort [* function] list l [number k]",
"output" : "list",
"new value" : "new",
"new target scope": true,
"description" : "Returns a new list containing the list with its elements sorted in increasing order. Numerical values come before strings, and code will be evaluated as the representative strings. If function is specified, it pushes a pair of new target scope onto the stack, so that current_value accesses a list of elements to from the list, and current_index accesses the list or assoc index if it is not already reduced, with target representing the original list or assoc, and evaluates function. The function should return a number, positive if \"(current_value)\" is greater, negative if \"(current_value 1)\" is greater, 0 if equal.",
"description" : "Returns a new list containing the list with its elements sorted in increasing order. Numerical values come before strings, and code will be evaluated as the representative strings. If function is specified and not null, it pushes a pair of new target scope onto the stack, so that current_value accesses a list of elements to from the list, and current_index accesses the list or assoc index if it is not already reduced, with target representing the original list or assoc, and evaluates function. The function should return a number, positive if \"(current_value)\" is greater, negative if \"(current_value 1)\" is greater, 0 if equal. If k is specified in addition to function, then it will only return the k smallest values sorted in order, or, if k is negative, it will ignore the negative sign and return the highest k values.",
"example" : "(print (sort (list 4 9 3 5 1)))\n(print (sort (list \"n\" \"b\" \"hello\" 4 1 3.2 (list 1 2 3))))\n(print (sort (list 1 \"1x\" \"10\" 20 \"z2\" \"z10\" \"z100\")))\n(print (sort (lambda (- (current_value) (current_value 1))) (list 4 9 3 5 1)))"
},

Expand Down
8 changes: 4 additions & 4 deletions src/Amalgam/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,15 +232,15 @@ void Parser::SkipWhitespaceAndAccumulateAttributes(EvaluableNode *target)
while(pos < code->size())
{
//eat any whitespace
if(StringManipulation::IsUtf8Whitespace(*code, pos))
if(size_t space_size = StringManipulation::IsUtf8Whitespace(*code, pos); space_size > 0)
{
if(StringManipulation::IsUtf8Newline(*code, pos))
if(StringManipulation::IsUtf8Newline(*code, pos) > 0)
{
lineNumber++;
lineStartPos = pos + 1;
lineStartPos = pos + space_size;
}

pos++;
pos += space_size;
continue;
}

Expand Down
6 changes: 6 additions & 0 deletions src/Amalgam/amlg_code/full_test.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,12 @@
"2020-06-08 lunes 11.33.46"
)))

(print (sort (null) (list 4 9 3 5 1) 2))
(print (sort (null) (list 4 9 3 5 1) -2))

(print (sort (lambda (- (current_value) (current_value 1))) (list 4 9 3 5 1) 2))
(print (sort (lambda (- (current_value) (current_value 1))) (list 4 9 3 5 1) -2))

(print "--indices--\n")
(print (indices (associate "a" 1 "b" 2 "c" 3 4 "d")))
(print (indices (list "a" 1 "b" 2 "c" 3 4 "d")))
Expand Down
120 changes: 1 addition & 119 deletions src/Amalgam/amlg_code/test.amlg
Original file line number Diff line number Diff line change
@@ -1,119 +1 @@
(seq

(print "40 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 4) ;attributes
(list
(assoc
a (assoc a 0.00744879 b 0.996275605 c 0.996275605)
b (assoc a 0.501736111 b 0.501736111 c 0.996527778)
c (assoc a 0.996539792 b 0.996539792 c 0.006920415)
)
) ;deviations
1 ;p
(list "b") ;vector 1
(list "c") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)

(print "41 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 4) ;attributes
(list
(assoc
a (assoc a 0.00744879 b 0.996275605 c 0.996275605)
b (assoc a 0.501736111 b 0.501736111 c 0.996527778)
c (assoc a 0.996539792 b 0.996539792 c 0.006920415)
)
) ;deviations
1 ;p
(list "b") ;vector 1
(list "a") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)

(print "42 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 4) ;attributes
(list
(assoc
a (assoc a 0.00744879 b 0.996275605 c 0.996275605)
b (list (assoc a 0.501736111 b 0.501736111 c 0.996527778) 0.8)
c (assoc a 0.996539792 b 0.996539792 c 0.006920415)
)
) ;deviations
1 ;p
(list "b") ;vector 1
(list "q") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)

(print "43 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 2 2) ;attributes
(list
0.2
) ;deviations
1 ;p
(list "q") ;vector 1
(list "u") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)

(print "44 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 4) ;attributes
(list
(list (assoc
a (assoc a 0.00744879 b 0.996275605 c 0.996275605)
b (list (assoc a 0.501736111 b 0.501736111 c 0.996527778) 0.8)
c (assoc a 0.996539792 b 0.996539792 c 0.006920415)
)
0.2
)
) ;deviations
1 ;p
(list "q") ;vector 1
(list "u") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)

(print "45 " (generalized_distance
(list 1 1) ;weights
(list "nominal_string") ;types
(list 4) ;attributes
(list
(list (list
(assoc
a (assoc a 0.00744879 b 0.996275605 c 0.996275605)
b (list (assoc a 0.501736111 b 0.501736111 c 0.996527778) 0.8)
c (assoc a 0.996539792 b 0.996539792 c 0.006920415)
)
0.2
)
0.2
)
) ;deviations
1 ;p
(list "q") ;vector 1
(list "u") ;vector 2
(null) ;names
(true) ;surprisal
) "\n"
)
)
(6)
2 changes: 1 addition & 1 deletion src/Amalgam/entity/EntityQueriesStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ class EntityQueriesStatistics
if(FastIsNaN(q_percentage) || q_percentage < 0.0 || q_percentage > 1.0)
return std::numeric_limits<double>::quiet_NaN();

std::vector<std::pair<double,double>>& value_weights = values_buffer;
std::vector<std::pair<double,double>> &value_weights = values_buffer;
value_weights.clear();
double total_weight = 0.0;
bool eq_or_no_weights = true;
Expand Down
5 changes: 5 additions & 0 deletions src/Amalgam/evaluablenode/EvaluableNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,11 @@ class EvaluableNode
return IsLessThan(a, b, false);
}

//returns true if a is strictly greater than b; intended as the descending-order counterpart of IsStrictlyLessThan
// implemented as "b is strictly less than a" (operands swapped, third argument false) rather than as the
// negation of "a is less than or equal to b"; both forms agree whenever a and b are comparable, but the negated
// form would report a value as greater than itself any time IsLessThan(x, x, true) yields false, which is
// not a valid comparator for std::partial_sort (it requires a strict weak ordering)
// NOTE(review): assumes IsLessThan returns false when it cannot order the pair (e.g., NaN) -- confirm against IsLessThan
static inline bool IsStrictlyGreaterThan(EvaluableNode *a, EvaluableNode *b)
{
	return IsLessThan(b, a, false);
}

//if the node's contents can be represented as a number, which includes numbers, infinity, and even null and NaN, then return true
// otherwise returns false
static constexpr bool CanRepresentValueAsANumber(EvaluableNode *e)
Expand Down
77 changes: 67 additions & 10 deletions src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,31 +855,77 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_SORT(EvaluableNode *en, bo
if(ocn.size() < 1)
return EvaluableNodeReference::Null();

if(ocn.size() == 1)
size_t list_index = (ocn.size() == 1 ? 0 : 1);

EvaluableNodeReference function;
size_t highest_k = 0;
size_t lowest_k = 0;

if(ocn.size() == 3)
{
double k = InterpretNodeIntoNumberValue(ocn[2]);
if(k > 0)
lowest_k = static_cast<size_t>(k);
else if(k < 0)
highest_k = static_cast<size_t>(-k);
//else nan, leave both as zero
}

if(ocn.size() >= 2)
function = InterpretNodeForImmediateUse(ocn[0]);

if(EvaluableNode::IsNull(function))
{
//get list
auto list = InterpretNode(ocn[0]);
auto list = InterpretNode(ocn[list_index]);
if(list == nullptr)
return EvaluableNodeReference::Null();

//make sure it is an editable copy
evaluableNodeManager->EnsureNodeIsModifiable(list);

std::sort(begin(list->GetOrderedChildNodes()), end(list->GetOrderedChildNodes()), EvaluableNode::IsStrictlyLessThan);
auto &list_ocn = list->GetOrderedChildNodes();

if(highest_k > 0 && highest_k < list_ocn.size())
{
std::partial_sort(begin(list_ocn),
begin(list_ocn) + highest_k,
end(list_ocn), EvaluableNode::IsStrictlyGreaterThan);

if(list.unique && !list->GetNeedCycleCheck())
{
for(size_t i = highest_k; i < list_ocn.size(); i++)
evaluableNodeManager->FreeNodeTree(list_ocn[i]);
}

list_ocn.erase(begin(list_ocn) + highest_k, end(list_ocn));
}
else if(lowest_k > 0 && lowest_k < list_ocn.size())
{
std::partial_sort(begin(list_ocn), begin(list_ocn) + lowest_k,
end(list_ocn), EvaluableNode::IsStrictlyLessThan);

if(list.unique && !list->GetNeedCycleCheck())
{
for(size_t i = lowest_k; i < list_ocn.size(); i++)
evaluableNodeManager->FreeNodeTree(list_ocn[i]);
}

list_ocn.erase(begin(list_ocn) + lowest_k, end(list_ocn));
}
else
{
std::sort(begin(list_ocn), end(list_ocn), EvaluableNode::IsStrictlyLessThan);
}

return list;
}
else
{
//get function to apply to list
auto function = InterpretNodeForImmediateUse(ocn[0]);
if(function == nullptr)
return EvaluableNodeReference::Null();

auto node_stack = CreateInterpreterNodeStackStateSaver(function);

//get list
auto list = InterpretNode(ocn[1]);
auto list = InterpretNode(ocn[list_index]);
if(list == nullptr)
return EvaluableNodeReference::Null();

Expand All @@ -891,6 +937,17 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_SORT(EvaluableNode *en, bo
//sort list; can't use the C++ sort function because it requires weak ordering and will crash otherwise
// the custom comparator does not guarantee this
std::vector<EvaluableNode *> sorted = CustomEvaluableNodeOrderedChildNodesSort(list->GetOrderedChildNodes(), comparator);

if(highest_k > 0 && highest_k < sorted.size())
{
sorted.erase(begin(sorted), begin(sorted) + (sorted.size() - highest_k));
std::reverse(begin(sorted), end(sorted));
}
else if(lowest_k > 0 && lowest_k < sorted.size())
{
sorted.erase(begin(sorted) + lowest_k, end(sorted));
}

list->SetOrderedChildNodes(sorted);

return list;
Expand Down
28 changes: 16 additions & 12 deletions src/Amalgam/out.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,10 @@ abcdef
"2020-06-08 lunes 11.33.47"
"2020-06-08 lunes 11.33.48"
)
(list 1 3)
(list 9 5)
(list 1 3)
(list 9 5)
--indices--
(list "b" "4" "a" "c")
(list
Expand Down Expand Up @@ -1249,7 +1253,7 @@ current_index: 2
8
)
accum_string "abcdef"
argv (list "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
argv (list "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
bar (declare
(assoc x 6)
(+ x 2)
Expand All @@ -1262,10 +1266,10 @@ current_index: 2
A (assoc B 2)
B 2
)
interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
raaa 2
rwww 1
start_time 1716570941.005401
start_time 1716915936.050118
www 1
x 12
zz 10
Expand All @@ -1292,7 +1296,7 @@ current_index: 2
8
)
accum_string "abcdef"
argv (list "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
argv (list "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
bar (declare
(assoc x 6)
(+ x 2)
Expand All @@ -1305,10 +1309,10 @@ current_index: 2
A (assoc B 2)
B 2
)
interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
raaa 2
rwww 1
start_time 1716570941.005401
start_time 1716915936.050118
www 1
x 12
zz 10
Expand All @@ -1334,7 +1338,7 @@ current_index: 2
8
)
accum_string "abcdef"
argv (list "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
argv (list "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg")
bar (declare
(assoc x 6)
(+ x 2)
Expand All @@ -1347,10 +1351,10 @@ current_index: 2
A (assoc B 2)
B 2
)
interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe"
raaa 2
rwww 1
start_time 1716570941.005401
start_time 1716915936.050118
www 1
x 12
zz 10
Expand Down Expand Up @@ -1618,7 +1622,7 @@ e:
- .inf

25: (assoc a 1)
current date-time in epoch: 2024-05-24-13.15.41.2972120
current date-time in epoch: 2024-05-28-13.05.36.1075120
2020-06-07 00:22:59
1391230800
1391230800
Expand Down Expand Up @@ -3438,7 +3442,7 @@ deep sets

--set_entity_root_permission--
RootTest
1716570941.565219
1716915936.289861
(true)

RootTest
Expand Down Expand Up @@ -4681,4 +4685,4 @@ concurrent entity writes successful: (true)

--clean-up test files--
--total execution time--
1.186690092086792
1.9868049621582031
Loading

0 comments on commit c660307

Please sign in to comment.