diff --git a/src/Amalgam/GeneralizedDistance.h b/src/Amalgam/GeneralizedDistance.h index dd45a1a7..631dbe7b 100644 --- a/src/Amalgam/GeneralizedDistance.h +++ b/src/Amalgam/GeneralizedDistance.h @@ -7,6 +7,7 @@ //system headers: #include +#include #include //If defined, will use the Laplace LK metric (default). Otherwise will use Gaussian. @@ -1028,8 +1029,10 @@ class RepeatedGeneralizedDistanceEvaluator EFDT_CONTINUOUS_NUMERIC, //like FDT_CONTINUOUS_NUMERIC, but has cycles EFDT_CONTINUOUS_NUMERIC_CYCLIC, - //continuous precomputed (cyclic or not), may contain nonnumeric data - EFDT_CONTINUOUS_NUMERIC_PRECOMPUTED, + //continuous or nominal numeric precomputed (cyclic or not), may contain nonnumeric data + EFDT_NUMERIC_INTERNED_PRECOMPUTED, + //continuous or nominal string precomputed, may contain nonnumeric data + EFDT_STRING_INTERNED_PRECOMPUTED, //nominal compared to a string value where nominals may not be symmetric EFDT_NOMINAL_STRING, //nominal compared to a number value where nominals may not be symmetric @@ -1148,7 +1151,8 @@ class RepeatedGeneralizedDistanceEvaluator } //for the feature index, computes and stores the distance terms as measured from value to each interned value - inline void ComputeAndStoreInternedNumberValuesAndDistanceTerms(size_t index, std::vector *interned_values) + template + inline void ComputeAndStoreInternedDistanceTerms(size_t index, std::vector *interned_values) { bool compute_accurate = distEvaluator->NeedToPrecomputeAccurate(); bool compute_approximate = distEvaluator->NeedToPrecomputeApproximate(); @@ -1158,7 +1162,6 @@ class RepeatedGeneralizedDistanceEvaluator featureData.resize(index + 1); auto &feature_data = featureData[index]; - feature_data.internedNumberIndexToNumberValue = interned_values; if(interned_values == nullptr) { @@ -1170,42 +1173,41 @@ class RepeatedGeneralizedDistanceEvaluator auto &feature_attribs = distEvaluator->featureAttribs[index]; - double value = feature_data.targetValue.GetValueAsNumber(); - if(FastIsNaN(value)) + bool high_accuracy_interned_values = (compute_accurate && !compute_approximate); + + if(feature_data.targetValue.IsNull()) { //first entry is unknown-unknown distance - feature_data.internedDistanceTerms[0] = feature_attribs.unknownToUnknownDistanceTerm; + feature_data.internedDistanceTerms[0] = feature_attribs.unknownToUnknownDistanceTerm.GetValue(high_accuracy_interned_values);; - auto k_to_unk = feature_attribs.knownToUnknownDistanceTerm; + double k_to_unk = feature_attribs.knownToUnknownDistanceTerm.GetValue(high_accuracy_interned_values); for(size_t i = 1; i < feature_data.internedDistanceTerms.size(); i++) feature_data.internedDistanceTerms[i] = k_to_unk; } else { //first entry is known-unknown distance - feature_data.internedDistanceTerms[0] = feature_attribs.knownToUnknownDistanceTerm; + feature_data.internedDistanceTerms[0] = feature_attribs.knownToUnknownDistanceTerm.GetValue(high_accuracy_interned_values); + + EvaluableNodeImmediateValueType immediate_type = ENIVT_NULL; + if constexpr(std::is_same::value) + immediate_type = ENIVT_NUMBER; + else if constexpr(std::is_same::value) + immediate_type = ENIVT_STRING_ID; for(size_t i = 1; i < feature_data.internedDistanceTerms.size(); i++) { - double difference = value - (*interned_values)[i]; - if(compute_accurate) - feature_data.internedDistanceTerms[i].SetValue(distEvaluator->ComputeDistanceTermContinuousNonNullRegular(difference, index, true), true); - if(compute_approximate) - feature_data.internedDistanceTerms[i].SetValue(distEvaluator->ComputeDistanceTermContinuousNonNullRegular(difference, index, false), false); + feature_data.internedDistanceTerms[i] = distEvaluator->ComputeDistanceTermRegular( + feature_data.targetValue.nodeValue, (*interned_values)[i], immediate_type, immediate_type, + index, high_accuracy_interned_values); } } } - //returns true if the feature at index has interned number values - __forceinline bool HasNumberInternValues(size_t index) - { - return featureData[index].internedNumberIndexToNumberValue != nullptr; - } - //returns the precomputed distance term for the interned value with intern_value_index - __forceinline double ComputeDistanceTermInternedPrecomputed(size_t intern_value_index, size_t index, bool high_accuracy) + __forceinline double ComputeDistanceTermInternedPrecomputed(size_t intern_value_index, size_t index) { - return featureData[index].internedDistanceTerms[intern_value_index].GetValue(high_accuracy); + return featureData[index].internedDistanceTerms[intern_value_index]; } //returns true if the nominal feature has a specific distance term when compared with unknown values @@ -1360,8 +1362,7 @@ class RepeatedGeneralizedDistanceEvaluator public: FeatureData() - : effectiveFeatureType(EFDT_CONTINUOUS_NUMERIC), - internedNumberIndexToNumberValue(nullptr) + : effectiveFeatureType(EFDT_CONTINUOUS_NUMERIC) { } //clears all the feature data @@ -1369,7 +1370,6 @@ class RepeatedGeneralizedDistanceEvaluator { effectiveFeatureType = EFDT_CONTINUOUS_NUMERIC; precomputedRemainingIdenticalDistanceTerm = 0.0; - internedNumberIndexToNumberValue = nullptr; internedDistanceTerms.clear(); nominalStringDistanceTerms.clear(); nominalNumberDistanceTerms.clear(); @@ -1394,8 +1394,7 @@ class RepeatedGeneralizedDistanceEvaluator //the distance term for EFDT_REMAINING_IDENTICAL_PRECOMPUTED double precomputedRemainingIdenticalDistanceTerm; - std::vector *internedNumberIndexToNumberValue; - std::vector internedDistanceTerms; + std::vector internedDistanceTerms; //used to store distance terms for the respective targetValue for the sparse deviation matrix FastHashMap nominalStringDistanceTerms; diff --git a/src/Amalgam/SBFDSColumnData.h b/src/Amalgam/SBFDSColumnData.h index 77614a8e..48a3a7ef 100644 --- a/src/Amalgam/SBFDSColumnData.h +++ b/src/Amalgam/SBFDSColumnData.h @@ -81,13 +81,11 @@ class SBFDSColumnData stringIdIndices.insert(index); //try to insert the value if not already there, inserting an empty pointer - auto [id_entry, inserted] = stringIdValueToIndices.emplace(value.stringID, nullptr); + auto [id_entry, inserted] = stringIdValueEntries.emplace(value.stringID, nullptr); if(inserted) - id_entry->second = std::make_unique(); + id_entry->second = std::make_unique(value.stringID); - auto &ids = id_entry->second; - - ids->InsertNewLargestInteger(index); + id_entry->second->indicesWithValue.InsertNewLargestInteger(index); UpdateLongestString(value.stringID, index); } @@ -153,7 +151,12 @@ class SBFDSColumnData } if(stringIdIndices.contains(index)) + { + if(internedStringIdValues.valueInterningEnabled) + return ENIVT_STRING_ID_INDIRECTION_INDEX; return ENIVT_STRING_ID; + } + if(nullIndices.contains(index)) return ENIVT_NULL; if(invalidIndices.contains(index)) @@ -166,6 +169,8 @@ class SBFDSColumnData { if(value_type == ENIVT_NUMBER_INDIRECTION_INDEX) return ENIVT_NUMBER; + if(value_type == ENIVT_STRING_ID_INDIRECTION_INDEX) + return ENIVT_STRING_ID; return value_type; } @@ -174,6 +179,8 @@ class SBFDSColumnData { if(value_type == ENIVT_NUMBER && internedNumberValues.valueInterningEnabled) return ENIVT_NUMBER_INDIRECTION_INDEX; + if(value_type == ENIVT_STRING_ID && internedStringIdValues.valueInterningEnabled) + return ENIVT_STRING_ID_INDIRECTION_INDEX; return value_type; } @@ -182,6 +189,8 @@ class SBFDSColumnData { if(value_type == ENIVT_NUMBER_INDIRECTION_INDEX) return EvaluableNodeImmediateValue(internedNumberValues.internedIndexToValue[value.indirectionIndex]); + if(value_type == ENIVT_STRING_ID_INDIRECTION_INDEX) + return EvaluableNodeImmediateValue(internedStringIdValues.internedIndexToValue[value.indirectionIndex]); return value; } @@ -200,90 +209,105 @@ class SBFDSColumnData invalidIndices.insert(index); } - if(internedNumberValues.valueInterningEnabled) + if(internedNumberValues.valueInterningEnabled || internedStringIdValues.valueInterningEnabled) return EvaluableNodeImmediateValue(ValueEntry::NULL_INDEX); else return EvaluableNodeImmediateValue(); } + auto old_value_type_resolved = GetResolvedValueType(old_value_type); + auto old_value_resolved = GetResolvedValue(old_value_type, old_value); + auto new_value_type_resolved = GetResolvedValueType(new_value_type); + auto new_value_resolved = GetResolvedValue(new_value_type, new_value); + //if the types are the same, some shortcuts may apply //note that if the values match types and match resolved values, the old_value should be returned //because it is already in the correct storage format for the column - if(old_value_type == new_value_type) + if(old_value_type_resolved == new_value_type_resolved) { - if(old_value_type == ENIVT_NULL) + if(old_value_type_resolved == ENIVT_NULL) return old_value; - if(old_value_type == ENIVT_NUMBER) + if(old_value_type_resolved == ENIVT_NUMBER) { - double old_number_value = GetResolvedValue(old_value_type, old_value).number; - double new_number_value = GetResolvedValue(new_value_type, new_value).number; + double old_number_value = old_value_resolved.number; + double new_number_value = new_value_resolved.number; if(old_number_value == new_number_value) return old_value; //if the value already exists, then put the index in the list //but return the lower bound if not found so don't have to search a second time //need to search the old value before inserting, as FindExactIndexForValue is fragile a placeholder empty entry - auto [new_value_index, new_exact_index_found] = FindExactIndexForValue(new_number_value, true); - auto [old_value_index, old_exact_index_found] = FindExactIndexForValue(old_number_value, true); + auto [new_value_entry_index, new_value_entry_index_found] = FindExactIndexForValue(new_number_value, true); + auto [old_value_entry_index, old_value_entry_index_found] = FindExactIndexForValue(old_number_value, true); - if(old_exact_index_found) + size_t new_value_index = 0; + if(old_value_entry_index_found) { //if there are multiple entries for this number, just move the id - if(sortedNumberValueEntries[old_value_index]->indicesWithValue.size() > 1) + if(sortedNumberValueEntries[old_value_entry_index]->indicesWithValue.size() > 1) { - //erase with old_value_index first so don't need to update index - sortedNumberValueEntries[old_value_index]->indicesWithValue.erase(index); + //erase with old_value_entry_index first so don't need to update index + sortedNumberValueEntries[old_value_entry_index]->indicesWithValue.erase(index); - if(!new_exact_index_found) + if(!new_value_entry_index_found) { - sortedNumberValueEntries.emplace(sortedNumberValueEntries.begin() + new_value_index, std::make_unique(new_number_value)); - InsertFirstIndexIntoNumberValueEntry(index, new_value_index); + sortedNumberValueEntries.emplace(sortedNumberValueEntries.begin() + new_value_entry_index, std::make_unique(new_number_value)); + InsertFirstIndexIntoNumberValueEntry(index, new_value_entry_index); } else //just insert { - sortedNumberValueEntries[new_value_index]->indicesWithValue.insert(index); + sortedNumberValueEntries[new_value_entry_index]->indicesWithValue.insert(index); } + + new_value_index = sortedNumberValueEntries[new_value_entry_index]->valueInternIndex; } else //it's the last old_number_entry { - if(!new_exact_index_found) + if(!new_value_entry_index_found) { //remove old value and update to new - std::unique_ptr new_value_entry = std::move(sortedNumberValueEntries[old_value_index]); + std::unique_ptr new_value_entry = std::move(sortedNumberValueEntries[old_value_entry_index]); new_value_entry->value.number = new_number_value; //move the other values out of the way if(old_number_value < new_number_value) { - for(size_t i = old_value_index; i + 1 < new_value_index; i++) + for(size_t i = old_value_entry_index; i + 1 < new_value_entry_index; i++) sortedNumberValueEntries[i] = std::move(sortedNumberValueEntries[i + 1]); - new_value_index--; + new_value_entry_index--; } else { - for(size_t i = old_value_index; i > new_value_index; i--) + for(size_t i = old_value_entry_index; i > new_value_entry_index; i--) sortedNumberValueEntries[i] = std::move(sortedNumberValueEntries[i - 1]); } //move new value in to empty slot created - sortedNumberValueEntries[new_value_index] = std::move(new_value_entry); + sortedNumberValueEntries[new_value_entry_index] = std::move(new_value_entry); + internedNumberValues.UpdateInternIndexValue(sortedNumberValueEntries[new_value_entry_index].get(), + new_number_value); + new_value_index = sortedNumberValueEntries[new_value_entry_index]->valueInternIndex; } else //already has an entry for the new value, just delete as normal { - sortedNumberValueEntries[new_value_index]->indicesWithValue.insert(index); - DeleteNumberValueEntry(old_value_index); + sortedNumberValueEntries[new_value_entry_index]->indicesWithValue.insert(index); + new_value_index = sortedNumberValueEntries[new_value_entry_index]->valueInternIndex; + + internedNumberValues.DeleteInternIndex(sortedNumberValueEntries[old_value_entry_index]->valueInternIndex); + sortedNumberValueEntries.erase(sortedNumberValueEntries.begin() + old_value_entry_index); } } } else //shouldn't make it here, but ensure integrity just in case { + assert(false); //insert new value in correct position - sortedNumberValueEntries.emplace(sortedNumberValueEntries.begin() + new_value_index, + sortedNumberValueEntries.emplace(sortedNumberValueEntries.begin() + new_value_entry_index, std::make_unique(new_number_value)); - InsertFirstIndexIntoNumberValueEntry(index, new_value_index); + InsertFirstIndexIntoNumberValueEntry(index, new_value_entry_index); } if(internedNumberValues.valueInterningEnabled) @@ -292,54 +316,82 @@ class SBFDSColumnData return EvaluableNodeImmediateValue(new_value); } - if(old_value_type == ENIVT_STRING_ID) + if(old_value_type_resolved == ENIVT_STRING_ID) { - if(old_value.stringID == new_value.stringID) + StringInternPool::StringID old_sid_value = old_value_resolved.stringID; + StringInternPool::StringID new_sid_value = new_value_resolved.stringID; + if(old_sid_value == new_sid_value) return old_value; //try to insert the new value if not already there - auto [new_id_entry, inserted] = stringIdValueToIndices.emplace(new_value.stringID, nullptr); - - auto old_id_entry = stringIdValueToIndices.find(old_value.stringID); - if(old_id_entry != end(stringIdValueToIndices)) + auto [new_id_entry, inserted] = stringIdValueEntries.emplace(new_sid_value, nullptr); + + size_t new_value_index = 0; + auto old_id_entry = stringIdValueEntries.find(old_sid_value); + if(old_id_entry != end(stringIdValueEntries)) { //if there are multiple entries for this string, just move the id - if(old_id_entry->second->size() > 1) + if(old_id_entry->second->indicesWithValue.size() > 1) { + old_id_entry->second->indicesWithValue.erase(index); + + //if it was inserted, then construct everything if(inserted) - new_id_entry->second = std::make_unique(); + { + new_id_entry->second = std::make_unique(new_sid_value); + InsertFirstIndexIntoStringIdValueEntry(index, new_id_entry); + } + else + { + new_id_entry->second->indicesWithValue.insert(index); + } - new_id_entry->second->insert(index); - old_id_entry->second->erase(index); + new_value_index = new_id_entry->second->valueInternIndex; } else //it's the last old_id_entry { - //put the SortedIntegerSet in the new value or move the container + //if newly inserted, then can just move the data structure if(inserted) + { new_id_entry->second = std::move(old_id_entry->second); - else - new_id_entry->second->insert(index); + internedStringIdValues.UpdateInternIndexValue(new_id_entry->second.get(), + new_sid_value); + new_value_index = new_id_entry->second->valueInternIndex; + //perform erase at the end since the iterator may no longer be viable after + stringIdValueEntries.erase(old_id_entry); + } + else //need to clean up + { + new_id_entry->second->indicesWithValue.insert(index); + new_value_index = new_id_entry->second->valueInternIndex; - //erase after no longer need inserted_id_entry, as it may be invalidated - stringIdValueToIndices.erase(old_id_entry); + //erase after no longer need inserted_id_entry + internedStringIdValues.DeleteInternIndex(old_id_entry->second->valueInternIndex); + stringIdValueEntries.erase(old_id_entry); + } } } else if(inserted) //shouldn't make it here, but ensure integrity just in case { - new_id_entry->second = std::make_unique(); - new_id_entry->second->insert(index); + assert(false); + new_id_entry->second = std::make_unique(new_sid_value); + InsertFirstIndexIntoStringIdValueEntry(index, new_id_entry); + new_value_index = new_id_entry->second->valueInternIndex; } //update longest string as appropriate if(index == indexWithLongestString) RecomputeLongestString(); else - UpdateLongestString(new_value.stringID, index); + UpdateLongestString(new_sid_value, index); - return new_value; + if(internedStringIdValues.valueInterningEnabled) + return EvaluableNodeImmediateValue(new_value_index); + else + return EvaluableNodeImmediateValue(new_value); } - if(old_value_type == ENIVT_CODE) + if(old_value_type_resolved == ENIVT_CODE) { //only early exit if the pointers to the code are exactly the same, // as equivalent code may be garbage collected @@ -380,6 +432,7 @@ class SBFDSColumnData } else if(inserted) //shouldn't make it here, but ensure integrity just in case { + assert(false); new_size_entry->second = std::make_unique(); new_size_entry->second->insert(index); } @@ -395,7 +448,7 @@ class SBFDSColumnData return new_value; } - if(old_value_type == ENIVT_NUMBER_INDIRECTION_INDEX) + if(old_value_type == ENIVT_NUMBER_INDIRECTION_INDEX || old_value_type == ENIVT_STRING_ID_INDIRECTION_INDEX) { if(old_value.indirectionIndex == new_value.indirectionIndex) return old_value; @@ -403,17 +456,10 @@ class SBFDSColumnData } //delete index at old value - DeleteIndexValue(old_value_type, old_value, index); + DeleteIndexValue(old_value_type_resolved, old_value_resolved, index); //add index at new value bucket - return InsertIndexValue(new_value_type, new_value, index); - } - - //deletes a particular value based on the value_index - void DeleteNumberValueEntry(size_t value_index) - { - internedNumberValues.DeleteInternIndex(sortedNumberValueEntries[value_index]->valueInternIndex); - sortedNumberValueEntries.erase(sortedNumberValueEntries.begin() + value_index); + return InsertIndexValue(new_value_type_resolved, new_value_resolved, index); } //deletes everything involving the value at the index @@ -439,29 +485,41 @@ class SBFDSColumnData //look up value auto [value_index, exact_index_found] = FindExactIndexForValue(resolved_value.number); if(!exact_index_found) - return; + assert(false); //if the bucket has only one entry, we must delete the entire bucket if(sortedNumberValueEntries[value_index]->indicesWithValue.size() == 1) - DeleteNumberValueEntry(value_index); + { + internedNumberValues.DeleteInternIndex(sortedNumberValueEntries[value_index]->valueInternIndex); + sortedNumberValueEntries.erase(sortedNumberValueEntries.begin() + value_index); + } else //else we can just remove the id from the bucket + { sortedNumberValueEntries[value_index]->indicesWithValue.erase(index); + } break; } case ENIVT_STRING_ID: + case ENIVT_STRING_ID_INDIRECTION_INDEX: { stringIdIndices.erase(index); - auto id_entry = stringIdValueToIndices.find(value.stringID); - if(id_entry != end(stringIdValueToIndices)) - { - auto &entities = *(id_entry->second); - entities.erase(index); - //if no more entries have the value, remove it - if(entities.size() == 0) - stringIdValueToIndices.erase(id_entry); + auto resolved_value = GetResolvedValue(value_type, value); + + auto id_entry = stringIdValueEntries.find(resolved_value.stringID); + if(id_entry == end(stringIdValueEntries)) + assert(false); + + auto &entities = id_entry->second->indicesWithValue; + entities.erase(index); + + //if no more entries have the value, remove it + if(entities.size() == 0) + { + internedStringIdValues.DeleteInternIndex(id_entry->second->valueInternIndex); + stringIdValueEntries.erase(id_entry); } //see if need to compute new longest string @@ -478,7 +536,22 @@ class SBFDSColumnData size_t num_indices = EvaluableNode::GetDeepSize(value.code); auto id_entry = valueCodeSizeToIndices.find(num_indices); if(id_entry == end(valueCodeSizeToIndices)) - return; + { + //value must have changed sizes, look in each size + //note that this is inefficient -- if this ends up being a bottleneck, + //an additional data structure will need to be built to maintain the previous size + for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry) + { + if(cur_id_entry->second->contains(index)) + { + id_entry = cur_id_entry; + break; + } + } + + if(id_entry == end(valueCodeSizeToIndices)) + assert(false); + } //remove the entity auto &entities = *(id_entry->second); @@ -498,7 +571,7 @@ class SBFDSColumnData } } - //deletes a particular value based on the value_index + //inserts a particular value based on the value_index void InsertFirstIndexIntoNumberValueEntry(size_t index, size_t value_index) { ValueEntry *value_entry = sortedNumberValueEntries[value_index].get(); @@ -507,6 +580,17 @@ class SBFDSColumnData internedNumberValues.InsertValueEntry(value_entry, sortedNumberValueEntries.size()); } + //inserts a particular value based on the value_index + //templated to make it efficiently work regardless of the container + template + void InsertFirstIndexIntoStringIdValueEntry(size_t index, StringIdValueEntryIterator &value_iter) + { + ValueEntry *value_entry = value_iter->second.get(); + + value_entry->indicesWithValue.insert(index); + internedStringIdValues.InsertValueEntry(value_entry, stringIdValueEntries.size()); + } + //inserts the value at id //returns the value that should be used to reference the value, which may be an index //depending on the state of the column data @@ -527,7 +611,7 @@ class SBFDSColumnData { nullIndices.insert(index); - if(internedNumberValues.valueInterningEnabled) + if(internedNumberValues.valueInterningEnabled || internedStringIdValues.valueInterningEnabled) return EvaluableNodeImmediateValue(ValueEntry::NULL_INDEX); else return value; @@ -564,20 +648,25 @@ class SBFDSColumnData return value; } - if(value_type == ENIVT_STRING_ID) + if(value_type == ENIVT_STRING_ID || value_type == ENIVT_STRING_ID_INDIRECTION_INDEX) { stringIdIndices.insert(index); + auto string_id = GetResolvedValue(value_type, value).stringID; + //try to insert the value if not already there - auto [inserted_id_entry, inserted] = stringIdValueToIndices.emplace(value.stringID, nullptr); + auto [inserted_id_entry, inserted] = stringIdValueEntries.emplace(string_id, nullptr); if(inserted) - inserted_id_entry->second = std::make_unique(); + inserted_id_entry->second = std::make_unique(string_id); - auto &ids = *(inserted_id_entry->second); - ids.insert(index); + InsertFirstIndexIntoStringIdValueEntry(index, inserted_id_entry); - UpdateLongestString(value.stringID, index); - return value; + UpdateLongestString(string_id, index); + + if(internedStringIdValues.valueInterningEnabled) + return inserted_id_entry->second->valueInternIndex; + else + return value; } //value_type == ENIVT_CODE @@ -839,11 +928,11 @@ class SBFDSColumnData } else if(value_type == ENIVT_STRING_ID) { - if(stringIdValueToIndices.size() == 0) + if(stringIdValueEntries.size() == 0) return; //check every string value to see if between - for(auto &[id, entry] : stringIdValueToIndices) + for(auto &[id, entry] : stringIdValueEntries) { //check where the string is in the order; empty strings for comparison always pass bool value_less_than_low = true; @@ -866,7 +955,7 @@ class SBFDSColumnData } //insert all entities with this value - for(auto index : *entry) + for(auto index : entry->indicesWithValue) out.insert(index); } } @@ -892,9 +981,9 @@ class SBFDSColumnData } else if(value_type == ENIVT_STRING_ID) { - auto id_entry = stringIdValueToIndices.find(value.stringID); - if(id_entry != end(stringIdValueToIndices)) - out.InsertInBatch(*(id_entry->second)); + auto id_entry = stringIdValueEntries.find(value.stringID); + if(id_entry != end(stringIdValueEntries)) + out.InsertInBatch(id_entry->second->indicesWithValue); } } @@ -932,15 +1021,15 @@ class SBFDSColumnData } else if(value_type == ENIVT_STRING_ID) { - if(stringIdValueToIndices.size() == 0) + if(stringIdValueEntries.size() == 0) return; //else it's a string, need to do it the brute force way std::vector all_sids; - all_sids.reserve(stringIdValueToIndices.size()); + all_sids.reserve(stringIdValueEntries.size()); //get all strings - for(auto &[id, _] : stringIdValueToIndices) + for(auto &[id, _] : stringIdValueEntries) all_sids.push_back(id); std::sort(begin(all_sids), end(all_sids), StringIDNaturalCompareSort); @@ -950,8 +1039,8 @@ class SBFDSColumnData while(value_index < static_cast(all_sids.size()) && value_index >= 0) { - const auto &sid_entry = stringIdValueToIndices.find(all_sids[value_index]); - for(auto index : *(sid_entry->second)) + const auto &sid_entry = stringIdValueEntries.find(all_sids[value_index]); + for(auto index : sid_entry->second->indicesWithValue) { if(indices_to_consider != nullptr && !indices_to_consider->contains(index)) continue; @@ -972,6 +1061,12 @@ class SBFDSColumnData // than number values given the current data inline bool AreNumberInternsPreferredToValues() { + #ifdef FORCE_SBFDS_VALUE_INTERNING + return true; + #endif + #ifdef DISABLE_SBFDS_VALUE_INTERNING + return false; + #endif //use heuristic of sqrt number of values compared to num unique values // (but computed with a multiply instead of sqrt) size_t num_unique_values = sortedNumberValueEntries.size(); @@ -979,9 +1074,15 @@ class SBFDSColumnData } //returns true if switching to number values would be expected to yield better results - // than number interning given the current data + // than interning given the current data inline bool AreNumberValuesPreferredToInterns() { + #ifdef FORCE_SBFDS_VALUE_INTERNING + return false; + #endif + #ifdef DISABLE_SBFDS_VALUE_INTERNING + return true; + #endif //use heuristic of sqrt number of values compared to num unique values // (but computed with a multiply instead of sqrt) //round up to reduce flipping back and forth @@ -989,16 +1090,63 @@ class SBFDSColumnData return (num_unique_values * num_unique_values > numberIndices.size() - num_unique_values); } + //returns true if switching to StringId interning would be expected to yield better results + // than StringId values given the current data + inline bool AreStringIdInternsPreferredToValues() + { + #ifdef FORCE_SBFDS_VALUE_INTERNING + return true; + #endif + #ifdef DISABLE_SBFDS_VALUE_INTERNING + return false; + #endif + //use heuristic of sqrt number of values compared to num unique values + // (but computed with a multiply instead of sqrt) + size_t num_unique_values = stringIdValueEntries.size(); + return (num_unique_values * num_unique_values <= stringIdIndices.size()); + } + + //returns true if switching to StringID values would be expected to yield better results + // than interning given the current data + inline bool AreStringIdValuesPreferredToInterns() + { + #ifdef FORCE_SBFDS_VALUE_INTERNING + return false; + #endif + #ifdef DISABLE_SBFDS_VALUE_INTERNING + return true; + #endif + //use heuristic of sqrt number of values compared to num unique values + // (but computed with a multiply instead of sqrt) + //round up to reduce flipping back and forth + size_t num_unique_values = stringIdValueEntries.size(); + return (num_unique_values * num_unique_values > stringIdIndices.size() - num_unique_values); + } + //clears number intern caches and changes state to not perform interning for numbers - void ConvertNumberInternsToValues() + inline void ConvertNumberInternsToValues() { internedNumberValues.ClearInterning(); } //initializes and sets up number value interning caches and changes state to perform interning for numbers - void ConvertNumberValuesToInterns() + inline void ConvertNumberValuesToInterns() { - internedNumberValues.ConvertValueCollectionToInterns(sortedNumberValueEntries); + internedNumberValues.ConvertValueCollectionToInterns(sortedNumberValueEntries, + [](auto &value_entry_iter) { return value_entry_iter.get(); }); + } + + //clears string intern caches and changes state to not perform interning for StringIds + inline void ConvertStringIdInternsToValues() + { + internedStringIdValues.ClearInterning(); + } + + //initializes and sets up number value interning caches and changes state to perform interning for StringIds + inline void ConvertStringIdValuesToInterns() + { + internedStringIdValues.ConvertValueCollectionToInterns(stringIdValueEntries, + [](auto &value_entry_iter) { return value_entry_iter.second.get(); }); } protected: @@ -1021,8 +1169,8 @@ class SBFDSColumnData longestStringLength = 0; //initialize to 0 in case there are no entities with strings indexWithLongestString = 0; - for(auto &[s_id, s_entry] : stringIdValueToIndices) - UpdateLongestString(s_id, *s_entry->begin()); + for(auto &[s_id, s_entry] : stringIdValueEntries) + UpdateLongestString(s_id, s_entry->indicesWithValue.GetNthElement(0)); } //updates largestCodeSize and indexWithLargestCode based on parameters @@ -1054,7 +1202,7 @@ class SBFDSColumnData std::vector> sortedNumberValueEntries; //maps a string id to a vector of indices that have that string - CompactHashMap> stringIdValueToIndices; + CompactHashMap> stringIdValueEntries; //for any value that doesn't fit into other values ( ENIVT_CODE ), maps the number of elements in the code // to the indices of the same size @@ -1105,8 +1253,8 @@ class SBFDSColumnData } //converts the values in value_collection into interned values - template - inline void ConvertValueCollectionToInterns(ValueCollectionType &value_collection) + template + inline void ConvertValueCollectionToInterns(ValueEntryCollectionType &value_collection, GetValueEntryFunction get_value_entry) { if(valueInterningEnabled) return; @@ -1116,8 +1264,9 @@ class SBFDSColumnData internedIndexToValue[0] = notAValue; size_t intern_index = 1; - for(auto &value_entry : value_collection) + for(auto &value_entry_iter : value_collection) { + ValueEntry *value_entry = get_value_entry(value_entry_iter); value_entry->valueInternIndex = intern_index; internedIndexToValue[intern_index] = value_entry->value; intern_index++; @@ -1164,6 +1313,16 @@ class SBFDSColumnData internedIndexToValue[value_entry->valueInternIndex] = value_entry->value; } + //if interning is enabled, updates internedIndexToValue with the appropriate + //new value for value_entry + inline void UpdateInternIndexValue(ValueEntry *value_entry, ValueType value) + { + if(!valueInterningEnabled) + return; + + internedIndexToValue[value_entry->valueInternIndex] = value; + } + //deletes the intern index if interning is enabled inline void DeleteInternIndex(size_t intern_index) { @@ -1221,4 +1380,5 @@ class SBFDSColumnData //object that contains interned number values if applicable InternedValues internedNumberValues; + InternedValues internedStringIdValues; }; diff --git a/src/Amalgam/SeparableBoxFilterDataStore.cpp b/src/Amalgam/SeparableBoxFilterDataStore.cpp index 8ed360ee..2ee4ab6b 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.cpp +++ b/src/Amalgam/SeparableBoxFilterDataStore.cpp @@ -41,10 +41,18 @@ void SeparableBoxFilterDataStore::BuildLabel(size_t column_index, const std::vec column_data->AppendSortedNumberIndicesWithSortedIndices(entities_with_number_values); OptimizeColumn(column_index); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif } void SeparableBoxFilterDataStore::OptimizeColumn(size_t column_index) { +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif + auto &column_data = columnData[column_index]; if(column_data->internedNumberValues.valueInterningEnabled) @@ -78,10 +86,50 @@ void SeparableBoxFilterDataStore::OptimizeColumn(size_t column_index) for(auto entity_index : column_data->nullIndices) GetValue(entity_index, column_index).indirectionIndex = SBFDSColumnData::ValueEntry::NULL_INDEX; } + + if(column_data->internedStringIdValues.valueInterningEnabled) + { + if(column_data->AreStringIdValuesPreferredToInterns()) + { + for(auto &[sid, value_entry] : column_data->stringIdValueEntries) + { + auto value = value_entry->value.stringID; + for(auto entity_index : value_entry->indicesWithValue) + GetValue(entity_index, column_index).stringID = value; + } + + for(auto entity_index : column_data->nullIndices) + GetValue(entity_index, column_index).stringID = StringInternPool::NOT_A_STRING_ID; + + column_data->ConvertStringIdInternsToValues(); + } + } + else if(column_data->AreStringIdInternsPreferredToValues()) + { + column_data->ConvertStringIdValuesToInterns(); + + for(auto &[sid, value_entry] : column_data->stringIdValueEntries) + { + size_t value_index = value_entry->valueInternIndex; + for(auto entity_index : value_entry->indicesWithValue) + GetValue(entity_index, column_index).indirectionIndex = value_index; + } + + for(auto entity_index : column_data->nullIndices) + GetValue(entity_index, column_index).indirectionIndex = SBFDSColumnData::ValueEntry::NULL_INDEX; + } + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif } void SeparableBoxFilterDataStore::RemoveColumnIndex(size_t column_index_to_remove) { +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif + //will replace the values at index_to_remove with the values at index_to_move size_t column_index_to_move = columnData.size() - 1; @@ -119,10 +167,18 @@ void SeparableBoxFilterDataStore::RemoveColumnIndex(size_t column_index_to_remov matrix.resize(columnData.size() * numEntities); for(size_t i = 0; i < numEntities; i++) memcpy((char *)&matrix[i * columnData.size()], (char *)&old_matrix[i * (columnData.size() + 1)], sizeof(EvaluableNodeImmediateValue) * (columnData.size())); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif } void SeparableBoxFilterDataStore::AddEntity(Entity *entity, size_t entity_index) { +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif + size_t starting_cell_index = GetMatrixCellIndex(entity_index); //fill with missing values, including any empty indices @@ -143,6 +199,10 @@ void SeparableBoxFilterDataStore::AddEntity(Entity *entity, size_t entity_index) numEntities = entity_index + 1; OptimizeAllColumns(); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif } void SeparableBoxFilterDataStore::RemoveEntity(Entity *entity, size_t entity_index, size_t entity_index_to_reassign) @@ -150,18 +210,32 @@ void SeparableBoxFilterDataStore::RemoveEntity(Entity *entity, size_t entity_ind if(entity_index >= numEntities || columnData.size() == 0) return; +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif + //if was the last entity and reassigning the last one or one out of bounds, // simply delete from column data, delete last row, and return if(entity_index + 1 == GetNumInsertedEntities() && entity_index_to_reassign >= entity_index) { DeleteEntityIndexFromColumns(entity_index); DeleteLastRow(); + + #ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); + #endif + return; } //make sure it's a valid rassignment if(entity_index_to_reassign >= numEntities) + { + #ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); + #endif return; + } //if deleting a row and not replacing it, just fill as if it has no data if(entity_index == entity_index_to_reassign) @@ -172,6 +246,10 @@ void SeparableBoxFilterDataStore::RemoveEntity(Entity *entity, size_t entity_ind size_t starting_cell_index = GetMatrixCellIndex(entity_index); for(size_t column_index = 0; column_index < columnData.size(); column_index++) matrix[starting_cell_index + column_index].number = std::numeric_limits::quiet_NaN(); + + #ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); + #endif return; } @@ -180,26 +258,19 @@ void SeparableBoxFilterDataStore::RemoveEntity(Entity *entity, size_t entity_ind { auto &column_data = columnData[column_index]; - auto &val_to_overwrite_raw = GetValue(entity_index, column_index); - auto type_to_overwrite_raw = column_data->GetIndexValueType(entity_index); - auto val_to_overwrite = column_data->GetResolvedValue(type_to_overwrite_raw, val_to_overwrite_raw); - auto type_to_overwrite = column_data->GetResolvedValueType(type_to_overwrite_raw); + auto &val_to_overwrite = GetValue(entity_index, column_index); + auto type_to_overwrite = column_data->GetIndexValueType(entity_index); + + auto &value_to_reassign = GetValue(entity_index_to_reassign, column_index); + auto value_type_to_reassign = column_data->GetIndexValueType(entity_index_to_reassign); - auto &raw_value_to_reassign = GetValue(entity_index_to_reassign, column_index); - auto raw_value_type_to_reassign = column_data->GetIndexValueType(entity_index_to_reassign); - auto value_to_reassign = column_data->GetResolvedValue(raw_value_type_to_reassign, raw_value_to_reassign); - auto value_type_to_reassign = column_data->GetResolvedValueType(raw_value_type_to_reassign); + //change the destination to the value + val_to_overwrite = columnData[column_index]->ChangeIndexValue(type_to_overwrite, val_to_overwrite, value_type_to_reassign, value_to_reassign, entity_index); //remove the value where it is columnData[column_index]->DeleteIndexValue(value_type_to_reassign, value_to_reassign, entity_index_to_reassign); - - //change the destination to the value - columnData[column_index]->ChangeIndexValue(type_to_overwrite, val_to_overwrite, value_type_to_reassign, value_to_reassign, entity_index); } - //copy data from entity_index_to_reassign to entity_index - memcpy((char *)&(matrix[entity_index * columnData.size()]), (char *)&(matrix[entity_index_to_reassign * columnData.size()]), sizeof(EvaluableNodeImmediateValue) * columnData.size()); - //truncate matrix cache if removing the last entry, either by moving the last entity or by directly removing the last if(entity_index_to_reassign + 1 == numEntities || (entity_index_to_reassign + 1 >= numEntities && entity_index + 1 == numEntities)) @@ -209,6 +280,10 @@ void SeparableBoxFilterDataStore::RemoveEntity(Entity *entity, size_t entity_ind RemoveAnyUnusedLabels(); OptimizeAllColumns(); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif } void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t entity_index) @@ -216,6 +291,10 @@ void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t e if(entity_index >= numEntities) return; +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif + size_t matrix_index = GetMatrixCellIndex(entity_index); for(size_t column_index = 0; column_index < columnData.size(); column_index++) { @@ -239,6 +318,10 @@ void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t e RemoveAnyUnusedLabels(); OptimizeAllColumns(); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForAllColumns(); +#endif } void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entity_index, StringInternPool::StringID label_updated) @@ -253,6 +336,10 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit size_t column_index = column->second; auto &column_data = columnData[column_index]; +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif + //get the new value EvaluableNodeImmediateValueType value_type; EvaluableNodeImmediateValue value; @@ -261,7 +348,7 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit //update the value auto &matrix_value = GetValue(entity_index, column_index); auto previous_value_type = column_data->GetIndexValueType(entity_index); - + //assign the matrix location to the updated value (which may be an index) matrix_value = column_data->ChangeIndexValue(previous_value_type, matrix_value, value_type, value, entity_index); @@ -270,6 +357,10 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit RemoveColumnIndex(column_index); else OptimizeColumn(column_index); + +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif } //populates distances_out with all entities and their distances that have a distance to target less than max_dist @@ -814,6 +905,64 @@ void SeparableBoxFilterDataStore::FindNearestEntities(GeneralizedDistanceEvaluat } } +#ifdef SBFDS_VERIFICATION +void SeparableBoxFilterDataStore::VerifyAllEntitiesForColumn(size_t column_index) +{ + auto &column_data = columnData[column_index]; + + for(auto &value_entry : column_data->sortedNumberValueEntries) + { + //ensure all interned values are valid + if(column_data->internedNumberValues.valueInterningEnabled) + { + auto &interns = column_data->internedNumberValues; + assert(value_entry->valueInternIndex < interns.internedIndexToValue.size()); + assert(!FastIsNaN(interns.internedIndexToValue[value_entry->valueInternIndex])); + } + + //ensure all entity ids are not out of range + for(auto entity_index : value_entry->indicesWithValue) + assert(entity_index < numEntities); + } + + //ensure all numbers are valid + for(auto entity_index : column_data->numberIndices) + { + auto &feature_value = GetValue(entity_index, column_index); + auto feature_type = column_data->GetIndexValueType(entity_index); + assert(feature_type == ENIVT_NUMBER || feature_type == ENIVT_NUMBER_INDIRECTION_INDEX); + if(feature_type == ENIVT_NUMBER_INDIRECTION_INDEX && feature_value.indirectionIndex != 0) + { + auto feature_value_resolved = column_data->GetResolvedValue(feature_type, feature_value); + assert(!FastIsNaN(feature_value_resolved.number)); + } + } + + for(auto &[sid, value_entry] : column_data->stringIdValueEntries) + { + //ensure all interned values are valid + if(column_data->internedStringIdValues.valueInterningEnabled) + { + auto &interns = column_data->internedStringIdValues; + assert(value_entry->valueInternIndex < interns.internedIndexToValue.size()); + } + } + + //ensure all string ids are valid + for(auto entity_index : column_data->stringIdIndices) + { + auto &feature_value = GetValue(entity_index, column_index); + auto feature_type = column_data->GetIndexValueType(entity_index); + assert(feature_type == ENIVT_STRING_ID || feature_type == ENIVT_STRING_ID_INDIRECTION_INDEX); + if(feature_type == ENIVT_STRING_ID_INDIRECTION_INDEX && feature_value.indirectionIndex != 0) + { + auto feature_value_resolved = column_data->GetResolvedValue(feature_type, feature_value); + assert(feature_value_resolved.stringID != string_intern_pool.EMPTY_STRING_ID); + } + } +} +#endif + void SeparableBoxFilterDataStore::DeleteEntityIndexFromColumns(size_t entity_index) { for(size_t i = 0; i < columnData.size(); i++) @@ -1067,11 +1216,11 @@ double SeparableBoxFilterDataStore::PopulatePartialSumsWithSimilarFeatureValue(R { if(value.nodeType == ENIVT_STRING_ID) { - auto value_found = column->stringIdValueToIndices.find(value.nodeValue.stringID); - if(value_found != end(column->stringIdValueToIndices)) + auto value_found = column->stringIdValueEntries.find(value.nodeValue.stringID); + if(value_found != end(column->stringIdValueEntries)) { double term = r_dist_eval.distEvaluator->ComputeDistanceTermContinuousExactMatch(query_feature_index, high_accuracy); - AccumulatePartialSums(*(value_found->second), query_feature_index, term); + AccumulatePartialSums(value_found->second->indicesWithValue, query_feature_index, term); } } @@ -1406,15 +1555,47 @@ void SeparableBoxFilterDataStore::PopulateTargetValueAndLabelIndex(RepeatedGener auto &feature_type = feature_attribs.featureType; auto &feature_data = r_dist_eval.featureData[query_feature_index]; auto &effective_feature_type = r_dist_eval.featureData[query_feature_index].effectiveFeatureType; + auto &column_data = columnData[feature_attribs.featureIndex]; feature_data.Clear(); + feature_data.targetValue = EvaluableNodeImmediateValueWithType(position_value, position_value_type); - if(feature_attribs.IsFeatureNominal() + bool complex_comparison = (feature_type == GeneralizedDistanceEvaluator::FDT_NOMINAL_CODE || feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_STRING - || feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_CODE) + || feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_CODE); + + //consider computing interned values if appropriate + //however, symmetric nominals are fast, so don't compute interned values for them + if(!feature_attribs.IsFeatureSymmetricNominal() && !complex_comparison) { - feature_data.targetValue = EvaluableNodeImmediateValueWithType(position_value, position_value_type); + if(position_value_type == ENIVT_NUMBER && column_data->internedNumberValues.valueInterningEnabled) + { + size_t num_values_stored_as_numbers = column_data->numberIndices.size() + column_data->invalidIndices.size() + column_data->nullIndices.size(); + + if(GetNumInsertedEntities() == num_values_stored_as_numbers) + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_UNIVERSALLY_INTERNED_PRECOMPUTED; + else + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_NUMERIC_INTERNED_PRECOMPUTED; + + r_dist_eval.ComputeAndStoreInternedDistanceTerms(query_feature_index, &column_data->internedNumberValues.internedIndexToValue); + return; + } + else if(position_value_type == ENIVT_STRING_ID && column_data->internedStringIdValues.valueInterningEnabled) + { + size_t num_values_stored_as_string_ids = column_data->stringIdIndices.size() + column_data->invalidIndices.size() + column_data->nullIndices.size(); + + if(GetNumInsertedEntities() == num_values_stored_as_string_ids) + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_UNIVERSALLY_INTERNED_PRECOMPUTED; + else + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_STRING_INTERNED_PRECOMPUTED; + + r_dist_eval.ComputeAndStoreInternedDistanceTerms(query_feature_index, &column_data->internedStringIdValues.internedIndexToValue); + return; + } + } + if(feature_attribs.IsFeatureNominal() || complex_comparison) + { if(feature_type == GeneralizedDistanceEvaluator::FDT_NOMINAL_NUMERIC) effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_NOMINAL_NUMERIC; else if(feature_type == GeneralizedDistanceEvaluator::FDT_NOMINAL_STRING) @@ -1431,36 +1612,14 @@ void SeparableBoxFilterDataStore::PopulateTargetValueAndLabelIndex(RepeatedGener } else // feature_type is some form of continuous numeric { - //looking for continuous; if not a number, so just put as nan - double position_value_numeric = (position_value_type == ENIVT_NUMBER - ? position_value.number : std::numeric_limits::quiet_NaN()); - - feature_data.targetValue = EvaluableNodeImmediateValueWithType(position_value_numeric); - - //set up effective_feature_type - auto &column_data = columnData[feature_attribs.featureIndex]; - - //determine if all values are numeric - size_t num_values_stored_as_numbers = column_data->numberIndices.size() + column_data->invalidIndices.size() + column_data->nullIndices.size(); - bool all_values_numeric = (GetNumInsertedEntities() == num_values_stored_as_numbers); - - if(column_data->internedNumberValues.valueInterningEnabled) - { - if(all_values_numeric) - effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_UNIVERSALLY_INTERNED_PRECOMPUTED; - else - effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC_PRECOMPUTED; - - r_dist_eval.ComputeAndStoreInternedNumberValuesAndDistanceTerms(query_feature_index, &column_data->internedNumberValues.internedIndexToValue); - } + size_t num_values_stored_as_numbers = column_data->numberIndices.size() + column_data->invalidIndices.size(); + if(GetNumInsertedEntities() == num_values_stored_as_numbers + && feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_NUMERIC + && !column_data->internedNumberValues.valueInterningEnabled) + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_UNIVERSALLY_NUMERIC; + else if(feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_NUMERIC_CYCLIC) + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC_CYCLIC; else - { - if(all_values_numeric && feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_NUMERIC) - effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_UNIVERSALLY_NUMERIC; - else if(feature_type == GeneralizedDistanceEvaluator::FDT_CONTINUOUS_NUMERIC_CYCLIC) - effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC_CYCLIC; - else - effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC; - } + effective_feature_type = RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC; } } diff --git a/src/Amalgam/SeparableBoxFilterDataStore.h b/src/Amalgam/SeparableBoxFilterDataStore.h index 382e8507..bbf41808 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.h +++ b/src/Amalgam/SeparableBoxFilterDataStore.h @@ -1,10 +1,15 @@ #pragma once -//------------------------------------------------------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------------------------------------------- //Seperable Box-Filter Data Store -//Spatial acceleration database for high-dimensional data with no constraints on metric space (Minkowski, Euclidean, LK, etc). -//The structure can efficiently search for data when using different metric space parameters without being rebuilt. -//------------------------------------------------------------------------------------------------------------------------------------- +//Spatial acceleration database for high-dimensional data without constraints on metric space (Minkowski, Euclidean, LK, etc). +//The structure can efficiently search data when using different metric space parameters without being rebuilt. +//---------------------------------------------------------------------------------------------------------------------------- + +//if SBFDS_VERIFICATION is defined, then it will frequently verify integrity at cost of performance +//if FORCE_SBFDS_VALUE_INTERNING is defined, then it will force value interning to always be on +//if DISABLE_SBFDS_VALUE_INTERNING is defined, then it will disable all value interning +//if FORCE_SBFDS_VALUE_INTERNING and DISABLE_SBFDS_VALUE_INTERNING, FORCE_SBFDS_VALUE_INTERNING takes precedence //project headers: #include "Concurrency.h" @@ -460,16 +465,18 @@ class SeparableBoxFilterDataStore template inline std::function GetStringIdValueFromEntityIteratorFunction(size_t column_index) { - auto string_indices_ptr = &columnData[column_index]->stringIdIndices; + auto column_data = columnData[column_index].get(); + auto string_indices_ptr = &column_data->stringIdIndices; + auto value_type = column_data->GetUnresolvedValueType(ENIVT_STRING_ID); - return [&, string_indices_ptr, column_index] + return [&, string_indices_ptr, column_index, column_data, value_type] (Iter i, StringInternPool::StringID &value) { size_t entity_index = *i; if(!string_indices_ptr->contains(entity_index)) return false; - value = GetValue(entity_index, column_index).stringID; + value = column_data->GetResolvedValue(value_type, GetValue(entity_index, column_index)).stringID; return true; }; } @@ -502,6 +509,18 @@ class SeparableBoxFilterDataStore protected: +#ifdef SBFDS_VERIFICATION + //used for debugging to make sure all entities are valid + void VerifyAllEntitiesForColumn(size_t column_index); + + //used for debugging to make sure all entities are valid + inline void VerifyAllEntitiesForAllColumns() + { + for(size_t i = 0; i < columnData.size(); i++) + VerifyAllEntitiesForColumn(i); + } +#endif + //deletes/pops off the last row in the matrix cache inline void DeleteLastRow() { @@ -664,11 +683,11 @@ class SeparableBoxFilterDataStore inline double AccumulatePartialSumsForNominalStringIdValueIfExists(RepeatedGeneralizedDistanceEvaluator &r_dist_eval, StringInternPool::StringID value, size_t query_feature_index, SBFDSColumnData &column, bool high_accuracy) { - auto value_found = column.stringIdValueToIndices.find(value); - if(value_found != end(column.stringIdValueToIndices)) + auto value_found = column.stringIdValueEntries.find(value); + if(value_found != end(column.stringIdValueEntries)) { double term = r_dist_eval.ComputeDistanceTermNominal(value, ENIVT_STRING_ID, query_feature_index, high_accuracy); - AccumulatePartialSums(*(value_found->second), query_feature_index, term); + AccumulatePartialSums(value_found->second->indicesWithValue, query_feature_index, term); return term; } @@ -756,7 +775,7 @@ class SeparableBoxFilterDataStore { auto &feature_attribs = r_dist_eval.distEvaluator->featureAttribs[query_feature_index]; return r_dist_eval.ComputeDistanceTermInternedPrecomputed( - GetValue(entity_index, feature_attribs.featureIndex).indirectionIndex, query_feature_index, high_accuracy); + GetValue(entity_index, feature_attribs.featureIndex).indirectionIndex, query_feature_index); } case RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC: @@ -783,13 +802,24 @@ class SeparableBoxFilterDataStore return r_dist_eval.distEvaluator->ComputeDistanceTermKnownToUnknown(query_feature_index, high_accuracy); } - case RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_NUMERIC_PRECOMPUTED: + case RepeatedGeneralizedDistanceEvaluator::EFDT_NUMERIC_INTERNED_PRECOMPUTED: { auto &feature_attribs = r_dist_eval.distEvaluator->featureAttribs[query_feature_index]; auto &column_data = columnData[feature_attribs.featureIndex]; if(column_data->numberIndices.contains(entity_index)) return r_dist_eval.ComputeDistanceTermInternedPrecomputed( - GetValue(entity_index, feature_attribs.featureIndex).indirectionIndex, query_feature_index, high_accuracy); + GetValue(entity_index, feature_attribs.featureIndex).indirectionIndex, query_feature_index); + else + return r_dist_eval.distEvaluator->ComputeDistanceTermKnownToUnknown(query_feature_index, high_accuracy); + } + + case RepeatedGeneralizedDistanceEvaluator::EFDT_STRING_INTERNED_PRECOMPUTED: + { + auto &feature_attribs = r_dist_eval.distEvaluator->featureAttribs[query_feature_index]; + auto &column_data = columnData[feature_attribs.featureIndex]; + if(column_data->stringIdIndices.contains(entity_index)) + return r_dist_eval.ComputeDistanceTermInternedPrecomputed( + GetValue(entity_index, feature_attribs.featureIndex).indirectionIndex, query_feature_index); else return r_dist_eval.distEvaluator->ComputeDistanceTermKnownToUnknown(query_feature_index, high_accuracy); } @@ -827,7 +857,10 @@ class SeparableBoxFilterDataStore auto &feature_attribs = r_dist_eval.distEvaluator->featureAttribs[query_feature_index]; auto &column_data = columnData[feature_attribs.featureIndex]; auto other_value_type = column_data->GetIndexValueType(entity_index); + + //resolve value auto other_value = column_data->GetResolvedValue(other_value_type, GetValue(entity_index, feature_attribs.featureIndex)); + other_value_type = column_data->GetResolvedValueType(other_value_type); return r_dist_eval.ComputeDistanceTerm(other_value, other_value_type, query_feature_index, high_accuracy); } diff --git a/src/Amalgam/amlg_code/full_test.amlg b/src/Amalgam/amlg_code/full_test.amlg index 403b7b31..09a9c4a6 100644 --- a/src/Amalgam/amlg_code/full_test.amlg +++ b/src/Amalgam/amlg_code/full_test.amlg @@ -3565,7 +3565,6 @@ (create_entities (list "nan_queries" "a2") (lambda (null ##A 11 ##B 2))) (create_entities (list "nan_queries" "a3") (lambda (null ##A (null) ##B 1))) - ;expected output is 3 neighbors in order: a1, a2, a3 (print (compute_on_contained_entities "nan_queries" (list @@ -3593,12 +3592,12 @@ (null) (null) (null) - 2 ;p-value + 2 ;p-value ) )) ) - ;expected output is only 1 neighbor + ;expected output is only 1 neighbor, a1 or a3 (print (compute_on_contained_entities "nan_queries" (list (query_nearest_generalized_distance @@ -3609,7 +3608,7 @@ (null) (null) (null) - 2 ;p-value + 2 ;p-value ) )) ) diff --git a/src/Amalgam/amlg_code/test.amlg b/src/Amalgam/amlg_code/test.amlg index 1dbfde1a..02ecc319 100644 --- a/src/Amalgam/amlg_code/test.amlg +++ b/src/Amalgam/amlg_code/test.amlg @@ -1,12 +1,15 @@ (seq - (create_entities "MergeEntity1" (lambda (associate "a" 3 "b" 4)) ) - ;(create_entities (list "MergeEntity1" "MergeEntityChild1") (lambda (associate "x" 3 "y" 4)) ) - ;(create_entities (list "MergeEntity1" "MergeEntityChild2") (lambda (associate "p" 3 "q" 4)) ) - ;(create_entities (list "MergeEntity1") (lambda (associate "E" 3 "F" 4)) ) - ;(create_entities (list "MergeEntity1") (lambda (associate "e" 3 "f" 4 "g" 5 "h" 6)) ) - - (store_entity "amlg_code/module_test_c.caml" "MergeEntity1") - (load_entity "amlg_code/module_test_c.caml" "MergeEntity1Decompressed") - (print "Compression difference: [" (difference_entities "MergeEntity1" "MergeEntity1Decompressed") "]\n") -) + (create_entities "QueryCacheTest1" (lambda + (parallel ##a 3 ) + ) ) + + (print (size (contained_entities (list + (query_equals "a" 3) + ))) "\n") + + (assign_to_entities "QueryCacheTest1" (assoc a 3)) + + (destroy_entities "QueryCacheTest1") + +) \ No newline at end of file diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h index bf86eda7..9810c3d8 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.h +++ b/src/Amalgam/evaluablenode/EvaluableNode.h @@ -952,7 +952,8 @@ enum EvaluableNodeImmediateValueType : uint8_t ENIVT_NUMBER, //number ENIVT_STRING_ID, //stringID ENIVT_CODE, //code (more general than any of the above) - ENIVT_NUMBER_INDIRECTION_INDEX //not a real EvaluableNode type, but an index to some data structure that has a number + ENIVT_NUMBER_INDIRECTION_INDEX, //not a real EvaluableNode type, but an index to some data structure that has a number + ENIVT_STRING_ID_INDIRECTION_INDEX //not a real EvaluableNode type, but an index to some data structure that has a stringID }; //structure that can hold the most immediate value type of an EvaluableNode @@ -1031,7 +1032,7 @@ union EvaluableNodeImmediateValue return (value_1.number == value_2.number); else if(type_1 == ENIVT_STRING_ID) return (value_1.stringID == value_2.stringID); - else if(type_1 == ENIVT_NUMBER_INDIRECTION_INDEX) + else if(type_1 == ENIVT_NUMBER_INDIRECTION_INDEX || type_1 == ENIVT_STRING_ID_INDIRECTION_INDEX) return (value_1.indirectionIndex == value_2.indirectionIndex); else return EvaluableNode::AreDeepEqual(value_1.code, value_2.code); diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h index d5edce77..f262e70a 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h +++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h @@ -7,6 +7,9 @@ //system headers: #include +//if the macro PEDANTIC_GARBAGE_COLLECTION is defined, then garbage collection will be performed +//after every opcode, to help find and debug memory issues + typedef int64_t ExecutionCycleCount; typedef int32_t ExecutionCycleCountCompactDelta; diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp index 233f7132..aa746fda 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp @@ -124,11 +124,19 @@ std::tuple(relative_entity_container, traverser_1); Entity *entity_1_ptr = entity_1; + if(entity_1_ptr == nullptr) + return std::make_tuple(nullptr, nullptr, + Entity::EntityReferenceBufferReference()); + auto erbr = entity_1->GetAllDeeplyContainedEntityReferencesGroupedByDepth(false); erbr->emplace_back(std::move(entity_1)); EntityReadReference entity_2 = TraverseToExistingEntityReferenceViaEvaluableNodeIDPath(relative_entity_container, traverser_2); Entity *entity_2_ptr = entity_2; + if(entity_2_ptr == nullptr) + return std::make_tuple(nullptr, nullptr, + Entity::EntityReferenceBufferReference()); + entity_2->AppendAllDeeplyContainedEntityReferencesGroupedByDepth(erbr); erbr->emplace_back(std::move(entity_2)); @@ -138,11 +146,19 @@ std::tuple(relative_entity_container, traverser_2); Entity *entity_2_ptr = entity_2; + if(entity_2_ptr == nullptr) + return std::make_tuple(nullptr, nullptr, + Entity::EntityReferenceBufferReference()); + auto erbr = entity_2->GetAllDeeplyContainedEntityReferencesGroupedByDepth(false); erbr->emplace_back(std::move(entity_2)); EntityReadReference entity_1 = TraverseToExistingEntityReferenceViaEvaluableNodeIDPath(relative_entity_container, traverser_1); Entity *entity_1_ptr = entity_1; + if(entity_1_ptr == nullptr) + return std::make_tuple(nullptr, nullptr, + Entity::EntityReferenceBufferReference()); + entity_1->AppendAllDeeplyContainedEntityReferencesGroupedByDepth(erbr); erbr->emplace_back(std::move(entity_1)); diff --git a/src/Amalgam/interpreter/Interpreter.cpp b/src/Amalgam/interpreter/Interpreter.cpp index 353d3914..65347a63 100644 --- a/src/Amalgam/interpreter/Interpreter.cpp +++ b/src/Amalgam/interpreter/Interpreter.cpp @@ -506,13 +506,15 @@ EvaluableNodeReference Interpreter::InterpretNode(EvaluableNode *en, bool immedi //especially because only one node is kept interpreterNodeStackNodes->push_back(en); - //for deep debugging only - //ValidateEvaluableNodeIntegrity(); +#ifdef AMALGAM_MEMORY_INTEGRITY + VerifyEvaluableNodeIntegrity(); +#endif CollectGarbage(); - //for deep debugging only - //ValidateEvaluableNodeIntegrity(); +#ifdef AMALGAM_MEMORY_INTEGRITY + VerifyEvaluableNodeIntegrity(); +#endif //make sure don't eat more memory than allowed if(!AllowUnlimitedExecutionNodes()) @@ -528,8 +530,9 @@ EvaluableNodeReference Interpreter::InterpretNode(EvaluableNode *en, bool immedi EvaluableNodeReference retval = (this->*oc)(en, immediate_result); - //for deep debugging only - //ValidateEvaluableNodeIntegrity(); +#ifdef AMALGAM_MEMORY_INTEGRITY + VerifyEvaluableNodeIntegrity(); +#endif //finished with opcode interpreterNodeStackNodes->pop_back(); diff --git a/src/Amalgam/interpreter/Interpreter.h b/src/Amalgam/interpreter/Interpreter.h index 8ff07c75..86ae4f40 100644 --- a/src/Amalgam/interpreter/Interpreter.h +++ b/src/Amalgam/interpreter/Interpreter.h @@ -20,6 +20,9 @@ #include #include +//if the macro AMALGAM_MEMORY_INTEGRITY is defined, then it will continuously verify memory, at a high cost of performance +//this is useful for diagnosing and debugging memory issues + //forward declarations: class EntityQueryCondition; @@ -959,7 +962,7 @@ class Interpreter EvaluableNodeReference InterpretNode_PROFILE(EvaluableNode *en, bool immediate_result); //ensures that there are no reachable nodes that are deallocated - void ValidateEvaluableNodeIntegrity(); + void VerifyEvaluableNodeIntegrity(); //current execution step - number of nodes executed ExecutionCycleCount curExecutionStep; diff --git a/src/Amalgam/interpreter/InterpreterDebugger.cpp b/src/Amalgam/interpreter/InterpreterDebugger.cpp index f1462bb9..65c5b811 100644 --- a/src/Amalgam/interpreter/InterpreterDebugger.cpp +++ b/src/Amalgam/interpreter/InterpreterDebugger.cpp @@ -541,7 +541,7 @@ EvaluableNodeReference Interpreter::InterpretNode_DEBUG(EvaluableNode *en, bool } else if(command == "validate") { - ValidateEvaluableNodeIntegrity(); + VerifyEvaluableNodeIntegrity(); std::cout << "validation completed successfully" << std::endl; } #ifdef MULTITHREAD_SUPPORT diff --git a/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp b/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp index bf620a2a..67e2864a 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp @@ -2020,7 +2020,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_NOT_A_BUILT_IN_TYPE(Evalua return EvaluableNodeReference::Null(); } -void Interpreter::ValidateEvaluableNodeIntegrity() +void Interpreter::VerifyEvaluableNodeIntegrity() { for(EvaluableNode *en : *callStackNodes) EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); @@ -2039,5 +2039,5 @@ void Interpreter::ValidateEvaluableNodeIntegrity() EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); if(callingInterpreter != nullptr) - callingInterpreter->ValidateEvaluableNodeIntegrity(); + callingInterpreter->VerifyEvaluableNodeIntegrity(); } diff --git a/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp b/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp index 9fd7c106..59f9e3a4 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp @@ -264,8 +264,9 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIR //collect garbage, but not on current entity, save that for between instructions if(target_entity != curEntity) { - //for deep debugging only - //ValidateEvaluableNodeIntegrity(); + #ifdef AMALGAM_MEMORY_INTEGRITY + VerifyEvaluableNodeIntegrity(); + #endif #ifdef MULTITHREAD_SUPPORT target_entity->CollectGarbage(&memoryModificationLock); @@ -273,8 +274,9 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIR target_entity->CollectGarbage(); #endif - //for deep debugging only - //ValidateEvaluableNodeIntegrity(); + #ifdef AMALGAM_MEMORY_INTEGRITY + VerifyEvaluableNodeIntegrity(); + #endif } } diff --git a/src/Amalgam/out.txt b/src/Amalgam/out.txt index 7fab71d2..04fc3b9d 100644 --- a/src/Amalgam/out.txt +++ b/src/Amalgam/out.txt @@ -1,5 +1,5 @@ --Amalgam Version-- -0.0.0 +54.0.1-alpha+local.dev --system_time-- --label-- hello world: 12 and 2 @@ -1263,7 +1263,7 @@ current_index: 2 8 ] accum_string "abcdef" - argv ["C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] + argv ["C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] bar (declare {x 6} (+ x 2) @@ -1276,10 +1276,10 @@ current_index: 2 A {B 2} B 2 } - interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" + interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" raaa 2 rwww 1 - start_time 1719353144.303555 + start_time 1719860994.963796 www 1 x 12 zz 10 @@ -1306,7 +1306,7 @@ current_index: 2 8 ] accum_string "abcdef" - argv ["C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] + argv ["C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] bar (declare {x 6} (+ x 2) @@ -1319,10 +1319,10 @@ current_index: 2 A {B 2} B 2 } - interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" + interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" raaa 2 rwww 1 - start_time 1719353144.303555 + start_time 1719860994.963796 www 1 x 12 zz 10 @@ -1348,7 +1348,7 @@ current_index: 2 8 ] accum_string "abcdef" - argv ["C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] + argv ["C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg"] bar (declare {x 6} (+ x 2) @@ -1361,10 +1361,10 @@ current_index: 2 A {B 2} B 2 } - interpreter "C:\\Users\\ChristopherHazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" + interpreter "C:\\Users\\Chris Hazard\\Desktop\\Howso_repos\\amalgam\\x64\\MT_Release_EXE\\Amalgam.exe" raaa 2 rwww 1 - start_time 1719353144.303555 + start_time 1719860994.963796 www 1 x 12 zz 10 @@ -1630,7 +1630,7 @@ e: - .inf 25: {a 1} -current date-time in epoch: 2024-06-25-18.05.44.3895120 +current date-time in epoch: 2024-07-01-15.09.55.2165990 2020-06-07 00:22:59 1391230800 1391230800 @@ -3451,7 +3451,7 @@ deep sets --set_entity_root_permission-- RootTest -1719353144.664102 +1719860995.44061 (true) RootTest @@ -4694,4 +4694,4 @@ concurrent entity writes successful: (true) --clean-up test files-- --total execution time-- -1.7899150848388672 +1.2001371383666992