Skip to content

Commit

Permalink
19916: Improves approximate deviation accuracy that fixes issue where…
Browse files Browse the repository at this point in the history
… nearest neighbors would be missed with large deviations (#115)
  • Loading branch information
howsohazard authored Apr 10, 2024
1 parent 276a6ca commit 6d43623
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
15 changes: 12 additions & 3 deletions src/Amalgam/GeneralizedDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,11 @@ class GeneralizedDistanceEvaluator
else //!high_accuracy
{
//multiplying by the reciprocal is lower accuracy due to rounding differences but faster
//cast to float before taking the exponent since it's faster than a double, and because if the
//difference divided by the deviation exceeds the single precision floating point range,
//it will just set the term to zero, which is appropriate
double deviation_reciprocal = feature_attribs.deviationReciprocal;
diff += FastExp(-diff * deviation_reciprocal) * (3 * deviation + diff) * 0.5;
diff += std::exp(static_cast<float>(-diff * deviation_reciprocal)) * (3 * deviation + diff) * 0.5;
if(!surprisal_transform)
return diff;
else
Expand All @@ -269,15 +272,21 @@ class GeneralizedDistanceEvaluator
const double term = diff / (2.0 * deviation); //diff / (2*sigma)
if(high_accuracy)
{
diff += s_two_over_sqrt_pi * deviation * std::exp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
//2*sigma*(e^(-1*(diff^2)/((2*sigma)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
diff += s_two_over_sqrt_pi * deviation * std::exp(-term * term) - diff * std::erfc(term);
if(!surprisal_transform)
return diff;
else
return (diff / deviation) - s_surprisal_of_gaussian;
}
else //!high_accuracy
{
diff += s_two_over_sqrt_pi * deviation * FastExp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
//multiplying by the reciprocal is lower accuracy due to rounding differences but faster
//cast to float before taking the exponent since it's faster than a double, and because if the
//difference divided by the deviation exceeds the single precision floating point range,
//it will just set the term to zero, which is appropriate
//2*sigma*(e^(-1*(diff^2)/((2*sigma)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
diff += s_two_over_sqrt_pi * deviation * std::exp(static_cast<float>(-term * term)) - diff * std::erfc(term);
if(!surprisal_transform)
return diff;
else
Expand Down
7 changes: 5 additions & 2 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ typedef FastHashMap<std::pair<EvaluableNode *, EvaluableNode *>, MergeMetricResu
//returns a commonality score for two numbers a and b that are difference apart,
// computed as exp(-difference / max(|a|, |b|))
//for a nonnegative difference the result is 1 when the values match and decays toward 0
// as the difference grows relative to the larger magnitude
inline double NumberCommonality(double difference, double a, double b)
{
	double max_abs = std::max(std::fabs(a), std::fabs(b));

	//both values are zero and there is no difference between them; the ratio below
	// would be 0/0 (NaN), so report full commonality directly
	if(max_abs == 0.0 && difference == 0.0)
		return 1.0;

	//this is called frequently when comparing and merging, and perfect accuracy isn't required,
	// so cast to float before taking the exponent: the single precision exp is faster than the
	// double version, and when difference / max_abs is beyond what single precision can
	// represent the term simply becomes zero, which is appropriate
	double difference_commonality = std::exp(static_cast<float>(-difference / max_abs));
	return difference_commonality;
}

Expand Down

0 comments on commit 6d43623

Please sign in to comment.