19916: Improves approximate deviation accuracy that fixes issue where nearest neighbors would be missed with large deviations (#115)
howsoai · Apr 10, 2024 · 6d43623 · 6d43623
1 parent 276a6ca
commit 6d43623
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 5 deletions.
diff --git a/src/Amalgam/GeneralizedDistance.h b/src/Amalgam/GeneralizedDistance.h
@@ -258,8 +258,11 @@ class GeneralizedDistanceEvaluator
 		else //!high_accuracy
 		{
 			//multiplying by the reciprocal is lower accuracy due to rounding differences but faster
+			//cast to float before taking the exponent since it's faster than a double, and because if the
+			//difference divided by the deviation exceeds the single precision floating point range,
+			//it will just set the term to zero, which is appropriate
 			double deviation_reciprocal = feature_attribs.deviationReciprocal;
-			diff += FastExp(-diff * deviation_reciprocal) * (3 * deviation + diff) * 0.5;
+			diff += std::exp(static_cast<float>(-diff * deviation_reciprocal)) * (3 * deviation + diff) * 0.5;
 			if(!surprisal_transform)
 				return diff;
 			else
@@ -269,15 +272,21 @@ class GeneralizedDistanceEvaluator
 		const double term = diff / (2.0 * deviation); //diff / (2*sigma)
 		if(high_accuracy)
 		{
-			diff += s_two_over_sqrt_pi * deviation * std::exp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
+			//2*sigma*(e^(-1*(diff^2)/((2*sigma)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
+			diff += s_two_over_sqrt_pi * deviation * std::exp(-term * term) - diff * std::erfc(term);
 			if(!surprisal_transform)
 				return diff;
 			else
 				return (diff / deviation) - s_surprisal_of_gaussian;
 		}
 		else //!high_accuracy
 		{
-			diff += s_two_over_sqrt_pi * deviation * FastExp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
+			//lower accuracy but faster: only the exponential term is approximated, erfc is still evaluated in double precision
+			//cast to float before taking the exponent since it's faster than a double, and because if the
+			//difference divided by the deviation exceeds the single precision floating point range,
+			//it will just set the term to zero, which is appropriate
+			//2*sigma*(e^(-1*(diff^2)/((2*sigma)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma))
+			diff += s_two_over_sqrt_pi * deviation * std::exp(static_cast<float>(-term * term)) - diff * std::erfc(term);
 			if(!surprisal_transform)
 				return diff;
 			else

diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h
@@ -54,8 +54,11 @@ typedef FastHashMap<std::pair<EvaluableNode *, EvaluableNode *>, MergeMetricResu
 inline double NumberCommonality(double difference, double a, double b)
 {
 	double max_abs = std::max(std::fabs(a), std::fabs(b));
-	//since this is called frequently in comparing and merging, and perfect accuracy isn't required, just use fast version
-	double difference_commonality = FastExp(-difference / max_abs);
+	//since this is called frequently in comparing and merging, and perfect accuracy isn't required,
+	//cast to float before taking the exponent since it's faster than a double, and because if the
+	//difference divided by the range exceeds the single precision floating point range,
+	//it will just set the term to zero, which is appropriate
+	double difference_commonality = std::exp(static_cast<float>(-difference / max_abs));
 	return difference_commonality;
 }