From f475050ad9eb12a58de0948fe3293257986e3c9e Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:23:17 -0500 Subject: [PATCH] use surprisal math for unique check --- howso/conviction.amlg | 10 +++++----- howso/distances.amlg | 20 ++++++++++++-------- howso/synthesis_validation.amlg | 11 +++++++---- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/howso/conviction.amlg b/howso/conviction.amlg index 4046d681..d61f23c5 100644 --- a/howso/conviction.amlg +++ b/howso/conviction.amlg @@ -797,7 +797,7 @@ feature_weights (get hyperparam_map "featureWeights") feature_deviations (get hyperparam_map "featureDeviations") model_size (call !GetNumTrainingCases) - dt_parameter (if (= (get hyperparam_map "dt") "surprisal_to_prob") "surprisal" (get hyperparam_map "dt") ) + dt_parameter (get hyperparam_map "dt") p_parameter (get hyperparam_map "p") query_closest_k (get hyperparam_map "k") query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") @@ -837,7 +837,7 @@ feature_deviations p_parameter ;pull actual distance or surprisal, not influence - (if (= "surprisal" dt_parameter) "surprisal" 1) + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (if use_case_weights weight_feature (null)) (rand) (null) ;radius @@ -847,7 +847,7 @@ )) (assign (assoc - non_zero_distances (filter (lambda (!= (current_value) 0)) (values closest_cases_distances_map)) + non_zero_distances (filter (lambda (> (current_value) 1e-13)) (values closest_cases_distances_map)) )) (if (or (> (size non_zero_distances) 0) (= query_closest_k model_size)) @@ -869,7 +869,7 @@ ) ;if the distance weight exponent is not the default value of -1, apply the negative value of it to all the distances - (if (and (!= -1 dt_parameter) (!= "surprisal" dt_parameter)) + (if (and (!= -1 dt_parameter) (!= "surprisal_to_prob" dt_parameter)) (assign (assoc non_zero_distances (map (lambda (pow (current_value) (- dt_parameter))) non_zero_distances) @@ -894,7 +894,7 @@ )) ;if all the neighbors have zero distance, return a 0 - (if (= "surprisal" dt_parameter) + (if (= "surprisal_to_prob" dt_parameter) (let (assoc probabilities (map (lambda (exp (- (current_value)))) (values closest_cases_distances_map)) diff --git a/howso/distances.amlg b/howso/distances.amlg index 81688d48..58a24ec8 100644 --- a/howso/distances.amlg +++ b/howso/distances.amlg @@ -25,9 +25,10 @@ query_feature_attributes_map ;Feature deviations are not used in order to ensure that we are measuring privacy ;assuming it has been maximally preserved. Deviations make cases look farther away than they are. - (null) ;feature_deviations + (if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) ) p_parameter - 1 ;dt of 1 queries distance in ascending order + ;dt of 1 queries distance in ascending order + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (null) ;Weight_feature is set to null so the computation done here matches the rejection criteria ;in generate.amlg. ;Use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed"). @@ -718,9 +719,10 @@ feature_weights !queryDistanceTypeMap query_feature_attributes_map - (if use_feature_deviations (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties")) + (if (or use_feature_deviations (= "surprisal_to_prob" dt_parameter)) (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties")) p_parameter - 1 ;dt = 1 means return computed distance to the case + ;dt = 1 means return computed distance to the case + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (null) ;weight_feature (rand) (null) ;radius @@ -752,9 +754,10 @@ query_feature_attributes_map ;Feature deviations are not used in order to ensure that privacy is maximally preserved. ;If feature deviations are used, duplicate cases may be deemed private. - (null) ;feature_deviations + (if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) ) p_parameter - 1 ;dt = 1 means return computed distance to each case + ;dt = 1 means return computed distance to each case + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (null) ;weight (rand) (null) ;radius @@ -776,9 +779,10 @@ feature_weights !queryDistanceTypeMap query_feature_attributes_map - (if use_feature_deviations (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties")) + (if (or use_feature_deviations (= "surprisal_to_prob" dt_parameter)) (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties")) p_parameter - 1 ;dt = 1 means return computed distance to the case + ;dt = 1 means return computed distance to the case + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (null) ;weight_feature (rand) (null) ;radius diff --git a/howso/synthesis_validation.amlg b/howso/synthesis_validation.amlg index fb9f0e96..d60a0275 100644 --- a/howso/synthesis_validation.amlg +++ b/howso/synthesis_validation.amlg @@ -46,6 +46,7 @@ feature_deviations (get hyperparam_map "featureDeviations") p_parameter (get hyperparam_map "p") query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") + dt_parameter (get hyperparam_map "dt") ;override global residuals with calculated residuals per feature threshold_feature_residuals_map @@ -180,11 +181,12 @@ feature_weights !queryDistanceTypeMap query_feature_attributes_map - ;Feature deviations are not used in order to ensure that privacy is maximally preserved. + ;Feature deviations are not used in order to ensure that privacy is maximally preserved when using distance ;If feature deviations are used, duplicate cases may be deemed private. - (null) ;feature_deviations + (if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) ) p_parameter - 1 ;dt = 1 means return computed distance to each case + ;dt = 1 means return computed distance to each case + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) (null) ;weight (rand) (null) ;radius @@ -208,7 +210,7 @@ ;only test for uniqueness if the generated case is not a perfect match because has_dupes begins with true, ;skipping this block will indicate that the case is a duplicate - (if (!= dist_to_closest_case 0) + (if (> dist_to_closest_case 1e-13) (seq (assign (assoc closest_case_values (retrieve_from_entity closest_case (if has_novel_substitions non_novel_context_features context_features)) @@ -410,6 +412,7 @@ query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") non_novel_context_features (null) has_novel_substitions (and exclude_novel_nominals_from_uniqueness_check (size !novelSubstitionFeatureSet)) + dt_parameter (get hyperparam_map "dt") )) ;find the closest cases using the same code as generate case, set generate_attempt to 2 so that it