Skip to content

Commit

Permalink
use surprisal math for unique check
Browse files Browse the repository at this point in the history
  • Loading branch information
howsoRes committed Dec 19, 2024
1 parent b4b396e commit f475050
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
10 changes: 5 additions & 5 deletions howso/conviction.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@
feature_weights (get hyperparam_map "featureWeights")
feature_deviations (get hyperparam_map "featureDeviations")
model_size (call !GetNumTrainingCases)
dt_parameter (if (= (get hyperparam_map "dt") "surprisal_to_prob") "surprisal" (get hyperparam_map "dt") )
dt_parameter (get hyperparam_map "dt")
p_parameter (get hyperparam_map "p")
query_closest_k (get hyperparam_map "k")
query_feature_attributes_map (get hyperparam_map "featureDomainAttributes")
Expand Down Expand Up @@ -837,7 +837,7 @@
feature_deviations
p_parameter
;pull actual distance or surprisal, not influence
(if (= "surprisal" dt_parameter) "surprisal" 1)
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(if use_case_weights weight_feature (null))
(rand)
(null) ;radius
Expand All @@ -847,7 +847,7 @@
))

(assign (assoc
non_zero_distances (filter (lambda (!= (current_value) 0)) (values closest_cases_distances_map))
non_zero_distances (filter (lambda (> (current_value) 1e-13)) (values closest_cases_distances_map))
))

(if (or (> (size non_zero_distances) 0) (= query_closest_k model_size))
Expand All @@ -869,7 +869,7 @@
)

;if the distance weight exponent is not the default value of -1, apply the negative value of it to all the distances
(if (and (!= -1 dt_parameter) (!= "surprisal" dt_parameter))
(if (and (!= -1 dt_parameter) (!= "surprisal_to_prob" dt_parameter))
(assign (assoc
non_zero_distances
(map (lambda (pow (current_value) (- dt_parameter))) non_zero_distances)
Expand All @@ -894,7 +894,7 @@
))

;if all the neighbors have zero distance, return a 0
(if (= "surprisal" dt_parameter)
(if (= "surprisal_to_prob" dt_parameter)
(let
(assoc
probabilities (map (lambda (exp (- (current_value)))) (values closest_cases_distances_map))
Expand Down
20 changes: 12 additions & 8 deletions howso/distances.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@
query_feature_attributes_map
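;When using distance, feature deviations are not used in order to ensure that we are measuring privacy
;assuming it has been maximally preserved. Deviations make cases look farther away than they are.
;Feature deviations are only passed in when dt_parameter is "surprisal_to_prob".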
(null) ;feature_deviations
(if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) )
p_parameter
1 ;dt of 1 queries distance in ascending order
;dt of 1 queries distance in ascending order; "surprisal" is passed instead when dt_parameter is "surprisal_to_prob"
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(null) ;Weight_feature is set to null so the computation done here matches the rejection criteria
;in generate.amlg.
;Use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed").
Expand Down Expand Up @@ -718,9 +719,10 @@
feature_weights
!queryDistanceTypeMap
query_feature_attributes_map
(if use_feature_deviations (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties"))
(if (or use_feature_deviations (= "surprisal_to_prob" dt_parameter)) (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties"))
p_parameter
1 ;dt = 1 means return computed distance to the case
;dt = 1 means return computed distance to the case; "surprisal" is passed instead when dt_parameter is "surprisal_to_prob"
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(null) ;weight_feature
(rand)
(null) ;radius
Expand Down Expand Up @@ -752,9 +754,10 @@
query_feature_attributes_map
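;When using distance, feature deviations are not used in order to ensure that privacy is maximally preserved.
;If feature deviations are used, duplicate cases may be deemed private.
;Feature deviations are only passed in when dt_parameter is "surprisal_to_prob".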
(null) ;feature_deviations
(if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) )
p_parameter
1 ;dt = 1 means return computed distance to each case
;dt = 1 means return computed distance to each case; "surprisal" is passed instead when dt_parameter is "surprisal_to_prob"
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(null) ;weight
(rand)
(null) ;radius
Expand All @@ -776,9 +779,10 @@
feature_weights
!queryDistanceTypeMap
query_feature_attributes_map
(if use_feature_deviations (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties"))
(if (or use_feature_deviations (= "surprisal_to_prob" dt_parameter)) (get hyperparam_map "featureDeviations") (get hyperparam_map "nullUncertainties"))
p_parameter
1 ;dt = 1 means return computed distance to the case
;dt = 1 means return computed distance to the case; "surprisal" is passed instead when dt_parameter is "surprisal_to_prob"
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(null) ;weight_feature
(rand)
(null) ;radius
Expand Down
11 changes: 7 additions & 4 deletions howso/synthesis_validation.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
feature_deviations (get hyperparam_map "featureDeviations")
p_parameter (get hyperparam_map "p")
query_feature_attributes_map (get hyperparam_map "featureDomainAttributes")
dt_parameter (get hyperparam_map "dt")

;override global residuals with calculated residuals per feature
threshold_feature_residuals_map
Expand Down Expand Up @@ -180,11 +181,12 @@
feature_weights
!queryDistanceTypeMap
query_feature_attributes_map
;Feature deviations are not used in order to ensure that privacy is maximally preserved.
;When using distance, feature deviations are not used in order to ensure that privacy is maximally preserved.
;If feature deviations are used, duplicate cases may be deemed private.
(null) ;feature_deviations
(if (= "surprisal_to_prob" dt_parameter) feature_deviations (null) )
p_parameter
1 ;dt = 1 means return computed distance to each case
;dt = 1 means return computed distance to each case; "surprisal" is passed instead when dt_parameter is "surprisal_to_prob"
(if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
(null) ;weight
(rand)
(null) ;radius
Expand All @@ -208,7 +210,7 @@

;only test for uniqueness if the generated case is not a perfect match because has_dupes begins with true,
;skipping this block will indicate that the case is a duplicate
(if (!= dist_to_closest_case 0)
(if (> dist_to_closest_case 1e-13)
(seq
(assign (assoc
closest_case_values (retrieve_from_entity closest_case (if has_novel_substitions non_novel_context_features context_features))
Expand Down Expand Up @@ -410,6 +412,7 @@
query_feature_attributes_map (get hyperparam_map "featureDomainAttributes")
non_novel_context_features (null)
has_novel_substitions (and exclude_novel_nominals_from_uniqueness_check (size !novelSubstitionFeatureSet))
dt_parameter (get hyperparam_map "dt")
))

;find the closest cases using the same code as generate case, set generate_attempt to 2 so that it
Expand Down

0 comments on commit f475050

Please sign in to comment.