Skip to content

Commit

Permalink
Merge pull request #47 from Holzhaus/testing
Browse files Browse the repository at this point in the history
Fix division by zero error when calculating string distance
  • Loading branch information
Holzhaus authored Dec 30, 2024
2 parents 8583959 + 15957a6 commit c59aefd
Show file tree
Hide file tree
Showing 6 changed files with 1,681 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/distance/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ pub struct Distance(f64);

impl Distance {
/// Minimum distance (representing equality).
const MIN: Distance = Distance(0.0);
pub const MIN: Distance = Distance(0.0);

/// Maximum distance.
const MAX: Distance = Distance(1.0);
pub const MAX: Distance = Distance(1.0);

/// Return `true` if the distance is zero.
pub const fn is_equality(&self) -> bool {
Expand Down
9 changes: 8 additions & 1 deletion src/distance/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,16 @@ pub fn between(lhs: &str, rhs: &str) -> Distance {
let lhs = normalize(lhs);
let rhs = normalize(rhs);

let levenshtein_distance = levenshtein(&lhs, &rhs);
let max_possible_distance = cmp::max(lhs.len(), rhs.len());

// Special case: If both strings are empty after normalization, then the strings should be
// considered equal and we can exit early. Otherwise we would divide by zero later on.
if max_possible_distance == 0 {
return Distance::MIN;
}

let levenshtein_distance = levenshtein(&lhs, &rhs);

// FIXME: It's extremely unlikely, but this conversion to f64 is fallible. Hence, it should use
// f64::try_from(usize) instead, but unfortunately that doesn't exist.
Distance::from(levenshtein_distance as f64 / max_possible_distance as f64)
Expand Down
38 changes: 38 additions & 0 deletions src/release_candidate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,41 @@ impl<T: ReleaseLike> From<Vec<ReleaseCandidate<T>>> for ReleaseCandidateCollecti
Self { candidates }
}
}

#[cfg(test)]
mod tests {
    use crate::{
        distance::Distance,
        release_candidate::{ReleaseCandidate, ReleaseCandidateCollection},
        util::FakeRelease,
        Config,
    };

    /// JSON fixture for the base release, embedded at compile time.
    const RELEASE_DATA: &[u8] = include_bytes!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/debug/tuxedo/release.json"
    ));

    /// JSON fixture for the first release candidate, embedded at compile time.
    const RELEASE_CANDIDATE_0_DATA: &[u8] = include_bytes!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/debug/tuxedo/candidate_0.json"
    ));

    /// Builds a one-element candidate collection from the fixtures and checks that the
    /// candidate's total distance to the base release is exactly [`Distance::MIN`].
    #[test]
    fn test_track_assignment_exact() {
        let release: FakeRelease = serde_json::from_slice(RELEASE_DATA).unwrap();
        let candidate_0: FakeRelease = serde_json::from_slice(RELEASE_CANDIDATE_0_DATA).unwrap();

        let config = Config::default();

        // Wrap the single candidate so that it is compared against the base release.
        let wrapped: Vec<_> = std::iter::once(candidate_0)
            .map(|candidate| ReleaseCandidate::with_base_release(candidate, &release, &config))
            .collect();
        let candidates = ReleaseCandidateCollection::from(wrapped);

        // One total distance per candidate, in collection order.
        let distances: Vec<_> = candidates
            .iter()
            .map(|candidate| candidate.similarity().total_distance(&config))
            .collect();

        assert_eq!(distances, [Distance::MIN]);
    }
}
2 changes: 1 addition & 1 deletion src/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ mod time;

pub use fs::{move_file, walk_dir};
pub use keyed_binheap::KeyedBinaryHeap;
#[cfg(feature = "dev")]
#[cfg(any(test, feature = "dev"))]
pub use testing::FakeRelease;
#[cfg(test)]
pub use testing::FakeTrack;
Expand Down
Loading

0 comments on commit c59aefd

Please sign in to comment.