Skip to content

Commit

Permalink
Bug 1931963 - Add UniFFI bindings for fetch_geonames() in suggest
Browse files Browse the repository at this point in the history
I didn't expect to be adding bindings for this, and there might be better ways
to support the use case described in bug than exposing `fetch_geonames()` as is,
but this works for now at least, and we need a fix for Firefox 134. Desktop will
call this to validate cities and states returned by the Yelp ML model. Not only
to validate but also to do prefix matching, since we can get that for free.

I considered adding a new function that matches both cities and regions at once
so that desktop doesn't need to call into suggest twice and ideally so that we
run only one SQL query. But I don't really like the idea of having a completely
different code path for this. Or, it might be possible to rewrite the current
function so it works that way. But that ends up getting a little messy pretty
quickly. So for now I did the simplest thing.

I also considered making geonames a full-fledged `SuggestionProvider` and
`Suggestion`, but then we'd have to add more options to
`SuggestionProviderConstraints` to support the `fetch_geonames()` params, and
these aren't really standalone suggestions anyway.
  • Loading branch information
0c0w3 committed Nov 20, 2024
1 parent c84cc9f commit 3303de1
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 48 deletions.
17 changes: 8 additions & 9 deletions components/suggest/src/benchmarks/geoname.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub struct FetchGeonamesArgs {
query: &'static str,
match_name_prefix: bool,
geoname_type: Option<GeonameType>,
filter: Option<Vec<&'static Geoname>>,
filter: Option<Vec<Geoname>>,
}

pub struct IterationInput {
Expand Down Expand Up @@ -66,16 +66,15 @@ impl BenchmarkWithInput for GeonameBenchmark {
}

pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
static NY_STATE: std::sync::OnceLock<Geoname> = std::sync::OnceLock::new();
let ny_state = NY_STATE.get_or_init(|| Geoname {
let ny_state = Geoname {
geoname_id: 8,
name: "New York".to_string(),
latitude: 43.00035,
longitude: -75.4999,
country_code: "US".to_string(),
admin1_code: "NY".to_string(),
population: 19274244,
});
};

vec![
// no matches
Expand Down Expand Up @@ -388,7 +387,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
query: "ny",
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![&ny_state]),
filter: Some(vec![ny_state.clone()]),
},
should_match: true,
}
Expand All @@ -402,7 +401,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
query: "ny",
match_name_prefix: false,
geoname_type: Some(GeonameType::City),
filter: Some(vec![&ny_state]),
filter: Some(vec![ny_state.clone()]),
},
should_match: true,
}
Expand All @@ -414,7 +413,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
query: "ny",
match_name_prefix: false,
geoname_type: Some(GeonameType::Region),
filter: Some(vec![&ny_state]),
filter: Some(vec![ny_state.clone()]),
},
should_match: true,
}
Expand All @@ -428,7 +427,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
query: "ny",
match_name_prefix: true,
geoname_type: Some(GeonameType::City),
filter: Some(vec![&ny_state]),
filter: Some(vec![ny_state.clone()]),
},
should_match: true,
}
Expand All @@ -440,7 +439,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> {
query: "ny",
match_name_prefix: true,
geoname_type: Some(GeonameType::Region),
filter: Some(vec![&ny_state]),
filter: Some(vec![ny_state.clone()]),
},
should_match: true,
}
Expand Down
135 changes: 125 additions & 10 deletions components/suggest/src/geoname.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
/// used by MaxMind's databases [2]. We use GeoNames to detect city and region
/// names and to map cities to regions.
///
/// [1] https://www.geonames.org/
/// [2] https://www.maxmind.com/en/geoip-databases
/// [1]: https://www.geonames.org/
/// [2]: https://www.maxmind.com/en/geoip-databases
use rusqlite::{named_params, Connection};
use serde::Deserialize;
use sql_support::ConnExt;
Expand All @@ -24,18 +24,20 @@ use crate::{
Result,
};

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
/// The type of a geoname.
#[derive(Clone, Debug, Eq, Hash, PartialEq, uniffi::Enum)]
pub enum GeonameType {
City,
Region,
}

/// A single geographic place.
///
/// This corresponds to a single row in the main "geoname" table described in
/// the GeoNames documentation [1]. It represents a single place. We exclude
/// fields we don't need.
/// the GeoNames documentation [1]. We exclude fields we don't need.
///
/// [1] https://download.geonames.org/export/dump/readme.txt
#[derive(Clone, Debug)]
/// [1]: https://download.geonames.org/export/dump/readme.txt
#[derive(Clone, Debug, uniffi::Record)]
pub struct Geoname {
/// The `geonameid` straight from the geoname table.
pub geoname_id: i64,
Expand Down Expand Up @@ -82,15 +84,18 @@ impl Hash for Geoname {
}
}

/// Value returned by `fetch_geonames()`.
#[derive(Clone, Debug, Eq, PartialEq)]
/// A fetched geoname with info on how it was matched.
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)]
pub struct GeonameMatch {
/// The geoname that was matched.
pub geoname: Geoname,
/// The type of name that was matched.
pub match_type: GeonameMatchType,
/// Whether the name was matched by prefix.
pub prefix: bool,
}

#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Enum)]
pub enum GeonameMatchType {
/// For U.S. states, abbreviations are the usual two-letter codes ("CA").
Abbreviation,
Expand Down Expand Up @@ -1419,4 +1424,114 @@ pub(crate) mod tests {

Ok(())
}

#[test]
fn geonames_store_api() -> anyhow::Result<()> {
before_each();

let store = new_test_store();

// Ingest weather to also ingest geonames.
store.ingest(SuggestIngestionConstraints {
providers: Some(vec![SuggestionProvider::Weather]),
..SuggestIngestionConstraints::all_providers()
});

#[derive(Debug)]
struct Test {
query: &'static str,
match_name_prefix: bool,
geoname_type: Option<GeonameType>,
filter: Option<Vec<Geoname>>,
expected: Vec<GeonameMatch>,
}

// This only tests a few different calls to exercise all the fetch
// options. Comprehensive fetch cases are in the main `geonames` test.
let tests = [
// simple fetch with no options
Test {
query: "ia",
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
// filter
Test {
query: "ia",
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![waterloo_ia(), waterloo_al()]),
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
// geoname type: city
Test {
query: "ia",
match_name_prefix: false,
geoname_type: Some(GeonameType::Region),
filter: None,
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
// geoname type: region
Test {
query: "ny",
match_name_prefix: false,
geoname_type: Some(GeonameType::City),
filter: None,
expected: vec![GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
// prefix matching
Test {
query: "ny",
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
];

for t in tests {
assert_eq!(
store.fetch_geonames(
t.query,
t.match_name_prefix,
t.geoname_type.clone(),
t.filter.clone()
),
t.expected,
"Test: {:?}",
t
);
}

Ok(())
}
}
1 change: 1 addition & 0 deletions components/suggest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ mod yelp;

pub use config::{SuggestGlobalConfig, SuggestProviderConfig};
pub use error::{Error, SuggestApiError};
pub use geoname::{Geoname, GeonameMatch, GeonameType};
pub use metrics::{LabeledTimingSample, SuggestIngestionMetrics};
pub use provider::{SuggestionProvider, SuggestionProviderConstraints};
pub use query::{QueryWithMetricsResult, SuggestionQuery};
Expand Down
2 changes: 1 addition & 1 deletion components/suggest/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl SuggestionProvider {
Self::Wikipedia => vec![SuggestRecordType::AmpWikipedia],
Self::Amo => vec![SuggestRecordType::Amo],
Self::Pocket => vec![SuggestRecordType::Pocket],
Self::Yelp => vec![SuggestRecordType::Yelp],
Self::Yelp => vec![SuggestRecordType::Yelp, SuggestRecordType::Geonames],
Self::Mdn => vec![SuggestRecordType::Mdn],
Self::Weather => vec![SuggestRecordType::Weather, SuggestRecordType::Geonames],
Self::AmpMobile => vec![SuggestRecordType::AmpMobile],
Expand Down
Loading

0 comments on commit 3303de1

Please sign in to comment.