diff --git a/components/suggest/src/benchmarks/geoname.rs b/components/suggest/src/benchmarks/geoname.rs index 428784b283..26a1a6d176 100644 --- a/components/suggest/src/benchmarks/geoname.rs +++ b/components/suggest/src/benchmarks/geoname.rs @@ -18,7 +18,7 @@ pub struct FetchGeonamesArgs { query: &'static str, match_name_prefix: bool, geoname_type: Option, - filter: Option>, + filter: Option>, } pub struct IterationInput { @@ -66,8 +66,7 @@ impl BenchmarkWithInput for GeonameBenchmark { } pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { - static NY_STATE: std::sync::OnceLock = std::sync::OnceLock::new(); - let ny_state = NY_STATE.get_or_init(|| Geoname { + let ny_state = Geoname { geoname_id: 8, name: "New York".to_string(), latitude: 43.00035, @@ -75,7 +74,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { country_code: "US".to_string(), admin1_code: "NY".to_string(), population: 19274244, - }); + }; vec![ // no matches @@ -388,7 +387,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { query: "ny", match_name_prefix: false, geoname_type: None, - filter: Some(vec![&ny_state]), + filter: Some(vec![ny_state.clone()]), }, should_match: true, } @@ -402,7 +401,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { query: "ny", match_name_prefix: false, geoname_type: Some(GeonameType::City), - filter: Some(vec![&ny_state]), + filter: Some(vec![ny_state.clone()]), }, should_match: true, } @@ -414,7 +413,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { query: "ny", match_name_prefix: false, geoname_type: Some(GeonameType::Region), - filter: Some(vec![&ny_state]), + filter: Some(vec![ny_state.clone()]), }, should_match: true, } @@ -428,7 +427,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { query: "ny", match_name_prefix: true, geoname_type: Some(GeonameType::City), - filter: Some(vec![&ny_state]), + filter: Some(vec![ny_state.clone()]), }, should_match: true, } @@ -440,7 +439,7 @@ pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { query: "ny", match_name_prefix: true, geoname_type: Some(GeonameType::Region), - filter: Some(vec![&ny_state]), + filter: Some(vec![ny_state.clone()]), }, should_match: true, } diff --git a/components/suggest/src/geoname.rs b/components/suggest/src/geoname.rs index 6e10a8c566..4cc46068e9 100644 --- a/components/suggest/src/geoname.rs +++ b/components/suggest/src/geoname.rs @@ -8,8 +8,8 @@ /// used by MaxMind's databases [2]. We use GeoNames to detect city and region /// names and to map cities to regions. /// -/// [1] https://www.geonames.org/ -/// [2] https://www.maxmind.com/en/geoip-databases +/// [1]: https://www.geonames.org/ +/// [2]: https://www.maxmind.com/en/geoip-databases use rusqlite::{named_params, Connection}; use serde::Deserialize; use sql_support::ConnExt; @@ -24,18 +24,20 @@ use crate::{ Result, }; -#[derive(Clone, Debug, Eq, Hash, PartialEq)] +/// The type of a geoname. +#[derive(Clone, Debug, Eq, Hash, PartialEq, uniffi::Enum)] pub enum GeonameType { City, Region, } +/// A single geographic place. +/// /// This corresponds to a single row in the main "geoname" table described in -/// the GeoNames documentation [1]. It represents a single place. We exclude -/// fields we don't need. +/// the GeoNames documentation [1]. We exclude fields we don't need. /// -/// [1] https://download.geonames.org/export/dump/readme.txt -#[derive(Clone, Debug)] +/// [1]: https://download.geonames.org/export/dump/readme.txt +#[derive(Clone, Debug, uniffi::Record)] pub struct Geoname { /// The `geonameid` straight from the geoname table. pub geoname_id: i64, @@ -82,15 +84,18 @@ impl Hash for Geoname { } } -/// Value returned by `fetch_geonames()`. -#[derive(Clone, Debug, Eq, PartialEq)] +/// A fetched geoname with info on how it was matched. +#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)] pub struct GeonameMatch { + /// The geoname that was matched. pub geoname: Geoname, + /// The type of name that was matched. pub match_type: GeonameMatchType, + /// Whether the name was matched by prefix. pub prefix: bool, } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, uniffi::Enum)] pub enum GeonameMatchType { /// For U.S. states, abbreviations are the usual two-letter codes ("CA"). Abbreviation, @@ -1419,4 +1424,114 @@ pub(crate) mod tests { Ok(()) } + + #[test] + fn geonames_store_api() -> anyhow::Result<()> { + before_each(); + + let store = new_test_store(); + + // Ingest weather to also ingest geonames. + store.ingest(SuggestIngestionConstraints { + providers: Some(vec![SuggestionProvider::Weather]), + ..SuggestIngestionConstraints::all_providers() + }); + + #[derive(Debug)] + struct Test { + query: &'static str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + expected: Vec, + } + + // This only tests a few different calls to exercise all the fetch + // options. Comprehensive fetch cases are in the main `geonames` test. + let tests = [ + // simple fetch with no options + Test { + query: "ia", + match_name_prefix: false, + geoname_type: None, + filter: None, + expected: vec![GeonameMatch { + geoname: ia(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }], + }, + // filter + Test { + query: "ia", + match_name_prefix: false, + geoname_type: None, + filter: Some(vec![waterloo_ia(), waterloo_al()]), + expected: vec![GeonameMatch { + geoname: ia(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }], + }, + // geoname type: city + Test { + query: "ia", + match_name_prefix: false, + geoname_type: Some(GeonameType::Region), + filter: None, + expected: vec![GeonameMatch { + geoname: ia(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }], + }, + // geoname type: region + Test { + query: "ny", + match_name_prefix: false, + geoname_type: Some(GeonameType::City), + filter: None, + expected: vec![GeonameMatch { + geoname: nyc(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }], + }, + // prefix matching + Test { + query: "ny", + match_name_prefix: true, + geoname_type: None, + filter: None, + expected: vec![ + GeonameMatch { + geoname: nyc(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }, + GeonameMatch { + geoname: ny_state(), + match_type: GeonameMatchType::Abbreviation, + prefix: false, + }, + ], + }, + ]; + + for t in tests { + assert_eq!( + store.fetch_geonames( + t.query, + t.match_name_prefix, + t.geoname_type.clone(), + t.filter.clone() + ), + t.expected, + "Test: {:?}", + t + ); + } + + Ok(()) + } } diff --git a/components/suggest/src/lib.rs b/components/suggest/src/lib.rs index bfa5f5f524..689f39a57b 100644 --- a/components/suggest/src/lib.rs +++ b/components/suggest/src/lib.rs @@ -26,6 +26,7 @@ mod yelp; pub use config::{SuggestGlobalConfig, SuggestProviderConfig}; pub use error::{Error, SuggestApiError}; +pub use geoname::{Geoname, GeonameMatch, GeonameType}; pub use metrics::{LabeledTimingSample, SuggestIngestionMetrics}; pub use provider::{SuggestionProvider, SuggestionProviderConstraints}; pub use query::{QueryWithMetricsResult, SuggestionQuery}; diff --git a/components/suggest/src/provider.rs b/components/suggest/src/provider.rs index bb67c84cbd..7a88f39f30 100644 --- a/components/suggest/src/provider.rs +++ b/components/suggest/src/provider.rs @@ -105,7 +105,7 @@ impl SuggestionProvider { Self::Wikipedia => vec![SuggestRecordType::AmpWikipedia], Self::Amo => vec![SuggestRecordType::Amo], Self::Pocket => vec![SuggestRecordType::Pocket], - Self::Yelp => vec![SuggestRecordType::Yelp], + Self::Yelp => vec![SuggestRecordType::Yelp, SuggestRecordType::Geonames], Self::Mdn => vec![SuggestRecordType::Mdn], Self::Weather => vec![SuggestRecordType::Weather, SuggestRecordType::Geonames], Self::AmpMobile => vec![SuggestRecordType::AmpMobile], diff --git a/components/suggest/src/store.rs b/components/suggest/src/store.rs index a0f0b0ad5d..08b7b4854a 100644 --- a/components/suggest/src/store.rs +++ b/components/suggest/src/store.rs @@ -20,6 +20,7 @@ use crate::{ config::{SuggestGlobalConfig, SuggestProviderConfig}, db::{ConnectionType, IngestedRecord, Sqlite3Extension, SuggestDao, SuggestDb}, error::Error, + geoname::{Geoname, GeonameMatch, GeonameType}, metrics::{MetricsContext, SuggestIngestionMetrics, SuggestQueryMetrics}, provider::{SuggestionProvider, SuggestionProviderConstraints, DEFAULT_INGEST_PROVIDERS}, rs::{ @@ -30,9 +31,6 @@ use crate::{ QueryWithMetricsResult, Result, SuggestApiResult, Suggestion, SuggestionQuery, }; -#[cfg(feature = "benchmark_api")] -use crate::geoname::{Geoname, GeonameMatch, GeonameType}; - /// Builder for [SuggestStore] /// /// Using a builder is preferred to calling the constructor directly since it's harder to confuse @@ -247,13 +245,13 @@ impl SuggestStore { self.inner.clear() } - // Returns global Suggest configuration data. + /// Returns global Suggest configuration data. #[handle_error(Error)] pub fn fetch_global_config(&self) -> SuggestApiResult { self.inner.fetch_global_config() } - // Returns per-provider Suggest configuration data. + /// Returns per-provider Suggest configuration data. #[handle_error(Error)] pub fn fetch_provider_config( &self, @@ -261,6 +259,39 @@ impl SuggestStore { ) -> SuggestApiResult> { self.inner.fetch_provider_config(provider) } + + /// Fetches geonames stored in the database. A geoname represents a + /// geographic place. + /// + /// `query` is a string that will be matched directly against geoname names. + /// It is not a query string in the usual Suggest sense. `match_name_prefix` + /// determines whether prefix matching is performed on names excluding + /// abbreviations and airport codes. When `true`, names that start with + /// `query` will match. When false, names that equal `query` will match. + /// + /// `geoname_type` restricts returned geonames to a [`GeonameType`]. + /// + /// `filter` restricts returned geonames to certain cities or regions. + /// Cities can be restricted to regions by including the regions in + /// `filter`, and regions can be restricted to those containing certain + /// cities by including the cities in `filter`. This is especially useful + /// since city and region names are not unique. `filter` is disjunctive: If + /// any item in `filter` matches a geoname, the geoname will be filtered in. + /// + /// The query can match a geoname in more than one way, for example both a + /// full name and an abbreviation. The returned vec of [`GeonameMatch`] + /// values will include all matches for a geoname, one match per geoname. + #[handle_error(Error)] + pub fn fetch_geonames( + &self, + query: &str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + ) -> SuggestApiResult> { + self.inner + .fetch_geonames(query, match_name_prefix, geoname_type, filter) + } } impl SuggestStore { @@ -277,17 +308,6 @@ impl SuggestStore { pub fn checkpoint(&self) { self.inner.checkpoint(); } - - pub fn fetch_geonames( - &self, - query: &str, - match_name_prefix: bool, - geoname_type: Option, - filter: Option>, - ) -> Result> { - self.inner - .fetch_geonames(query, match_name_prefix, geoname_type, filter) - } } /// Constraints limit which suggestions to ingest from Remote Settings. @@ -470,6 +490,23 @@ impl SuggestStoreInner { let writer = &self.dbs().unwrap().writer; writer.write(|dao| dao.force_reingest()).unwrap(); } + + fn fetch_geonames( + &self, + query: &str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + ) -> Result> { + self.dbs()?.reader.read(|dao| { + dao.fetch_geonames( + query, + match_name_prefix, + geoname_type, + filter.as_ref().map(|f| f.iter().collect()), + ) + }) + } } impl SuggestStoreInner @@ -847,18 +884,6 @@ where conn.query_one("SELECT page_size * page_count FROM pragma_page_count(), pragma_page_size()") .unwrap() } - - pub fn fetch_geonames( - &self, - query: &str, - match_name_prefix: bool, - geoname_type: Option, - filter: Option>, - ) -> Result> { - self.dbs()? - .reader - .read(|dao| dao.fetch_geonames(query, match_name_prefix, geoname_type, filter)) - } } /// Holds a store's open connections to the Suggest database. @@ -940,6 +965,18 @@ pub(crate) mod tests { .fetch_provider_config(provider) .expect("Error fetching provider config") } + + pub fn fetch_geonames( + &self, + query: &str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + ) -> Vec { + self.inner + .fetch_geonames(query, match_name_prefix, geoname_type, filter) + .expect("Error fetching geonames") + } } /// Tests that `SuggestStore` is usable with UniFFI, which requires exposed