From afa0d1c821949daa92614850368f5db6d1f5653c Mon Sep 17 00:00:00 2001 From: Rizzen Yazston Date: Wed, 29 Mar 2023 13:54:30 +0200 Subject: [PATCH] Corrected runtime bug and redundant code in 'add_previous_characters()' of 'i18n_lexer' crate. Completed implementation of 'Message' crate, and corrected all tests and examples affected by changes done. --- CHANGELOG.asciidoc | 18 +- crates/lexer/README.asciidoc | 12 +- crates/lexer/src/error.rs | 40 -- crates/lexer/src/lexer.rs | 442 ++++++++++------------- crates/lexer/src/lib.rs | 13 +- crates/lexer/tests/tokenise.rs | 36 +- crates/lstring/src/lib.rs | 2 +- crates/message/README.asciidoc | 60 ++- crates/message/i18n/i18n_message.sqlite3 | Bin 0 -> 94208 bytes crates/message/src/error.rs | 40 +- crates/message/src/lib.rs | 61 +++- crates/message/src/message.rs | 293 ++++++++++++++- crates/message/tests/message.rs | 57 +++ crates/pattern/README.asciidoc | 8 +- crates/pattern/src/formatter.rs | 22 +- crates/pattern/src/lib.rs | 8 +- crates/pattern/src/parser.rs | 8 +- crates/pattern/tests/formatter.rs | 44 +-- crates/pattern/tests/parser.rs | 21 +- crates/provider/core/src/provider.rs | 17 +- crates/provider/sqlite3/README.asciidoc | 5 +- crates/provider/sqlite3/src/lib.rs | 5 +- crates/provider/sqlite3/src/provider.rs | 67 +++- crates/provider/sqlite3/tests/sqlite3.rs | 12 +- 24 files changed, 872 insertions(+), 419 deletions(-) delete mode 100644 crates/lexer/src/error.rs create mode 100644 crates/message/i18n/i18n_message.sqlite3 create mode 100644 crates/message/tests/message.rs diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index adc86b2..4af77d0 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -3,13 +3,13 @@ Rizzen Yazston == i18n 0.6.0 (2023-03-??) -WARNING: This update has many API breaking changes for many `i18n` crates. +WARNING: This update has API breaking changes for some `i18n` crates. 
Breaking change is the result of changing how ICU data providers are used and passed to various components, thus many examples are affected even if the module is not affected by the ICU data provider change. * Added the `icu` crate: -* Added `IcuDataProvider`, `DataProviderWrapper`, and `IcuError`. +** Added `IcuDataProvider`, `DataProviderWrapper`, and `IcuError`. ** Added the `Cargo.toml`, license, and documentation. @@ -23,15 +23,29 @@ Breaking change is the result of changing how ICU data providers are used and pa ** Added `LStringProviderSqlite3`, `AsLStringProviderSqlite3`, and its blanket implementation. +** Removed the requirement of `RefCell` for `language_tag_registry` parameter and struct, as it was redundant. + ** Updated tests, examples and documentation. * Updated the `i18n_lexer` crate: +** Made `Lexer` struct private, made both `tokenise()` and `add_previous_characters()` methods as normal functions, removed `try_new` method and added `&Rc` to `tokenise()` function parameters. + +** Removed the `error.rs` as neither of the functions returns errors. + +** Updated the `lib.rs` to remove `error` module. +** Updated `Cargo.toml`, tests, examples and documentation. * Updated the `i18n_pattern` crate: +** Updated `Formatter` to use `IcuDataProvider` + +** Updated `Cargo.toml`, tests, examples and documentation. + +* Updated `i18n_lstring` crate: +** Added `Clone` to `#[Derive()]` to allow cloning. 
* Added the `i18n_message` crate: diff --git a/crates/lexer/README.asciidoc b/crates/lexer/README.asciidoc index f4b1e71..883cbac 100644 --- a/crates/lexer/README.asciidoc +++ b/crates/lexer/README.asciidoc @@ -66,19 +66,19 @@ features = [ "serde" ] ``` use i18n_icu::IcuDataProvider; -use i18n_lexer::{Token, TokenType, Lexer}; +use i18n_lexer::{Token, TokenType, tokenise}; use icu_testdata::buffer; use icu_provider::serde::AsDeserializingBufferProvider; use std::rc::Rc; use std::error::Error; -fn tokenise() -> Result<(), Box> { +fn test_tokenise() -> Result<(), Box> { let buffer_provider = buffer(); let data_provider = buffer_provider.as_deserializing(); - let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - let tokens = lexer.tokenise( - "String contains a {placeholder}.", &vec![ '{', '}' ] + let tokens = tokenise( + "String contains a {placeholder}.", + &vec![ '{', '}' ], + &Rc::new( icu_data_provider ), ); let mut grammar = 0; assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); diff --git a/crates/lexer/src/error.rs b/crates/lexer/src/error.rs deleted file mode 100644 index 5e44747..0000000 --- a/crates/lexer/src/error.rs +++ /dev/null @@ -1,40 +0,0 @@ -// This file is part of `i18n_lexer-rizzen-yazston` crate. For the terms of use, please see the file -// called `LICENSE-BSD-3-Clause` at the top level of the `i18n_lexer-rizzen-yazston` crate. - -use icu_properties::PropertiesError; -use icu_segmenter::SegmenterError; -use std::error::Error; // Experimental in `core` crate. -use core::fmt::{ Display, Formatter, Result }; - -#[derive( Debug, Copy, Clone )] -#[non_exhaustive] -pub enum LexerError { - Properties( PropertiesError ), - Segmenter( SegmenterError ), -} - -impl Display for LexerError { - - /// Simply call the display formatter of embedded error. 
- fn fmt( &self, formatter: &mut Formatter ) -> Result { - match *self { - LexerError::Properties( ref error ) => error.fmt( formatter ), - LexerError::Segmenter( ref error ) => error.fmt( formatter ), - } - } -} - -// Source is embedded in the enum value. -impl Error for LexerError {} - -impl From for LexerError { - fn from( error: PropertiesError ) -> LexerError { - LexerError::Properties( error ) - } -} - -impl From for LexerError { - fn from( error: SegmenterError ) -> LexerError { - LexerError::Segmenter( error ) - } -} diff --git a/crates/lexer/src/lexer.rs b/crates/lexer/src/lexer.rs index acbac8d..caf99f2 100644 --- a/crates/lexer/src/lexer.rs +++ b/crates/lexer/src/lexer.rs @@ -1,11 +1,8 @@ // This file is part of `i18n_lexer-rizzen-yazston` crate. For the terms of use, please see the file // called `LICENSE-BSD-3-Clause` at the top level of the `i18n_lexer-rizzen-yazston` crate. -// TODO: Change return of tokenise to include a boolean to indicate if there are any Grammar tokens. This to aid in -// skipping the costly parsing and formatter functions, where a cheap alternative function can be used. // FUTURE: Look into storing &str instead of String, perhaps original string can live long enough for Token existence. -use crate::LexerError; use i18n_icu::IcuDataProvider; use icu_provider::prelude::*; use icu_properties::{ provider::{ PatternSyntaxV1Marker, PatternWhiteSpaceV1Marker } }; @@ -65,44 +62,56 @@ pub enum TokenType { Syntax, // All other Pattern_Syntax characters (UAX #31), excluding listed grammar syntax characters. } -/// The `Lexer` tokenises the provided string into a vector of `Token`s. +/// Tokenise a string (as [`&str`]) into a vector of tokens ([`Vec`]`<`[`Rc`]`>`). /// -/// No character is removed during the tokenising process, thus the original string can be reconstructed just from the -/// tokens. 
Editors and formatting tools can reformat source files, usually just altering the white space, in order for -/// the source file to adhere to formatting specifications. +/// Non-grammar syntax characters are simply made into `Syntax` tokens for the parser to handle. /// -/// Identifiers are determined according to the -/// [Unicode Standard Annex #31: Unicode Identifier and Pattern Syntax](https://unicode.org/reports/tr31/). +/// No characters are discarded, thus every character belongs to a token. Allowing for the full reconstruction of +/// the original string, that was tokenised. /// -/// White space and syntax characters are identified according to the character properties `Pattern_Syntax` and -/// `Pattern_White_Space` as defined in the -/// [Unicode Standard Annex #44: Unicode Character Database](https://www.unicode.org/reports/tr44/). -pub struct Lexer<'a, P> -where - P: ?Sized + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider - + DataProvider + DataProvider, -{ - data_provider: Rc>, - token_position_byte: Option, - token_position_character: Option, - token_position_grapheme: Option, - position_byte: usize, - position_character: usize, - position_grapheme: usize, -} - -impl<'a, P> Lexer<'a, P> -where +/// The `grammar` parameter contain of a simple vector of [`char`]s containing all the characters that are used as +/// grammar syntax characters within a parser. Each grammar syntax character is placed in its own `Token` of type +/// `Grammar`. +/// +/// Note: Only single character graphemes are supported for grammar syntax characters. 
+/// +/// # Examples +/// +/// ``` +/// use i18n_icu::IcuDataProvider; +/// use i18n_lexer::{Token, TokenType, tokenise}; +/// use icu_testdata::buffer; +/// use icu_provider::serde::AsDeserializingBufferProvider; +/// use std::rc::Rc; +/// use std::error::Error; +/// +/// fn test_tokenise() -> Result<(), Box> { +/// let buffer_provider = buffer(); +/// let data_provider = buffer_provider.as_deserializing(); +/// let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; +/// let tokens = tokenise( +/// "String contains a {placeholder}.", +/// &vec![ '{', '}' ], +/// &Rc::new( icu_data_provider ), +/// ); +/// let mut grammar = 0; +/// assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); +/// for token in tokens.0.iter() { +/// if token.token_type == TokenType::Grammar { +/// grammar += 1; +/// } +/// } +/// assert_eq!( grammar, 2, "Supposed to be 2 grammar tokens." ); +/// Ok( () ) +/// } +/// ``` +/// +/// [`&str`]: https://doc.rust-lang.org/core/primitive.str.html +/// [`Vec`]: https://doc.rust-lang.org/std/vec/index.html +/// [`Rc`]: https://doc.rust-lang.org/std/rc/struct.Rc.html +/// [`char`]: https://doc.rust-lang.org/core/primitive.char.html +pub fn tokenise<'a, + T: AsRef, P: ?Sized + DataProvider + DataProvider + DataProvider + DataProvider + DataProvider + DataProvider + DataProvider @@ -114,231 +123,101 @@ where + DataProvider + DataProvider + DataProvider + DataProvider + DataProvider + DataProvider - + DataProvider + DataProvider, -{ - - /// Attempts to initialise the `Lexer` for tokenising a string using an ICU provider for character data. - /// - /// For the parameter `data_provider`, the [`FsDataProvider`] type is usually used in creating the - /// `IcuDataProvider`, though other providers are available to use. 
- /// - /// # Examples - /// - /// ``` - /// use i18n_icu::IcuDataProvider; - /// use i18n_lexer::{Token, TokenType, Lexer}; - /// use icu_testdata::buffer; - /// use icu_provider::serde::AsDeserializingBufferProvider; - /// use std::rc::Rc; - /// use std::error::Error; - /// - /// fn tokenise() -> Result<(), Box> { - /// let buffer_provider = buffer(); - /// let data_provider = buffer_provider.as_deserializing(); - /// let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - /// let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - /// let tokens = lexer.tokenise( - /// "String contains a {placeholder}.", &vec![ '{', '}' ] - /// ); - /// let mut grammar = 0; - /// assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); - /// for token in tokens.0.iter() { - /// if token.token_type == TokenType::Grammar { - /// grammar += 1; - /// } - /// } - /// assert_eq!( grammar, 2, "Supposed to be 2 grammar tokens." ); - /// Ok( () ) - /// } - /// ``` - /// [`FsDataProvider`]: https://docs.rs/icu_provider_fs/latest/icu_provider_fs/struct.FsDataProvider.html - pub fn try_new( data_provider: &Rc> ) -> Result, LexerError> { - Ok( Lexer { - data_provider: Rc::clone( data_provider ), - token_position_byte: None, - token_position_character: None, - token_position_grapheme: None, - position_byte: 0, - position_character: 0, - position_grapheme: 0, - } ) + + DataProvider + DataProvider +>( string: T, grammar: &Vec, data_provider: &Rc>, ) -> ( Vec>, bool ) { + let mut tokens = Vec::>::new(); + let mut has_grammar = false; + if string.as_ref().len() == 0 { + return ( tokens, has_grammar ); } + let mut lexer = Lexer { + data_provider: Rc::clone( data_provider ), + position_byte: 0, + position_character: 0, + token_position_byte: 0, + token_position_character: 0, + token_position_grapheme: 0, + }; + let mut state = LexerStates::Identifier; // Most strings would begin with an alphabet letter. 
+ let mut iterator = string.as_ref().char_indices(); + while let Some( ( position, character ) ) = iterator.next() { + lexer.position_byte = position; - /// Tokenise a string (as [`&str`]) into a vector of tokens ([`Vec`]`<`[`Rc`]`>`). - /// - /// Non-grammar syntax characters are simply made into `Syntax` tokens for the parser to handle. - /// - /// No characters are discarded, thus every character belongs to a token. Allowing for the full reconstruction of - /// the original string, that was tokenised. - /// - /// The `grammar` parameter contain of a simple vector of [`char`]s containing all the characters that are used as - /// grammar syntax characters within a parser. Each grammar syntax character is placed in its own `Token` of type - /// `Grammar`. - /// - /// Note: Only single character graphemes are supported for grammar syntax characters. - /// - /// # Examples - /// - /// ``` - /// use i18n_icu::IcuDataProvider; - /// use i18n_lexer::{Token, TokenType, Lexer}; - /// use icu_testdata::buffer; - /// use icu_provider::serde::AsDeserializingBufferProvider; - /// use std::rc::Rc; - /// use std::error::Error; - /// - /// fn tokenise() -> Result<(), Box> { - /// let buffer_provider = buffer(); - /// let data_provider = buffer_provider.as_deserializing(); - /// let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - /// let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - /// let tokens = lexer.tokenise( - /// "String contains a {placeholder}.", &vec![ '{', '}' ] - /// ); - /// let mut grammar = 0; - /// assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); - /// for token in tokens.0.iter() { - /// if token.token_type == TokenType::Grammar { - /// grammar += 1; - /// } - /// } - /// assert_eq!( grammar, 2, "Supposed to be 2 grammar tokens." 
); - /// Ok( () ) - /// } - /// ``` - /// - /// [`&str`]: https://doc.rust-lang.org/core/primitive.str.html - /// [`Vec`]: https://doc.rust-lang.org/std/vec/index.html - /// [`Rc`]: https://doc.rust-lang.org/std/rc/struct.Rc.html - /// [`char`]: https://doc.rust-lang.org/core/primitive.char.html - pub fn tokenise>( &mut self, string: T, grammar: &Vec ) -> ( Vec>, bool ) { - let mut tokens = Vec::>::new(); - let mut has_grammar = false; - if string.as_ref().len() == 0 { - return ( tokens, has_grammar ); - } - - // Resets the Lexer - self.position_byte = 0; - self.position_character = 0; - self.position_grapheme = 0; - self.token_position_byte = Some( self.position_byte ); - self.token_position_character = Some( self.position_character ); - self.token_position_grapheme = Some( self.position_grapheme ); - - let mut state = LexerStates::Identifier; +// These are left here until the GraphemeClusterSegmenter works correctly, currently buggy. +//println!( "Byte position: {}", lexer.position_byte ); +//println!( "Character position: {}", lexer.position_character ); +//println!( "Grapheme position: {}", lexer.position_grapheme ); - let mut iterator = string.as_ref().char_indices(); - - while let Some( ( position, character ) ) = iterator.next() { - self.position_byte = position; - if self.data_provider.pattern_white_space().as_borrowed().contains( character ) { - if state == LexerStates::Identifier { - self.add_previous_characters( &mut tokens, TokenType::Identifier, string.as_ref() ); - } - else if state == LexerStates::Grammar { - self.add_previous_characters( &mut tokens, TokenType::Grammar, string.as_ref() ); - } - else if state == LexerStates::Syntax { - self.add_previous_characters( &mut tokens, TokenType::Syntax, string.as_ref() ); - } - state = LexerStates::WhiteSpace; + if lexer.data_provider.pattern_white_space().as_borrowed().contains( character ) { + if state == LexerStates::Identifier { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Identifier, 
string.as_ref() ); } - else if self.data_provider.pattern_syntax().as_borrowed().contains( character ) { - let state_previous = state; - if grammar.contains( &character ) { - state = LexerStates::Grammar; - has_grammar = true; - } - else { - state = LexerStates::Syntax; - } - if state_previous == LexerStates::Identifier { - self.add_previous_characters( &mut tokens, TokenType::Identifier, string.as_ref() ); - } - else if state_previous == LexerStates::WhiteSpace { - self.add_previous_characters( &mut tokens, TokenType::WhiteSpace, string.as_ref() ); - } - else { - if state_previous == LexerStates::Grammar { - self.add_previous_characters( &mut tokens, TokenType::Grammar, string.as_ref() ); - } - else { - if state == LexerStates::Grammar { - self.add_previous_characters( &mut tokens, TokenType::Syntax, string.as_ref() ); - } - } - } + else if state == LexerStates::Grammar { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Grammar, string.as_ref() ); + } + else if state == LexerStates::Syntax { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Syntax, string.as_ref() ); + } + state = LexerStates::WhiteSpace; + } + else if lexer.data_provider.pattern_syntax().as_borrowed().contains( character ) { + let state_previous = state; + if grammar.contains( &character ) { + state = LexerStates::Grammar; + has_grammar = true; } else { - if state == LexerStates::WhiteSpace { - self.add_previous_characters( &mut tokens, TokenType::WhiteSpace, string.as_ref() ); - } - else if state == LexerStates::Grammar { - self.add_previous_characters( &mut tokens, TokenType::Grammar, string.as_ref() ); + state = LexerStates::Syntax; + } + if state_previous == LexerStates::Identifier { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Identifier, string.as_ref() ); + } + else if state_previous == LexerStates::WhiteSpace { + add_previous_characters( &mut lexer, &mut tokens, TokenType::WhiteSpace, string.as_ref() ); + } + else { + if state_previous == 
LexerStates::Grammar { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Grammar, string.as_ref() ); } - else if state == LexerStates::Syntax { - self.add_previous_characters( &mut tokens, TokenType::Syntax, string.as_ref() ); + else { + if state == LexerStates::Grammar { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Syntax, string.as_ref() ); + } } - state = LexerStates::Identifier; } - self.position_character += 1; } - - // Complete final token - if !self.token_position_byte.is_none() { - self.position_byte = string.as_ref().len(); - match state { - LexerStates::Grammar => { - self.add_previous_characters( &mut tokens, TokenType::Grammar, string.as_ref() ); - }, - LexerStates::Syntax => { - self.add_previous_characters( &mut tokens, TokenType::Syntax, string.as_ref() ); - }, - LexerStates::Identifier => { - self.add_previous_characters( &mut tokens, TokenType::Identifier, string.as_ref() ); - }, - LexerStates::WhiteSpace => { - self.add_previous_characters( &mut tokens, TokenType::WhiteSpace, string.as_ref() ); - } + else { + if state == LexerStates::WhiteSpace { + add_previous_characters( &mut lexer, &mut tokens, TokenType::WhiteSpace, string.as_ref() ); + } + else if state == LexerStates::Grammar { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Grammar, string.as_ref() ); } + else if state == LexerStates::Syntax { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Syntax, string.as_ref() ); + } + state = LexerStates::Identifier; } - ( tokens, has_grammar ) + lexer.position_character += 1; } - // Create a token for slice starting at the byte position after the previous token until current byte position. 
- fn add_previous_characters>( - &mut self, - tokens: &mut Vec::>, - token: TokenType, - string: T, - ) { - if self.token_position_byte != Some( self.position_byte ) { - let start_byte = self.token_position_byte.unwrap(); - let start_character = self.token_position_character.unwrap(); - let start_grapheme = self.token_position_grapheme.unwrap(); - let slice = &string.as_ref()[ start_byte .. self.position_byte ]; - let len_byte = self.position_character - start_character; - let len_character = self.position_character - start_character; - let len_grapheme = self.data_provider.grapheme_segmenter().segment_str( slice ).count() - 1; - self.position_grapheme += len_grapheme; - tokens.push( Rc::new( - Token { - token_type: token, - string: slice.to_string(), - position_byte: start_byte, - position_character: start_character, - position_grapheme: start_grapheme, - length_bytes: len_byte, - length_characters: len_character, - length_graphemes: len_grapheme, - } - ) ); - self.token_position_byte = Some( start_byte + len_byte ); - self.token_position_character = Some( start_character + len_character ); - self.token_position_grapheme = Some( start_grapheme + len_grapheme ); + // Complete final token + lexer.position_byte = string.as_ref().len(); + match state { + LexerStates::Grammar => { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Grammar, string.as_ref() ); + }, + LexerStates::Syntax => { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Syntax, string.as_ref() ); + }, + LexerStates::Identifier => { + add_previous_characters( &mut lexer, &mut tokens, TokenType::Identifier, string.as_ref() ); + }, + LexerStates::WhiteSpace => { + add_previous_characters( &mut lexer, &mut tokens, TokenType::WhiteSpace, string.as_ref() ); } } + ( tokens, has_grammar ) } // Internal structures, enums, etc. @@ -351,3 +230,72 @@ enum LexerStates { Grammar, // A grammar syntax character. Syntax, // Any other syntax character. 
} + +struct Lexer<'a, P> +where + P: ?Sized + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider, +{ + data_provider: Rc>, + token_position_byte: usize, + token_position_character: usize, + token_position_grapheme: usize, + position_byte: usize, + position_character: usize, +} + +// Create a token for slice starting at the byte position after the previous token until current byte position. +fn add_previous_characters<'a, +T: AsRef, +P: ?Sized + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider +>( + lexer: &mut Lexer<'a, P>, + tokens: &mut Vec::>, + token: TokenType, + string: T, +) { + if lexer.token_position_byte != lexer.position_byte { + let slice = &string.as_ref()[ lexer.token_position_byte .. lexer.position_byte ]; + let len_byte = lexer.position_byte - lexer.token_position_byte; + let len_character = lexer.position_character - lexer.token_position_character; + // GraphemeClusterSegmenter is currently buggy, thus is commented out. + //let len_grapheme = lexer.data_provider.grapheme_segmenter().segment_str( slice ).count(); + let len_grapheme = 1; // Temporary statement. 
+ tokens.push( Rc::new( + Token { + token_type: token, + string: slice.to_string(), + position_byte: lexer.token_position_byte, + position_character: lexer.token_position_character, + position_grapheme: lexer.token_position_grapheme, + length_bytes: len_byte, + length_characters: len_character, + length_graphemes: len_grapheme, + } + ) ); + lexer.token_position_byte += len_byte; + lexer.token_position_character += len_character; + lexer.token_position_grapheme += len_grapheme; + } +} diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index 2812221..a7127fd 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -18,19 +18,20 @@ //! //! ``` //! use i18n_icu::IcuDataProvider; -//! use i18n_lexer::{Token, TokenType, Lexer}; +//! use i18n_lexer::{Token, TokenType, tokenise}; //! use icu_testdata::buffer; //! use icu_provider::serde::AsDeserializingBufferProvider; //! use std::rc::Rc; //! use std::error::Error; //! -//! fn tokenise() -> Result<(), Box> { +//! fn test_tokenise() -> Result<(), Box> { //! let buffer_provider = buffer(); //! let data_provider = buffer_provider.as_deserializing(); //! let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; -//! let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; -//! let tokens = lexer.tokenise( -//! "String contains a {placeholder}.", &vec![ '{', '}' ] +//! let tokens = tokenise( +//! "String contains a {placeholder}.", +//! &vec![ '{', '}' ], +//! &Rc::new( icu_data_provider ), //! ); //! let mut grammar = 0; //! assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); @@ -51,5 +52,3 @@ pub mod lexer; pub use lexer::*; -pub mod error; -pub use error::*; diff --git a/crates/lexer/tests/tokenise.rs b/crates/lexer/tests/tokenise.rs index 7dd7e12..8f1204d 100644 --- a/crates/lexer/tests/tokenise.rs +++ b/crates/lexer/tests/tokenise.rs @@ -4,20 +4,22 @@ //! Testing `tokenise()`. 
use i18n_icu::IcuDataProvider; -use i18n_lexer::{ Lexer, TokenType }; +use i18n_lexer::{ tokenise, TokenType }; use icu_testdata::buffer; use icu_provider::serde::AsDeserializingBufferProvider; use std::rc::Rc; use std::error::Error; #[test] -fn tokenise() -> Result<(), Box> { +fn tokenise_single_byte_character_string() -> Result<(), Box> { let buffer_provider = buffer(); let data_provider = buffer_provider.as_deserializing(); - let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - let tokens = lexer.tokenise( - "String contains a {placeholder}.", &vec![ '{', '}' ] + let icu_data_provider = + IcuDataProvider::try_new( &data_provider )?; + let tokens = tokenise( + "String contains a {placeholder}.", + &vec![ '{', '}' ], + &Rc::new( icu_data_provider ), ); let mut grammar = 0; assert_eq!( tokens.0.iter().count(), 10, "Supposed to be a total of 10 tokens." ); @@ -29,3 +31,25 @@ fn tokenise() -> Result<(), Box> { assert_eq!( grammar, 2, "Supposed to be 2 grammar tokens." ); Ok( () ) } + +#[test] +fn tokenise_multi_byte_character_string() -> Result<(), Box> { + let buffer_provider = buffer(); + let data_provider = buffer_provider.as_deserializing(); + let icu_data_provider = + IcuDataProvider::try_new( &data_provider )?; + let tokens = tokenise( + "String contains a ‘{identifier}’.", + &vec![ '{', '}' ], + &Rc::new( icu_data_provider ), + ); + let mut grammar = 0; + assert_eq!( tokens.0.iter().count(), 11, "Supposed to be a total of 11 tokens." ); + for token in tokens.0.iter() { + if token.token_type == TokenType::Grammar { + grammar += 1; + } + } + assert_eq!( grammar, 2, "Supposed to be 2 grammar tokens." 
); + Ok( () ) +} diff --git a/crates/lstring/src/lib.rs b/crates/lstring/src/lib.rs index 5f5d77b..f5f44f1 100644 --- a/crates/lstring/src/lib.rs +++ b/crates/lstring/src/lib.rs @@ -56,7 +56,7 @@ use std::rc::Rc; /// [`String`]: https://doc.rust-lang.org/std/string/struct.String.html /// [`Rc`]: https://doc.rust-lang.org/std/rc/struct.Rc.html /// [BCP 47 Language Tag]: https://www.rfc-editor.org/rfc/bcp/bcp47.txt -#[derive( PartialEq, Debug )] +#[derive( PartialEq, Debug, Clone )] pub struct LString { string: String, language_tag: Rc, diff --git a/crates/message/README.asciidoc b/crates/message/README.asciidoc index 1f2e172..3094391 100644 --- a/crates/message/README.asciidoc +++ b/crates/message/README.asciidoc @@ -3,7 +3,13 @@ Rizzen Yazston == Message system -TODO: Get from lib.rs or other .rs files +The `i18n_message` crate contains the messaging system. + +A message system that connects to a string data store, to obtain strings for the specified language using a string identifier, and formatting the string to replace any placeholders within the string with provided values. + +The message is capable of caching retrieved strings that are prepared for placeholder replacement, thus can be reused without the need to parse the string for placeholders. + +The message system makes use of all the other component crates that make up the `i18n` project. Ideally one only needs to use the meta crate `i18n`, as it includes all the crates including this `i18n_message` crate. 
== Cargo.toml @@ -20,5 +26,55 @@ i18n_message-rizzen-yazston = "0.5.0" == Examples ``` -TODO: Get from lib.rs or other .rs files +use i18n_icu::IcuDataProvider; +use i18n_registry::LanguageTagRegistry; +use i18n_provider_sqlite3::ProviderSqlite3; +use i18n_pattern::PlaceholderValue; +use i18n_message::Message; +use icu_testdata::buffer; +use icu_provider::serde::AsDeserializingBufferProvider; +use std::collections::HashMap; +use std::rc::Rc; +use std::error::Error; + +fn message() -> Result<(), Box> { + let buffer_provider = buffer(); + let data_provider = buffer_provider.as_deserializing(); + let icu_data_provider = Rc::new( + IcuDataProvider::try_new( &data_provider )? + ); + let language_tag_registry = Rc::new( LanguageTagRegistry::new() ); + let lstring_provider = ProviderSqlite3::try_new( + "./i18n/", &language_tag_registry + )?; + let message_system = Message::try_new( + &icu_data_provider, &language_tag_registry, &lstring_provider, true, true + )?; + let mut values = HashMap::::new(); + values.insert( + "identifier".to_string(), + PlaceholderValue::String( "i18n_message/string_not_found".to_string() ) + ); + values.insert( + "language_tag".to_string(), + PlaceholderValue::String( "en-ZA".to_string() ) + ); + values.insert( + "fallback".to_string(), + PlaceholderValue::String( "true".to_string() ) + ); + let lstring = message_system.format( + "i18n_message/string_not_found", + &values, + &language_tag_registry.get_language_tag( "en-ZA" ).unwrap(), + None, + None + )?; + assert_eq!( + lstring.as_str(), + "No string was found for identifier ‘i18n_message/string_not_found’ and language tag ‘en-ZA’. Fallback used: True.", + "Check placeholder values." 
+ ); + Ok( () ) +} ``` diff --git a/crates/message/i18n/i18n_message.sqlite3 b/crates/message/i18n/i18n_message.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..9ae63e898b102a310e3fcb09998d8885b2bf8843 GIT binary patch literal 94208 zcmeIb34B}Gbszj50b(PT(P$)S8I2xCk|jz4L~y6pfhbC%BrXz3X*7}s1A-ul5eZNL zD2cA(&Tg8FNx#2t=-0rziCz{`5HG%?Ivm3 zCQjRP&fVU7cmToKJjoBFC=&18<=k`cJ@?#m&i&uXt5)JDh zp=m9Bnx;kJ#|1xI;b#VZ-0&m5bn!>+-%v(8qlNt6uep3b-0J(`K)UVY*6%>NcJAhz zCI^}vXmX&*fhGr<9Qbx{An<>C+`YWf(>6yjsM7mtg7W4d9Xz1E-a%$L^N}jzk zZ19K1p=*T)Bj=2fv8mw;!`F<7Ya^q{Yd4L{!#9r_BeU6jIX9Qf7LBRlXQzy@arl|O za^!_h4~;{`+EnCfQn6T@ic-0l%U98s zDJ(5PlZfP_#{Fy&8#im51GXGBhQ`Mxr>-S|6UIu4H9cDt4K=36My^f|8;2A%M-5Z` z9UX!9YoCvvIH5n6LPUKBr3MbUBwv zCD?Bd6!cEa)jRBoj_j+IX?Zc7$u1NYXS2mr!t@~$xT*Otd+e+yn%rG0`%-#&85opG z@ITt1R+jBdy==U{phniI{@{82F|4gJHb{N`2pRz+Z+m#p@^or6TPndI=6!qANN36`=|w&|GKJZ!>Cf}U!jk-0a};${ zp1S0X?%AV1iVz=`vgPz_x||ljy%tW0r;WAg>ATt0r>)I_F`QmpVPu+GUuT$ig!XIj zPjme&_1gY@UL3QPR5^X5r`{Nyr1_Leu-~rUW3|U-W&E5sx^JKU2-*%q!8A5ae+Dg# zFuiDu7q6{<*#zUgct2gtA$<*WYGhRJjPBg2KRQ4ynxy&4{%^IkgFUMwQ%#-3B#aWS zT)-$9%UI1jymEMi_0(M=fyt0kd1W?NNR_io%g}+)mKu*+EJW5wbSPjLaxFLtAD+G} z#v6XRE(aQZ8Zrw~d@Drh*z{+&joZCd1az!dl?8vYOJ zajyoyUK>+wy^g#z_4CH84FS!A+Rtf`Pe!gqc7=a0{HgH$@S)J3hW>fzQRs5$@!)?5 zet$3>Jk<8hwtv?4?zWSGzX<$nU?p%W(Bl7s|2zH9`J=us`CjrJ@&2m!?|J(@f8zOZ z&vj3*^`ouVTerDib$`%(+x?j9S6%q$ z=~9U(nJcX1XKf|g-uA@0VqJ_zEOa#GKqD5~FpXVi8podZY2$-78VfToLgP#Ee0Cl* zhWptRNq-_!NHh7a2Uysx$V=VvX*aLdD36Ls6;{fr!dz+wn_jZD-5~{5U~2!uZJ%~4 zU891a)`n?CJs zFVdII-_I2bc~k`Bpr$P5GQiA2KBd-_I0)48D(GdXL)th&uW)=y=63d<9P(*5h@xC# zARk{8IkS0CB2dfXjpIY1JJv5a*B9&RJ#p5jP1nm!Q)6X$xlk-;XE8Hx7G}Pbgt6R@ z&0sTJbtcVRT#$s6mXJE_`AtG_Pn9c!{7K_>R z>?+KrTsa5xt(2XGp|6^d7)YM>X=zJNUP1~|Te*ylDKF%TP=w@RUo0_j{FG0dtX%|5 zZYCnKnL?iO^09|`&z$sWcWURILz(cPP`t~gcCJ_|W95>IiGgziJ}sl|whfL_rkGnU zr_#mw6&x(3crkl#C0ESOvUvx>3FL}t3gr8J+WegjO98<}0!v7zxM>)pP-8dDLOm#HQNTMH_9lx40CWrp);xIE1D663PH`Z{RWx zG7KCYnFW}1Vz-U5t<)XwiSq#rqN_i-4|u5UbQFU!Uuk6qvZj`cx%)6`Q1~UJ4Cw0Z 
z-0RcMC{h*+nKXz^o9s17JrLYa`LzBcl6amYmsv?=i^W1QP7+yugm#PGL1Ej!M<_$J z1veuY2^~~Ydhkh<(#}mKEg)Y`@eLp~(Invec0(7|$Xd+K<04ps^{`C&dR3J}yHGqj zSILDWOhy}@sgQnE$nl*pnQIg>yTX>?Y)Umb(WmmB+u_r)HS*f3O%=1HZ1FyfmmZ=( z4U*ooQCy{*n^CJ2Dnl+cUA?{AQTcE#BVQ1VFHttGvrun8;nObH$X3WNuBOt9%M0n5 zEMtAAYI)yQ%&5kgd40#h467lSu}CjS#7x}of4uhaQf(ETZDMu*mf9Jif$2rGD3bPx zZ8Qy%!N;(FohfCC{i+-ynAOCc6A`NhWNsu|0zvIfm0mXQ_W}31>jSQDxIWwRfBB#F zzc-WyE5j#3L!tKYgYb>eFNXhN_;~1Rp}!9AZdq-OMf|>BikuGr)9}}VUkQA)?PS{% z{+|teGw@~K|H#~}Xixj}{%ECJYd?w`^{V}9HXnN~d0p7#e)ruUS`$gufA`%VHb5IT z(uPnBx!IGY0s|-}Ichvu$YmCcT*-h%+ej}i79M10<8lypuDAR2GttTct^Ek205XDp zVbLOqim8RH0nc+wD@&$|;0aLz<#V;uC@-Y*@E@d3KeVS#Kg86g>WhX?zYwi>wf4hU zJ((#Dy$S?e6n3dNiwFZzp`zYPftZFda`|A4WfhuXL}JGRYcARv+Kd^<2HI4 zcsS6_o&p^7rq47UhnHruv$HT%pr=N;dxy3lCc4&l3PJERTO1u!eMhdap`V_oX~E@8 zNf>5mY{XIBPtVL0v-fi})YpwGtaQ;=UA>h{JA8Uav~urWyFvBKErP6c&7qvd+)OcD zTs6?Bw1QFr8VTZurV@1yOt9qwjO;RzDL)ah4|33Pd`ujt_xuCCy}uJJ&?^20 z1u35fGph!4Ja~bLlUY+A_f}j{pWYp<_>=8>tulr-s??$lg$v3u5HQ#pG`v|QOandI zS=qncr%y*KceM6BmMMt)uoO)r5E|et<@15a8earKgS%B*hcS*WWiyZucArch6m#?Q z#PUMEhCmR*t%|*(=W&*9^Xa{?1SQ*_TrAAT+2piK1>k~^Rt?IcnpJPiJ0QyQgint} zE8CLoyUj!)>xfTDF~|y;%t{f5-R61F5F!4pG?ajlT}a3bSS-N+naQSLOcz&)76ZQa zt>sr>mKWEL8$+rw?RLaQM`SEL`$*Ga?u|~?e5ca}I z0L~gPcuU zRZ&X)7`}FG{8}RvU2elp+>>m7TonW6VQ3pziICw#*G8aq$t#T%HXPtIZBd0mF??h} z8P}#7DQ476u&HKFic=WE;9^BCUPc5Ec z9tD!tJcn2qxyJw|qVyQZ^*1q4_Ig1RzL{)qvsOf_i=!fpD?Pm(15Jc2Xl50-p($NC z;PL6{Xk}7s4~TmC?>b%zYUvc{KqgO-!LwO(B=Tv5{bF+(W?UXdf_ z3hbBH4Y_YNEfR-XefngyGOxA!9qGgs3wH}8a)n3&ot82uLRq#3C{6L)#2rUcZ@O`N z0gJB3vgo4bEtfD!pP7MKh=ZCnNGZWZUo3ZOjPzc0dG*2QwAS8QU9?=8?EPXZmW;5H zvAt6*UfBOZk?yJ@!LX%dLY-3c1*UJA2wl>>`eoSmw7bluI=IXvBcwYDR5|I1WE))O zXoI1X%{$MXKHSXfk-vifwQZ5VdQ1KPAJe~%)z<#%KIpCM%WV4pZN3rQ)d(*jh7*{a zH%_~X#LRoiJ@}(6^oTj1rad05H&kfn?<}XwYCC3)$=uR1#N^1R7#3i~io^qCp(EM{ z>Q{_sCqvI^cZU;N`z#z>g7649G{}68aB2q517+~P`_)faEdy}2s$;iDi|rN7!{a0D zc+6zBJWj_ERno*Rk9J|3)^76j!P#Uy|78k@nt}%+OXooMPLI~#;gAJ`8PAJzTZT2$ 
z^&K8<&vddK;-=!5ZUm#DSY#ob-X9WSP$i0rn$G9&B!6u_OJe#@MWLx%S-EDCM~0>w z$v(Q>qwP&5+g~if=~SfaQu=;+lFpDBj8(>10gsLs@{JUCW}8Pl2C*hdqTP3Rn3t6yN1t*D=RRK6k%R zq@$z9kE`H@Lmif4&L2FOgp3K#T~*7;B7Ac#W>kL|?2ET81EXMl3wv&V^wL!(uVEHZWAkm(#iYi(ao53s?4t z1aP34C#c>J}SJDDru&bt&RH;1}C_ z;uYi_3wSiYgFPo+BV>OEFTm1QEfLo~Q2~qBy+}ZRNkDtz*P99u_%eVIGk`!wz*~zC zc<(SE%Mzj;){f8s0O#}O!UN_yyf6lWC7*UGTFFAtmBOS#tSq3z1d9RTLGaL6YbOu` zvz2R|gOI(!kXeaN2;u@r%dcn$0i6mW#~9vbTp@LwN02t=NR%psi$knF z4p38Gz(6EtbS|hWF#uKNv_XV35j6=EkIW)Oa=|TB2>XO_Pxe9)DosG%u(T|qO%(e$ z6bq49qF5_bC>492P(T6S5Sx>y8X$TKQ1%u$(jYG?@Oc12j#h3!VR+MkuvwZ=@Cbq! zVEIxjd5IZ7RZOeT2ADLL5W24cL_LJH6KH>!T$yc&uj(UmS2epIuts-l0Jj4$HZ@5V z7Zk_?X^a;L^T)22LOG~vaHx>t&`hmoeXXv>LZYyDM1Wcv5O)}cKSTDk5bS?ZTZujb zgiTH777C4DhVi^;yIGSULg$%z>v4hR2ZSWF)UBG(^W;SLHr=V zC&9p86MwBAHCC1pT~{&)MhqZhVqGPGkIj^2UAa8K(xy57DUKfK5#Qg_03))!p$3*# zA*Al1Y5q?*5Y7^rCmw@j%^ z93rSHgdeJzL9h)#gRb2I$i#UF@3dTJ!x~e4^8-0+7Ab=R7zX3sbIujkSMv+$Ci@_0Ixt2rJ8Un+0-8i&X zF5HC})AbGzX$17S&Y~Y#1+9Nxh?X{00XSzQ3LlDN8HOq@xBwxF^QHu#O7{sk7w>~% z6TWoZQuo7K(Tufjv-c@Qc!zLJ47%6K3=$8!fj?I28b}yOg5nzo2q<@!F2||Y8t#uC zv(`iiwHTXD3>ya2ERf5ySzr?WQapm(?k^*1Bpb7|{1E_{3Ceb?kdMJ74v0~S@wgyC zIdms3*P;FTUhP=45=ytPdt+vRpFj)sH^$cCOJ=wzr2^xpu1M%Y6 zDaVgvt+lGopays^3muYu2$ygH?mb+S!I^>bPS7KU2@C|46bT@{8JN^MT+(?XIWYpu zBHSAU)Dz?zPY%PykT-rAFvd#d%0<9_M@nveyD7nG1WH}mC<4}Q4h~PSCW;}BV@hy} zzgiuG#GXu8(n){JQ_W}PN&EI>?46LSxpvHx~ zN7QTLv3P)cbQ0snc|e(+fg>X%1VKE{iO)>sGRno3>^}U@fH$S=KKg$>zHg15O~H$G z%mZrLdHjFf$^_P4I2m*vFJ9x#Kgw?fJZj9qg$s%q-GRum))Kj{MSct7{jW!sBmW}u z`;p)Gz9PO`Uvl_Pw7yB-dM@-E`nB-)>HQ$SjL=``fpA3o)9_*K)s}Cz9B=)eh~Iyh z-3Xw6UjP2!w*p^pdvDtda5KP@Rknv6?qgeYg=5OmUUTK8s5@qU0Y(PUwsYmhZSD^9 zbHuF8mG5qMA2B~Aft)M9xXaybc`Q&;LBF=;AP8MR%iifum~#lM*IaqtmVtml$ja0OV zQ=*w;?i;E~xHe`kH^0I*&g?ofn@CFcxYM>m`KC-Igp*OURL-PJY=wA%+GU|v2>?_3 zpFiXtu@x@%g7Q!pQGLg74@PNNz_#jsuCP+Fp=IA@H8RscO5DW(8+5#;gUM7X=vH-& zysuPqDB*LpnF-o`xkQO>I+aFuZQDw_LG%+t;9epE%8%oykQ}?N*sKyTp=hxd3YbW$ ze!9<{W%)IL-*u}Q>0x@DP8^gNolsLm^!6&p}od)*0JQ_w@sA2Y20 
zQqaTm_nV$`K@U$~GCdW79v)}Rk4=IeUR^Z35`rEcCrpn`f*#&5*~7rqg`khz_I^)G zT~n#{cOXq(=k~sOQmGxpTT30gpjO%8KB!WtZqr_YD%xaL2TP^)D018LpV{KE!Nl6c z%#ORK?Lt6KRFpPw`uunM(U7JG9hZey;pLu>Dj+;(4O+>@0t+oD=Lt2K+Z4{Fyo$V61#P|Jxb zoe6Ea}LT}QV8dJbvd$(>SP7F4iIs*gefOA5Va?z*~1+{uvVRBBvTFE?T3IviG z3s=wSFFT|acA%=;8!6W%$aUT!7ve>TyCNy!2qc8~Y`Xl|e0iVcYC}qSPBqR#G-why zVDOqKNu%tu`gKN0dP`$>Jp=m7k=xo=brZBfocF&l zoR-98T|$5=OY6g4cP6@-ywsLes4;8FCD{u@)ycm40CIG1oC7cCndd&aGBE6AOBOP% zE-%1dxkE<{iD>b{$6Y#EJkTm;+Kza3x?@p4+b!6n1Qeaum8nbETUIG-pNehOT6IYE zA|3#6vL51fd_Em6x+1|O46vZm;{C}fykl~{P+Wzpj0-Ev?d}s%y`BxGCD3wQ>(#%Z z!Hs-N6l`LgFDo>{-!H{*?a|B2_M*X{d^XKU+kdSCV4Z2ffbvn?NU&9r=8 z|3=F;*P!d~y8fmQ7rtSG|cgl%9_DmLis>6NRDUXA)sW!H$FENUUkf9od zl9nRw*rt?cYZNMLpexiVX|hIA;JE1?ZA$u#GMUMOkW~B4*_8N9jl^O{Ev`|g#HUT; z*{qw(y{Wudo06ZZL9-P~o06VqR`OxP-G}qCZ!JNEuv1`l}Ykm2z$|2M%s-=mQ5I1;x=v%F;LB z&RIp731Uke93==a+cMJ|$Y`@JBwSC99UFKaOoj>ZOMB;HT4j2Xez3o z!DRF9-dH+|Xej0apo$WKuWLrtN}EzEEg31y61Ylw%TndOf=WI{L(7~~F)JlIg_6dD zSG8ZVSzEb&4r||hdBMR#8zU6}?W)ePqeu6-&oH|yGX*d4Pc(RY=^#IiEggznR3>+*LNRv0bAi}UAp`94tEEaD(K)=%CHd{(Vnu633bWr zUJ`Q_*3ks2arx_>$Ol^Xkz&c_lZ`D;?t&em zCf)>JpeC?vipN{w&9v!_u!d8Scxn7D(@S+_kg4Wzme$G5f*0LqoW;8t834^CbTcCn5{S7cl@HXiCZN(xwiR`$qTgA)w$2|9 z&@rrAh8VsrR{-ie>G_}*_=xwA_pIyfBq$(y}fVc!$WG$Ip7TX)Vg*P&d%BZ5N&$+w0ZNsJsqRq#JJ1 zGgtPMSL;6_D`Od?WQmh7k{>ZIzl9G89Z7n%k$#!itUd`H5;c8@BP9#7R4nmfcPtSb zP;Ksh22M^O-6E}RY`64j-KMRV#X&Egc?VJkMamwECBG1i6PIsenk*!D-yPfLcbH|SB@rJ^A=jG zG#uJrnr?6GKjzBR&mnapB4@z!$NF!WIJlW>tC5h@;ChsP2NY!lZ7 z4XR10QpZ}qseRm1qnMbSxX~)AMFQ!XxdLU}rZOPN1k6cRZRaw)Jxy;F8+ioBC3v;Z z`U=l+?RSmpa+RoIkDswk7aWD=>5@h+m{$ZoLRe(83S8$`_}C{`)p(M_Ppag7hg~Wc49Ho(6 zZrFODN?OWL~6r#X?T;Z2Urigso~e z-5`xcu61rsuGwZ6lqNi=Oia;Oq=4a=ZN_88wi$mEnQvYzOdHl(TiqjaHL{Fv`GFfX zxqWiq9@iN@Z5$NSy@*>z^#cq%Y@Xs$?R*Ml9(&`YW3o#2AcLzDk+DO$`=4~4kY;P} zZXD*-1@WN>F`c&7?ZV+wo!)M3+}Y{MIwZ3d2G>srH~bEq^3~hp(@6}WWL_M z&vn_MK#O7c3FkBxJMUKK?ofm+gH`Zx=R{W9?iOd(*{!;dDRFD=|7sI$#JN&w`ULQ} z{9M>@4Lh(wnPr2x^i?xn8f4p?d1Nug2As=~l=z)Ts|>Vcii;1YR~bUw**;8i7){^o 
zR+8ds92XIcGtPQ@TuU3KmK4B91I2dMgF9R9WNcZ8Fn?+e}Z{9*9dgC7Y_2cK;F zZP>%ld43@9n}Ht*EC%BKKlFdp|APO3?+<-H;(ONjwD%9ZpK3ec8TYib{#fgefkC19 z)#N~v15FMzIq-&Y;Msn!HaQLVk&BIwWlLk-qp-8w$0wK(@d~&o4aC^60KqatB3$Uz zd%4f6-AU9c*NshTdaag^}1npCDnbCv5=8Y^lY?EQhkN2*f>_&Sp+!%z_ORdMgC6O=a=&Y!{yK}J1t0k**n_Y}5 zD?U|OdlF#p7Fnx2q2d8M=&f>H?(}LY4NjUXoNqA^{AgE6chyWZkQaZWk=1z^hP-UD1fS(Z!z%HuGFmDKP01+k(hF`1P{fE5TkOnuXIpv;(!>Z|g8K@R$ z5-MZgL1cs@>6X11+t;dmC*J|$!?{9fw57OjBG{fgv`Y(&cBAk)(*xynG%$C$EE_f7 z)`*k0e}B!EOTFK*<)`;y3mh3PB_72G)nNIFr;&4xEVt~w94qf{_i9%)L`td}fA+mP zRi&9;@OyZj$^gD*H43I?uME=!cbE))E!VRz>KA+I0cTn={or>c1KaF z${zZ++L<%4dg}$6TWT&BC!h3cw~%>OgBSOCRb#SPIM~y*+pBd8p(}+Nb7skp1Og-o zP7Fl0YVfagHw}Wy!O|}c^{^3UkVSY`=L#K>s9L^^_PNFM{BgErwqET?c~z}vcXF}P zSd}ERnbSd)F}?%K=-z;AD=G~81Q00*1!G-(5(6iq$b9HKGf{HFz-Y6b6o4m^8h}U9 zPWQ%YEcA9g%Cb%a08Rg~ZC>pNe9N_hpSa{JgTX(_>iUMCz+nm(_R%bpJ|Rgh4pVsw zPCOMzeyW(>t*9RW)pea=c5*vIEu-3A?NvSUJ>LK2`TxQnX!&61ncx?Lnc%jzFSWhW zb}H~^fgcaN5ZLAaMgOYrzxZy1zM=o9cT{`T^^9kr^@m&A-5+!xc74`$rRCp3L2sw8 z$|b|6bwn$rdyKKtHcF8jh`Iw%Ht^7L9mP6a^f*&Y7gr6G2Y}R(1sfLKn2Yfjgak!F zcyNBUOyCak6Cyh>G!o9@DquK{m;^WQLI4RMh_N*S!NfSUjst8`R{}4&?CP!T-{aG! zqm`6(M6-~-8WBmT1H5(MdUzM-fagmX-B5vvj9Q;i4P~?easGN50utdoC$Uc?A?^XD zfsr=Ivrswc_i2*^_9j#={u}Q0gosO=g(SXJbPexOtk^7**TqaR1dtWN+>|Jvd<8g5 zGg%`&I}4o+e`5*(h&7=$QR{$%G7q(b1(8iVZ1Vckgz+d>Xoqcu9B^{-;&NvKcET|L ztuf>|EX2rj9GzEK@uK>!UPN3>BH$UeTP_TQ_9$XysQL+n4g-5%k?NHRiSL3j0^y4& zgY}%iN@P`?FM{@VE4l_@QnrBpdI5;z95`!@iR;8VVWjX+UF$|Boj+Jc<`W)NxiFsv z<`?4?-$5T>=T*G6@%}KAWwkXByodv0C7(&J%rBG`b{nC~@Ci3pEG)5`UEyBYe0H7K zj>ch*kw{~@q8;#QccK-zYS_G($Xy(ZqpS{uOu|Th;9M_5d!lKJ7)ChUstIiuoKFl? zw(R#I(4J7^xaerW+vGU(K=!PFNdTCRHJ4yqkYJlBlo#j;KhI1(#ULg)m{3WM`>y`V z6Z?ExDq7LudTZ1Ut7#G@aJEZB^@gY!$-{%(EUegE4v-#*;6$dwzHF3QVHe}I@9Pz; z>gxCYd(BUft0e$_N=7SN?7aptuP8iJtQ-y#lU)J?KS}|@l>)4(9J>m9v5q8_OiA=s zTK4+1Zb0H=mt@On#`LzvCL#&~6h;6)y0lzgWpYBy#s*l>8Tz_*6hMM22a@e3Pysn! 
ziHanYj&tdT+Y3NVX$K~mjs-b$h$Vp4>`=bI02@2ZoOoh4;wx3Qr7T3qC`KfJmQ)FW z2C$TL$#T_HG)CD0`mkRf@vvFf%-oQ-q?V=8XYtWH5Dx?830;5ZVUl76O0EE6>9?RZ!hL!jRMrvYU1oY)e(qid0lJ6me~;ky5TahGTQDnWC&E0(*FL_}Ycxv7wudlr|Rx6Ik@F+7Z={*yb7+ zS5zntmedqblDNSQW*^{e(Ww@WM@uX9n~5bZgb>3!Rf2AyvOnNMSmH_5Jd37kT3SeB zRl(zSvC6Yq7H3NRMyRP;_>VMjAVe#BpY-A4kYs)9cve9y4g?j9${<;!sSgmjb7SR6 zFsn*pH!@nD^`J%zpxIV!3)XGx5@u(gb={U}RaYVI-zNv7(@rJJb!mfAF`cQzNmxG5 zIF@J$$FEbUSUXLHF1o;E2YR-J0S=~S675*DbkTtBaz9rrykWqY9w)K&YJZL3)!_#g<7VRH(K*tR$L|p(tX^ofJhj7>qA+6Zd%nBtH zTt0o3La$HDMIYG|93?*D^^VjbZ-uFhq)|IA_QpYxCk|0pcMsCMpchv7X4t}&ZK@@o z;c;U!OV$Tj5FtPVuCWsoz7z}TJm8iUJHRuNUr8^L%08Pd13HpuF|LumZQ6Sg&>e

@zt3#x&o?sM~(hr;SD{mz}y(dJf0~ zN_1V}{v~Em$6X#FRwMHisx@E*<}zr@SXoRLVIKfN>Lr>$hu~TEA);!xTl}f~h^X2X zcl9>!)_AM$)~JjmaVR`WI1L3gvoVs84^dE<#B+n8wv+LL+(o#tEr6=ASk5hjOcUu! zAcR0FkxyW~BW{f15aPjw%vu0o&bc`lJ|YK9cQjrBWOH^7bYIvi(S9F>6TvUxS?W|=Cw$s)v?81(qB9U$87{YhF^Fts2S3lrk5)F* zO3oNULK~Km+NRjJ+A^!o$RGDll9#n^npTH2|3kg zapF>)7m9EZ%N(40A=icA&}*r8egK@-(N(8vQzxmy(|4HK8;fBf#-Z35VGPB4PqasgV`#vCCDUlTrDL8Cb6Cn5lf;IkWUTQ< zufyt_k8!@peCWmkEAn!>gcq`;=g{+mTA3kEU%L zEd1<@ms;vO1L0rM_6NRO3j@S(<`aH{%Pd5+WsK$`+$sZL4jo!YI)E-Q+-% z15FMzInd<5-$V|a#-QKV5wKt0<<;)aLqML%DqoF{EDX#l78Ao0%;ro(>QG>?#rPw$ z=ESqJy%gSm_lAd3HpBm$;r|$hh(~DN{P6z%z19Sb&Iwl2MWgJi=@(y3V$t6OTn(Q`3&rif4EXfjWfy zsqVR#IF+~2F_)P0VQgxPf(|(9eC!)tPe20%LW8aD$R@D^4sQ%c(BXWNT$<(%Ig@2M zNpi5px%eD-N&tq8*$>}QBa!7~+5u;3qy=rib0NaQkRo&aK4+3G*1xBnp#khB==O~c z^BWC=y6jab167AJz>cl%6?m#HjK!X6Jd&EHB%X}A0EE>N>q=pA2%^}t$%S^;Cec}T zZkV1KQUrqe4s4g1CfTPn16IIWBCmpt?r`-|yC{B$M~W2TX4UOmT}R~13Q9-gkLDo~K(cyWbc2c=$KM&xBqLbq9YX_-tD?@B{vz@t^no zL-#4y*IlPtex#*c{}KIZ?YrFnzx$K$>~pRcT0Ya#sekqj(DHm}a;`vB9Qhr1xSB-& zVYV5vlz>l&TWTnD56mTSehl4B0u7xRP*eyhGoh%@XdKzl2a70!6>SD1QdVowq)e1R z60<&9oKvqisuf9iDecp4klC~$8BPqEU@{zz;hApcGvHVwF#>)g1CC-?MGpLQjBs>5 z5hJ;Qs6PfVt@;aaq&IrY)_fi$%nb6#`>Gpu#zbM*D`Rl`}TF+TXQ)3(g!xm!S0<-j{eD0#(3f3`X7}+#+vK@C2Rr z;O+xz;!@@K1>f5tdhK=5MfWi?-oq z;g)8lhtZh*2%K}(&R|iP2q`k->Q5AN_tDD@M?1I&x z<1#(Q24Oovdhj-^#*L9Y?0;eWV41wGdga%u;3b+j$^-e}9JyHGj7N17EqWn!7lj7k zV-&7D;>RIs3gg+ZVJ2rY`%{_U;l#iYg+hp<9g?5u(UxLF-A#ougvWBQvVb>qAQOCF z4jz9M^n(J}N{niKC3KdWf5NHx7HwUQR=ksb3Qr{fHFWBHol1wl;n*2 zW2;~vv8-cMkjxAD#ON5Nu%L$dS~)jC3+w}XBONbv#S@RSxe`vwTz&;s8z%~gdK{cg z(J`i%fo68`Jzk=VBNc@U#S}T^7vme^{7@1#)>q5pxU4M3x}(&kvgB&ch&d+NBc128{^5WSNj7XUs*U+EIJ~bSma6uL+@!$| ziOH6-#~7)^-^kj<0ey&06n~mSgL0!7Aq9sFm;kS;4@SM(kIL#qNyIMkfmi?(SQc5c z%-(l}V(53?lV`kzq_zZzntP4^Es8d4dBOtBbRz6G5l|X`9Y==yvX}AHMNTlJ4)BQN zhFUFn44@^{4j3;(15Ink&Px>1;kb72z>aDJNUD;DbO`Cd5(^F?^Cf3sw&p+)fHiIr zwZ~N$984EQo->curVg3{yB0VgJ)PGPxoatcpnv!^0p4#k91xFk!}c(5c(|C&N2 
zpTCGIHUy4TD<-hIhz=os!m=VhgKlJsk5rLyykVxnjQIaa&y42zJ@4+wzXlY53Ga7D z?nRDATEm|YzdxMu9S#qKBc6|W-sAaP=nq0a4L9~>Ll;6vL!sa|Ju|^y3w}2EL&5h3 z^T8Xz!QfLtciW$|eWmUHY5PRmKWHnoU2p4cYYqITz^4Q63`_*}`@iM?ivN?o$Grd5 z{~g{h_~-rI{;==9??3terSGQihumLw|CIZ~?svHt+?TZ9b$4oCbVps^a{UgV3w+#l z*EQe@wER}fPq)0g<=K`aEg}6?{WJP^>I?cgeZThCWO;xrUAj6$MV6FY`9*lV+x%D@ zWP&3^n&utRH$0`#_eR_UBkzhqBXAsXXU*1N>c6fx^sX4XO~q;{<1*I?=a&eG(XAWS zJf;g#=#nTd$oNefxZKl~LMb?IvS8YTXC1EHP>IzcxmmhW$XckyVqwQ+B0|#+rQ6K! zl&)-WiTl(AeZf&V^*-Om}Vl>UY-8`G*$tk$$#q}-o8!^wQ9gkTr=Y(M0nU(omd7<_d zH9OsjEvDkciO5`eUfy}b&VY(HRL|qKdg$B1^*n)pR*!J~+FBxR31R zFQr?EDyw&?kG#M}oiSENdv`j#RVR>ly@s2)I^8#;`o^)JQa|aT8MLw4=9an)P^y2` zx_8|hy!N?IMK`+MzUHO$miz3*Avk^$0A8`CJ+h=w@V0PuE`YRPipp!fR$SdbDD2QC zTdrx_X}QDvb>5r)+AnOEi-!yov||GjJJy&$0S}7Z(?G9RXrc%q#r+NRrlhX@lOsW> zUL@%xMiC`1(!9d8giOoQyGbwaF}ZAU<4`iI*OJl}-iXy#GKW@Q*cVa6C#ZiYZD}J& zbv1Si+Z(^K(#@>MD}Lopm1>ZGQ`X+*D#}jtU7;IY#h`CM{n&8T-QhlFUcA)h)Ag3) zs5@rP1VaQfYLf29A^Luu;mdf1(7} z>!|=h{u_LOX2|uITD$(st&z3XUkHCJd@tM+c7f;rABWx*8V&wY@cCe{?Oz4{H1LCg zT`gbs&-(t-_fFp)?>*0-cwTOexv#iJ!e4Uh{{OcntN*gAlrTdm;(k`*?;;?W zd2Xtcy{JM?YU~-#0|mla1^!uO;HS#eM>36~A>OnNVQf)V2=l2hFsGFfmFeM!7Ec?Cg&cwrR0$(W4TZQRRu=Xn)qw|sIkI4 zKB5@z@PWEYa3xBnEb9^;9>ucCW9tOO*KY0O8VxM1f^+zxY4T0532o^=mdHHnVXke| z2F3IK%3v1VfR&p7IcJI5RMs||X>JNe5W?Ec7vUyy3<$uKYK{r#1De~xoD7C!7)wYK zBLCn4LgBqHEu_18`_>U?W&v29&Dvr3I>CJhTXhCn-J(F#zWPjEyE6VYT2G zvBJHeoRa0SS&5LFkQ1jQr(q!0-C5~)$p<)^75{vr$k5b!>@{iB7iSnJv*mH(faHWY z6$ssmn@AojARI~Ts3M5?QVFnRU?yP4I>64d!?KBi)GFXIM*#)E9L30;I`UX(+#2;r z)EWkd_*@4-2?o);j<_NugVv?c9C+!WS1X{Ga1#}n6l-LPa#{(BdsaRYc3}wVB`#NfNsWGFP zk}LxicNs1QiDr|Q&T(WAY7FxncL6J^;t)?@XVFHX6}YaBmk`ko5S)Z~GaIEQc#tFV zoxi6Qkf*^B2lz#>04RR;Ri=xCrF@42LraxkHVyX{R%;eEO1zc<6XJnbb0M1{v!Q}i z#yMMqLg!$}*Xr;EM)f^&4`BpPv0=g%dvQx64YBL##TDop0l6jKNzv3fII)br#tRn6 zdU^YCL4-9^Vc=`DOe`EpLOhvDtON=^@IqGrw8koMi9QEwA2OQW@s)mK)Zz`!Kbweo zA&+3EAZ(D7T9!5@PiZ2vB-Fchu}bqC!=KEuy>qa036LeXfbOZhkka&%ynY?f7e_{i z$EL=G0PO7VSOiSVEr3R#atON*WPp%7l{9u2kTk7@GkXt*(R>bJG*$9LFT%XBmQ31k 
zuFQZbRmx*IKzrP>*Gz-7=0Iu&_4r&;WjL{bMMry0M00-*!Gb1dVMTz2WjH?%VBHG_ zfOE{M%tz;8m~Me%ajM0_gOe*Ws7q6$JLa$|OKR@YNnW{S^_vJn4{z4MU}0vz1+aW^+F;^>-mu9wr6MSZ?}GL>-EULbbZ48_uaqX?(pm3FGhBSU-sT` zf1#0Y2YW#CQIi8r4s3=4&)tMgCtQMN)gR3<14_g&q;V1bW?geBlp_&K#JZIFbJh zoq&MqX=+O&;fUQah?ry*;bk9}0O${{(EvtN$9xIY4AR8<|auJYF&uTq)w0;1ly{q9NH`Bo8J6O40FNqtGE>dbZ zOK92hRBd=gT%tO&59gy}-J)cl_X=L--@lN|dzZM?9@tkk! zIS_iM#^7k#_+SkIVq*Y`w5s#kC?=#t?e4tT1@^cqc5spbqbW5HA*_hu05=>;YN_=w zk1cN}04yzaxklr;D+*46l_A)DKE;K#=ps3`Emy%@1n{$Qdx+!=fp_-C2;cCA?1Gf`yqF<}SC(RzdX^(`gJ zYV)sU=hA?5s;;3&DE820Xn0^~d@ZL(alEnv4T4>xMWWQ~7wtK>qOOwctm! z;f5O`-JRGFDbTxm!=13c(`sNA#TH8Jt0+VizGFl&P_>T{8jC*hpS6a1C*MJ znbf%G?}al64!jYWhl6n9BqLMt5u!jx`Sr*+g%ZAhPO_=Fq4f7-6QRY&7Li+){ek zD%a{2Cx-(r;gc@_hevGGVR5!mWK$?@Z+nzZt3 zD3pJ3oP~c@Y{wJjo#JM>P~cNq7%udSWWzrd4u_V4KOH>O_Nlfh?`U9)|GWHKeDCxA zPaH4JuO<(!k7ppw;Y7thF(js ziu6}{z$XINyQXZ^2tfyD9<( zO~okz-5A-gKhhUnyv*N=uI7!osqWj@sF#VaPha;zC{bmo3aG*yqAn4(6z+#(I1BZz zbXG%1nqa0aTxS!gTR>!A<=8Yu+V)lv>15~Q8*91ZDqIU;kZ=y@5$qLVE|%2L@4Vbb z`n|8RV~T)M~i$m@;><0{AzNb$$=&Z znjC0ypvi$I2bvsca-hk9CI^}vXmX&*fz5K@7@W-*7*a8#i1uK9>4^RQP|KFuu)*@%aUkg78r~aeC zWkB)!-oVHFU-kW0-@5l-dE=h7*8kAj?Ot>Jv1`$_z2!yy-|1JhFF_{jS4E%p>I9TO z?u@Ue( zZdmB9n&-quN5GCZ*^IyMvJb{Dn(?m_h4PH62oz3KHY(q? 
zhWwo)K0O<)fFXaX0g~TROMa#EqEBb2`I8OMrY7094>rGe!K)K&{xvJXHISs3+VFaD z^E2mtI>XJsYOPhcS^ic4=MN8)jDo|Z6V_H%qjv(F*ZA8OIse`{oN$PoKW=RY_sglU zVhrk+!RI93(D%!iHYfd|mS+#Wll&ce$KP`3{KAkIbXYj2K1MCxD4o~9RoBE%o_*W!^Ut0}A%gh%Bh|$H4FS+kokFD>U|ud(Hv!feftXL@4i*^k z*}63~VNzA!GH~J~=>%(9`-SQ%b=z9Ie5AFN(;2$1e`xi z6%bWW^1=@O#O~h{>g8rJ^(vQV@8KqDc%xADaxZ$J zD?r_PV6I$hL2o3i-l?2dCs@7v=BgUT)qAi&8jdo0#+JgsfK*8?o07t#>`^gPmjrp!JHR z3QN1AY#eSSB}kr}h2phyC@$Yl%4}~vwqA19s}r`~Jsc_wtwCTzP$RBy2(EWEgZmHw z*IPD;D1o1&;A5yX*oO$?1l5ImRw1ZNfOeI(86WIY5nZof6~2_qipL{)%}T_;VF?mJ zJ9zntw%J1HH4d+rPJ4BN*UKX<9FM|$T$?BuXNaw6V_``wwF%hL8u)sN7rZ**>n$np zxD5dI4yU|21K3kpU<}gD3U-9O`14+!5cck>wDlnNI_|&(LWsQ=Rr(qjd!4tvI$`YP zOi9HJNsb_UCvRcl0J68Bax{*z7kdr{EaK+PtK4;9_72~~${_iiN?r?RuX5~JG@zn$ z0dApY2t>6ZI^?i>?U2I^X!v#D3|Cw?aPEOEbhZ}3ZvoO?W$SgHK17$>XGl0!oenI+ OLXr^z;4Qk{{{H}1M1&6j literal 0 HcmV?d00001 diff --git a/crates/message/src/error.rs b/crates/message/src/error.rs index 68e1db0..1b049a9 100644 --- a/crates/message/src/error.rs +++ b/crates/message/src/error.rs @@ -1,27 +1,47 @@ // This file is part of `i18n_message-rizzen-yazston` crate. For the terms of use, please see the file // called `LICENSE-BSD-3-Clause` at the top level of the `i18n_message-rizzen-yazston` crate. -use i18n_lexer::LexerError; +use i18n_registry::RegistryError; use i18n_pattern::{ ParserError, FormatterError }; +use i18n_provider::ProviderError; use std::error::Error; // Experimental in `core` crate. use core::fmt::{ Display, Formatter, Result }; #[derive( Debug )] #[non_exhaustive] pub enum MessageError { - Lexer( LexerError ), + Registry( RegistryError ), Parser( ParserError ), Formatter( FormatterError ), + Provider( ProviderError ), + StringNotFound( String, String, bool ), // identifier, language_tag, fallback + NoDefaultLanguageTag( String ), } impl Display for MessageError { /// Simply call the display formatter of embedded error. 
fn fmt( &self, formatter: &mut Formatter ) -> Result { - match *self { - MessageError::Lexer( ref error ) => error.fmt( formatter ), + match self { + MessageError::Registry( ref error ) => error.fmt( formatter ), MessageError::Parser( ref error ) => error.fmt( formatter ), MessageError::Formatter( ref error ) => error.fmt( formatter ), + MessageError::Provider( ref error ) => error.fmt( formatter ), + MessageError::StringNotFound( identifier, language_tag, fallback ) => { + let string = match fallback { + true => "True".to_string(), + false => "False".to_string() + }; + write!( + formatter, + "No string was found for identifier ‘{}’ and language tag ‘{}’. Fallback used: {}.", + identifier, + language_tag, + string, + ) + }, + MessageError::NoDefaultLanguageTag( identifier ) => + write!( formatter, "No default language tag was found for identifier ‘{}’.", identifier ) } } } @@ -29,9 +49,9 @@ impl Display for MessageError { // Source is embedded in the enum value. impl Error for MessageError {} -impl From for MessageError { - fn from( error: LexerError ) -> MessageError { - MessageError::Lexer( error ) +impl From for MessageError { + fn from( error: RegistryError ) -> MessageError { + MessageError::Registry( error ) } } @@ -46,3 +66,9 @@ impl From for MessageError { MessageError::Formatter( error ) } } + +impl From for MessageError { + fn from( error: ProviderError ) -> MessageError { + MessageError::Provider( error ) + } +} diff --git a/crates/message/src/lib.rs b/crates/message/src/lib.rs index 831294e..befa169 100644 --- a/crates/message/src/lib.rs +++ b/crates/message/src/lib.rs @@ -3,12 +3,69 @@ //! The `i18n_message` crate contains the messaging system. //! -//! TODO: Complete crate description +//! A message system that connects to a string data store, to obtain strings for the specified language using a +//! string identifier, and formatting the string to replace any placeholders within the string with provided values. +//! +//! 
The message is capable of caching retrieved strings that are prepared for placeholder replacement, thus can be +//! reused without the need to parse the string for placeholders. +//! +//! The message system makes use of all the other component crates that make up the `i18n` project. Ideally one only +//! needs to use the meta crate `i18n`, as it includes all the crates including this `i18n_message` crate. //! //! # Examples //! //! ``` -//! // TODO: crate example +//! use i18n_icu::IcuDataProvider; +//! use i18n_registry::LanguageTagRegistry; +//! use i18n_provider_sqlite3::ProviderSqlite3; +//! use i18n_pattern::PlaceholderValue; +//! use i18n_message::Message; +//! use icu_testdata::buffer; +//! use icu_provider::serde::AsDeserializingBufferProvider; +//! use std::collections::HashMap; +//! use std::rc::Rc; +//! use std::error::Error; +//! +//! fn message() -> Result<(), Box> { +//! let buffer_provider = buffer(); +//! let data_provider = buffer_provider.as_deserializing(); +//! let icu_data_provider = Rc::new( +//! IcuDataProvider::try_new( &data_provider )? +//! ); +//! let language_tag_registry = Rc::new( LanguageTagRegistry::new() ); +//! let lstring_provider = ProviderSqlite3::try_new( +//! "./i18n/", &language_tag_registry +//! )?; +//! let message_system = Message::try_new( +//! &icu_data_provider, &language_tag_registry, &lstring_provider, true, true +//! )?; +//! let mut values = HashMap::::new(); +//! values.insert( +//! "identifier".to_string(), +//! PlaceholderValue::String( "i18n_message/string_not_found".to_string() ) +//! ); +//! values.insert( +//! "language_tag".to_string(), +//! PlaceholderValue::String( "en-ZA".to_string() ) +//! ); +//! values.insert( +//! "fallback".to_string(), +//! PlaceholderValue::String( "true".to_string() ) +//! ); +//! let lstring = message_system.format( +//! "i18n_message/string_not_found", +//! &values, +//! &language_tag_registry.get_language_tag( "en-ZA" ).unwrap(), +//! None, +//! None +//! )?; +//! 
assert_eq!( +//! lstring.as_str(), +//! "No string was found for identifier ‘i18n_message/string_not_found’ and language tag ‘en-ZA’. Fallback used: True.", +//! "Check placeholder values." +//! ); +//! Ok( () ) +//! } //! ``` pub mod error; diff --git a/crates/message/src/message.rs b/crates/message/src/message.rs index c1aae4b..3e615a4 100644 --- a/crates/message/src/message.rs +++ b/crates/message/src/message.rs @@ -3,10 +3,11 @@ use crate::MessageError; use i18n_icu::IcuDataProvider; -use i18n_lexer::{ Lexer, Token, TokenType }; +use i18n_lexer::tokenise; use i18n_provider::{ LStringProvider, LStringProviderWrapper }; use i18n_registry::LanguageTagRegistry; use i18n_lstring::LString; +use i18n_pattern::{ parse, Formatter, PlaceholderValue }; use icu_provider::DataProvider; use icu_properties::{ provider::{ PatternSyntaxV1Marker, PatternWhiteSpaceV1Marker } }; use icu_segmenter::provider::GraphemeClusterBreakDataV1Marker; @@ -31,7 +32,7 @@ use icu_datetime::provider::calendar::{ EthiopianDateSymbolsV1Marker, }; use icu_calendar::provider::{ WeekDataV1Marker, JapaneseErasV1Marker, JapaneseExtendedErasV1Marker }; -use std::rc::Rc; +use std::{ rc::Rc, cell::RefCell, collections::HashMap }; pub struct Message<'a, I, L> where @@ -50,11 +51,12 @@ where L: ?Sized + LStringProvider, { icu_data_provider: Rc>, - registry: Rc, - lexer: Rc>, + language_tag_registry: Rc, lstring_provider: LStringProviderWrapper<'a, L>, fallback: bool, caching: bool, + cache: RefCell, HashMap>>>, + grammar: Vec, } impl<'a, I, L> Message<'a, I, L> @@ -74,38 +76,303 @@ where L: ?Sized + LStringProvider, { - /// TODO + /// Create a new `Message` instance, that is connected to a language string provider `LStringProvider`. A + /// reference to the language tag registry `Rc` instance and reference to the ICU data + /// provider `Rc` are stored within the `Message` to facilitate the parsing of language string + /// patterns, and for formatting strings. 
+ /// + /// Two boolean flags `fallback` and `caching` are also set to be the defaults of the `Message` instance. These + /// flags govern whether parsed strings are cached for reuse, and if no string is found for the specified language + /// whether the `format()` method should fallback to the default language tag of the string identifier. + /// + /// # Examples + /// + /// ``` + /// use i18n_icu::IcuDataProvider; + /// use i18n_registry::LanguageTagRegistry; + /// use i18n_provider_sqlite3::ProviderSqlite3; + /// use i18n_pattern::PlaceholderValue; + /// use i18n_message::Message; + /// use icu_testdata::buffer; + /// use icu_provider::serde::AsDeserializingBufferProvider; + /// use std::collections::HashMap; + /// use std::rc::Rc; + /// use std::error::Error; + /// + /// fn message() -> Result<(), Box> { + /// let buffer_provider = buffer(); + /// let data_provider = buffer_provider.as_deserializing(); + /// let icu_data_provider = Rc::new( + /// IcuDataProvider::try_new( &data_provider )? 
+ /// ); + /// let language_tag_registry = Rc::new( LanguageTagRegistry::new() ); + /// let lstring_provider = ProviderSqlite3::try_new( + /// "./i18n/", &language_tag_registry + /// )?; + /// let message_system = Message::try_new( + /// &icu_data_provider, &language_tag_registry, &lstring_provider, true, true + /// )?; + /// let mut values = HashMap::::new(); + /// values.insert( + /// "identifier".to_string(), + /// PlaceholderValue::String( "i18n_message/string_not_found".to_string() ) + /// ); + /// values.insert( + /// "language_tag".to_string(), + /// PlaceholderValue::String( "en-ZA".to_string() ) + /// ); + /// values.insert( + /// "fallback".to_string(), + /// PlaceholderValue::String( "true".to_string() ) + /// ); + /// let lstring = message_system.format( + /// "i18n_message/string_not_found", + /// &values, + /// &language_tag_registry.get_language_tag( "en-ZA" ).unwrap(), + /// None, + /// None + /// )?; + /// assert_eq!( + /// lstring.as_str(), + /// "No string was found for identifier ‘i18n_message/string_not_found’ and language tag ‘en-ZA’. Fallback used: True.", + /// "Check placeholder values." + /// ); + /// Ok( () ) + /// } + /// ``` // TODO: Add struct contain callback functions for commands pub fn try_new( icu_data_provider: &Rc>, - language_registry: &Rc, - lexer: &Rc>, + language_tag_registry: &Rc, lstring_provider: &'a L, fallback: bool, //true = fallback to default language caching: bool, ) -> Result { Ok( Message { icu_data_provider: Rc::clone( icu_data_provider ), - registry: Rc::clone( language_registry ), - lexer: Rc::clone( lexer ), + language_tag_registry: Rc::clone( language_tag_registry ), lstring_provider: LStringProviderWrapper( lstring_provider ), fallback, caching, + cache: RefCell::new( HashMap::, HashMap>>::new() ), + grammar: vec![ '{', '}', '`', '#' ], } ) } + /// For the specified string identifier, format a string for the specified language tag with the supplied values + /// for the placeholders. 
Optionally specify whether to fallback to the default language tag of string identifier + /// when there is no string pattern for the specified language. Optionally specify whether the parsed string should + /// be cache for reuse. + /// + /// # Examples + /// + /// ``` + /// use i18n_icu::IcuDataProvider; + /// use i18n_registry::LanguageTagRegistry; + /// use i18n_provider_sqlite3::ProviderSqlite3; + /// use i18n_pattern::PlaceholderValue; + /// use i18n_message::Message; + /// use icu_testdata::buffer; + /// use icu_provider::serde::AsDeserializingBufferProvider; + /// use std::collections::HashMap; + /// use std::rc::Rc; + /// use std::error::Error; + /// + /// fn message() -> Result<(), Box> { + /// let buffer_provider = buffer(); + /// let data_provider = buffer_provider.as_deserializing(); + /// let icu_data_provider = Rc::new( + /// IcuDataProvider::try_new( &data_provider )? + /// ); + /// let language_tag_registry = Rc::new( LanguageTagRegistry::new() ); + /// let lstring_provider = ProviderSqlite3::try_new( + /// "./i18n/", &language_tag_registry + /// )?; + /// let message_system = Message::try_new( + /// &icu_data_provider, &language_tag_registry, &lstring_provider, true, true + /// )?; + /// let mut values = HashMap::::new(); + /// values.insert( + /// "identifier".to_string(), + /// PlaceholderValue::String( "i18n_message/string_not_found".to_string() ) + /// ); + /// values.insert( + /// "language_tag".to_string(), + /// PlaceholderValue::String( "en-ZA".to_string() ) + /// ); + /// values.insert( + /// "fallback".to_string(), + /// PlaceholderValue::String( "true".to_string() ) + /// ); + /// let lstring = message_system.format( + /// "i18n_message/string_not_found", + /// &values, + /// &language_tag_registry.get_language_tag( "en-ZA" ).unwrap(), + /// None, + /// None + /// )?; + /// assert_eq!( + /// lstring.as_str(), + /// "No string was found for identifier ‘i18n_message/string_not_found’ and language tag ‘en-ZA’. 
Fallback used: True.", + /// "Check placeholder values." + /// ); + /// Ok( () ) + /// } + /// ``` pub fn format>( &self, identifier: T, + values: &HashMap, language_tag: &Rc, - fallback: Option, //true = fallback to default language, None = use the Message default. - caching: Option, //cache the resultant Format for repeating use with different values. + mut fallback: Option, // true = fallback to default language, None = use the Message default. + mut caching: Option, // true = cache the resultant Formatter for repeating use with different values. ) -> Result { - let lstring = self.lstring_provider.0.get( identifier, language_tag ); + let mut _language_entry = false; + { + let binding = self.cache.borrow(); + if let Some( result ) = binding.get( language_tag ) { + _language_entry = true; + if let Some( result2 ) = result.get( identifier.as_ref() ) { + return match result2 { + CacheData::LSring( lstring) => Ok( lstring.clone() ), + CacheData::Formatter( formatter ) => + Ok( formatter.borrow_mut().format( values )? ) + } + } + } + } + // Not in cache. + // Get pattern string for specified language, though returned `LString` may be for another language. + let lstring = match self.lstring_provider.0.get_one( + identifier.as_ref().to_string(), language_tag + )? { + Some( result ) => result, + None => { + if fallback.is_none() { + fallback = Some( self.fallback ); + } + if !fallback.unwrap() { + return Err( MessageError::StringNotFound( + identifier.as_ref().to_string(), language_tag.as_str().to_owned(), false + ) ); + } + let default_language = match self.lstring_provider.0.default_language_tag( + identifier.as_ref().to_string() + )? { + None => return Err( MessageError::NoDefaultLanguageTag( identifier.as_ref().to_string() ) ), + Some( result ) => self.language_tag_registry.get_language_tag( result )? + }; + match self.lstring_provider.0.get_one( + identifier.as_ref().to_string(), &default_language + )? 
{ + Some( result ) => result, + None => return Err( MessageError::StringNotFound( + identifier.as_ref().to_string(), language_tag.as_str().to_owned(), true + ) ) + } + } + }; + // Tokenise the pattern string. + // If pattern string has no grammar syntax characters, simply cache (if allowed) and return the string. + if caching.is_none() { + caching = Some( self.caching ); + } + let ( tokens, grammar_found ) = tokenise( + lstring.as_str(), &self.grammar, &self.icu_data_provider + ); + if !grammar_found { + if caching.unwrap() { + if !_language_entry { + let mut data_entry = HashMap::>::new(); + data_entry.insert( + identifier.as_ref().to_string(), + CacheData::LSring( lstring.clone() ) + ); + self.cache.borrow_mut().insert( + Rc::clone( language_tag ), + data_entry + ); + } else { + let mut binding = self.cache.borrow_mut(); + let data_entry = binding.get_mut( language_tag ); + data_entry.unwrap().insert( + identifier.as_ref().to_string(), + CacheData::LSring( lstring.clone() ) + ); + } + } + return Ok( lstring ); + } - Ok( LString::new( "blah", &Rc::new( "blah".to_string() ) ) )//temp to get rid of compiling error. + // Has grammar syntax characters. + // Parse tokens and create `Formatter` + let tree = parse( tokens )?; + let mut formatter = Formatter::try_new( + &self.icu_data_provider, + language_tag, + &self.language_tag_registry.get_locale( language_tag.as_str() )?, + &tree + )?; + + // If caching is not allowed, simple use `Formatter` to get the LString. + if !caching.unwrap() { + return Ok( formatter.format( values )? ); + } + + // Cache the `Formatter`. 
+ { + if !_language_entry { + let mut data_entry = HashMap::>::new(); + data_entry.insert( + identifier.as_ref().to_string(), + CacheData::Formatter( RefCell::new( formatter ) ) + ); + self.cache.borrow_mut().insert( + Rc::clone( language_tag ), + data_entry + ); + } else { + let mut binding = self.cache.borrow_mut(); + let data_entry = binding.get_mut( language_tag ); + data_entry.unwrap().insert( + identifier.as_ref().to_string(), + CacheData::Formatter( RefCell::new( formatter ) ) + ); + } + } + + // Get `Formatter` and use it to get the LString. + let binding = self.cache.borrow(); + let result = binding.get( language_tag ).unwrap(); + let result2 = result.get( identifier.as_ref() ).unwrap(); + match result2 { + CacheData::LSring( lstring) => Ok( lstring.clone() ), + CacheData::Formatter( formatter ) => + Ok( formatter.borrow_mut().format( values )? ) + } } } + +// Internal structs, enums, etc + +enum CacheData<'a, I> +where + I: ?Sized + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider + + DataProvider + DataProvider, +{ + LSring( LString ), + Formatter( RefCell> ), +} diff --git a/crates/message/tests/message.rs b/crates/message/tests/message.rs new file mode 100644 index 0000000..c7f5648 --- /dev/null +++ b/crates/message/tests/message.rs @@ -0,0 +1,57 @@ +// This file is part of `i18n_message-rizzen-yazston` crate. For the terms of use, please see the file +// called `LICENSE-BSD-3-Clause` at the top level of the `i18n_message-rizzen-yazston` crate. + +//! Testing `Message`. 
+ +use i18n_icu::IcuDataProvider; +use i18n_registry::LanguageTagRegistry; +use i18n_provider_sqlite3::ProviderSqlite3; +use i18n_pattern::PlaceholderValue; +use i18n_message::Message; +use icu_testdata::buffer; +use icu_provider::serde::AsDeserializingBufferProvider; +use std::collections::HashMap; +use std::rc::Rc; +use std::error::Error; + +#[test] +fn message() -> Result<(), Box> { + let buffer_provider = buffer(); + let data_provider = buffer_provider.as_deserializing(); + let icu_data_provider = Rc::new( + IcuDataProvider::try_new( &data_provider )? + ); + let language_tag_registry = Rc::new( LanguageTagRegistry::new() ); + let lstring_provider = ProviderSqlite3::try_new( + "./i18n/", &language_tag_registry + )?; + let message_system = Message::try_new( + &icu_data_provider, &language_tag_registry, &lstring_provider, true, true + )?; + let mut values = HashMap::::new(); + values.insert( + "identifier".to_string(), + PlaceholderValue::String( "i18n_message/string_not_found".to_string() ) + ); + values.insert( + "language_tag".to_string(), + PlaceholderValue::String( "en-ZA".to_string() ) + ); + values.insert( + "fallback".to_string(), + PlaceholderValue::String( "true".to_string() ) + ); + let lstring = message_system.format( + "i18n_message/string_not_found", + &values, + &language_tag_registry.get_language_tag( "en-ZA" ).unwrap(), + None, + None + )?; + assert_eq!( + lstring.as_str(), + "No string was found for identifier ‘i18n_message/string_not_found’ and language tag ‘en-ZA’. Fallback used: True.", + "Check placeholder values." 
+ ); + Ok( () ) +} diff --git a/crates/pattern/README.asciidoc b/crates/pattern/README.asciidoc index 87031bb..8784dfa 100644 --- a/crates/pattern/README.asciidoc +++ b/crates/pattern/README.asciidoc @@ -64,7 +64,7 @@ features = [ "serde" ] ``` use i18n_icu::IcuDataProvider; -use i18n_lexer::{Token, TokenType, Lexer}; +use i18n_lexer::{Token, TokenType, tokenise}; use i18n_pattern::{ parse, NodeType, Formatter, FormatterError, PlaceholderValue }; use icu_testdata::buffer; use icu_provider::serde::AsDeserializingBufferProvider; @@ -77,10 +77,10 @@ fn pattern_plural() -> Result<(), Box> { let buffer_provider = buffer(); let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); diff --git a/crates/pattern/src/formatter.rs b/crates/pattern/src/formatter.rs index 1920762..99aba6b 100644 --- a/crates/pattern/src/formatter.rs +++ b/crates/pattern/src/formatter.rs @@ -94,7 +94,7 @@ where /// /// ``` /// use i18n_icu::IcuDataProvider; - /// use i18n_lexer::{Token, TokenType, Lexer}; + /// use i18n_lexer::{Token, TokenType, tokenise}; /// use i18n_pattern::{parse, NodeType, Formatter, FormatterError, PlaceholderValue}; /// use icu_testdata::buffer; /// use icu_provider::serde::AsDeserializingBufferProvider; @@ -108,10 +108,10 @@ where /// let data_provider = buffer_provider.as_deserializing(); /// let icu_data_provider = /// Rc::new( IcuDataProvider::try_new( &data_provider )? 
); - /// let mut lexer = Lexer::try_new( &icu_data_provider )?; - /// let tokens = lexer.tokenise( + /// let tokens = tokenise( /// "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", - /// &vec![ '{', '}', '`', '#' ] + /// &vec![ '{', '}', '`', '#' ], + /// &icu_data_provider, /// ); /// let tree = parse( tokens.0 ).expect( "Failed to parse tokens." ); /// let locale: Rc = Rc::new( "en-ZA".parse().expect( "Failed to parse language tag." ) ); @@ -277,7 +277,7 @@ where /// /// ``` /// use i18n_icu::IcuDataProvider; - /// use i18n_lexer::{Token, TokenType, Lexer}; + /// use i18n_lexer::{Token, TokenType, tokenise}; /// use i18n_pattern::{parse, NodeType, Formatter, FormatterError, PlaceholderValue}; /// use icu_testdata::buffer; /// use icu_provider::serde::AsDeserializingBufferProvider; @@ -291,10 +291,10 @@ where /// let data_provider = buffer_provider.as_deserializing(); /// let icu_data_provider = /// Rc::new( IcuDataProvider::try_new( &data_provider )? ); - /// let mut lexer = Lexer::try_new( &icu_data_provider )?; - /// let tokens = lexer.tokenise( + /// let tokens = tokenise( /// "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", - /// &vec![ '{', '}', '`', '#' ] + /// &vec![ '{', '}', '`', '#' ], + /// &icu_data_provider, /// ); /// let tree = parse( tokens.0 ).expect( "Failed to parse tokens." ); /// let locale: Rc = Rc::new( "en-ZA".parse().expect( "Failed to parse language tag." ) ); @@ -331,7 +331,7 @@ where } /// Returns the language tag used in creating the formatter. 
- pub fn language( &self ) -> &Rc { + pub fn language_tag( &self ) -> &Rc { &self.language_tag } @@ -370,12 +370,12 @@ where }, PatternPart::PatternString( placeholder ) => { let Some( value ) = values.get( placeholder ) else { - return Err( FormatterError::PlaceholderValue( "PatternString".to_string() ) ); + return Err( FormatterError::PlaceholderValue( "PatternString 373".to_string() ) ); }; match value { PlaceholderValue::String( value ) => string.push_str( value ), PlaceholderValue::LString( value) => string.push_str( value.as_str() ), - _ => return Err( FormatterError::InvalidValue( "PatternString".to_string() ) ) + _ => return Err( FormatterError::InvalidValue( "PatternString 378".to_string() ) ) } }, PatternPart::PatternDecimal{ diff --git a/crates/pattern/src/lib.rs b/crates/pattern/src/lib.rs index 4662eec..7a1de2b 100644 --- a/crates/pattern/src/lib.rs +++ b/crates/pattern/src/lib.rs @@ -17,7 +17,7 @@ //! //! ``` //! use i18n_icu::IcuDataProvider; -//! use i18n_lexer::{Token, TokenType, Lexer}; +//! use i18n_lexer::{Token, TokenType, tokenise}; //! use i18n_pattern::{ parse, NodeType, Formatter, FormatterError, PlaceholderValue }; //! use icu_testdata::buffer; //! use icu_provider::serde::AsDeserializingBufferProvider; @@ -30,10 +30,10 @@ //! let buffer_provider = buffer(); //! let data_provider = buffer_provider.as_deserializing(); //! let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); -//! let mut lexer = Lexer::try_new( &icu_data_provider )?; -//! let tokens = lexer.tokenise( +//! let tokens = tokenise( //! "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", -//! &vec![ '{', '}', '`', '#' ] +//! &vec![ '{', '}', '`', '#' ], +//! &icu_data_provider, //! ); //! let tree = parse( tokens.0 )?; //! let locale: Rc = Rc::new( "en-ZA".parse()? 
); diff --git a/crates/pattern/src/parser.rs b/crates/pattern/src/parser.rs index e926a40..0158fac 100644 --- a/crates/pattern/src/parser.rs +++ b/crates/pattern/src/parser.rs @@ -20,7 +20,7 @@ use core::fmt::{ Display, Formatter, Result as FmtResult }; /// /// ``` /// use i18n_icu::IcuDataProvider; -/// use i18n_lexer::{Token, TokenType, Lexer}; +/// use i18n_lexer::{Token, TokenType, tokenise}; /// use i18n_pattern::{parse, NodeType, Formatter, FormatterError, PlaceholderValue}; /// use icu_testdata::buffer; /// use icu_provider::serde::AsDeserializingBufferProvider; @@ -33,10 +33,10 @@ use core::fmt::{ Display, Formatter, Result as FmtResult }; /// let buffer_provider = buffer(); /// let data_provider = buffer_provider.as_deserializing(); /// let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); -/// let mut lexer = Lexer::try_new( &icu_data_provider )?; -/// let tokens = lexer.tokenise( +/// let tokens = tokenise( /// "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", -/// &vec![ '{', '}', '`', '#' ] +/// &vec![ '{', '}', '`', '#' ], +/// &icu_data_provider, /// ); /// let tree = parse( tokens.0 )?; /// let locale: Rc = Rc::new( "en-ZA".parse()? ); diff --git a/crates/pattern/tests/formatter.rs b/crates/pattern/tests/formatter.rs index 8c02c72..8c4c1c8 100644 --- a/crates/pattern/tests/formatter.rs +++ b/crates/pattern/tests/formatter.rs @@ -4,7 +4,7 @@ //! Testing formatter. use i18n_icu::IcuDataProvider; -use i18n_lexer::Lexer; +use i18n_lexer::tokenise; use i18n_pattern::{ parse, Formatter, PlaceholderValue }; use icu_testdata::buffer; use icu_provider::serde::AsDeserializingBufferProvider; @@ -19,9 +19,9 @@ fn plain_text() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? 
); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( - "A simple plain text string.", &vec![ '{', '}', '`', '#' ] + let tokens = tokenise( + "A simple plain text string.", &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -40,9 +40,9 @@ fn pattern_string() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( - "Expecting a string for placeholder: {string}", &vec![ '{', '}', '`', '#' ] + let tokens = tokenise( + "Expecting a string for placeholder: {string}", &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -69,10 +69,10 @@ fn pattern_plural() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -99,10 +99,10 @@ fn pattern_decimal() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? 
); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "There is {amount decimal} kg of rice in the container.", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -129,10 +129,10 @@ fn pattern_decimal_with_option() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "There is {amount decimal sign#always} kg of rice in the container.", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -159,10 +159,10 @@ fn pattern_dateime() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "At this point in time {time date_time} the moon winked out.", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? ); @@ -192,10 +192,10 @@ fn pattern_dateime_string() -> Result<(), Box> { let data_provider = buffer_provider.as_deserializing(); let icu_data_provider = Rc::new( IcuDataProvider::try_new( &data_provider )? ); - let mut lexer = Lexer::try_new( &icu_data_provider )?; - let tokens = lexer.tokenise( + let tokens = tokenise( "At this point in time {time date_time} the moon winked out.", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &icu_data_provider, ); let tree = parse( tokens.0 )?; let locale: Rc = Rc::new( "en-ZA".parse()? 
); diff --git a/crates/pattern/tests/parser.rs b/crates/pattern/tests/parser.rs index c65c353..299c2bc 100644 --- a/crates/pattern/tests/parser.rs +++ b/crates/pattern/tests/parser.rs @@ -4,7 +4,7 @@ //! Testing `decimal` pattern and `plural` pattern. use i18n_icu::IcuDataProvider; -use i18n_lexer::Lexer; +use i18n_lexer::tokenise; use i18n_pattern::parse; use icu_testdata::buffer; use icu_provider::serde::AsDeserializingBufferProvider; @@ -15,10 +15,12 @@ use std::error::Error; fn decimal() -> Result<(), Box> { let buffer_provider = buffer(); let data_provider = buffer_provider.as_deserializing(); - let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - let tokens = lexer.tokenise( - "String contains a {placeholder decimal sign#negative}.", &vec![ '{', '}', '`', '#' ] + let icu_data_provider = + IcuDataProvider::try_new( &data_provider )?; + let tokens = tokenise( + "String contains a {placeholder decimal sign#negative}.", + &vec![ '{', '}', '`', '#' ], + &Rc::new( icu_data_provider ), ); let tree = parse( tokens.0 )?; assert_eq!( tree.len(), 10, "Should contain 10 nodes." ); @@ -29,11 +31,12 @@ fn decimal() -> Result<(), Box> { fn plural() -> Result<(), Box> { let buffer_provider = buffer(); let data_provider = buffer_provider.as_deserializing(); - let icu_data_provider = IcuDataProvider::try_new( &data_provider )?; - let mut lexer = Lexer::try_new( &Rc::new( icu_data_provider ) )?; - let tokens = lexer.tokenise( + let icu_data_provider = + IcuDataProvider::try_new( &data_provider )?; + let tokens = tokenise( "There {dogs_number plural one#one_dog other#dogs} in the park.#{dogs are # dogs}{one_dog is 1 dog}", - &vec![ '{', '}', '`', '#' ] + &vec![ '{', '}', '`', '#' ], + &Rc::new( icu_data_provider ), ); let tree = parse( tokens.0 )?; assert_eq!( tree.len(), 24, "Should contain 24 nodes." 
); diff --git a/crates/provider/core/src/provider.rs b/crates/provider/core/src/provider.rs index 94ccb17..7e769f3 100644 --- a/crates/provider/core/src/provider.rs +++ b/crates/provider/core/src/provider.rs @@ -13,11 +13,9 @@ use std::rc::Rc; pub trait LStringProvider { /// Ideally a single exact match should be returned, yet may not be for the requested language tag. If no strings - /// is found for the requested tag, the right most subtag is removed sequentially until either at least 1 `LString` - /// is found, or `None returned when there are no more subtags to be removed. Multiple `LString` may be returned - /// when there are multiple entries of language tags having additional subtags than the requested language tag. - /// - /// Return of `None` indicates no strings was found matching the requested language tag, or its more general form. + /// is found for the requested tag, the right most subtag is removed sequentially until there are no more subtags. + /// Multiple `LString`s may be returned when there are multiple entries of language tags having additional subtags + /// than the requested language tag. /// /// Return of `ProviderError` indicates there was an error, usually in the data store. fn get>( &self, language_tag: &Rc ) -> Result, ProviderError>; + /// Similar to `get()` method, except that `get_one()` will only return a single `LString` if multiple strings are + /// available. + /// + /// `None` is returned when there are no strings available for the language tag. + fn get_one>( + &self, identifier: T, + language_tag: &Rc + ) -> Result, ProviderError>; + /// Retrieve the default language tag of the crate's data store. /// /// Return of `None` indicates no default language tag was found with in the provider's data store. 
diff --git a/crates/provider/sqlite3/README.asciidoc b/crates/provider/sqlite3/README.asciidoc index 5a43f5d..287df7c 100644 --- a/crates/provider/sqlite3/README.asciidoc +++ b/crates/provider/sqlite3/README.asciidoc @@ -24,13 +24,12 @@ i18n_provider_sqlite3-rizzen-yazston = "0.5.0" use i18n_provider_sqlite3::ProviderSqlite3; use i18n_provider::LStringProvider; use i18n_registry::LanguageTagRegistry; -use core::cell::RefCell; use std::rc::Rc; use std::error::Error; fn main() -> Result<(), Box> { let path = "./i18n/"; - let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); - let tag = registry.borrow_mut().get_language_tag( "en" )?; + let registry = Rc::new( LanguageTagRegistry::new() ); + let tag = registry.get_language_tag( "en" )?; let provider = ProviderSqlite3::try_new( path, ®istry diff --git a/crates/provider/sqlite3/src/lib.rs b/crates/provider/sqlite3/src/lib.rs index c3bf447..2086699 100644 --- a/crates/provider/sqlite3/src/lib.rs +++ b/crates/provider/sqlite3/src/lib.rs @@ -13,14 +13,13 @@ //! use i18n_provider_sqlite3::ProviderSqlite3; //! use i18n_provider::LStringProvider; //! use i18n_registry::LanguageTagRegistry; -//! use core::cell::RefCell; //! use std::rc::Rc; //! use std::error::Error; //! //! fn main() -> Result<(), Box> { //! let path = "./i18n/"; -//! let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); -//! let tag = registry.borrow_mut().get_language_tag( "en" )?; +//! let registry = Rc::new( LanguageTagRegistry::new() ); +//! let tag = registry.get_language_tag( "en" )?; //! let provider = ProviderSqlite3::try_new( //! path, //! 
®istry diff --git a/crates/provider/sqlite3/src/provider.rs b/crates/provider/sqlite3/src/provider.rs index 76d7456..f4955dc 100644 --- a/crates/provider/sqlite3/src/provider.rs +++ b/crates/provider/sqlite3/src/provider.rs @@ -22,13 +22,12 @@ use core::cell::RefCell; /// use i18n_provider_sqlite3::ProviderSqlite3; /// use i18n_provider::LStringProvider; /// use i18n_registry::LanguageTagRegistry; -/// use core::cell::RefCell; /// use std::rc::Rc; /// use std::error::Error; /// fn main() -> Result<(), Box> { /// let path = "./i18n/"; -/// let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); -/// let tag = registry.borrow_mut().get_language_tag( "en" )?; +/// let registry = Rc::new( LanguageTagRegistry::new() ); +/// let tag = registry.get_language_tag( "en" )?; /// let provider = ProviderSqlite3::try_new( /// path, /// ®istry @@ -45,7 +44,7 @@ use core::cell::RefCell; /// ``` pub struct ProviderSqlite3 { directory: PathBuf, - language_tag_registry: Rc>, + language_tag_registry: Rc, connections: RefCell>, } @@ -63,7 +62,7 @@ impl ProviderSqlite3 { /// files. pub fn try_new>( directory_path: T, - language_tag_registry: &Rc> + language_tag_registry: &Rc ) -> Result { let Ok( directory ) = directory_path.try_into() else { return Err( ProviderSqlite3Error::InvalidPath ) @@ -113,11 +112,9 @@ impl LStringProvider for ProviderSqlite3 { /// Retrieve a vector of possible `LString` for requested identifier that matches a language tag. /// /// Ideally a single exact match should be returned, yet may not be for the requested language tag. If no strings - /// is found for the requested tag, the right most subtag is removed sequentially until either at least 1 `LString` - /// is found, or `None returned when there are no more subtags to be removed. Multiple `LString` may be returned - /// when there are multiple entries of language tags having additional subtags than the requested language tag. 
- /// - /// Return of `None` indicates no strings was found matching the requested language tag, or its more general form. + /// is found for the requested tag, the right most subtag is removed sequentially until there are no more subtags. + /// Multiple `LString`s may be returned when there are multiple entries of language tags having additional subtags + /// than the requested language tag. /// /// Return of `ErrorMessage` indicates there was a Sqlite3 error. /// @@ -127,13 +124,12 @@ impl LStringProvider for ProviderSqlite3 { /// use i18n_provider_sqlite3::ProviderSqlite3; /// use i18n_provider::LStringProvider; /// use i18n_registry::LanguageTagRegistry; - /// use core::cell::RefCell; /// use std::rc::Rc; /// use std::error::Error; /// fn main() -> Result<(), Box> { /// let path = "./i18n/"; - /// let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); - /// let tag = registry.borrow_mut().get_language_tag( "en" )?; + /// let registry = Rc::new( LanguageTagRegistry::new() ); + /// let tag = registry.get_language_tag( "en" )?; /// let provider = ProviderSqlite3::try_new( /// path, /// &registry @@ -225,7 +221,7 @@ impl LStringProvider for ProviderSqlite3 { ) }; let language = match - self.language_tag_registry.as_ref().borrow().get_language_tag( tag_raw ) { + self.language_tag_registry.as_ref().get_language_tag( tag_raw ) { Ok( result ) => result, Err( error ) => return Err( ProviderError { @@ -248,6 +244,47 @@ impl LStringProvider for ProviderSqlite3 { Ok( result ) } + /// Similar to `get()` method, except that `get_one()` will only return a single `LString` if multiple strings are + /// available. + /// + /// `None` is returned when there are no strings available. 
+ /// + /// # Examples + /// + /// ``` + /// use i18n_provider_sqlite3::ProviderSqlite3; + /// use i18n_provider::LStringProvider; + /// use i18n_registry::LanguageTagRegistry; + /// use core::cell::RefCell; + /// use std::rc::Rc; + /// use std::error::Error; + /// fn main() -> Result<(), Box> { + /// let path = "./i18n/"; + /// let registry = Rc::new( LanguageTagRegistry::new() ); + /// let tag = registry.get_language_tag( "en" )?; + /// let provider = ProviderSqlite3::try_new( + /// path, + /// &registry + /// )?; + /// let string = provider.get_one( + /// "i18n_provider_sqlite3/invalid_path", + /// &tag + /// )?.expect( "No string found for language tag." ); + /// // `get_one()` returns at most one `LString` for the identifier. + /// assert_eq!( string.as_str(), "Invalid path provided.", "Not correct string." ); + /// assert_eq!( string.language_tag().as_str(), "en-ZA", "Must be en-ZA." ); + /// Ok( () ) + /// } + /// ``` + fn get_one>( + &self, identifier: T, + language_tag: &Rc + ) -> Result, ProviderError> { + let mut result = self.get( identifier, language_tag )?; + //temp for now, TODO: try to return string closest to the language tag, by match language length + Ok( result.pop() ) + } + + /// Retrieve the default language of the crate. + /// + /// Return of `None` indicates no default language tag was found. 
@@ -265,7 +302,7 @@ impl LStringProvider for ProviderSqlite3 { /// use std::error::Error; /// fn main() -> Result<(), Box> { /// let path = "./i18n/"; - /// let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); + /// let registry = Rc::new( LanguageTagRegistry::new() ); /// let provider = ProviderSqlite3::try_new( /// path, /// ®istry diff --git a/crates/provider/sqlite3/tests/sqlite3.rs b/crates/provider/sqlite3/tests/sqlite3.rs index 575994a..3d336bb 100644 --- a/crates/provider/sqlite3/tests/sqlite3.rs +++ b/crates/provider/sqlite3/tests/sqlite3.rs @@ -6,13 +6,13 @@ use i18n_provider::LStringProvider; use i18n_provider_sqlite3::ProviderSqlite3; use i18n_registry::LanguageTagRegistry; -use std::{ rc::Rc, cell::RefCell, error::Error }; +use std::{ rc::Rc, error::Error }; #[test] fn get_for_en() -> Result<(), Box> { let path = "./i18n/"; - let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); - let tag = registry.borrow_mut().get_language_tag( "en" )?; + let registry = Rc::new( LanguageTagRegistry::new() ); + let tag = registry.get_language_tag( "en" )?; let provider = ProviderSqlite3::try_new( path, ®istry @@ -29,8 +29,8 @@ fn get_for_en() -> Result<(), Box> { #[test] fn get_for_en_za_u_ca_julian() -> Result<(), Box> { let path = "./i18n/"; - let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); - let tag = registry.borrow_mut().get_language_tag( "en-ZA-u-ca-julian" )?; + let registry = Rc::new( LanguageTagRegistry::new() ); + let tag = registry.get_language_tag( "en-ZA-u-ca-julian" )?; let provider = ProviderSqlite3::try_new( path, ®istry @@ -47,7 +47,7 @@ fn get_for_en_za_u_ca_julian() -> Result<(), Box> { #[test] fn default_language_tag() -> Result<(), Box> { let path = "./i18n/"; - let registry = Rc::new( RefCell::new( LanguageTagRegistry::new() ) ); + let registry = Rc::new( LanguageTagRegistry::new() ); let provider = ProviderSqlite3::try_new( path, ®istry