diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index f366329b4f86..858eddc7c8f8 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array}; use arrow::datatypes::DataType; use arrow::error::ArrowError; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the Unicode character code of the first character in a string.", + syntax_example = "ascii(str)", + sql_example = r#"```sql +> select ascii('abc'); ++--------------------+ +| ascii(Utf8("abc")) | ++--------------------+ +| 97 | ++--------------------+ +> select ascii('🚀'); ++-------------------+ +| ascii(Utf8("🚀")) | ++-------------------+ +| 128640 | ++-------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "chr") +)] #[derive(Debug)] pub struct AsciiFunc { signature: Signature, @@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_ascii_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_ascii_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the Unicode character code of the first character in a string.", - "ascii(str)", - ) - .with_sql_example( - r#"```sql -> select ascii('abc'); -+--------------------+ -| ascii(Utf8("abc")) | -+--------------------+ -| 97 | -+--------------------+ -> select ascii('🚀'); -+-------------------+ -| ascii(Utf8("🚀")) | -+-------------------+ -| 128640 | -+-------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("chr") - .build() - }) -} - fn calculate_ascii<'a, V>(array: V) -> Result where V: ArrayAccessor, diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index 5a23692d85c7..623fb2ba03f0 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -18,14 +18,29 @@ use arrow::compute::kernels::length::bit_length; use arrow::datatypes::DataType; use std::any::Any; -use std::sync::OnceLock; use crate::utils::utf8_to_int_type; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the bit length of a string.", + syntax_example = "bit_length(str)", + sql_example = r#"```sql +> select bit_length('datafusion'); ++--------------------------------+ +| bit_length(Utf8("datafusion")) | ++--------------------------------+ +| 80 | ++--------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "length"), + related_udf(name = "octet_length") +)] #[derive(Debug)] pub struct BitLengthFunc { signature: Signature, @@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_bit_length_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_bit_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the bit length of a string.", - "bit_length(str)", - ) - .with_sql_example( - r#"```sql -> select bit_length('datafusion'); -+--------------------------------+ -| bit_length(Utf8("datafusion")) | -+--------------------------------+ -| 80 | -+--------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("length") - .with_related_udf("octet_length") - .build() - }) -} diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 127b02cdf733..3530e3f22c0f 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::ArrayRef; use arrow::array::StringArray; @@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8; use crate::utils::make_scalar_function; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; /// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character. /// chr(65) = 'A' @@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the character with the specified ASCII or Unicode code value.", + syntax_example = "chr(expression)", + sql_example = r#"```sql +> select chr(128640); ++--------------------+ +| chr(Int64(128640)) | ++--------------------+ +| 🚀 | ++--------------------+ +```"#, + standard_argument(name = "expression", prefix = "String"), + related_udf(name = "ascii") +)] #[derive(Debug)] pub struct ChrFunc { signature: Signature, @@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_chr_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_chr_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the character with the specified ASCII or Unicode code value.", - "chr(expression)", - ) - .with_sql_example( - r#"```sql -> select chr(128640); -+--------------------+ -| chr(Int64(128640)) | -+--------------------+ -| 🚀 | -+--------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .with_related_udf("ascii") - .build() - }) -} diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 3e5c72ac20e9..36871f0c3282 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View}; use datafusion_common::exec_err; use datafusion_common::DataFusionError; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Return true if search_str is found within string (case-sensitive).", + syntax_example = "contains(str, search_str)", + sql_example = r#"```sql +> select contains('the quick brown fox', 'row'); ++---------------------------------------------------+ +| contains(Utf8("the quick brown fox"),Utf8("row")) | ++---------------------------------------------------+ +| true | ++---------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "search_str", description = "The string to search for in str.") +)] #[derive(Debug)] pub struct ContainsFunc { signature: Signature, @@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_contains_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_contains_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Return true if search_str is found within string (case-sensitive).", - "contains(str, search_str)", - ) - .with_sql_example( - r#"```sql -> select contains('the quick brown fox', 'row'); -+---------------------------------------------------+ -| contains(Utf8("the quick brown fox"),Utf8("row")) | -+---------------------------------------------------+ -| true | -+---------------------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("search_str", "The string to search for in str.") - .build() - }) -} - /// use `arrow::compute::contains` to do the calculation for contains pub fn contains(args: &[ArrayRef]) -> Result { match (args[0].data_type(), args[1].data_type()) { diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 26355556ff07..f443571112e7 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -18,14 +18,29 @@ use arrow::compute::kernels::length::length; use arrow::datatypes::DataType; use std::any::Any; -use std::sync::OnceLock; use crate::utils::utf8_to_int_type; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the length of a string in bytes.", + syntax_example = "octet_length(str)", + sql_example = r#"```sql +> select octet_length('Ångström'); ++--------------------------------+ +| octet_length(Utf8("Ångström")) | ++--------------------------------+ +| 10 | ++--------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "bit_length"), + related_udf(name = "length") +)] #[derive(Debug)] pub struct OctetLengthFunc { signature: Signature, @@ -92,36 +107,10 @@ impl ScalarUDFImpl for OctetLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_octet_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_octet_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the length of a string in bytes.", - "octet_length(str)", - ) - .with_sql_example( - r#"```sql -> select octet_length('Ångström'); -+--------------------------------+ -| octet_length(Utf8("Ångström")) | -+--------------------------------+ -| 10 | -+--------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("bit_length") - .with_related_udf("length") - .build() - }) -} - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index ff8430f1530e..3fb208bb7198 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -18,15 +18,14 @@ use arrow::array::{ArrayRef, OffsetSizeTrait}; use arrow::datatypes::DataType; use std::any::Any; -use std::sync::OnceLock; use crate::string::common::*; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. /// rtrim('testxxzx', 'xyz') = 'test' @@ -35,6 +34,33 @@ fn rtrim(args: &[ArrayRef]) -> Result { general_trim::(args, TrimType::Right, use_string_view) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Trims the specified trim string from the end of a string. If no trim string is provided, all whitespace is removed from the end of the input string.", + syntax_example = "rtrim(str[, trim_str])", + alternative_syntax = "trim(TRAILING trim_str FROM str)", + sql_example = r#"```sql +> select rtrim(' datafusion '); ++-------------------------------+ +| rtrim(Utf8(" datafusion ")) | ++-------------------------------+ +| datafusion | ++-------------------------------+ +> select rtrim('___datafusion___', '_'); ++-------------------------------------------+ +| rtrim(Utf8("___datafusion___"),Utf8("_")) | ++-------------------------------------------+ +| ___datafusion | ++-------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "trim_str", + description = "String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._" + ), + related_udf(name = "btrim"), + related_udf(name = "ltrim") +)] #[derive(Debug)] pub struct RtrimFunc { signature: Signature, @@ -100,41 +126,10 @@ impl ScalarUDFImpl for RtrimFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_rtrim_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_rtrim_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Trims the specified trim string from the end of a string. If no trim string is provided, all whitespace is removed from the end of the input string.", - "rtrim(str[, trim_str])") - .with_sql_example(r#"```sql -> select rtrim(' datafusion '); -+-------------------------------+ -| rtrim(Utf8(" datafusion ")) | -+-------------------------------+ -| datafusion | -+-------------------------------+ -> select rtrim('___datafusion___', '_'); -+-------------------------------------------+ -| rtrim(Utf8("___datafusion___"),Utf8("_")) | -+-------------------------------------------+ -| ___datafusion | -+-------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("trim_str", "String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._") - .with_alternative_syntax("trim(TRAILING trim_str FROM str)") - .with_related_udf("btrim") - .with_related_udf("ltrim") - .build() - }) -} - #[cfg(test)] mod tests { use arrow::array::{Array, StringArray, StringViewArray}; diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index ad51a8ef72fb..ee436276fbc8 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -22,13 +22,29 @@ use arrow::array::{ }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the number of characters in a string.", + syntax_example = "character_length(str)", + sql_example = r#"```sql +> select character_length('Ångström'); ++------------------------------------+ +| character_length(Utf8("Ångström")) | ++------------------------------------+ +| 8 | ++------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "bit_length"), + related_udf(name = "octet_length") +)] #[derive(Debug)] pub struct CharacterLengthFunc { signature: Signature, @@ -85,36 +101,10 @@ impl ScalarUDFImpl for CharacterLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_character_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_character_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the number of characters in a string.", - "character_length(str)", - ) - .with_sql_example( - r#"```sql -> select character_length('Ångström'); -+------------------------------------+ -| character_length(Utf8("Ångström")) | -+------------------------------------+ -| 8 | -+------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("bit_length") - .with_related_udf("octet_length") - .build() - }) -} - /// Returns number of characters in the string. /// character_length('josé') = 4 /// The implementation counts UTF-8 code points to count the number of characters