From 6a9287048535d88e58c6e302e8083c67d6f3648c Mon Sep 17 00:00:00 2001 From: Ian Lai <108986288+Chen-Yuan-Lai@users.noreply.github.com> Date: Sun, 29 Dec 2024 21:14:19 +0800 Subject: [PATCH] doc-gen: migrate scalar functions (array) documentation 1/3 (#13928) * doc-gen: migrate scalar functions (array) documentation 1/3 * fix: remove unused import, fix typo and update function docs --------- Co-authored-by: Cheng-Yuan-Lai --- datafusion-cli/Cargo.lock | 2 + datafusion/functions-nested/Cargo.toml | 2 + datafusion/functions-nested/src/array_has.rs | 160 +++++------ .../functions-nested/src/cardinality.rs | 50 ++-- datafusion/functions-nested/src/concat.rs | 158 +++++------ datafusion/functions-nested/src/dimension.rs | 99 +++---- datafusion/functions-nested/src/distance.rs | 58 ++-- datafusion/functions-nested/src/empty.rs | 50 ++-- datafusion/functions-nested/src/except.rs | 70 ++--- datafusion/functions-nested/src/extract.rs | 258 ++++++++---------- .../source/user-guide/sql/scalar_functions.md | 144 +++++----- 11 files changed, 446 insertions(+), 605 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 863bb5181f45..92dcf24708c6 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1476,10 +1476,12 @@ dependencies = [ "arrow-ord", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-macros", "datafusion-physical-expr-common", "itertools", "log", diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 5310493b4e45..e7254e4125cb 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -46,10 +46,12 @@ arrow-buffer = { workspace = true } arrow-ord = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true } +datafusion-doc = { workspace = true } datafusion-execution = { 
workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } +datafusion-macros = { workspace = true } datafusion-physical-expr-common = { workspace = true } itertools = { workspace = true, features = ["use_std"] } log = { workspace = true } diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 499b07dafccf..0a3daa18c096 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -25,17 +25,17 @@ use arrow_buffer::BooleanBuffer; use datafusion_common::cast::as_generic_list_array; use datafusion_common::utils::string_utils::string_array_to_vec; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use datafusion_physical_expr_common::datum::compare_with_eq; use itertools::Itertools; use crate::utils::make_scalar_function; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!(ArrayHas, @@ -57,6 +57,27 @@ make_udf_expr_and_func!(ArrayHasAny, array_has_any_udf // internal function name ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if the array contains the element.", + syntax_example = "array_has(array, element)", + sql_example = r#"```sql +> select array_has([1, 2, 3], 2); ++-----------------------------+ +| array_has(List([1,2,3]), 2) | ++-----------------------------+ +| true | ++-----------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "element", + description = "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct ArrayHas { signature: Signature, @@ -138,41 +159,10 @@ impl ScalarUDFImpl for ArrayHas { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_has_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if the array contains the element.", - - "array_has(array, element)") - .with_sql_example( - r#"```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - fn array_has_inner_for_scalar( haystack: &ArrayRef, needle: &dyn Datum, @@ -287,6 +277,27 @@ fn array_has_any_inner(args: &[ArrayRef]) -> Result { } } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if all elements of sub-array exist in array.", + syntax_example = "array_has_all(array, sub-array)", + sql_example = r#"```sql +> select array_has_all([1, 2, 3, 4], [2, 3]); ++--------------------------------------------+ +| array_has_all(List([1,2,3,4]), List([2,3])) | ++--------------------------------------------+ +| true | ++--------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "sub-array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct ArrayHasAll { signature: Signature, @@ -337,39 +348,31 @@ impl ScalarUDFImpl for ArrayHasAll { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_all_doc()) + self.doc() } } -fn get_array_has_all_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if all elements of sub-array exist in array.", - - "array_has_all(array, sub-array)") - .with_sql_example( - r#"```sql -> select array_has_all([1, 2, 3, 4], [2, 3]); -+--------------------------------------------+ -| array_has_all(List([1,2,3,4]), List([2,3])) | -+--------------------------------------------+ -| true | -+--------------------------------------------+ +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if any elements exist in both arrays.", + syntax_example = "array_has_any(array, sub-array)", + sql_example = r#"```sql +> select array_has_any([1, 2, 3], [3, 4]); ++------------------------------------------+ +| array_has_any(List([1,2,3]), List([3,4])) | ++------------------------------------------+ +| true | ++------------------------------------------+ ```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "sub-array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "sub-array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub struct ArrayHasAny { signature: Signature, @@ -420,39 +423,10 @@ impl ScalarUDFImpl for ArrayHasAny { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_any_doc()) + self.doc() } } -fn get_array_has_any_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if any elements exist in both arrays.", - - "array_has_any(array, sub-array)") - .with_sql_example( - r#"```sql -> select array_has_any([1, 2, 3], [3, 4]); -+------------------------------------------+ -| array_has_any(List([1,2,3]), List([3,4])) | -+------------------------------------------+ -| true | -+------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "sub-array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Represents the type of comparison for array_has. 
#[derive(Debug, PartialEq, Clone, Copy)] enum ComparisonType { diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 45543d1bd68b..b577e2aa9df9 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -26,13 +26,13 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List, Map, UInt64}; use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array}; use datafusion_common::Result; use datafusion_common::{exec_err, plan_err}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( Cardinality, @@ -57,6 +57,23 @@ impl Cardinality { } } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the total number of elements in the array.", + syntax_example = "cardinality(array)", + sql_example = r#"```sql +> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); ++--------------------------------------+ +| cardinality(List([1,2,3,4,5,6,7,8])) | ++--------------------------------------+ +| 8 | ++--------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct Cardinality { signature: Signature, @@ -96,37 +113,10 @@ impl ScalarUDFImpl for Cardinality { } fn documentation(&self) -> Option<&Documentation> { - Some(get_cardinality_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_cardinality_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the total number of elements in the array.", - - "cardinality(array)") - .with_sql_example( - r#"```sql -> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); -+--------------------------------------+ -| cardinality(List([1,2,3,4,5,6,7,8])) | -+--------------------------------------+ -| 8 | -+--------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Cardinality SQL function pub fn cardinality_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 3e8a5877fb33..934c5a5fec73 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -17,7 +17,7 @@ //! [`ScalarUDFImpl`] definitions for `array_append`, `array_prepend` and `array_concat` functions. 
-use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use std::{any::Any, cmp::Ordering}; use arrow::array::{Capacities, MutableArrayData}; @@ -28,11 +28,11 @@ use datafusion_common::Result; use datafusion_common::{ cast::as_generic_list_array, exec_err, not_impl_err, plan_err, utils::list_ndims, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ type_coercion::binary::get_wider_type, ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function}; @@ -44,6 +44,24 @@ make_udf_expr_and_func!( array_append_udf // internal function name ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Appends an element to the end of an array.", + syntax_example = "array_append(array, element)", + sql_example = r#"```sql +> select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "element", description = "Element to append to the array.") +)] #[derive(Debug)] pub struct ArrayAppend { signature: Signature, @@ -99,41 +117,10 @@ impl ScalarUDFImpl for ArrayAppend { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_append_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_append_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Appends an element to the end of an array.", - - "array_append(array, element)") - .with_sql_example( - r#"```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to append to the array.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayPrepend, array_prepend, @@ -142,6 +129,24 @@ make_udf_expr_and_func!( array_prepend_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Prepends an element to the beginning of an array.", + syntax_example = "array_prepend(element, array)", + sql_example = r#"```sql +> select array_prepend(1, [2, 3, 4]); ++---------------------------------------+ +| array_prepend(Int64(1),List([2,3,4])) | ++---------------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "element", description = "Element to prepend to the array.") +)] #[derive(Debug)] pub struct ArrayPrepend { signature: Signature, @@ -197,41 +202,10 @@ impl ScalarUDFImpl for ArrayPrepend { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_prepend_doc()) + self.doc() } } -static DOCUMENTATION_PREPEND: OnceLock = OnceLock::new(); - -fn get_array_prepend_doc() -> &'static Documentation { - DOCUMENTATION_PREPEND.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Prepends an element to the beginning of an array.", - - "array_prepend(element, array)") - .with_sql_example( - r#"```sql -> select array_prepend(1, [2, 3, 4]); -+---------------------------------------+ -| array_prepend(Int64(1),List([2,3,4])) | -+---------------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------------+ -```"#, - ) - .with_argument( - "element", - "Element to prepend to the array.", - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayConcat, array_concat, @@ -239,6 +213,27 @@ make_udf_expr_and_func!( array_concat_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Concatenates arrays.", + syntax_example = "array_concat(array[, ..., array_n])", + sql_example = r#"```sql +> select array_concat([1, 2], [3, 4], [5, 6]); ++---------------------------------------------------+ +| array_concat(List([1,2]),List([3,4]),List([5,6])) | ++---------------------------------------------------+ +| [1, 2, 3, 4, 5, 6] | ++---------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array_n", + description = "Subsequent array column or literal array to concatenate." 
+ ) +)] #[derive(Debug)] pub struct ArrayConcat { signature: Signature, @@ -319,39 +314,10 @@ impl ScalarUDFImpl for ArrayConcat { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_concat_doc()) + self.doc() } } -fn get_array_concat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Concatenates arrays.", - - "array_concat(array[, ..., array_n])") - .with_sql_example( - r#"```sql -> select array_concat([1, 2], [3, 4], [5, 6]); -+---------------------------------------------------+ -| array_concat(List([1,2]),List([3,4]),List([5,6])) | -+---------------------------------------------------+ -| [1, 2, 3, 4, 5, 6] | -+---------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression to concatenate. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array_n", - "Subsequent array column or literal array to concatenate.", - ) - .build() - }) -} - /// Array_concat/Array_cat SQL function pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result { if args.is_empty() { diff --git a/datafusion/functions-nested/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs index 2d2f90e9c7cb..9933bcf4a4bd 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -29,11 +29,11 @@ use datafusion_common::{exec_err, plan_err, Result}; use crate::utils::{compute_array_dims, make_scalar_function}; use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64}; use arrow_schema::Field; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; -use std::sync::{Arc, OnceLock}; +use datafusion_macros::user_doc; +use std::sync::Arc; make_udf_expr_and_func!( ArrayDims, @@ -43,6 +43,23 @@ make_udf_expr_and_func!( array_dims_udf ); +#[user_doc( 
+ doc_section(label = "Array Functions"), + description = "Returns an array of the array's dimensions.", + syntax_example = "array_dims(array)", + sql_example = r#"```sql +> select array_dims([[1, 2, 3], [4, 5, 6]]); ++---------------------------------+ +| array_dims(List([1,2,3,4,5,6])) | ++---------------------------------+ +| [2, 3] | ++---------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayDims { signature: Signature, @@ -94,37 +111,10 @@ impl ScalarUDFImpl for ArrayDims { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_dims_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_dims_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of the array's dimensions.", - - "array_dims(array)") - .with_sql_example( - r#"```sql -> select array_dims([[1, 2, 3], [4, 5, 6]]); -+---------------------------------+ -| array_dims(List([1,2,3,4,5,6])) | -+---------------------------------+ -| [2, 3] | -+---------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayNdims, array_ndims, @@ -133,6 +123,24 @@ make_udf_expr_and_func!( array_ndims_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the number of dimensions of the array.", + syntax_example = "array_ndims(array, element)", + sql_example = r#"```sql +> select array_ndims([[1, 2, 3], [4, 5, 6]]); ++----------------------------------+ +| array_ndims(List([1,2,3,4,5,6])) | ++----------------------------------+ +| 2 | ++----------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "element", description = "Array element.") +)] #[derive(Debug)] pub(super) struct ArrayNdims { signature: Signature, @@ -181,39 +189,10 @@ impl ScalarUDFImpl for ArrayNdims { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_ndims_doc()) + self.doc() } } -fn get_array_ndims_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the number of dimensions of the array.", - - "array_ndims(array, element)") - .with_sql_example( - r#"```sql -> select array_ndims([[1, 2, 3], [4, 5, 6]]); -+----------------------------------+ -| array_ndims(List([1,2,3,4,5,6])) | -+----------------------------------+ -| 2 | -+----------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Array element.", - ) - .build() - }) -} - /// Array_dims SQL function pub fn array_dims_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index 704e840da3f5..1f0d31f2e071 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -29,13 +29,13 @@ use datafusion_common::cast::{ }; use datafusion_common::utils::coerced_fixed_size_list_to_list; use datafusion_common::{exec_err, internal_datafusion_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions::{downcast_arg, downcast_named_arg}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( ArrayDistance, @@ -45,6 +45,27 @@ make_udf_expr_and_func!( array_distance_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the Euclidean distance between two input arrays of equal length.", + syntax_example = "array_distance(array1, array2)", + sql_example = r#"```sql +> select array_distance([1, 2], [1, 4]); ++------------------------------------+ +| array_distance(List([1,2], [1,4])) | ++------------------------------------+ +| 2.0 | ++------------------------------------+ +```"#, + argument( + name = "array1", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayDistance { signature: Signature, @@ -108,41 +129,10 @@ impl ScalarUDFImpl for ArrayDistance { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_distance_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_distance_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the Euclidean distance between two input arrays of equal length.", - - "array_distance(array1, array2)") - .with_sql_example( - r#"```sql -> select array_distance([1, 2], [1, 4]); -+------------------------------------+ -| array_distance(List([1,2], [1,4])) | -+------------------------------------+ -| 2.0 | -+------------------------------------+ -```"#, - ) - .with_argument( - "array1", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - pub fn array_distance_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { return exec_err!("array_distance expects exactly two arguments"); diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 5270c84c0338..ab205a7cd431 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -23,12 +23,12 @@ use arrow_schema::DataType; use arrow_schema::DataType::{Boolean, FixedSizeList, LargeList, List}; use datafusion_common::cast::as_generic_list_array; use datafusion_common::{exec_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( 
ArrayEmpty, @@ -38,6 +38,23 @@ make_udf_expr_and_func!( array_empty_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns 1 for an empty array or 0 for a non-empty array.", + syntax_example = "empty(array)", + sql_example = r#"```sql +> select empty([1]); ++------------------+ +| empty(List([1])) | ++------------------+ +| 0 | ++------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayEmpty { signature: Signature, @@ -86,37 +103,10 @@ impl ScalarUDFImpl for ArrayEmpty { } fn documentation(&self) -> Option<&Documentation> { - Some(get_empty_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_empty_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns 1 for an empty array or 0 for a non-empty array.", - - "empty(array)") - .with_sql_example( - r#"```sql -> select empty([1]); -+------------------+ -| empty(List([1])) | -+------------------+ -| 0 | -+------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Array_empty SQL function pub fn array_empty_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 83c09ad7fd90..1bb5ffd5d36d 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -24,12 +24,12 @@ use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::OffsetBuffer; use arrow_schema::{DataType, FieldRef}; use datafusion_common::{exec_err, internal_err, HashSet, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( ArrayExcept, @@ -39,6 +39,33 @@ make_udf_expr_and_func!( array_except_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array of the elements that appear in the first array but not in the second.", + syntax_example = "array_except(array1, array2)", + sql_example = r#"```sql +> select array_except([1, 2, 3, 4], [5, 6, 3, 4]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +```"#, + argument( + name = "array1", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayExcept { signature: Signature, @@ -86,47 +113,10 @@ impl ScalarUDFImpl for ArrayExcept { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_except_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_except_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of the elements that appear in the first array but not in the second.", - - "array_except(array1, array2)") - .with_sql_example( - r#"```sql -> select array_except([1, 2, 3, 4], [5, 6, 3, 4]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -```"#, - ) - .with_argument( - "array1", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Array_except SQL function pub fn array_except_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index f972597bbf84..d5b8b5d61935 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -35,13 +35,13 @@ use datafusion_common::cast::as_list_array; use datafusion_common::{ exec_err, internal_datafusion_err, plan_err, DataFusionError, Result, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::Expr; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use crate::utils::make_scalar_function; @@ -80,6 +80,27 @@ make_udf_expr_and_func!( array_any_value_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Extracts the element with the index n from the array.", + syntax_example = "array_element(array, index)", + sql_example = r#"```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "index", + description = "Index to extract the element from the array." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayElement { signature: Signature, @@ -156,41 +177,10 @@ impl ScalarUDFImpl for ArrayElement { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_element_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_element_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Extracts the element with the index n from the array.", - - "array_element(array, index)") - .with_sql_example( - r#"```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "index", - "Index to extract the element from the array.", - ) - .build() - }) -} - /// array_element SQL function /// /// There are two arguments for array_element, the first one is the array, the second one is the 1-indexed index. @@ -296,6 +286,35 @@ pub fn array_slice(array: Expr, begin: Expr, end: Expr, stride: Option) -> array_slice_udf().call(args) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns a slice of the array based on 1-indexed start and end positions.", + syntax_example = "array_slice(array, begin, end)", + sql_example = r#"```sql +> select array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6); ++--------------------------------------------------------+ +| array_slice(List([1,2,3,4,5,6,7,8]),Int64(3),Int64(6)) | ++--------------------------------------------------------+ +| [3, 4, 5, 6] | ++--------------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. 
Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "begin", + description = "Index of the first element. If negative, it counts backward from the end of the array." + ), + argument( + name = "end", + description = "Index of the last element. If negative, it counts backward from the end of the array." + ), + argument( + name = "stride", + description = "Stride of the array slice. The default is 1." + ) +)] #[derive(Debug)] pub(super) struct ArraySlice { signature: Signature, @@ -362,47 +381,10 @@ impl ScalarUDFImpl for ArraySlice { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_slice_doc()) + self.doc() } } -fn get_array_slice_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns a slice of the array based on 1-indexed start and end positions.", - - "array_slice(array, begin, end)") - .with_sql_example( - r#"```sql -> select array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6); -+--------------------------------------------------------+ -| array_slice(List([1,2,3,4,5,6,7,8]),Int64(3),Int64(6)) | -+--------------------------------------------------------+ -| [3, 4, 5, 6] | -+--------------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "begin", - "Index of the first element. If negative, it counts backward from the end of the array.", - ) - .with_argument( - "end", - "Index of the last element. If negative, it counts backward from the end of the array.", - ) - .with_argument( - "stride", - "Stride of the array slice. 
The default is 1.", - ) - .build() - }) -} - /// array_slice SQL function /// /// We follow the behavior of array_slice in DuckDB @@ -629,6 +611,23 @@ where )?)) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the array without the first element.", + syntax_example = "array_pop_front(array)", + sql_example = r#"```sql +> select array_pop_front([1, 2, 3]); ++-------------------------------+ +| array_pop_front(List([1,2,3])) | ++-------------------------------+ +| [2, 3] | ++-------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayPopFront { signature: Signature, @@ -673,35 +672,10 @@ impl ScalarUDFImpl for ArrayPopFront { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_pop_front_doc()) + self.doc() } } -fn get_array_pop_front_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the array without the first element.", - - "array_pop_front(array)") - .with_sql_example( - r#"```sql -> select array_pop_front([1, 2, 3]); -+-------------------------------+ -| array_pop_front(List([1,2,3])) | -+-------------------------------+ -| [2, 3] | -+-------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_pop_front SQL function fn array_pop_front_inner(args: &[ArrayRef]) -> Result { let array_data_type = args[0].data_type(); @@ -737,6 +711,23 @@ where general_array_slice::(array, &from_array, &to_array, None) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the array without the last element.", + syntax_example = "array_pop_back(array)", + sql_example = r#"```sql +> select array_pop_back([1, 2, 3]); ++-------------------------------+ +| array_pop_back(List([1,2,3])) | ++-------------------------------+ +| [1, 2] | ++-------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayPopBack { signature: Signature, @@ -781,35 +772,10 @@ impl ScalarUDFImpl for ArrayPopBack { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_pop_back_doc()) + self.doc() } } -fn get_array_pop_back_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the array without the last element.", - - "array_pop_back(array)") - .with_sql_example( - r#"```sql -> select array_pop_back([1, 2, 3]); -+-------------------------------+ -| array_pop_back(List([1,2,3])) | -+-------------------------------+ -| [1, 2] | -+-------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_pop_back SQL function fn array_pop_back_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { @@ -849,6 +815,23 @@ where general_array_slice::(array, &from_array, &to_array, None) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the first non-null element in the array.", + syntax_example = "array_any_value(array)", + sql_example = r#"```sql +> select array_any_value([NULL, 1, 2, 3]); ++-------------------------------------+ +| array_any_value(List([NULL,1,2,3])) | ++-------------------------------------+ +| 1 | ++-------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayAnyValue { signature: Signature, @@ -897,35 +880,10 @@ impl ScalarUDFImpl for ArrayAnyValue { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_any_value_doc()) + self.doc() } } -fn get_array_any_value_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the first non-null element in the array.", - - "array_any_value(array)") - .with_sql_example( - r#"```sql -> select array_any_value([NULL, 1, 2, 3]); -+-------------------------------+ -| array_any_value(List([NULL,1,2,3])) | -+-------------------------------------+ -| 1 | -+-------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression.
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - fn array_any_value_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("array_any_value expects one argument"); diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 56cc8e10fb1b..79fe440f377b 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2597,26 +2597,25 @@ _Alias of [current_date](#current_date)._ ### `array_any_value` -Extracts the element with the index n from the array. +Returns the first non-null element in the array. ``` -array_element(array, index) +array_any_value(array) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. #### Example ```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ +> select array_any_value([NULL, 1, 2, 3]); ++-------------------------------------+ +| array_any_value(List([NULL,1,2,3])) | ++-------------------------------------+ +| 1 | ++-------------------------------------+ ``` #### Aliases @@ -2659,26 +2658,26 @@ _Alias of [array_concat](#array_concat)._ ### `array_concat` -Appends an element to the end of an array. +Concatenates arrays. ``` -array_append(array, element) +array_concat(array[, ..., array_n]) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to append to the array. +- **array_n**: Subsequent array column or literal array to concatenate.
#### Example ```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ +> select array_concat([1, 2], [3, 4], [5, 6]); ++---------------------------------------------------+ +| array_concat(List([1,2]),List([3,4]),List([5,6])) | ++---------------------------------------------------+ +| [1, 2, 3, 4, 5, 6] | ++---------------------------------------------------+ ``` #### Aliases @@ -2877,26 +2876,26 @@ array_has(array, element) ### `array_has_all` -Returns true if the array contains the element. +Returns true if all elements of sub-array exist in array. ``` -array_has(array, element) +array_has_all(array, sub-array) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. +- **sub-array**: Array expression. Can be a constant, column, or function, and any combination of array operators. #### Example ```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ +> select array_has_all([1, 2, 3, 4], [2, 3]); ++--------------------------------------------+ +| array_has_all(List([1,2,3,4]), List([2,3])) | ++--------------------------------------------+ +| true | ++--------------------------------------------+ ``` #### Aliases @@ -2905,26 +2904,26 @@ array_has(array, element) ### `array_has_any` -Returns true if the array contains the element. +Returns true if any elements exist in both arrays. ``` -array_has(array, element) +array_has_any(array, sub-array) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. 
-- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. +- **sub-array**: Array expression. Can be a constant, column, or function, and any combination of array operators. #### Example ```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ +> select array_has_any([1, 2, 3], [3, 4]); ++------------------------------------------+ +| array_has_any(List([1,2,3]), List([3,4])) | ++------------------------------------------+ +| true | ++------------------------------------------+ ``` #### Aliases @@ -2996,25 +2995,26 @@ array_length(array, dimension) ### `array_ndims` -Returns an array of the array's dimensions. +Returns the number of dimensions of the array. ``` -array_dims(array) +array_ndims(array, element) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Array element. #### Example ```sql -> select array_dims([[1, 2, 3], [4, 5, 6]]); -+---------------------------------+ -| array_dims(List([1,2,3,4,5,6])) | -+---------------------------------+ -| [2, 3] | -+---------------------------------+ +> select array_ndims([[1, 2, 3], [4, 5, 6]]); ++----------------------------------+ +| array_ndims(List([1,2,3,4,5,6])) | ++----------------------------------+ +| 2 | ++----------------------------------+ ``` #### Aliases @@ -3023,26 +3023,25 @@ array_dims(array) ### `array_pop_back` -Extracts the element with the index n from the array. +Returns the array without the last element. ``` -array_element(array, index) +array_pop_back(array) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. 
#### Example ```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ +> select array_pop_back([1, 2, 3]); ++-------------------------------+ +| array_pop_back(List([1,2,3])) | ++-------------------------------+ +| [1, 2] | ++-------------------------------+ ``` #### Aliases @@ -3051,26 +3050,25 @@ array_element(array, index) ### `array_pop_front` -Extracts the element with the index n from the array. +Returns the array without the first element. ``` -array_element(array, index) +array_pop_front(array) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. #### Example ```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ +> select array_pop_front([1, 2, 3]); ++-------------------------------+ +| array_pop_front(List([1,2,3])) | ++-------------------------------+ +| [2, 3] | ++-------------------------------+ ``` #### Aliases @@ -3161,8 +3159,8 @@ array_prepend(element, array) #### Arguments -- **element**: Element to prepend to the array. - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to prepend to the array. #### Example @@ -3455,26 +3453,28 @@ array_reverse(array) ### `array_slice` -Extracts the element with the index n from the array. +Returns a slice of the array based on 1-indexed start and end positions. ``` -array_element(array, index) +array_slice(array, begin, end) ``` #### Arguments - **array**: Array expression. 
Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. +- **begin**: Index of the first element. If negative, it counts backward from the end of the array. +- **end**: Index of the last element. If negative, it counts backward from the end of the array. +- **stride**: Stride of the array slice. The default is 1. #### Example ```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ +> select array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6); ++--------------------------------------------------------+ +| array_slice(List([1,2,3,4,5,6,7,8]),Int64(3),Int64(6)) | ++--------------------------------------------------------+ +| [3, 4, 5, 6] | ++--------------------------------------------------------+ ``` #### Aliases