Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc-gen: migrate scalar functions (string) documentation 1/4 #13924

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 24 additions & 34 deletions datafusion/functions/src/string/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the Unicode character code of the first character in a string.",
syntax_example = "ascii(str)",
sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "chr")
)]
#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
Expand Down Expand Up @@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_ascii_doc())
self.doc()
}
}

/// Cache for the `ascii` documentation; filled in the first time it is requested.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing the `ascii` function.
///
/// The documentation is built on the first call and stored in
/// [`DOCUMENTATION`]; every later call returns the same reference.
fn get_ascii_doc() -> &'static Documentation {
    const DESCRIPTION: &str =
        "Returns the Unicode character code of the first character in a string.";
    const SYNTAX: &str = "ascii(str)";
    // Raw string so the embedded double quotes need no escaping.
    const SQL_EXAMPLE: &str = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_STRING, DESCRIPTION, SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("chr")
            .build()
    })
}

fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
Expand Down
47 changes: 18 additions & 29 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,29 @@
use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the bit length of a string.",
syntax_example = "bit_length(str)",
sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "length"),
related_udf(name = "octet_length")
)]
#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_bit_length_doc())
self.doc()
}
}

/// Cache for the `bit_length` documentation; filled in the first time it is requested.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing the `bit_length` function.
///
/// The documentation is built on the first call and stored in
/// [`DOCUMENTATION`]; every later call returns the same reference.
fn get_bit_length_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Returns the bit length of a string.";
    const SYNTAX: &str = "bit_length(str)";
    // Raw string so the embedded double quotes need no escaping.
    const SQL_EXAMPLE: &str = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_STRING, DESCRIPTION, SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("length")
            .with_related_udf("octet_length")
            .build()
    })
}
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::array::StringArray;
Expand All @@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8;
use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
/// chr(65) = 'A'
Expand Down Expand Up @@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the character with the specified ASCII or Unicode code value.",
syntax_example = "chr(expression)",
sql_example = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
standard_argument(name = "expression", prefix = "String"),
related_udf(name = "ascii")
)]
#[derive(Debug)]
pub struct ChrFunc {
signature: Signature,
Expand Down Expand Up @@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_chr_doc())
self.doc()
}
}

/// Cache for the `chr` documentation; filled in the first time it is requested.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing the `chr` function.
///
/// The documentation is built on the first call and stored in
/// [`DOCUMENTATION`]; every later call returns the same reference.
fn get_chr_doc() -> &'static Documentation {
    const DESCRIPTION: &str =
        "Returns the character with the specified ASCII or Unicode code value.";
    const SYNTAX: &str = "chr(expression)";
    // Raw string so the embedded double quotes need no escaping.
    const SQL_EXAMPLE: &str = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_STRING, DESCRIPTION, SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            // NOTE(review): the example above passes an Int64 code, yet the
            // argument is labelled with the "String" prefix — confirm whether
            // this wording is intended before changing it.
            .with_standard_argument("expression", Some("String"))
            .with_related_udf("ascii")
            .build()
    })
}
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/contains.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Return true if search_str is found within string (case-sensitive).",
syntax_example = "contains(str, search_str)",
sql_example = r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "search_str", description = "The string to search for in str.")
)]
#[derive(Debug)]
pub struct ContainsFunc {
signature: Signature,
Expand Down Expand Up @@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_contains_doc())
self.doc()
}
}

/// Cache for the `contains` documentation; filled in the first time it is requested.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing the `contains` function.
///
/// The documentation is built on the first call and stored in
/// [`DOCUMENTATION`]; every later call returns the same reference.
fn get_contains_doc() -> &'static Documentation {
    const DESCRIPTION: &str =
        "Return true if search_str is found within string (case-sensitive).";
    const SYNTAX: &str = "contains(str, search_str)";
    // Raw string so the embedded double quotes need no escaping.
    const SQL_EXAMPLE: &str = r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_STRING, DESCRIPTION, SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_argument("search_str", "The string to search for in str.")
            .build()
    })
}

/// Uses `arrow::compute::contains` to perform the calculation for `contains`.
pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
match (args[0].data_type(), args[1].data_type()) {
Expand Down
47 changes: 18 additions & 29 deletions datafusion/functions/src/string/octet_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,29 @@
use arrow::compute::kernels::length::length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the length of a string in bytes.",
syntax_example = "octet_length(str)",
sql_example = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10 |
+--------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "bit_length"),
related_udf(name = "length")
)]
#[derive(Debug)]
pub struct OctetLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -92,36 +107,10 @@ impl ScalarUDFImpl for OctetLengthFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_octet_length_doc())
self.doc()
}
}

/// Cache for the `octet_length` documentation; filled in the first time it is requested.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] describing the `octet_length` function.
///
/// The documentation is built on the first call and stored in
/// [`DOCUMENTATION`]; every later call returns the same reference.
fn get_octet_length_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Returns the length of a string in bytes.";
    const SYNTAX: &str = "octet_length(str)";
    // Raw string so the embedded double quotes need no escaping.
    const SQL_EXAMPLE: &str = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10 |
+--------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_STRING, DESCRIPTION, SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("str", Some("String"))
            .with_related_udf("bit_length")
            .with_related_udf("length")
            .build()
    })
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand Down
Loading
Loading