diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 42d2ff98c39d..b5dc4447dcfb 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -32,13 +32,27 @@ use datafusion_common::{ use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::{ColumnarValue, Documentation}; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use std::{fmt, str::FromStr}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_BINARY_STRING; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; +#[user_doc( + doc_section(label = "Binary String Functions"), + description = "Encode binary data into a textual representation.", + syntax_example = "encode(expression, format)", + argument( + name = "expression", + description = "Expression containing string or binary data" + ), + argument( + name = "format", + description = "Supported formats are: `base64`, `hex`" + ), + related_udf(name = "decode") +)] #[derive(Debug)] pub struct EncodeFunc { signature: Signature, @@ -58,22 +72,6 @@ impl EncodeFunc { } } -static ENCODE_DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_encode_doc() -> &'static Documentation { - ENCODE_DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_BINARY_STRING, - "Encode binary data into a textual representation.", - "encode(expression, format)", - ) - .with_argument("expression", "Expression containing string or binary data") - .with_argument("format", "Supported formats are: `base64`, `hex`") - .with_related_udf("decode") - .build() - }) -} - impl ScalarUDFImpl for EncodeFunc { fn as_any(&self) -> &dyn Any { self @@ -126,10 +124,21 @@ impl ScalarUDFImpl for EncodeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_encode_doc()) + self.doc() } } +#[user_doc( + doc_section(label = "Binary String Functions"), 
+ description = "Decode binary data from textual representation in string.", + syntax_example = "decode(expression, format)", + argument( + name = "expression", + description = "Expression containing encoded string data" + ), + argument(name = "format", description = "Same arguments as [encode](#encode)"), + related_udf(name = "encode") +)] #[derive(Debug)] pub struct DecodeFunc { signature: Signature, @@ -149,22 +158,6 @@ impl DecodeFunc { } } -static DECODE_DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_decode_doc() -> &'static Documentation { - DECODE_DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_BINARY_STRING, - "Decode binary data from textual representation in string.", - "decode(expression, format)", - ) - .with_argument("expression", "Expression containing encoded string data") - .with_argument("format", "Same arguments as [encode](#encode)") - .with_related_udf("encode") - .build() - }) -} - impl ScalarUDFImpl for DecodeFunc { fn as_any(&self) -> &dyn Any { self @@ -217,7 +210,7 @@ impl ScalarUDFImpl for DecodeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_decode_doc()) + self.doc() } } diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 8f06c75b2fe9..ca22f74240cd 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -23,17 +23,45 @@ use arrow::datatypes::{ }; use arrow::error::ArrowError; use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::Exact, TypeSignature::Uniform, Volatility, }; +use datafusion_macros::user_doc; use itertools::izip; use regex::Regex; use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label 
= "Regular Expression Functions"), + description = "Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.", + syntax_example = "regexp_count(str, regexp[, start, flags])", + sql_example = r#"```sql +> select regexp_count('abcAbAbc', 'abc', 2, 'i'); ++---------------------------------------------------------------+ +| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | ++---------------------------------------------------------------+ +| 1 | ++---------------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "regexp", prefix = "Regular"), + argument( + name = "start", + description = "Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . 
to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpCountFunc { signature: Signature, @@ -111,40 +139,10 @@ impl ScalarUDFImpl for RegexpCountFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_count_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_count_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.", - "regexp_count(str, regexp[, start, flags])") - .with_sql_example(r#"```sql -> select regexp_count('abcAbAbc', 'abc', 2, 'i'); -+---------------------------------------------------------------+ -| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | -+---------------------------------------------------------------+ -| 1 | -+---------------------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("regexp",Some("Regular")) - .with_argument("start", "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function.") - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . 
to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) -} - pub fn regexp_count_func(args: &[ArrayRef]) -> Result { let args_len = args.len(); if !(2..=4).contains(&args_len) { diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 1c826b12ef8f..35e104d46f64 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -25,30 +25,18 @@ use datafusion_common::exec_err; use datafusion_common::ScalarValue; use datafusion_common::{arrow_datafusion_err, plan_err}; use datafusion_common::{internal_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; -#[derive(Debug)] -pub struct RegexpLikeFunc { - signature: Signature, -} - -impl Default for RegexpLikeFunc { - fn default() -> Self { - Self::new() - } -} - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_like_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder(DOC_SECTION_REGEX,"Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.","regexp_like(str, regexp[, flags])") - .with_sql_example(r#"```sql +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.", + syntax_example = "regexp_like(str, regexp[, flags])", + sql_example = r#"```sql select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); +--------------------------------------------------------+ | 
regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | @@ -63,18 +51,28 @@ SELECT regexp_like('aBc', '(b|d)', 'i'); +--------------------------------------------------+ ``` Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("regexp", Some("Regular")) - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: +"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "regexp", prefix = "Regular"), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - **i**: case-insensitive: letters match both upper and lower case - **m**: multi-line mode: ^ and $ match begin/end of line - **s**: allow . 
to match \n - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) + - **U**: swap the meaning of x* and x*?"# + ) +)] +#[derive(Debug)] +pub struct RegexpLikeFunc { + signature: Signature, +} + +impl Default for RegexpLikeFunc { + fn default() -> Self { + Self::new() + } } impl RegexpLikeFunc { @@ -142,7 +140,7 @@ impl ScalarUDFImpl for RegexpLikeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_like_doc()) + self.doc() } } diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 8362ef2f406c..304b5728ba56 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -26,12 +26,47 @@ use datafusion_common::{arrow_datafusion_err, plan_err}; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.", + syntax_example = "regexp_match(str, regexp[, flags])", + sql_example = r#"```sql +> select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); ++---------------------------------------------------------+ +| regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | ++---------------------------------------------------------+ +| [Köln] | ++---------------------------------------------------------+ +SELECT regexp_match('aBc', '(b|d)', 'i'); ++---------------------------------------------------+ +| regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) 
| ++---------------------------------------------------+ +| [B] | ++---------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) +"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "regexp", + description = "Regular expression to match against. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpMatchFunc { signature: Signature, @@ -113,48 +148,10 @@ impl ScalarUDFImpl for RegexpMatchFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_match_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_match_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.", - "regexp_match(str, regexp[, flags])") - .with_sql_example(r#"```sql - > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); - +---------------------------------------------------------+ - | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | - +---------------------------------------------------------+ - | [Köln] | - +---------------------------------------------------------+ - SELECT regexp_match('aBc', '(b|d)', 'i'); - +---------------------------------------------------+ - | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | - 
+---------------------------------------------------+ - | [B] | - +---------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_argument("regexp","Regular expression to match against. - Can be a constant, column, or function.") - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) -} - fn regexp_match_func(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::Utf8 => regexp_match::(args), diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 4ed9350e9729..e5a4ddc2ca6a 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -32,15 +32,55 @@ use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, }; use datafusion_expr::function::Hint; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; use std::collections::HashMap; -use std::sync::{Arc, LazyLock, OnceLock}; +use std::sync::{Arc, LazyLock}; +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).", + 
syntax_example = "regexp_replace(str, regexp, replacement[, flags])", + sql_example = r#"```sql +> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); ++------------------------------------------------------------------------+ +| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | ++------------------------------------------------------------------------+ +| fooXarYXazY | ++------------------------------------------------------------------------+ +SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); ++-------------------------------------------------------------------+ +| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | ++-------------------------------------------------------------------+ +| aAbBac | ++-------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) +"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "regexp", + description = "Regular expression to match against. Can be a constant, column, or function." + ), + argument( + name = "replacement", + description = "Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **g**: (global) Search globally and don't return after the first match + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . 
to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpReplaceFunc { signature: Signature, @@ -130,50 +170,10 @@ impl ScalarUDFImpl for RegexpReplaceFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_replace_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_replace_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).", - "regexp_replace(str, regexp, replacement[, flags])") - .with_sql_example(r#"```sql -> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); -+------------------------------------------------------------------------+ -| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | -+------------------------------------------------------------------------+ -| fooXarYXazY | -+------------------------------------------------------------------------+ -SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); -+-------------------------------------------------------------------+ -| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | -+-------------------------------------------------------------------+ -| aAbBac | -+-------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_argument("regexp","Regular expression to match against. - Can be a constant, column, or function.") - .with_standard_argument("replacement", Some("Replacement string")) - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. 
The following flags are supported: -- **g**: (global) Search globally and don't return after the first match -- **i**: case-insensitive: letters match both upper and lower case -- **m**: multi-line mode: ^ and $ match begin/end of line -- **s**: allow . to match \n -- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used -- **U**: swap the meaning of x* and x*?"#) - .build() -}) -} - fn regexp_replace_func(args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Utf8 => specialize_regexp_replace::(args), diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index be4f5e56b3af..85f71ad745f5 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1758,167 +1758,6 @@ encode(expression, format) - [decode](#decode) -## Regular Expression Functions - -Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions) -regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax) -(minus support for several features including look-around and backreferences). -The following regular expression functions are supported: - -- [regexp_count](#regexp_count) -- [regexp_like](#regexp_like) -- [regexp_match](#regexp_match) -- [regexp_replace](#regexp_replace) - -### `regexp_count` - -Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string. - -``` -regexp_count(str, regexp[, start, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **start**: - **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function. 
-- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? - -#### Example - -```sql -> select regexp_count('abcAbAbc', 'abc', 2, 'i'); -+---------------------------------------------------------------+ -| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | -+---------------------------------------------------------------+ -| 1 | -+---------------------------------------------------------------+ -``` - -### `regexp_like` - -Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise. - -``` -regexp_like(str, regexp[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? 
- -#### Example - -```sql -select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); -+--------------------------------------------------------+ -| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | -+--------------------------------------------------------+ -| true | -+--------------------------------------------------------+ -SELECT regexp_like('aBc', '(b|d)', 'i'); -+--------------------------------------------------+ -| regexp_like(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | -+--------------------------------------------------+ -| true | -+--------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - -### `regexp_match` - -Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string. - -``` -regexp_match(str, regexp[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to match against. - Can be a constant, column, or function. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? 
- -#### Example - -```sql - > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); - +---------------------------------------------------------+ - | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | - +---------------------------------------------------------+ - | [Köln] | - +---------------------------------------------------------+ - SELECT regexp_match('aBc', '(b|d)', 'i'); - +---------------------------------------------------+ - | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | - +---------------------------------------------------+ - | [B] | - +---------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - -### `regexp_replace` - -Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax). - -``` -regexp_replace(str, regexp, replacement[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to match against. - Can be a constant, column, or function. -- **replacement**: Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: -- **g**: (global) Search globally and don't return after the first match -- **i**: case-insensitive: letters match both upper and lower case -- **m**: multi-line mode: ^ and $ match begin/end of line -- **s**: allow . to match \n -- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used -- **U**: swap the meaning of x* and x*? 
- -#### Example - -```sql -> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); -+------------------------------------------------------------------------+ -| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | -+------------------------------------------------------------------------+ -| fooXarYXazY | -+------------------------------------------------------------------------+ -SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); -+-------------------------------------------------------------------+ -| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | -+-------------------------------------------------------------------+ -| aAbBac | -+-------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - ## Time and Date Functions - [current_date](#current_date)