From 6d2779e6ecb4d100ac176ac38443a8aca0d5e64d Mon Sep 17 00:00:00 2001 From: matt Date: Thu, 14 Dec 2023 15:18:13 -0800 Subject: [PATCH] extract str functions into separate modules. add first sql based unit test. --- Cargo.toml | 1 + Makefile | 4 +- README.md | 24 +++--- src/lib.rs | 178 +++++++--------------------------------- src/modules/after.rs | 20 +++++ src/modules/ascii.rs | 19 +++++ src/modules/before.rs | 4 + src/modules/case.rs | 71 ++++++++++++++++ src/modules/contains.rs | 18 ++++ src/modules/length.rs | 13 +++ src/modules/markdown.rs | 24 ++++++ src/modules/random.rs | 14 ++++ src/modules/split.rs | 18 ++++ src/modules/start.rs | 29 +++++++ src/modules/substr.rs | 13 +++ src/modules/uuid.rs | 14 ++++ 16 files changed, 302 insertions(+), 162 deletions(-) create mode 100644 src/modules/after.rs create mode 100644 src/modules/ascii.rs create mode 100644 src/modules/before.rs create mode 100644 src/modules/case.rs create mode 100644 src/modules/contains.rs create mode 100644 src/modules/length.rs create mode 100644 src/modules/markdown.rs create mode 100644 src/modules/random.rs create mode 100644 src/modules/split.rs create mode 100644 src/modules/start.rs create mode 100644 src/modules/substr.rs create mode 100644 src/modules/uuid.rs diff --git a/Cargo.toml b/Cargo.toml index 6a638ec..2e96c67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ pulldown-cmark = "0.9.1" any_ascii = "0.3.0" rand = "0.8.4" uuid = "1.2.2" +regex = "1.10.2" [dev-dependencies] pgrx-tests = "=0.11.2" diff --git a/Makefile b/Makefile index 31dea0d..1dc9784 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ -PACKAGE_VERSION=0.3.0 +PACKAGE_VERSION=0.3.1 PACKAGE_NAME=pg_str PG_VERSION=15 PREFIX=target/release/$(PACKAGE_NAME)-pg$(PG_VERSION) build: - cargo pgx package + cargo pgrx package install: $(PREFIX) cp -f $(PREFIX)/usr/share/postgresql/$(PG_VERSION)/extension/$(PACKAGE_NAME)--$(PACKAGE_VERSION).sql 
/usr/share/postgresql/$(PG_VERSION)/extension/$(PACKAGE_NAME)--$(PACKAGE_VERSION).sql diff --git a/README.md b/README.md index b52c30a..9bfa707 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,30 @@ -# Postgresql String Extension +# pg_str: the postgresql extension for strings -A better way of handling string manipulation and transformations in Postgresql. +add some good default string manipulation functions to postgresql. build using the rust library pgrx: [https://github.com/pgcentralfoundation/pgrx](https://github.com/pgcentralfoundation/pgrx). -Function api and behavior is inspired by those available in the Laravel web framework: https://laravel.com/docs/8.x/helpers#strings-method-list -## Installation +function api and behavior is inspired by the laravel web framework: [https://laravel.com/docs/10.x/strings](https://laravel.com/docs/10.x/strings) + +## installation ``` -git clone git@github.com:abumni/pg_str +git clone https://gitea.publicmatt.com/public/pg_str.git cd pg_str cargo pgx package # run cargo install pgx first sudo make install # adjust Makefile if using different version of postgresql than 13. ``` -This puts the binaries and sql into the right folder location. Next you need to create the extension in postgresql: +this puts the binaries and sql into the right folder location. next you need to create the extension in postgresql: ``` psql -> create extension pg_str; # installs functions in a schema named 'str' -> select str.markdown('# Hello ' +> create extension pg_str; # installs functions in a 'public' schema. 
+> select str_markdown('# Hello ' || str.snake('pg str') || '- ~~using programming language for str manipulations~~ - **do it all in postgresql** '); ``` -## API +## api thus far: + - [x] after - [] afterLast - [x] ascii @@ -60,10 +62,10 @@ psql - [x] singular - [x] slug - [x] snake -- [] start +- [x] start - [] startsWith - [x] studly -- [] substr +- [x] substr - [] substrCount - [] substrReplace - [x] title diff --git a/src/lib.rs b/src/lib.rs index 533d75f..d5d6dd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,149 +1,39 @@ +pub mod modules { + pub mod after; + pub mod ascii; + pub mod before; + pub mod case; + pub mod contains; + pub mod length; + pub mod markdown; + pub mod random; + pub mod split; + pub mod start; + pub mod substr; + pub mod uuid; +} + use pgrx::prelude::*; -use rand::distributions::{Alphanumeric, DistString}; - -use any_ascii::any_ascii; -use inflector::cases::{ - camelcase, kebabcase, pascalcase, screamingsnakecase, snakecase, titlecase, -}; -use inflector::string::{pluralize, singularize}; -use pulldown_cmark::{html, Options, Parser}; -use str_slug::StrSlug; -use uuid::Uuid; +pub use modules::after::*; +pub use modules::ascii::*; +pub use modules::before::*; +pub use modules::case::*; +pub use modules::contains::*; +pub use modules::length::*; +pub use modules::markdown::*; +pub use modules::random::*; +pub use modules::split::*; +pub use modules::start::*; +pub use modules::substr::*; +pub use modules::uuid::*; pgrx::pg_module_magic!(); - -#[pg_extern] -fn str_random(length: i32) -> String { - Alphanumeric.sample_string(&mut rand::thread_rng(), length as usize) -} - -#[pg_extern] -fn str_length(input: &str) -> i32 { - input.len() as i32 -} - -#[pg_extern] -fn str_after<'a>(input: &'a str, search: &str) -> &'a str { - let matches: Vec<_> = input.match_indices(search).collect(); - match matches.first() { - None => input, - Some(x) => &input[x.1.len()..], - } -} -// #[pg_extern] -// fn str_after_last<'a>(input: &'a str, search: &str) -> &'a 
str { -// } - -// fn str_before<'a>(input: &'a str, search: &str) -> &'a str { -// } -// fn str_beforeLast<'a>(input: &'a str, search: &str) -> &'a str { -// } // fn str_between<'a>(input: &'a str, search: &str) -> &'a str { // } #[pg_extern] -fn str_uuid() -> String { - Uuid::new_v4().to_string() -} - -#[pg_extern] -fn str_ascii(input: &str) -> String { - any_ascii(input) -} - -#[pg_extern] -fn str_is_ascii(input: &str) -> bool { - input.is_ascii() -} - -#[pg_extern] -fn str_contains(input: &str, search: &str) -> bool { - input.contains(search) -} - -#[pg_extern] -fn str_contains_all(input: &str, search: Vec<&str>) -> bool { - search.iter().all(|s| input.contains(s)) -} - -#[pg_extern] -fn str_lower(input: &str) -> String { - input.to_lowercase() -} - -#[pg_extern] -fn str_upper(input: &str) -> String { - input.to_uppercase() -} - -#[pg_extern] -fn str_slug(input: &str, sep: char) -> String { - let mut slug = StrSlug::new(); - slug.separator = sep; - slug.slug(input) -} - -#[pg_extern] -fn str_singular(input: &str) -> String { - singularize::to_singular(input) -} - -#[pg_extern] -fn str_plural(input: &str) -> String { - pluralize::to_plural(input) -} - -#[pg_extern] -fn str_title(input: &str) -> String { - titlecase::to_title_case(input) -} - -#[pg_extern] -fn str_camel(input: &str) -> String { - camelcase::to_camel_case(input) -} - -#[pg_extern] -fn str_kebab(input: &str) -> String { - kebabcase::to_kebab_case(input) -} - -#[pg_extern] -fn str_snake(input: &str) -> String { - snakecase::to_snake_case(input) -} - -#[pg_extern] -fn str_studly(input: &str) -> String { - pascalcase::to_pascal_case(input) -} - -#[pg_extern] -fn str_scream(input: &str) -> String { - screamingsnakecase::to_screaming_snake_case(input) -} - -#[pg_extern] -fn str_markdown(input: &str) -> String { - // Set up options and parser. Strikethroughs are not part of the CommonMark standard - // and we therefore must enable it explicitly. 
- let mut options = Options::empty(); - options.insert(Options::ENABLE_STRIKETHROUGH); - let parser = Parser::new_ext(input, options); - - // Write to String buffer. - let mut html_output: String = String::with_capacity(input.len() * 3 / 2); - html::push_html(&mut html_output, parser); - html_output -} - -#[pg_extern] -fn str_substr(input: &str, start: i32, end: i32) -> &str { - &input[start as usize..end as usize] -} -#[pg_extern] -fn str_replace(input: &'static str, old: &'static str, new: &'static str) -> String { +fn str_replace<'a>(input: &'a str, old: &'a str, new: &'a str) -> String { input.replace(old, new) } @@ -153,18 +43,8 @@ fn str_append(mut input: String, extra: &str) -> String { input } -#[pg_extern] -fn str_split(input: &'static str, pattern: &str) -> Vec<&'static str> { - input.split_terminator(pattern).into_iter().collect() -} - -#[pg_extern] -fn str_split_set<'a>(input: &'a str, pattern: &'a str) -> SetOfIterator<'a, &'a str> { - SetOfIterator::new(input.split_terminator(pattern).into_iter()) -} - #[cfg(any(test, feature = "pg_test"))] -#[pg_schema] +#[pgrx::pg_schema] mod tests { // #[pg_test] diff --git a/src/modules/after.rs b/src/modules/after.rs new file mode 100644 index 0000000..6807dff --- /dev/null +++ b/src/modules/after.rs @@ -0,0 +1,20 @@ +use pgrx::prelude::*; + +#[pg_extern] +pub fn str_after<'a>(input: &'a str, search: &str) -> &'a str { + let matches: Vec<_> = input.match_indices(search).collect(); + match matches.first() { + None => input, + Some(x) => &input[x.1.len()..], + } +} +// #[pg_extern] +// fn str_after_last<'a>(input: &'a str, search: &str) -> &'a str { +// } + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/ascii.rs b/src/modules/ascii.rs new file mode 100644 index 0000000..1721be2 --- /dev/null +++ b/src/modules/ascii.rs @@ -0,0 +1,19 @@ +use any_ascii::any_ascii; +use pgrx::prelude::*; + +#[pg_extern] +pub fn 
str_ascii(input: &str) -> String { + any_ascii(input) +} + +#[pg_extern] +pub fn str_is_ascii(input: &str) -> bool { + input.is_ascii() +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/before.rs b/src/modules/before.rs new file mode 100644 index 0000000..9ad632f --- /dev/null +++ b/src/modules/before.rs @@ -0,0 +1,4 @@ +// fn str_before<'a>(input: &'a str, search: &str) -> &'a str { +// } +// fn str_beforeLast<'a>(input: &'a str, search: &str) -> &'a str { +// } diff --git a/src/modules/case.rs b/src/modules/case.rs new file mode 100644 index 0000000..fe324ea --- /dev/null +++ b/src/modules/case.rs @@ -0,0 +1,71 @@ +use pgrx::prelude::*; + +use inflector::cases::{ + camelcase, kebabcase, pascalcase, screamingsnakecase, snakecase, titlecase, +}; +use inflector::string::{pluralize, singularize}; +use str_slug::StrSlug; + +#[pg_extern] +pub fn str_lower(input: &str) -> String { + input.to_lowercase() +} + +#[pg_extern] +pub fn str_upper(input: &str) -> String { + input.to_uppercase() +} + +#[pg_extern] +pub fn str_slug(input: &str, sep: char) -> String { + let mut slug = StrSlug::new(); + slug.separator = sep; + slug.slug(input) +} + +#[pg_extern] +pub fn str_singular(input: &str) -> String { + singularize::to_singular(input) +} + +#[pg_extern] +pub fn str_plural(input: &str) -> String { + pluralize::to_plural(input) +} + +#[pg_extern] +pub fn str_title(input: &str) -> String { + titlecase::to_title_case(input) +} + +#[pg_extern] +pub fn str_camel(input: &str) -> String { + camelcase::to_camel_case(input) +} + +#[pg_extern] +pub fn str_kebab(input: &str) -> String { + kebabcase::to_kebab_case(input) +} + +#[pg_extern] +pub fn str_snake(input: &str) -> String { + snakecase::to_snake_case(input) +} + +#[pg_extern] +pub fn str_studly(input: &str) -> String { + pascalcase::to_pascal_case(input) +} + +#[pg_extern] +pub fn str_scream(input: &str) -> String { + 
screamingsnakecase::to_screaming_snake_case(input) +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/contains.rs b/src/modules/contains.rs new file mode 100644 index 0000000..89b98db --- /dev/null +++ b/src/modules/contains.rs @@ -0,0 +1,18 @@ +use pgrx::prelude::*; + +#[pg_extern] +pub fn str_contains(input: &str, search: &str) -> bool { + input.contains(search) +} + +#[pg_extern] +pub fn str_contains_all(input: &str, search: Vec<&str>) -> bool { + search.iter().all(|s| input.contains(s)) +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/length.rs b/src/modules/length.rs new file mode 100644 index 0000000..a759adc --- /dev/null +++ b/src/modules/length.rs @@ -0,0 +1,13 @@ +use pgrx::prelude::*; + +#[pg_extern] +fn str_length(input: &str) -> i32 { + input.len() as i32 +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/markdown.rs b/src/modules/markdown.rs new file mode 100644 index 0000000..e62d34e --- /dev/null +++ b/src/modules/markdown.rs @@ -0,0 +1,24 @@ +use pgrx::prelude::*; + +use pulldown_cmark::{html, Options, Parser}; + +#[pg_extern] +pub fn str_markdown(input: &str) -> String { + // Set up options and parser. Strikethroughs are not part of the CommonMark standard + // and we therefore must enable it explicitly. + let mut options = Options::empty(); + options.insert(Options::ENABLE_STRIKETHROUGH); + let parser = Parser::new_ext(input, options); + + // Write to String buffer. 
+ let mut html_output: String = String::with_capacity(input.len() * 3 / 2); + html::push_html(&mut html_output, parser); + html_output +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/random.rs b/src/modules/random.rs new file mode 100644 index 0000000..37d6de7 --- /dev/null +++ b/src/modules/random.rs @@ -0,0 +1,14 @@ +use pgrx::prelude::*; +use rand::distributions::{Alphanumeric, DistString}; + +#[pg_extern] +pub fn str_random(length: i32) -> String { + Alphanumeric.sample_string(&mut rand::thread_rng(), length as usize) +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/split.rs b/src/modules/split.rs new file mode 100644 index 0000000..7b2d745 --- /dev/null +++ b/src/modules/split.rs @@ -0,0 +1,18 @@ +use pgrx::prelude::*; + +#[pg_extern] +pub fn str_split<'a>(input: &'a str, pattern: &str) -> Vec<&'a str> { + input.split_terminator(pattern).into_iter().collect() +} + +#[pg_extern] +pub fn str_split_set<'a>(input: &'a str, pattern: &'a str) -> SetOfIterator<'a, &'a str> { + SetOfIterator::new(input.split_terminator(pattern).into_iter()) +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/start.rs b/src/modules/start.rs new file mode 100644 index 0000000..66f6cf2 --- /dev/null +++ b/src/modules/start.rs @@ -0,0 +1,29 @@ +use pgrx::prelude::*; +use regex::Regex; + +#[pg_extern] +pub fn str_start(value: &str, prefix: &str) -> String { + let quoted = regex::escape(prefix); + let re = Regex::new(&format!("^(?:{})+", quoted)).unwrap(); + + format!("{}{}", prefix, re.replace(value, "")) +} + +#[cfg(any(test, feature = "pg_test"))] +#[pgrx::pg_schema] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; + + #[pg_test] + fn 
test_no_slash_prefix() { + let result = Spi::get_one::<String>("SELECT public.str_start('path/to/file', '/')"); + assert_eq!(result, Ok(Some("/path/to/file".to_string()))); + } + #[pg_test] + fn test_slash_prefix() { + let result = Spi::get_one::<String>("SELECT public.str_start('/path/to/file', '/')"); + assert_eq!(result, Ok(Some("/path/to/file".to_string()))); + } +} diff --git a/src/modules/substr.rs b/src/modules/substr.rs new file mode 100644 index 0000000..d67bf8a --- /dev/null +++ b/src/modules/substr.rs @@ -0,0 +1,13 @@ +use pgrx::prelude::*; + +#[pg_extern] +pub fn str_substr(input: &str, start: i32, end: i32) -> &str { + &input[start as usize..end as usize] +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +} diff --git a/src/modules/uuid.rs b/src/modules/uuid.rs new file mode 100644 index 0000000..3ae524e --- /dev/null +++ b/src/modules/uuid.rs @@ -0,0 +1,14 @@ +use pgrx::prelude::*; +use uuid::Uuid; + +#[pg_extern] +pub fn str_uuid() -> String { + Uuid::new_v4().to_string() +} + +#[cfg(any(test, feature = "pg_test"))] +mod tests { + #[allow(unused_imports)] + use super::*; + use pgrx::prelude::*; +}