-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add text processes * fix test * pre commit
- Loading branch information
1 parent
de071a6
commit 9522ef7
Showing
3 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from typing import Any, Optional | ||
|
||
|
||
def text_begins(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``data`` starts with ``pattern``.

    Args:
        data: Text to inspect, or ``None`` for a missing value.
        pattern: Prefix to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lowercased before comparing.

    Returns:
        ``None`` when ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value propagates as None. An empty string is real text
    # and must produce a boolean (e.g. "" starts with "" is True).
    if data is None:
        return None
    if case_sensitive:
        return data.startswith(pattern)
    return data.lower().startswith(pattern.lower())
|
||
|
||
def text_contains(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``pattern`` occurs anywhere inside ``data``.

    Args:
        data: Text to inspect, or ``None`` for a missing value.
        pattern: Substring to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lowercased before comparing.

    Returns:
        ``None`` when ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value propagates as None. An empty string is real text
    # and must produce a boolean (e.g. "" contains "" is True).
    if data is None:
        return None
    if case_sensitive:
        return pattern in data
    return pattern.lower() in data.lower()
|
||
|
||
def text_ends(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``data`` ends with ``pattern``.

    Args:
        data: Text to inspect, or ``None`` for a missing value.
        pattern: Suffix to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lowercased before comparing.

    Returns:
        ``None`` when ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value propagates as None. An empty string is real text
    # and must produce a boolean (e.g. "" ends with "" is True).
    if data is None:
        return None
    if case_sensitive:
        return data.endswith(pattern)
    return data.lower().endswith(pattern.lower())
|
||
|
||
def text_concat(data: list[Any], separator: Any = "") -> str:
    """Join the string forms of ``data`` with ``separator`` between elements.

    Booleans and ``None`` are rendered in lowercase (``"true"``, ``"false"``,
    ``"none"``); every other value uses ``str()``. The separator is converted
    by the same rule, so non-string separators such as ``1`` are accepted.

    NOTE(review): the openEO process examples appear to render a null element
    as ``"null"``; this keeps the existing ``"none"`` output - confirm which
    one callers expect before changing it.

    Args:
        data: Elements to concatenate; may be empty.
        separator: Value placed between consecutive elements. Defaults to
            the empty string.

    Returns:
        The concatenated text; ``""`` for empty ``data``.
    """

    def _as_text(value: Any) -> str:
        # str(True) / str(None) are capitalised in Python; lowercase them.
        rendered = str(value)
        if isinstance(value, bool) or value is None:
            return rendered.lower()
        return rendered

    # str.join places the separator only *between* elements, so there is no
    # trailing separator to slice off and no quadratic string growth.
    return _as_text(separator).join(_as_text(elem) for elem in data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import pytest | ||
|
||
from openeo_processes_dask.process_implementations.text import * | ||
|
||
|
||
@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "amet", True),
        ("Lorem ipsum dolor sit amet", True, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "lorem", True),
        ("Lorem ipsum dolor sit amet", True, "lorem", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_begins(string, expected, pattern, case_sensitive):
    """Exercise ``text_begins``; the ``"nan"`` marker means "expect None"."""
    outcome = text_begins(string, pattern, case_sensitive)
    # The only string-valued expectation is the "nan" marker itself.
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected
|
||
|
||
@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", True, "amet", True),
        ("Lorem ipsum dolor sit amet", False, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "AMET", True),
        ("Lorem ipsum dolor sit amet", True, "AMET", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_ends(string, expected, pattern, case_sensitive):
    """Exercise ``text_ends``; the ``"nan"`` marker means "expect None"."""
    outcome = text_ends(string, pattern, case_sensitive)
    # The only string-valued expectation is the "nan" marker itself.
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected
|
||
|
||
@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "openEO", True),
        ("Lorem ipsum dolor sit amet", True, "ipsum dolor", True),
        ("Lorem ipsum dolor sit amet", False, "Ipsum Dolor", True),
        ("Lorem ipsum dolor sit amet", True, "SIT", False),
        ("ÄÖÜ", True, "ö", False),
        (None, "nan", "null", True),
    ],
)
def test_text_contains(string, expected, pattern, case_sensitive):
    """Exercise ``text_contains``; the ``"nan"`` marker means "expect None"."""
    outcome = text_contains(string, pattern, case_sensitive)
    # The only string-valued expectation is the "nan" marker itself.
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected
|
||
|
||
@pytest.mark.parametrize(
    "data,expected,separator",
    [
        (["Hello", "World"], "Hello World", " "),
        ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], "1234567890", ""),
        ([None, True, False, 1, -1.5, "ß"], "none\ntrue\nfalse\n1\n-1.5\nß", "\n"),
        ([2, 0], "210", 1),
        ([], "", ""),
    ],
)
def test_text_concat(data, expected, separator):
    """Exercise ``text_concat`` with string, numeric and empty inputs.

    This function was previously also named ``test_text_contains``; the
    duplicate name rebound the module attribute, so pytest never collected
    the real ``text_contains`` cases defined earlier in the module.
    """
    result = text_concat(data, separator)
    assert result == expected