Skip to content

Commit

Permalink
[FEATURE] Toggleable keyword evaluation behavior (#1130)
Browse files Browse the repository at this point in the history
With the `Filter Keywords` prebuilt preset, can now set keyword evaluation to be either `ANY` or `ALL`. More info here: https://ytdl-sub.readthedocs.io/en/latest/prebuilt_presets/helpers.html#filter-keywords
  • Loading branch information
jmbannon authored Nov 26, 2024
1 parent 8c3f852 commit 53d84d8
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,13 @@ contains
:description:
Returns True if ``contains`` is in ``string``. False otherwise.

contains_all
~~~~~~~~~~~~
:spec: ``contains_all(string: String, contains_array: Array) -> Boolean``

:description:
Returns True if all elements in ``contains_array`` are in ``string``. False otherwise.

contains_any
~~~~~~~~~~~~
:spec: ``contains_any(string: String, contains_array: Array) -> Boolean``
Expand Down
17 changes: 13 additions & 4 deletions docs/source/prebuilt_presets/helpers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ Filter Keywords

``Filter Keywords`` can include or exclude media with any of the listed keywords. Both keywords and title/description are lower-cased before filtering.

Default behavior for keyword evaluation is ANY, meaning the filter will succeed if any of the keywords are present. Setting it to ALL requires every listed keyword to be present. The evaluation mode can be set to ANY or ALL using the respective ``_eval`` variable.

Supports the following override variables:

* ``title_include_keywords``
* ``title_exclude_keywords``
* ``description_include_keywords``
* ``description_exclude_keywords``
* ``title_include_keywords``, ``title_include_eval``
* ``title_exclude_keywords``, ``title_exclude_eval``
* ``description_include_keywords``, ``description_include_eval``
* ``description_exclude_keywords``, ``description_exclude_eval``

.. tip::

Expand All @@ -61,6 +63,13 @@ Supports the following override variables:
title_include_keywords:
- "To Catch a Smuggler"
= Sports:
"~Maple Leafs Highlights":
url: "https://www.youtube.com/@NHL"
title_include_eval: "ALL"
title_include_keywords:
- "maple leafs"
- "highlights"
Chunk Downloads
---------------
Expand Down
34 changes: 27 additions & 7 deletions src/ytdl_sub/prebuilt_presets/helpers/filtering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ presets:
description_include_keywords: "{ [] }"
description_exclude_keywords: "{ [] }"

title_include_eval: "ANY"
title_exclude_eval: "ANY"
description_include_eval: "ANY"
description_exclude_eval: "ANY"

"%ensure_string": >-
{
%assert_then(
Expand All @@ -32,21 +37,36 @@ presets:
)
}
# Dispatches to the keyword-matching function selected by the eval type:
# 'any' -> %contains_any, 'all' -> %contains_all. Any other eval type throws
# 'Keyword eval must be either ANY or ALL'.
# The eval type is passed through %ensure_string before the comparison.
# NOTE(review): the comparison is against lower-case 'any'/'all' while the
# defaults are upper-case "ANY"; presumably %ensure_string lower-cases its
# argument -- confirm against its definition.
# $0 - var to evaluate
# $1 - keyword list
# $2 - eval type
"%contains_keywords_inner": >-
{
%elif(
%eq(%ensure_string($2), 'any'),
%contains_any( $0, $1 ),
%eq(%ensure_string($2), 'all'),
%contains_all( $0, $1 ),
%throw('Keyword eval must be either ANY or ALL')
)
}
# $0 - var to evaluate
# $1 - keyword list
# $2 - variable name for error messages
# $3 - default return if keyword list is empty
# $3 - keyword eval
# $4 - default return if keyword list is empty
"%contains_keywords": >-
{
%if(
%bool( $1 ),
%contains_any( %lower($0), %ensure_lower_array($1, $2) ),
$3
%contains_keywords_inner( %lower($0), %ensure_lower_array($1, $2), $3 ),
$4
)
}
filter_exclude:
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords',false) }"
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', title_include_eval, true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', description_include_eval, true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', title_exclude_eval, false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords', description_exclude_eval, false) }"
14 changes: 14 additions & 0 deletions src/ytdl_sub/script/functions/string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ def contains_any(string: String, contains_array: Array) -> Boolean:
)
)

@staticmethod
def contains_all(string: String, contains_array: Array) -> Boolean:
    """
    :description:
      Returns True if all elements in ``contains_array`` are in ``string``. False otherwise.
      Elements that are not a String, Integer, Boolean, or Float can never be in
      ``string``, so their presence makes the result False. An empty
      ``contains_array`` returns True.
    """
    return Boolean(
        all(
            # Check the type as part of the predicate rather than as a filter: a
            # non-scalar element (Array, Map, ...) must fail the match instead of
            # being skipped, otherwise an array holding only such elements would
            # vacuously evaluate to True.
            isinstance(val, (String, Integer, Boolean, Float)) and str(val) in string.value
            for val in contains_array.value
        )
    )

@staticmethod
def slice(string: String, start: Integer, end: Optional[Integer] = None) -> String:
"""
Expand Down
100 changes: 97 additions & 3 deletions tests/integration/prebuilt_presets/test_filter_keywords.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import re

import pytest
from expected_transaction_log import assert_transaction_log_matches

from ytdl_sub.script.utils.exceptions import UserThrownRuntimeError
from ytdl_sub.subscriptions.subscription import Subscription
from ytdl_sub.utils.exceptions import ValidationException


@pytest.fixture
Expand Down Expand Up @@ -77,6 +74,39 @@ def test_title(
transaction_log_summary_file_name=f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_title_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Run a dry-run download with the title keyword eval set to ``all`` and three
    title keywords, then compare the transaction log against the stored
    ``title_filter_keywords_{filter_mode}.txt`` summary.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"title_{filter_mode}_eval"] = "all"
    overrides[f"title_{filter_mode}_keywords"] = ["MOCK", "ENTRY", "20-3"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    mocked_entries = mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    )
    with mocked_entries:
        transaction_log = subscription.download(dry_run=True)

    expected_summary = f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt"
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=expected_summary,
    )

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description(
self,
Expand Down Expand Up @@ -108,6 +138,38 @@ def test_description(
transaction_log_summary_file_name=f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Run a dry-run download with the description keyword eval set to ``ALL`` and
    two description keywords, then compare the transaction log against the
    stored ``description_filter_keywords_{filter_mode}.txt`` summary.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"description_{filter_mode}_eval"] = "ALL"
    overrides[f"description_{filter_mode}_keywords"] = ["descr", "iption"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    mocked_entries = mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    )
    with mocked_entries:
        transaction_log = subscription.download(dry_run=True)

    expected_summary = (
        f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt"
    )
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=expected_summary,
    )

@pytest.mark.parametrize(
"keyword_variable",
[
Expand Down Expand Up @@ -169,3 +231,35 @@ def test_error_not_string_keyword(
pytest.raises(UserThrownRuntimeError, match="filter keywords must be strings"),
):
_ = subscription.download(dry_run=True)

@pytest.mark.parametrize(
    "keyword_variable",
    ["title_include", "title_exclude", "description_include", "description_exclude"],
)
def test_error_not_correct_eval(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    keyword_variable,
):
    """
    An eval override that is neither ANY nor ALL must raise a
    ``UserThrownRuntimeError`` during a dry-run download.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"{keyword_variable}_keywords"] = ["hmm"]
    overrides[f"{keyword_variable}_eval"] = "LOL"

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    with mock_download_collection_entries(is_youtube_channel=False, num_urls=1, is_dry_run=True):
        with pytest.raises(
            UserThrownRuntimeError, match="Keyword eval must be either ANY or ALL"
        ):
            subscription.download(dry_run=True)
8 changes: 8 additions & 0 deletions tests/unit/script/functions/test_string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ def test_contains_any(self, value, expected_output):
output = single_variable_output(f"{{%contains_any('a brown dog', {value})}}")
assert output == expected_output

@pytest.mark.parametrize(
    "value, expected_output",
    [
        ("['a', 'b', 'c']", False),
        ("['nope', [], {}]", False),
        ("['a', 'dog']", True),
    ],
)
def test_contains_all(self, value, expected_output):
    """Evaluate ``%contains_all`` against 'a brown dog' for each keyword array."""
    script = f"{{%contains_all('a brown dog', {value})}}"
    assert single_variable_output(script) == expected_output

@pytest.mark.parametrize(
"input_string, split, max_split, expected_output",
[
Expand Down

0 comments on commit 53d84d8

Please sign in to comment.