Skip to content

Commit

Permalink
Merge pull request #12 from oribarilan/feature/mostCommon
Browse files Browse the repository at this point in the history
Most-Common functionality
  • Loading branch information
oribarilan authored Jan 9, 2024
2 parents e36b37a + 137369c commit 362dd53
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/reference/code_api/api_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- [ ] for_each
- [x] [shuffle](mapper_methods.md#fliq.query.Query.shuffle)
- [x] [flatten](mapper_methods.md#fliq.query.Query.flatten)
- [x] [most_common](mapper_methods.md#fliq.query.Query.most_common)

### Materializers

Expand Down
2 changes: 1 addition & 1 deletion docs/reference/code_api/mapper_methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
::: fliq.query.Query
options:
filters: [
"^append$", "^append_many$", "^bottom$", "^distinct$", "^exclude$", "^flatten$", "^group_by$", "^interleave$", "^order$", "^pairwise$", "^prepend$", "^prepend_many$", "^reverse$", "^select$", "^shuffle$", "^skip$", "^slice$", "^slide$", "^take$", "^top$", "^where$", "^zip$"
"^append$", "^append_many$", "^bottom$", "^distinct$", "^exclude$", "^flatten$", "^group_by$", "^interleave$", "^most_common$", "^order$", "^pairwise$", "^prepend$", "^prepend_many$", "^reverse$", "^select$", "^shuffle$", "^skip$", "^slice$", "^slide$", "^take$", "^top$", "^where$", "^zip$"
]
3 changes: 3 additions & 0 deletions docs/reference/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@

* 🌟 **Slide** - new [slide method](code_api/mapper_methods.md#fliq.query.Query.slide) for creating
tuples that "slide" over a query, in a windowed fashion (a sliding window).
* 🌟 **Most Common** - new [most_common method](code_api/mapper_methods.md#fliq.query.Query.most_common)
for finding the `n` most common items in a query, in descending order of frequency.
* 🌟 **Pairwise** - new [pairwise method](code_api/mapper_methods.md#fliq.query.Query.pairwise) for creating
tuples that "slide" over a query, in a pairwise fashion (a sliding window of size 2, without overlap).
* ⬆️ **Update to `first` and `single`** - added `default` parameter to the [first](code_api/materializer_methods.md#fliq.query.Query.first) and [single](code_api/materializer_methods.md#fliq.query.Query.single) methods
which allows specifying a default value. If no default is specified, an exception is raised.
This replaces the need for the additional `first_or_default` and `single_or_default` methods, which were removed.
Both methods were also optimized to avoid an unnecessary try-except.

## v1.11.0
* ⬆️ **Update to peek** - added fillvalue to the [peek method](code_api/peeking.md)
Expand Down
26 changes: 25 additions & 1 deletion fliq/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import heapq
import itertools
import random
from collections import defaultdict, deque
from collections import defaultdict, deque, Counter
from functools import reduce
from itertools import islice, chain, zip_longest
from operator import attrgetter
Expand Down Expand Up @@ -859,6 +859,30 @@ def interleave(self, *iterables: Iterable[U]) -> Query[Union[T, U]]:
flattened_items)
return self._self(non_sentinel_items)

def most_common(self, n: int = 1) -> Query[T]:
    """
    Yields the n most frequent elements, ordered from most to least frequent.

    The underlying items are counted eagerly, when this method is called,
    so infinite iterables are not supported. Counting is done with
    `collections.Counter`, which requires the elements to be hashable.

    Examples:
        >>> from fliq import q
        >>> q([1, 2, 3, 1, 2, 1]).most_common(n=1).single()
        1
        >>> q([1, 2, 3, 1, 2, 1]).most_common(n=2).to_list()
        [1, 2]

    Args:
        n: Optional. The number of elements to return. Defaults to 1.

    Raises:
        NotEnoughElementsException: In case the query has fewer than n distinct elements.
    """
    frequencies = Counter(self._items)
    # Each entry is an (element, count) pair, most frequent first.
    top_counts = frequencies.most_common(n)
    if n > len(top_counts):
        raise NotEnoughElementsException(f"Found {len(top_counts)} items, expected {n}")
    # Only the elements are yielded; the counts themselves are dropped.
    return self._self([element for element, _ in top_counts])

# endregion

# region Materializers
Expand Down
55 changes: 55 additions & 0 deletions fliq/tests/materializer/reducers/test_most_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pytest

from fliq import q
from fliq.exceptions import NotEnoughElementsException
from fliq.tests.fliq_test_utils import Params


class TestMostCommon:
    """Tests for the `most_common` query method."""

    @pytest.mark.parametrize(Params.sig_iterable, Params.iterable_empty())
    def test_mostCommon_hasNoItems_singleMostCommonRequested(
            self, iter_type, iterable, iterable_list):
        """An empty query cannot provide even a single most-common item."""
        with pytest.raises(NotEnoughElementsException):
            q(iterable).most_common()

    @pytest.mark.parametrize(Params.sig_iterable, Params.iterable_empty())
    def test_mostCommon_hasNoItems_multiMostCommonRequests(
            self, iter_type, iterable, iterable_list):
        """An empty query cannot provide two most-common items."""
        with pytest.raises(NotEnoughElementsException):
            q(iterable).most_common(n=2)

    @pytest.mark.parametrize(Params.sig_iterable, Params.iterable_single())
    def test_mostCommon_hasSingleItem_singleRequested(
            self, iter_type, iterable, iterable_list):
        """With the default n, a single-item query compares equal to its own items."""
        assert q(iterable).most_common() == iterable_list

    @pytest.mark.parametrize(Params.sig_iterable, Params.iterable_single())
    def test_mostCommon_hasSingleItem_multiRequested(
            self, iter_type, iterable, iterable_list):
        """Requesting more items than a single-item query holds must raise."""
        with pytest.raises(NotEnoughElementsException):
            q(iterable).most_common(n=2)

    @pytest.mark.parametrize(Params.sig_iterable, Params.iterable_multi())
    def test_mostCommon_hasMultipleItems_multiRequested(
            self, iter_type, iterable, iterable_list):
        """Every one of the three returned items must come from the source items."""
        first, second, third = q(iterable).most_common(n=3).to_list()
        # not testing for sorting, they all appear once
        for item in (first, second, third):
            assert item in iterable_list

    def test_mostCommon_returnedByFrequencyDesc(self):
        """Items are returned from the most frequent to the least frequent."""
        most, middle, least = q([1, 2, 2, 3, 3, 3]).most_common(n=3).to_list()
        assert (most, middle, least) == (3, 2, 1)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "fliq"
version = "1.11.0"
version = "1.12.0"
description = "Fluent-based Lazily-evaluated Integrated Query for Python"
readme = "README.md"
authors = [{ name = "Ori Bar-ilan", email = "python.oplog@gmail.com" }]
Expand Down

0 comments on commit 362dd53

Please sign in to comment.