Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix upper limit of nb_elements #2067

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 50 additions & 22 deletions faker/providers/lorem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,40 +23,33 @@ class Provider(BaseProvider):
word_connector = " "
sentence_punctuation = "."

def words(
def get_words_list(
self,
nb: int = 3,
part_of_speech: Optional[str] = None,
ext_word_list: Optional[Sequence[str]] = None,
unique: bool = False,
) -> List[str]:
"""Generate a tuple of words.

The ``nb`` argument controls the number of words in the resulting list,
and if ``ext_word_list`` is provided, words from that list will be used
instead of those from the locale provider's built-in word list.
"""Get list of words.

If ``unique`` is ``True``, this method will return a list containing
unique words. Under the hood, |random_sample| will be used for sampling
without replacement. If ``unique`` is ``False``, |random_choices| is
used instead, and the list returned may contain duplicates.
``ext_word_list`` is a parameter that allows the user to provide a list
of words to be used instead of the built-in word list. If ``ext_word_list``
is provided, then the value of ``part_of_speech`` is ignored.

``part_of_speech`` is a parameter that defines to what part of speech
the returned word belongs. If ``ext_word_list`` is not ``None``, then
``part_of_speech`` is ignored. If the value of ``part_of_speech`` does
not correspond to an existent part of speech according to the set locale,
then an exception is raised.

.. warning::
Depending on the length of a locale provider's built-in word list or
on the length of ``ext_word_list`` if provided, a large ``nb`` can
exhaust said lists if ``unique`` is ``True``, raising an exception.
:sample: part_of_speech="abc", ext_word_list=['abc', 'def', 'ghi', 'jkl']
:sample: part_of_speech="abc"
:sample: ext_word_list=['abc', 'def', 'ghi', 'jkl']

:sample:
:sample: nb=5
:sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl']
:sample: nb=4, ext_word_list=['abc', 'def', 'ghi', 'jkl'], unique=True
.. warning::
Depending on the length of a locale provider's built-in word list or
on the length of ``ext_word_list`` if provided, a large ``nb`` can
exhaust said lists if ``unique`` is ``True``, raising an exception.
"""

if ext_word_list is not None:
word_list = ext_word_list
elif part_of_speech:
Expand All @@ -67,6 +60,38 @@ def words(
else:
word_list = self.word_list # type: ignore[attr-defined]

return word_list

def words(
self,
nb: int = 3,
word_list: List[str] = None,
unique: bool = False,
) -> List[str]:
"""Generate a tuple of words.

The ``nb`` argument controls the number of words in the resulting list,
and if ``ext_word_list`` is provided, words from that list will be used
instead of those from the locale provider's built-in word list.

if ``word_list`` is not provided, the method will use a default value of None,
which will result in the method calling the ``get_words_list`` method to get the
word list. If ``word_list`` is provided, the method will use the provided list.

If ``unique`` is ``True``, this method will return a list containing
unique words. Under the hood, |random_sample| will be used for sampling
without replacement. If ``unique`` is ``False``, |random_choices| is
used instead, and the list returned may contain duplicates.

:sample:
:sample: nb=5
:sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl']
:sample: nb=4, ext_word_list=['abc', 'def', 'ghi', 'jkl'], unique=True
"""

if word_list is None:
word_list = self.get_words_list()

if unique:
unique_samples = cast(List[str], self.random_sample(word_list, length=nb))
return unique_samples
Expand All @@ -82,7 +107,9 @@ def word(self, part_of_speech: Optional[str] = None, ext_word_list: Optional[Seq
:sample:
:sample: ext_word_list=['abc', 'def', 'ghi', 'jkl']
"""
return self.words(1, part_of_speech, ext_word_list)[0]
word_list = self.get_words_list(part_of_speech, ext_word_list)

return self.words(1, word_list)[0]

def sentence(
self, nb_words: int = 6, variable_nb_words: bool = True, ext_word_list: Optional[Sequence[str]] = None
Expand All @@ -109,7 +136,8 @@ def sentence(
if variable_nb_words:
nb_words = self.randomize_nb_elements(nb_words, min=1)

words = list(self.words(nb=nb_words, ext_word_list=ext_word_list))
word_list = self.get_words_list(ext_word_list=ext_word_list)
words = list(self.words(nb=nb_words, word_list=word_list))
words[0] = words[0].title()

return self.word_connector.join(words) + self.sentence_punctuation
Expand Down
5 changes: 4 additions & 1 deletion faker/providers/lorem/en_PH/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def english_words(self, nb: int = 3, unique: bool = False) -> List[str]:
:sample: nb=5
:sample: nb=5, unique=True
"""
return self.words(nb=nb, ext_word_list=self.english_word_list, unique=unique)

word_list = self.generator.get_words_list(ext_word_list=self.english_word_list)

return self.words(nb=nb, word_list=word_list, unique=unique)

def english_sentence(self, nb_words: int = 6, variable_nb_words: bool = True) -> str:
"""Generate a sentence in English.
Expand Down
12 changes: 12 additions & 0 deletions faker/providers/python/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import math
import string
import sys
Expand All @@ -16,6 +17,8 @@
TypesSpec = Union[List[Type], Tuple[Type, ...]]
TEnum = TypeVar("TEnum", bound=Enum)

logger = logging.getLogger(__name__)


class EmptyEnumException(BaseFakerException):
pass
Expand Down Expand Up @@ -466,9 +469,18 @@ def pydict(
:variable_nb_elements: is use variable number of elements for dictionary
:value_types: type of dictionary values
"""

words_list_count = len(self.generator.get_words_list())

if variable_nb_elements:
nb_elements = self.randomize_nb_elements(nb_elements, min=1)

if nb_elements > words_list_count:
logger.warning(
f"Number of nb_elements is greater than the number of words in the list. {words_list_count} words will be used."
)
nb_elements = words_list_count

return dict(
zip(
self.generator.words(nb_elements, unique=True),
Expand Down
49 changes: 31 additions & 18 deletions faker/proxy.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,32 @@ class Faker:
def isbn10(self, separator: str = ...) -> str: ...
def isbn13(self, separator: str = ...) -> str: ...
def job(self) -> str: ...
def get_words_list(
    self, part_of_speech: Optional[str] = ..., ext_word_list: Optional[Sequence[str]] = ...
) -> List[str]:
    """
    Get list of words.

    ``ext_word_list`` is a parameter that allows the user to provide a list
    of words to be used instead of the built-in word list. If ``ext_word_list``
    is provided, then the value of ``part_of_speech`` is ignored.

    ``part_of_speech`` is a parameter that defines to what part of speech
    the returned word belongs. If ``ext_word_list`` is not ``None``, then
    ``part_of_speech`` is ignored. If the value of ``part_of_speech`` does
    not correspond to an existent part of speech according to the set locale,
    then an exception is raised.

    :sample: part_of_speech="abc", ext_word_list=['abc', 'def', 'ghi', 'jkl']
    :sample: part_of_speech="abc"
    :sample: ext_word_list=['abc', 'def', 'ghi', 'jkl']

    .. warning::
        This method takes no ``nb`` or ``unique`` argument itself; the list it
        returns is typically passed on to ``words``. Depending on the length of
        a locale provider's built-in word list or of ``ext_word_list`` if
        provided, calling ``words`` there with a large ``nb`` and
        ``unique=True`` can exhaust a short list, raising an exception.
    """
    ...
def paragraph(
self, nb_sentences: int = ..., variable_nb_sentences: bool = ..., ext_word_list: Optional[Sequence[str]] = ...
) -> str:
Expand Down Expand Up @@ -1703,36 +1729,23 @@ class Faker:
:sample: ext_word_list=['abc', 'def', 'ghi', 'jkl']
"""
...
def words(
self,
nb: int = ...,
part_of_speech: Optional[str] = ...,
ext_word_list: Optional[Sequence[str]] = ...,
unique: bool = ...,
) -> List[str]:
def words(self, nb: int = ..., word_list: List[str] = ..., unique: bool = ...) -> List[str]:
"""
Generate a tuple of words.

The ``nb`` argument controls the number of words in the resulting list,
and if ``ext_word_list`` is provided, words from that list will be used
instead of those from the locale provider's built-in word list.

if ``word_list`` is not provided, the method will use a default value of None,
which will result in the method calling the ``get_words_list`` method to get the
word list. If ``word_list`` is provided, the method will use the provided list.

If ``unique`` is ``True``, this method will return a list containing
unique words. Under the hood, |random_sample| will be used for sampling
without replacement. If ``unique`` is ``False``, |random_choices| is
used instead, and the list returned may contain duplicates.

``part_of_speech`` is a parameter that defines to what part of speech
the returned word belongs. If ``ext_word_list`` is not ``None``, then
``part_of_speech`` is ignored. If the value of ``part_of_speech`` does
not correspond to an existent part of speech according to the set locale,
then an exception is raised.

.. warning::
Depending on the length of a locale provider's built-in word list or
on the length of ``ext_word_list`` if provided, a large ``nb`` can
exhaust said lists if ``unique`` is ``True``, raising an exception.

:sample:
:sample: nb=5
:sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl']
Expand Down
35 changes: 16 additions & 19 deletions tests/providers/test_lorem.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def test_words_with_defaults(self, faker, num_samples):
def test_words_with_custom_word_list(self, faker, num_samples):
num_words = 5
for _ in range(num_samples):
words = faker.words(num_words, ext_word_list=self.custom_word_list)
words = faker.words(num_words, word_list=self.custom_word_list)
assert isinstance(words, list)
assert len(words) == 5
assert all(isinstance(word, str) and word in self.custom_word_list for word in words)

def test_words_with_unique_sampling(self, faker, num_samples):
num_words = 5
for _ in range(num_samples):
words = faker.words(num_words, ext_word_list=self.custom_word_list, unique=True)
words = faker.words(num_words, word_list=self.custom_word_list, unique=True)
assert isinstance(words, list)
assert len(words) == 5

Expand Down Expand Up @@ -165,29 +165,26 @@ def test_texts(self, faker, num_samples):
words = re.sub(r"[.\n]+", " ", text.lower()).split()
assert all(word in self.custom_word_list for word in words)

@pytest.mark.parametrize(
"nb,part_of_speech", [(10, "verb"), (18, "adverb"), (11, "noun")], ids=["verb", "adverb", "noun"]
)
def test_words_part_of_speech(self, faker, nb, part_of_speech):
words = faker.words(nb=nb, part_of_speech=part_of_speech)
assert (word in EnUsLoremProvider.parts_of_speech[part_of_speech] for word in words)
def test_get_default_words_list(self, faker):
    # With no arguments the provider must fall back to its built-in word list,
    # so every returned word has to come from EnUsLoremProvider.word_list.
    retrieved = faker.get_words_list()
    for word in retrieved:
        assert word in EnUsLoremProvider.word_list

@pytest.mark.parametrize("part_of_speech", [("verb"), ("adverb"), ("noun")], ids=["verb", "adverb", "noun"])
def test_get_words_list_part_of_speech(self, faker, part_of_speech):
    # Requesting a specific part of speech must only yield words from that
    # part-of-speech bucket of the en_US provider.
    words_list = faker.get_words_list(part_of_speech=part_of_speech)
    # BUG FIX: the previous assertion was `assert (word in ... for word in words_list)`,
    # which asserts the truthiness of a generator object and therefore always
    # passes. Wrap the generator in all() so the membership check actually runs.
    assert all(word in EnUsLoremProvider.parts_of_speech[part_of_speech] for word in words_list)

def test_get_words_list_invalid_part_of_speech(self, faker):
    # An unrecognized part of speech must raise ValueError with a precise message.
    bad_pos = "invalid part of speech"
    expected_message = f"{bad_pos} is not recognized as a part of speech."

    with pytest.raises(ValueError) as exc_info:
        faker.get_words_list(part_of_speech=bad_pos)

    assert exc_info.type is ValueError
    assert exc_info.value.args[0] == expected_message

@pytest.mark.parametrize(
"nb,part_of_speech",
[(3, "adverb"), (5, "verb"), (4, "abcdefgh")],
ids=["ignore adverb", "ignore verb", "ignore invalid part of speech"],
)
def test_words_part_of_speech_ignored(self, faker, nb, part_of_speech):
words = faker.words(nb=nb, part_of_speech=part_of_speech, ext_word_list=self.custom_word_list)
assert len(words) == nb
def test_get_words_list_part_of_speech_ignored(self, faker):
    # When ext_word_list is supplied, part_of_speech is ignored entirely —
    # even an invalid value — and only the custom list is used.
    returned = faker.get_words_list(part_of_speech="ignored part of speech", ext_word_list=self.custom_word_list)
    for word in returned:
        assert word in self.custom_word_list


Expand Down
32 changes: 32 additions & 0 deletions tests/providers/test_python.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import decimal
import logging
import sys
import unittest
import warnings
Expand Down Expand Up @@ -299,6 +300,37 @@ def test_float_min_and_max_value_with_same_whole(self):
self.fake.pyfloat(min_value=2.3, max_value=2.5)


class TestPyDict(unittest.TestCase):
    """Tests for pydict's handling of nb_elements versus the word-list size."""

    def setUp(self):
        self.fake = Faker()
        Faker.seed(0)

    def test_pydict_with_default_nb_elements(self):
        # With seed 0 the default call yields exactly 10 entries.
        generated = self.fake.pydict()
        self.assertEqual(len(generated), 10)

    def test_pydict_with_valid_number_of_nb_elements(self):
        # A request within the word-list size is honored as-is.
        generated = self.fake.pydict(nb_elements=5)
        self.assertEqual(len(generated), 5)

    def test_pydict_with_invalid_number_of_nb_elements(self):
        # A request exceeding the word-list size must be capped to the list
        # length and emit a warning on the python provider's logger.
        requested = 10000
        available = len(self.fake.get_words_list())
        provider_logger = logging.getLogger("faker.providers.python")

        with patch.object(provider_logger, "warning") as mock_warn:
            generated = self.fake.pydict(nb_elements=requested)

        mock_warn.assert_called_once_with(
            f"Number of nb_elements is greater than the number of words in the list. {available} words will be used."
        )
        self.assertEqual(len(generated), available)


class TestPydecimal(unittest.TestCase):
def setUp(self):
self.fake = Faker()
Expand Down
2 changes: 1 addition & 1 deletion tests/pytest/session_overrides/session_locale/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
_MODULE_LOCALES = ["en_GB"]
_MODULE_LOCALES = ["en_US"]