diff --git a/faker/providers/lorem/__init__.py b/faker/providers/lorem/__init__.py index f730415764..1e2901644b 100644 --- a/faker/providers/lorem/__init__.py +++ b/faker/providers/lorem/__init__.py @@ -23,23 +23,16 @@ class Provider(BaseProvider): word_connector = " " sentence_punctuation = "." - def words( + def get_words_list( self, - nb: int = 3, part_of_speech: Optional[str] = None, ext_word_list: Optional[Sequence[str]] = None, - unique: bool = False, ) -> List[str]: - """Generate a tuple of words. - - The ``nb`` argument controls the number of words in the resulting list, - and if ``ext_word_list`` is provided, words from that list will be used - instead of those from the locale provider's built-in word list. + """Get list of words. - If ``unique`` is ``True``, this method will return a list containing - unique words. Under the hood, |random_sample| will be used for sampling - without replacement. If ``unique`` is ``False``, |random_choices| is - used instead, and the list returned may contain duplicates. + ``ext_word_list`` is a parameter that allows the user to provide a list + of words to be used instead of the built-in word list. If ``ext_word_list`` + is provided, then the value of ``part_of_speech`` is ignored. ``part_of_speech`` is a parameter that defines to what part of speech the returned word belongs. If ``ext_word_list`` is not ``None``, then @@ -47,16 +40,16 @@ def words( not correspond to an existent part of speech according to the set locale, then an exception is raised. - .. warning:: - Depending on the length of a locale provider's built-in word list or - on the length of ``ext_word_list`` if provided, a large ``nb`` can - exhaust said lists if ``unique`` is ``True``, raising an exception. 
+ :sample: part_of_speech="abc", ext_word_list=['abc', 'def', 'ghi', 'jkl'] + :sample: part_of_speech="abc" + :sample: ext_word_list=['abc', 'def', 'ghi', 'jkl'] - :sample: - :sample: nb=5 - :sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl'] - :sample: nb=4, ext_word_list=['abc', 'def', 'ghi', 'jkl'], unique=True + .. warning:: + Depending on the length of a locale provider's built-in word list or + on the length of ``ext_word_list`` if provided, a large ``nb`` can + exhaust said lists if ``unique`` is ``True``, raising an exception. """ + if ext_word_list is not None: word_list = ext_word_list elif part_of_speech: @@ -67,6 +60,38 @@ def words( else: word_list = self.word_list # type: ignore[attr-defined] + return word_list + + def words( + self, + nb: int = 3, + word_list: List[str] = None, + unique: bool = False, + ) -> List[str]: + """Generate a tuple of words. + + The ``nb`` argument controls the number of words in the resulting list, + and if ``ext_word_list`` is provided, words from that list will be used + instead of those from the locale provider's built-in word list. + + if ``word_list`` is not provided, the method will use a default value of None, + which will result in the method calling the ``get_words_list`` method to get the + word list. If ``word_list`` is provided, the method will use the provided list. + + If ``unique`` is ``True``, this method will return a list containing + unique words. Under the hood, |random_sample| will be used for sampling + without replacement. If ``unique`` is ``False``, |random_choices| is + used instead, and the list returned may contain duplicates. 
+ + :sample: + :sample: nb=5 + :sample: nb=5, word_list=['abc', 'def', 'ghi', 'jkl'] + :sample: nb=4, word_list=['abc', 'def', 'ghi', 'jkl'], unique=True + """ + + if word_list is None: + word_list = self.get_words_list() + if unique: unique_samples = cast(List[str], self.random_sample(word_list, length=nb)) return unique_samples @@ -82,7 +107,9 @@ def word(self, part_of_speech: Optional[str] = None, ext_word_list: Optional[Seq :sample: :sample: ext_word_list=['abc', 'def', 'ghi', 'jkl'] """ - return self.words(1, part_of_speech, ext_word_list)[0] + word_list = self.get_words_list(part_of_speech, ext_word_list) + + return self.words(1, word_list)[0] def sentence( self, nb_words: int = 6, variable_nb_words: bool = True, ext_word_list: Optional[Sequence[str]] = None ) -> str: @@ -109,7 +136,8 @@ def sentence( if variable_nb_words: nb_words = self.randomize_nb_elements(nb_words, min=1) - words = list(self.words(nb=nb_words, ext_word_list=ext_word_list)) + word_list = self.get_words_list(ext_word_list=ext_word_list) + words = list(self.words(nb=nb_words, word_list=word_list)) words[0] = words[0].title() return self.word_connector.join(words) + self.sentence_punctuation diff --git a/faker/providers/lorem/en_PH/__init__.py b/faker/providers/lorem/en_PH/__init__.py index ea64408e70..fc1f6d6593 100644 --- a/faker/providers/lorem/en_PH/__init__.py +++ b/faker/providers/lorem/en_PH/__init__.py @@ -32,7 +32,10 @@ def english_words(self, nb: int = 3, unique: bool = False) -> List[str]: :sample: nb=5 :sample: nb=5, unique=True """ - return self.words(nb=nb, ext_word_list=self.english_word_list, unique=unique) + + word_list = self.generator.get_words_list(ext_word_list=self.english_word_list) + + return self.words(nb=nb, word_list=word_list, unique=unique) def english_sentence(self, nb_words: int = 6, variable_nb_words: bool = True) -> str: """Generate a sentence in English. 
diff --git a/faker/providers/python/__init__.py b/faker/providers/python/__init__.py index 332f5346d8..8f9ce43a81 100644 --- a/faker/providers/python/__init__.py +++ b/faker/providers/python/__init__.py @@ -1,3 +1,4 @@ +import logging import math import string import sys @@ -16,6 +17,8 @@ TypesSpec = Union[List[Type], Tuple[Type, ...]] TEnum = TypeVar("TEnum", bound=Enum) +logger = logging.getLogger(__name__) + class EmptyEnumException(BaseFakerException): pass @@ -466,9 +469,18 @@ def pydict( :variable_nb_elements: is use variable number of elements for dictionary :value_types: type of dictionary values """ + + words_list_count = len(self.generator.get_words_list()) + if variable_nb_elements: nb_elements = self.randomize_nb_elements(nb_elements, min=1) + if nb_elements > words_list_count: + logger.warning( + f"Number of nb_elements is greater than the number of words in the list. {words_list_count} words will be used." + ) + nb_elements = words_list_count + return dict( zip( self.generator.words(nb_elements, unique=True), diff --git a/faker/proxy.pyi b/faker/proxy.pyi index b85fcb9ae0..d92197ccee 100644 --- a/faker/proxy.pyi +++ b/faker/proxy.pyi @@ -1588,6 +1588,32 @@ class Faker: def isbn10(self, separator: str = ...) -> str: ... def isbn13(self, separator: str = ...) -> str: ... def job(self) -> str: ... + def get_words_list( + self, part_of_speech: Optional[str] = ..., ext_word_list: Optional[Sequence[str]] = ... + ) -> List[str]: + """ + Get list of words. + + ``ext_word_list`` is a parameter that allows the user to provide a list + of words to be used instead of the built-in word list. If ``ext_word_list`` + is provided, then the value of ``part_of_speech`` is ignored. + + ``part_of_speech`` is a parameter that defines to what part of speech + the returned word belongs. If ``ext_word_list`` is not ``None``, then + ``part_of_speech`` is ignored. 
If the value of ``part_of_speech`` does + not correspond to an existent part of speech according to the set locale, + then an exception is raised. + + :sample: part_of_speech="abc", ext_word_list=['abc', 'def', 'ghi', 'jkl'] + :sample: part_of_speech="abc" + :sample: ext_word_list=['abc', 'def', 'ghi', 'jkl'] + + .. warning:: + Depending on the length of a locale provider's built-in word list or + on the length of ``ext_word_list`` if provided, a large ``nb`` can + exhaust said lists if ``unique`` is ``True``, raising an exception. + """ + ... def paragraph( self, nb_sentences: int = ..., variable_nb_sentences: bool = ..., ext_word_list: Optional[Sequence[str]] = ... ) -> str: @@ -1703,13 +1729,7 @@ class Faker: :sample: ext_word_list=['abc', 'def', 'ghi', 'jkl'] """ ... - def words( - self, - nb: int = ..., - part_of_speech: Optional[str] = ..., - ext_word_list: Optional[Sequence[str]] = ..., - unique: bool = ..., - ) -> List[str]: + def words(self, nb: int = ..., word_list: List[str] = ..., unique: bool = ...) -> List[str]: """ Generate a tuple of words. @@ -1717,22 +1737,15 @@ class Faker: and if ``ext_word_list`` is provided, words from that list will be used instead of those from the locale provider's built-in word list. + if ``word_list`` is not provided, the method will use a default value of None, + which will result in the method calling the ``get_words_list`` method to get the + word list. If ``word_list`` is provided, the method will use the provided list. + If ``unique`` is ``True``, this method will return a list containing unique words. Under the hood, |random_sample| will be used for sampling without replacement. If ``unique`` is ``False``, |random_choices| is used instead, and the list returned may contain duplicates. - ``part_of_speech`` is a parameter that defines to what part of speech - the returned word belongs. If ``ext_word_list`` is not ``None``, then - ``part_of_speech`` is ignored. 
If the value of ``part_of_speech`` does - not correspond to an existent part of speech according to the set locale, - then an exception is raised. - - .. warning:: - Depending on the length of a locale provider's built-in word list or - on the length of ``ext_word_list`` if provided, a large ``nb`` can - exhaust said lists if ``unique`` is ``True``, raising an exception. - :sample: :sample: nb=5 :sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl'] diff --git a/tests/providers/test_lorem.py b/tests/providers/test_lorem.py index cb77ad3f47..3e61cb72f5 100644 --- a/tests/providers/test_lorem.py +++ b/tests/providers/test_lorem.py @@ -44,7 +44,7 @@ def test_words_with_defaults(self, faker, num_samples): def test_words_with_custom_word_list(self, faker, num_samples): num_words = 5 for _ in range(num_samples): - words = faker.words(num_words, ext_word_list=self.custom_word_list) + words = faker.words(num_words, word_list=self.custom_word_list) assert isinstance(words, list) assert len(words) == 5 assert all(isinstance(word, str) and word in self.custom_word_list for word in words) @@ -52,7 +52,7 @@ def test_words_with_custom_word_list(self, faker, num_samples): def test_words_with_unique_sampling(self, faker, num_samples): num_words = 5 for _ in range(num_samples): - words = faker.words(num_words, ext_word_list=self.custom_word_list, unique=True) + words = faker.words(num_words, word_list=self.custom_word_list, unique=True) assert isinstance(words, list) assert len(words) == 5 @@ -165,29 +165,26 @@ def test_texts(self, faker, num_samples): words = re.sub(r"[.\n]+", " ", text.lower()).split() assert all(word in self.custom_word_list for word in words) - @pytest.mark.parametrize( - "nb,part_of_speech", [(10, "verb"), (18, "adverb"), (11, "noun")], ids=["verb", "adverb", "noun"] - ) - def test_words_part_of_speech(self, faker, nb, part_of_speech): - words = faker.words(nb=nb, part_of_speech=part_of_speech) - assert (word in 
EnUsLoremProvider.parts_of_speech[part_of_speech] for word in words) + def test_get_default_words_list(self, faker): + words_list = faker.get_words_list() + assert all(word in EnUsLoremProvider.word_list for word in words_list) + + @pytest.mark.parametrize("part_of_speech", [("verb"), ("adverb"), ("noun")], ids=["verb", "adverb", "noun"]) + def test_get_words_list_part_of_speech(self, faker, part_of_speech): + words_list = faker.get_words_list(part_of_speech=part_of_speech) + assert (word in EnUsLoremProvider.parts_of_speech[part_of_speech] for word in words_list) + + def test_get_words_list_invalid_part_of_speech(self, faker): + part_of_speech = "invalid part of speech" - @pytest.mark.parametrize("nb,part_of_speech", [(5, "abcdefg")], ids=["invalid part of speech"]) - def test_words_invalid_part_of_speech(self, faker, nb, part_of_speech): with pytest.raises(ValueError) as exc_info: - faker.words(nb=nb, part_of_speech=part_of_speech) + faker.get_words_list(part_of_speech=part_of_speech) assert exc_info.type is ValueError assert exc_info.value.args[0] == f"{part_of_speech} is not recognized as a part of speech." 
- @pytest.mark.parametrize( - "nb,part_of_speech", - [(3, "adverb"), (5, "verb"), (4, "abcdefgh")], - ids=["ignore adverb", "ignore verb", "ignore invalid part of speech"], - ) - def test_words_part_of_speech_ignored(self, faker, nb, part_of_speech): - words = faker.words(nb=nb, part_of_speech=part_of_speech, ext_word_list=self.custom_word_list) - assert len(words) == nb + def test_get_words_list_part_of_speech_ignored(self, faker): + words = faker.get_words_list(part_of_speech="ignored part of speech", ext_word_list=self.custom_word_list) assert all(word in self.custom_word_list for word in words) diff --git a/tests/providers/test_python.py b/tests/providers/test_python.py index 4935528eb8..477af304bb 100644 --- a/tests/providers/test_python.py +++ b/tests/providers/test_python.py @@ -1,4 +1,5 @@ import decimal +import logging import sys import unittest import warnings @@ -299,6 +300,37 @@ def test_float_min_and_max_value_with_same_whole(self): self.fake.pyfloat(min_value=2.3, max_value=2.5) +class TestPyDict(unittest.TestCase): + def setUp(self): + self.fake = Faker() + Faker.seed(0) + + def test_pydict_with_default_nb_elements(self): + result = self.fake.pydict() + + self.assertEqual(len(result), 10) + + def test_pydict_with_valid_number_of_nb_elements(self): + result = self.fake.pydict(nb_elements=5) + + self.assertEqual(len(result), 5) + + def test_pydict_with_invalid_number_of_nb_elements(self): + nb_elements = 10000 + + words_list_count = len(self.fake.get_words_list()) + + logger = logging.getLogger("faker.providers.python") + + with patch.object(logger, "warning") as mock_warn: + result = self.fake.pydict(nb_elements=nb_elements) + + mock_warn.assert_called_once_with( + f"Number of nb_elements is greater than the number of words in the list. {words_list_count} words will be used." 
+ ) + self.assertEqual(len(result), words_list_count) + + class TestPydecimal(unittest.TestCase): def setUp(self): self.fake = Faker() diff --git a/tests/pytest/session_overrides/session_locale/__init__.py b/tests/pytest/session_overrides/session_locale/__init__.py index 8cf482cd34..fe75ca0193 100644 --- a/tests/pytest/session_overrides/session_locale/__init__.py +++ b/tests/pytest/session_overrides/session_locale/__init__.py @@ -1 +1 @@ -_MODULE_LOCALES = ["en_GB"] +_MODULE_LOCALES = ["en_US"]