diff --git a/README.rst b/README.rst index 0ed664ea..9b1e97f6 100644 --- a/README.rst +++ b/README.rst @@ -80,6 +80,8 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a * ``ar`` (Arabic) * ``az`` (Azerbaijani) * ``by`` (Belarusian) +* ``ce`` (Chechen) +* ``cy`` (Welsh) * ``cz`` (Czech) * ``de`` (German) * ``dk`` (Danish) diff --git a/num2words/README.md b/num2words/README.md new file mode 100644 index 00000000..b7e2e0fa --- /dev/null +++ b/num2words/README.md @@ -0,0 +1,70 @@ +# Add new language + +For each new language you must create a file `lang_NN.py` where `NN` is the +ISO 639-1 or ISO 639-3 [language code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). + +This class must be a subclass of `Num2Word_EU` and implement at least the following methods + +``` +to_cardinal(self, number) +to_ordinal(self, number) +``` + +To integrate your language into the `num2words` module, add the name of your file +to the import list in [num2words/__init__.py](num2words/__init__.py) (top of the file), +and `'nn': lang_NN.Num2Word_NN()` to the `CONVERTER_CLASSES` dict in the same file. +Do not forget to replace `NN` by the appropriate ISO 639 language code. + +The following is a template for a new language class + +``` +from .lang_EU import Num2Word_EU + +class Num2Word_CY(Num2Word_EU): + def setup(self): + Num2Word_EU.setup(self) + + def __init__(self): + pass + + def to_ordinal(self, number): + # implement here your code. number is the integer to be transformed into an ordinal + # as a word (str) + # which is returned + return "NOT IMPLEMENTED" + + def to_cardinal(self, number): + # implement here your code. number is the integer to be transformed into a cardinal + # as a word (str) + # which is returned + return "NOT IMPLEMENTED" +``` + +You can use as many auxiliary methods as you need to make your code efficient and readable. +If you need further options like Gender, Formal/Informal, add those parameters to the methods, +e.g. 
+ +``` + def to_ordinal(self, number, gender="fem", informal=True): + # your code + pass +``` + +More inspiration can be found in existing `num2words/lang_NN.py` files + +## Code validation + +In order to get your contribution merged into the main project, your code must pass the validation tests. +For this, install the packages needed to test + +``` +pip install -r requirements-test.txt +``` + +Run `tox` and `coverage` to check that the code is well formatted and all parts of the code are tested + +``` +tox +python3 -m coverage report -m +``` + diff --git a/num2words/__init__.py b/num2words/__init__.py index 693b0b01..95dbcd7a 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -17,20 +17,23 @@ from __future__ import unicode_literals -from . import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CZ, lang_DE, lang_DK, - lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, - lang_ES_CR, lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, - lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, - lang_HU, lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, - lang_KZ, lang_LT, lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, - lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, - lang_SV, lang_TE, lang_TG, lang_TH, lang_TR, lang_UK, lang_VI) +from . 
import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CE, lang_CY, lang_CZ, + lang_DE, lang_DK, lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, + lang_ES, lang_ES_CO, lang_ES_CR, lang_ES_GT, lang_ES_NI, + lang_ES_VE, lang_FA, lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, + lang_FR_DZ, lang_HE, lang_HU, lang_ID, lang_IS, lang_IT, + lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT, lang_LV, lang_NL, + lang_NO, lang_PL, lang_PT, lang_PT_BR, lang_RO, lang_RU, + lang_SK, lang_SL, lang_SR, lang_SV, lang_TE, lang_TG, lang_TH, + lang_TR, lang_UK, lang_VI) CONVERTER_CLASSES = { 'am': lang_AM.Num2Word_AM(), 'ar': lang_AR.Num2Word_AR(), 'az': lang_AZ.Num2Word_AZ(), 'by': lang_BY.Num2Word_BY(), + 'ce': lang_CE.Num2Word_CE(), + 'cy': lang_CY.Num2Word_CY(), 'cz': lang_CZ.Num2Word_CZ(), 'en': lang_EN.Num2Word_EN(), 'en_IN': lang_EN_IN.Num2Word_EN_IN(), diff --git a/num2words/lang_CE.py b/num2words/lang_CE.py new file mode 100644 index 00000000..abf8e5f0 --- /dev/null +++ b/num2words/lang_CE.py @@ -0,0 +1,522 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023, Johannes Heinecke. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from .currency import parse_currency_parts +from .lang_EU import Num2Word_EU + +# Chechen numbers inflect in case if without noun or +# use a special oblique ending when followed by a counted noun +# 4, 14, 40 and composites thereof agree in class (gender) with the +# noun. Chechen has 6 classes which are indicated by the initial +# letter of 4, 14 and 40. By default it is "д" but +# it can also be "б", "й" or "в". +# Indicate the needed class prefix as follows +# num2words(4, lang='ce', case="abs", clazz="б") + + +CARDINALS = { + "casenames": { + "abs": "Им.", + "gen": "Род.", + "dat": "Дат.", + "erg": "Эрг;", + "instr": "Твор.", + "mat": "Вещ.", + "comp": "Сравнит.", + "all": "Местн.", + }, + "casesuffix_cons": { # to be added to numerals with final consonant + "gen": "аннан", + "dat": "анна", + "erg": "амма", + "instr": "анца", + "mat": "аннах", + "comp": "аннал", + "all": "анга", + "obl": "ан", + "ORD": "алгӀа", + }, + "casesuffix_voc": { # to be added to numerals with final vowel + "gen": "ннан", + "dat": "нна", + "erg": "мма", + "instr": "нца", + "mat": "ннах", + "comp": "ннал", + "all": "нга", + "obl": "н", + "ORD": "лгӀа", + }, + 0: { + "attr": "ноль", + "abs": "ноль", + "gen": "нолан", + "dat": "нолана", + "erg": "ноло", + "instr": "ноланца", + "mat": "ноланах", + "comp": "ноланал", + "all": "ноланга", + }, + 1: { + "attr": "цхьа", # in front of nouns in ABS + "obl": "цхьана", # with nouns in other cases than ABS + "abs": "цхьаъ", + "gen": "цхьаннан", + "dat": "цхьанна", + "erg": "цхьамма", + "instr": "цхьаьнца", + "mat": "цхьаннах", + "comp": "цхьаннал", + "all": "цхаьнга", + "ORD": "цхьалгӀа", + }, + 2: { + "attr": "ши", # in front of 100, 1000 + "obl": "шина", + "abs": "шиъ", + "gen": 
"шиннан", + "dat": "шинна", + "erg": "шимма", + "instr": "шинца", + "mat": "шиннах", + "comp": "шиннал", + "all": "шинга", + "ORD": "шолгӀа", + }, + 3: { + "attr": "кхо", + "obl": "кхона", + "abs": "кхоъ", + "gen": "кхааннан", + "dat": "кхаанна", + "erg": "кхаамма", + "instr": "кхаанца", + "mat": "кхааннах", + "comp": "кхааннал", + "all": "кхаанга", + "ORD": "кхоалгӀа", + }, + 4: { + "attr": "д*и", + "obl": "д*еа", + "abs": "д*иъ", + "gen": "д*еаннан", + "dat": "д*еанна", + "erg": "д*еамма", + "instr": "д*еанца", + "mat": "д*еаннах", + "comp": "д*еаннал", + "all": "д*еанга", + "ORD": "д*оьалгӀа", + }, + 5: { + "attr": "пхи", + "obl": "пхеа", + "abs": "пхиъ", + "gen": "пхеаннан", + "dat": "пхеанна", + "erg": "пхеамма", + "instr": "нхеанца", + "mat": "пхеаннах", + "comp": "пхеаннал", + "all": "пхеанга", + "ORD": "пхоьалгӀа", + }, + 6: { + "abs": "ялх", + "attr": "ялх", + "ORD": "йолхалгӀа", + }, + 7: { + "abs": "ворхӀ", + "attr": "ворхӀ", + "ORD": "ворхӀалгӀа", + }, + 8: { + "abs": "бархӀ", + "attr": "бархӀ", + "ORD": "борхӀалӀа", + }, + 9: { + "abs": "исс", + "attr": "исс", + "ORD": "уьссалгӀа", + }, + 10: { + "attr": "итт", + "abs": "итт", + "gen": "иттаннан", + "dat": "иттанна", + "erg": "иттамма", + "instr": "иттанца", + "mat": "иттаннах", + "comp": "иттаннал", + "all": "иттанга", + "ORD": "уьтталгӀа", + }, + 11: { + "abs": "цхьайтта", + "attr": "цхьайтта", + "ORD": "цхьайтталгӀа", + }, + 12: { + "abs": "шийтта", + "attr": "шийтта", + "ORD": "шийтталга", + }, + 13: { + "abs": "кхойтта", + "attr": "кхойтта", + "ORD": "кхойтталгӀа", + }, + 14: { + "abs": "д*ейтта", + "attr": "д*ейтта", + "ORD": "д*ейтталгӀа", + }, + 15: { + "abs": "пхийтта", + "attr": "пхийтта", + "ORD": "пхийтталгӀа", + }, + 16: { + "abs": "ялхитта", + "attr": "ялхитта", + "ORD": "ялхитталгӀа", + }, + 17: { + "abs": "вуьрхӀитта", + "attr": "вуьрхӀитта", + "ORD": "вуьрхӀитталгӀа", + }, + 18: { + "abs": "берхӀитта", + "attr": "берхӀитта", + "ORD": "берхитталӀа", + }, + 19: { + "abs": "ткъайесна", + 
"attr": "ткъайесна", + "ORD": "ткъаесналгӀа", + }, + 20: { + "abs": "ткъа", + "gen": "ткъаннан", + "dat": "ткъанна", + "erg": "ткъамма", + "instr": "ткъанца", + "mat": "ткъаннах", + "comp": "ткъаннал", + "all": "ткъанга", + "attr": "ткъе", + "ORD": "ткъолгӀа", + }, + 40: { + "abs": "шовзткъа", + "attr": "шовзткъе", + "ORD": "шовзткъалгІа", + }, + 60: { + "abs": "кхузткъа", + "attr": "кхузткъе", + "ORD": "кхузткъалгІа", + }, + 80: { + "abs": "дезткъа", + "attr": "дезткъе", + "ORD": "дезткъалгІа", + }, + 100: { + "attr": "бӀе", + "abs": "бӀе", + "obl": "бӀен", + "gen": "бӀеннан", + "dat": "бӀенна", + "erg": "бӀемма", + "instr": "бӀенца", + "mat": "бӀеннах", + "comp": "бӀеннал", + "all": "бӀенга", + "ORD": "бІолгІа", + }, + 1000: { + "attr": "эзар", + "abs": "эзар", + "obl": "эзаран", + "gen": "эзарнан", + "dat": "эзарна", + "erg": "эзарно", + "instr": "эзарнаца", + "mat": "эзарнах", + "comp": "эзарнал", + "all": "эзаранга", + "ORD": "эзарлагІа", + }, + 1000000: { + "attr": "миллион", + "abs": "миллион", + "ORD": "миллионалгІа", + }, +} + +ILLIONS = { + 6: { + "attr": "миллион", + "abs": "миллион", + "ORD": "миллионалгІа", + }, + 9: { + "attr": "миллиард", + "abs": "миллиард", + "ORD": "миллиардалгІа", + }, + 12: { + "attr": "биллион", + "abs": "биллион", + "ORD": "биллионалгІа", + }, + 15: { + "attr": "биллиард", + "abs": "биллиард", + "ORD": "биллиардалгІа", + }, + 18: { + "attr": "триллион", + "abs": "триллион", + "ORD": "триллионалгІа", + }, + 21: { + "attr": "триллиард", + "abs": "триллиард", + "ORD": "триллиардалгІа", + }, + 24: { + "attr": "квадриллион", + "abs": "квадриллион", + "ORD": "квадриллионалгІа", + }, + 27: { + "attr": "квадриллиард", + "abs": "квадриллиард", + "ORD": "квадриллиардалгІа", + }, + 30: { + "attr": "квинтиллион", + "abs": "квинтиллион", + "ORD": "квинтиллионалгІа", + }, + 33: { + "attr": "квинтиллиард", + "abs": "квинтиллиард", + "ORD": "квинтиллиардалгІа", + }, +} + + +MINUS = "минус" +# DECIMALPOINT = "запятая" # check ! 
DECIMALPOINT = "а"


class Num2Word_CE(Num2Word_EU):
    """Chechen (``ce``) number-to-words converter.

    Word forms come from the module-level ``CARDINALS`` and ``ILLIONS``
    tables.  Numbers are built on a base of twenty; the class-agreeing
    numerals (4, 14, 40 and composites) carry the placeholder ``"д*"`` in
    the tables, which is replaced by the requested class prefix ``clazz``.
    """

    CURRENCY_FORMS = {
        # currency code: (sg, pl), (sg, pl)
        "EUR": (("Евро", "Евро"), ("Сент", "Сенташ")),
        "RUB": (("Сом", "Сомаш"), ("Кепек", "Кепекаш")),
        "USD": (("Доллар", "Доллараш"), ("Сент", "Сенташ")),
        "GBP": (("Фунт", "Фунташ"), ("Пенни", "Пенни")),
    }

    def setup(self):
        """Configure the sign/point words and clear the unused EU tables."""
        Num2Word_EU.setup(self)
        self.negword = "минус"
        # NOTE(review): to_cardinal() joins decimals with DECIMALPOINT, not
        # with this word -- the original author marked it "check !".
        self.pointword = "запятая"
        self.mid_numwords = []
        self.low_numwords = []
        self.ords = {}

    def to_ordinal(self, number, clazz="д"):
        """Return *number* as an ordinal word (the ``"ORD"`` table forms)."""
        return self.to_cardinal(number, clazz=clazz, case="ORD")

    def to_cardinal(self, number, clazz="д", case="abs"):
        """Return *number* spelled out in Chechen.

        Args:
            number: int or float to convert.
            clazz: noun-class prefix substituted for the ``"д*"`` placeholder.
            case: key of the wanted form in ``CARDINALS`` (``"abs"``,
                ``"attr"``, ``"obl"``, ``"gen"``, ..., ``"ORD"``).

        Returns:
            str: the number words; the literal ``"NOT IMPLEMENTED"`` for
            values of 10**34 and above (kept from the original behaviour).
        """
        if number < 0:
            # Previously a negative value fell through to makecase() and
            # raised KeyError; spell the magnitude and prefix the sign word.
            magnitude = self.to_cardinal(-number, clazz=clazz, case=case)
            return self.negword.strip() + " " + magnitude

        if isinstance(number, float):
            entires = self.to_cardinal(int(number))
            digits = str(number)
            if "e" in digits or "E" in digits:
                # str() switched to exponent notation (e.g. "1e-07"), which
                # has no usable fractional part; expand to positional form.
                from decimal import Decimal

                digits = "{:f}".format(Decimal(digits))
            float_part = digits.partition(".")[2]
            if not float_part:
                return entires
            postfix = " ".join(self.to_cardinal(int(c)) for c in float_part)
            return entires + " " + DECIMALPOINT + " " + postfix

        if number < 20:
            return self.makecase(number, case, clazz)

        if number < 100:
            # vigesimal: "<multiple of twenty> <1..19>"
            units = number % 20
            if units == 0:
                return self.makecase(number, case, clazz)
            twenties = self.makecase(number - units, "attr", clazz)
            rest = self.to_cardinal(units, clazz=clazz, case=case)
            return twenties + " " + rest.replace("д*", clazz)

        if number < 1000:
            hundreds, tens = divmod(number, 100)
            hundert = ""
            if hundreds > 1:
                hundert = (
                    CARDINALS[hundreds]["attr"].replace("д*", clazz) + " "
                )
            if tens != 0:
                rest = self.to_cardinal(tens, clazz=clazz, case=case)
                return hundert + CARDINALS[100]["abs"] + " " + rest
            return hundert + self.makecase(100, case, clazz)

        if number < 1000000:
            thousands, hundert = divmod(number, 1000)
            # only the last word of the phrase carries the requested case
            tcase = "attr" if hundert > 0 else case
            if thousands > 1:
                tausend = (
                    self.to_cardinal(thousands, clazz=clazz, case="attr")
                    + " "
                    + CARDINALS[1000][tcase]
                )
            else:
                tausend = self.makecase(1000, tcase, clazz)
            rest = ""
            if hundert != 0:
                rest = " " + self.to_cardinal(hundert, clazz=clazz, case=case)
            return tausend + rest

        if number < 10**34:
            out = []
            for pot in (33, 30, 27, 24, 21, 18, 15, 12, 9, 6):
                # 3 digits of billion, trillion etc
                step = number // 10**pot % 1000
                if step > 0:
                    words = self.to_cardinal(step, clazz=clazz, case="attr")
                    out.append(words + " " + ILLIONS[pot]["attr"])
            rest = number % 10**6
            if rest:
                out.append(self.to_cardinal(rest, clazz=clazz, case=case))
            return " ".join(out)

        return "NOT IMPLEMENTED"

    def _money_verbose(self, number, currency, case):
        """Spell the main currency amount in the form used before a noun."""
        mcase = "attr" if case == "abs" else "obl"
        return self.to_cardinal(number, case=mcase)

    def _cents_verbose(self, number, currency, case):
        """Spell the fractional currency amount in the pre-noun form."""
        mcase = "attr" if case == "abs" else "obl"
        return self.to_cardinal(number, case=mcase)

    def to_currency(
        self,
        val,
        currency="RUB",
        cents=True,
        separator=",",
        adjective=False,
        case="abs",
    ):
        """
        Args:
            val: Numeric value
            currency (str): Currency code
            cents (bool): Verbose cents
            separator (str): Cent separator
            adjective (bool): Prefix currency name with adjective (unused)
            case (str): "abs" selects the "attr" numeral forms, any other
                value the "obl" forms
        Returns:
            str: Formatted string
        Raises:
            NotImplementedError: unknown currency code

        """
        left, right, is_negative = parse_currency_parts(val)

        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"'
                % (currency, self.__class__.__name__)
            )
        # the singular names are used whatever the amount
        devise = cr1[0]
        centime = cr2[0]

        minus_str = "%s " % self.negword.strip() if is_negative else ""
        money_str = self._money_verbose(left, currency, case)
        if cents:
            cents_str = self._cents_verbose(right, currency, case)
        else:
            cents_str = self._cents_terse(right, currency)

        return "%s%s %s%s %s %s" % (
            minus_str,
            money_str,
            devise,
            separator,
            cents_str,
            centime,
        )

    def to_ordinal_num(self, number):
        """Return the digits of *number* followed by the suffix ``-й``."""
        self.verify_ordinal(number)
        return str(number) + "-й"

    def to_year(self, year, case="abs"):
        """Spell a year exactly like a cardinal number."""
        return self.to_cardinal(year, case=case)

    def makecase(self, number, case, clazz):
        """Return the *case* form of a number that is a ``CARDINALS`` key.

        An explicit table form wins; otherwise the regular suffix is added
        to the "abs" form, using the vowel set of suffixes when that form
        ends in "а" and the consonant set otherwise.
        """
        forms = CARDINALS[number]
        if case in forms:
            return forms[case].replace("д*", clazz)
        stem = forms["abs"].replace("д*", clazz)
        if forms["abs"].endswith("а"):
            return stem + CARDINALS["casesuffix_voc"][case]
        return stem + CARDINALS["casesuffix_cons"][case]
+# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from .currency import parse_currency_parts +from .lang_EU import Num2Word_EU + +# Welsh numerals differ from many other languages since the counted +# object does not follow the numeral but is inserted between +# e.g. "23 hours" is +# tri awr ar hugain +# 3 hour on twenty +# in addition to that some numerals trigger a mutation on the following word +# either another numeral or the counted object +# (https://en.wikipedia.org/wiki/Consonant_mutation#Welsh) +# e.g. "23 dogs" (aspirated mutation, c -> ch) +# tri chi ar hugain +# 3 dog on twenty +# but "22 dogs" (soft mutation, c -> g) +# dau gi ar hugain +# 2 dog on twenty +# and "24 dogs" (no mutation) +# pedwar ci ar hugain +# 4 dog on twenty +# (BTW, the counted word is always in singular when following a numeral) +# numerals are mutated as well +# e.g. 
#   "300"
#       tri chant
#       3 hundred
#   "200"
#       dau gant
#       2 hundred
#   "500"
#       pump cant
#       5 hundreds
# the numerals for 2, 3 and 4 differ according to gender (MASC, FEM)
#   2 cats
#       dwy gath
#   2 dogs
#       dau gi
#   2000
#       dwy fil
#   3000
#       tair mil
#
# to add the counted object in the correct position use
#   num2words(17, lang="cy", counted="ci", gender="masc")
#   num2words(17, lang="cy", counted="cath", gender="fem")
# if the number is > 99, use the plural form of the counted object
#   num2words(117, lang="cy", counted="cathod", gender="fem")


# Globals
# -------

# Placeholder marking where the counted noun is inserted into a phrase.
OBJ = "__OBJECT__"

# Every table entry below is a list of (word, mutation) pairs where
# ``mutation`` is the mutation the word triggers on the NEXT word:
# "SM" (soft), "AM" (aspirate) or None.

# Masculine cardinals 0-19.
CARDINAL_WORDS = {
    0: [("dim", None), (OBJ, None)],
    1: [("un", None), (OBJ, None)],
    2: [("dau", "SM"), (OBJ, None)],
    3: [("tri", "AM"), (OBJ, None)],
    4: [("pedwar", None), (OBJ, None)],
    5: [("pump", None), (OBJ, None)],
    6: [("chwech", "AM"), (OBJ, None)],
    7: [("saith", None), (OBJ, None)],
    8: [("wyth", None), (OBJ, None)],
    9: [("naw", None), (OBJ, None)],
    10: [("deg", None), (OBJ, None)],
    11: [("un", None), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddeg", None), (OBJ, None)],
    13: [("tri", "AM"), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwar", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymtheg", None), (OBJ, None)],
    16: [("un", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("dau", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunaw", None), (OBJ, None)],
    # The OBJ slot was missing here, so a counted noun was dropped for 19.
    19: [("pedwar", None), (OBJ, None), ("ar bymtheg", None)],
}

# Feminine cardinals 0-19 (2, 3 and 4 have distinct feminine forms).
CARDINAL_WORDS_FEM = {
    0: [("dim", None), (OBJ, None)],
    1: [("un", None), (OBJ, None)],
    2: [("dwy", "SM"), (OBJ, None)],
    3: [("tair", None), (OBJ, None)],
    4: [("pedair", None), (OBJ, None)],
    5: [("pump", None), (OBJ, None)],
    6: [("chwech", "AM"), (OBJ, None)],
    7: [("saith", None), (OBJ, None)],
    8: [("wyth", None), (OBJ, None)],
    9: [("naw", None), (OBJ, None)],
    10: [("deg", None), (OBJ, None)],
    11: [("un", None), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddeg", None), (OBJ, None)],
    13: [("tair", None), (OBJ, None), ("ar ddeg", None)],
    14: [("pedair", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymtheg", None), (OBJ, None)],
    16: [("un", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("dwy", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunaw", None), (OBJ, None)],
    # The OBJ slot was missing here as well.
    19: [("pedair", None), (OBJ, None), ("ar bymtheg", None)],
}

# Power of ten -> (word, mutation) for thousand, million, ...
MILLION_WORDS = {
    3: ("mil", None),
    6: ("miliwn", None),
    9: ("biliwn", None),
    12: ("triliwn", None),
    15: ("cwadriliwn", None),
    18: ("cwintiliwn", None),
    # NOTE(review): the doubled "t" looks like a typo for "secstiliwn";
    # left unchanged because it is runtime output -- confirm.
    21: ("secsttiliwn", None),
    24: ("septiliwn", None),
    27: ("octiliwn", None),
    30: ("noniliwn", None),
    33: ("dengiliwn", None),
}

# Masculine ordinals 0-19.
# NOTE(review): "unfed" triggers SM at 11 but not at 16 -- confirm.
ORDINAL_WORDS = {
    0: [("dimfed", None), (OBJ, None)],
    1: [(OBJ, None), ("cyntaf", None)],
    2: [("ail", "SM"), (OBJ, None)],
    3: [("trydydd", None), (OBJ, None)],
    4: [("pedwerydd", None), (OBJ, None)],
    5: [("pumed", None), (OBJ, None)],
    6: [("chweched", None), (OBJ, None)],
    7: [("saithfed", None), (OBJ, None)],
    8: [("wythfed", None), (OBJ, None)],
    9: [("nawfed", None), (OBJ, None)],
    10: [("degfed", None), (OBJ, None)],
    11: [("unfed", "SM"), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddegfed", None), (OBJ, None)],
    13: [("trydydd", None), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwerydd", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymthegfed", None), (OBJ, None)],
    16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunawfed", None), (OBJ, None)],
    19: [("pedwerydd", None), (OBJ, None), ("ar bymtheg", None)],
}

# Feminine ordinals 0-19.
# NOTE(review): "pedwaredd" triggers SM at 4 and 14 but not at 19 -- confirm.
ORDINAL_WORDS_FEM = {
    0: [("dimfed", None), (OBJ, None)],
    1: [(OBJ, None), ("gyntaf", None)],
    2: [("ail", "SM"), (OBJ, None)],
    3: [("trydedd", "SM"), (OBJ, None)],
    4: [("pedwaredd", "SM"), (OBJ, None)],
    5: [("pumed", None), (OBJ, None)],
    6: [("chweched", None), (OBJ, None)],
    7: [("saithfed", None), (OBJ, None)],
    8: [("wythfed", None), (OBJ, None)],
    9: [("nawfed", None), (OBJ, None)],
    10: [("degfed", None), (OBJ, None)],
    11: [("unfed", "SM"), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddegfed", None), (OBJ, None)],
    13: [("trydedd", "SM"), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwaredd", "SM"), (OBJ, None), ("ar ddeg", None)],
    15: [("pymthegfed", None), (OBJ, None)],
    16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunawfed", None), (OBJ, None)],
    19: [("pedwaredd", None), (OBJ, None), ("ar bymtheg", None)],
}

# Multiples of twenty (key = how many twenties); the remaining numbers are
# composed from these base forms.
STR_TENS = {
    1: [("ugain", None), (OBJ, None)],
    2: [("deugain", None), (OBJ, None)],
    3: [("trigain", None), (OBJ, None)],
    4: [("pedwar ugain", None), (OBJ, None)],
}

# Ordinal forms of the multiples of twenty.
ORD_STR_TENS = {
    1: [("ugainfed", None), (OBJ, None)],
    2: [("deugainfed", None), (OBJ, None)],
    3: [("trigainfed", None), (OBJ, None)],
    4: [("pedwar ugainfed", None), (OBJ, None)],
}

# Decimal-system tens (key = how many tens).
STR_TENS_INFORMAL = {
    1: ("undeg", None),
    2: ("dauddeg", None),
    3: ("trideg", None),
    4: ("pedwardeg", None),
    5: ("pumdeg", None),
    6: ("chwedeg", None),
    7: ("saithdeg", None),
    8: ("wythdeg", None),
    9: ("nawdeg", None),
}


GENERIC_DOLLARS = ("dolar", "dolarau")
GENERIC_CENTS = ("ceiniog", "ceiniogau")

CURRENCIES_FEM = ["GBP"]
class Num2Word_CY(Num2Word_EU):
    """Welsh (``cy``) number-to-words converter.

    Phrases are assembled as lists of ``(word, mutation)`` pairs taken from
    the module-level tables and rendered by ``makestring()``, which applies
    the consonant mutation each word triggers on its successor and inserts
    the optional counted noun at the ``OBJ`` placeholder.
    """

    CURRENCY_FORMS = {
        # currency code: (sg, pl), (sg, pl)
        # in Welsh a noun after a numeral is ALWAYS in the singular
        "EUR": (("euro", "euros"), GENERIC_CENTS),
        "USD": (GENERIC_DOLLARS, GENERIC_CENTS),
        "GBP": (("punt", "punnoedd"), ("ceiniog", "ceiniogau")),
        "CNY": (("yuan", "yuans"), ("ffen", "ffens")),
    }

    MINUS_PREFIX_WORD = "meinws "
    FLOAT_INFIX_WORD = " pwynt "

    def __init__(self):
        # Deliberately skips the base-class initialisation: this class only
        # uses its own tables, so no attribute normally set up by the base
        # class may be relied on in the methods below.
        pass

    def float_to_words(self, float_number):
        """Spell a non-negative float: integer part, "pwynt", then digits."""
        prefix = self.to_cardinal(int(float_number))
        digits = str(float_number)
        if "e" in digits or "E" in digits:
            # exponent notation (e.g. "1e-07") has no usable fractional
            # part; expand it to plain positional notation first
            from decimal import Decimal

            digits = "{:f}".format(Decimal(digits))
        float_part = digits.partition(".")[2]
        if not float_part:
            return prefix
        postfix = " ".join(self.to_cardinal(int(c)) for c in float_part)
        return prefix + Num2Word_CY.FLOAT_INFIX_WORD + postfix

    def hundred_group(
        self, number, informal=False, gender="masc", ordinal=False
    ):
        """Return the ``(word, mutation)`` list for a group 1..999.

        Args:
            number: value of the three-digit group.
            informal: accepted for interface compatibility; unused.
            gender: "masc" or "fem", selects the unit forms.
            ordinal: use ordinal forms for the units / twenties.
        """
        hundreds = number // 100
        until100 = number % 100  # 0 - 99
        # list group of number words and mutation info (for the following word)
        result = []

        # Pick the unit table from both gender and ordinal; previously a
        # feminine ordinal fell back to the feminine cardinal table.
        if ordinal:
            CW = ORDINAL_WORDS_FEM if gender == "fem" else ORDINAL_WORDS
        else:
            CW = CARDINAL_WORDS_FEM if gender == "fem" else CARDINAL_WORDS

        if hundreds > 0:
            if hundreds > 1:
                result.extend(CARDINAL_WORDS[hundreds])
            result.extend([("cant", None), (OBJ, None)])
            if until100:
                # "ac" is used for the remainders listed here, otherwise
                # "a", which triggers the aspirate mutation
                if until100 in (
                    1, 8, 11, 16, 20, 21, 31, 36,
                    41, 48, 61, 68, 71, 81, 88, 91,
                ):
                    result.append(("ac", None))
                else:
                    result.append(("a", "AM"))

        if until100:
            if not ordinal and 50 <= until100 <= 59:
                # fifty is "hanner cant" (half a hundred)
                units = number % 10
                half = "hanner" if hundreds > 0 else "hanner cant"
                if units == 0:
                    if hundreds > 0:
                        result.append((half, None))
                    else:
                        result.extend([(half, None), (OBJ, None)])
                elif units == 1:
                    result.extend([(half + " ac un", None), (OBJ, None)])
                else:
                    result.append((half + " a", "AM"))
                    result.extend(CW[units])
            else:
                if (number < 20 and number > 0) or (
                    number == 0 and hundreds == 0
                ):
                    if gender == "fem":
                        result.extend(CARDINAL_WORDS_FEM[int(number)])
                    else:
                        result.extend(CARDINAL_WORDS[int(number)])
                else:
                    tens = until100 // 20
                    units = number % 20
                    if ordinal and units == 0:
                        degau = ORD_STR_TENS.get(tens)
                    else:
                        degau = STR_TENS.get(tens)

                    if units != 0:
                        result.extend(CW[units])
                        if degau:
                            # "a" + AM above forty, "ar" + SM for 21..39
                            if tens > 1:
                                result.append(("a", "AM"))
                            else:
                                result.append(("ar", "SM"))
                            result.extend(degau)
                    elif degau:
                        result.extend(degau)
        return result

    def to_ordinal(self, number, informal=False, gender="masc"):
        """Return *number* (0..100) as a Welsh ordinal.

        Raises:
            NotImplementedError: for numbers above 100.
        """
        if number < 20:
            # gender used to be ignored here and the feminine table unused
            table = ORDINAL_WORDS_FEM if gender == "fem" else ORDINAL_WORDS
            return makestring(table[number])
        if number == 100:
            return "canfed"
        elif number > 100:
            raise NotImplementedError("The given number is too large.")

        return self.to_cardinal(
            number, informal=False, gender=gender, ordinal=True
        )

    def to_cardinal(
        self,
        number,
        informal=False,
        gender="masc",
        ordinal=False,
        counted=None,
        raw=False,
    ):
        """Return *number* spelled out in Welsh.

        Args:
            number: int or float.
            informal: passed through to ``hundred_group``; currently unused.
            gender: gender of the counted noun ("masc" / "fem").
            ordinal: render the lowest non-zero group with ordinal forms.
            counted: optional noun to insert (singular below 100; from 100
                upwards it is appended after "o").
            raw: return the ``(word, mutation)`` list instead of a string
                (integers only).

        Raises:
            NotImplementedError: for magnitudes of 999 * 10**33 and above.
        """
        negative = False
        if number < 0:
            negative = True
            number = -1 * number
        if number == 0:
            if raw:
                # hand out a copy so callers cannot alter the shared table
                return list(CARDINAL_WORDS[0])
            return makestring(CARDINAL_WORDS[0])
        elif not number < 999 * 10**33:
            raise NotImplementedError("The given number is too large.")
        elif isinstance(number, float):
            words = self.float_to_words(number)
            if negative:
                # the sign used to be dropped for negative floats
                words = self.MINUS_PREFIX_WORD + words
            return words

        # split in groups of 10**3
        # groups of three digits starting from right (units (1 - 999),
        # thousands, millions, ...)
        groups = []
        lowestgroup = None  # lowest group of 3 digits > 0, for the ordinals
        for pot in [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]:
            gr = (number % 10**pot) // 10 ** (pot - 3)
            groups.append((gr, pot))
            if gr and not lowestgroup:
                lowestgroup = gr

        result = []
        if negative:
            result.append((self.MINUS_PREFIX_WORD.strip(), None))

        for gr, pot in reversed(groups):
            if not gr:
                continue
            if pot == 6:
                g = "fem"  # mil (1000) is feminine
            elif pot == 3:
                g = gender  # units depend on the following noun
            else:
                g = "masc"  # millions etc are masculine
            # a multiplier of one is not spelled before mil, miliwn, ...
            if gr > 1 or pot == 3:
                result += self.hundred_group(
                    gr,
                    informal=informal,
                    gender=g,
                    ordinal=ordinal and (lowestgroup == gr),
                )
            if pot > 3:
                result.append(MILLION_WORDS[pot - 3])

        if raw:
            # need to be able trigger correct mutation on currencies
            return result
        if number < 100:
            return makestring(result, counted=counted)
        if counted:
            result.extend([("o", "SM"), (counted, None)])
        return makestring(result)

    def to_currency(
        self, val, currency="EUR", cents=True, separator=",", adjective=False
    ):
        """
        Args:
            val: Numeric value
            currency (str): Currency code
            cents (bool): Verbose cents
            separator (str): Cent separator
            adjective (bool): Prefix currency name with adjective (unused)
        Returns:
            str: Formatted string
        Raises:
            NotImplementedError: unknown currency code

        """
        left, right, is_negative = parse_currency_parts(val)
        if currency not in self.CURRENCY_FORMS:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"'
                % (currency, self.__class__.__name__)
            )

        # __init__ never sets ``negword`` on this class, so use the class's
        # own minus word (the same one to_cardinal uses).
        minus_str = self.MINUS_PREFIX_WORD if is_negative else ""
        money_str = self._money_verbose(left, currency)
        cents_str = (
            self._cents_verbose(right, currency)
            if cents
            else self._cents_terse(right, currency)
        )

        if right == 0:
            # no pence
            return "%s%s" % (minus_str, money_str)
        elif left == 0:
            # no pounds
            return "%s%s" % (minus_str, cents_str)

        return "%s%s%s %s" % (minus_str, money_str, separator, cents_str)

    def _money_verbose(self, number, currency):
        """Spell the main amount together with the currency name.

        NOTE(review): feminine numerals are used for every currency,
        although CURRENCIES_FEM lists only GBP -- confirm.
        """
        m = self.to_cardinal(number, gender="fem", raw=True)
        if number > 100:
            # large amounts: "<number> o <plural name>"
            c = self.CURRENCY_FORMS[currency][0][1]
            m.append(("o", "SM"))
            m.append((c, None))
            return makestring(m)
        # otherwise the singular name is inserted at the OBJ slot
        c = self.CURRENCY_FORMS[currency][0][0]
        return makestring(m, counted=c)

    def _cents_verbose(self, number, currency):
        """Spell the fractional amount with the cent name ("" for zero)."""
        if number == 0:
            return ""
        if number > 1:
            m = self.to_cardinal(number, raw=True)
        else:
            # a single cent is rendered as the bare noun
            m = [(OBJ, None)]
        c = self.CURRENCY_FORMS[currency][1][0]
        return makestring(m, counted=c)
# Initial consonant -> replacement under the Welsh soft mutation.
# "g" is deleted (gardd -> ardd); that rule was previously missing.
_SOFT_INITIALS = {
    "p": "b",
    "t": "d",
    "c": "g",
    "b": "f",
    "m": "f",
    "d": "dd",
    "g": "",
}


def makestring(result, counted=None):
    """Join ``(word, mutation)`` pairs into a phrase.

    Each word is mutated according to the mutation code carried by the
    preceding emitted word.  The ``OBJ`` placeholder is replaced by
    *counted* at its first occurrence only; placeholders that are not
    filled are skipped and keep the pending mutation, so it applies to the
    next real word.
    """
    out = []
    lastmut = None
    for w, mut in result:
        if w == OBJ:
            if not counted:
                continue
            w = counted
            counted = None  # only first position
        out.append(mutate(w, lastmut) if lastmut else w)
        lastmut = mut
    return " ".join(out)


def mutate(word, mutation):
    """Apply *mutation* ("SM" soft, "AM" aspirate) to *word*.

    Any other code leaves the word unchanged (this used to return None).
    """
    if mutation == "SM":
        return softmutation(word)
    if mutation == "AM":
        return aspiratedmutation(word)
    return word


def softmutation(word):
    """Return *word* with the soft mutation applied to its initial.

    p->b, t->d, c->g, b->f, m->f, d->dd, g->(dropped), ll->l, rh->r; the
    digraphs ph, th, ch and dd are left alone.  "ugain" becomes "hugain"
    (the form used after "ar").  Empty and one-letter words are handled
    instead of raising IndexError.
    """
    if word == "ugain":
        return "hugain"
    if word.startswith(("ph", "th", "ch", "dd")):
        return word
    if word.startswith("ll"):
        return word[1:]
    if word.startswith("rh"):
        return "r" + word[2:]
    initial = word[:1]
    if initial in _SOFT_INITIALS:
        return _SOFT_INITIALS[initial] + word[1:]
    return word


def aspiratedmutation(word):
    """Return *word* with the aspirate mutation: p->ph, t->th, c->ch.

    Words already starting with ph, th or ch, and any other initial, are
    returned unchanged; empty and one-letter words no longer raise.
    """
    if word.startswith(("ph", "th", "ch")):
        return word
    initial = word[:1]
    if initial in ("p", "t", "c"):
        return initial + "h" + word[1:]
    return word
+# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = [ + (1, "obl", "б", "цхьана"), + (2, "comp", "в", "шиннал"), + (3, "mat", "д", "кхааннах"), + (4, "mat", "в", "веаннах"), + (5, "abs", "й", "пхиъ"), + (6, "dat", "д", "ялханна"), + (7, "erg", "в", "ворхӀамма"), + (8, "comp", "й", "бархӀаннал"), + (9, "dat", "й", "иссанна"), + (10, "erg", "б", "иттамма"), + (11, "dat", "б", "цхьайттанна"), + (12, "instr", "й", "шийттанца"), + (13, "erg", "б", "кхойттамма"), + (14, "all", "в", "вейттанга"), + (15, "dat", "б", "пхийттанна"), + (16, "dat", "й", "ялхиттанна"), + (17, "dat", "в", "вуьрхӀиттанна"), + (18, "attr", "й", "берхӀитта"), + (19, "all", "й", "ткъайеснанга"), + (20, "attr", "б", "ткъе"), + (21, "all", "в", "ткъе цхаьнга"), + (22, "obl", "в", "ткъе шина"), + (23, "attr", "б", "ткъе кхо"), + (24, "dat", "й", "ткъе йеанна"), + (25, "attr", "й", "ткъе пхи"), + (26, "abs", "б", "ткъе ялх"), + (27, "abs", "в", "ткъе ворхӀ"), + (28, "all", "б", "ткъе бархӀанга"), + (29, "mat", "д", "ткъе иссаннах"), + (30, "gen", "й", "ткъе иттаннан"), + (31, "dat", "в", "ткъе цхьайттанна"), + (32, "comp", "й", "ткъе шийттаннал"), + (33, "instr", "в", "ткъе кхойттанца"), + (34, "instr", "в", "ткъе вейттанца"), + (35, "comp", "в", "ткъе пхийттаннал"), + (36, "dat", "й", "ткъе ялхиттанна"), + (37, "obl", "в", "ткъе вуьрхӀиттан"), + (38, "dat", "й", "ткъе берхӀиттанна"), + (39, "mat", "й", "ткъе ткъайеснаннах"), + (40, "all", "д", 
"шовзткъанга"), + (41, "obl", "в", "шовзткъе цхьана"), + (42, "dat", "в", "шовзткъе шинна"), + (43, "erg", "й", "шовзткъе кхаамма"), + (44, "erg", "й", "шовзткъе йеамма"), + (45, "comp", "д", "шовзткъе пхеаннал"), + (46, "mat", "б", "шовзткъе ялханнах"), + (47, "erg", "б", "шовзткъе ворхӀамма"), + (48, "erg", "в", "шовзткъе бархӀамма"), + (49, "all", "б", "шовзткъе иссанга"), + (50, "mat", "й", "шовзткъе иттаннах"), + (51, "comp", "в", "шовзткъе цхьайттаннал"), + (52, "erg", "в", "шовзткъе шийттамма"), + (53, "attr", "д", "шовзткъе кхойтта"), + (54, "gen", "б", "шовзткъе бейттаннан"), + (55, "attr", "д", "шовзткъе пхийтта"), + (56, "instr", "й", "шовзткъе ялхиттанца"), + (57, "obl", "б", "шовзткъе вуьрхӀиттан"), + (58, "attr", "б", "шовзткъе берхӀитта"), + (59, "all", "й", "шовзткъе ткъайеснанга"), + (60, "all", "й", "кхузткъанга"), + (61, "gen", "й", "кхузткъе цхьаннан"), + (62, "all", "б", "кхузткъе шинга"), + (63, "instr", "б", "кхузткъе кхаанца"), + (64, "dat", "й", "кхузткъе йеанна"), + (65, "instr", "й", "кхузткъе нхеанца"), + (66, "all", "б", "кхузткъе ялханга"), + (67, "erg", "д", "кхузткъе ворхӀамма"), + (68, "instr", "д", "кхузткъе бархӀанца"), + (69, "mat", "й", "кхузткъе иссаннах"), + (70, "attr", "б", "кхузткъе итт"), + (71, "gen", "б", "кхузткъе цхьайттаннан"), + (72, "abs", "й", "кхузткъе шийтта"), + (73, "mat", "д", "кхузткъе кхойттаннах"), + (74, "instr", "й", "кхузткъе йейттанца"), + (75, "mat", "в", "кхузткъе пхийттаннах"), + (76, "instr", "б", "кхузткъе ялхиттанца"), + (77, "dat", "в", "кхузткъе вуьрхӀиттанна"), + (78, "erg", "д", "кхузткъе берхӀиттамма"), + (79, "gen", "б", "кхузткъе ткъайеснаннан"), + (80, "dat", "б", "дезткъанна"), + (81, "gen", "б", "дезткъе цхьаннан"), + (82, "dat", "б", "дезткъе шинна"), + (83, "obl", "д", "дезткъе кхона"), + (84, "erg", "в", "дезткъе веамма"), + (85, "all", "в", "дезткъе пхеанга"), + (86, "erg", "д", "дезткъе ялхамма"), + (87, "comp", "б", "дезткъе ворхӀаннал"), + (88, "dat", "д", "дезткъе бархӀанна"), + 
(89, "erg", "б", "дезткъе иссамма"), + (90, "obl", "й", "дезткъе иттан"), + (91, "obl", "б", "дезткъе цхьайттан"), + (92, "abs", "б", "дезткъе шийтта"), + (93, "gen", "в", "дезткъе кхойттаннан"), + (94, "comp", "б", "дезткъе бейттаннал"), + (95, "all", "б", "дезткъе пхийттанга"), + (96, "instr", "д", "дезткъе ялхиттанца"), + (97, "erg", "д", "дезткъе вуьрхӀиттамма"), + (98, "instr", "й", "дезткъе берхӀиттанца"), + (99, "instr", "б", "дезткъе ткъайеснанца"), + (0, "gen", "б", "нолан"), + (100, "mat", "б", "бӀеннах"), + (200, "attr", "д", "ши бӀе"), + (300, "obl", "в", "кхо бӀен"), + (400, "abs", "в", "ви бӀе"), + (500, "all", "й", "пхи бӀенга"), + (600, "abs", "й", "ялх бӀе"), + (700, "mat", "й", "ворхӀ бӀеннах"), + (800, "gen", "б", "бархӀ бӀеннан"), + (900, "mat", "в", "исс бӀеннах"), + (1000, "gen", "д", "эзарнан"), + (1100, "instr", "д", "эзар бӀенца"), + (1200, "instr", "д", "эзар ши бӀенца"), + (1300, "comp", "б", "эзар кхо бӀеннал"), + (1400, "instr", "д", "эзар ди бӀенца"), + (1500, "comp", "б", "эзар пхи бӀеннал"), + (1600, "erg", "б", "эзар ялх бӀемма"), + (1700, "attr", "д", "эзар ворхӀ бӀе"), + (1800, "obl", "д", "эзар бархӀ бӀен"), + (1900, "gen", "й", "эзар исс бӀеннан"), + (2000, "comp", "д", "ши эзарнал"), + (2022, "comp", "д", "ши эзар ткъе шиннал"), + (2100, "obl", "в", "ши эзар бӀен"), + (423000, "erg", "в", "ви бӀе ткъе кхо эзарно"), +] + +TEST_CASES_ORDINAL = [ + (1, "all", "б", "цхьалгӀа"), + (2, "dat", "в", "шолгӀа"), + (3, "obl", "й", "кхоалгӀа"), + (4, "dat", "б", "боьалгӀа"), + (5, "dat", "в", "пхоьалгӀа"), + (6, "abs", "в", "йолхалгӀа"), + (7, "abs", "в", "ворхӀалгӀа"), + (8, "abs", "д", "борхӀалӀа"), + (9, "comp", "д", "уьссалгӀа"), + (10, "erg", "д", "уьтталгӀа"), + (11, "all", "б", "цхьайтталгӀа"), + (12, "abs", "й", "шийтталга"), + (13, "gen", "в", "кхойтталгӀа"), + (14, "gen", "в", "вейтталгӀа"), + (15, "mat", "й", "пхийтталгӀа"), + (16, "dat", "й", "ялхитталгӀа"), + (17, "erg", "д", "вуьрхӀитталгӀа"), + (18, "erg", "й", 
"берхитталӀа"), + (19, "obl", "в", "ткъаесналгӀа"), + (20, "abs", "в", "ткъолгӀа"), + (21, "mat", "б", "ткъе цхьалгӀа"), + (22, "erg", "б", "ткъе шолгӀа"), + (23, "mat", "й", "ткъе кхоалгӀа"), + (24, "obl", "б", "ткъе боьалгӀа"), + (25, "abs", "д", "ткъе пхоьалгӀа"), + (26, "all", "й", "ткъе йолхалгӀа"), + (27, "mat", "в", "ткъе ворхӀалгӀа"), + (28, "instr", "д", "ткъе борхӀалӀа"), + (29, "obl", "б", "ткъе уьссалгӀа"), + (30, "dat", "б", "ткъе уьтталгӀа"), + (31, "obl", "й", "ткъе цхьайтталгӀа"), + (32, "comp", "д", "ткъе шийтталга"), + (33, "attr", "д", "ткъе кхойтталгӀа"), + (34, "gen", "в", "ткъе вейтталгӀа"), + (35, "erg", "д", "ткъе пхийтталгӀа"), + (36, "all", "в", "ткъе ялхитталгӀа"), + (37, "attr", "й", "ткъе вуьрхӀитталгӀа"), + (38, "erg", "б", "ткъе берхитталӀа"), + (39, "gen", "д", "ткъе ткъаесналгӀа"), + (40, "abs", "й", "шовзткъалгІа"), + (41, "erg", "й", "шовзткъе цхьалгӀа"), + (42, "comp", "й", "шовзткъе шолгӀа"), + (43, "obl", "д", "шовзткъе кхоалгӀа"), + (44, "all", "й", "шовзткъе йоьалгӀа"), + (45, "abs", "д", "шовзткъе пхоьалгӀа"), + (46, "comp", "д", "шовзткъе йолхалгӀа"), + (47, "comp", "й", "шовзткъе ворхӀалгӀа"), + (48, "attr", "б", "шовзткъе борхӀалӀа"), + (49, "comp", "й", "шовзткъе уьссалгӀа"), + (50, "abs", "д", "шовзткъе уьтталгӀа"), + (51, "dat", "б", "шовзткъе цхьайтталгӀа"), + (52, "comp", "в", "шовзткъе шийтталга"), + (53, "mat", "б", "шовзткъе кхойтталгӀа"), + (54, "all", "д", "шовзткъе дейтталгӀа"), + (55, "dat", "в", "шовзткъе пхийтталгӀа"), + (56, "erg", "б", "шовзткъе ялхитталгӀа"), + (57, "comp", "й", "шовзткъе вуьрхӀитталгӀа"), + (58, "instr", "в", "шовзткъе берхитталӀа"), + (59, "mat", "б", "шовзткъе ткъаесналгӀа"), + (60, "all", "в", "кхузткъалгІа"), + (61, "obl", "д", "кхузткъе цхьалгӀа"), + (62, "instr", "д", "кхузткъе шолгӀа"), + (63, "erg", "й", "кхузткъе кхоалгӀа"), + (64, "dat", "д", "кхузткъе доьалгӀа"), + (65, "gen", "д", "кхузткъе пхоьалгӀа"), + (66, "mat", "в", "кхузткъе йолхалгӀа"), + (67, "gen", "в", "кхузткъе 
ворхӀалгӀа"), + (68, "attr", "б", "кхузткъе борхӀалӀа"), + (69, "all", "д", "кхузткъе уьссалгӀа"), + (70, "mat", "в", "кхузткъе уьтталгӀа"), + (71, "gen", "й", "кхузткъе цхьайтталгӀа"), + (72, "obl", "й", "кхузткъе шийтталга"), + (73, "attr", "в", "кхузткъе кхойтталгӀа"), + (74, "dat", "б", "кхузткъе бейтталгӀа"), + (75, "instr", "в", "кхузткъе пхийтталгӀа"), + (76, "gen", "в", "кхузткъе ялхитталгӀа"), + (77, "erg", "д", "кхузткъе вуьрхӀитталгӀа"), + (78, "all", "й", "кхузткъе берхитталӀа"), + (79, "instr", "д", "кхузткъе ткъаесналгӀа"), + (80, "dat", "в", "дезткъалгІа"), + (81, "mat", "в", "дезткъе цхьалгӀа"), + (82, "abs", "д", "дезткъе шолгӀа"), + (83, "abs", "д", "дезткъе кхоалгӀа"), + (84, "erg", "в", "дезткъе воьалгӀа"), + (85, "obl", "й", "дезткъе пхоьалгӀа"), + (86, "instr", "д", "дезткъе йолхалгӀа"), + (87, "all", "в", "дезткъе ворхӀалгӀа"), + (88, "dat", "д", "дезткъе борхӀалӀа"), + (89, "obl", "б", "дезткъе уьссалгӀа"), + (90, "instr", "в", "дезткъе уьтталгӀа"), + (91, "abs", "й", "дезткъе цхьайтталгӀа"), + (92, "comp", "в", "дезткъе шийтталга"), + (93, "erg", "д", "дезткъе кхойтталгӀа"), + (94, "obl", "й", "дезткъе йейтталгӀа"), + (95, "comp", "б", "дезткъе пхийтталгӀа"), + (96, "obl", "б", "дезткъе ялхитталгӀа"), + (97, "gen", "й", "дезткъе вуьрхӀитталгӀа"), + (98, "dat", "б", "дезткъе берхитталӀа"), + (99, "abs", "д", "дезткъе ткъаесналгӀа"), + (100, "abs", "в", "бІолгІа"), + (200, "obl", "й", "ши бІолгІа"), + (300, "mat", "в", "кхо бІолгІа"), + (400, "gen", "б", "би бІолгІа"), + (500, "erg", "й", "пхи бІолгІа"), + (600, "gen", "д", "ялх бІолгІа"), + (700, "instr", "й", "ворхӀ бІолгІа"), + (800, "all", "б", "бархӀ бІолгІа"), + (900, "comp", "б", "исс бІолгІа"), + (1000, "dat", "д", "эзарлагІа"), + (107, "gen", "в", "бӀе ворхӀалгӀа"), + (214, "attr", "д", "ши бӀе дейтталгӀа"), + (321, "comp", "д", "кхо бӀе ткъе цхьалгӀа"), + (428, "dat", "в", "ви бӀе ткъе борхӀалӀа"), + (535, "erg", "й", "пхи бӀе ткъе пхийтталгӀа"), + (642, "all", "й", "ялх бӀе 
шовзткъе шолгӀа"), + (749, "mat", "в", "ворхӀ бӀе шовзткъе уьссалгӀа"), + (856, "attr", "й", "бархӀ бӀе шовзткъе ялхитталгӀа"), + (963, "mat", "б", "исс бӀе кхузткъе кхоалгӀа"), + (1070, "comp", "в", "эзар кхузткъе уьтталгӀа"), + (1177, "dat", "в", "эзар бӀе кхузткъе вуьрхӀитталгӀа"), + (1284, "abs", "д", "эзар ши бӀе дезткъе доьалгӀа"), + (1391, "dat", "в", "эзар кхо бӀе дезткъе цхьайтталгӀа"), + (1498, "abs", "в", "эзар ви бӀе дезткъе берхитталӀа"), + (1605, "obl", "б", "эзар ялх бӀе пхоьалгӀа"), + (1712, "erg", "й", "эзар ворхӀ бӀе шийтталга"), + (1819, "all", "б", "эзар бархӀ бӀе ткъаесналгӀа"), + (1926, "abs", "б", "эзар исс бӀе ткъе йолхалгӀа"), + (2033, "all", "д", "ши эзар ткъе кхойтталгӀа"), + (2140, "dat", "б", "ши эзар бӀе шовзткъалгІа"), + (423000, "dat", "д", "ди бӀе ткъе кхо эзарлагІа"), +] + +TEST_CASES_YEAR = [ + (1719, "abs", "эзар ворхӀ бӀе ткъайесна"), + (1812, "abs", "эзар бархӀ бӀе шийтта"), + (1926, "abs", "эзар исс бӀе ткъе ялх"), +] + +TEST_CASES_DECIMALS = [(123.4567, "бӀе ткъе кхоъ а диъ пхиъ ялх ворхӀ")] + +TEST_CASES_MILLIONS = [ + (200020, "ши бӀе эзар ткъа"), + (4000400, "ди миллион ди бӀе"), + (60006000, "кхузткъе миллион ялх эзар"), + (800080000, "бархӀ бӀе миллион дезткъе эзар"), + (10001000000, "итт миллиард цхьа миллион"), + (120012000000, "бӀе ткъе миллиард шийтта миллион"), + (1400140000000, "цхьа биллион ди бӀе миллиард бӀе шовзткъе миллион"), + (16001600000000, "ялхитта биллион цхьа миллиард ялх бӀе миллион"), + (180018000000000, "бӀе дезткъе биллион берхӀитта миллиард"), + (2000200000000000, "ши биллиард ши бӀе миллиард"), + (22002200000000000, "ткъе ши биллиард ши биллион ши бӀе миллиард"), + (240024000000000000, "ши бӀе шовзткъе биллиард ткъе ди биллион"), + ( + 2600260000000000000, + "ши триллион ялх бӀе биллиард ши бӀе кхузткъе биллион", + ), + ( + 28002800000000000000, + "ткъе бархӀ триллион ши биллиард бархӀ бӀе биллион", + ), + (300030000000000000000, "кхо бӀе триллион ткъе итт биллиард"), + ( + 3200320000000000000000, 
class Num2WordsCETest(TestCase):
    """Tests for the Chechen (``lang="ce"``) converter."""

    def test_number(self):
        # Rows are (number, case, noun class, expected words).
        for number, case, clazz, expected in TEST_CASES_CARDINAL:
            self.assertEqual(
                num2words(number, lang="ce", case=case, clazz=clazz),
                expected,
            )

    def test_millions(self):
        for number, expected in TEST_CASES_MILLIONS:
            self.assertEqual(num2words(number, lang="ce"), expected)

    def test_ordinal_number(self):
        # The case column of the fixture is not passed to the converter.
        for number, _case, clazz, expected in TEST_CASES_ORDINAL:
            self.assertEqual(
                num2words(number, lang="ce", to="ordinal", clazz=clazz),
                expected,
            )
        self.assertEqual(num2words(3, to="ordinal_num", lang="ce"), "3-й")
        self.assertEqual(num2words(5, to="ordinal_num", lang="ce"), "5-й")
        self.assertEqual(num2words(82, to="ordinal_num", lang="ce"), "82-й")

    def test_year(self):
        for number, case, expected in TEST_CASES_YEAR:
            self.assertEqual(
                num2words(number, lang="ce", to="year", case=case), expected
            )

    def test_currency(self):
        # Rows are (amount, case, currency code, expected words).
        for amount, case, currency, expected in TEST_CURRENCY:
            self.assertEqual(
                num2words(
                    amount,
                    lang="ce",
                    to="currency",
                    currency=currency,
                    case=case,
                ),
                expected,
            )

    def test_currency_missing(self):
        # This suite must exercise the Chechen converter; the call used to
        # pass lang="cy" (Welsh), a copy-paste slip from the Welsh tests.
        with self.assertRaises(NotImplementedError):
            num2words(2.45, to="currency", lang="ce", currency="DEM")

    def test_decimals(self):
        for number, expected in TEST_CASES_DECIMALS:
            self.assertEqual(num2words(number, lang="ce"), expected)
+# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = ( + (0, "dim"), + (1, "un"), + (1, "un"), + (2, "dau"), + (3, "tri"), + (4, "pedwar"), + (5, "pump"), + (6, "chwech"), + (7, "saith"), + (8, "wyth"), + (9, "naw"), + (10, "deg"), + (11, "un ar ddeg"), + (12, "deuddeg"), + (13, "tri ar ddeg"), + (14, "pedwar ar ddeg"), + (15, "pymtheg"), + (16, "un ar bymtheg"), + (17, "dau ar bymtheg"), + (18, "deunaw"), + (19, "pedwar ar bymtheg"), + (20, "ugain"), + (21, "un ar hugain"), + (22, "dau ar hugain"), + (23, "tri ar hugain"), + (24, "pedwar ar hugain"), + (25, "pump ar hugain"), + (26, "chwech ar hugain"), + (27, "saith ar hugain"), + (28, "wyth ar hugain"), + (29, "naw ar hugain"), + (30, "deg ar hugain"), + (31, "un ar ddeg ar hugain"), + (32, "deuddeg ar hugain"), + (33, "tri ar ddeg ar hugain"), + (34, "pedwar ar ddeg ar hugain"), + (35, "pymtheg ar hugain"), + (36, "un ar bymtheg ar hugain"), + (37, "dau ar bymtheg ar hugain"), + (38, "deunaw ar hugain"), + (39, "pedwar ar bymtheg ar hugain"), + (40, "deugain"), + (41, "un a deugain"), + (42, "dau a deugain"), + (43, "tri a deugain"), + (44, "pedwar a deugain"), + (45, "pump a deugain"), + (46, "chwech a deugain"), + (47, "saith a deugain"), + (48, "wyth a deugain"), + (49, "naw a deugain"), + (50, "hanner cant"), + (51, "hanner cant ac un"), + (52, "hanner cant a dau"), + (53, "hanner cant a thri"), + (54, "hanner cant a phedwar"), + (55, "hanner cant a phump"), + (56, "hanner cant a chwech"), + (57, "hanner cant a saith"), + (58, "hanner cant a wyth"), + (59, "hanner cant a naw"), + (60, "trigain"), + (61, "un a thrigain"), + (62, "dau a thrigain"), + (63, "tri a thrigain"), + (64, "pedwar a thrigain"), + (65, 
"pump a thrigain"), + (66, "chwech a thrigain"), + (67, "saith a thrigain"), + (68, "wyth a thrigain"), + (69, "naw a thrigain"), + (70, "deg a thrigain"), + (71, "un ar ddeg a thrigain"), + (72, "deuddeg a thrigain"), + (73, "tri ar ddeg a thrigain"), + (74, "pedwar ar ddeg a thrigain"), + (75, "pymtheg a thrigain"), + (76, "un ar bymtheg a thrigain"), + (77, "dau ar bymtheg a thrigain"), + (78, "deunaw a thrigain"), + (79, "pedwar ar bymtheg a thrigain"), + (80, "pedwar ugain"), + (81, "un a phedwar ugain"), + (82, "dau a phedwar ugain"), + (83, "tri a phedwar ugain"), + (84, "pedwar a phedwar ugain"), + (85, "pump a phedwar ugain"), + (86, "chwech a phedwar ugain"), + (87, "saith a phedwar ugain"), + (88, "wyth a phedwar ugain"), + (89, "naw a phedwar ugain"), + (90, "deg a phedwar ugain"), + (91, "un ar ddeg a phedwar ugain"), + (92, "deuddeg a phedwar ugain"), + (93, "tri ar ddeg a phedwar ugain"), + (94, "pedwar ar ddeg a phedwar ugain"), + (95, "pymtheg a phedwar ugain"), + (96, "un ar bymtheg a phedwar ugain"), + (97, "dau ar bymtheg a phedwar ugain"), + (98, "deunaw a phedwar ugain"), + (99, "pedwar ar bymtheg a phedwar ugain"), + (100, "cant"), + (101, "cant ac un"), + (102, "cant a dau"), + (103, "cant a thri"), + (104, "cant a phedwar"), + (105, "cant a phump"), + (106, "cant a chwech"), + (107, "cant a saith"), + (108, "cant ac wyth"), + (109, "cant a naw"), + (110, "cant a deg"), + (111, "cant ac un ar ddeg"), + (112, "cant a deuddeg"), + (113, "cant a thri ar ddeg"), + (114, "cant a phedwar ar ddeg"), + (115, "cant a phymtheg"), + (116, "cant ac un ar bymtheg"), + (117, "cant a dau ar bymtheg"), + (118, "cant a deunaw"), + (119, "cant a phedwar ar bymtheg"), + (120, "cant ac ugain"), + (121, "cant ac un ar hugain"), + (122, "cant a dau ar hugain"), + (100, "cant"), + (217, "dau gant a dau ar bymtheg"), + (334, "tri chant a phedwar ar ddeg ar hugain"), + (451, "pedwar cant a hanner ac un"), + (568, "pump cant ac wyth a thrigain"), + (685, "chwech 
chant a phump a phedwar ugain"), + (802, "wyth cant a dau"), + (919, "naw cant a phedwar ar bymtheg"), + (100, "cant"), + (150, "cant a hanner"), + (200, "dau gant"), + (300, "tri chant"), + (400, "pedwar cant"), + (500, "pump cant"), + (600, "chwech chant"), + (700, "saith cant"), + (800, "wyth cant"), + (900, "naw cant"), + (1000, "mil"), + (1000, "mil"), + (12111, "deuddeg mil cant ac un ar ddeg"), + (23222, "tair ar hugain mil dau gant a dau ar hugain"), + ( + 34333, + "pedair ar ddeg ar hugain mil tri chant a thri ar ddeg ar hugain", + ), + (45444, "pump a deugain mil pedwar cant a phedwar a deugain"), + (56555, "hanner cant a chwech mil pump cant a hanner a phump"), + (67666, "saith a thrigain mil chwech chant a chwech a thrigain"), + (78777, "deunaw a thrigain mil saith cant a dau ar bymtheg a thrigain"), + (89888, "naw a phedwar ugain mil wyth cant ac wyth a phedwar ugain"), + (100999, "cant mil naw cant a phedwar ar bymtheg a phedwar ugain"), + (112110, "cant a deuddeg mil cant a deg"), + (123221, "cant a thair ar hugain mil dau gant ac un ar hugain"), + ( + 134332, + "cant a phedair ar ddeg ar hugain mil tri chant a deuddeg ar hugain", + ), + (145443, "cant a phump a deugain mil pedwar cant a thri a deugain"), + (156554, "cant a hanner a chwech mil pump cant a hanner a phedwar"), + (123, "cant a thri ar hugain"), + (2345, "dwy fil tri chant a phump a deugain"), + (34567, "pedair ar ddeg ar hugain mil pump cant a saith a thrigain"), + (654321, "chwech chant a hanner a phedair mil tri chant ac un ar hugain"), + ( + 7654321, + "saith miliwn chwech chant a hanner a " + "phedair mil tri chant ac un ar hugain", + ), + ( + 987654321, + "naw cant a saith a phedwar ugain miliwn chwech chant a " + "hanner a phedair mil tri chant ac un ar hugain", + ), + ( + 123456789012, + "cant a thri ar hugain biliwn pedwar cant a hanner a chwech miliwn " + "saith cant a naw a phedwar ugain mil deuddeg", + ), + (2023, "dwy fil tri ar hugain"), + (-40123, "meinws deugain mil cant 
a thri ar hugain"), + (12340000000000000, "deuddeg cwadriliwn tri chant a deugain triliwn"), + (3000000000000000, "tri chwadriliwn"), + (2500000000000000000000000000000000, "dau ddengiliwn pump cant noniliwn"), +) + + +TEST_CASES_CARDINAL_FEM = ( + (2, "dwy"), + (3, "tair"), + (4, "pedair"), + (5, "pump"), + (6, "chwech"), + (7, "saith"), + (8, "wyth"), + (9, "naw"), + (10, "deg"), + (11, "un ar ddeg"), + (12, "deuddeg"), + (13, "tair ar ddeg"), + (14, "pedair ar ddeg"), + (15, "pymtheg"), + (16, "un ar bymtheg"), + (17, "dwy ar bymtheg"), + (18, "deunaw"), + (19, "pedair ar bymtheg"), + (20, "ugain"), + (21, "un ar hugain"), + (22, "dwy ar hugain"), + (23, "tair ar hugain"), + (24, "pedair ar hugain"), + (25, "pump ar hugain"), + (26, "chwech ar hugain"), + (27, "saith ar hugain"), + (28, "wyth ar hugain"), + (29, "naw ar hugain"), + (30, "deg ar hugain"), + (31, "un ar ddeg ar hugain"), + (32, "deuddeg ar hugain"), + (33, "tair ar ddeg ar hugain"), + (34, "pedair ar ddeg ar hugain"), + (35, "pymtheg ar hugain"), + (36, "un ar bymtheg ar hugain"), + (37, "dwy ar bymtheg ar hugain"), + (38, "deunaw ar hugain"), + (39, "pedair ar bymtheg ar hugain"), + (40, "deugain"), + (41, "un a deugain"), + (42, "dwy a deugain"), + (43, "tair a deugain"), + (44, "pedair a deugain"), + (45, "pump a deugain"), + (46, "chwech a deugain"), + (47, "saith a deugain"), + (48, "wyth a deugain"), + (49, "naw a deugain"), + (50, "hanner cant"), + (51, "hanner cant ac un"), + (52, "hanner cant a dwy"), + (53, "hanner cant a thair"), + (54, "hanner cant a phedair"), + (55, "hanner cant a phump"), + (56, "hanner cant a chwech"), + (57, "hanner cant a saith"), + (58, "hanner cant a wyth"), + (59, "hanner cant a naw"), + (60, "trigain"), + (61, "un a thrigain"), + (62, "dwy a thrigain"), +) + +TEST_CASES_ORDINAL = ( + (0, "dimfed"), + (1, "cyntaf"), + (2, "ail"), + (3, "trydydd"), + (4, "pedwerydd"), + (5, "pumed"), + (6, "chweched"), + (7, "saithfed"), + (8, "wythfed"), + (9, "nawfed"), + 
(10, "degfed"), + (11, "unfed ar ddeg"), + (12, "deuddegfed"), + (13, "trydydd ar ddeg"), + (14, "pedwerydd ar ddeg"), + (15, "pymthegfed"), + (16, "unfed ar bymtheg"), + (17, "ail ar bymtheg"), + (18, "deunawfed"), + (19, "pedwerydd ar bymtheg"), + (20, "ugainfed"), + (21, "cyntaf ar hugain"), + (22, "ail ar hugain"), + (23, "trydydd ar hugain"), + (24, "pedwerydd ar hugain"), + (25, "pumed ar hugain"), + (26, "chweched ar hugain"), + (27, "saithfed ar hugain"), + (28, "wythfed ar hugain"), + (29, "nawfed ar hugain"), + (30, "degfed ar hugain"), + (31, "unfed ar ddeg ar hugain"), + (32, "deuddegfed ar hugain"), + (33, "trydydd ar ddeg ar hugain"), + (34, "pedwerydd ar ddeg ar hugain"), + (35, "pymthegfed ar hugain"), + (36, "unfed ar bymtheg ar hugain"), + (37, "ail ar bymtheg ar hugain"), + (38, "deunawfed ar hugain"), + (39, "pedwerydd ar bymtheg ar hugain"), + (40, "deugainfed"), + (41, "cyntaf a deugain"), + (42, "ail a deugain"), + (43, "trydydd a deugain"), + (44, "pedwerydd a deugain"), + (45, "pumed a deugain"), + (46, "chweched a deugain"), + (47, "saithfed a deugain"), + (48, "wythfed a deugain"), + (49, "nawfed a deugain"), + (50, "degfed a deugain"), + (51, "unfed ar ddeg a deugain"), + (52, "deuddegfed a deugain"), + (53, "trydydd ar ddeg a deugain"), + (54, "pedwerydd ar ddeg a deugain"), + (55, "pymthegfed a deugain"), + (56, "unfed ar bymtheg a deugain"), + (57, "ail ar bymtheg a deugain"), + (58, "deunawfed a deugain"), + (59, "pedwerydd ar bymtheg a deugain"), + (60, "trigainfed"), + (61, "cyntaf a thrigain"), + (62, "ail a thrigain"), + (63, "trydydd a thrigain"), + (64, "pedwerydd a thrigain"), + (65, "pumed a thrigain"), + (66, "chweched a thrigain"), + (67, "saithfed a thrigain"), + (68, "wythfed a thrigain"), + (69, "nawfed a thrigain"), + (70, "degfed a thrigain"), + (71, "unfed ar ddeg a thrigain"), + (72, "deuddegfed a thrigain"), + (73, "trydydd ar ddeg a thrigain"), + (74, "pedwerydd ar ddeg a thrigain"), + (75, "pymthegfed a 
thrigain"), + (76, "unfed ar bymtheg a thrigain"), + (77, "ail ar bymtheg a thrigain"), + (78, "deunawfed a thrigain"), + (79, "pedwerydd ar bymtheg a thrigain"), + (80, "pedwar ugainfed"), + (81, "cyntaf a phedwar ugain"), + (82, "ail a phedwar ugain"), + (83, "trydydd a phedwar ugain"), + (84, "pedwerydd a phedwar ugain"), + (85, "pumed a phedwar ugain"), + (86, "chweched a phedwar ugain"), + (87, "saithfed a phedwar ugain"), + (88, "wythfed a phedwar ugain"), + (89, "nawfed a phedwar ugain"), + (90, "degfed a phedwar ugain"), + (91, "unfed ar ddeg a phedwar ugain"), + (92, "deuddegfed a phedwar ugain"), + (93, "trydydd ar ddeg a phedwar ugain"), + (94, "pedwerydd ar ddeg a phedwar ugain"), + (95, "pymthegfed a phedwar ugain"), + (96, "unfed ar bymtheg a phedwar ugain"), + (97, "ail ar bymtheg a phedwar ugain"), + (98, "deunawfed a phedwar ugain"), + (99, "pedwerydd ar bymtheg a phedwar ugain"), + (100, "canfed"), +) + +TEST_CASES_YEAR = [ + (1922, "mil naw dau dau"), + (1989, "mil naw wyth naw"), + (1812, "mil wyth un dau"), + (2012, "dwy fil deuddeg"), + (2023, "dwy fil tri ar hugain") + ] + +TEST_CASES_DECIMALS = [ + (123.4567, "cant a thri ar hugain pwynt pedwar pump chwech saith") +] + +TEST_CASES_TO_CURRENCY_GBP = ( + (0.00, "dim punt"), + (0.23, "tri cheiniog ar hugain"), + (2.04, "dwy bunt, pedwar ceiniog"), + (3.50, "tair punt, hanner cant ceiniog"), + (2002.15, "dwy fil dwy o bunnoedd, pymtheg ceiniog"), + (100.01, "cant punt, ceiniog"), + (50.00, "hanner cant punt"), + (51.00, "hanner cant ac un punt"), + (152.50, "cant a hanner a dwy o bunnoedd, hanner cant ceiniog"), +) + +TEST_CASES_COUNTED = [ + (2, "ci", "masc", "dau gi"), + (2, "ty", "masc", "dau dy"), + (2, "llwy", "fem", "dwy lwy"), + (2, "rhaglen", "masc", "dau raglen"), + (11, "ci", "masc", "un ci ar ddeg"), + (13, "ci", "masc", "tri chi ar ddeg"), + (26, "ci", "masc", "chwech chi ar hugain"), + (56, "ci", "masc", "hanner cant a chwech chi"), + (100, "cwn", "masc", "cant o gwn"), + (2000, 
class Num2WordsCYTest(TestCase):
    """Tests for the Welsh (``lang="cy"``) converter."""

    def test_number(self):
        for number, expected in TEST_CASES_CARDINAL:
            self.assertEqual(num2words(number, lang="cy"), expected)

    def test_number_fem(self):
        for number, expected in TEST_CASES_CARDINAL_FEM:
            self.assertEqual(
                num2words(number, lang="cy", gender="fem"), expected
            )

    def test_number_not_implemented(self):
        with self.assertRaises(NotImplementedError):
            num2words(10**66, lang="cy")

    def test_decimals(self):
        for number, expected in TEST_CASES_DECIMALS:
            self.assertEqual(num2words(number, lang="cy"), expected)

    def test_ordinals(self):
        for number, expected in TEST_CASES_ORDINAL:
            self.assertEqual(
                num2words(number, lang="cy", to="ordinal"), expected
            )

    def test_ordinal_not_implemented(self):
        with self.assertRaises(NotImplementedError):
            num2words(101, lang="cy", to="ordinal")

    def test_pounds(self):
        for amount, expected in TEST_CASES_TO_CURRENCY_GBP:
            self.assertEqual(
                num2words(amount, lang="cy", to="currency", currency="GBP"),
                expected,
            )

    def test_other_cur(self):
        # No trailing comma after the call: it used to turn the statement
        # into a discarded one-element tuple.
        with self.assertRaises(NotImplementedError):
            num2words(10.23, lang="cy", to="currency", currency="DEM")

    def test_counted(self):
        # Rows are (number, counted noun, gender, expected words).
        for number, noun, gender, expected in TEST_CASES_COUNTED:
            self.assertEqual(
                num2words(number, lang="cy", counted=noun, gender=gender),
                expected,
            )

# TODO: add tests for to="ordinal_num" and to="year"; TEST_CASES_YEAR is
# defined in this module but not exercised by any test yet.