Skip to content

Commit

Permalink
Merge pull request #17 from gunsodo/feat/number-to-lao
Browse files Browse the repository at this point in the history
Implement `num_to_laoword`
  • Loading branch information
wannaphong authored May 23, 2024
2 parents d91bfaf + 0174b26 commit 8738c50
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 9 deletions.
2 changes: 2 additions & 0 deletions laonlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
# Names exported by ``from laonlp.util import *``.
__all__ = [
"lao_digit_to_arabic_digit",
"arabic_digit_to_lao_digit",
"num_to_laoword",
"remove_tone_mark",
]
from laonlp.util.digitconv import (
lao_digit_to_arabic_digit,
arabic_digit_to_lao_digit,
num_to_laoword,
)
from laonlp.util.lao import (
remove_tone_mark
Expand Down
54 changes: 50 additions & 4 deletions laonlp/util/digitconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
"ເກົ້າ",
"ສູນ"
]
# Place-value words used by num_to_laoword. A digit at position ``p`` (counted
# from the right, starting at 0) takes ``_places[p % 6]``; index 6 is the word
# inserted at every sixth position and the last entry is appended to the part
# of the number that lies above its last nine digits.
_places = [
"", "ສິບ", "ຮ້ອຍ", "ພັນ", "ຫມື່ນ", "ແສນ", "ລ້ານ", "ຕື້",
]
# Substring rewrites applied with ``str.replace`` to the assembled word string,
# in the insertion order of this dict. Order matters: the first entry turns the
# index-4 place word into the tens word, so the later entries (which appear to
# encode the irregular 10 / 11 / 20 / 21 readings) also match in that position.
_exceptions = {"ຫມື່ນ": "ສິບ", "ນຶ່ງສິບ": "ສິບ", "ສອງສິບນຶ່ງ": "ຊາວເອັດ", "ສອງສິບ": "ຊາວ", "ສິບນຶ່ງ": "ສິບເອັດ"}

# Character-level lookup tables between the two digit sets, paired by position.
_dict_lao_arabic = {
    lao: arabic for lao, arabic in zip(NUMBERS, _arabic_numerals)
}
_dict_arabic_lao = {
    arabic: lao for lao, arabic in zip(NUMBERS, _arabic_numerals)
}
# Translation table consumed by ``str.translate`` (Lao digit -> Arabic digit).
_lao_arabic_table = str.maketrans(_dict_lao_arabic)
Expand Down Expand Up @@ -56,9 +61,50 @@ def arabic_digit_to_lao_digit(text: str) -> str:
return text.translate(_arabic_lao_table)


def num_to_laoword(number: int) -> str:
    """
    Number to Lao word

    The digits are read from right to left; each non-zero digit contributes
    its pronunciation followed by the place word for its position. The
    irregular readings listed in ``_exceptions`` are applied afterwards as
    substring rewrites, which is also what turns the ten-thousands place
    into "tens of thousands" (e.g. 20000 is read as "twenty thousand").

    :param number int: Integer to be converted (``None`` gives an empty string)
    :return: returns a string of Lao word representation of the integer
    :rtype: str

    :Example:
    ::

        num_to_laoword(112)
        # output: ນຶ່ງຮ້ອຍສິບສອງ
    """
    if number is None:
        return ""

    if number == 0:
        return _pronunciation[-1]

    negative = number < 0
    digits = str(abs(number))

    # Special case >= 1e9: everything above the last nine digits is converted
    # recursively and followed by the highest place word.
    prefix = ""
    if len(digits) >= 10:
        prefix = num_to_laoword(int(digits[:-9])) + _places[-1]
        # Re-parse the tail so that its leading zeros vanish; otherwise the
        # loop below would emit place words for an all-zero million group.
        digits = str(int(digits[-9:]))

    output = ""
    if digits != "0":
        reversed_digits = digits[::-1]
        for place, value in enumerate(reversed_digits):
            # Entering the millions group: its place word precedes everything
            # that has been assembled so far.
            if place % 6 == 0 and place > 0:
                output = _places[6] + output

            if value != "0":
                output = (
                    _pronunciation[int(value) - 1]
                    + _places[place % 6]
                    + output
                )
            elif (
                place % 6 == 3
                and reversed_digits[place + 1:place + 2] not in ("", "0")
            ):
                # The thousands digit is zero but the ten-thousands digit is
                # not. Ten-thousands are spoken as "tens of thousands", so the
                # thousand word is still required (20000 -> "twenty thousand").
                output = _places[3] + output

        # Irregular readings, applied in the dict's insertion order.
        for search, replac in _exceptions.items():
            output = output.replace(search, replac)

    # The minus word goes in front of the whole number, prefix included.
    sign = "ລົບ" if negative else ""
    return sign + prefix + output
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
requirements = [i.strip() for i in f.readlines()]

extras = {
"word_vector": ["gensim", "huggingface-hub"],
"word_vector": ["scipy<1.11.0", "gensim", "huggingface-hub"],
"full": [
"scipy<1.11.0",
"gensim",
"huggingface-hub"
]
Expand Down
23 changes: 19 additions & 4 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,39 @@
# -*- coding: utf-8 -*-

import unittest
from laonlp.util import *
from laonlp.util import (
lao_digit_to_arabic_digit,
arabic_digit_to_lao_digit,
remove_tone_mark,
num_to_laoword,
)


class TestTagPackage(unittest.TestCase):
    """Unit tests for the helpers exported by ``laonlp.util``."""

    def test_lao_digit_to_arabic_digit(self):
        # Every Lao digit is replaced by its Arabic counterpart.
        self.assertEqual(
            lao_digit_to_arabic_digit("໑໒໓໔໕໖໗໘໙໐"),
            "1234567890"
        )

    def test_arabic_digit_to_lao_digit(self):
        # Inverse direction of the conversion above.
        self.assertEqual(
            arabic_digit_to_lao_digit("1234567890"),
            "໑໒໓໔໕໖໗໘໙໐"
        )

    def test_remove_tone_mark(self):
        self.assertEqual(
            remove_tone_mark("ຜູ້"),
            "ຜູ"
        )

    def test_num_to_laoword(self):
        # Degenerate inputs.
        self.assertEqual(num_to_laoword(None), "")
        self.assertEqual(num_to_laoword(0), "ສູນ")
        # Regular and negative numbers.
        self.assertEqual(num_to_laoword(112), "ນຶ່ງຮ້ອຍສິບສອງ")
        self.assertEqual(num_to_laoword(-273), "ລົບສອງຮ້ອຍເຈັດສິບສາມ")
        # Ten-thousands are read as "tens of thousands".
        self.assertEqual(num_to_laoword(12101), "ສິບສອງພັນນຶ່ງຮ້ອຍນຶ່ງ")
        self.assertEqual(num_to_laoword(20000), "ຊາວພັນ")
        # Millions and the >= 1e9 prefix path.
        self.assertEqual(
            num_to_laoword(987654321),
            "ເກົ້າຮ້ອຍແປດສິບເຈັດລ້ານຫົກແສນຫ້າສິບສີ່ພັນສາມຮ້ອຍຊາວເອັດ"
        )
        self.assertEqual(
            num_to_laoword(11987654321),
            "ສິບເອັດຕື້ເກົ້າຮ້ອຍແປດສິບເຈັດລ້ານຫົກແສນຫ້າສິບສີ່ພັນສາມຮ້ອຍຊາວເອັດ"
        )

0 comments on commit 8738c50

Please sign in to comment.