From 800dc45ff2a9356da6dcc4f50a46cd5b8ba6d6ea Mon Sep 17 00:00:00 2001 From: javad Date: Wed, 30 Mar 2022 22:32:38 +0430 Subject: [PATCH] fixed some issues with ^bi* and brackets --- Changelog.txt | 4 ++++ negar/constants.py | 2 +- negar/virastar.py | 18 +++++++++--------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index b99ebff..ee3defe 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,3 +1,7 @@ +0.9.2 - 2022-03-30 +-- Fixed some issues with ^bi* +-- Fixed the issue of extra ZWNJ beside brackets + 0.9.1 - 2022-03-30 -- Supports bi* diff --git a/negar/constants.py b/negar/constants.py index d1855b8..7ea3d06 100644 --- a/negar/constants.py +++ b/negar/constants.py @@ -1,6 +1,6 @@ from pathlib import Path -__version__ = "0.9.1" +__version__ = "0.9.2" LOGO = (Path(__file__).parent.absolute()/"logo.png").as_posix() DATAFILE = Path(__file__).parent.absolute()/"data/untouchable.dat" diff --git a/negar/virastar.py b/negar/virastar.py index fe8301b..5ae2c73 100644 --- a/negar/virastar.py +++ b/negar/virastar.py @@ -127,11 +127,11 @@ def fix_english_numbers(self): def fix_prefix_spacing(self): """Puts ZWNJ between a word and its prefix (mi* nemi* bi*)""" - self.text = re.sub(r"\b(ن?می|بی)\s+",r'\1‌', self.text) + self.text = re.sub(r"\b(ن?می|بی)‌*(\s+)",r'\1‌', self.text) def fix_prefix_separate(self): """Puts ZWNJ between a word and its prefix (mi* nemi* bi*)""" - regex = re.compile(r"\b(بی|ن?می)(\S+)") # \b for words like سهمیه + regex = re.compile(r"\b(بی|ن?می)‌*([^\[\]\(\)\s]+)") # \b for words like سهمیه wlist = self.text.split(" ") for word in wlist: @@ -193,36 +193,36 @@ def aggressive(self): def fix_spacing_for_braces_and_quotes(self): """Fixes the braces and quotes spacing problems.""" - # ()[]{}""«» should have one space before and one virtual space after (inside) + # ()[]{}""«» should have one space before and no space after (inside) self.text = re.sub( r'[ ‌]*(\()\s*([^)]+?)\s*?(\))[ ‌]*', - r' \1‌\2‌\3 ', + r' \1\2\3 ', self.text ) self.text = re.sub( r'[ ‌]*(\[)\s*([^)]+?)\s*?(\])[ ‌]*', - r' \1‌\2‌\3 ', + r' \1\2\3 ', self.text ) self.text = re.sub( r'[ ‌]*(\{)\s*([^)]+?)\s*?(\})[ ‌]*', - r' \1‌\2‌\3 ', + r' \1\2\3 ', self.text ) self.text = re.sub( r'[ ‌]*(“)\s*([^)]+?)\s*?(”)[ ‌]*', - r' \1‌\2‌\3 ', + r' \1\2\3 ', self.text ) self.text = re.sub( r'[ ‌]*(«)\s*([^)]+?)\s*?(»)[ ‌]*', - r' \1‌\2‌\3 ', + r' \1\2\3 ', self.text ) # : ; , ! ? and their Persian counterparts should have one space after and no space before self.text = re.sub( r'[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*', - r'‌\1 ', + r'\1 ', self.text ) self.text = re.sub(