Skip to content

Commit

Permalink
fixed some issues with ^bi* and brackets
Browse files: browse the repository at this point in the history
  • Loading branch information
javadr committed Mar 30, 2022
1 parent bad0a0f commit 800dc45
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 10 deletions.
4 changes: 4 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.9.2 - 2022-03-30
-- Fixed some issues with ^bi*
-- Fixed the issue of extra ZWNJ beside brackets

0.9.1 - 2022-03-30
-- Supports bi*

Expand Down
2 changes: 1 addition & 1 deletion negar/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

__version__ = "0.9.1"
__version__ = "0.9.2"

LOGO = (Path(__file__).parent.absolute()/"logo.png").as_posix()
DATAFILE = Path(__file__).parent.absolute()/"data/untouchable.dat"
Expand Down
18 changes: 9 additions & 9 deletions negar/virastar.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ def fix_english_numbers(self):

def fix_prefix_spacing(self):
"""Joins a detached prefix (mi* nemi* bi*) to the next word with a ZWNJ.

Rewrites ``self.text`` in place: whitespace that follows one of the
prefixes at a word boundary is replaced by a single ZWNJ (U+200C).
"""
# NOTE(review): this is a diff rendering, so the two calls below are the
# pre-commit line followed by its replacement, not two sequential passes.
# Pre-commit pattern: the prefix must be followed directly by whitespace.
self.text = re.sub(r"\b(ن?می|بی)\s+",r'\1‌', self.text)
# Post-commit pattern: also consumes any ZWNJ characters sitting between the
# prefix and the whitespace, so the replacement leaves a single ZWNJ there.
# The second capture group (the whitespace) is not used by the replacement.
self.text = re.sub(r"\b(ن?می|بی)‌*(\s+)",r'\1‌', self.text)

def fix_prefix_separate(self):
"""Puts ZWNJ between a word and its prefix (mi* nemi* bi*)"""
regex = re.compile(r"\b(بی|ن?می)(\S+)") # \b for words like سهمیه
regex = re.compile(r"\b(بی|ن?می)‌*([^\[\]\(\)\s]+)") # \b for words like سهمیه

wlist = self.text.split(" ")
for word in wlist:
Expand Down Expand Up @@ -193,36 +193,36 @@ def aggressive(self):

def fix_spacing_for_braces_and_quotes(self):
"""Fixes the braces and quotes spacing problems."""
# ()[]{}""«» should have one space before and one virtual space after (inside)
# ()[]{}""«» should have one space before and no space after (inside)
self.text = re.sub(
r'[ ‌]*(\()\s*([^)]+?)\s*?(\))[ ‌]*',
r' \1‌\2‌\3 ',
r' \1\2\3 ',
self.text
)
self.text = re.sub(
r'[ ‌]*(\[)\s*([^)]+?)\s*?(\])[ ‌]*',
r' \1‌\2‌\3 ',
r' \1\2\3 ',
self.text
)
self.text = re.sub(
r'[ ‌]*(\{)\s*([^)]+?)\s*?(\})[ ‌]*',
r' \1‌\2‌\3 ',
r' \1\2\3 ',
self.text
)
self.text = re.sub(
r'[ ‌]*(“)\s*([^)]+?)\s*?(”)[ ‌]*',
r' \1‌\2‌\3 ',
r' \1\2\3 ',
self.text
)
self.text = re.sub(
r'[ ‌]*(«)\s*([^)]+?)\s*?(»)[ ‌]*',
r' \1‌\2‌\3 ',
r' \1\2\3 ',
self.text
)
# : ; , ! ? and their Persian counterparts should have one space after and no space before
self.text = re.sub(
r'[ ‌ ]*([:;,؛،.؟!]{1})[ ‌ ]*',
r'\1 ',
r'\1 ',
self.text
)
self.text = re.sub(
Expand Down

0 comments on commit 800dc45

Please sign in to comment.