From 62e054cd2adff237173b7f2dffd80929d27d16e7 Mon Sep 17 00:00:00 2001 From: Nipun Sadvilkar Date: Tue, 11 Aug 2020 17:57:15 +0530 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=9A=91=20=E2=9C=85=20=20Handle=20Newl?= =?UTF-8?q?ine=20character=20&=20update=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pysbd/processor.py | 1 + tests/regression/test_issues.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pysbd/processor.py b/pysbd/processor.py index f140b8f..df591f1 100644 --- a/pysbd/processor.py +++ b/pysbd/processor.py @@ -28,6 +28,7 @@ def __init__(self, text, lang, char_span=False): def process(self): if not self.text: return self.text + self.text = self.text.replace('\n', '\r') li = ListItemReplacer(self.text) self.text = li.add_line_break() self.replace_abbreviations() diff --git a/tests/regression/test_issues.py b/tests/regression/test_issues.py index d9ac4f5..d0ac4cd 100644 --- a/tests/regression/test_issues.py +++ b/tests/regression/test_issues.py @@ -58,7 +58,13 @@ ('#55', "She turned to him, \"This is great.\" She held the book out to show him.", [ ('She turned to him, "This is great." ', 0, 36), ('She held the book out to show him.', 36, 70) - ]) + ]), +('#56', +"""This eBook is for the use of anyone anywhere at no cost +you may copy it, give it away or re-use it under the terms of the this license +""", +[('This eBook is for the use of anyone anywhere at no cost\n', 0, 56), + ('you may copy it, give it away or re-use it under the terms of the this license\n', 56, 135)]) ] From 60983a2835ae6ea73c0f8b8163f0522ba9700e55 Mon Sep 17 00:00:00 2001 From: Nipun Sadvilkar Date: Tue, 11 Aug 2020 18:08:14 +0530 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=94=96=20=20Bump=20up=20version=20to?= =?UTF-8?q?=20v0.3.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 10 ++++++++++ pysbd/about.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ddeaaf..cc9e8e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +# v0.3.1 +- 🚑 ✅ Handle Newline character & update tests + +# v0.3.0 +- ✨ 💫 Support Multiple languages - \#2 +- 🏎⚡️💯 Benchmark across Segmentation Tools, Libraries and Algorithms +- 🎨 ♻️ Update sentence char_span logic +- ⚡️ Performance improvements - \#41 +- ♻️🐛 Refactor AbbreviationReplacer + # v0.3.0rc - ✨ 💫 sent `char_span` through with spaCy & regex approach - \#63 - ♻️ Refactoring to support multiple languages diff --git a/pysbd/about.py b/pysbd/about.py index 4de8a4f..052283f 100644 --- a/pysbd/about.py +++ b/pysbd/about.py @@ -2,7 +2,7 @@ # https://python-packaging-user-guide.readthedocs.org/en/latest/single_source_version/ __title__ = "pysbd" -__version__ = "0.3.0" +__version__ = "0.3.1" __summary__ = "pysbd (Python Sentence Boundary Disambiguation) is a rule-based sentence boundary detection that works out-of-the-box across many languages." __uri__ = "http://nipunsadvilkar.github.io/" __author__ = "Nipun Sadvilkar"