From 8ef9d0f9932c89db0f20d81fc56fe416b2617b4d Mon Sep 17 00:00:00 2001 From: Robin Whittleton Date: Fri, 22 Mar 2024 19:01:26 +0100 Subject: [PATCH 1/3] =?UTF-8?q?Fix=20test=20comment=E2=80=99s=20reference?= =?UTF-8?q?=20to=20removed=20test=5Finput=5Foutput?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That function was removed in 60f3ecd119097ebc7a59f04c4637d45ea9b1c930 --- titlecase/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/titlecase/tests.py b/titlecase/tests.py index b17423f..9e083f2 100644 --- a/titlecase/tests.py +++ b/titlecase/tests.py @@ -12,7 +12,7 @@ from titlecase import titlecase, set_small_word_list, create_wordlist_filter_from_file -# (executed by `test_input_output` below) +# (executed by `test_specific_string` below) TEST_DATA = ( ( "", From fba05517ece254486eb18f87c1200eeece269bce Mon Sep 17 00:00:00 2001 From: Robin Whittleton Date: Sat, 23 Mar 2024 10:05:44 +0100 Subject: [PATCH 2/3] Add preserve_blank_lines parameter to titlecase docstring --- titlecase/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/titlecase/__init__.py b/titlecase/__init__.py index 621f799..57e194b 100755 --- a/titlecase/__init__.py +++ b/titlecase/__init__.py @@ -82,9 +82,11 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F :param text: Titlecases input text :param callback: Callback function that returns the titlecase version of a specific word :param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing + :param preserve_blank_lines: Preserve blank lines in the output :type text: str :type callback: function :type small_first_last: bool + :type preserve_blank_lines: bool This filter changes all words to Title Caps, and attempts to be clever about *un*capitalizing SMALL words like a/an/the in the input. 
From 027dd215438e67187ecb3720a734f3f895d7747e Mon Sep 17 00:00:00 2001 From: Robin Whittleton Date: Sat, 23 Mar 2024 13:41:39 +0100 Subject: [PATCH 3/3] Preserve the input space characters in the output We now split (on line 107) on any whitespace character rather than just a tab or a space, and keep each original separator in the output. To let a user opt back into the older output, where every separator becomes a plain space, that behaviour is kept behind a new boolean parameter called `normalise_space_characters`. --- titlecase/__init__.py | 16 +++++++++++++--- titlecase/tests.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/titlecase/__init__.py b/titlecase/__init__.py index 57e194b..fd24431 100755 --- a/titlecase/__init__.py +++ b/titlecase/__init__.py @@ -77,16 +77,18 @@ def set_small_word_list(small=SMALL): SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small) -def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False): +def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False): """ :param text: Titlecases input text :param callback: Callback function that returns the titlecase version of a specific word :param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing :param preserve_blank_lines: Preserve blank lines in the output + :param normalise_space_characters: Convert all original spaces to normal space characters :type text: str :type callback: function :type small_first_last: bool :type preserve_blank_lines: bool + :type normalise_space_characters: bool This filter changes all words to Title Caps, and attempts to be clever about *un*capitalizing SMALL words like a/an/the in the input. 
@@ -102,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F processed = [] for line in lines: all_caps = line.upper() == line - words = regex.split('[\t ]', line) + split_line = regex.split(r'(\s)', line) + words = split_line[::2] + spaces = split_line[1::2] tc_line = [] for word in words: if callback: @@ -190,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F lambda m: m.group(0).capitalize(), tc_line[-1] ) - result = " ".join(tc_line) + if normalise_space_characters: + result = " ".join(tc_line) + else: + line_to_be_joined = tc_line + spaces + line_to_be_joined[::2] = tc_line + line_to_be_joined[1::2] = spaces + result = "".join(line_to_be_joined) result = SUBPHRASE.sub(lambda m: '%s%s' % ( m.group(1), diff --git a/titlecase/tests.py b/titlecase/tests.py index 9e083f2..9265ec0 100644 --- a/titlecase/tests.py +++ b/titlecase/tests.py @@ -307,6 +307,10 @@ "Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person", "Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person", ), + ( + "a mix of\tdifferent\u200aspace\u2006characters", + "A Mix of\tDifferent\u200aSpace\u2006Characters", + ), ) @@ -429,6 +433,16 @@ def test_complex_blanks(self): self.assertEqual(titlecase(s, preserve_blank_lines=True), '\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n') +class TestNormaliseSpaceCharacters(unittest.TestCase): + def test_tabs(self): + s = 'text\twith\ttabs' + self.assertEqual(titlecase(s), 'Text\tWith\tTabs') + self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Tabs') + + def test_nbsps(self): + s = 'text with nonbreaking spaces' + self.assertEqual(titlecase(s), 'Text With Nonbreaking Spaces') + self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Nonbreaking Spaces') if __name__ == '__main__': unittest.main()