Skip to content

Commit

Permalink
Add an option to preserve the input space characters
Browse files Browse the repository at this point in the history
This potentially changes the output in two ways:

1. We now split (on line 107) on any whitespace character (`\s`) rather than on just a tab or a space. The existing test cases continue to pass, and by default the words in the output are still joined with a single ordinary space.

2. There’s a new boolean parameter called `preserve_space_characters` that makes the output reuse the whitespace characters that were passed in, instead of replacing each one with an ordinary space. This is more faithful to the input, but it is a visible change to the existing behaviour, so it is gated behind a parameter that defaults to `False`.
  • Loading branch information
robinwhittleton committed Mar 23, 2024
1 parent fba0551 commit ce95cea
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
16 changes: 13 additions & 3 deletions titlecase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,18 @@ def set_small_word_list(small=SMALL):
SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small)


def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False):
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, preserve_space_characters=False):
"""
:param text: Titlecases input text
:param callback: Callback function that returns the titlecase version of a specific word
:param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing
:param preserve_blank_lines: Preserve blank lines in the output
:param preserve_space_characters: Preserve original space characters
:type text: str
:type callback: function
:type small_first_last: bool
:type preserve_blank_lines: bool
:type preserve_space_characters: bool
This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
Expand All @@ -102,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
processed = []
for line in lines:
all_caps = line.upper() == line
words = regex.split('[\t ]', line)
split_line = regex.split(r'(\s)', line)
words = split_line[::2]
spaces = split_line[1::2]
tc_line = []
for word in words:
if callback:
Expand Down Expand Up @@ -190,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
lambda m: m.group(0).capitalize(), tc_line[-1]
)

result = " ".join(tc_line)
if preserve_space_characters:
line_to_be_joined = tc_line + spaces
line_to_be_joined[::2] = tc_line
line_to_be_joined[1::2] = spaces
result = "".join(line_to_be_joined)
else:
result = " ".join(tc_line)

result = SUBPHRASE.sub(lambda m: '%s%s' % (
m.group(1),
Expand Down
14 changes: 14 additions & 0 deletions titlecase/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@
"Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person",
"Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person",
),
(
"a mix of\tdifferent\u200aspace\u2006characters",
"A Mix of Different Space Characters",
),
)


Expand Down Expand Up @@ -429,6 +433,16 @@ def test_complex_blanks(self):
self.assertEqual(titlecase(s, preserve_blank_lines=True),
'\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n')

class TestPreserveSpaceCharacters(unittest.TestCase):
    """Check how ``titlecase`` handles word separators other than U+0020.

    Each test asserts two things about the same input: with default
    arguments the words come back joined by ordinary spaces, and with
    ``preserve_space_characters=True`` the original separators are kept.
    """

    def test_tabs(self):
        s = 'text\twith\ttabs'
        # Default behaviour: tab separators are replaced by plain spaces.
        self.assertEqual(titlecase(s), 'Text With Tabs')
        # Opt-in behaviour: the tabs from the input survive in the output.
        self.assertEqual(
            titlecase(s, preserve_space_characters=True),
            'Text\tWith\tTabs',
        )

    def test_nbsps(self):
        # The non-breaking spaces are written as explicit escapes so they
        # are visible in the source and cannot be silently turned into
        # ordinary spaces by an editor or a copy/paste, which would make
        # this test indistinguishable from a plain-space test.
        s = 'text\u00a0with\u00a0nonbreaking\u00a0spaces'
        # Default behaviour: NBSP separators are replaced by plain spaces.
        self.assertEqual(titlecase(s), 'Text With Nonbreaking Spaces')
        # Opt-in behaviour: the NBSPs from the input survive in the output.
        self.assertEqual(
            titlecase(s, preserve_space_characters=True),
            'Text\u00a0With\u00a0Nonbreaking\u00a0Spaces',
        )

if __name__ == '__main__':
unittest.main()

0 comments on commit ce95cea

Please sign in to comment.