Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an option to preserve the input space characters #97

Merged
merged 3 commits into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions titlecase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,18 @@ def set_small_word_list(small=SMALL):
SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small)


def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False):
def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False):
"""
:param text: Titlecases input text
:param callback: Callback function that returns the titlecase version of a specific word
:param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing
:param preserve_blank_lines: Preserve blank lines in the output
:param normalise_space_characters: Convert all original spaces to normal space characters
:type text: str
:type callback: function
:type small_first_last: bool
:type preserve_blank_lines: bool
:type normalise_space_characters: bool

This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
Expand All @@ -100,7 +104,9 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
processed = []
for line in lines:
all_caps = line.upper() == line
words = regex.split('[\t ]', line)
split_line = regex.split(r'(\s)', line)
words = split_line[::2]
spaces = split_line[1::2]
tc_line = []
for word in words:
if callback:
Expand Down Expand Up @@ -188,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=F
lambda m: m.group(0).capitalize(), tc_line[-1]
)

result = " ".join(tc_line)
if normalise_space_characters:
result = " ".join(tc_line)
else:
line_to_be_joined = tc_line + spaces
line_to_be_joined[::2] = tc_line
line_to_be_joined[1::2] = spaces
result = "".join(line_to_be_joined)

result = SUBPHRASE.sub(lambda m: '%s%s' % (
m.group(1),
Expand Down
16 changes: 15 additions & 1 deletion titlecase/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from titlecase import titlecase, set_small_word_list, create_wordlist_filter_from_file


# (executed by `test_input_output` below)
# (executed by `test_specific_string` below)
TEST_DATA = (
(
"",
Expand Down Expand Up @@ -307,6 +307,10 @@
"Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person",
"Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person",
),
(
"a mix of\tdifferent\u200aspace\u2006characters",
"A Mix of\tDifferent\u200aSpace\u2006Characters",
),
)


Expand Down Expand Up @@ -429,6 +433,16 @@ def test_complex_blanks(self):
self.assertEqual(titlecase(s, preserve_blank_lines=True),
'\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n')

class TestNormaliseSpaceCharacters(unittest.TestCase):
    """Check how ``titlecase`` treats the whitespace between words.

    Each test asserts two things: by default the original separator
    characters are kept in the output, and with
    ``normalise_space_characters=True`` every separator becomes a plain
    space.
    """

    def test_tabs(self):
        """Tabs are kept by default and become spaces when normalising."""
        s = 'text\twith\ttabs'
        self.assertEqual(titlecase(s), 'Text\tWith\tTabs')
        self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Tabs')

    def test_nbsps(self):
        """No-break spaces (U+00A0) are kept by default and become spaces when normalising."""
        # The no-break spaces are written as escapes on purpose: a literal
        # U+00A0 looks exactly like a plain space and is easily replaced by
        # one in editors or copy/paste, which would make both assertions
        # below check the same thing.
        s = 'text\u00a0with\u00a0nonbreaking\u00a0spaces'
        self.assertEqual(titlecase(s), 'Text\u00a0With\u00a0Nonbreaking\u00a0Spaces')
        self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Nonbreaking Spaces')

# Run the test cases in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Loading