Skip to content

Commit

Permalink
Accept small words as a config option
Browse files Browse the repository at this point in the history
  • Loading branch information
blakeembrey committed Oct 3, 2023
1 parent 4befb41 commit c2c96b6
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 24 deletions.
1 change: 1 addition & 0 deletions packages/title-case/src/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ const TEST_CASES: [string, string][] = [
"Is Human Activity Responsible for the Climate Emergency? New Report Calls It ‘Unequivocal.’",
],
["лев николаевич толстой", "Лев Николаевич Толстой"],
["Read foo-bar.com", "Read foo-bar.com"],
];

describe("swap case", () => {
Expand Down
101 changes: 77 additions & 24 deletions packages/title-case/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,37 +1,90 @@
const SMALL_WORDS =
/\b(?:an?d?|a[st]|because|but|by|en|for|i[fn]|neither|nor|o[fnr]|only|over|per|so|some|tha[tn]|the|to|up|upon|vs?\.?|versus|via|when|with|without|yet)\b/i;
const TOKENS = /[^\s:–—-]+|./g;
const WHITESPACE = /\s/;
const TOKENS = /\S+|./g;
const IS_MANUAL_CASE = /\p{Ll}(?=[\p{Lu}])|\.\p{L}/u; // iPhone, example.com, U.N., etc.
const ALPHANUMERIC_PATTERN = /[\p{L}\d]/u;
const ALPHANUMERIC_PATTERN = /[\p{L}\d]+/gu;

export function titleCase(input: string, locale?: string[] | string) {
/**
 * Characters that may precede a word inside a single whitespace-delimited
 * token and still allow that word to be capitalized (e.g. the "b" in
 * "foo-bar"). A word preceded by any other character is left as written.
 */
const WORD_SEPARATORS: Set<string> = new Set([
  "—", // em dash
  "–", // en dash
  "-", // hyphen-minus
  "―", // horizontal bar
  "/", // slash
]);

/**
 * Default set of "small" words (articles, conjunctions, short prepositions)
 * that stay lowercase in a title unless they are the first or last token.
 *
 * Entries are compared with `Set#has` against the word exactly as it appears
 * in the input, so they must be lowercase and free of punctuation.
 *
 * The regular expression this set replaces matched the article "a" through
 * its `an?d?` alternative; "a" is listed explicitly here so that behavior is
 * preserved (otherwise "once upon a time" would become "Once upon A Time").
 */
const SMALL_WORDS = new Set([
  "a",
  "an",
  "and",
  "as",
  "at",
  "because",
  "but",
  "by",
  "en",
  "for",
  "if",
  "in",
  "neither",
  "nor",
  "of",
  "on",
  "or",
  "only",
  "over",
  "per",
  "so",
  "some",
  "that",
  "than",
  "the",
  "to",
  "up",
  "upon",
  "v",
  "vs",
  "versus",
  "via",
  "when",
  "with",
  "without",
  "yet",
]);

/**
 * Optional settings accepted by `titleCase`.
 */
export interface Options {
  /**
   * Locale (or list of locales) forwarded to `toLocaleUpperCase` when the
   * first character of a word is capitalized.
   */
  locale?: string[] | string;
  /**
   * Words to leave uncapitalized unless their token is at the very start or
   * end of the input. Matched exactly (case-sensitive) against each word.
   * When omitted, the module's built-in `SMALL_WORDS` set is used.
   */
  smallWords?: Set<string>;
}

export function titleCase(
input: string,
options: Options | string[] | string = {},
) {
let result = "";
let m: RegExpExecArray | null;

const { smallWords = SMALL_WORDS, locale } =
typeof options === "string" || Array.isArray(options)
? { locale: options }
: options;

// tslint:disable-next-line
while ((m = TOKENS.exec(input)) !== null) {
const { 0: token, index } = m;

if (
// Ignore already capitalized words.
!IS_MANUAL_CASE.test(token) &&
// Ignore small words except at beginning or end.
(!SMALL_WORDS.test(token) ||
index === 0 ||
index + token.length === input.length) &&
// Ignore URLs.
(input.charAt(index + token.length) !== ":" ||
WHITESPACE.test(input.charAt(index + token.length + 1)))
) {
// Find and uppercase first word character, skips over *modifiers*.
result += token.replace(ALPHANUMERIC_PATTERN, (m) =>
m.toLocaleUpperCase(locale),
);
continue;
}
// Ignore already capitalized words.
if (IS_MANUAL_CASE.test(token)) {
result += token;
} else {
result += token.replace(ALPHANUMERIC_PATTERN, (m, i) => {
// Ignore small words except at beginning or end.
if (
index > 0 &&
index + token.length < input.length &&
smallWords.has(m)
) {
return m;
}

result += token;
// Only capitalize words after a valid word separator.
if (i > 1 && !WORD_SEPARATORS.has(input.charAt(index + i - 1))) {
return m;
}

return m.charAt(0).toLocaleUpperCase(locale) + m.slice(1);
});
}
}

return result;
Expand Down

0 comments on commit c2c96b6

Please sign in to comment.