Skip to content

Commit

Permalink
Merge pull request #12 from zr87/feat/add-replace-method-and-improve-…
Browse files Browse the repository at this point in the history
…error-handling

Feat/add replace method and improve error handling
  • Loading branch information
zr87 committed Aug 21, 2024
2 parents 4129cb4 + ec8541f commit 3d86b53
Show file tree
Hide file tree
Showing 5 changed files with 879 additions and 779 deletions.
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,23 @@ or even with yarn:
yarn add accent-folding
```

Simple use-case
## Public Methods

### `replace`

Replaces accented characters in a string with their unaccented equivalents.

```js
import AccentFolding from 'accent-folding';

const af = new AccentFolding();

af.replace('Fulanilo López'); // --> "Fulanilo Lopez"
```

### `highlightMatch`

Highlights matched terms in a string, ignoring accents.

```js
import AccentFolding from 'accent-folding';
Expand All @@ -47,7 +63,7 @@ const af = new AccentFolding();
af.highlightMatch('Fulanilo López', 'lo'); // --> "Fulani<b>lo</b> <b>Ló</b>pez"
```

Using with custom html tag
Use the 3d argument to specify the wrapping html tag (strong, em, span etc.):

```js
af.highlightMatch('Fulanilo López', 'lo', 'strong'); // --> "Fulani<strong>lo</strong> <strong>Ló</strong>pez"
Expand Down
64 changes: 45 additions & 19 deletions src/accentFolding.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import defaultAccentMap from './accentMap.js';
import defaultAccentMap from './accentMap.json';

class AccentFolding {
#cache;
#accentMap;

constructor(customMap = []) {
this.#accentMap = new Map([...defaultAccentMap, ...customMap]);
constructor() {
this.#accentMap = new Map([
...AccentFolding.convertAccentMapToArray(defaultAccentMap),
]);
this.#cache = new Map();
}

Expand All @@ -20,28 +22,48 @@ class AccentFolding {
return ret;
}

replace(text) {
if (typeof text !== 'string') {
throw new TypeError('Input must be a string');
}
return [...text].map((char) => this.#accentMap.get(char) || char).join('');
}

highlightMatch(str, fragment, wrapTag = 'b') {
if (!fragment) return str;
try {
if (!fragment) return str;

if (typeof str !== 'string' || typeof fragment !== 'string') {
throw new TypeError('Both str and fragment must be strings');
}

const escapedFragment = fragment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const strFolded = this.#fold(str).toLowerCase();
const fragmentFolded = this.#fold(escapedFragment).toLowerCase();
if (typeof wrapTag !== 'string') {
throw new TypeError('wrapTag must be a string');
}

const re = new RegExp(fragmentFolded, 'g');
let result = '';
let lastIndex = 0;
let hasMatch = false;
const escapedFragment = fragment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const strFolded = this.#fold(str).toLowerCase();
const fragmentFolded = this.#fold(escapedFragment).toLowerCase();

strFolded.replace(re, (match, index) => {
hasMatch = true;
result += this.#escapeHtml(str.slice(lastIndex, index));
result += `<${wrapTag}>${this.#escapeHtml(str.slice(index, index + match.length))}</${wrapTag}>`;
lastIndex = index + match.length;
});
const re = new RegExp(fragmentFolded, 'g');
let result = '';
let lastIndex = 0;
let hasMatch = false;

result += this.#escapeHtml(str.slice(lastIndex));
strFolded.replace(re, (match, index) => {
hasMatch = true;
result += this.#escapeHtml(str.slice(lastIndex, index));
result += `<${wrapTag}>${this.#escapeHtml(str.slice(index, index + match.length))}</${wrapTag}>`;
lastIndex = index + match.length;
});

return hasMatch ? result : str;
result += this.#escapeHtml(str.slice(lastIndex));

return hasMatch ? result : str;
} catch (error) {
console.error('Error in highlightMatch:', error.message);
throw error; // Return original string if there's an error
}
}

#escapeHtml(unsafe) {
Expand All @@ -52,6 +74,10 @@ class AccentFolding {
.replace(/"/g, '&quot;')
.replace(/'/g, '&#039;');
}

static convertAccentMapToArray(accentMap) {
return Object.entries(accentMap);
}
}

export default AccentFolding;
152 changes: 107 additions & 45 deletions src/accentFolding.js.test.js
Original file line number Diff line number Diff line change
@@ -1,61 +1,123 @@
import { expect, describe, it } from 'vitest';
import { expect, describe, it, beforeEach } from 'vitest';
import AccentFolding from './accentFolding.js';
import accentMap from './accentMap.json';

describe('AccentFolding', () => {
const accentFolder = new AccentFolding();
let accentFolder;

it('should recognize simple accents', () => {
expect(accentFolder.highlightMatch('Fulanilo López', 'lo')).toBe(
'Fulani<b>lo</b> <b>Ló</b>pez'
);
expect(accentFolder.highlightMatch('Erik Lørgensen', 'lo')).toBe(
'Erik <b>Lø</b>rgensen'
);
expect(accentFolder.highlightMatch('James Lö', 'lo')).toBe(
'James <b>Lö</b>'
);
beforeEach(() => {
accentFolder = new AccentFolding();
});

it("should wrap matched fragment with custom tag added in second parameter '<strong>'", () => {
expect(
accentFolder.highlightMatch('Fulanilo López', 'lo', 'strong')
).toEqual('Fulani<strong>lo</strong> <strong>Ló</strong>pez');
});
describe('highlightMatch', () => {
it('should throw TypeError if str is not a string', () => {
expect(() => accentFolder.highlightMatch(123, 'test')).toThrow(TypeError);
expect(() => accentFolder.highlightMatch(123, 'test')).toThrow(
'Both str and fragment must be strings'
);
});

it('wraps matched fragment with custom tag', () => {
expect(accentFolder.highlightMatch('Fulanilo López', 'lo', 'strong')).toBe(
'Fulani<strong>lo</strong> <strong>Ló</strong>pez'
);
});
it('should throw TypeError if fragment is not a string', () => {
expect(() => accentFolder.highlightMatch('test', 123)).toThrow(TypeError);
expect(() => accentFolder.highlightMatch('test', 123)).toThrow(
'Both str and fragment must be strings'
);
});

it('is case insensitive', () => {
expect(accentFolder.highlightMatch('FULANILO LÓPEZ', 'lo')).toBe(
'FULANI<b>LO</b> <b>LÓ</b>PEZ'
);
});
it('should throw TypeError if wrapTag is not a string', () => {
expect(() => accentFolder.highlightMatch('test', 'es', 123)).toThrow(
TypeError
);
expect(() => accentFolder.highlightMatch('test', 'es', 123)).toThrow(
'wrapTag must be a string'
);
});

it('handles empty strings', () => {
expect(accentFolder.highlightMatch('', 'test')).toBe('');
expect(accentFolder.highlightMatch('Test', '')).toBe('Test');
});
it('should recognize simple accents', () => {
expect(accentFolder.highlightMatch('Fulanilo López', 'lo')).toBe(
'Fulani<b>lo</b> <b>Ló</b>pez'
);
expect(accentFolder.highlightMatch('Erik Lørgensen', 'lo')).toBe(
'Erik <b>Lø</b>rgensen'
);
expect(accentFolder.highlightMatch('James Lö', 'lo')).toBe(
'James <b>Lö</b>'
);
});

it('returns original string when no match is found', () => {
expect(accentFolder.highlightMatch('Hello World', 'xyz')).toBe(
'Hello World'
);
it("should wrap matched fragment with custom tag added in second parameter '<strong>'", () => {
expect(
accentFolder.highlightMatch('Fulanilo López', 'lo', 'strong')
).toEqual('Fulani<strong>lo</strong> <strong>Ló</strong>pez');
});

it('wraps matched fragment with custom tag', () => {
expect(
accentFolder.highlightMatch('Fulanilo López', 'lo', 'strong')
).toBe('Fulani<strong>lo</strong> <strong>Ló</strong>pez');
});

it('is case insensitive', () => {
expect(accentFolder.highlightMatch('FULANILO LÓPEZ', 'lo')).toBe(
'FULANI<b>LO</b> <b>LÓ</b>PEZ'
);
});

it('handles empty strings', () => {
expect(accentFolder.highlightMatch('', 'test')).toBe('');
expect(accentFolder.highlightMatch('Test', '')).toBe('Test');
});

it('returns original string when no match is found', () => {
expect(accentFolder.highlightMatch('Hello World', 'xyz')).toBe(
'Hello World'
);
});

it('handles multiple matches', () => {
expect(accentFolder.highlightMatch('lólá lòlã', 'la')).toBe(
'ló<b>lá</b> lò<b>lã</b>'
);
});

it('handles special characters in fragment', () => {
expect(accentFolder.highlightMatch('a+b=c', '+')).toBe('a<b>+</b>b=c');
});

// it('preserves HTML in original string', () => {
// expect(accentFold.highlightMatch("<p>Héllo</p>", "he")).toBe("<p><b>Hé</b>llo</p>");
// });
});

it('handles multiple matches', () => {
expect(accentFolder.highlightMatch('lólá lòlã', 'la')).toBe(
'ló<b>lá</b> lò<b>lã</b>'
describe('replace', () => {
it('should throw TypeError if input is not a string', () => {
expect(() => accentFolder.replace(123)).toThrow(TypeError);
expect(() => accentFolder.replace(123)).toThrow('Input must be a string');
});

it.each(Object.entries(accentMap))(
'should replace %s with %s',
(accentedChar, expectedChar) => {
expect(accentFolder.replace(accentedChar)).toBe(expectedChar);
}
);
});
it('should recognize simple accents', () => {
expect(accentFolder.replace('naïve')).toBe('naive');
});

it('handles special characters in fragment', () => {
expect(accentFolder.highlightMatch('a+b=c', '+')).toBe('a<b>+</b>b=c');
});
it('should replace multiple accented characters', () => {
expect(accentFolder.replace('résumé')).toBe('resume');
});

// it('preserves HTML in original string', () => {
// expect(accentFold.highlightMatch("<p>Héllo</p>", "he")).toBe("<p><b>Hé</b>llo</p>");
// });
it('should handle mixed accented and non-accented text', () => {
expect(accentFolder.replace('Café au lait')).toBe('Cafe au lait');
});
it('should return the same string if no accented characters are present', () => {
expect(accentFolder.replace('hello world')).toBe('hello world');
});

it('should handle empty string', () => {
expect(accentFolder.replace('')).toBe('');
});
});
});
Loading

0 comments on commit 3d86b53

Please sign in to comment.