diff --git a/package-lock.json b/package-lock.json index a531b3f..11e2173 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,8 +5,12 @@ "packages": { "": { "license": "MIT", + "dependencies": { + "uuid": "^9.0.1" + }, "devDependencies": { "@types/jest": "^29.5.11", + "@types/uuid": "^9.0.7", "cross-env": "^7.0.3", "gh-pages": "^6.1.1", "jest": "^29.7.0", @@ -2662,6 +2666,12 @@ "integrity": "sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==", "dev": true }, + "node_modules/@types/uuid": { + "version": "9.0.7", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.7.tgz", + "integrity": "sha512-WUtIVRUZ9i5dYXefDEAI7sh9/O7jGvHg7Df/5O/gtH3Yabe5odI3UWopVR1qbPXQtvOxWu3mM4XxlYeZtMWF4g==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.22", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.22.tgz", @@ -11699,6 +11709,16 @@ "request": "^2.34" } }, + "node_modules/request/node_modules/uuid": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", + "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==", + "deprecated": "Please upgrade to version 7 or higher. Older versions may use Math.random() in certain circumstances, which is known to be problematic. See https://v8.dev/blog/math-random for details.", + "dev": true, + "bin": { + "uuid": "bin/uuid" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -13517,13 +13537,15 @@ "license": "ISC" }, "node_modules/uuid": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", - "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==", - "dev": true, - "license": "MIT", + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], "bin": { - "uuid": "bin/uuid" + "uuid": "dist/bin/uuid" } }, "node_modules/v8-compile-cache": { @@ -15755,6 +15777,12 @@ "integrity": "sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==", "dev": true }, + "@types/uuid": { + "version": "9.0.7", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.7.tgz", + "integrity": "sha512-WUtIVRUZ9i5dYXefDEAI7sh9/O7jGvHg7Df/5O/gtH3Yabe5odI3UWopVR1qbPXQtvOxWu3mM4XxlYeZtMWF4g==", + "dev": true + }, "@types/yargs": { "version": "17.0.22", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.22.tgz", @@ -22447,6 +22475,14 @@ "tough-cookie": "~2.5.0", "tunnel-agent": "^0.6.0", "uuid": "^3.3.2" + }, + "dependencies": { + "uuid": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", + "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==", + "dev": true + } } }, "request-promise-core": { @@ -23807,10 +23843,9 @@ } }, "uuid": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.4.0.tgz", - "integrity": "sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==", - "dev": true + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==" }, "v8-compile-cache": { "version": "2.3.0", diff --git a/package.json b/package.json index 334f14b..701e7d4 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ }, "devDependencies": { "@types/jest": "^29.5.11", + "@types/uuid": "^9.0.7", "cross-env": "^7.0.3", "gh-pages": "^6.1.1", "jest": "^29.7.0", @@ -27,5 +28,8 @@ "ts-jest": "^29.1.1", "ts-node": "^10.9.2", "typescript": "^5.3.3" + }, + "dependencies": { + "uuid": "^9.0.1" } } diff --git a/src/lib/format_and_split_text_Into_columns.ts b/src/lib/format_and_split_text_Into_columns.ts index 980975f..3a9f9b3 100644 --- a/src/lib/format_and_split_text_Into_columns.ts +++ b/src/lib/format_and_split_text_Into_columns.ts @@ -1,35 +1,43 @@ -import {splitIntoColumns} from './split_into_columns' +import { splitIntoColumns } from './split_into_columns' +import { v4 as uuidv4 } from 'uuid'; export function formatAndSplitTextIntoColumns( text: string, charLimit: number, ): string[][] { + if (text === "") { + return [[""]] + } + + const dummy = uuidv4() + text = text.replace(/\. /g, dummy) + const replacements: [RegExp, string | ((substring: string) => string)][] = [ [/-\n/g, ''], // Ensure hyphen followed by newline is completely removed [/\n/g, " "], // Replaces newlines with spaces [/- /g, ""], // Removes hyphens followed by a space - [/Fig\. /g, "Fig."], // Formats abbreviation for "Figure" - [/Figs\. /g, "Figs."], // Formats abbreviation for "Figures" - [/No\. /g, "No."], // Formats abbreviation for "Number" - [/Prof\. /g, "Prof."], // Formats abbreviation for "Professor" - [/Eq\. /g, "Eq."], // Formats abbreviation for "Equation" - [/et al\. /g, "et al."], // Formats "et al." - [/Dr\. /g, "Dr."], // Formats abbreviation for "Doctor" - [/e\.g\. /g, "e.g."], // Formats "e.g." - [/i\.e\. /g, "i.e."], // Formats "i.e." - [/Sec\. /g, "Sec."], // Formats abbreviation for "Section" - [/Sect\. /g, "Sect."], // Formats abbreviation for "Section" - [/2\.4 GHz/g, "2.4GHz"], // Formats specific frequency value - [/I\. /g, "I."], - [/II\. /g, "II."], - [/III\. /g, "III."], - [/IV\. /g, "IV."], - [/V\. /g, "V."], - [/VI\. /g, "VI."], - [/VII\. /g, "VII."], - [/VIII\. /g, "VIII."], - [/IX\. /g, "IX."], - [/X\. /g, "X."], + [RegExp(`Fig${dummy}`, "g"), "Fig. "], // Formats abbreviation for "Figure" + [RegExp(`Figs${dummy}`, "g"), "Figs. "], // Formats abbreviation for "Figures" + [RegExp(`No${dummy}`, "g"), "No. "], // Formats abbreviation for "Number" + [RegExp(`Prof${dummy}`, "g"), "Prof. "], // Formats abbreviation for "Professor" + [RegExp(`Eq${dummy}`, "g"), "Eq. "], // Formats abbreviation for "Equation" + [RegExp(`et al${dummy}`, "g"), "et al. "], // Formats "et al." + [RegExp(`Dr${dummy}`, "g"), "Dr. "], // Formats abbreviation for "Doctor" + [RegExp(`e\.g${dummy}`, "g"), "e.g. "], // Formats "e.g." + [RegExp(`i\.e${dummy}`, "g"), "i.e. "], // Formats "i.e." + [RegExp(`Sec${dummy}`, "g"), "Sec. "], // Formats abbreviation for "Section" + [RegExp(`Sect${dummy}`, "g"), "Sect. "], // Formats abbreviation for "Section" + [RegExp(`2\.4 GHz`, "g"), "2.4GHz"], // Formats specific frequency value + [RegExp(`I${dummy}`, "g"), "I. "], + [RegExp(`II${dummy}`, "g"), "II. "], + [RegExp(`III${dummy}`, "g"), "III. "], + [RegExp(`IV${dummy}`, "g"), "IV. "], + [RegExp(`V${dummy}`, "g"), "V. "], + [RegExp(`VI${dummy}`, "g"), "VI. "], + [RegExp(`VII${dummy}`, "g"), "VII. "], + [RegExp(`VIII${dummy}`, "g"), "VIII. "], + [RegExp(`IX${dummy}`, "g"), "IX. "], + [RegExp(`X${dummy}`, "g"), "X. "], [/\.\d+,\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats numbers with commas [/\.\d+-\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats number ranges ]; @@ -41,8 +49,14 @@ export function formatAndSplitTextIntoColumns( }); // Split the processed text into sentences - const sentences = processedText.split(/(?<=\.)\s/); + let sentences = processedText.split(RegExp(`${dummy}`, "s")); + sentences = sentences.map(item => { + if (!item.endsWith('.')) { + return item + '.'; + } + return item; + }); // Use the splitIntoColumns function to split the sentences into columns return splitIntoColumns(sentences, charLimit); } diff --git a/src/lib/test/format_and_split_text_into_columns.test.ts b/src/lib/test/format_and_split_text_into_columns.test.ts index 4e96d6b..9a8fbaf 100644 --- a/src/lib/test/format_and_split_text_into_columns.test.ts +++ b/src/lib/test/format_and_split_text_into_columns.test.ts @@ -1,3 +1,4 @@ + import { formatAndSplitTextIntoColumns } from "../format_and_split_text_Into_columns" @@ -5,7 +6,7 @@ describe('formatAndSplitTextIntoColumns', () => { // Test Text Processing it('should apply all text processing rules correctly', () => { const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz, and e.g. example."; - const expectedResult = "This is a teststring with various rules like Fig.1, 2.4GHz, and e.g.example."; + const expectedResult = "This is a teststring with various rules like Fig. 1, 2.4GHz, and e.g. example."; const result = formatAndSplitTextIntoColumns(testString, 1000); expect(result.join(' ')).toContain(expectedResult); }); @@ -21,7 +22,7 @@ describe('formatAndSplitTextIntoColumns', () => { const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz. This part will be in a separate column."; const charLimit = 10; const expectedResult = [ - ['This is a teststring with various rules like Fig.1, 2.4GHz.'], + ['This is a teststring with various rules like Fig. 1, 2.4GHz.'], ['This part will be in a separate column.'] ]; const result = formatAndSplitTextIntoColumns(testString, charLimit); @@ -82,4 +83,4 @@ describe('formatAndSplitTextIntoColumns', () => { }); // ... (Complete the tests for all the replacement rules) ... -}); \ No newline at end of file +});