例外処理した際に、スペースを削除しないように変更

takanotume24 · Jan 19, 2024 · 7ab9735 · 7ab9735
1 parent ffcb86b
commit 7ab9735
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 37 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -19,6 +19,7 @@
   },
   "devDependencies": {
     "@types/jest": "^29.5.11",
+    "@types/uuid": "^9.0.7",
     "cross-env": "^7.0.3",
     "gh-pages": "^6.1.1",
     "jest": "^29.7.0",
@@ -27,5 +28,8 @@
     "ts-jest": "^29.1.1",
     "ts-node": "^10.9.2",
     "typescript": "^5.3.3"
+  },
+  "dependencies": {
+    "uuid": "^9.0.1"
   }
 }
diff --git a/src/lib/format_and_split_text_Into_columns.ts b/src/lib/format_and_split_text_Into_columns.ts
@@ -1,35 +1,43 @@
-import {splitIntoColumns} from './split_into_columns'
+import { splitIntoColumns } from './split_into_columns'
+import { v4 as uuidv4 } from 'uuid';
 
 export function formatAndSplitTextIntoColumns(
     text: string,
     charLimit: number,
 ): string[][] {
+    if (text === "") {
+        return [[""]]
+    }
+
+    const dummy = uuidv4()
+    text = text.replace(/\. /g, dummy)
+
     const replacements: [RegExp, string | ((substring: string) => string)][] = [
         [/-\n/g, ''], // Ensure hyphen followed by newline is completely removed
         [/\n/g, " "], // Replaces newlines with spaces
         [/- /g, ""], // Removes hyphens followed by a space
-        [/Fig\. /g, "Fig."], // Formats abbreviation for "Figure"
-        [/Figs\. /g, "Figs."], // Formats abbreviation for "Figures"
-        [/No\. /g, "No."], // Formats abbreviation for "Number"
-        [/Prof\. /g, "Prof."], // Formats abbreviation for "Professor"
-        [/Eq\. /g, "Eq."], // Formats abbreviation for "Equation"
-        [/et al\. /g, "et al."], // Formats "et al."
-        [/Dr\. /g, "Dr."], // Formats abbreviation for "Doctor"
-        [/e\.g\. /g, "e.g."], // Formats "e.g."
-        [/i\.e\. /g, "i.e."], // Formats "i.e."
-        [/Sec\. /g, "Sec."], // Formats abbreviation for "Section"
-        [/Sect\. /g, "Sect."], // Formats abbreviation for "Section"
-        [/2\.4 GHz/g, "2.4GHz"], // Formats specific frequency value
-        [/I\. /g, "I."],
-        [/II\. /g, "II."],
-        [/III\. /g, "III."],
-        [/IV\. /g, "IV."],
-        [/V\. /g, "V."],
-        [/VI\. /g, "VI."],
-        [/VII\. /g, "VII."],
-        [/VIII\. /g, "VIII."],
-        [/IX\. /g, "IX."],
-        [/X\. /g, "X."],
+        [RegExp(`Fig${dummy}`, "g"), "Fig. "], // Formats abbreviation for "Figure"
+        [RegExp(`Figs${dummy}`, "g"), "Figs. "], // Formats abbreviation for "Figures"
+        [RegExp(`No${dummy}`, "g"), "No. "], // Formats abbreviation for "Number"
+        [RegExp(`Prof${dummy}`, "g"), "Prof. "], // Formats abbreviation for "Professor"
+        [RegExp(`Eq${dummy}`, "g"), "Eq. "], // Formats abbreviation for "Equation"
+        [RegExp(`et al${dummy}`, "g"), "et al. "], // Formats "et al."
+        [RegExp(`Dr${dummy}`, "g"), "Dr. "], // Formats abbreviation for "Doctor"
+        [RegExp(`e\.g${dummy}`, "g"), "e.g. "], // Formats "e.g."
+        [RegExp(`i\.e${dummy}`, "g"), "i.e. "], // Formats "i.e."
+        [RegExp(`Sec${dummy}`, "g"), "Sec. "], // Formats abbreviation for "Section"
+        [RegExp(`Sect${dummy}`, "g"), "Sect. "], // Formats abbreviation for "Section"
+        [RegExp(`2\.4 GHz`, "g"), "2.4GHz"], // Formats specific frequency value
+        [RegExp(`I${dummy}`, "g"), "I. "],
+        [RegExp(`II${dummy}`, "g"), "II. "],
+        [RegExp(`III${dummy}`, "g"), "III. "],
+        [RegExp(`IV${dummy}`, "g"), "IV. "],
+        [RegExp(`V${dummy}`, "g"), "V. "],
+        [RegExp(`VI${dummy}`, "g"), "VI. "],
+        [RegExp(`VII${dummy}`, "g"), "VII. "],
+        [RegExp(`VIII${dummy}`, "g"), "VIII. "],
+        [RegExp(`IX${dummy}`, "g"), "IX. "],
+        [RegExp(`X${dummy}`, "g"), "X. "],
         [/\.\d+,\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats numbers with commas
         [/\.\d+-\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats number ranges
     ];
@@ -41,8 +49,14 @@ export function formatAndSplitTextIntoColumns(
     });
 
     // Split the processed text into sentences
-    const sentences = processedText.split(/(?<=\.)\s/);
+    let sentences = processedText.split(RegExp(`${dummy}`, "s"));
 
+    sentences = sentences.map(item => {
+        if (!item.endsWith('.')) {
+            return item + '.';
+        }
+        return item;
+    });
     // Use the splitIntoColumns function to split the sentences into columns
     return splitIntoColumns(sentences, charLimit);
 }
diff --git a/src/lib/test/format_and_split_text_into_columns.test.ts b/src/lib/test/format_and_split_text_into_columns.test.ts
@@ -1,11 +1,12 @@
+
 import { formatAndSplitTextIntoColumns } from "../format_and_split_text_Into_columns"
 
 
 describe('formatAndSplitTextIntoColumns', () => {
     // Test Text Processing
     it('should apply all text processing rules correctly', () => {
         const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz, and e.g. example.";
-        const expectedResult = "This is a teststring with various rules like Fig.1, 2.4GHz, and e.g.example.";
+        const expectedResult = "This is a teststring with various rules like Fig. 1, 2.4GHz, and e.g. example.";
         const result = formatAndSplitTextIntoColumns(testString, 1000);
         expect(result.join(' ')).toContain(expectedResult);
     });
@@ -21,7 +22,7 @@ describe('formatAndSplitTextIntoColumns', () => {
         const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz. This part will be in a separate column.";
         const charLimit = 10;
         const expectedResult = [
-            ['This is a teststring with various rules like Fig.1, 2.4GHz.'],
+            ['This is a teststring with various rules like Fig. 1, 2.4GHz.'],
             ['This part will be in a separate column.']
         ];
         const result = formatAndSplitTextIntoColumns(testString, charLimit);
@@ -82,4 +83,4 @@ describe('formatAndSplitTextIntoColumns', () => {
     });
 
     // ... (Complete the tests for all the replacement rules) ...
-});
+});