Skip to content

Commit

Permalink
例外処理した際に、スペースを削除しないように変更
Browse files Browse the repository at this point in the history
  • Loading branch information
takanotume24 committed Jan 19, 2024
1 parent ffcb86b commit 7ab9735
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 37 deletions.
55 changes: 45 additions & 10 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
},
"devDependencies": {
"@types/jest": "^29.5.11",
"@types/uuid": "^9.0.7",
"cross-env": "^7.0.3",
"gh-pages": "^6.1.1",
"jest": "^29.7.0",
Expand All @@ -27,5 +28,8 @@
"ts-jest": "^29.1.1",
"ts-node": "^10.9.2",
"typescript": "^5.3.3"
},
"dependencies": {
"uuid": "^9.0.1"
}
}
62 changes: 38 additions & 24 deletions src/lib/format_and_split_text_Into_columns.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,43 @@
import {splitIntoColumns} from './split_into_columns'
import { splitIntoColumns } from './split_into_columns'
import { v4 as uuidv4 } from 'uuid';

export function formatAndSplitTextIntoColumns(
text: string,
charLimit: number,
): string[][] {
if (text === "") {
return [[""]]
}

const dummy = uuidv4()
text = text.replace(/\. /g, dummy)

const replacements: [RegExp, string | ((substring: string) => string)][] = [
[/-\n/g, ''], // Ensure hyphen followed by newline is completely removed
[/\n/g, " "], // Replaces newlines with spaces
[/- /g, ""], // Removes hyphens followed by a space
[/Fig\. /g, "Fig."], // Formats abbreviation for "Figure"
[/Figs\. /g, "Figs."], // Formats abbreviation for "Figures"
[/No\. /g, "No."], // Formats abbreviation for "Number"
[/Prof\. /g, "Prof."], // Formats abbreviation for "Professor"
[/Eq\. /g, "Eq."], // Formats abbreviation for "Equation"
[/et al\. /g, "et al."], // Formats "et al."
[/Dr\. /g, "Dr."], // Formats abbreviation for "Doctor"
[/e\.g\. /g, "e.g."], // Formats "e.g."
[/i\.e\. /g, "i.e."], // Formats "i.e."
[/Sec\. /g, "Sec."], // Formats abbreviation for "Section"
[/Sect\. /g, "Sect."], // Formats abbreviation for "Section"
[/2\.4 GHz/g, "2.4GHz"], // Formats specific frequency value
[/I\. /g, "I."],
[/II\. /g, "II."],
[/III\. /g, "III."],
[/IV\. /g, "IV."],
[/V\. /g, "V."],
[/VI\. /g, "VI."],
[/VII\. /g, "VII."],
[/VIII\. /g, "VIII."],
[/IX\. /g, "IX."],
[/X\. /g, "X."],
[RegExp(`Fig${dummy}`, "g"), "Fig. "], // Formats abbreviation for "Figure"
[RegExp(`Figs${dummy}`, "g"), "Figs. "], // Formats abbreviation for "Figures"
[RegExp(`No${dummy}`, "g"), "No. "], // Formats abbreviation for "Number"
[RegExp(`Prof${dummy}`, "g"), "Prof. "], // Formats abbreviation for "Professor"
[RegExp(`Eq${dummy}`, "g"), "Eq. "], // Formats abbreviation for "Equation"
[RegExp(`et al${dummy}`, "g"), "et al. "], // Formats "et al."
[RegExp(`Dr${dummy}`, "g"), "Dr. "], // Formats abbreviation for "Doctor"
[RegExp(`e\.g${dummy}`, "g"), "e.g. "], // Formats "e.g."
[RegExp(`i\.e${dummy}`, "g"), "i.e. "], // Formats "i.e."
[RegExp(`Sec${dummy}`, "g"), "Sec. "], // Formats abbreviation for "Section"
[RegExp(`Sect${dummy}`, "g"), "Sect. "], // Formats abbreviation for "Section"
[RegExp(`2\.4 GHz`, "g"), "2.4GHz"], // Formats specific frequency value
[RegExp(`I${dummy}`, "g"), "I. "],
[RegExp(`II${dummy}`, "g"), "II. "],
[RegExp(`III${dummy}`, "g"), "III. "],
[RegExp(`IV${dummy}`, "g"), "IV. "],
[RegExp(`V${dummy}`, "g"), "V. "],
[RegExp(`VI${dummy}`, "g"), "VI. "],
[RegExp(`VII${dummy}`, "g"), "VII. "],
[RegExp(`VIII${dummy}`, "g"), "VIII. "],
[RegExp(`IX${dummy}`, "g"), "IX. "],
[RegExp(`X${dummy}`, "g"), "X. "],
[/\.\d+,\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats numbers with commas
[/\.\d+-\d+(?= [A-Z])/g, match => "[" + match + "]. "], // Formats number ranges
];
Expand All @@ -41,8 +49,14 @@ export function formatAndSplitTextIntoColumns(
});

// Split the processed text into sentences
const sentences = processedText.split(/(?<=\.)\s/);
let sentences = processedText.split(RegExp(`${dummy}`, "s"));

sentences = sentences.map(item => {
if (!item.endsWith('.')) {
return item + '.';
}
return item;
});
// Use the splitIntoColumns function to split the sentences into columns
return splitIntoColumns(sentences, charLimit);
}
7 changes: 4 additions & 3 deletions src/lib/test/format_and_split_text_into_columns.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@

import { formatAndSplitTextIntoColumns } from "../format_and_split_text_Into_columns"


describe('formatAndSplitTextIntoColumns', () => {
// Test Text Processing
it('should apply all text processing rules correctly', () => {
const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz, and e.g. example.";
const expectedResult = "This is a teststring with various rules like Fig.1, 2.4GHz, and e.g.example.";
const expectedResult = "This is a teststring with various rules like Fig. 1, 2.4GHz, and e.g. example.";
const result = formatAndSplitTextIntoColumns(testString, 1000);
expect(result.join(' ')).toContain(expectedResult);
});
Expand All @@ -21,7 +22,7 @@ describe('formatAndSplitTextIntoColumns', () => {
const testString = "This is a test-\nstring with various rules like Fig. 1, 2.4 GHz. This part will be in a separate column.";
const charLimit = 10;
const expectedResult = [
['This is a teststring with various rules like Fig.1, 2.4GHz.'],
['This is a teststring with various rules like Fig. 1, 2.4GHz.'],
['This part will be in a separate column.']
];
const result = formatAndSplitTextIntoColumns(testString, charLimit);
Expand Down Expand Up @@ -82,4 +83,4 @@ describe('formatAndSplitTextIntoColumns', () => {
});

// ... (Complete the tests for all the replacement rules) ...
});
});

0 comments on commit 7ab9735

Please sign in to comment.