forked from danestves/markdown-to-text
-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.ts
109 lines (103 loc) · 3.27 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
 * Options accepted by `removeMarkdown`.
 *
 * Every field is optional; an omitted field falls back to the default listed
 * on that member.
 */
type Options = {
  /**
   * Strip list markers (`*`, `-`, `+`, `1.`) from the start of lines.
   * Default: `true`.
   */
  stripListLeaders?: boolean;
  /**
   * Text placed in front of each list item in place of the stripped marker.
   * Only consulted when `stripListLeaders` is on. Any falsy value (`false`,
   * `""`) removes the marker without inserting a replacement.
   * Default: `false`.
   */
  listUnicodeChar?: string | boolean;
  /**
   * Also strip `==` underlines, `~~~` / ``` fence lines and `~~`
   * strikethrough markers. Default: `true`.
   */
  gfm?: boolean;
  /**
   * Replace an image with its alt text (`true`) or drop it entirely
   * (`false`). Default: `true`.
   */
  useImgAltText?: boolean;
  /**
   * Rewrite `[text](target)` as `text (target)` instead of keeping only
   * `text`. Default: `false`.
   */
  preserveLinks?: boolean;
};

/**
 * @function removeMarkdown
 *
 * @description
 * Strips Markdown syntax from a string with a fixed sequence of regular
 * expression replacements and returns the remaining plain text. The order of
 * the replacements matters: earlier rules remove syntax that later rules
 * would otherwise misread.
 *
 * The `options` object is only read, never written to, so a shared or frozen
 * object can be passed safely.
 *
 * @param markdown - The markdown string to parse. A falsy value is treated as
 * an empty string.
 * @param options - The options for the function; see `Options` for defaults.
 *
 * @returns The parsed plain text. If a replacement step throws, the error is
 * logged with `console.error` and the original `markdown` is returned.
 */
const removeMarkdown = (markdown: string, options: Options = {}): string => {
  // Resolve the defaults into a fresh object instead of assigning them back
  // onto `options`. Spreading copies the caller's own properties (including
  // ones explicitly set to `undefined`) and tolerates a `null` argument from
  // untyped callers.
  const {
    listUnicodeChar,
    stripListLeaders,
    gfm,
    useImgAltText,
    preserveLinks,
  }: Options = {
    listUnicodeChar: false,
    stripListLeaders: true,
    gfm: true,
    useImgAltText: true,
    preserveLinks: false,
    ...options,
  };

  let output = markdown || "";

  // Remove horizontal rules (stripListLeaders conflicts with this rule, which
  // is why it runs before everything else).
  output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, "");

  try {
    if (stripListLeaders) {
      // $1 is the indentation in front of the marker; it is kept so nested
      // lists stay indented.
      output = output.replace(
        /^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm,
        listUnicodeChar ? `${listUnicodeChar} $1` : "$1"
      );
    }

    if (gfm) {
      output = output
        // Header
        .replace(/\n={2,}/g, "\n")
        // Fenced codeblocks
        .replace(/~{3}.*\n/g, "")
        // Strikethrough
        .replace(/~~/g, "")
        // Fenced codeblocks
        .replace(/`{3}.*\n/g, "");
    }

    if (preserveLinks) {
      // Rewrite inline links as "text (target)". The optional leading "!" is
      // captured so that images are recognised and left untouched here; they
      // are handled by the image rule further down. Without this guard
      // "![alt](src)" would become "!alt (src)".
      output = output.replace(
        /(!?)\[(.*?)\][\[\(](.*?)[\]\)]/g,
        (match: string, bang: string, text: string, target: string) =>
          bang ? match : `${text} (${target})`
      );
    }

    output = output
      // Remove HTML tags
      .replace(/<[^>]*>/g, "")
      // Remove setext-style header underlines. The `m` flag is required: an
      // underline is never the first line of the input, so `^`/`$` must
      // anchor to individual lines.
      .replace(/^[=\-]{2,}\s*$/gm, "")
      // Remove footnote references and, with `m`, whole footnote definitions
      // ("[^1]: text") on any line rather than only on the last one.
      .replace(/\[\^.+?\](\: .*?$)?/gm, "")
      // Remove link reference definitions ("[ref]: target") on any line.
      .replace(/\s{0,2}\[.*?\]: .*?$/gm, "")
      // Remove images
      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, useImgAltText ? "$1" : "")
      // Remove inline links
      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, "$1")
      // Remove blockquotes: the first pass strips a marker at the very start
      // of the input, the second replaces the marker on later lines with a
      // blank line.
      .replace(/^\s{0,3}>\s?/g, "")
      .replace(/(^|\n)\s{0,3}>\s?/g, "\n\n")
      // Remove indented reference-style link definitions (safety net for
      // anything the definition rule above did not already consume).
      .replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/gm, "")
      // Remove atx-style headers; the captured newlines are re-emitted so
      // line breaks around a header are kept.
      .replace(
        /^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm,
        "$1$2$3"
      )
      // Remove emphasis (repeat the line to remove double emphasis)
      .replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
      .replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
      // Remove code blocks
      .replace(/(`{3,})(.*?)\1/gm, "$2")
      // Remove inline code
      .replace(/`(.+?)`/g, "$1")
      // Collapse runs of two or more newlines into exactly two.
      .replace(/\n{2,}/g, "\n\n");
  } catch (e) {
    // Best effort: report the failure and hand back the untouched input.
    console.error(e);
    return markdown;
  }

  return output;
};
export { removeMarkdown };