From 88d52626b0bb63fc5ea20f585aa2b67bf5efe2a0 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 19 Jul 2022 22:42:54 +0100 Subject: [PATCH 01/58] Use subprocess instead of `os.system` in testall.py --- test/testall.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/testall.py b/test/testall.py index e26856ed..795e798a 100644 --- a/test/testall.py +++ b/test/testall.py @@ -2,14 +2,11 @@ # # Run the test suite against all the Python versions we can find. # - - - -import sys import os -from os.path import dirname, abspath, join import re - +import subprocess +import sys +from os.path import abspath, dirname, join TOP = dirname(dirname(abspath(__file__))) sys.path.insert(0, join(TOP, "tools")) @@ -50,7 +47,11 @@ def testall(): ver_str = "%s.%s" % ver print("-- test with Python %s (%s)" % (ver_str, python)) assert ' ' not in python - rv = os.system("MACOSX_DEPLOYMENT_TARGET= %s test.py -- -knownfailure" % python) + proc = subprocess.Popen( + "MACOSX_DEPLOYMENT_TARGET= %s test.py -- -knownfailure" % python, + shell=True + ) + rv = proc.wait() if rv: sys.exit(os.WEXITSTATUS(rv)) From faee13e54c41a111a69e9d2a61f9076454c8b766 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 19 Jul 2022 23:08:48 +0100 Subject: [PATCH 02/58] Re-print test warnings after tests have run (issue #458). This works by capturing stderr from tests and checking to see if each line contains the string "WARNING:test:". If a line contains this string it is saved for later and re-printed once all tests have been run. 
--- test/testall.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/test/testall.py b/test/testall.py index 795e798a..92efd1de 100644 --- a/test/testall.py +++ b/test/testall.py @@ -40,6 +40,7 @@ def _gen_pythons(): yield ver, python def testall(): + all_warnings = [] for ver, python in _gen_pythons(): if ver < (3, 5): # Don't support Python < 3.5 @@ -47,12 +48,26 @@ def testall(): ver_str = "%s.%s" % ver print("-- test with Python %s (%s)" % (ver_str, python)) assert ' ' not in python + proc = subprocess.Popen( - "MACOSX_DEPLOYMENT_TARGET= %s test.py -- -knownfailure" % python, - shell=True + # pass "-u" option to force unbuffered output + "MACOSX_DEPLOYMENT_TARGET= %s -u test.py -- -knownfailure" % python, + shell=True, stderr=subprocess.PIPE ) - rv = proc.wait() - if rv: - sys.exit(os.WEXITSTATUS(rv)) + + while proc.poll() is None: + # capture and re-print stderr while process is running + line = proc.stderr.readline().decode().strip() + print(line, file=sys.stderr) + if 'WARNING:test:' in line: + # if stderr contains a warning, save this for later + all_warnings.append((python, ver_str, line)) + + if proc.returncode: + sys.exit(os.WEXITSTATUS(proc.returncode)) + + for python, ver_str, warning in all_warnings: + # now re-print all warnings to make sure they are seen + print('-- warning raised by Python %s (%s) -- %s' % (ver_str, python, warning)) testall() From b1e37a1675a3c7a5e1e3d8f6f6ef3feadfed418d Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sun, 22 Jan 2023 16:36:24 -0500 Subject: [PATCH 03/58] prepare for 2.4.7 release --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index abf8ed80..bf598ecb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # python-markdown2 Changelog -## python-markdown2 2.4.7 (not yet released) +## python-markdown2 2.4.7 - [pull #483] Fix hashing nested HTML blocks - [pull #486] Fix backslash being unable to escape raw HTML 
tags From 07edaf45111b79a3576b7522edd1204f7db3357f Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sun, 22 Jan 2023 16:36:33 -0500 Subject: [PATCH 04/58] prep for future dev --- CHANGES.md | 5 +++++ lib/markdown2.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bf598ecb..78e8a0f9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # python-markdown2 Changelog +## python-markdown2 2.4.8 (not yet released) + +(nothing yet) + + ## python-markdown2 2.4.7 - [pull #483] Fix hashing nested HTML blocks diff --git a/lib/markdown2.py b/lib/markdown2.py index 112fa707..6eeff77d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -99,7 +99,7 @@ # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (2, 4, 7) +__version_info__ = (2, 4, 8) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" From 1e9880b5a37199867e87f7d2cd94303f43dc359f Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 27 Jan 2023 22:20:32 +0000 Subject: [PATCH 05/58] Update test cases for pygments 2.14 --- test/tm-cases/fenced_code_blocks_safe_highlight.html | 4 ++-- test/tm-cases/fenced_code_blocks_syntax_highlighting.html | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight.html b/test/tm-cases/fenced_code_blocks_safe_highlight.html index 105bd8c9..b77f50b1 100644 --- a/test/tm-cases/fenced_code_blocks_safe_highlight.html +++ b/test/tm-cases/fenced_code_blocks_safe_highlight.html @@ -9,8 +9,8 @@ http://github.github.com/github-flavored-markdown/.

-
def foo
-    puts "hi"
+
def foo
+    puts "hi"
 end
 
diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting.html b/test/tm-cases/fenced_code_blocks_syntax_highlighting.html index 105bd8c9..b77f50b1 100644 --- a/test/tm-cases/fenced_code_blocks_syntax_highlighting.html +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting.html @@ -9,8 +9,8 @@ http://github.github.com/github-flavored-markdown/.

-
def foo
-    puts "hi"
+
def foo
+    puts "hi"
 end
 
From 7cba7e421bdff204cedab2cad8106fb4b1894294 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 27 Jan 2023 22:49:37 +0000 Subject: [PATCH 06/58] Add versioned tests for different pygments versions --- test/test.py | 11 ++++++++++- .../fenced_code_blocks_safe_highlight.tags | 2 +- .../fenced_code_blocks_safe_highlight_old.html | 16 ++++++++++++++++ .../fenced_code_blocks_safe_highlight_old.opts | 1 + .../fenced_code_blocks_safe_highlight_old.tags | 1 + .../fenced_code_blocks_safe_highlight_old.text | 14 ++++++++++++++ .../fenced_code_blocks_syntax_highlighting.tags | 2 +- ...nced_code_blocks_syntax_highlighting_old.html | 16 ++++++++++++++++ ...nced_code_blocks_syntax_highlighting_old.opts | 1 + ...nced_code_blocks_syntax_highlighting_old.tags | 1 + ...nced_code_blocks_syntax_highlighting_old.text | 14 ++++++++++++++ 11 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 test/tm-cases/fenced_code_blocks_safe_highlight_old.html create mode 100644 test/tm-cases/fenced_code_blocks_safe_highlight_old.opts create mode 100644 test/tm-cases/fenced_code_blocks_safe_highlight_old.tags create mode 100644 test/tm-cases/fenced_code_blocks_safe_highlight_old.text create mode 100644 test/tm-cases/fenced_code_blocks_syntax_highlighting_old.html create mode 100644 test/tm-cases/fenced_code_blocks_syntax_highlighting_old.opts create mode 100644 test/tm-cases/fenced_code_blocks_syntax_highlighting_old.tags create mode 100644 test/tm-cases/fenced_code_blocks_syntax_highlighting_old.text diff --git a/test/test.py b/test/test.py index 0dce1544..64e2ffeb 100755 --- a/test/test.py +++ b/test/test.py @@ -39,10 +39,19 @@ def setup(): default_tags = [] for extra_lib in ('pygments', 'wavedrom'): try: - importlib.import_module(extra_lib) + mod = importlib.import_module(extra_lib) except ImportError: log.warning("skipping %s tests ('%s' module not found)" % (extra_lib, extra_lib)) default_tags.append("-%s" % extra_lib) + else: + if extra_lib == 'pygments': + version = 
tuple(int(i) for i in mod.__version__.split('.')[:3]) + if version >= (2, 14, 0): + tag = "pygments<2.14" + else: + tag = "pygments>=2.14" + log.warning("skipping %s tests (pygments %s found)" % (tag, mod.__version__)) + default_tags.append("-%s" % tag) retval = testlib.harness(testdir_from_ns=testdir_from_ns, default_tags=default_tags) diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight.tags b/test/tm-cases/fenced_code_blocks_safe_highlight.tags index 2c03fb5d..b5480365 100644 --- a/test/tm-cases/fenced_code_blocks_safe_highlight.tags +++ b/test/tm-cases/fenced_code_blocks_safe_highlight.tags @@ -1 +1 @@ -extra fenced-code-blocks pygments +extra fenced-code-blocks pygments pygments>=2.14 diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight_old.html b/test/tm-cases/fenced_code_blocks_safe_highlight_old.html new file mode 100644 index 00000000..105bd8c9 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_safe_highlight_old.html @@ -0,0 +1,16 @@ +
+
if True:
+    print "hi"
+
+
+ +

That's using the fenced-code-blocks extra with Python +syntax coloring, if pygments is installed. See +http://github.github.com/github-flavored-markdown/.

+ +
+
def foo
+    puts "hi"
+end
+
+
diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight_old.opts b/test/tm-cases/fenced_code_blocks_safe_highlight_old.opts new file mode 100644 index 00000000..ca41f659 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_safe_highlight_old.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"], "safe_mode": "escape"} diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight_old.tags b/test/tm-cases/fenced_code_blocks_safe_highlight_old.tags new file mode 100644 index 00000000..46b22d25 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_safe_highlight_old.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments pygments<2.14 diff --git a/test/tm-cases/fenced_code_blocks_safe_highlight_old.text b/test/tm-cases/fenced_code_blocks_safe_highlight_old.text new file mode 100644 index 00000000..5c6181cf --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_safe_highlight_old.text @@ -0,0 +1,14 @@ +```python +if True: + print "hi" +``` + +That's using the *fenced-code-blocks* extra with Python +syntax coloring, if `pygments` is installed. See +. + +```ruby +def foo + puts "hi" +end +``` diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting.tags b/test/tm-cases/fenced_code_blocks_syntax_highlighting.tags index 2c03fb5d..b5480365 100644 --- a/test/tm-cases/fenced_code_blocks_syntax_highlighting.tags +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting.tags @@ -1 +1 @@ -extra fenced-code-blocks pygments +extra fenced-code-blocks pygments pygments>=2.14 diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.html b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.html new file mode 100644 index 00000000..105bd8c9 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.html @@ -0,0 +1,16 @@ +
+
if True:
+    print "hi"
+
+
+ +

That's using the fenced-code-blocks extra with Python +syntax coloring, if pygments is installed. See +http://github.github.com/github-flavored-markdown/.

+ +
+
def foo
+    puts "hi"
+end
+
+
diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.opts b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.opts new file mode 100644 index 00000000..ba411ac7 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"]} diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.tags b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.tags new file mode 100644 index 00000000..46b22d25 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments pygments<2.14 diff --git a/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.text b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.text new file mode 100644 index 00000000..5c6181cf --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_syntax_highlighting_old.text @@ -0,0 +1,14 @@ +```python +if True: + print "hi" +``` + +That's using the *fenced-code-blocks* extra with Python +syntax coloring, if `pygments` is installed. See +. 
+ +```ruby +def foo + puts "hi" +end +``` From e61d7acfb9710a1990e955b2852c9d29ecd222a1 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 4 Feb 2023 17:43:33 +0000 Subject: [PATCH 07/58] Fix #498 --- lib/markdown2.py | 1 + test/tm-cases/consecutive_image_links_issue498.html | 4 ++++ test/tm-cases/consecutive_image_links_issue498.text | 4 ++++ 3 files changed, 9 insertions(+) create mode 100644 test/tm-cases/consecutive_image_links_issue498.html create mode 100644 test/tm-cases/consecutive_image_links_issue498.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 6eeff77d..79ffbc46 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1591,6 +1591,7 @@ def _do_links(self, text): if "smarty-pants" in self.extras: result = result.replace('"', self._escape_table['"']) curr_pos = start_idx + len(result) + anchor_allowed_pos = start_idx + len(result) text = text[:start_idx] + result + text[url_end_idx:] elif start_idx >= anchor_allowed_pos: safe_link = self._safe_protocols.match(url) or url.startswith('#') diff --git a/test/tm-cases/consecutive_image_links_issue498.html b/test/tm-cases/consecutive_image_links_issue498.html new file mode 100644 index 00000000..776f324a --- /dev/null +++ b/test/tm-cases/consecutive_image_links_issue498.html @@ -0,0 +1,4 @@ +

A +A +A +A

diff --git a/test/tm-cases/consecutive_image_links_issue498.text b/test/tm-cases/consecutive_image_links_issue498.text new file mode 100644 index 00000000..34a6a5cb --- /dev/null +++ b/test/tm-cases/consecutive_image_links_issue498.text @@ -0,0 +1,4 @@ +[![A](https://img.shields.io/badge/license-AGPL--3.0-orange?style=flat-square&color=0f6adb&logo=github)](https://github.com/aoaostar) +[![A](https://img.shields.io/badge/license-AGPL--3.0-orange?style=flat-square&color=0f6adb&logo=github)](https://github.com/aoaostar) +[![A](https://img.shields.io/badge/license-AGPL--3.0-orange?style=flat-square&color=0f6adb&logo=github)](https://github.com/aoaostar) +[![A](https://img.shields.io/badge/license-AGPL--3.0-orange?style=flat-square&color=0f6adb&logo=github)](https://github.com/aoaostar) \ No newline at end of file From 16554e742c282c03343e77b78eda5fadf408ec19 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 4 Feb 2023 17:50:23 +0000 Subject: [PATCH 08/58] Update changes.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 78e8a0f9..d311a235 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## python-markdown2 2.4.8 (not yet released) -(nothing yet) +- [pull #499] Fix images not being procesed correctly (#498) ## python-markdown2 2.4.7 From f456341fde46e0a492d0bc0e2ee39957d4fb770d Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sun, 12 Feb 2023 17:36:38 -0500 Subject: [PATCH 09/58] prepare for 2.4.8 release --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d311a235..48256c79 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # python-markdown2 Changelog -## python-markdown2 2.4.8 (not yet released) +## python-markdown2 2.4.8 - [pull #499] Fix images not being procesed correctly (#498) From 42067dce00edc460a5697f106310e667c72b6232 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sun, 12 Feb 2023 17:36:48 -0500 Subject: [PATCH 
10/58] prep for future dev --- CHANGES.md | 5 +++++ lib/markdown2.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 48256c79..aaa3b3b5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # python-markdown2 Changelog +## python-markdown2 2.4.9 (not yet released) + +(nothing yet) + + ## python-markdown2 2.4.8 - [pull #499] Fix images not being procesed correctly (#498) diff --git a/lib/markdown2.py b/lib/markdown2.py index 79ffbc46..360847d1 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -99,7 +99,7 @@ # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (2, 4, 8) +__version_info__ = (2, 4, 9) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" From f84d05b171fd9c24cc8bdfad3e6eef8c9eeed831 Mon Sep 17 00:00:00 2001 From: Ankit Mahato Date: Sat, 18 Feb 2023 11:10:43 +0530 Subject: [PATCH 11/58] Add tag to html-classes extra --- lib/markdown2.py | 4 ++-- test/tm-cases/html_classes.html | 4 ++-- test/tm-cases/html_classes.opts | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 360847d1..fce8e4ba 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -56,7 +56,7 @@ highlighting when using fenced-code-blocks and highlightjs. * html-classes: Takes a dict mapping html tag names (lowercase) to a string to use for a "class" tag attribute. Currently only supports "img", - "table", "pre", "code", "ul" and "ol" tags. Add an issue if you require + "table", "thead", "pre", "code", "ul" and "ol" tags. Add an issue if you require this for other tags. * link-patterns: Auto-link given regex patterns in text (e.g. bug number references, revision number references). 
@@ -1139,7 +1139,7 @@ def _table_sub(self, match): align_from_col_idx[col_idx] = ' style="text-align:right;"' # thead - hlines = ['' % self._html_class_str_from_tag('table'), '', ''] + hlines = ['' % self._html_class_str_from_tag('table'), '' % self._html_class_str_from_tag('thead'), ''] cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))] for col_idx, col in enumerate(cols): hlines.append(' %s' % ( diff --git a/test/tm-cases/html_classes.html b/test/tm-cases/html_classes.html index 2ea98976..4caa7921 100644 --- a/test/tm-cases/html_classes.html +++ b/test/tm-cases/html_classes.html @@ -1,5 +1,5 @@ - + @@ -18,7 +18,7 @@
Header 1 Header 2
- + diff --git a/test/tm-cases/html_classes.opts b/test/tm-cases/html_classes.opts index 923dea07..2a11cef3 100644 --- a/test/tm-cases/html_classes.opts +++ b/test/tm-cases/html_classes.opts @@ -7,6 +7,7 @@ "code": "codesyntaxcolor", "img": "custom-image-class", "table": "table table-striped", + "thead": "table-light", "p": "col-xs-3 custom-paragraph-class", "ul": "custom-unordered-list-class", "ol": "custom-ordered-list-class" From e6602a69ac7565a55392d59c38237fbd4c282547 Mon Sep 17 00:00:00 2001 From: Ankit Mahato Date: Mon, 20 Feb 2023 10:53:53 +0530 Subject: [PATCH 12/58] Update markdown2.py --- lib/markdown2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index fce8e4ba..ddadb6b0 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1215,7 +1215,7 @@ def format_cell(text): add_hline('' % self._html_class_str_from_tag('table')) # Check if first cell of first row is a header cell. If so, assume the whole row is a header row. 
if rows and rows[0] and re.match(r"^\s*~", rows[0][0]): - add_hline('', 1) + add_hline('' % self._html_class_str_from_tag('thead'), 1) add_hline('', 2) for cell in rows[0]: add_hline("".format(format_cell(cell)), 3) From 1815e4a29cf5c4cbb75d654b0138f5aa322f735e Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Mon, 20 Feb 2023 23:54:58 -0500 Subject: [PATCH 13/58] Changes and contributors --- CHANGES.md | 2 +- CONTRIBUTORS.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index aaa3b3b5..614d36fd 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## python-markdown2 2.4.9 (not yet released) -(nothing yet) +- [pull #500] Add `` tag to html-classes extra ## python-markdown2 2.4.8 diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 5e0980ab..6b5b2acb 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -58,3 +58,4 @@ gitbra (github.com/gitbra) Max Omdal (github.com/momja) Kishore (github.com/jk6521) Ircama (github.com/Ircama) +Ankit Mahato (github.com/animator) From ffc92626105bcb320947cd4c7fa10fc26b6f507e Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 16:50:00 +0000 Subject: [PATCH 14/58] Fix link patterns extra matching against internal hashes --- lib/markdown2.py | 15 ++++++++++++++- .../link_patterns_hash_matching_issue287.html | 1 + .../link_patterns_hash_matching_issue287.opts | 7 +++++++ .../link_patterns_hash_matching_issue287.text | 1 + 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.html create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.opts create mode 100644 test/tm-cases/link_patterns_hash_matching_issue287.text diff --git a/lib/markdown2.py b/lib/markdown2.py index ddadb6b0..4583131a 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2241,7 +2241,7 @@ def _do_strike(self, text): def _do_underline(self, text): text = self._underline_re.sub(r"\1", text) return text - + _tg_spoiler_re = 
re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S) def _do_tg_spoiler(self, text): text = self._tg_spoiler_re.sub(r"\1", text) @@ -2533,6 +2533,9 @@ def _do_link_patterns(self, text): for regex, repl in self.link_patterns: replacements = [] for match in regex.finditer(text): + if any(self._match_overlaps_substr(text, match, h) for h in link_from_hash): + continue + if hasattr(repl, "__call__"): href = repl(match) else: @@ -2637,6 +2640,16 @@ def _uniform_indent(self, text, indent, include_empty_lines=False): for line in text.splitlines(True) ) + @staticmethod + def _match_overlaps_substr(text, match, substr): + for instance in re.finditer(re.escape(substr), text): + start, end = instance.span() + if start <= match.start() <= end: + return True + if start <= match.end() <= end: + return True + return False + class MarkdownWithExtras(Markdown): """A markdowner class that enables most extras: diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.html b/test/tm-cases/link_patterns_hash_matching_issue287.html new file mode 100644 index 00000000..7cee86e2 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.html @@ -0,0 +1 @@ +

this is a test issue #1234 with a test commit (addeddd) made by test @username more text

diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.opts b/test/tm-cases/link_patterns_hash_matching_issue287.opts new file mode 100644 index 00000000..21062d75 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.opts @@ -0,0 +1,7 @@ +{"extras": ["link-patterns"], + "link_patterns": [ + (re.compile("#(\d+)", re.I), r"https://github.com/pyfa-org/Pyfa/issues/\1"), + (re.compile("@(\w+)", re.I), r"https://github.com/\1"), + (re.compile("([0-9a-f]{6,40})", re.I), r"https://github.com/pyfa-org/Pyfa/commit/\1") + ] +} diff --git a/test/tm-cases/link_patterns_hash_matching_issue287.text b/test/tm-cases/link_patterns_hash_matching_issue287.text new file mode 100644 index 00000000..b0b5f4b9 --- /dev/null +++ b/test/tm-cases/link_patterns_hash_matching_issue287.text @@ -0,0 +1 @@ +this is a test issue #1234 with a test commit (addeddd) made by test @username more text From d7d6471ff578cfb21a88bb208f8b36202a1b54d7 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 16:57:59 +0000 Subject: [PATCH 15/58] Update `CHANGES.md`. Also added docstring to `_match_overlaps_substr` method --- CHANGES.md | 1 + lib/markdown2.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 614d36fd..c16e8fef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.9 (not yet released) - [pull #500] Add `
` tag to html-classes extra +- [pull #501] Fix link patterns extra matching against internal hashes ## python-markdown2 2.4.8 diff --git a/lib/markdown2.py b/lib/markdown2.py index 4583131a..738f91e4 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2642,6 +2642,9 @@ def _uniform_indent(self, text, indent, include_empty_lines=False): @staticmethod def _match_overlaps_substr(text, match, substr): + ''' + Checks if a regex match overlaps with a substring in the given text. + ''' for instance in re.finditer(re.escape(substr), text): start, end = instance.span() if start <= match.start() <= end: From 5c722254bacf3908999b19ccfb59aebd5a2cf851 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 17:33:24 +0000 Subject: [PATCH 16/58] Replace deprecated `optparse` with `argparse` --- lib/markdown2.py | 53 ++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index ddadb6b0..3717b864 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -103,14 +103,14 @@ __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" -import sys -import re -import logging -from hashlib import sha256 -import optparse -from random import random, randint +import argparse import codecs +import logging +import re +import sys from collections import defaultdict +from hashlib import sha256 +from random import randint, random # ---- globals @@ -2956,7 +2956,7 @@ def _html_escape_url(attr, safe_mode=False): # ---- mainline -class _NoReflowFormatter(optparse.IndentedHelpFormatter): +class _NoReflowFormatter(argparse.RawDescriptionHelpFormatter): """An optparse formatter that does NOT reflow the description.""" def format_description(self, description): return description or "" @@ -2973,38 +2973,43 @@ def main(argv=None): if not logging.root.handlers: logging.basicConfig() - usage = "usage: %prog [PATHS...]" - version = "%prog "+__version__ - parser = 
optparse.OptionParser(prog="markdown2", usage=usage, - version=version, description=cmdln_desc, - formatter=_NoReflowFormatter()) - parser.add_option("-v", "--verbose", dest="log_level", + parser = argparse.ArgumentParser( + prog="markdown2", description=cmdln_desc, + formatter_class=_NoReflowFormatter + ) + parser.add_argument('paths', nargs='*', + help=( + 'optional list of files to convert.' + 'If none are given, stdin will be used' + )) + parser.add_argument("-v", "--verbose", dest="log_level", action="store_const", const=logging.DEBUG, help="more verbose output") - parser.add_option("--encoding", + parser.add_argument("--encoding", help="specify encoding of text content") - parser.add_option("--html4tags", action="store_true", default=False, + parser.add_argument("--html4tags", action="store_true", default=False, help="use HTML 4 style for empty element tags") - parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", + parser.add_argument("-s", "--safe", metavar="MODE", dest="safe_mode", help="sanitize literal HTML: 'escape' escapes " "HTML meta chars, 'replace' replaces with an " "[HTML_REMOVED] note") - parser.add_option("-x", "--extras", action="append", + parser.add_argument("-x", "--extras", action="append", help="Turn on specific extra features (not part of " "the core Markdown spec). See above.") - parser.add_option("--use-file-vars", + parser.add_argument("--use-file-vars", help="Look for and use Emacs-style 'markdown-extras' " "file var to turn on extras. 
See " "") - parser.add_option("--link-patterns-file", + parser.add_argument("--link-patterns-file", help="path to a link pattern file") - parser.add_option("--self-test", action="store_true", + parser.add_argument("--self-test", action="store_true", help="run internal self-tests (some doctests)") - parser.add_option("--compare", action="store_true", + parser.add_argument("--compare", action="store_true", help="run against Markdown.pl as well (for testing)") parser.set_defaults(log_level=logging.INFO, compare=False, encoding="utf-8", safe_mode=None, use_file_vars=False) - opts, paths = parser.parse_args() + opts = parser.parse_args() + paths = opts.paths log.setLevel(opts.log_level) if opts.self_test: @@ -3046,7 +3051,7 @@ def main(argv=None): else: link_patterns = None - from os.path import join, dirname, abspath, exists + from os.path import abspath, dirname, exists, join markdown_pl = join(dirname(dirname(abspath(__file__))), "test", "Markdown.pl") if not paths: @@ -3059,7 +3064,7 @@ def main(argv=None): text = fp.read() fp.close() if opts.compare: - from subprocess import Popen, PIPE + from subprocess import PIPE, Popen print("==== Markdown.pl ====") p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True) p.stdin.write(text.encode('utf-8')) From 5d6b44d5029f1aef05028f329ab1cd2e83601cac Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 11 Mar 2023 17:36:53 +0000 Subject: [PATCH 17/58] Update `CHANGES.md` and fix docstring --- CHANGES.md | 1 + lib/markdown2.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 614d36fd..5f223aba 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.9 (not yet released) - [pull #500] Add `` tag to html-classes extra +- [pull #502] Replace deprecated `optparse` with `argparse` ## python-markdown2 2.4.8 diff --git a/lib/markdown2.py b/lib/markdown2.py index 3717b864..5eb732f1 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py 
@@ -2957,7 +2957,7 @@ def _html_escape_url(attr, safe_mode=False): # ---- mainline class _NoReflowFormatter(argparse.RawDescriptionHelpFormatter): - """An optparse formatter that does NOT reflow the description.""" + """An argparse formatter that does NOT reflow the description.""" def format_description(self, description): return description or "" From b5cbd8e0da872afa24e703cfbb0e60d87c415440 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 13 Mar 2023 09:04:02 +0000 Subject: [PATCH 18/58] Restore original usage message and re-add version arg --- lib/markdown2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 5eb732f1..9121d29d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2241,7 +2241,7 @@ def _do_strike(self, text): def _do_underline(self, text): text = self._underline_re.sub(r"\1", text) return text - + _tg_spoiler_re = re.compile(r"\|\|\s?(.+?)\s?\|\|", re.S) def _do_tg_spoiler(self, text): text = self._tg_spoiler_re.sub(r"\1", text) @@ -2974,9 +2974,11 @@ def main(argv=None): logging.basicConfig() parser = argparse.ArgumentParser( - prog="markdown2", description=cmdln_desc, + prog="markdown2", description=cmdln_desc, usage='%(prog)s [PATHS...]', formatter_class=_NoReflowFormatter ) + parser.add_argument('--version', action='version', + version='%(prog)s {version}'.format(version=__version__)) parser.add_argument('paths', nargs='*', help=( 'optional list of files to convert.' 
From aa0ea8a726aebf507342497422367afd3ea3da95 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 8 Apr 2023 12:49:07 +0100 Subject: [PATCH 19/58] Fix `_uniform_outdent` failing with empty strings (issue #505) --- lib/markdown2.py | 4 ++++ test/tm-cases/empty_fenced_code_blocks.html | 7 +++++++ test/tm-cases/empty_fenced_code_blocks.opts | 1 + test/tm-cases/empty_fenced_code_blocks.text | 5 +++++ 4 files changed, 17 insertions(+) create mode 100644 test/tm-cases/empty_fenced_code_blocks.html create mode 100644 test/tm-cases/empty_fenced_code_blocks.opts create mode 100644 test/tm-cases/empty_fenced_code_blocks.text diff --git a/lib/markdown2.py b/lib/markdown2.py index ddadb6b0..16358e9b 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2610,6 +2610,10 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): for line in text.splitlines() ] + # if no whitespace detected (ie: no lines in code block, issue #505) + if not whitespace: + return '', text + # get minimum common whitespace outdent = min(i for i in whitespace if i is not None) # adjust min common ws to be within bounds diff --git a/test/tm-cases/empty_fenced_code_blocks.html b/test/tm-cases/empty_fenced_code_blocks.html new file mode 100644 index 00000000..d036ed37 --- /dev/null +++ b/test/tm-cases/empty_fenced_code_blocks.html @@ -0,0 +1,7 @@ +
+

+
+
+ +

+
diff --git a/test/tm-cases/empty_fenced_code_blocks.opts b/test/tm-cases/empty_fenced_code_blocks.opts new file mode 100644 index 00000000..ba411ac7 --- /dev/null +++ b/test/tm-cases/empty_fenced_code_blocks.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"]} diff --git a/test/tm-cases/empty_fenced_code_blocks.text b/test/tm-cases/empty_fenced_code_blocks.text new file mode 100644 index 00000000..ed2fd281 --- /dev/null +++ b/test/tm-cases/empty_fenced_code_blocks.text @@ -0,0 +1,5 @@ +```shell +``` + +``` +``` From cb57500fb01c968b9dc4bd5d586e7b75c533ecac Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 8 Apr 2023 13:28:14 +0100 Subject: [PATCH 20/58] Fix code blocks containing only newlines --- lib/markdown2.py | 7 ++++--- test/tm-cases/empty_fenced_code_blocks.html | 13 +++++++++++++ test/tm-cases/empty_fenced_code_blocks.text | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 16358e9b..dbb8c16d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2609,16 +2609,17 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): re.findall(r'^[ \t]*', line)[0] if line else None for line in text.splitlines() ] + whitespace_not_empty = [i for i in whitespace if i is not None] # if no whitespace detected (ie: no lines in code block, issue #505) - if not whitespace: + if not whitespace_not_empty: return '', text # get minimum common whitespace - outdent = min(i for i in whitespace if i is not None) + outdent = min(whitespace_not_empty) # adjust min common ws to be within bounds if min_outdent is not None: - outdent = min([i for i in whitespace if i is not None and i >= min_outdent] or [min_outdent]) + outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent]) if max_outdent is not None: outdent = min(outdent, max_outdent) diff --git a/test/tm-cases/empty_fenced_code_blocks.html b/test/tm-cases/empty_fenced_code_blocks.html index d036ed37..70cbf9ca 
100644 --- a/test/tm-cases/empty_fenced_code_blocks.html +++ b/test/tm-cases/empty_fenced_code_blocks.html @@ -4,4 +4,17 @@

+
+ +

Pygments removes the empty line whitespace from the next code block

+ +
+

+
+
+ +

+
+
+
 
diff --git a/test/tm-cases/empty_fenced_code_blocks.text b/test/tm-cases/empty_fenced_code_blocks.text index ed2fd281..edce91ae 100644 --- a/test/tm-cases/empty_fenced_code_blocks.text +++ b/test/tm-cases/empty_fenced_code_blocks.text @@ -2,4 +2,19 @@ ``` ``` +``` + +Pygments removes the empty line whitespace from the next code block +```shell + + + + +``` + +``` + + + + ``` From 934e937669bfe9236db8ea52212d1271666ea354 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 8 Apr 2023 13:31:35 +0100 Subject: [PATCH 21/58] Update changes.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 614d36fd..bb0e3bf8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.9 (not yet released) - [pull #500] Add `
` tag to html-classes extra +- [pull #506] Fix `_uniform_outdent` failing with empty strings (issue #505) ## python-markdown2 2.4.8 From c9391257691e9ef895e1e6894cd61a8dfdf232fa Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 26 Apr 2023 22:05:43 +0100 Subject: [PATCH 22/58] Fix #508 --- lib/markdown2.py | 6 +++++- test/tm-cases/hash_html_blocks_issue_508.html | 8 ++++++++ test/tm-cases/hash_html_blocks_issue_508.text | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/hash_html_blocks_issue_508.html create mode 100644 test/tm-cases/hash_html_blocks_issue_508.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 50d97215..7f55b458 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -890,7 +890,7 @@ def _strict_tag_block_sub(self, text, html_tags_re, callback): tag_count -= 1 else: # if close tag is in same line - if '' % is_markup.group(2) in chunk[is_markup.end():]: + if self._tag_is_closed(is_markup.group(2), chunk): # we must ignore these is_markup = None else: @@ -908,6 +908,10 @@ def _strict_tag_block_sub(self, text, html_tags_re, callback): return result + def _tag_is_closed(self, tag_name, text): + # super basic check if number of open tags == number of closing tags + return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('' % tag_name, text)) + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. diff --git a/test/tm-cases/hash_html_blocks_issue_508.html b/test/tm-cases/hash_html_blocks_issue_508.html new file mode 100644 index 00000000..f39d91a5 --- /dev/null +++ b/test/tm-cases/hash_html_blocks_issue_508.html @@ -0,0 +1,8 @@ +
+
+ +
+ +
+ +
diff --git a/test/tm-cases/hash_html_blocks_issue_508.text b/test/tm-cases/hash_html_blocks_issue_508.text new file mode 100644 index 00000000..56755712 --- /dev/null +++ b/test/tm-cases/hash_html_blocks_issue_508.text @@ -0,0 +1,5 @@ +
+
+
+
+
From e0fea567c7947dd9ddee870fa56003b212a31b74 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 26 Apr 2023 22:09:25 +0100 Subject: [PATCH 23/58] Update #508 test --- test/tm-cases/hash_html_blocks_issue_508.html | 4 ++++ test/tm-cases/hash_html_blocks_issue_508.text | 2 ++ 2 files changed, 6 insertions(+) diff --git a/test/tm-cases/hash_html_blocks_issue_508.html b/test/tm-cases/hash_html_blocks_issue_508.html index f39d91a5..2c176c15 100644 --- a/test/tm-cases/hash_html_blocks_issue_508.html +++ b/test/tm-cases/hash_html_blocks_issue_508.html @@ -6,3 +6,7 @@
+ +
    +
  • A
  • +
diff --git a/test/tm-cases/hash_html_blocks_issue_508.text b/test/tm-cases/hash_html_blocks_issue_508.text index 56755712..dbe7342d 100644 --- a/test/tm-cases/hash_html_blocks_issue_508.text +++ b/test/tm-cases/hash_html_blocks_issue_508.text @@ -3,3 +3,5 @@
+ +- A From e2c1c6467591a5f96cdf5497e8f28d3858602795 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 26 Apr 2023 22:13:56 +0100 Subject: [PATCH 24/58] Update changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 4aee683e..c6b1df5e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ - [pull #501] Fix link patterns extra matching against internal hashes - [pull #502] Replace deprecated `optparse` with `argparse` - [pull #506] Fix `_uniform_outdent` failing with empty strings (issue #505) +- [pull #509] Fix HTML elements not unhashing correctly (issue 508) ## python-markdown2 2.4.8 From 0fde72b2f55f875ffc67a6b26e402d433b1e0c98 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 30 Apr 2023 12:19:45 +0100 Subject: [PATCH 25/58] Remove deprecated imp module --- test/testlib.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/testlib.py b/test/testlib.py index 2c345573..c6244dc4 100644 --- a/test/testlib.py +++ b/test/testlib.py @@ -54,14 +54,14 @@ import os -from os.path import join, basename, dirname, abspath, splitext, \ +from os.path import join, basename, abspath, splitext, \ isfile, isdir, normpath, exists import sys import getopt import glob import time import unittest -import imp +import importlib import logging import textwrap import traceback @@ -234,13 +234,12 @@ def testmods_from_testdir(testdir): testmod_name = splitext(basename(testmod_path))[0] log.debug("import test module '%s'", testmod_path) try: - iinfo = imp.find_module(testmod_name, [dirname(testmod_path)]) testabsdir = abspath(testdir) sys.path.insert(0, testabsdir) old_dir = os.getcwd() os.chdir(testdir) try: - testmod = imp.load_module(testmod_name, *iinfo) + testmod = importlib.import_module(testmod_name) finally: os.chdir(old_dir) sys.path.remove(testabsdir) From 50a9f171cbe22758b5940e8ed290cd5afda4536c Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 30 Apr 2023 12:21:52 +0100 Subject: [PATCH 26/58] Update 
changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index c6b1df5e..70518201 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,7 @@ - [pull #502] Replace deprecated `optparse` with `argparse` - [pull #506] Fix `_uniform_outdent` failing with empty strings (issue #505) - [pull #509] Fix HTML elements not unhashing correctly (issue 508) +- [pull #511] Remove deprecated `imp` module (issue #510) ## python-markdown2 2.4.8 From 3cb418775e69f580a769b38d889e4b270a2f5315 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 25 May 2023 08:33:23 +0100 Subject: [PATCH 27/58] Allow link patterns to be specified via extras dict --- lib/markdown2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/markdown2.py b/lib/markdown2.py index 7f55b458..f5e5fbc8 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -235,11 +235,14 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, self._instance_extras = self.extras.copy() if 'link-patterns' in self.extras: + # allow link patterns via extras dict without kwarg explicitly set + link_patterns = link_patterns or extras['link-patterns'] if link_patterns is None: # if you have specified that the link-patterns extra SHOULD # be used (via self.extras) but you haven't provided anything # via the link_patterns argument then an error is raised raise MarkdownError("If the 'link-patterns' extra is used, an argument for 'link_patterns' is required") + self.link_patterns = link_patterns self.footnote_title = footnote_title self.footnote_return_symbol = footnote_return_symbol From 731cff1dc8b737543f6fbfb40fef06d01a252ddf Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 25 May 2023 18:56:41 +0100 Subject: [PATCH 28/58] Update changes.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 70518201..35200511 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ - [pull #506] Fix `_uniform_outdent` failing with empty strings (issue 
#505) - [pull #509] Fix HTML elements not unhashing correctly (issue 508) - [pull #511] Remove deprecated `imp` module (issue #510) +- [pull #512] Allow link patterns to be passed via extras dict ## python-markdown2 2.4.8 From 50e8f512a267e18350484a213f21e55b0512425b Mon Sep 17 00:00:00 2001 From: pknowles Date: Mon, 22 May 2023 14:03:08 -0700 Subject: [PATCH 29/58] add a more general url parser/sanitizer copies a url regex from pagedown (used by stackoverflow) --- lib/markdown2.py | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index f5e5fbc8..21727561 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1483,7 +1483,43 @@ def _protect_url(self, url): self._escape_table[url] = key return key - _safe_protocols = re.compile(r'(https?|ftp):', re.I) + # _safe_href is copied from pagedown's Markdown.Sanitizer.js + # Inlining the entire license as I don't have the time to add it properly for upstreaming + # From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt + # + # A javascript port of Markdown, as used on Stack Overflow + # and the rest of Stack Exchange network. + # + # Largely based on showdown.js by John Fraser (Attacklab). + # + # Original Markdown Copyright (c) 2004-2005 John Gruber + # + # + # + # Original Showdown code copyright (c) 2007 John Fraser + # + # Modifications and bugfixes (c) 2009 Dana Robinson + # Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to deal + # in the Software without restriction, including without limitation the rights + # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + # copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in + # all copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + # THE SOFTWARE. + _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+$', re.I) + def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. 
@@ -1601,7 +1637,7 @@ def _do_links(self, text): anchor_allowed_pos = start_idx + len(result) text = text[:start_idx] + result + text[url_end_idx:] elif start_idx >= anchor_allowed_pos: - safe_link = self._safe_protocols.match(url) or url.startswith('#') + safe_link = self._safe_href.match(url) if self.safe_mode and not safe_link: result_head = '' % (title_str) else: @@ -1657,7 +1693,7 @@ def _do_links(self, text): curr_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] elif start_idx >= anchor_allowed_pos: - if self.safe_mode and not self._safe_protocols.match(url): + if self.safe_mode and not self._safe_href.match(url): result_head = '' % (title_str) else: result_head = '' % (self._protect_url(url), title_str) From 76909905b1190fdf81fdeb8403ba035558261be9 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 29 May 2023 16:40:13 +0100 Subject: [PATCH 30/58] Add tests for relative links in safe mode and remove MIT license text in favour of link. Project source, copyright notices and link to license are kept but the full MIT text was removed --- lib/markdown2.py | 30 --------------------- test/tm-cases/relative_links_safe_mode.html | 6 +++++ test/tm-cases/relative_links_safe_mode.opts | 1 + test/tm-cases/relative_links_safe_mode.text | 6 +++++ 4 files changed, 13 insertions(+), 30 deletions(-) create mode 100644 test/tm-cases/relative_links_safe_mode.html create mode 100644 test/tm-cases/relative_links_safe_mode.opts create mode 100644 test/tm-cases/relative_links_safe_mode.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 21727561..9f79ebce 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1484,40 +1484,10 @@ def _protect_url(self, url): return key # _safe_href is copied from pagedown's Markdown.Sanitizer.js - # Inlining the entire license as I don't have the time to add it properly for upstreaming # From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt - # - # A javascript port of Markdown, as used 
on Stack Overflow - # and the rest of Stack Exchange network. - # - # Largely based on showdown.js by John Fraser (Attacklab). - # - # Original Markdown Copyright (c) 2004-2005 John Gruber - # - # - # # Original Showdown code copyright (c) 2007 John Fraser - # # Modifications and bugfixes (c) 2009 Dana Robinson # Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. - # - # Permission is hereby granted, free of charge, to any person obtaining a copy - # of this software and associated documentation files (the "Software"), to deal - # in the Software without restriction, including without limitation the rights - # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - # copies of the Software, and to permit persons to whom the Software is - # furnished to do so, subject to the following conditions: - # - # The above copyright notice and this permission notice shall be included in - # all copies or substantial portions of the Software. - # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - # THE SOFTWARE. _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+$', re.I) def _do_links(self, text): diff --git a/test/tm-cases/relative_links_safe_mode.html b/test/tm-cases/relative_links_safe_mode.html new file mode 100644 index 00000000..e94bea79 --- /dev/null +++ b/test/tm-cases/relative_links_safe_mode.html @@ -0,0 +1,6 @@ +

link1 +link2 +link3 +link4 +link5 +link6

diff --git a/test/tm-cases/relative_links_safe_mode.opts b/test/tm-cases/relative_links_safe_mode.opts new file mode 100644 index 00000000..ad487c04 --- /dev/null +++ b/test/tm-cases/relative_links_safe_mode.opts @@ -0,0 +1 @@ +{"safe_mode": "escape"} diff --git a/test/tm-cases/relative_links_safe_mode.text b/test/tm-cases/relative_links_safe_mode.text new file mode 100644 index 00000000..d445cfd1 --- /dev/null +++ b/test/tm-cases/relative_links_safe_mode.text @@ -0,0 +1,6 @@ +[link1](https://www.hostname.com/absolute/path) +[link2](https://www.hostname.com/absolute/path#anchor-on-another-page) +[link3](#anchor-on-this-page) +[link4](/) +[link5](/absolute/path) +[link6](../relative/path) From 2be9f5bcffcf59c63cf51a33f287b0de293c194b Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 29 May 2023 16:57:44 +0100 Subject: [PATCH 31/58] Update changes.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 35200511..bd2076e9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,7 @@ - [pull #509] Fix HTML elements not unhashing correctly (issue 508) - [pull #511] Remove deprecated `imp` module (issue #510) - [pull #512] Allow link patterns to be passed via extras dict +- [pull #513] Fix relative links not working in safe mode (issue #254) ## python-markdown2 2.4.8 From 4a46040434b5cd125fb42c89a1a6a3b00a149900 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 29 May 2023 17:03:43 +0100 Subject: [PATCH 32/58] Update tests for new safe href regex --- lib/markdown2.py | 2 +- test/tm-cases/basic_safe_mode.html | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 9f79ebce..48d9d4c6 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1488,7 +1488,7 @@ def _protect_url(self, url): # Original Showdown code copyright (c) 2007 John Fraser # Modifications and bugfixes (c) 2009 Dana Robinson # Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. 
- _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+$', re.I) + _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]*$', re.I) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. diff --git a/test/tm-cases/basic_safe_mode.html b/test/tm-cases/basic_safe_mode.html index 95c00ad0..60051078 100644 --- a/test/tm-cases/basic_safe_mode.html +++ b/test/tm-cases/basic_safe_mode.html @@ -6,13 +6,13 @@

[HTML_REMOVED]alert(1)[HTML_REMOVED]

-

link1

+

link1

link2

-

link3

+

link3

-

link4 >[HTML_REMOVED]alert(1)[HTML_REMOVED]

+

link4 >[HTML_REMOVED]alert(1)[HTML_REMOVED]

link5

From d8699306076973641219703ba9d4be2398442a99 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:04:15 +0100 Subject: [PATCH 33/58] Fix list items losing nesting when following another list --- lib/markdown2.py | 3 ++- test/tm-cases/seperated_list_items.html | 12 ++++++++++++ test/tm-cases/seperated_list_items.text | 6 ++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/seperated_list_items.html create mode 100644 test/tm-cases/seperated_list_items.text diff --git a/lib/markdown2.py b/lib/markdown2.py index f5e5fbc8..e71f9210 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1887,7 +1887,8 @@ def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) + item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1] + item = self._run_block_gamut(item) else: # Recursion for sub-lists: item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1]) diff --git a/test/tm-cases/seperated_list_items.html b/test/tm-cases/seperated_list_items.html new file mode 100644 index 00000000..140ad893 --- /dev/null +++ b/test/tm-cases/seperated_list_items.html @@ -0,0 +1,12 @@ +
    +
  • Item 1 +ABCDEF

  • +
  • Item 2

    + +
      +
    • Item 3 +
        +
      • Item 4
      • +
    • +
  • +
diff --git a/test/tm-cases/seperated_list_items.text b/test/tm-cases/seperated_list_items.text new file mode 100644 index 00000000..1a5c991a --- /dev/null +++ b/test/tm-cases/seperated_list_items.text @@ -0,0 +1,6 @@ +- Item 1 + ABCDEF + +- Item 2 + - Item 3 + - Item 4 From 6725ef01f707aeff023a4b6976aa7a741f109bfb Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:14:30 +0100 Subject: [PATCH 34/58] Update `_uniform_indent` to allow more granular control of whitespace only lines. Also converted it and `_uniform_outdent` to `staticmethod`s and added docstrings --- lib/markdown2.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index e71f9210..79501bae 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2196,7 +2196,7 @@ def _wavedrom_block_sub(self, match): return self._uniform_indent( '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag), - lead_indent, include_empty_lines=True + lead_indent, indent_empty_lines=True ) def _do_wavedrom_blocks(self, text): @@ -2607,13 +2607,16 @@ def _outdent(self, text): # Remove one level of line-leading tabs or spaces return self._outdent_re.sub('', text) - def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): - # Removes the smallest common leading indentation from each (non empty) - # line of `text` and returns said indent along with the outdented text. - # The `min_outdent` kwarg makes sure the smallest common whitespace - # must be at least this size - # The `max_outdent` sets the maximum amount a line can be - # outdented by + @staticmethod + def _uniform_outdent(text, min_outdent=None, max_outdent=None): + ''' + Removes the smallest common leading indentation from each (non empty) + line of `text` and returns said indent along with the outdented text. 
+ + Args: + min_outdent: make sure the smallest common whitespace is at least this size + max_outdent: the maximum amount a line can be outdented by + ''' # find the leading whitespace for every line whitespace = [ @@ -2647,11 +2650,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): return outdent, ''.join(outdented) - def _uniform_indent(self, text, indent, include_empty_lines=False): - return ''.join( - (indent + line if line.strip() or include_empty_lines else '') - for line in text.splitlines(True) - ) + @staticmethod + def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False): + ''' + Uniformly indent a block of text by a fixed amount + + Args: + text: the text to indent + indent: a string containing the indent to apply + include_empty_lines: don't remove whitespace only lines + indent_empty_lines: indent whitespace only lines with the rest of the text + ''' + blocks = [] + for line in text.splitlines(True): + if line.strip() or indent_empty_lines: + blocks.append(indent + line) + elif include_empty_lines: + blocks.append(line) + else: + blocks.append('') + return ''.join(blocks) @staticmethod def _match_overlaps_substr(text, match, substr): From 700ac816b8d7fca9ae1a67b25ed85450dcaae43c Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:27:08 +0100 Subject: [PATCH 35/58] Fix `markdown-in-html` extra not working within lists --- lib/markdown2.py | 34 +++++++++++++++--- test/tm-cases/markdown_in_html_in_lists.html | 37 ++++++++++++++++++++ test/tm-cases/markdown_in_html_in_lists.opts | 1 + test/tm-cases/markdown_in_html_in_lists.text | 17 +++++++++ 4 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 test/tm-cases/markdown_in_html_in_lists.html create mode 100644 test/tm-cases/markdown_in_html_in_lists.opts create mode 100644 test/tm-cases/markdown_in_html_in_lists.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 79501bae..8d4469e5 100755 --- a/lib/markdown2.py +++ 
b/lib/markdown2.py @@ -363,6 +363,9 @@ def convert(self, text): # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) + if 'markdown-in-html' in self.extras: + text = self._do_markdown_in_html(text) + if "fenced-code-blocks" in self.extras and self.safe_mode: text = self._do_fenced_code_blocks(text) @@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False): return text - def _strict_tag_block_sub(self, text, html_tags_re, callback): + def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False): + ''' + Finds and substitutes HTML blocks within blocks of text + + Args: + text: the text to search + html_tags_re: a regex pattern of HTML block tags to match against. + For example, `Markdown._block_tags_a` + callback: callback function that receives the found HTML text block + allow_indent: allow matching HTML blocks that are not completely outdented + ''' tag_count = 0 current_tag = html_tags_re block = '' result = '' for chunk in text.splitlines(True): - is_markup = re.match(r'^(?:(?=))?(?)' % current_tag, chunk) + is_markup = re.match( + r'^(\s{0,%s})(?:(?=))?(?)' % ('' if allow_indent else '0', current_tag), chunk + ) block += chunk if is_markup: - if chunk.startswith('' % tag_name, text)) == len(re.findall('' % tag_name, text)) + def _do_markdown_in_html(self, text): + def callback(block): + indent, block = self._uniform_outdent(block) + block = self._hash_html_block_sub(block) + block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False) + return block + + return self._strict_tag_block_sub(text, self._block_tags_a, callback, True) + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. 
diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html new file mode 100644 index 00000000..981113f9 --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.html @@ -0,0 +1,37 @@ +
    +
  • Item 1

    + +
    + +
    Block one
    + +

    Some text

    + +
  • +
  • Item 2

    + +
      +
    • Item 3

      + +
        +
      • Item 4

        + +
        + +
        Block two
        + +

        Some text

        + +
      • +
    • +
    • Item 5

      + +
      + +
      Block three
      + +

      Some text

      + +
    • +
  • +
diff --git a/test/tm-cases/markdown_in_html_in_lists.opts b/test/tm-cases/markdown_in_html_in_lists.opts new file mode 100644 index 00000000..25fea79f --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.opts @@ -0,0 +1 @@ +{"extras": ["markdown-in-html"]} diff --git a/test/tm-cases/markdown_in_html_in_lists.text b/test/tm-cases/markdown_in_html_in_lists.text new file mode 100644 index 00000000..e629c55d --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.text @@ -0,0 +1,17 @@ +- Item 1 +
+ ###### Block one + Some text +
+- Item 2 + - Item 3 + - Item 4 +
+ ###### Block two + Some text +
+ - Item 5 +
+ ###### Block three + Some text +
From da54c21aaa105ce8c66952eb005c4d4070d059fa Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 5 Jun 2023 18:37:15 +0100 Subject: [PATCH 36/58] Expand nested_list test case for recent bug fix --- test/tm-cases/nested_list.html | 15 +++++++++++++++ test/tm-cases/nested_list.text | 12 +++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/test/tm-cases/nested_list.html b/test/tm-cases/nested_list.html index 05851236..49624d24 100644 --- a/test/tm-cases/nested_list.html +++ b/test/tm-cases/nested_list.html @@ -34,3 +34,18 @@
  • Item 3 - yes! just a single item
  • + +

    Other more different nested list:

    + +
      +
    • Item 1 +With some space after

    • +
    • Item 2

      + +
        +
      • Item 3 +
          +
        • Item 4
        • +
      • +
    • +
    diff --git a/test/tm-cases/nested_list.text b/test/tm-cases/nested_list.text index 94a2ece1..14fb9291 100644 --- a/test/tm-cases/nested_list.text +++ b/test/tm-cases/nested_list.text @@ -20,4 +20,14 @@ Slightly more nested list: + What + The + Code -* Item 3 - yes! just a single item \ No newline at end of file +* Item 3 - yes! just a single item + + +Other more different nested list: + +- Item 1 + With some space after + +- Item 2 + - Item 3 + - Item 4 From 349d96a03e35882537a7f07c3dc15b5e76138d45 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Thu, 22 Jun 2023 16:13:30 -0400 Subject: [PATCH 37/58] prepare for 2.4.9 release --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bd2076e9..10cbb65e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # python-markdown2 Changelog -## python-markdown2 2.4.9 (not yet released) +## python-markdown2 2.4.9 - [pull #500] Add `
    ` tag to html-classes extra - [pull #501] Fix link patterns extra matching against internal hashes From d022ba912e7631375b935934847a0dc7d5be0b27 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Thu, 22 Jun 2023 16:13:38 -0400 Subject: [PATCH 38/58] prep for future dev --- CHANGES.md | 5 +++++ lib/markdown2.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 10cbb65e..36592088 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # python-markdown2 Changelog +## python-markdown2 2.4.10 (not yet released) + +(nothing yet) + + ## python-markdown2 2.4.9 - [pull #500] Add `` tag to html-classes extra diff --git a/lib/markdown2.py b/lib/markdown2.py index eb388547..b8427b1a 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -99,7 +99,7 @@ # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (2, 4, 9) +__version_info__ = (2, 4, 10) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" From 902353eeaac3ae2bc89b48a710b975ecb5b02248 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Thu, 22 Jun 2023 17:17:55 -0400 Subject: [PATCH 39/58] Move test warnings to log after test run --- test/test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test.py b/test/test.py index 64e2ffeb..8cdf3a89 100755 --- a/test/test.py +++ b/test/test.py @@ -37,11 +37,12 @@ def setup(): setup() default_tags = [] + warnings = [] for extra_lib in ('pygments', 'wavedrom'): try: mod = importlib.import_module(extra_lib) except ImportError: - log.warning("skipping %s tests ('%s' module not found)" % (extra_lib, extra_lib)) + warnings.append("skipping %s tests ('%s' module not found)" % (extra_lib, extra_lib)) default_tags.append("-%s" % extra_lib) else: if extra_lib == 'pygments': @@ -50,9 +51,13 @@ def setup(): tag = "pygments<2.14" else: tag = "pygments>=2.14" - log.warning("skipping %s tests (pygments %s found)" % (tag, 
mod.__version__)) + warnings.append("skipping %s tests (pygments %s found)" % (tag, mod.__version__)) default_tags.append("-%s" % tag) retval = testlib.harness(testdir_from_ns=testdir_from_ns, default_tags=default_tags) + + for warning in warnings: + log.warning(warning) + sys.exit(retval) From 157b4b13c98effe70b63cb5179497e1244a94f10 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 24 Jun 2023 12:12:26 +0100 Subject: [PATCH 40/58] Add bug report issue template --- .github/ISSUE_TEMPLATE/bug_report.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..62104894 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Report a parsing error, unexpected output and other bugs +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Attach a minimal markdown snippet that causes the bug to occur. This should be placed inside a fenced code block to escape GitHub's formatting. + +If your snippet contains fenced code blocks then you can escape them by adding more backticks to the enclosing block. See the [this GitHub article](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#fenced-code-blocks) for an example. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Debug info** +Version of library being used: + +Any extras being used: + +**Additional context** +Add any other context about the problem here. From 7d852be667f511041dbaa8a5050e73d7aed421c2 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 2 Jul 2023 21:41:57 +0100 Subject: [PATCH 41/58] Allow relative links in safe mode. 
Also added back `_safe_protocols` attr and expanded allowed protocols to include mailto and tel --- lib/markdown2.py | 25 ++++++++++++++----- test/tm-cases/link_safe_urls.html | 38 ++++++++++++++++++++++++++++- test/tm-cases/link_safe_urls.text | 40 +++++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 9 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index b8427b1a..6bbc19dd 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1507,12 +1507,25 @@ def _protect_url(self, url): self._escape_table[url] = key return key - # _safe_href is copied from pagedown's Markdown.Sanitizer.js - # From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt - # Original Showdown code copyright (c) 2007 John Fraser - # Modifications and bugfixes (c) 2009 Dana Robinson - # Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. - _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]*$', re.I) + _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):|\/|\.{,2}|#' + + @property + def _safe_href(self): + ''' + _safe_href is adapted from pagedown's Markdown.Sanitizer.js + From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt + Original Showdown code copyright (c) 2007 John Fraser + Modifications and bugfixes (c) 2009 Dana Robinson + Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. + ''' + safe = r'-\w/' + # omitted ['"<>] for XSS reasons + less_safe = r'\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + # dot seperated hostname, optional port number, not followed by protocol seperator + domain = r'(?:[%s]+(?:\.[%s]+)*)(?::\d+/?)?(?![^:/]*:/*)' % (safe, safe) + fragment = r'[%s]*' % (safe + less_safe) + + return re.compile(r'^(%s)?(%s)(%s)$' % (self._safe_protocols, domain, fragment), re.I) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. 
diff --git a/test/tm-cases/link_safe_urls.html b/test/tm-cases/link_safe_urls.html index d1ac0d3a..b035827b 100644 --- a/test/tm-cases/link_safe_urls.html +++ b/test/tm-cases/link_safe_urls.html @@ -1,11 +1,47 @@ +

    Normal links

    +

    Safe link 1

    Safe link 2

    Safe link 3

    -

    Safe link 4

    +

    Safe link 4

    + +

    Safe link 5

    + +

    Safe link 6

    + +

    Bad protocols

    Unsafe link 1

    Unsafe link 2

    + +

    Relative links

    + +

    Safe link 1

    + +

    Safe link 2

    + +

    Safe link 3

    + +

    Safe link 4

    + +

    Unsafe link 1

    + +

    Edge cases

    + +

    Safe link 1

    + +

    Safe link 2

    + +

    Safe link 3

    + +

    Safe link 4

    + +

    Unsafe link 1

    + +

    Unsafe link 2

    + +

    Unsafe link 3

    diff --git a/test/tm-cases/link_safe_urls.text b/test/tm-cases/link_safe_urls.text index 56a21fad..88230460 100644 --- a/test/tm-cases/link_safe_urls.text +++ b/test/tm-cases/link_safe_urls.text @@ -1,11 +1,47 @@ +# Normal links + [Safe link 1](https://www.example.com) [Safe link 2](http://www.example.com) [Safe link 3](ftp://www.example.com) -[Safe link 4](#anchor) +[Safe link 4](mailto:emailaddress@server.com) + +[Safe link 5](tel:0123456789) + +[Safe link 6](#anchor) + +# Bad protocols [Unsafe link 1](unknown://www.example.com) -[Unsafe link 2](example) +[Unsafe link 2](mailfrom:www.example.com) + +# Relative links + +[Safe link 1](example) + +[Safe link 2](./example) + +[Safe link 3](../../example) + +[Safe link 4](/example) + +[Unsafe link 1](.../www.example.com) + +# Edge cases + +[Safe link 1](www.example.com/abc:def) + +[Safe link 2](www.example.com/abc:/def) + +[Safe link 3](https://www.example.com:4200) + +[Safe link 4](https://www.example.com:4200/abcdef) + +[Unsafe link 1](unknown://www.example.com://abc) + +[Unsafe link 2](C:/Windows/System32) + +[Unsafe link 3](C:\Windows\System32) From b8151a2866c30ff4fb0a0cf6169d429be5d3bd7c Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 2 Jul 2023 21:56:14 +0100 Subject: [PATCH 42/58] Update changes.md --- CHANGES.md | 2 +- lib/markdown2.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 36592088..30214a12 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## python-markdown2 2.4.10 (not yet released) -(nothing yet) +- [pull #520] Allow more relative links in safe mode (issue #517) ## python-markdown2 2.4.9 diff --git a/lib/markdown2.py b/lib/markdown2.py index 6bbc19dd..827638cc 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1509,8 +1509,9 @@ def _protect_url(self, url): _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):|\/|\.{,2}|#' + @classmethod @property - def _safe_href(self): + def _safe_href(cls): ''' _safe_href is adapted 
from pagedown's Markdown.Sanitizer.js From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt @@ -1525,7 +1526,7 @@ def _safe_href(self): domain = r'(?:[%s]+(?:\.[%s]+)*)(?::\d+/?)?(?![^:/]*:/*)' % (safe, safe) fragment = r'[%s]*' % (safe + less_safe) - return re.compile(r'^(%s)?(%s)(%s)$' % (self._safe_protocols, domain, fragment), re.I) + return re.compile(r'^(%s)?(%s)(%s)$' % (cls._safe_protocols, domain, fragment), re.I) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. From c65cfe08ee39f8e0ee3980d68c7566affbef190f Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 2 Jul 2023 22:09:46 +0100 Subject: [PATCH 43/58] Fix tests for older python versions --- lib/markdown2.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 827638cc..6bbc19dd 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1509,9 +1509,8 @@ def _protect_url(self, url): _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):|\/|\.{,2}|#' - @classmethod @property - def _safe_href(cls): + def _safe_href(self): ''' _safe_href is adapted from pagedown's Markdown.Sanitizer.js From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt @@ -1526,7 +1525,7 @@ def _safe_href(cls): domain = r'(?:[%s]+(?:\.[%s]+)*)(?::\d+/?)?(?![^:/]*:/*)' % (safe, safe) fragment = r'[%s]*' % (safe + less_safe) - return re.compile(r'^(%s)?(%s)(%s)$' % (cls._safe_protocols, domain, fragment), re.I) + return re.compile(r'^(%s)?(%s)(%s)$' % (self._safe_protocols, domain, fragment), re.I) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. From 9c6017f16b0bee4ac10d0ebe556c87c7036a5c65 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 9 Jul 2023 13:49:58 +0100 Subject: [PATCH 44/58] Move relative link handling out of `_safe_protocols` regex. 
See: https://github.com/trentm/python-markdown2/issues/517#issuecomment-1622477158 Moved the relative link handling into `_safe_href` --- lib/markdown2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 6bbc19dd..74337f07 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1507,7 +1507,7 @@ def _protect_url(self, url): self._escape_table[url] = key return key - _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):|\/|\.{,2}|#' + _safe_protocols = r'(?:https?|ftp):\/\/|(?:mailto|tel):' @property def _safe_href(self): @@ -1518,14 +1518,14 @@ def _safe_href(self): Modifications and bugfixes (c) 2009 Dana Robinson Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc. ''' - safe = r'-\w/' + safe = r'-\w' # omitted ['"<>] for XSS reasons - less_safe = r'\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' # dot seperated hostname, optional port number, not followed by protocol seperator domain = r'(?:[%s]+(?:\.[%s]+)*)(?::\d+/?)?(?![^:/]*:/*)' % (safe, safe) fragment = r'[%s]*' % (safe + less_safe) - return re.compile(r'^(%s)?(%s)(%s)$' % (self._safe_protocols, domain, fragment), re.I) + return re.compile(r'^(?:(%s)?(%s)(%s)|(#|\.{,2}/)(%s))$' % (self._safe_protocols, domain, fragment, fragment), re.I) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML and tags. 
From 6c8b1e7af3ee3c9df75d648bfb6a22aba7ea7a6b Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 10 Jul 2023 19:35:56 +0100 Subject: [PATCH 45/58] Fix safe href domain regex matching `tel:` protocols --- lib/markdown2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 74337f07..718d61b6 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1522,7 +1522,7 @@ def _safe_href(self): # omitted ['"<>] for XSS reasons less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[%s]+(?:\.[%s]+)*)(?::\d+/?)?(?![^:/]*:/*)' % (safe, safe) + domain = r'(?:[%s]+(?:\.[%s]+)*)(?:(? Date: Sat, 22 Jul 2023 13:48:23 +0100 Subject: [PATCH 46/58] Fix #185 by always restoring hashed html blocks at the end of conversion. Nested hashes don't get unravelled in `_form_paragraphs` --- lib/markdown2.py | 5 ++++- test/tm-cases/hash_html_blocks.html | 9 +++++++++ test/tm-cases/hash_html_blocks.text | 6 ++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/hash_html_blocks.html create mode 100644 test/tm-cases/hash_html_blocks.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 718d61b6..832b6b7e 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2638,9 +2638,12 @@ def _do_link_patterns(self, text): def _unescape_special_chars(self, text): # Swap back in all the special characters we've hidden. 
+ hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items()) + # html_blocks table is in format {hash: item} compared to usual {item: hash} + hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items()) while True: orig_text = text - for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()): + for ch, hash in hashmap: text = text.replace(hash, ch) if text == orig_text: break diff --git a/test/tm-cases/hash_html_blocks.html b/test/tm-cases/hash_html_blocks.html new file mode 100644 index 00000000..310fe3da --- /dev/null +++ b/test/tm-cases/hash_html_blocks.html @@ -0,0 +1,9 @@ +
    +

    Archons of the Colophon

    + + +

    by Paco Xander Nathan +

    + +
    diff --git a/test/tm-cases/hash_html_blocks.text b/test/tm-cases/hash_html_blocks.text new file mode 100644 index 00000000..f4a20b0f --- /dev/null +++ b/test/tm-cases/hash_html_blocks.text @@ -0,0 +1,6 @@ +
    +

    Archons of the Colophon

    +

    by Paco Xander Nathan +

    +
    From 3bb99d55ab3668236f5c3261d670d8227a232e7b Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 22 Jul 2023 13:59:44 +0100 Subject: [PATCH 47/58] Update `CHANGES.md` --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 30214a12..351b099b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.10 (not yet released) - [pull #520] Allow more relative links in safe mode (issue #517) +- [pull #521] Always restore hashed HTML blocks (issue #185) ## python-markdown2 2.4.9 From c14e99be243484cec80650788b51246935ab8a02 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 23 Jul 2023 14:29:09 +0100 Subject: [PATCH 48/58] Add `middle-word-em` extra. This extra will control whether emphasis in the middle of words will be allowed. When disabled, a whitespace will be required on both sides of the emphasis to be considered. The default is to allow middle word emphasis --- lib/markdown2.py | 18 ++++++++++++++---- test/tm-cases/middle_word_em.html | 2 ++ test/tm-cases/middle_word_em.opts | 1 + test/tm-cases/middle_word_em.text | 2 ++ 4 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 test/tm-cases/middle_word_em.html create mode 100644 test/tm-cases/middle_word_em.opts create mode 100644 test/tm-cases/middle_word_em.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 718d61b6..97607a87 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -66,6 +66,9 @@ some limitations. * metadata: Extract metadata from a leading '---'-fenced block. See for details. +* middle-word-em: Allows or disallows emphasis syntax in the middle of words, + defaulting to allow. Disabling this means that `this_text_here` will not be + converted to `thistexthere`. * nofollow: Add `rel="nofollow"` to add `
    ` tags with an href. See . * numbering: Support of generic counters. Non standard extension to @@ -2299,17 +2302,24 @@ def _do_tg_spoiler(self, text): return text _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) - _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _em_re = r"(\*|_)(?=\S)(.+?)(?<=\S)\1" _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) - _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + _code_friendly_em_re = r"\*(?=\S)(.+?)(?<=\S)\*" def _do_italics_and_bold(self, text): + if self.extras.get('middle-word-em', True) is False: + code_friendly_em_re = r'(?<=\s)%s(?=\s)' % self._code_friendly_em_re + em_re = r'(?<=\s)%s(?=\s)' % self._em_re + else: + code_friendly_em_re = self._code_friendly_em_re + em_re = self._em_re + # must go first: if "code-friendly" in self.extras: text = self._code_friendly_strong_re.sub(r"\1", text) - text = self._code_friendly_em_re.sub(r"\1", text) + text = re.sub(code_friendly_em_re, r"\1", text, flags=re.S) else: text = self._strong_re.sub(r"\2", text) - text = self._em_re.sub(r"\2", text) + text = re.sub(em_re, r"\2", text, flags=re.S) return text # "smarty-pants" extra: Very liberal in interpreting a single prime as an diff --git a/test/tm-cases/middle_word_em.html b/test/tm-cases/middle_word_em.html new file mode 100644 index 00000000..546b96c5 --- /dev/null +++ b/test/tm-cases/middle_word_em.html @@ -0,0 +1,2 @@ +

    When middle word emphasis is disabled strings like 'self.this_long_attr' should not +become self.this<em>long</em>attr.

    diff --git a/test/tm-cases/middle_word_em.opts b/test/tm-cases/middle_word_em.opts new file mode 100644 index 00000000..c69e4954 --- /dev/null +++ b/test/tm-cases/middle_word_em.opts @@ -0,0 +1 @@ +{'extras': {'middle-word-em': False}} diff --git a/test/tm-cases/middle_word_em.text b/test/tm-cases/middle_word_em.text new file mode 100644 index 00000000..1c7b5550 --- /dev/null +++ b/test/tm-cases/middle_word_em.text @@ -0,0 +1,2 @@ +When middle word emphasis is disabled strings like 'self.this_long_attr' should not +become `self.thislongattr`. From 2a01d522230dc4526c0a12f2c3b88aeb3946f19f Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 23 Jul 2023 14:42:41 +0100 Subject: [PATCH 49/58] Fix middle-word-em not applying when next to word boundary --- lib/markdown2.py | 4 ++-- test/tm-cases/middle_word_em.html | 3 +++ test/tm-cases/middle_word_em.text | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 97607a87..71cf8a93 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2307,8 +2307,8 @@ def _do_tg_spoiler(self, text): _code_friendly_em_re = r"\*(?=\S)(.+?)(?<=\S)\*" def _do_italics_and_bold(self, text): if self.extras.get('middle-word-em', True) is False: - code_friendly_em_re = r'(?<=\s)%s(?=\s)' % self._code_friendly_em_re - em_re = r'(?<=\s)%s(?=\s)' % self._em_re + code_friendly_em_re = r'(?<=\b)%s(?=\b)' % self._code_friendly_em_re + em_re = r'(?<=\b)%s(?=\b)' % self._em_re else: code_friendly_em_re = self._code_friendly_em_re em_re = self._em_re diff --git a/test/tm-cases/middle_word_em.html b/test/tm-cases/middle_word_em.html index 546b96c5..cc28b9aa 100644 --- a/test/tm-cases/middle_word_em.html +++ b/test/tm-cases/middle_word_em.html @@ -1,2 +1,5 @@

    When middle word emphasis is disabled strings like 'self.this_long_attr' should not become self.this<em>long</em>attr.

    + +

    Emphasis will only occur when the word is surrounded with whitespace. +This should still work with my_filename.

    diff --git a/test/tm-cases/middle_word_em.text b/test/tm-cases/middle_word_em.text index 1c7b5550..96883313 100644 --- a/test/tm-cases/middle_word_em.text +++ b/test/tm-cases/middle_word_em.text @@ -1,2 +1,5 @@ When middle word emphasis is disabled strings like 'self.this_long_attr' should not become `self.thislongattr`. + +Emphasis will _only_ occur when the word is surrounded with whitespace. +This should still work with _my_filename_. From 06156272ccbdfd6647fadcf7bf62d7e17b1604c5 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 23 Jul 2023 14:47:15 +0100 Subject: [PATCH 50/58] Update changes.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 30214a12..22500d57 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.10 (not yet released) - [pull #520] Allow more relative links in safe mode (issue #517) +- [pull #522] Add `middle-word-em` extra ## python-markdown2 2.4.9 From 2b85b82923ae428aede4f93eb28103997025442d Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Mon, 24 Jul 2023 23:27:11 -0400 Subject: [PATCH 51/58] prepare for 2.4.10 release --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 59ac020d..9dc02731 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # python-markdown2 Changelog -## python-markdown2 2.4.10 (not yet released) +## python-markdown2 2.4.10 - [pull #520] Allow more relative links in safe mode (issue #517) - [pull #521] Always restore hashed HTML blocks (issue #185) From 9272f9af6456c1e4e98d257b5ce0cf51ae0f3b6a Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Mon, 24 Jul 2023 23:27:20 -0400 Subject: [PATCH 52/58] prep for future dev --- CHANGES.md | 5 +++++ lib/markdown2.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 9dc02731..dc355cee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # python-markdown2 Changelog +## python-markdown2 
2.4.11 (not yet released) + +(nothing yet) + + ## python-markdown2 2.4.10 - [pull #520] Allow more relative links in safe mode (issue #517) diff --git a/lib/markdown2.py b/lib/markdown2.py index 68de7574..686fe29e 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -102,7 +102,7 @@ # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (2, 4, 10) +__version_info__ = (2, 4, 11) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" From 784bad2e10e246c4a3ae22e45347fb2301506e93 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 13 Aug 2023 21:38:32 +0100 Subject: [PATCH 53/58] Fix angles being escaped in style blocks --- lib/markdown2.py | 2 +- test/tm-cases/script-and-style-blocks.html | 13 +++++++++++++ test/tm-cases/script-and-style-blocks.text | 13 +++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/script-and-style-blocks.html create mode 100644 test/tm-cases/script-and-style-blocks.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 686fe29e..87b9858c 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -715,7 +715,7 @@ def _detab(self, text): # _block_tags_b. This way html5 tags are easy to keep track of. _html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption' - _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style' _block_tags_a += _html5tags _strict_tag_block_re = re.compile(r""" diff --git a/test/tm-cases/script-and-style-blocks.html b/test/tm-cases/script-and-style-blocks.html new file mode 100644 index 00000000..a855b8ba --- /dev/null +++ b/test/tm-cases/script-and-style-blocks.html @@ -0,0 +1,13 @@ + + + + +

    Some other text

    diff --git a/test/tm-cases/script-and-style-blocks.text b/test/tm-cases/script-and-style-blocks.text new file mode 100644 index 00000000..73c508e7 --- /dev/null +++ b/test/tm-cases/script-and-style-blocks.text @@ -0,0 +1,13 @@ + + + + +Some other text From 35bdcd45a34a51ec9bbd6fc05833722fa8fba71e Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 13 Aug 2023 21:44:12 +0100 Subject: [PATCH 54/58] Update CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index dc355cee..38983901 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## python-markdown2 2.4.11 (not yet released) -(nothing yet) +- [pull #524] Fix angles being escaped in style blocks (issue #523) ## python-markdown2 2.4.10 From a87cb74e7e379536dd07c31eb7fadb96a1d16ca0 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 26 Aug 2023 11:28:41 +0100 Subject: [PATCH 55/58] Allow base64 image data URLs when operating in safe mode --- lib/markdown2.py | 32 ++++++++++++++++++++--- test/tm-cases/data_urls_in_safe_mode.html | 3 +++ test/tm-cases/data_urls_in_safe_mode.opts | 1 + test/tm-cases/data_urls_in_safe_mode.text | 3 +++ 4 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 test/tm-cases/data_urls_in_safe_mode.html create mode 100644 test/tm-cases/data_urls_in_safe_mode.opts create mode 100644 test/tm-cases/data_urls_in_safe_mode.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 87b9858c..7386cc4f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1499,13 +1499,30 @@ def _extract_url_and_title(self, text, start): url = self._strip_anglebrackets.sub(r'\1', url) return url, title, end_idx + # https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types + _data_url_re = re.compile(r''' + data: + # in format type/subtype;parameter=optional + (?P\w+/[\w+\.-]+(?:;\w+=[\w+\.-]+)?)? + # optional base64 token + (?P;base64)? 
+ ,(?P.*) + ''', re.X) + def _protect_url(self, url): ''' Function that passes a URL through `_html_escape_url` to remove any nasty characters, and then hashes the now "safe" URL to prevent other safety mechanisms from tampering with it (eg: escaping "&" in URL parameters) ''' - url = _html_escape_url(url, safe_mode=self.safe_mode) + data_url = self._data_url_re.match(url) + charset = None + if data_url is not None: + mime = data_url.group('mime') or '' + if mime.startswith('image/') and data_url.group('token') == ';base64': + charset='base64' + url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset) key = _hash_text(url) self._escape_table[url] = key return key @@ -3045,14 +3062,21 @@ def _xml_encode_email_char_at_random(ch): return '&#%s;' % ord(ch) -def _html_escape_url(attr, safe_mode=False): - """Replace special characters that are potentially malicious in url string.""" +def _html_escape_url(attr, safe_mode=False, charset=None): + """ + Replace special characters that are potentially malicious in url string. + + Args: + charset: don't escape characters from this charset. Currently the only + exception is for '+' when charset=='base64' + """ escaped = (attr .replace('"', '"') .replace('<', '<') .replace('>', '>')) if safe_mode: - escaped = escaped.replace('+', ' ') + if charset != 'base64': + escaped = escaped.replace('+', ' ') escaped = escaped.replace("'", "'") return escaped diff --git a/test/tm-cases/data_urls_in_safe_mode.html b/test/tm-cases/data_urls_in_safe_mode.html new file mode 100644 index 00000000..587d980d --- /dev/null +++ b/test/tm-cases/data_urls_in_safe_mode.html @@ -0,0 +1,3 @@ +

    smiley

    + +
    diff --git a/test/tm-cases/data_urls_in_safe_mode.opts b/test/tm-cases/data_urls_in_safe_mode.opts new file mode 100644 index 00000000..ccb6a09b --- /dev/null +++ b/test/tm-cases/data_urls_in_safe_mode.opts @@ -0,0 +1 @@ +{'safe_mode': True} diff --git a/test/tm-cases/data_urls_in_safe_mode.text b/test/tm-cases/data_urls_in_safe_mode.text new file mode 100644 index 00000000..512c6119 --- /dev/null +++ b/test/tm-cases/data_urls_in_safe_mode.text @@ -0,0 +1,3 @@ +![smiley]() + +[smiley]() From a2bab997e3f975c055ef4e2284349c7965a4d172 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 26 Aug 2023 11:39:12 +0100 Subject: [PATCH 56/58] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 38983901..c31d5fce 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.11 (not yet released) - [pull #524] Fix angles being escaped in style blocks (issue #523) +- [pull #527] Fix base64 images being corrupted in safe mode (issue #526) ## python-markdown2 2.4.10 From e2a595c4d037f30f63f7da2a50025456e700ad22 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 5 Sep 2023 18:49:00 +0100 Subject: [PATCH 57/58] Add `breaks` extra with ability to hard break on backslashes. 
See #525 --- lib/markdown2.py | 20 ++++++++++++++++--- test/tm-cases/break_on_backslash.html | 5 +++++ test/tm-cases/break_on_backslash.opts | 1 + test/tm-cases/break_on_backslash.text | 5 +++++ .../break_on_newline_and_backslash.html | 3 +++ .../break_on_newline_and_backslash.opts | 1 + .../break_on_newline_and_backslash.text | 3 +++ 7 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 test/tm-cases/break_on_backslash.html create mode 100644 test/tm-cases/break_on_backslash.opts create mode 100644 test/tm-cases/break_on_backslash.text create mode 100644 test/tm-cases/break_on_newline_and_backslash.html create mode 100644 test/tm-cases/break_on_newline_and_backslash.opts create mode 100644 test/tm-cases/break_on_newline_and_backslash.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 87b9858c..f7a0e328 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -41,7 +41,11 @@ see for details): * admonitions: Enable parsing of RST admonitions. -* break-on-newline: Replace single new line characters with
    when True +* breaks: Control where hard breaks are inserted in the markdown. + Options include: + - on_newline: Replace single new line characters with
    when True + - on_backslash: Replace backslashes at the end of a line with
    +* break-on-newline: Alias for the on_newline option in the breaks extra. * code-friendly: Disable _ and __ for em and strong. * cuddled-lists: Allow lists to be cuddled to the preceding paragraph. * fenced-code-blocks: Allows a code block to not have to be indented @@ -235,6 +239,11 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, self._toc_depth = 6 else: self._toc_depth = self.extras["toc"].get("depth", 6) + + if 'break-on-newline' in self.extras: + self.extras.setdefault('breaks', {}) + self.extras['breaks']['on_newline'] = True + self._instance_extras = self.extras.copy() if 'link-patterns' in self.extras: @@ -1318,8 +1327,13 @@ def _run_span_gamut(self, text): text = self._do_smart_punctuation(text) # Do hard breaks: - if "break-on-newline" in self.extras: - text = re.sub(r" *\n(?!\<(?:\/?(ul|ol|li))\>)", ")", break_tag, text) else: text = re.sub(r" {2,}\n", " Github flavoured markdown allows
    +you to insert a backslash with or +without a space, which results in
    +a hard line break, unless it has \ +been escaped.

    diff --git a/test/tm-cases/break_on_backslash.opts b/test/tm-cases/break_on_backslash.opts new file mode 100644 index 00000000..52f81fdc --- /dev/null +++ b/test/tm-cases/break_on_backslash.opts @@ -0,0 +1 @@ +{'extras': {'breaks': {'on_backslash': True}}} diff --git a/test/tm-cases/break_on_backslash.text b/test/tm-cases/break_on_backslash.text new file mode 100644 index 00000000..0576c564 --- /dev/null +++ b/test/tm-cases/break_on_backslash.text @@ -0,0 +1,5 @@ +Github flavoured markdown allows \ +you to insert a backslash with or +without a space, which results in\ +a hard line break, unless it has \\ +been escaped. diff --git a/test/tm-cases/break_on_newline_and_backslash.html b/test/tm-cases/break_on_newline_and_backslash.html new file mode 100644 index 00000000..cb43d6db --- /dev/null +++ b/test/tm-cases/break_on_newline_and_backslash.html @@ -0,0 +1,3 @@ +

    The breaks extra allows you to insert a hard break on newlines.
    +You can also insert hard breaks after backslashes

    +although this will result in a double break when both are enabled.

    diff --git a/test/tm-cases/break_on_newline_and_backslash.opts b/test/tm-cases/break_on_newline_and_backslash.opts new file mode 100644 index 00000000..5ac49b03 --- /dev/null +++ b/test/tm-cases/break_on_newline_and_backslash.opts @@ -0,0 +1 @@ +{'extras': {'breaks': {'on_backslash': True, 'on_newline': True}}} diff --git a/test/tm-cases/break_on_newline_and_backslash.text b/test/tm-cases/break_on_newline_and_backslash.text new file mode 100644 index 00000000..4114125b --- /dev/null +++ b/test/tm-cases/break_on_newline_and_backslash.text @@ -0,0 +1,3 @@ +The breaks extra allows you to insert a hard break on newlines. +You can also insert hard breaks after backslashes \ +although this will result in a double break when both are enabled. From f25bc21962b86fdb130e0bc6eed3ac88cc2f4fe4 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 5 Sep 2023 19:00:09 +0100 Subject: [PATCH 58/58] Update changes.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 38983901..841980d1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,7 +3,7 @@ ## python-markdown2 2.4.11 (not yet released) - [pull #524] Fix angles being escaped in style blocks (issue #523) - +- [pull #529] Add `breaks` extra with ability to hard break on backslashes (issue #525) ## python-markdown2 2.4.10

    smiley

    Year Temperature (low)
    {}