Add pathological_tests.py from cmark. - md4c - C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.

commit 26f14899ed326b123dd8f7accef6e0422c59fe12
parent ad4f28bb85136cde06eb01b5cb4255b28870f145
Author: Martin Mitas <mity@morous.org>
Date:   Mon, 24 Jul 2017 19:27:27 +0200

Add pathological_tests.py from cmark.

Diffstat:
M scripts/run-tests.sh  | 3 +++
A test/pathological_tests.py  | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 92 insertions(+), 0 deletions(-)
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
@@ -37,3 +37,6 @@ $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-url-autolinks.txt" -p
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-www-autolinks.txt" -p "$PROGRAM --fpermissive-www-autolinks"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/tables.txt" -p "$PROGRAM --ftables"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough"
+
+# Run pathological tests:
+$PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+import argparse
+import sys
+import platform
+from cmark import CMark
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    args = parser.parse_args(sys.argv[1:])
+
+cmark = CMark(prog=args.program, library_dir=args.library_dir)
+
+# list of pairs consisting of input and a regex that must match the output.
+pathological = {
+    # note - some pythons have limit of 65535 for {num-matches} in re.
+    "nested strong emph":
+                (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
+                 re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
+    "many emph closers with no openers":
+                 (("a_ " * 65000),
+                  re.compile("(a[_] ){64999}a_")),
+    "many emph openers with no closers":
+                 (("_a " * 65000),
+                  re.compile("(_a ){64999}_a")),
+    "many link closers with no openers":
+                 (("a]" * 65000),
+                  re.compile("(a\]){65000}")),
+    "many link openers with no closers":
+                 (("[a" * 65000),
+                  re.compile("(\[a){65000}")),
+    "mismatched openers and closers":
+                 (("*a_ " * 50000),
+                  re.compile("([*]a[_] ){49999}[*]a_")),
+    "openers and closers multiple of 3":
+                 (("a**b" + ("c* " * 50000)),
+                  re.compile("a[*][*]b(c[*] ){49999}c[*]")),
+    "link openers and emph closers":
+                 (("[ a_" * 50000),
+                  re.compile("(\[ a_){50000}")),
+    "hard link/emph case":
+                 ("**x [a*b**c*](d)",
+                  re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
+    "nested brackets":
+                 (("[" * 50000) + "a" + ("]" * 50000),
+                  re.compile("\[{50000}a\]{50000}")),
+    "nested block quotes":
+                 ((("> " * 50000) + "a"),
+                  re.compile("(<blockquote>\n){50000}")),
+    "U+0000 in input":
+                 ("abc\u0000de\u0000",
+                  re.compile("abc\ufffd?de\ufffd?")),
+    "backticks":
+                 ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))),
+                  re.compile("^<p>[e`]*</p>\n$"))
+    }
+
+whitespace_re = re.compile('/s+/')
+passed = 0
+errored = 0
+failed = 0
+
+print("Testing pathological cases:")
+for description in pathological:
+    (inp, regex) = pathological[description]
+    [rc, actual, err] = cmark.to_html(inp)
+    if rc != 0:
+        errored += 1
+        print(description, '[ERRORED (return code %d)]' %rc)
+        print(err)
+    elif regex.search(actual):
+        print(description, '[PASSED]')
+        passed += 1
+    else:
+        print(description, '[FAILED]')
+        print(repr(actual))
+        failed += 1
+
+print("%d passed, %d failed, %d errored" % (passed, failed, errored))
+if (failed == 0 and errored == 0):
+    exit(0)
+else:
+    exit(1)

	md4c - C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
	git clone https://noulin.net/git/md4c.git
	Log | Files | Refs | README | LICENSE

M scripts/run-tests.sh  | 3 +++
A test/pathological_tests.py  | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++