git subrepo clone --branch=0.30.3 https://github.com/commonmark/cmark.git deps/cmark

subrepo:
  subdir:   "deps/cmark"
  merged:   "5ba25ff"
upstream:
  origin:   "https://github.com/commonmark/cmark.git"
  branch:   "0.30.3"
  commit:   "5ba25ff"
git-subrepo:
  version:  "0.4.6"
  commit:   "d4444b563"
date    2023-09-03 20:42:10 -07:00
parent  17a4224cb8
commit  24810cbbbd
120 changed files with 48683 additions and 0 deletions

80
deps/cmark/test/CMakeLists.txt vendored Executable file

@@ -0,0 +1,80 @@
# To get verbose output: cmake --build build --target "test" -- ARGS='-V'

# By default, we run the spec tests only if python3 is available.
# To require the spec tests, compile with -DSPEC_TESTS=1
if (SPEC_TESTS)
  find_package(PythonInterp 3 REQUIRED)
else(SPEC_TESTS)
  find_package(PythonInterp 3)
endif(SPEC_TESTS)

if (CMARK_SHARED OR CMARK_STATIC)
  add_test(NAME api_test COMMAND api_test)
endif()

if (WIN32)
  file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR)
  set(NEWPATH "${WIN_DLL_DIR};$ENV{PATH}")
  string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}")
  set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}")
  set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
else(WIN32)
  set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh")
endif(WIN32)

IF (PYTHONINTERP_FOUND)
  add_test(html_normalization
    ${PYTHON_EXECUTABLE} "-m" "doctest"
    "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py"
    )
  if (CMARK_SHARED)
    add_test(spectest_library
      ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec"
      "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
      )
    add_test(pathological_tests_library
      ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
      "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
      )
    add_test(roundtriptest_library
      ${PYTHON_EXECUTABLE}
      "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
      "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
      "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
      )
    add_test(entity_library
      ${PYTHON_EXECUTABLE}
      "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
      "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
      )
  endif()
  add_test(spectest_executable
    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
    )
  add_test(smartpuncttest_executable
    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart"
    )
  add_test(regressiontest_executable
    ${PYTHON_EXECUTABLE}
    "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec"
    "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" "--program"
    "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
    )
ELSE(PYTHONINTERP_FOUND)
  message("\n*** A python 3 interpreter is required to run the spec tests.\n")
  add_test(skipping_spectests
    echo "Skipping spec tests, because no python 3 interpreter is available.")
ENDIF(PYTHONINTERP_FOUND)
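
Each of these CTest entries just shells out to the Python drivers in this directory, so the same checks can be run by hand. A minimal sketch, assuming an out-of-source build in `build/` with the shared library in `build/src` (both paths are assumptions about the local layout):

```python
# Minimal sketch: invoke the same spec-test driver that the spectest_library
# CTest entry runs, outside of CTest. "build/src" is an assumed build path.
import subprocess
import sys

cmd = [
    sys.executable, "test/spec_tests.py",
    "--no-normalize",
    "--spec", "test/spec.txt",
    "--library-dir", "build/src",
]
result = subprocess.run(cmd)
# spec_tests.py exits with the number of failed plus errored examples,
# so 0 means every example passed.
print("spec tests exit code:", result.returncode)
```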

36
deps/cmark/test/afl_test_cases/test.md vendored Normal file

@@ -0,0 +1,36 @@
# H1
H2
--
t ☺
*b* **em** `c`
≥\&\
\_e\_
4) I1
5) I2
> [l](/u "t")
>
> - [f]
> - ![a](/u "t")
>
>> <ftp://hh>
>> <u@hh>
~~~ l☺
cb
~~~
c1
c2
***
<div>
<b>x</b>
</div>
[f]: /u "t"

33
deps/cmark/test/cmark-fuzz.c vendored Normal file

@@ -0,0 +1,33 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "cmark.h"

int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  struct __attribute__((packed)) {
    int options;
    int width;
  } fuzz_config;

  if (size >= sizeof(fuzz_config)) {
    /* The beginning of `data` is treated as fuzzer configuration */
    memcpy(&fuzz_config, data, sizeof(fuzz_config));

    /* Mask off valid option bits */
    fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS |
                            CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS |
                            CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 |
                            CMARK_OPT_SMART);

    /* Remainder of input is the markdown */
    const char *markdown = (const char *)(data + sizeof(fuzz_config));
    const size_t markdown_size = size - sizeof(fuzz_config);

    cmark_node *doc = cmark_parse_document(markdown, markdown_size, fuzz_config.options);

    free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_html(doc, fuzz_config.options));
    free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width));
    free(cmark_render_xml(doc, fuzz_config.options));

    cmark_node_free(doc);
  }
  return 0;
}
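
The fuzz target above treats the first `sizeof(fuzz_config)` bytes of each input as a packed header (two native C ints: options and width) and the rest as markdown. A hedged sketch of how such an input could be assembled, for example to replay a case by hand; the 4-byte-int assumption and the output file name are mine, and the option value is the one published in cmark.h:

```python
# Sketch of the input framing used by LLVMFuzzerTestOneInput above:
# a packed header of two native C ints (options, width) followed by the
# markdown bytes. Assumes 4-byte ints, matching the packed struct.
import struct

CMARK_OPT_SMART = 1 << 10  # value as defined in cmark.h

def make_fuzz_input(markdown: bytes, options: int = 0, width: int = 72) -> bytes:
    header = struct.pack("ii", options, width)  # struct { int options; int width; }
    return header + markdown

# "replay_case.bin" is a hypothetical file name for illustration.
with open("replay_case.bin", "wb") as f:
    f.write(make_fuzz_input(b"*hello* world", CMARK_OPT_SMART, 40))
```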

62
deps/cmark/test/cmark.py vendored Normal file

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p
from subprocess import *
import platform
import os


def pipe_through_prog(prog, text):
    p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
    [result, err] = p1.communicate(input=text.encode('utf-8'))
    return [p1.returncode, result.decode('utf-8'), err]


def to_html(lib, text):
    markdown = lib.cmark_markdown_to_html
    markdown.restype = c_char_p
    markdown.argtypes = [c_char_p, c_size_t, c_int]
    textbytes = text.encode('utf-8')
    textlen = len(textbytes)
    # 1 << 17 == CMARK_OPT_UNSAFE
    result = markdown(textbytes, textlen, 1 << 17).decode('utf-8')
    return [0, result, '']


def to_commonmark(lib, text):
    textbytes = text.encode('utf-8')
    textlen = len(textbytes)
    parse_document = lib.cmark_parse_document
    parse_document.restype = c_void_p
    parse_document.argtypes = [c_char_p, c_size_t, c_int]
    render_commonmark = lib.cmark_render_commonmark
    render_commonmark.restype = c_char_p
    render_commonmark.argtypes = [c_void_p, c_int, c_int]
    node = parse_document(textbytes, textlen, 0)
    result = render_commonmark(node, 0, 0).decode('utf-8')
    return [0, result, '']


class CMark:
    def __init__(self, prog=None, library_dir=None):
        self.prog = prog
        if prog:
            prog += ' --unsafe'
            self.to_html = lambda x: pipe_through_prog(prog, x)
            self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
        else:
            sysname = platform.system()
            if sysname == 'Darwin':
                libnames = [ "libcmark.dylib" ]
            elif sysname == 'Windows':
                libnames = [ "cmark.dll", "libcmark.dll" ]
            else:
                libnames = [ "libcmark.so" ]
            if not library_dir:
                library_dir = os.path.join("build", "src")
            for libname in libnames:
                candidate = os.path.join(library_dir, libname)
                if os.path.isfile(candidate):
                    libpath = candidate
                    break
            cmark = CDLL(libpath)
            self.to_html = lambda x: to_html(cmark, x)
            self.to_commonmark = lambda x: to_commonmark(cmark, x)
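
A small usage sketch for the wrapper above; the library path is an assumption about where the shared library was built:

```python
# Usage sketch for the CMark wrapper defined in cmark.py.
# "build/src" is an assumed location of the built shared library.
from cmark import CMark

cmark = CMark(library_dir="build/src")   # loads libcmark via ctypes
rc, html, err = cmark.to_html("*hello*\n")
print(rc, html)                          # 0 <p><em>hello</em></p>

rc, md, err = cmark.to_commonmark("hello world\n")
print(md)                                # the CommonMark writer's output
```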

67
deps/cmark/test/entity_tests.py vendored Normal file

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import os
import argparse
import sys
import platform
import html
from cmark import CMark


def get_entities():
    regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
    with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f:
        code = f.read()
    entities = []
    for entity, utf8 in re.findall(regex, code, re.MULTILINE):
        utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8')
        entities.append((entity, utf8))
    return entities


parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])

cmark = CMark(prog=args.program, library_dir=args.library_dir)

entities = get_entities()

passed = 0
errored = 0
failed = 0

exceptions = {
    'quot': '&quot;',
    'QUOT': '&quot;',

    # These are broken, but I'm not too worried about them.
    'nvlt': '&lt;⃒',
    'nvgt': '&gt;⃒',
}

print("Testing entities:")
for entity, utf8 in entities:
    [rc, actual, err] = cmark.to_html("&{};".format(entity))
    check = exceptions.get(entity, utf8)

    if rc != 0:
        errored += 1
        print(entity, '[ERRORED (return code {})]'.format(rc))
        print(err)
    elif check in actual:
        # print(entity, '[PASSED]')  # omit noisy success output
        passed += 1
    else:
        print(entity, '[FAILED]')
        print(repr(actual))
        failed += 1

print("{} passed, {} failed, {} errored".format(passed, failed, errored))

if failed == 0 and errored == 0:
    exit(0)
else:
    exit(1)
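
get_entities() scrapes src/entities.inc with the regex shown above and decodes the UTF-8 byte list, dropping the trailing 0. A sketch of that step on a single sample line; the sample mimics the shape of entities.inc entries rather than quoting the file:

```python
# Sketch of what get_entities() extracts from one entities.inc-style line.
# The sample line is illustrative, not copied from the generated file.
import re

sample = '{(unsigned char*)"copy", {194, 169, 0}},'
regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
entity, utf8 = re.findall(regex, sample, re.MULTILINE)[0]
decoded = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8')  # drop trailing 0
print(entity, decoded)   # copy ©
```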

49
deps/cmark/test/fuzzing_dictionary vendored Normal file

@@ -0,0 +1,49 @@
asterisk="*"
attr_generic=" a=\"1\""
attr_href=" href=\"1\""
attr_xml_lang=" xml:lang=\"1\""
attr_xmlns=" xmlns=\"1\""
backslash="\\"
backtick="`"
colon=":"
dashes="---"
double_quote="\""
entity_builtin="&lt;"
entity_decimal="&#1;"
entity_external="&a;"
entity_hex="&#x1;"
equals="==="
exclamation="!"
greater_than=">"
hash="#"
hyphen="-"
indent=" "
left_bracket="["
left_paren="("
less_than="<"
plus="+"
right_bracket="]"
right_paren=")"
single_quote="'"
string_any="ANY"
string_brackets="[]"
string_cdata="CDATA"
string_dashes="--"
string_empty_dblquotes="\"\""
string_empty_quotes="''"
string_idrefs="IDREFS"
string_parentheses="()"
string_pcdata="#PCDATA"
tag_cdata="<![CDATA["
tag_close="</a>"
tag_doctype="<!DOCTYPE"
tag_element="<!ELEMENT"
tag_entity="<!ENTITY"
tag_notation="<!NOTATION"
tag_open="<a>"
tag_open_close="<a />"
tag_open_exclamation="<!"
tag_open_q="<?"
tag_sq2_close="]]>"
tag_xml_q="<?xml?>"
underscore="_"
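
This is a libFuzzer/AFL-style token dictionary, one name="value" entry per line; run-cmark-fuzz passes it to the fuzzer with -dict. A hedged sketch that parses the format, for instance to sanity-check the entries (the path is an assumption):

```python
# Hedged sketch: read the name="value" token dictionary shown above.
# "test/fuzzing_dictionary" is an assumed relative path.
import re

entries = {}
with open("test/fuzzing_dictionary") as f:
    for line in f:
        m = re.match(r'(\w+)="(.*)"$', line.strip())
        if m:
            entries[m.group(1)] = m.group(2)

print(len(entries), "tokens, e.g. tag_cdata =", entries.get("tag_cdata"))
```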

194
deps/cmark/test/normalize.py vendored Normal file

@@ -0,0 +1,194 @@
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
import urllib

try:
    from html.parser import HTMLParseError
except ImportError:
    # HTMLParseError was removed in Python 3.5. It could never be
    # thrown, so we define a placeholder instead.
    class HTMLParseError(Exception):
        pass

from html.entities import name2codepoint
import sys
import re
import html

# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
significant_attrs = ["alt", "href", "src", "title"]
whitespace_re = re.compile('\s+')


class MyHTMLParser(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.convert_charrefs = False
        self.last = "starttag"
        self.in_pre = False
        self.output = ""
        self.last_tag = ""

    def handle_data(self, data):
        after_tag = self.last == "endtag" or self.last == "starttag"
        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
        if after_tag and self.last_tag == "br":
            data = data.lstrip('\n')
        if not self.in_pre:
            data = whitespace_re.sub(' ', data)
        if after_block_tag and not self.in_pre:
            if self.last == "starttag":
                data = data.lstrip()
            elif self.last == "endtag":
                data = data.strip()
        self.output += data
        self.last = "data"

    def handle_endtag(self, tag):
        if tag == "pre":
            self.in_pre = False
        elif self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "</" + tag + ">"
        self.last_tag = tag
        self.last = "endtag"

    def handle_starttag(self, tag, attrs):
        if tag == "pre":
            self.in_pre = True
        if self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "<" + tag
        # For now we don't strip out 'extra' attributes, because of
        # raw HTML test cases.
        # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
        if attrs:
            attrs.sort()
            for (k,v) in attrs:
                self.output += " " + k
                if v in ['href','src']:
                    self.output += ("=" + '"' +
                            urllib.quote(urllib.unquote(v), safe='/') + '"')
                elif v != None:
                    self.output += ("=" + '"' + html.escape(v,quote=True) + '"')
        self.output += ">"
        self.last_tag = tag
        self.last = "starttag"

    def handle_startendtag(self, tag, attrs):
        """Ignore closing tag for self-closing """
        self.handle_starttag(tag, attrs)
        self.last_tag = tag
        self.last = "endtag"

    def handle_comment(self, data):
        self.output += '<!--' + data + '-->'
        self.last = "comment"

    def handle_decl(self, data):
        self.output += '<!' + data + '>'
        self.last = "decl"

    def unknown_decl(self, data):
        self.output += '<!' + data + '>'
        self.last = "decl"

    def handle_pi(self,data):
        self.output += '<?' + data + '>'
        self.last = "pi"

    def handle_entityref(self, name):
        try:
            c = chr(name2codepoint[name])
        except KeyError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    def handle_charref(self, name):
        try:
            if name.startswith("x"):
                c = chr(int(name[1:], 16))
            else:
                c = chr(int(name))
        except ValueError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    # Helpers.
    def output_char(self, c, fallback):
        if c == '<':
            self.output += "&lt;"
        elif c == '>':
            self.output += "&gt;"
        elif c == '&':
            self.output += "&amp;"
        elif c == '"':
            self.output += "&quot;"
        elif c == None:
            self.output += fallback
        else:
            self.output += c

    def is_block_tag(self,tag):
        return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
            'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
            'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
            'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
            'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
            'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])


def normalize_html(html):
    r"""
    Return normalized form of HTML which ignores insignificant output
    differences:

    Multiple inner whitespaces are collapsed to a single space (except
    in pre tags):

        >>> normalize_html("<p>a \t b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a \t\nb</p>")
        '<p>a b</p>'

    * Whitespace surrounding block-level tags is removed.

        >>> normalize_html("<p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html(" <p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a b</p> ")
        '<p>a b</p>'

        >>> normalize_html("\n\t<p>\n\t\ta b\t\t</p>\n\t")
        '<p>a b</p>'

        >>> normalize_html("<i>a b</i> ")
        '<i>a b</i> '

    * Self-closing tags are converted to open tags.

        >>> normalize_html("<br />")
        '<br>'

    * Attributes are sorted and lowercased.

        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
        '<a href="foo" title="bar">x</a>'

    * References are converted to unicode, except that '<', '>', '&', and
      '"' are rendered using entities.

        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
        '\u2200&amp;&gt;&lt;&quot;'
    """
    html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
    try:
        parser = MyHTMLParser()
        # We work around HTMLParser's limitations parsing CDATA
        # by breaking the input into chunks and passing CDATA chunks
        # through verbatim.
        for chunk in re.finditer(html_chunk_re, html):
            if chunk.group(0)[:8] == "<![CDATA":
                parser.output += chunk.group(0)
            else:
                parser.feed(chunk.group(0))
        parser.close()
        return parser.output
    except HTMLParseError as e:
        sys.stderr.write("Normalization error: " + e.msg + "\n")
        return html  # on error, return unnormalized HTML
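
The html_normalization CTest entry simply runs this module through `python -m doctest`. The same check, plus a direct call, can be done programmatically; a sketch assuming it is run from the test/ directory so that `normalize` is importable:

```python
# Run the doctests embedded in normalize_html's docstring, then call it
# directly. Assumes the current directory is test/ so "normalize" imports.
import doctest
import normalize

failures, attempted = doctest.testmod(normalize, verbose=False)
print(f"{attempted} doctests run, {failures} failures")

# Attribute names are lowercased and sorted, whitespace is collapsed:
print(normalize.normalize_html('<p ID="x">a \t b</p> '))   # '<p id="x">a b</p>'
```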

193
deps/cmark/test/pathological_tests.py vendored Normal file

@@ -0,0 +1,193 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import argparse
import sys
import platform
import itertools
import multiprocessing
import queue
import time
from cmark import CMark

TIMEOUT = 5

parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])

allowed_failures = {"many references": True}

cmark = CMark(prog=args.program, library_dir=args.library_dir)


def hash_collisions():
    REFMAP_SIZE = 16
    COUNT = 25000

    def badhash(ref):
        h = 0
        for c in ref:
            a = (h << 6) & 0xFFFFFFFF
            b = (h << 16) & 0xFFFFFFFF
            h = ord(c) + a + b - h
            h = h & 0xFFFFFFFF
        return (h % REFMAP_SIZE) == 0

    keys = ("x%d" % i for i in itertools.count())
    collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
    bad_key = next(collisions)

    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)

    return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))


# list of pairs consisting of input and a regex that must match the output.
pathological = {
    # note - some pythons have limit of 65535 for {num-matches} in re.
    "nested strong emph":
        (("*a **a " * 32500) + "b" + (" a** a*" * 32500),
         re.compile("(<em>a <strong>a ){32500}b( a</strong> a</em>){32500}")),
    "many emph closers with no openers":
        (("a_ " * 32500),
         re.compile("(a[_] ){32499}a_")),
    "many emph openers with no closers":
        (("_a " * 32500),
         re.compile("(_a ){32499}_a")),
    "many link closers with no openers":
        (("a]" * 32500),
         re.compile("(a\]){32500}")),
    "many link openers with no closers":
        (("[a" * 32500),
         re.compile("(\[a){32500}")),
    "mismatched openers and closers":
        (("*a_ " * 25000),
         re.compile("([*]a[_] ){24999}[*]a_")),
    "issue #389":
        (("*a " * 20000 + "_a*_ " * 20000),
         re.compile("(<em>a ){20000}(_a<\/em>_ ?){20000}")),
    "openers and closers multiple of 3":
        (("a**b" + ("c* " * 25000)),
         re.compile("a[*][*]b(c[*] ){24999}c[*]")),
    "link openers and emph closers":
        (("[ a_" * 25000),
         re.compile("(\[ a_){25000}")),
    "pattern [ (]( repeated":
        (("[ (](" * 40000),
         re.compile("(\[ \(\]\(){40000}")),
    "pattern ![[]() repeated":
        ("![[]()" * 160000,
         re.compile("(!\[<a href=\"\"></a>){160000}")),
    "hard link/emph case":
        ("**x [a*b**c*](d)",
         re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
    "nested brackets":
        (("[" * 25000) + "a" + ("]" * 25000),
         re.compile("\[{25000}a\]{25000}")),
    "nested block quotes":
        ((("> " * 25000) + "a"),
         re.compile("(<blockquote>\n){25000}")),
    "deeply nested lists":
        ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))),
         re.compile("<ul>\n(<li>a\n<ul>\n){499}<li>a</li>\n</ul>\n(</li>\n</ul>\n){499}")),
    "U+0000 in input":
        ("abc\u0000de\u0000",
         re.compile("abc\ufffd?de\ufffd?")),
    "backticks":
        ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))),
         re.compile("^<p>[e`]*</p>\n$")),
    "unclosed links A":
        ("[a](<b" * 30000,
         re.compile("(\[a\]\(&lt;b){30000}")),
    "unclosed links B":
        ("[a](b" * 30000,
         re.compile("(\[a\]\(b){30000}")),
    "unclosed <!--":
        ("</" + "<!--" * 300000,
         re.compile("\&lt;\/(\&lt;!--){300000}")),
    "reference collisions": hash_collisions()
    # "many references":
    #     ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
    #      re.compile("(\[0\] ){4999}"))
}

pathological_cmark = {
    "nested inlines":
        ("*" * 20000 + "a" + "*" * 20000,
         re.compile("^\*+a\*+$")),
}

whitespace_re = re.compile('/s+/')


def run_pathological(q, inp):
    q.put(cmark.to_html(inp))


def run_pathological_cmark(q, inp):
    q.put(cmark.to_commonmark(inp))


def run_tests():
    q = multiprocessing.Queue()
    passed = []
    errored = []
    failed = []
    ignored = []

    print("Testing pathological cases:")
    for description in (*pathological, *pathological_cmark):
        if description in pathological:
            (inp, regex) = pathological[description]
            p = multiprocessing.Process(target=run_pathological,
                                        args=(q, inp))
        else:
            (inp, regex) = pathological_cmark[description]
            p = multiprocessing.Process(target=run_pathological_cmark,
                                        args=(q, inp))
        p.start()
        try:
            # wait TIMEOUT seconds or until it finishes
            rc, actual, err = q.get(True, TIMEOUT)
            p.join()
            if rc != 0:
                print(description, '[ERRORED (return code %d)]' %rc)
                print(err)
                if description in allowed_failures:
                    ignored.append(description)
                else:
                    errored.append(description)
            elif regex.search(actual):
                print(description, '[PASSED]')
                passed.append(description)
            else:
                print(description, '[FAILED]')
                print(repr(actual[:60]))
                if description in allowed_failures:
                    ignored.append(description)
                else:
                    failed.append(description)
        except queue.Empty:
            p.terminate()
            p.join()
            print(description, '[TIMEOUT]')
            if description in allowed_failures:
                ignored.append(description)
            else:
                errored.append(description)

    print("%d passed, %d failed, %d errored" %
          (len(passed), len(failed), len(errored)))
    if ignored:
        print("Ignoring these allowed failures:")
        for x in ignored:
            print(x)
    if failed or errored:
        exit(1)
    else:
        exit(0)


if __name__ == "__main__":
    run_tests()
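
Each pathological case is rendered in a child process and abandoned if no result arrives on the queue within TIMEOUT seconds. A minimal sketch of that timeout pattern in isolation; the function and input here are illustrative stand-ins:

```python
# Minimal sketch of the timeout pattern used above: run a function in a
# child process and give up after TIMEOUT seconds. Names are illustrative.
import multiprocessing
import queue

TIMEOUT = 5

def slow_render(q, text):
    q.put(text.upper())          # stand-in for cmark.to_html(text)

if __name__ == "__main__":
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=slow_render, args=(q, "*a* " * 10))
    p.start()
    try:
        result = q.get(True, TIMEOUT)   # blocks for at most TIMEOUT seconds
        p.join()
        print("finished:", result[:20])
    except queue.Empty:
        p.terminate()                   # treat it as a hang, like the harness does
        p.join()
        print("timeout")
```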

215
deps/cmark/test/regression.txt vendored Normal file

@@ -0,0 +1,215 @@
### Regression tests
Issue #113: EOL character weirdness on Windows
(Important: first line ends with CR + CR + LF)
```````````````````````````````` example
line1
line2
.
<p>line1</p>
<p>line2</p>
````````````````````````````````
Issue #114: cmark skipping first character in line
(Important: the blank lines around "Repeatedly" contain a tab.)
```````````````````````````````` example
By taking it apart
- alternative solutions
→
Repeatedly solving
→
- how techniques
.
<p>By taking it apart</p>
<ul>
<li>alternative solutions</li>
</ul>
<p>Repeatedly solving</p>
<ul>
<li>how techniques</li>
</ul>
````````````````````````````````
Issue jgm/CommonMark#430: h2..h6 not recognized as block tags.
```````````````````````````````` example
<h1>lorem</h1>
<h2>lorem</h2>
<h3>lorem</h3>
<h4>lorem</h4>
<h5>lorem</h5>
<h6>lorem</h6>
.
<h1>lorem</h1>
<h2>lorem</h2>
<h3>lorem</h3>
<h4>lorem</h4>
<h5>lorem</h5>
<h6>lorem</h6>
````````````````````````````````
Issue jgm/commonmark.js#109 - tabs after setext header line
```````````````````````````````` example
hi
--→
.
<h2>hi</h2>
````````````````````````````````
Issue #177 - incorrect emphasis parsing
```````````````````````````````` example
a***b* c*
.
<p>a*<em><em>b</em> c</em></p>
````````````````````````````````
Issue #193 - unescaped left angle brackets in link destination
```````````````````````````````` example
[a]
[a]: <te<st>
.
<p>[a]</p>
<p>[a]: &lt;te<st></p>
````````````````````````````````
Issue #192 - escaped spaces in link destination
```````````````````````````````` example
[a](te\ st)
.
<p>[a](te\ st)</p>
````````````````````````````````
Issue #527 - meta tags in inline contexts
```````````````````````````````` example
City:
<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
<meta itemprop="name" content="Springfield">
</span>
.
<p>City:
<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
<meta itemprop="name" content="Springfield">
</span></p>
````````````````````````````````
Issue #530 - link parsing corner cases
```````````````````````````````` example
[a](\ b)
[a](<<b)
[a](<b
)
.
<p>[a](\ b)</p>
<p>[a](&lt;&lt;b)</p>
<p>[a](&lt;b
)</p>
````````````````````````````````
Issue commonmark#526 - unescaped ( in link title
```````````````````````````````` example
[link](url ((title))
.
<p>[link](url ((title))</p>
````````````````````````````````
Issue commonmark#517 - script, pre, style close tag without
opener.
```````````````````````````````` example
</script>
</pre>
</style>
.
</script>
</pre>
</style>
````````````````````````````````
Issue #289.
```````````````````````````````` example
[a](<b) c>
.
<p>[a](&lt;b) c&gt;</p>
````````````````````````````````
Issue #334 - UTF-8 BOM
```````````````````````````````` example
# Hi
.
<h1>Hi</h1>
````````````````````````````````
Issue commonmark.js#213 - type 7 blocks can't interrupt
paragraph
```````````````````````````````` example
- <script>
- some text
some other text
</script>
.
<ul>
<li>
<script>
</li>
<li>some text
some other text
</script></li>
</ul>
````````````````````````````````
Issue #383 - emphasis parsing.
```````````````````````````````` example
*****Hello*world****
.
<p>**<em><strong>Hello<em>world</em></strong></em></p>
````````````````````````````````
Issue #424 - emphasis before links
```````````````````````````````` example
*text* [link](#section)
.
<p><em>text</em> <a href="#section">link</a></p>
````````````````````````````````
`<!doctype` is case-insensitive
```````````````````````````````` example
<!docType html>
.
<!docType html>
````````````````````````````````
Declarations don't need spaces, according to the spec
```````````````````````````````` example
x <!A>
.
<p>x <!A></p>
````````````````````````````````

46
deps/cmark/test/roundtrip_tests.py vendored Normal file

@@ -0,0 +1,46 @@
import re
import sys
from spec_tests import get_tests, do_test
from cmark import CMark
import argparse

parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.')
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
        help='path to spec')
parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
        default=None, help='limit to sections matching regex pattern')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
parser.add_argument('--no-normalize', dest='normalize',
        action='store_const', const=False, default=True,
        help='do not normalize HTML')
parser.add_argument('-n', '--number', type=int, default=None,
        help='only consider the test with the given number')
args = parser.parse_args(sys.argv[1:])

spec = sys.argv[1]


def converter(md):
    cmark = CMark(prog=args.program, library_dir=args.library_dir)
    [ec, result, err] = cmark.to_commonmark(md)
    if ec == 0:
        [ec, html, err] = cmark.to_html(result)
        if ec == 0:
            # In the commonmark writer we insert dummy HTML
            # comments between lists, and between lists and code
            # blocks. Strip these out, since the spec uses
            # two blank lines instead:
            return [ec, re.sub('<!-- end list -->\n', '', html), '']
        else:
            return [ec, html, err]
    else:
        return [ec, result, err]


tests = get_tests(args.spec)
result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0}
for test in tests:
    do_test(converter, test, args.normalize, result_counts)

exit(result_counts['fail'] + result_counts['error'])
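
converter() above round-trips markdown through the CommonMark writer and back to HTML, stripping the `<!-- end list -->` separators the writer emits. The same round trip by hand, with the library path again an assumption:

```python
# By-hand version of converter(): render to CommonMark, re-parse to HTML,
# and drop the "<!-- end list -->" markers the CommonMark writer inserts.
# "build/src" is an assumed build path.
import re
from cmark import CMark

cmark = CMark(library_dir="build/src")
ec, roundtripped, err = cmark.to_commonmark("- a\n\n```\ncode\n```\n")
if ec == 0:
    ec, html, err = cmark.to_html(roundtripped)
    html = re.sub('<!-- end list -->\n', '', html)
    print(html)
```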

4
deps/cmark/test/run-cmark-fuzz vendored Executable file

@@ -0,0 +1,4 @@
#!/bin/bash -eu
CMARK_FUZZ="$1"
shift
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" "${CMARK_FUZZ}" -max_len=256 -timeout=1 -dict=test/fuzzing_dictionary "$@"
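
A rough Python equivalent of the wrapper script above, with the same libFuzzer flags; the fuzzer binary path and corpus directory are assumptions:

```python
# Rough equivalent of run-cmark-fuzz: set ASAN_OPTIONS and launch the
# libFuzzer binary with the dictionary. "./cmark-fuzz" and "corpus/" are
# assumed paths for illustration.
import os
import subprocess

env = dict(os.environ, ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1")
subprocess.run(["./cmark-fuzz", "-max_len=256", "-timeout=1",
                "-dict=test/fuzzing_dictionary", "corpus/"],
               env=env, check=False)
```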

177
deps/cmark/test/smart_punct.txt vendored Normal file

@@ -0,0 +1,177 @@
## Smart punctuation
Open quotes are matched with closed quotes.
The same method is used for matching openers and closers
as is used in emphasis parsing:
```````````````````````````````` example
"Hello," said the spider.
"'Shelob' is my name."
.
<p>“Hello,” said the spider.
‘Shelob’ is my name.”</p>
````````````````````````````````
```````````````````````````````` example
'A', 'B', and 'C' are letters.
.
<p>‘A’, ‘B’, and ‘C’ are letters.</p>
````````````````````````````````
```````````````````````````````` example
'Oak,' 'elm,' and 'beech' are names of trees.
So is 'pine.'
.
<p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees.
So is ‘pine.’</p>
````````````````````````````````
```````````````````````````````` example
'He said, "I want to go."'
.
<p>‘He said, “I want to go.”’</p>
````````````````````````````````
A single quote that isn't an open quote matched
with a close quote will be treated as an
apostrophe:
```````````````````````````````` example
Were you alive in the 70's?
.
<p>Were you alive in the 70’s?</p>
````````````````````````````````
```````````````````````````````` example
Here is some quoted '`code`' and a "[quoted link](url)".
.
<p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
````````````````````````````````
Here the first `'` is treated as an apostrophe, not
an open quote, because the final single quote is matched
by the single quote before `jolly`:
```````````````````````````````` example
'tis the season to be 'jolly'
.
<p>’tis the season to be ‘jolly’</p>
````````````````````````````````
Multiple apostrophes should not be marked as open/closing quotes.
```````````````````````````````` example
'We'll use Jane's boat and John's truck,' Jenna said.
.
<p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p>
````````````````````````````````
An unmatched double quote will be interpreted as a
left double quote, to facilitate this style:
```````````````````````````````` example
"A paragraph with no closing quote.
"Second paragraph by same speaker, in fiction."
.
<p>“A paragraph with no closing quote.</p>
<p>“Second paragraph by same speaker, in fiction.”</p>
````````````````````````````````
A quote following a `]` or `)` character cannot
be an open quote:
```````````````````````````````` example
[a]'s b'
.
<p>[a]’s b’</p>
````````````````````````````````
Quotes that are escaped come out as literal straight
quotes:
```````````````````````````````` example
\"This is not smart.\"
This isn\'t either.
5\'8\"
.
<p>&quot;This is not smart.&quot;
This isn't either.
5'8&quot;</p>
````````````````````````````````
Two hyphens form an en-dash, three an em-dash.
```````````````````````````````` example
Some dashes: em---em
en--en
em --- em
en -- en
2--3
.
<p>Some dashes: em—em
en–en
em — em
en – en
2–3</p>
````````````````````````````````
A sequence of more than three hyphens is
parsed as a sequence of em and/or en dashes,
with no hyphens. If possible, a homogeneous
sequence of dashes is used (so, 10 hyphens
= 5 en dashes, and 9 hyphens = 3 em dashes).
When a heterogeneous sequence must be used,
the em dashes come first, followed by the en
dashes, and as few en dashes as possible are
used (so, 7 hyphens = 1 em dash and 2 en
dashes).
```````````````````````````````` example
one-
two--
three---
four----
five-----
six------
seven-------
eight--------
nine---------
thirteen-------------.
.
<p>one-
two–
three—
four––
five—–
six——
seven—––
eight––––
nine———
thirteen———––.</p>
````````````````````````````````
Hyphens can be escaped:
```````````````````````````````` example
Escaped hyphens: \-- \-\-\-.
.
<p>Escaped hyphens: -- ---.</p>
````````````````````````````````
Three periods form an ellipsis:
```````````````````````````````` example
Ellipses...and...and....
.
<p>Ellipses…and…and….</p>
````````````````````````````````
Periods can be escaped if ellipsis-formation
is not wanted:
```````````````````````````````` example
No ellipses\.\.\.
.
<p>No ellipses...</p>
````````````````````````````````
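
The hyphen-run rule described in this file (an em dash stands for three hyphens, an en dash for two; prefer a homogeneous run, otherwise em dashes first with as few en dashes as possible) is compact enough to sketch directly. This mirrors the documented behaviour and the examples above, not the C implementation itself:

```python
# Sketch of the hyphen-run rule described above. A homogeneous run is
# preferred; otherwise em dashes come first with as few en dashes as possible.
def smart_dashes(n: int) -> str:
    if n == 1:
        return "-"
    if n % 3 == 0:
        em, en = n // 3, 0
    elif n % 2 == 0:
        em, en = 0, n // 2
    elif n % 3 == 2:
        em, en = (n - 2) // 3, 1
    else:
        em, en = (n - 4) // 3, 2
    return "\u2014" * em + "\u2013" * en

for n in (2, 3, 7, 9, 10, 13):
    print(n, smart_dashes(n))   # 2 –, 3 —, 7 —––, 9 ———, 10 –––––, 13 ———––
```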

9742
deps/cmark/test/spec.txt vendored Normal file

File diff suppressed because it is too large

142
deps/cmark/test/spec_tests.py vendored Executable file

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
from difflib import unified_diff
import argparse
import re
import json
from cmark import CMark
from normalize import normalize_html

parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
        help='path to spec')
parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
        default=None, help='limit to sections matching regex pattern')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
parser.add_argument('--no-normalize', dest='normalize',
        action='store_const', const=False, default=True,
        help='do not normalize HTML')
parser.add_argument('-d', '--dump-tests', dest='dump_tests',
        action='store_const', const=True, default=False,
        help='dump tests in JSON format')
parser.add_argument('--debug-normalization', dest='debug_normalization',
        action='store_const', const=True,
        default=False, help='filter stdin through normalizer for testing')
parser.add_argument('-n', '--number', type=int, default=None,
        help='only consider the test with the given number')
args = parser.parse_args(sys.argv[1:])


def out(str):
    sys.stdout.buffer.write(str.encode('utf-8'))


def print_test_header(headertext, example_number, start_line, end_line):
    out("Example %d (lines %d-%d) %s\n" % (example_number, start_line, end_line, headertext))


def do_test(converter, test, normalize, result_counts):
    [retcode, actual_html, err] = converter(test['markdown'])
    if retcode == 0:
        expected_html = test['html']
        unicode_error = None
        if normalize:
            try:
                passed = normalize_html(actual_html) == normalize_html(expected_html)
            except UnicodeDecodeError as e:
                unicode_error = e
                passed = False
        else:
            passed = actual_html == expected_html
        if passed:
            result_counts['pass'] += 1
        else:
            print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
            out(test['markdown'] + '\n')
            if unicode_error:
                out("Unicode error: " + str(unicode_error) + '\n')
                out("Expected: " + repr(expected_html) + '\n')
                out("Got: " + repr(actual_html) + '\n')
            else:
                expected_html_lines = expected_html.splitlines(True)
                actual_html_lines = actual_html.splitlines(True)
                for diffline in unified_diff(expected_html_lines, actual_html_lines,
                                             "expected HTML", "actual HTML"):
                    out(diffline)
            out('\n')
            result_counts['fail'] += 1
    else:
        print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
        out("program returned error code %d\n" % retcode)
        sys.stdout.buffer.write(err)
        result_counts['error'] += 1


def get_tests(specfile):
    line_number = 0
    start_line = 0
    end_line = 0
    example_number = 0
    markdown_lines = []
    html_lines = []
    state = 0  # 0 regular text, 1 markdown example, 2 html output
    headertext = ''
    tests = []

    header_re = re.compile('#+ ')

    with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
        for line in specf:
            line_number = line_number + 1
            l = line.strip()
            if l == "`" * 32 + " example":
                state = 1
            elif l == "`" * 32:
                state = 0
                example_number = example_number + 1
                end_line = line_number
                tests.append({
                    "markdown": ''.join(markdown_lines).replace('→', "\t"),
                    "html": ''.join(html_lines).replace('→', "\t"),
                    "example": example_number,
                    "start_line": start_line,
                    "end_line": end_line,
                    "section": headertext})
                start_line = 0
                markdown_lines = []
                html_lines = []
            elif l == ".":
                state = 2
            elif state == 1:
                if start_line == 0:
                    start_line = line_number - 1
                markdown_lines.append(line)
            elif state == 2:
                html_lines.append(line)
            elif state == 0 and re.match(header_re, line):
                headertext = header_re.sub('', line).strip()
    return tests


if __name__ == "__main__":
    if args.debug_normalization:
        out(normalize_html(sys.stdin.read()))
        exit(0)

    all_tests = get_tests(args.spec)
    if args.pattern:
        pattern_re = re.compile(args.pattern, re.IGNORECASE)
    else:
        pattern_re = re.compile('.')
    tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ]
    if args.dump_tests:
        out(json.dumps(tests, ensure_ascii=False, indent=2))
        exit(0)
    else:
        skipped = len(all_tests) - len(tests)
        converter = CMark(prog=args.program, library_dir=args.library_dir).to_html
        result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
        for test in tests:
            do_test(converter, test, args.normalize, result_counts)
        out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts))
        exit(result_counts['fail'] + result_counts['error'])
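
get_tests() recognizes a fence of 32 backticks followed by " example", the markdown, a lone ".", the expected HTML, and a closing fence, with "→" standing in for a tab. A sketch that feeds a tiny spec through it; the temporary file is just for illustration, and it assumes it is run from the test/ directory so that `spec_tests` is importable:

```python
# Feed a minimal spec through get_tests() to show the parsed structure.
# Assumes the current directory is test/ so "spec_tests" imports cleanly.
import json
import tempfile
from spec_tests import get_tests

FENCE = "`" * 32
tiny_spec = "\n".join([
    "## Example section",
    "",
    FENCE + " example",
    "→foo",            # "→" is converted to a real tab by get_tests()
    ".",
    "<pre><code>foo",
    "</code></pre>",
    FENCE,
    "",
])

with tempfile.NamedTemporaryFile("w", suffix=".txt", newline="\n", delete=False) as f:
    f.write(tiny_spec)
    name = f.name

print(json.dumps(get_tests(name), indent=2))
```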