git subrepo clone --branch=0.30.3 https://github.com/commonmark/cmark.git deps/cmark
subrepo: subdir: "deps/cmark" merged: "5ba25ff" upstream: origin: "https://github.com/commonmark/cmark.git" branch: "0.30.3" commit: "5ba25ff" git-subrepo: version: "0.4.6" commit: "d4444b563"
This commit is contained in:
80
deps/cmark/test/CMakeLists.txt
vendored
Executable file
80
deps/cmark/test/CMakeLists.txt
vendored
Executable file
@@ -0,0 +1,80 @@
|
||||
# To get verbose output: cmake --build build --target "test" -- ARGS='-V'

# By default, we run the spec tests only if python3 is available.
# To require the spec tests, compile with -DSPEC_TESTS=1

if(SPEC_TESTS)
  find_package(PythonInterp 3 REQUIRED)
else()
  find_package(PythonInterp 3)
endif()

# Directory handed to the Python test drivers as the location of the cmark
# shared library (--library-dir) and of the cmark executable (--program).
set(CMARK_TEST_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/../src")

if(CMARK_SHARED OR CMARK_STATIC)
  add_test(NAME api_test COMMAND api_test)

  # The PATH tweak must stay inside this guard: set_tests_properties() fails
  # at configure time when the named test has not been registered.
  if(WIN32)
    file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/src" WIN_DLL_DIR)
    set(NEWPATH "${WIN_DLL_DIR};$ENV{PATH}")
    # Escape the separators so the PATH value survives as one list element.
    string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}")
    set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}")
  endif()
endif()

# NOTE(review): ROUNDTRIP is not referenced again in this file; kept in case
# something else relies on it - confirm before removing.
if(WIN32)
  set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
else()
  set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh")
endif()

if(PYTHONINTERP_FOUND)

  add_test(html_normalization
    "${PYTHON_EXECUTABLE}" "-m" "doctest"
    "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py"
  )

  # The *_library tests load the shared library through ctypes, so they are
  # only registered for shared builds.
  if(CMARK_SHARED)
    add_test(spectest_library
      "${PYTHON_EXECUTABLE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
      "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
      "--library-dir" "${CMARK_TEST_BIN_DIR}"
    )

    add_test(pathological_tests_library
      "${PYTHON_EXECUTABLE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
      "--library-dir" "${CMARK_TEST_BIN_DIR}"
    )

    add_test(roundtriptest_library
      "${PYTHON_EXECUTABLE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
      "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
      "--library-dir" "${CMARK_TEST_BIN_DIR}"
    )

    add_test(entity_library
      "${PYTHON_EXECUTABLE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
      "--library-dir" "${CMARK_TEST_BIN_DIR}"
    )
  endif()

  add_test(spectest_executable
    "${PYTHON_EXECUTABLE}"
    "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
    "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
    "--program" "${CMARK_TEST_BIN_DIR}/cmark"
  )

  # "--smart" is deliberately part of the --program string: the Python driver
  # splits that string on whitespace to build the command line.
  add_test(smartpuncttest_executable
    "${PYTHON_EXECUTABLE}"
    "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
    "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt"
    "--program" "${CMARK_TEST_BIN_DIR}/cmark --smart"
  )

  add_test(regressiontest_executable
    "${PYTHON_EXECUTABLE}"
    "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
    "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt"
    "--program" "${CMARK_TEST_BIN_DIR}/cmark"
  )

else()

  message("\n*** A python 3 interpreter is required to run the spec tests.\n")
  # Use CMake's own portable echo: a bare `echo` is a shell builtin rather
  # than an executable on some platforms, so the placeholder test could fail.
  add_test(skipping_spectests
    "${CMAKE_COMMAND}" -E echo
    "Skipping spec tests, because no python 3 interpreter is available."
  )

endif()
|
||||
|
36
deps/cmark/test/afl_test_cases/test.md
vendored
Normal file
36
deps/cmark/test/afl_test_cases/test.md
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# H1
|
||||
|
||||
H2
|
||||
--
|
||||
|
||||
t ☺
|
||||
*b* **em** `c`
|
||||
&ge;\&\
|
||||
\_e\_
|
||||
|
||||
4) I1
|
||||
|
||||
5) I2
|
||||
> [l](/u "t")
|
||||
>
|
||||
> - [f]
|
||||
> - 
|
||||
>
|
||||
>> <ftp://hh>
|
||||
>> <u@hh>
|
||||
|
||||
~~~ l☺
|
||||
cb
|
||||
~~~
|
||||
|
||||
c1
|
||||
c2
|
||||
|
||||
***
|
||||
|
||||
<div>
|
||||
<b>x</b>
|
||||
</div>
|
||||
|
||||
[f]: /u "t"
|
||||
|
33
deps/cmark/test/cmark-fuzz.c
vendored
Normal file
33
deps/cmark/test/cmark-fuzz.c
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "cmark.h"
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
struct __attribute__((packed)) {
|
||||
int options;
|
||||
int width;
|
||||
} fuzz_config;
|
||||
|
||||
if (size >= sizeof(fuzz_config)) {
|
||||
/* The beginning of `data` is treated as fuzzer configuration */
|
||||
memcpy(&fuzz_config, data, sizeof(fuzz_config));
|
||||
|
||||
/* Mask off valid option bits */
|
||||
fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
|
||||
|
||||
/* Remainder of input is the markdown */
|
||||
const char *markdown = (const char *)(data + sizeof(fuzz_config));
|
||||
const size_t markdown_size = size - sizeof(fuzz_config);
|
||||
cmark_node *doc = cmark_parse_document(markdown, markdown_size, fuzz_config.options);
|
||||
|
||||
free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width));
|
||||
free(cmark_render_html(doc, fuzz_config.options));
|
||||
free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width));
|
||||
free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width));
|
||||
free(cmark_render_xml(doc, fuzz_config.options));
|
||||
|
||||
cmark_node_free(doc);
|
||||
}
|
||||
return 0;
|
||||
}
|
62
deps/cmark/test/cmark.py
vendored
Normal file
62
deps/cmark/test/cmark.py
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p
|
||||
from subprocess import *
|
||||
import platform
|
||||
import os
|
||||
|
||||
def pipe_through_prog(prog, text):
    """Run ``prog`` as a subprocess with ``text`` on its stdin.

    ``prog`` is a command line in a single string; it is split on whitespace
    to build the argument vector. ``text`` is sent UTF-8 encoded.

    Returns ``[returncode, stdout decoded as UTF-8, raw stderr bytes]``.
    """
    argv = prog.split()
    proc = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    out_bytes, err_bytes = proc.communicate(input=text.encode('utf-8'))
    return [proc.returncode, out_bytes.decode('utf-8'), err_bytes]
|
||||
|
||||
def to_html(lib, text):
    """Render ``text`` to HTML through ``lib.cmark_markdown_to_html``.

    ``lib`` is the loaded cmark library object. The text is passed UTF-8
    encoded together with its byte length and the CMARK_OPT_UNSAFE option.

    Returns ``[0, html, '']``, the same shape as ``pipe_through_prog``.
    """
    # 1 << 17 == CMARK_OPT_UNSAFE
    unsafe_option = 1 << 17
    encoded = text.encode('utf-8')

    render = lib.cmark_markdown_to_html
    render.restype = c_char_p
    render.argtypes = [c_char_p, c_size_t, c_int]

    html_bytes = render(encoded, len(encoded), unsafe_option)
    return [0, html_bytes.decode('utf-8'), '']
|
||||
|
||||
def to_commonmark(lib, text):
    """Parse ``text`` and render it back to CommonMark through ``lib``.

    ``lib`` is the loaded cmark library object. The document is parsed with
    options 0 and rendered with options 0 and width 0.

    The parsed node tree is released with ``cmark_node_free`` once rendering
    is done (also when rendering raises), so repeated calls do not accumulate
    parsed documents.

    Returns ``[0, commonmark, '']``, the same shape as ``pipe_through_prog``.
    """
    textbytes = text.encode('utf-8')
    textlen = len(textbytes)

    parse_document = lib.cmark_parse_document
    parse_document.restype = c_void_p
    parse_document.argtypes = [c_char_p, c_size_t, c_int]

    render_commonmark = lib.cmark_render_commonmark
    render_commonmark.restype = c_char_p
    render_commonmark.argtypes = [c_void_p, c_int, c_int]

    node_free = lib.cmark_node_free
    node_free.restype = None
    # Declare the pointer argument explicitly so it is not truncated to int.
    node_free.argtypes = [c_void_p]

    node = parse_document(textbytes, textlen, 0)
    try:
        result = render_commonmark(node, 0, 0).decode('utf-8')
    finally:
        # ctypes maps a NULL c_void_p result to None; there is nothing to free.
        if node is not None:
            node_free(node)
    return [0, result, '']
|
||||
|
||||
class CMark:
    """Uniform front end to cmark, backed by the executable or the library.

    After construction the instance exposes two callables, ``to_html`` and
    ``to_commonmark``; each takes a text string and returns
    ``[returncode, output, error]``.
    """

    def __init__(self, prog=None, library_dir=None):
        """Select the backend.

        prog: command line of the cmark executable. When given, conversions
            are run through it (with ``--unsafe`` appended) and
            ``library_dir`` is ignored.
        library_dir: directory searched for the shared library when ``prog``
            is not given; defaults to ``build/src``.

        Raises FileNotFoundError when no library file is found in
        ``library_dir``.
        """
        self.prog = prog
        if prog:
            prog += ' --unsafe'
            self.to_html = lambda x: pipe_through_prog(prog, x)
            self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
        else:
            sysname = platform.system()
            if sysname == 'Darwin':
                libnames = [ "libcmark.dylib" ]
            elif sysname == 'Windows':
                libnames = [ "cmark.dll", "libcmark.dll" ]
            else:
                libnames = [ "libcmark.so" ]
            if not library_dir:
                library_dir = os.path.join("build", "src")
            candidates = [os.path.join(library_dir, libname) for libname in libnames]
            # First existing candidate wins, in the order listed above.
            libpath = next((c for c in candidates if os.path.isfile(c)), None)
            if libpath is None:
                # Fail with the searched paths instead of an unbound variable.
                raise FileNotFoundError(
                    "cmark library not found; tried: " + ", ".join(candidates))
            cmark = CDLL(libpath)
            self.to_html = lambda x: to_html(cmark, x)
            self.to_commonmark = lambda x: to_commonmark(cmark, x)
|
||||
|
67
deps/cmark/test/entity_tests.py
vendored
Normal file
67
deps/cmark/test/entity_tests.py
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import os
|
||||
import argparse
|
||||
import sys
|
||||
import platform
|
||||
import html
|
||||
from cmark import CMark
|
||||
|
||||
def get_entities():
    """Read the entity table from ``../src/entities.inc`` next to this file.

    Each matching line yields ``(entity_name, text)``, where ``text`` is the
    UTF-8 decoding of the listed byte values. The last value of every byte
    list is dropped (presumably a terminating 0 - verify against
    entities.inc).
    """
    entry_re = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
    inc_path = os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')
    with open(inc_path) as inc_file:
        source = inc_file.read()
    return [
        (name, bytes(map(int, byte_list.split(", ")[:-1])).decode('utf-8'))
        for name, byte_list in re.findall(entry_re, source, re.MULTILINE)
    ]
|
||||
|
||||
# Command line: test either the executable (--program) or the shared
# library found in --library-dir.
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
                    help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                    default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])

cmark = CMark(prog=args.program, library_dir=args.library_dir)

entities = get_entities()

passed = 0
errored = 0
failed = 0

# Expected output for entities whose rendered HTML is not simply the decoded
# character: these characters come back as character references.
exceptions = {
    'quot': '&quot;',
    'QUOT': '&quot;',

    # These are broken, but I'm not too worried about them.
    'nvlt': '&lt;⃒',
    'nvgt': '&gt;⃒',
}

print("Testing entities:")
for entity, utf8 in entities:
    [rc, actual, err] = cmark.to_html("&{};".format(entity))
    check = exceptions.get(entity, utf8)

    if rc != 0:
        errored += 1
        print(entity, '[ERRORED (return code {})]'.format(rc))
        print(err)
    elif check in actual:
        # print(entity, '[PASSED]') # omit noisy success output
        passed += 1
    else:
        print(entity, '[FAILED]')
        print(repr(actual))
        failed += 1

print("{} passed, {} failed, {} errored".format(passed, failed, errored))
# sys.exit rather than the interactive-only exit() helper injected by `site`.
sys.exit(0 if failed == 0 and errored == 0 else 1)
|
49
deps/cmark/test/fuzzing_dictionary
vendored
Normal file
49
deps/cmark/test/fuzzing_dictionary
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
asterisk="*"
|
||||
attr_generic=" a=\"1\""
|
||||
attr_href=" href=\"1\""
|
||||
attr_xml_lang=" xml:lang=\"1\""
|
||||
attr_xmlns=" xmlns=\"1\""
|
||||
backslash="\\"
|
||||
backtick="`"
|
||||
colon=":"
|
||||
dashes="---"
|
||||
double_quote="\""
|
||||
entity_builtin="&lt;"
|
||||
entity_decimal="&#1;"
|
||||
entity_external="&a;"
|
||||
entity_hex="&#x1;"
|
||||
equals="==="
|
||||
exclamation="!"
|
||||
greater_than=">"
|
||||
hash="#"
|
||||
hyphen="-"
|
||||
indent=" "
|
||||
left_bracket="["
|
||||
left_paren="("
|
||||
less_than="<"
|
||||
plus="+"
|
||||
right_bracket="]"
|
||||
right_paren=")"
|
||||
single_quote="'"
|
||||
string_any="ANY"
|
||||
string_brackets="[]"
|
||||
string_cdata="CDATA"
|
||||
string_dashes="--"
|
||||
string_empty_dblquotes="\"\""
|
||||
string_empty_quotes="''"
|
||||
string_idrefs="IDREFS"
|
||||
string_parentheses="()"
|
||||
string_pcdata="#PCDATA"
|
||||
tag_cdata="<![CDATA["
|
||||
tag_close="</a>"
|
||||
tag_doctype="<!DOCTYPE"
|
||||
tag_element="<!ELEMENT"
|
||||
tag_entity="<!ENTITY"
|
||||
tag_notation="<!NOTATION"
|
||||
tag_open="<a>"
|
||||
tag_open_close="<a />"
|
||||
tag_open_exclamation="<!"
|
||||
tag_open_q="<?"
|
||||
tag_sq2_close="]]>"
|
||||
tag_xml_q="<?xml?>"
|
||||
underscore="_"
|
194
deps/cmark/test/normalize.py
vendored
Normal file
194
deps/cmark/test/normalize.py
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from html.parser import HTMLParser
|
||||
import urllib
|
||||
|
||||
try:
|
||||
from html.parser import HTMLParseError
|
||||
except ImportError:
|
||||
# HTMLParseError was removed in Python 3.5. It could never be
|
||||
# thrown, so we define a placeholder instead.
|
||||
class HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
from html.entities import name2codepoint
|
||||
import sys
|
||||
import re
|
||||
import html
|
||||
|
||||
# Normalization code, adapted from
|
||||
# https://github.com/karlcow/markdown-testsuite/
|
||||
# Attribute names considered significant; currently only referenced by a
# commented-out filter in MyHTMLParser.handle_starttag.
significant_attrs = ["alt", "href", "src", "title"]
# Any run of whitespace. Raw string so that \s is not treated as an
# (invalid) string escape sequence.
whitespace_re = re.compile(r'\s+')
|
||||
class MyHTMLParser(HTMLParser):
    """HTML parser that re-serializes its input in a normalized form.

    The normalized text accumulates in ``self.output``. ``self.last`` records
    the kind of the previous event ("starttag", "endtag", "data", "comment",
    "decl", "pi" or "ref") and ``self.last_tag`` the most recent tag name;
    both drive the whitespace handling in ``handle_data``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Keep character references as separate events so output_char() can
        # decide how each one is rendered.
        self.convert_charrefs = False
        self.last = "starttag"
        self.in_pre = False
        self.output = ""
        self.last_tag = ""

    def handle_data(self, data):
        """Append text, collapsing and trimming whitespace outside <pre>."""
        after_tag = self.last == "endtag" or self.last == "starttag"
        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
        if after_tag and self.last_tag == "br":
            data = data.lstrip('\n')
        if not self.in_pre:
            data = whitespace_re.sub(' ', data)
        if after_block_tag and not self.in_pre:
            if self.last == "starttag":
                data = data.lstrip()
            elif self.last == "endtag":
                data = data.strip()
        self.output += data
        self.last = "data"

    def handle_endtag(self, tag):
        """Append a closing tag; whitespace before a block close is removed."""
        if tag == "pre":
            self.in_pre = False
        elif self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "</" + tag + ">"
        self.last_tag = tag
        self.last = "endtag"

    def handle_starttag(self, tag, attrs):
        """Append an opening tag with its attributes sorted."""
        # urllib.quote/unquote do not exist in Python 3; the functions live
        # in urllib.parse.
        from urllib.parse import quote, unquote

        if tag == "pre":
            self.in_pre = True
        if self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "<" + tag
        # For now we don't strip out 'extra' attributes, because of
        # raw HTML test cases.
        # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
        if attrs:
            attrs.sort()
            for (k, v) in attrs:
                self.output += " " + k
                # NOTE(review): this tests the attribute VALUE, not its name,
                # so it only fires for values that are literally 'href' or
                # 'src'. It looks like `k` was intended, but switching would
                # change the normalization of every link - left unchanged.
                if v in ['href', 'src']:
                    self.output += ("=" + '"' +
                                    quote(unquote(v), safe='/') + '"')
                elif v is not None:
                    self.output += ("=" + '"' + html.escape(v, quote=True) + '"')
        self.output += ">"
        self.last_tag = tag
        self.last = "starttag"

    def handle_startendtag(self, tag, attrs):
        """Ignore closing tag for self-closing """
        self.handle_starttag(tag, attrs)
        self.last_tag = tag
        self.last = "endtag"

    def handle_comment(self, data):
        self.output += '<!--' + data + '-->'
        self.last = "comment"

    def handle_decl(self, data):
        self.output += '<!' + data + '>'
        self.last = "decl"

    def unknown_decl(self, data):
        self.output += '<!' + data + '>'
        self.last = "decl"

    def handle_pi(self, data):
        self.output += '<?' + data + '>'
        self.last = "pi"

    def handle_entityref(self, name):
        """Append a named reference, decoded when the name is known."""
        try:
            c = chr(name2codepoint[name])
        except KeyError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    def handle_charref(self, name):
        """Append a numeric reference (decimal, or hex with an 'x' prefix)."""
        try:
            if name.startswith("x"):
                c = chr(int(name[1:], 16))
            else:
                c = chr(int(name))
        except ValueError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    # Helpers.
    def output_char(self, c, fallback):
        """Append ``c``; '<', '>', '&' and '"' are written as entities.

        ``fallback`` is appended instead when ``c`` is None, i.e. the
        reference could not be decoded.
        """
        if c == '<':
            self.output += "&lt;"
        elif c == '>':
            self.output += "&gt;"
        elif c == '&':
            self.output += "&amp;"
        elif c == '"':
            self.output += "&quot;"
        elif c is None:
            self.output += fallback
        else:
            self.output += c

    def is_block_tag(self, tag):
        """Return True when ``tag`` is in the list of block-level tags."""
        return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
            'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
            'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
            'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
            'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
            'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
|
||||
|
||||
def normalize_html(html):
    r"""
    Return normalized form of HTML which ignores insignificant output
    differences:

    * Multiple inner whitespaces are collapsed to a single space (except
      in pre tags):

        >>> normalize_html("<p>a \t b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a \t\nb</p>")
        '<p>a b</p>'

    * Whitespace surrounding block-level tags is removed.

        >>> normalize_html("<p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html(" <p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a b</p> ")
        '<p>a b</p>'

        >>> normalize_html("\n\t<p>\n\t\ta b\t\t</p>\n\t")
        '<p>a b</p>'

        >>> normalize_html("<i>a b</i> ")
        '<i>a b</i> '

    * Self-closing tags are converted to open tags.

        >>> normalize_html("<br />")
        '<br>'

    * Attributes are sorted and lowercased.

        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
        '<a href="foo" title="bar">x</a>'

    * References are converted to unicode, except that '<', '>', '&', and
      '"' are rendered using entities.

        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
        '\u2200&amp;&gt;&lt;&quot;'

    """
    # Raw string: the backslashes belong to the regex, not to Python.
    html_chunk_re = re.compile(r"(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
    try:
        parser = MyHTMLParser()
        # We work around HTMLParser's limitations parsing CDATA
        # by breaking the input into chunks and passing CDATA chunks
        # through verbatim.
        for chunk in re.finditer(html_chunk_re, html):
            if chunk.group(0)[:8] == "<![CDATA":
                parser.output += chunk.group(0)
            else:
                parser.feed(chunk.group(0))
        parser.close()
        return parser.output
    except HTMLParseError as e:
        # The placeholder HTMLParseError has no .msg attribute, so fall back
        # to the plain string form of the exception.
        sys.stderr.write("Normalization error: " + getattr(e, 'msg', str(e)) + "\n")
        return html  # on error, return unnormalized HTML
|
193
deps/cmark/test/pathological_tests.py
vendored
Normal file
193
deps/cmark/test/pathological_tests.py
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import argparse
|
||||
import sys
|
||||
import platform
|
||||
import itertools
|
||||
import multiprocessing
|
||||
import queue
|
||||
import time
|
||||
from cmark import CMark
|
||||
|
||||
# Seconds run_tests() waits for one case's result before reporting [TIMEOUT].
TIMEOUT = 5

# Command line: test either the executable (--program) or the shared
# library found in --library-dir.
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
                    help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                    default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])

# Cases named here are listed as "ignored" by run_tests() instead of being
# counted as failed or errored; only the keys are consulted.
allowed_failures = {"many references": True}

cmark = CMark(prog=args.program, library_dir=args.library_dir)
|
||||
|
||||
def hash_collisions(count=25000, refmap_size=16):
    """Build a document whose reference labels all hash to bucket 0.

    Labels ``x0, x1, ...`` are filtered with ``badhash`` and the first
    ``count`` survivors are kept. The first survivor (``bad_key``) is never
    defined; every other one is defined as a link reference and followed by a
    paragraph containing ``[bad_key]``.

    count: number of colliding labels to collect (must be at least 1).
    refmap_size: modulus applied to the hash when testing for a collision.

    Returns ``(document, regex)`` where ``regex`` matches ``count - 1``
    consecutive ``<p>[bad_key]</p>`` lines.
    """
    def badhash(ref):
        # 32-bit rolling hash; True when the label lands in bucket 0.
        h = 0
        for c in ref:
            a = (h << 6) & 0xFFFFFFFF
            b = (h << 16) & 0xFFFFFFFF
            h = ord(c) + a + b - h
            h = h & 0xFFFFFFFF

        return (h % refmap_size) == 0

    keys = ("x%d" % i for i in itertools.count())
    collisions = itertools.islice((k for k in keys if badhash(k)), count)
    bad_key = next(collisions)

    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)

    # Backslashes are doubled so "\[" is no longer an invalid string escape;
    # the resulting pattern text is unchanged.
    return document, re.compile("(<p>\\[%s\\]</p>\n){%d}" % (bad_key, count - 1))
|
||||
|
||||
|
||||
# Maps a case description to a pair (input, regex that must match the HTML
# output). Regexes containing backslashes are raw strings so that regex
# escapes are not read as (invalid) string escapes.
pathological = {
    # note - some pythons have limit of 65535 for {num-matches} in re.
    "nested strong emph":
                (("*a **a " * 32500) + "b" + (" a** a*" * 32500),
                 re.compile("(<em>a <strong>a ){32500}b( a</strong> a</em>){32500}")),
    "many emph closers with no openers":
                 (("a_ " * 32500),
                  re.compile("(a[_] ){32499}a_")),
    "many emph openers with no closers":
                 (("_a " * 32500),
                  re.compile("(_a ){32499}_a")),
    "many link closers with no openers":
                 (("a]" * 32500),
                  re.compile(r"(a\]){32500}")),
    "many link openers with no closers":
                 (("[a" * 32500),
                  re.compile(r"(\[a){32500}")),
    "mismatched openers and closers":
                 (("*a_ " * 25000),
                  re.compile("([*]a[_] ){24999}[*]a_")),
    "issue #389":
                 (("*a " * 20000 + "_a*_ " * 20000),
                  re.compile(r"(<em>a ){20000}(_a<\/em>_ ?){20000}")),
    "openers and closers multiple of 3":
                 (("a**b" + ("c* " * 25000)),
                  re.compile("a[*][*]b(c[*] ){24999}c[*]")),
    "link openers and emph closers":
                 (("[ a_" * 25000),
                  re.compile(r"(\[ a_){25000}")),
    "pattern [ (]( repeated":
                 (("[ (](" * 40000),
                  re.compile(r"(\[ \(\]\(){40000}")),
    "pattern ![[]() repeated":
                 ("![[]()" * 160000,
                  re.compile(r'(!\[<a href=""></a>){160000}')),
    "hard link/emph case":
                 ("**x [a*b**c*](d)",
                  re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
    "nested brackets":
                 (("[" * 25000) + "a" + ("]" * 25000),
                  re.compile(r"\[{25000}a\]{25000}")),
    "nested block quotes":
                 ((("> " * 25000) + "a"),
                  re.compile("(<blockquote>\n){25000}")),
    "deeply nested lists":
                 ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))),
                  re.compile("<ul>\n(<li>a\n<ul>\n){499}<li>a</li>\n</ul>\n(</li>\n</ul>\n){499}")),
    "U+0000 in input":
                 ("abc\u0000de\u0000",
                  re.compile("abc\ufffd?de\ufffd?")),
    "backticks":
                 ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))),
                  re.compile("^<p>[e`]*</p>\n$")),
    # The '<' characters of the next two inputs appear as &lt; in the HTML
    # output, so the regexes match the character reference.
    "unclosed links A":
                 ("[a](<b" * 30000,
                  re.compile(r"(\[a\]\(&lt;b){30000}")),
    "unclosed links B":
                 ("[a](b" * 30000,
                  re.compile(r"(\[a\]\(b){30000}")),
    "unclosed <!--":
                 ("</" + "<!--" * 300000,
                  re.compile(r"\&lt;\/(\&lt;!--){300000}")),
    "reference collisions": hash_collisions()
    # "many references":
    #            ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
    #             re.compile("(\[0\] ){4999}"))
    }
|
||||
|
||||
# Cases rendered back to CommonMark rather than HTML: maps a description to
# (input, regex that must match the output).
pathological_cmark = {
    "nested inlines":
                 ("*" * 20000 + "a" + "*" * 20000,
                  # Raw string: \* is a regex escape, not a string escape.
                  re.compile(r"^\*+a\*+$")),
    }
|
||||
|
||||
# Any run of whitespace. The previous pattern '/s+/' (Perl-style delimiters)
# only matched the literal text "/s+/". Not referenced elsewhere in this file.
whitespace_re = re.compile(r'\s+')
|
||||
|
||||
def run_pathological(q, inp):
    """Worker: convert ``inp`` to HTML and put the result triple on ``q``."""
    outcome = cmark.to_html(inp)
    q.put(outcome)
|
||||
|
||||
def run_pathological_cmark(q, inp):
    """Worker: convert ``inp`` to CommonMark and put the result triple on ``q``."""
    outcome = cmark.to_commonmark(inp)
    q.put(outcome)
|
||||
|
||||
def run_tests():
    """Run every pathological case in its own process and report the results.

    Each case gets TIMEOUT seconds to put its ``(rc, output, err)`` triple on
    the queue; a case that does not is terminated and counted as errored.
    Cases named in ``allowed_failures`` are listed as ignored instead of
    failed or errored. Exits the interpreter with status 1 when anything
    failed or errored, otherwise 0.
    """
    q = multiprocessing.Queue()
    passed = []
    errored = []
    failed = []
    ignored = []

    def record(description, bucket):
        # Allowed failures are reported separately and never fail the run.
        if description in allowed_failures:
            ignored.append(description)
        else:
            bucket.append(description)

    print("Testing pathological cases:")
    for description in (*pathological, *pathological_cmark):
        if description in pathological:
            (inp, regex) = pathological[description]
            worker = run_pathological
        else:
            (inp, regex) = pathological_cmark[description]
            worker = run_pathological_cmark
        p = multiprocessing.Process(target=worker, args=(q, inp))
        p.start()
        try:
            # wait TIMEOUT seconds or until it finishes
            rc, actual, err = q.get(True, TIMEOUT)
            p.join()
            if rc != 0:
                print(description, '[ERRORED (return code %d)]' %rc)
                print(err)
                record(description, errored)
            elif regex.search(actual):
                print(description, '[PASSED]')
                passed.append(description)
            else:
                print(description, '[FAILED]')
                print(repr(actual[:60]))
                record(description, failed)
        except queue.Empty:
            p.terminate()
            p.join()
            print(description, '[TIMEOUT]')
            record(description, errored)

    print("%d passed, %d failed, %d errored" %
          (len(passed), len(failed), len(errored)))
    if ignored:
        print("Ignoring these allowed failures:")
        for x in ignored:
            print(x)
    # sys.exit rather than the interactive-only exit() helper injected by `site`.
    sys.exit(1 if failed or errored else 0)

if __name__ == "__main__":
    run_tests()
|
215
deps/cmark/test/regression.txt
vendored
Normal file
215
deps/cmark/test/regression.txt
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
### Regression tests
|
||||
|
||||
Issue #113: EOL character weirdness on Windows
|
||||
(Important: first line ends with CR + CR + LF)
|
||||
|
||||
```````````````````````````````` example
|
||||
line1
|
||||
|
||||
line2
|
||||
.
|
||||
<p>line1</p>
|
||||
<p>line2</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #114: cmark skipping first character in line
|
||||
(Important: the blank lines around "Repeatedly" contain a tab.)
|
||||
|
||||
```````````````````````````````` example
|
||||
By taking it apart
|
||||
|
||||
- alternative solutions
|
||||
→
|
||||
Repeatedly solving
|
||||
→
|
||||
- how techniques
|
||||
.
|
||||
<p>By taking it apart</p>
|
||||
<ul>
|
||||
<li>alternative solutions</li>
|
||||
</ul>
|
||||
<p>Repeatedly solving</p>
|
||||
<ul>
|
||||
<li>how techniques</li>
|
||||
</ul>
|
||||
````````````````````````````````
|
||||
|
||||
Issue jgm/CommonMark#430: h2..h6 not recognized as block tags.
|
||||
|
||||
```````````````````````````````` example
|
||||
<h1>lorem</h1>
|
||||
|
||||
<h2>lorem</h2>
|
||||
|
||||
<h3>lorem</h3>
|
||||
|
||||
<h4>lorem</h4>
|
||||
|
||||
<h5>lorem</h5>
|
||||
|
||||
<h6>lorem</h6>
|
||||
.
|
||||
<h1>lorem</h1>
|
||||
<h2>lorem</h2>
|
||||
<h3>lorem</h3>
|
||||
<h4>lorem</h4>
|
||||
<h5>lorem</h5>
|
||||
<h6>lorem</h6>
|
||||
````````````````````````````````
|
||||
|
||||
Issue jgm/commonmark.js#109 - tabs after setext header line
|
||||
|
||||
|
||||
```````````````````````````````` example
|
||||
hi
|
||||
--→
|
||||
.
|
||||
<h2>hi</h2>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #177 - incorrect emphasis parsing
|
||||
|
||||
```````````````````````````````` example
|
||||
a***b* c*
|
||||
.
|
||||
<p>a*<em><em>b</em> c</em></p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #193 - unescaped left angle brackets in link destination
|
||||
|
||||
```````````````````````````````` example
|
||||
[a]
|
||||
|
||||
[a]: <te<st>
|
||||
.
|
||||
<p>[a]</p>
|
||||
<p>[a]: &lt;te&lt;st&gt;</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #192 - escaped spaces in link destination
|
||||
|
||||
|
||||
```````````````````````````````` example
|
||||
[a](te\ st)
|
||||
.
|
||||
<p>[a](te\ st)</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #527 - meta tags in inline contexts
|
||||
|
||||
```````````````````````````````` example
|
||||
City:
|
||||
<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
|
||||
<meta itemprop="name" content="Springfield">
|
||||
</span>
|
||||
.
|
||||
<p>City:
|
||||
<span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
|
||||
<meta itemprop="name" content="Springfield">
|
||||
</span></p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #530 - link parsing corner cases
|
||||
|
||||
```````````````````````````````` example
|
||||
[a](\ b)
|
||||
|
||||
[a](<<b)
|
||||
|
||||
[a](<b
|
||||
)
|
||||
.
|
||||
<p>[a](\ b)</p>
|
||||
<p>[a](&lt;&lt;b)</p>
|
||||
<p>[a](&lt;b
|
||||
)</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue commonmark#526 - unescaped ( in link title
|
||||
|
||||
```````````````````````````````` example
|
||||
[link](url ((title))
|
||||
.
|
||||
<p>[link](url ((title))</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue commonmark#517 - script, pre, style close tag without
|
||||
opener.
|
||||
|
||||
```````````````````````````````` example
|
||||
</script>
|
||||
|
||||
</pre>
|
||||
|
||||
</style>
|
||||
.
|
||||
</script>
|
||||
</pre>
|
||||
</style>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #289.
|
||||
|
||||
```````````````````````````````` example
|
||||
[a](<b) c>
|
||||
.
|
||||
<p>[a](&lt;b) c&gt;</p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #334 - UTF-8 BOM
|
||||
|
||||
```````````````````````````````` example
|
||||
# Hi
|
||||
.
|
||||
<h1>Hi</h1>
|
||||
````````````````````````````````
|
||||
|
||||
Issue commonmark.js#213 - type 7 blocks can't interrupt
|
||||
paragraph
|
||||
|
||||
```````````````````````````````` example
|
||||
- <script>
|
||||
- some text
|
||||
some other text
|
||||
</script>
|
||||
.
|
||||
<ul>
|
||||
<li>
|
||||
<script>
|
||||
</li>
|
||||
<li>some text
|
||||
some other text
|
||||
</script></li>
|
||||
</ul>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #383 - emphasis parsing.
|
||||
|
||||
```````````````````````````````` example
|
||||
*****Hello*world****
|
||||
.
|
||||
<p>**<em><strong>Hello<em>world</em></strong></em></p>
|
||||
````````````````````````````````
|
||||
|
||||
Issue #424 - emphasis before links
|
||||
|
||||
```````````````````````````````` example
|
||||
*text* [link](#section)
|
||||
.
|
||||
<p><em>text</em> <a href="#section">link</a></p>
|
||||
````````````````````````````````
|
||||
|
||||
`<!doctype` is case-insensitive
|
||||
```````````````````````````````` example
|
||||
<!docType html>
|
||||
.
|
||||
<!docType html>
|
||||
````````````````````````````````
|
||||
|
||||
Declarations don't need spaces, according to the spec
|
||||
```````````````````````````````` example
|
||||
x <!A>
|
||||
.
|
||||
<p>x <!A></p>
|
||||
````````````````````````````````
|
||||
|
46
deps/cmark/test/roundtrip_tests.py
vendored
Normal file
46
deps/cmark/test/roundtrip_tests.py
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
import sys
|
||||
from spec_tests import get_tests, do_test
|
||||
from cmark import CMark
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.')
|
||||
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
|
||||
help='program to test')
|
||||
parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
|
||||
help='path to spec')
|
||||
parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
|
||||
default=None, help='limit to sections matching regex pattern')
|
||||
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
|
||||
default=None, help='directory containing dynamic library')
|
||||
parser.add_argument('--no-normalize', dest='normalize',
|
||||
action='store_const', const=False, default=True,
|
||||
help='do not normalize HTML')
|
||||
parser.add_argument('-n', '--number', type=int, default=None,
|
||||
help='only consider the test with the given number')
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
|
||||
spec = sys.argv[1]
|
||||
|
||||
def converter(md):
    """Round-trip ``md``: markdown -> CommonMark -> HTML.

    Returns ``[exit_code, output, error]``. When either conversion reports a
    non-zero code, that conversion's triple is returned unchanged.
    """
    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    [ec, result, err] = cmark.to_commonmark(md)
    if ec != 0:
        return [ec, result, err]

    [ec, html, err] = cmark.to_html(result)
    if ec != 0:
        return [ec, html, err]

    # In the commonmark writer we insert dummy HTML
    # comments between lists, and between lists and code
    # blocks.  Strip these out, since the spec uses
    # two blank lines instead:
    return [ec, re.sub('<!-- end list -->\n', '', html), '']
|
||||
|
||||
# Run every spec example through the round-trip converter and tally results.
tests = get_tests(args.spec)
result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0}
for test in tests:
    do_test(converter, test, args.normalize, result_counts)

# The exit status is the number of problems, capped at 255: a larger count
# would wrap modulo 256, so exactly 256 failures would have reported success.
sys.exit(min(result_counts['fail'] + result_counts['error'], 255))
|
4
deps/cmark/test/run-cmark-fuzz
vendored
Executable file
4
deps/cmark/test/run-cmark-fuzz
vendored
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash -eu
#
# Usage: run-cmark-fuzz <fuzzer-binary> [extra fuzzer arguments...]
#
# Runs the given fuzzer binary with a fixed set of options; any further
# arguments are passed through to it unchanged.  The dictionary path is
# relative, so the script has to be started from the directory that
# contains "test/".

fuzzer_binary="$1"
shift

# ASAN_OPTIONS is set for the fuzzer process only.
ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" \
    "${fuzzer_binary}" \
    -max_len=256 \
    -timeout=1 \
    -dict=test/fuzzing_dictionary \
    "$@"
|
177
deps/cmark/test/smart_punct.txt
vendored
Normal file
177
deps/cmark/test/smart_punct.txt
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
## Smart punctuation
|
||||
|
||||
Open quotes are matched with closed quotes.
|
||||
The same method is used for matching openers and closers
|
||||
as is used in emphasis parsing:
|
||||
|
||||
```````````````````````````````` example
|
||||
"Hello," said the spider.
|
||||
"'Shelob' is my name."
|
||||
.
|
||||
<p>“Hello,” said the spider.
|
||||
“‘Shelob’ is my name.”</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
'A', 'B', and 'C' are letters.
|
||||
.
|
||||
<p>‘A’, ‘B’, and ‘C’ are letters.</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
'Oak,' 'elm,' and 'beech' are names of trees.
|
||||
So is 'pine.'
|
||||
.
|
||||
<p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees.
|
||||
So is ‘pine.’</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
'He said, "I want to go."'
|
||||
.
|
||||
<p>‘He said, “I want to go.”’</p>
|
||||
````````````````````````````````
|
||||
|
||||
A single quote that isn't an open quote matched
|
||||
with a close quote will be treated as an
|
||||
apostrophe:
|
||||
|
||||
```````````````````````````````` example
|
||||
Were you alive in the 70's?
|
||||
.
|
||||
<p>Were you alive in the 70’s?</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
Here is some quoted '`code`' and a "[quoted link](url)".
|
||||
.
|
||||
<p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
|
||||
````````````````````````````````
|
||||
|
||||
Here the first `'` is treated as an apostrophe, not
|
||||
an open quote, because the final single quote is matched
|
||||
by the single quote before `jolly`:
|
||||
|
||||
```````````````````````````````` example
|
||||
'tis the season to be 'jolly'
|
||||
.
|
||||
<p>’tis the season to be ‘jolly’</p>
|
||||
````````````````````````````````
|
||||
|
||||
Multiple apostrophes should not be marked as open/closing quotes.
|
||||
|
||||
```````````````````````````````` example
|
||||
'We'll use Jane's boat and John's truck,' Jenna said.
|
||||
.
|
||||
<p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p>
|
||||
````````````````````````````````
|
||||
|
||||
An unmatched double quote will be interpreted as a
|
||||
left double quote, to facilitate this style:
|
||||
|
||||
```````````````````````````````` example
|
||||
"A paragraph with no closing quote.
|
||||
|
||||
"Second paragraph by same speaker, in fiction."
|
||||
.
|
||||
<p>“A paragraph with no closing quote.</p>
|
||||
<p>“Second paragraph by same speaker, in fiction.”</p>
|
||||
````````````````````````````````
|
||||
|
||||
A quote following a `]` or `)` character cannot
|
||||
be an open quote:
|
||||
|
||||
```````````````````````````````` example
|
||||
[a]'s b'
|
||||
.
|
||||
<p>[a]’s b’</p>
|
||||
````````````````````````````````
|
||||
|
||||
Quotes that are escaped come out as literal straight
|
||||
quotes:
|
||||
|
||||
```````````````````````````````` example
|
||||
\"This is not smart.\"
|
||||
This isn\'t either.
|
||||
5\'8\"
|
||||
.
|
||||
<p>"This is not smart."
|
||||
This isn't either.
|
||||
5'8"</p>
|
||||
````````````````````````````````
|
||||
|
||||
Two hyphens form an en-dash, three an em-dash.
|
||||
|
||||
```````````````````````````````` example
|
||||
Some dashes: em---em
|
||||
en--en
|
||||
em --- em
|
||||
en -- en
|
||||
2--3
|
||||
.
|
||||
<p>Some dashes: em—em
|
||||
en–en
|
||||
em — em
|
||||
en – en
|
||||
2–3</p>
|
||||
````````````````````````````````
|
||||
|
||||
A sequence of more than three hyphens is
|
||||
parsed as a sequence of em and/or en dashes,
|
||||
with no hyphens. If possible, a homogeneous
|
||||
sequence of dashes is used (so, 10 hyphens
|
||||
= 5 en dashes, and 9 hyphens = 3 em dashes).
|
||||
When a heterogeneous sequence must be used,
|
||||
the em dashes come first, followed by the en
|
||||
dashes, and as few en dashes as possible are
|
||||
used (so, 7 hyphens = 2 em dashes and 1 en
|
||||
dash).
|
||||
|
||||
```````````````````````````````` example
|
||||
one-
|
||||
two--
|
||||
three---
|
||||
four----
|
||||
five-----
|
||||
six------
|
||||
seven-------
|
||||
eight--------
|
||||
nine---------
|
||||
thirteen-------------.
|
||||
.
|
||||
<p>one-
|
||||
two–
|
||||
three—
|
||||
four––
|
||||
five—–
|
||||
six——
|
||||
seven—––
|
||||
eight––––
|
||||
nine———
|
||||
thirteen———––.</p>
|
||||
````````````````````````````````
|
||||
|
||||
Hyphens can be escaped:
|
||||
|
||||
```````````````````````````````` example
|
||||
Escaped hyphens: \-- \-\-\-.
|
||||
.
|
||||
<p>Escaped hyphens: -- ---.</p>
|
||||
````````````````````````````````
|
||||
|
||||
Three periods form an ellipsis:
|
||||
|
||||
```````````````````````````````` example
|
||||
Ellipses...and...and....
|
||||
.
|
||||
<p>Ellipses…and…and….</p>
|
||||
````````````````````````````````
|
||||
|
||||
Periods can be escaped if ellipsis-formation
|
||||
is not wanted:
|
||||
|
||||
```````````````````````````````` example
|
||||
No ellipses\.\.\.
|
||||
.
|
||||
<p>No ellipses...</p>
|
||||
````````````````````````````````
|
9742
deps/cmark/test/spec.txt
vendored
Normal file
9742
deps/cmark/test/spec.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
142
deps/cmark/test/spec_tests.py
vendored
Executable file
142
deps/cmark/test/spec_tests.py
vendored
Executable file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
from difflib import unified_diff
|
||||
import argparse
|
||||
import re
|
||||
import json
|
||||
from cmark import CMark
|
||||
from normalize import normalize_html
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run cmark tests.')
|
||||
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
|
||||
help='program to test')
|
||||
parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
|
||||
help='path to spec')
|
||||
parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
|
||||
default=None, help='limit to sections matching regex pattern')
|
||||
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
|
||||
default=None, help='directory containing dynamic library')
|
||||
parser.add_argument('--no-normalize', dest='normalize',
|
||||
action='store_const', const=False, default=True,
|
||||
help='do not normalize HTML')
|
||||
parser.add_argument('-d', '--dump-tests', dest='dump_tests',
|
||||
action='store_const', const=True, default=False,
|
||||
help='dump tests in JSON format')
|
||||
parser.add_argument('--debug-normalization', dest='debug_normalization',
|
||||
action='store_const', const=True,
|
||||
default=False, help='filter stdin through normalizer for testing')
|
||||
parser.add_argument('-n', '--number', type=int, default=None,
|
||||
help='only consider the test with the given number')
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
|
||||
def out(str):
    """Write the text `str` to standard output, encoded as UTF-8.

    The bytes are written to the binary buffer underneath sys.stdout, so
    the text encoding configured for sys.stdout itself is not applied.
    """
    encoded = str.encode('utf-8')
    sys.stdout.buffer.write(encoded)
|
||||
|
||||
def print_test_header(headertext, example_number, start_line, end_line):
    """Print the one-line banner that identifies a spec example.

    The banner shows the example number, the line range of the example
    in the spec file, and the text of the section it belongs to.
    """
    fields = (example_number, start_line, end_line, headertext)
    out("Example %d (lines %d-%d) %s\n" % fields)
|
||||
|
||||
def do_test(converter, test, normalize, result_counts):
    """Run one example through `converter` and record the outcome.

    `converter` is called with the example's markdown and must return a
    [retcode, html, err] triple.  `test` is one of the dicts produced by
    get_tests().  When `normalize` is true, both the actual and the
    expected HTML are passed through normalize_html() before being
    compared; otherwise they are compared as-is.

    Exactly one of result_counts['pass'], ['fail'] or ['error'] is
    incremented.  Failures and errors are reported on standard output.
    """
    retcode, actual_html, err = converter(test['markdown'])

    if retcode != 0:
        # The converter itself failed: report its exit code and whatever
        # it returned as error output.  `err` is written to the binary
        # stdout buffer as-is, so it is presumably a bytes object here.
        print_test_header(test['section'], test['example'],
                          test['start_line'], test['end_line'])
        out("program returned error code %d\n" % retcode)
        sys.stdout.buffer.write(err)
        result_counts['error'] += 1
        return

    expected_html = test['html']
    unicode_error = None
    if not normalize:
        passed = actual_html == expected_html
    else:
        try:
            passed = normalize_html(actual_html) == normalize_html(expected_html)
        except UnicodeDecodeError as exc:
            # A decoding problem during normalization counts as a failure
            # and is reported separately below.
            unicode_error = exc
            passed = False

    if passed:
        result_counts['pass'] += 1
        return

    print_test_header(test['section'], test['example'],
                      test['start_line'], test['end_line'])
    out(test['markdown'] + '\n')
    if unicode_error:
        out("Unicode error: " + str(unicode_error) + '\n')
        out("Expected: " + repr(expected_html) + '\n')
        out("Got: " + repr(actual_html) + '\n')
    else:
        # Show what differs as a unified diff of the two HTML documents.
        diff = unified_diff(expected_html.splitlines(True),
                            actual_html.splitlines(True),
                            "expected HTML", "actual HTML")
        for diffline in diff:
            out(diffline)
    out('\n')
    result_counts['fail'] += 1
|
||||
|
||||
def get_tests(specfile):
    """Extract the example tests embedded in a spec file.

    An example starts with a line of 32 backticks followed by " example",
    continues with the markdown input, a line containing only ".", the
    expected HTML, and ends with a line of 32 backticks.  Marker lines are
    recognised after stripping surrounding whitespace.  Every "→" in the
    markdown and in the HTML is replaced by a tab character.

    Returns a list of dicts, one per example in file order, with the keys
    "markdown", "html", "example" (1-based running number), "start_line"
    and "end_line" (1-based line numbers of the opening and closing marker
    lines) and "section" (text of the most recent heading seen outside an
    example, or '' if there was none).

    Raises whatever open() raises when `specfile` cannot be read.
    """
    fence = "`" * 32
    example_opener = fence + " example"
    header_re = re.compile('#+ ')

    # Parser states: ordinary text, markdown part of an example, HTML part.
    TEXT, MARKDOWN, HTML = 0, 1, 2

    state = TEXT
    start_line = 0
    example_number = 0
    markdown_lines = []
    html_lines = []
    headertext = ''
    tests = []

    with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
        for line_number, line in enumerate(specf, start=1):
            stripped = line.strip()
            if stripped == example_opener:
                state = MARKDOWN
                # Record the opener's own line.  Doing it here rather than
                # on the first markdown line means an example with an empty
                # markdown part still gets a proper start line, not 0.
                if start_line == 0:
                    start_line = line_number
            elif stripped == fence:
                state = TEXT
                example_number += 1
                tests.append({
                    "markdown": ''.join(markdown_lines).replace('→', "\t"),
                    "html": ''.join(html_lines).replace('→', "\t"),
                    "example": example_number,
                    "start_line": start_line,
                    "end_line": line_number,
                    "section": headertext})
                start_line = 0
                markdown_lines = []
                html_lines = []
            elif stripped == "." and state != TEXT:
                # The separator only has a meaning inside an example.  A
                # lone "." in ordinary prose must not switch to HTML mode,
                # or the prose after it would end up in the next example's
                # expected HTML.
                state = HTML
            elif state == MARKDOWN:
                markdown_lines.append(line)
            elif state == HTML:
                html_lines.append(line)
            elif state == TEXT and header_re.match(line):
                headertext = header_re.sub('', line).strip()
    return tests
|
||||
|
||||
if __name__ == "__main__":
    # --debug-normalization: act as a filter that normalizes stdin.
    if args.debug_normalization:
        out(normalize_html(sys.stdin.read()))
        sys.exit(0)

    all_tests = get_tests(args.spec)

    # Filter by section only when a pattern was actually given.  The former
    # fallback pattern '.' needs at least one character to match, so it
    # silently dropped examples that appear before the first heading (their
    # section name is the empty string).
    if args.pattern:
        pattern_re = re.compile(args.pattern, re.IGNORECASE)
    else:
        pattern_re = None
    tests = [test for test in all_tests
             if (pattern_re is None or pattern_re.search(test['section']))
             and (not args.number or test['example'] == args.number)]

    # --dump-tests: print the selected examples as JSON instead of running.
    if args.dump_tests:
        out(json.dumps(tests, ensure_ascii=False, indent=2))
        sys.exit(0)

    skipped = len(all_tests) - len(tests)
    converter = CMark(prog=args.program, library_dir=args.library_dir).to_html
    result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
    for test in tests:
        do_test(converter, test, args.normalize, result_counts)
    out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts))

    # The exit status is the number of failing or erroring examples, capped
    # at 255: only the low 8 bits of the status reach the parent process on
    # POSIX, so an uncapped count that is a multiple of 256 would be seen
    # as success.
    sys.exit(min(result_counts['fail'] + result_counts['error'], 255))
|
Reference in New Issue
Block a user