diff --git a/deps/cmark/.editorconfig b/deps/cmark/.editorconfig new file mode 100644 index 0000000..12032e6 --- /dev/null +++ b/deps/cmark/.editorconfig @@ -0,0 +1,18 @@ +# editorconfig.org + +root = true + +[*] +end_of_line = lf +charset = utf-8 +insert_final_newline = true + +[*.{c,h}] +trim_trailing_whitespace = true +indent_style = space +indent_size = 2 + +[Makefile] +trim_trailing_whitespace = true +indent_style = tab +indent_size = 8 diff --git a/deps/cmark/.gitattributes b/deps/cmark/.gitattributes new file mode 100644 index 0000000..6313b56 --- /dev/null +++ b/deps/cmark/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/deps/cmark/.github/FUNDING.yml b/deps/cmark/.github/FUNDING.yml new file mode 100644 index 0000000..726f8b0 --- /dev/null +++ b/deps/cmark/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [jgm] diff --git a/deps/cmark/.github/workflows/ci.yml b/deps/cmark/.github/workflows/ci.yml new file mode 100644 index 0000000..739d08d --- /dev/null +++ b/deps/cmark/.github/workflows/ci.yml @@ -0,0 +1,102 @@ +name: CI tests + +on: [push, pull_request] + +jobs: + + linter: + + runs-on: ubuntu-latest + + steps: + + - uses: actions/checkout@v1 + - name: Install clang-tidy + run: | + sudo apt-get install -y clang-tidy-11 + sudo update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 100 + - name: lint with clang-tidy + run: | + make lint + env: + CC: clang + CXX: clang++ + + linux: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Install valgrind + run: | + sudo apt install -y valgrind + - name: Build and test + run: | + make + make test + make leakcheck + + macos: + + runs-on: macOS-latest + strategy: + fail-fast: false + 
matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + compiler: + - c: 'clang' + cpp: 'clang++' + - c: 'gcc' + cpp: 'g++' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + CC: ${{ matrix.compiler.c }} + CXX: ${{ matrix.compiler.cpp }} + + steps: + - uses: actions/checkout@v1 + - name: Build and test + env: + CMAKE_OPTIONS: -DCMARK_SHARED=OFF + run: | + make + make test + + windows: + + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + cmake_opts: + - '-DCMARK_SHARED=ON' + - '' + env: + CMAKE_OPTIONS: ${{ matrix.cmake_opts }} + + steps: + - uses: actions/checkout@v1 + - uses: ilammy/msvc-dev-cmd@v1 + - name: Build and test + run: | + chcp 65001 + nmake.exe /nologo /f Makefile.nmake test + shell: cmd diff --git a/deps/cmark/.github/workflows/fuzz.yml b/deps/cmark/.github/workflows/fuzz.yml new file mode 100644 index 0000000..aec48b6 --- /dev/null +++ b/deps/cmark/.github/workflows/fuzz.yml @@ -0,0 +1,23 @@ +name: CIFuzz +on: [pull_request] +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'cmark' + dry-run: false + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'cmark' + fuzz-seconds: 600 + dry-run: false + - name: Upload Crash + uses: actions/upload-artifact@v1 + if: failure() + with: + name: artifacts + path: ./out/artifacts diff --git a/deps/cmark/.gitignore b/deps/cmark/.gitignore new file mode 100644 index 0000000..f56da3c --- /dev/null +++ b/deps/cmark/.gitignore @@ -0,0 +1,34 @@ +# Object files +*.o +*.ko +*.obj +*.elf + +# Libraries +*.lib +*.a + +# Shared objects (inc. 
Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex +*.pyc + +*~ +*.bak +*.diff +*# +*.zip +bstrlib.txt +build +cmark.dSYM/* +cmark diff --git a/deps/cmark/.gitrepo b/deps/cmark/.gitrepo new file mode 100644 index 0000000..8ff29cb --- /dev/null +++ b/deps/cmark/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/commonmark/cmark.git + branch = 0.30.3 + commit = 5ba25ff40eba44c811f79ab6a792baf945b8307c + parent = bb99771ac83ea85da0e1ab2c6c0c3475a7d21726 + method = merge + cmdver = 0.4.6 diff --git a/deps/cmark/.travis.yml b/deps/cmark/.travis.yml new file mode 100644 index 0000000..0dddedf --- /dev/null +++ b/deps/cmark/.travis.yml @@ -0,0 +1,43 @@ +# Ensures that sudo is disabled, so that containerized builds are allowed +arch: + - amd64 + - ppc64le +sudo: false + +os: + - linux + - osx +language: c +compiler: + - clang + - gcc +matrix: + include: + - os: linux + compiler: gcc + env: CMAKE_OPTIONS="-DCMARK_SHARED=OFF" +addons: + apt: + # we need a more recent cmake than travis/linux provides (at least 2.8.9): + sources: + - kubuntu-backports + - kalakris-cmake + packages: + - cmake + - python3 + - valgrind +before_install: + - | + if [ ${TRAVIS_OS_NAME:-'linux'} = 'osx' ] + then + echo "Building without python3, to make sure that works." + fi + +script: + - (mkdir -p build && cd build && cmake $CMAKE_OPTIONS ..) 
+ - make test + - | + if [ ${TRAVIS_OS_NAME:-'linux'} = 'linux' ] + then + make leakcheck + fi diff --git a/deps/cmark/CMakeLists.txt b/deps/cmark/CMakeLists.txt new file mode 100644 index 0000000..0a4a4be --- /dev/null +++ b/deps/cmark/CMakeLists.txt @@ -0,0 +1,79 @@ +cmake_minimum_required(VERSION 3.7) +project(cmark VERSION 0.30.3) + +include("FindAsan.cmake") +include(GNUInstallDirs) + +if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make") +endif() + +option(CMARK_TESTS "Build cmark tests and enable testing" ON) +option(CMARK_STATIC "Build static libcmark library" ON) +option(CMARK_SHARED "Build shared libcmark library" ON) +option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) + +if(NOT MSVC) + set(CMAKE_C_STANDARD 99) + set(CMAKE_C_STANDARD_REQUIRED YES) + set(CMAKE_C_EXTENSIONS NO) +endif() + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# The Linux modules distributed with CMake add "-rdynamic" to the build flags +# which is incompatible with static linking under certain configurations. +# Unsetting CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ensures this does not happen. +if(CMARK_STATIC AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") + SET(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) +endif() + +# Check integrity of node structure when compiled as debug +add_compile_options($<$<CONFIG:Debug>:-DCMARK_DEBUG_NODES>) + +# In order to maintain compatibility with older platforms which may not have a +# recent version of CMake (i.e. are running CMake <3.3), we cannot simply use +# the `add_compile_options` with a generator expression. This uses the +# `target_compile_options` with `PRIVATE` to add the flags only to the targets +# so that CMark may be used in projects with non-C languages. 
+function(cmark_add_compile_options target) + if(MSVC) + target_compile_options(${target} PRIVATE /W4 /wd4706) + if(MSVC_VERSION LESS 1800) + target_compile_options(${target} PRIVATE /TP) + endif() + target_compile_definitions(${target} PRIVATE _CRT_SECURE_NO_WARNINGS) + else() + target_compile_options(${target} PRIVATE + -Wall -Wextra -pedantic + $<$<COMPILE_LANGUAGE:C>:-Wstrict-prototypes>) + endif() + if(CMAKE_BUILD_TYPE MATCHES "[Pp]rofile") + target_compile_options(${target} PRIVATE -pg) + endif() + if(CMAKE_BUILD_TYPE MATCHES "[Uu]bsan") + target_compile_options(${target} PRIVATE -fsanitize=undefined) + endif() + if(CMARK_LIB_FUZZER) + target_compile_options(${target} PRIVATE -fsanitize-coverage=trace-pc-guard) + endif() +endfunction() + +add_subdirectory(src) +if(CMARK_TESTS AND (CMARK_SHARED OR CMARK_STATIC)) + add_subdirectory(api_test) +endif() +# TODO(compnerd) should this be enabled for MinGW, which sets CMAKE_SYSTEM_NAME +# to Windows, but defines `MINGW`. +if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows) + add_subdirectory(man) +endif() +if(CMARK_TESTS) + enable_testing() + add_subdirectory(test testdir) +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE) +endif(NOT CMAKE_BUILD_TYPE) diff --git a/deps/cmark/COPYING b/deps/cmark/COPYING new file mode 100644 index 0000000..db88a81 --- /dev/null +++ b/deps/cmark/COPYING @@ -0,0 +1,170 @@ +Copyright (c) 2014, John MacFarlane + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +----- + +houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c + +derive from https://github.com/vmg/houdini (with some modifications) + +Copyright (C) 2012 Vicent Martí + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +----- + +buffer.h, buffer.c, chunk.h + +are derived from code (C) 2012 Github, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +----- + +utf8.c and utf8.h + +are derived from utf8proc +(<http://www.public-software-group.org/utf8proc>), +(C) 2009 Public Software Group e. V., Berlin, Germany. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +----- + +The normalization code in normalize.py was derived from the +markdowntest project, Copyright 2013 Karl Dubost: + +The MIT License (MIT) + +Copyright (c) 2013 Karl Dubost + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +----- + +The CommonMark spec (test/spec.txt) is + +Copyright (C) 2014-15 John MacFarlane + +Released under the Creative Commons CC-BY-SA 4.0 license: +<http://creativecommons.org/licenses/by-sa/4.0/>. + +----- + +The test software in test/ is + +Copyright (c) 2014, John MacFarlane + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/deps/cmark/FindAsan.cmake b/deps/cmark/FindAsan.cmake new file mode 100644 index 0000000..064b5b6 --- /dev/null +++ b/deps/cmark/FindAsan.cmake @@ -0,0 +1,74 @@ +# +# The MIT License (MIT) +# +# Copyright (c) 2013 Matthew Arsenault +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# This module tests if address sanitizer is supported by the compiler, +# and creates a ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use +# it). 
This sets the following variables: +# +# CMAKE_C_FLAGS_ASAN - Flags to use for C with asan +# CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan +# HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available + +include(CheckCCompilerFlag) + +# Set -Werror to catch "argument unused during compilation" warnings +set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass +check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER) + +set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass +check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS) + +unset(CMAKE_REQUIRED_FLAGS) + +if(HAVE_FLAG_SANITIZE_ADDRESS) + # Clang 3.2+ use this version + set(ADDRESS_SANITIZER_FLAG "-fsanitize=address") +elseif(HAVE_FLAG_ADDRESS_SANITIZER) + # Older deprecated flag for ASan + set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer") +endif() + +if(NOT ADDRESS_SANITIZER_FLAG) + # Neither spelling of the flag is accepted: report ASan as unavailable. + set(HAVE_ADDRESS_SANITIZER FALSE) + return() +endif() + +set(HAVE_ADDRESS_SANITIZER TRUE) + +set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls" + CACHE STRING "Flags used by the C compiler during ASan builds." + FORCE) +set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls" + CACHE STRING "Flags used by the C++ compiler during ASan builds." + FORCE) +set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}" + CACHE STRING "Flags used for linking binaries during ASan builds." + FORCE) +set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}" + CACHE STRING "Flags used by the shared libraries linker during ASan builds." 
+ FORCE) +mark_as_advanced(CMAKE_C_FLAGS_ASAN + CMAKE_CXX_FLAGS_ASAN + CMAKE_EXE_LINKER_FLAGS_ASAN + CMAKE_SHARED_LINKER_FLAGS_ASAN) diff --git a/deps/cmark/Makefile b/deps/cmark/Makefile new file mode 100644 index 0000000..4938044 --- /dev/null +++ b/deps/cmark/Makefile @@ -0,0 +1,202 @@ +SRCDIR=src +DATADIR=data +BUILDDIR?=build +GENERATOR?=Unix Makefiles +MINGW_BUILDDIR?=build-mingw +MINGW_INSTALLDIR?=windows +SPEC=test/spec.txt +SITE=_site +SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC)) +FUZZCHARS?=2000000 # for fuzztest +BENCHDIR=bench +BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md) +BENCHFILE=$(BENCHDIR)/benchinput.md +ALLTESTS=alltests.md +NUMRUNS?=10 +CMARK=$(BUILDDIR)/src/cmark +CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz +PROG?=$(CMARK) +VERSION?=$(SPECVERSION) +RELEASE?=cmark-$(VERSION) +INSTALL_PREFIX?=/usr/local +CLANG_CHECK?=clang-check +CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i +AFL_PATH?=/usr/local/bin + +.PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl libFuzzer lint + +all: cmake_build man/man3/cmark.3 + +$(CMARK): cmake_build + +cmake_build: $(BUILDDIR) + @$(MAKE) -j2 -C $(BUILDDIR) + @echo "Binaries can be found in $(BUILDDIR)/src" + +$(BUILDDIR): + @cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1) + mkdir -p $(BUILDDIR); \ + cd $(BUILDDIR); \ + cmake .. \ + -G "$(GENERATOR)" \ + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ + -DCMAKE_INSTALL_PREFIX=$(INSTALL_PREFIX) \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + +install: $(BUILDDIR) + $(MAKE) -C $(BUILDDIR) install + +uninstall: $(BUILDDIR)/install_manifest.txt + xargs rm < $< + +debug: + mkdir -p $(BUILDDIR); \ + cd $(BUILDDIR); \ + cmake .. -DCMAKE_BUILD_TYPE=Debug; \ + $(MAKE) + +ubsan: + mkdir -p $(BUILDDIR); \ + cd $(BUILDDIR); \ + cmake .. 
-DCMAKE_BUILD_TYPE=Ubsan; \ + $(MAKE) + +asan: + mkdir -p $(BUILDDIR); \ + cd $(BUILDDIR); \ + cmake .. -DCMAKE_BUILD_TYPE=Asan; \ + $(MAKE) + +prof: + mkdir -p $(BUILDDIR); \ + cd $(BUILDDIR); \ + cmake .. -DCMAKE_BUILD_TYPE=Profile; \ + $(MAKE) + +afl: + @[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; } + mkdir -p $(BUILDDIR) + cd $(BUILDDIR) && cmake .. -DCMARK_TESTS=0 -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang + $(MAKE) + $(AFL_PATH)/afl-fuzz \ + -i test/afl_test_cases \ + -o test/afl_results \ + -x test/fuzzing_dictionary \ + -t 100 \ + $(CMARK) $(CMARK_OPTS) + +libFuzzer: + @[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; } + mkdir -p $(BUILDDIR) + cd $(BUILDDIR) && cmake -DCMAKE_BUILD_TYPE=Asan -DCMARK_LIB_FUZZER=ON -DCMAKE_LIB_FUZZER_PATH=$(LIB_FUZZER_PATH) .. + $(MAKE) -j2 -C $(BUILDDIR) cmark-fuzz + test/run-cmark-fuzz $(CMARK_FUZZ) + +lint: $(BUILDDIR) + errs=0 ; \ + for f in `ls src/*.[ch] | grep -v "scanners.c"` ; \ + do echo $$f ; clang-tidy -header-filter='^build/.*' -p=build -warnings-as-errors='*' $$f || errs=1 ; done ; \ + exit $$errs + +mingw: + mkdir -p $(MINGW_BUILDDIR); \ + cd $(MINGW_BUILDDIR); \ + cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\ + $(MAKE) && $(MAKE) install + +man/man3/cmark.3: src/cmark.h | $(CMARK) + python man/make_man_page.py $< > $@ \ + +archive: + git archive --prefix=$(RELEASE)/ -o $(RELEASE).tar.gz HEAD + git archive --prefix=$(RELEASE)/ -o $(RELEASE).zip HEAD + +clean: + rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR) + +# We include case_fold_switch.inc in the repository, so this shouldn't +# normally need to be generated. +$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding.txt + perl tools/mkcasefold.pl < $< > $@ + +# We include scanners.c in the repository, so this shouldn't +# normally need to be generated. 
+$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re + @case "$$(re2c -v)" in \ + *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ + echo "re2c >= 0.14.2 is required"; \ + false; \ + ;; \ + esac + re2c -W -Werror --case-insensitive -b -i --no-generation-date -8 \ + --encoding-policy substitute -o $@ $< + $(CLANG_FORMAT) $@ + +# We include entities.inc in the repository, so normally this +# doesn't need to be regenerated: +$(SRCDIR)/entities.inc: tools/make_entities_inc.py + python3 $< > $@ + +update-spec: + curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\ + > $(SPEC) + +test: $(SPEC) cmake_build + $(MAKE) -C $(BUILDDIR) test || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1) + +$(ALLTESTS): $(SPEC) + python3 test/spec_tests.py --spec $< --dump-tests | python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))' > $@ + +leakcheck: $(ALLTESTS) + for format in html man xml latex commonmark; do \ + for opts in "" "--smart"; do \ + echo "cmark -t $$format $$opts" ; \ + valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format $$opts $(ALLTESTS) >/dev/null || exit 1;\ + done; \ + done; + +fuzztest: + { for i in `seq 1 10`; do \ + cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \ + /usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \ + done } 2>&1 | grep 'user\|abnormally' + +progit: + git clone https://github.com/progit/progit.git + +$(BENCHFILE): progit + echo "" > $@ + for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \ + cat progit/$$lang/*/*.markdown >> $@; \ + done + +# for more accurate results, run with +# sudo renice -10 $$; make bench +bench: $(BENCHFILE) + { for x in `seq 1 $(NUMRUNS)` ; do \ + /usr/bin/env time -p $(PROG) /dev/null ; \ + /usr/bin/env time -p $(PROG) $< >/dev/null ; \ + done \ + } 2>&1 | grep 'real' | awk '{print 
$$2}' | python3 'bench/stats.py' + +newbench: + for f in $(BENCHSAMPLES) ; do \ + printf "%26s " `basename $$f` ; \ + { for x in `seq 1 $(NUMRUNS)` ; do \ + /usr/bin/env time -p $(PROG) /dev/null ; \ + for x in `seq 1 200` ; do cat $$f ; done | \ + /usr/bin/env time -p $(PROG) > /dev/null; \ + done \ + } 2>&1 | grep 'real' | awk '{print $$2}' | \ + python3 'bench/stats.py'; done + +format: + $(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h + +operf: $(CMARK) + operf $< < $(BENCHFILE) > /dev/null + +distclean: clean + -rm -rf *.dSYM + -rm -f README.html + -rm -rf $(BENCHFILE) $(ALLTESTS) progit diff --git a/deps/cmark/Makefile.nmake b/deps/cmark/Makefile.nmake new file mode 100644 index 0000000..ecfd4f5 --- /dev/null +++ b/deps/cmark/Makefile.nmake @@ -0,0 +1,36 @@ +SRCDIR=src +DATADIR=data +BUILDDIR=build +INSTALLDIR=windows +SPEC=test/spec.txt +PROG=$(BUILDDIR)\src\cmark.exe +GENERATOR=NMake Makefiles + +all: $(BUILDDIR)/CMakeFiles + @cd $(BUILDDIR) && $(MAKE) /nologo && cd .. + +$(BUILDDIR)/CMakeFiles: + @-mkdir $(BUILDDIR) 2> nul + cd $(BUILDDIR) && \ + cmake \ + -G "$(GENERATOR)" \ + -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \ + -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \ + .. && \ + cd .. + +install: all + @cd $(BUILDDIR) && $(MAKE) /nologo install && cd .. + +clean: + -rmdir /s /q $(BUILDDIR) $(MINGW_INSTALLDIR) 2> nul + +$(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt + perl mkcasefold.pl < $? > $@ + +test: $(SPEC) all + @cd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && cd .. 
+ +distclean: clean + del /q src\scanners.c 2> nul + del /q spec.md spec.html 2> nul diff --git a/deps/cmark/README.md b/deps/cmark/README.md new file mode 100644 index 0000000..53f622e --- /dev/null +++ b/deps/cmark/README.md @@ -0,0 +1,198 @@ +cmark +===== + +[![CI +tests](https://github.com/commonmark/cmark/workflows/CI%20tests/badge.svg)](https://github.com/commonmark/cmark/actions) + +`cmark` is the C reference implementation of [CommonMark], a +rationalized version of Markdown syntax with a [spec][the spec]. +(For the JavaScript reference implementation, see +[commonmark.js].) + +It provides a shared library (`libcmark`) with functions for parsing +CommonMark documents to an abstract syntax tree (AST), manipulating +the AST, and rendering the document to HTML, groff man, LaTeX, +CommonMark, or an XML representation of the AST. It also provides a +command-line program (`cmark`) for parsing and rendering CommonMark +documents. + +Advantages of this library: + +- **Portable.** The library and program are written in standard + C99 and have no external dependencies. They have been tested with + MSVC, gcc, tcc, and clang. + +- **Fast.** cmark can render a Markdown version of *War and Peace* in + the blink of an eye (127 milliseconds on a ten year old laptop, + vs. 100-400 milliseconds for an eye blink). In our [benchmarks], + cmark is 10,000 times faster than the original `Markdown.pl`, and + on par with the very fastest available Markdown processors. + +- **Accurate.** The library passes all CommonMark conformance tests. + +- **Standardized.** The library can be expected to parse CommonMark + the same way as any other conforming parser. So, for example, + you can use `commonmark.js` on the client to preview content that + will be rendered on the server using `cmark`. + +- **Robust.** The library has been extensively fuzz-tested using + [american fuzzy lop]. 
The test suite includes pathological cases + that bring many other Markdown parsers to a crawl (for example, + thousands-deep nested bracketed text or block quotes). + +- **Flexible.** CommonMark input is parsed to an AST which can be + manipulated programmatically prior to rendering. + +- **Multiple renderers.** Output in HTML, groff man, LaTeX, CommonMark, + and a custom XML format is supported. And it is easy to write new + renderers to support other formats. + +- **Free.** BSD2-licensed. + +It is easy to use `libcmark` in python, lua, ruby, and other dynamic +languages: see the `wrappers/` subdirectory for some simple examples. + +There are also libraries that wrap `libcmark` for +[Go](https://github.com/rhinoman/go-commonmark), +[Haskell](https://hackage.haskell.org/package/cmark), +[Ruby](https://github.com/gjtorikian/commonmarker), +[Lua](https://github.com/jgm/cmark-lua), +[Perl](https://metacpan.org/release/CommonMark), +[Python](https://pypi.python.org/pypi/paka.cmark), +[R](https://cran.r-project.org/package=commonmark) and +[Scala](https://github.com/sparsetech/cmark-scala). + +Installing +---------- + +Building the C program (`cmark`) and shared library (`libcmark`) +requires [cmake]. If you modify `scanners.re`, then you will also +need [re2c] \(>= 0.14.2\), which is used to generate `scanners.c` from +`scanners.re`. We have included a pre-generated `scanners.c` in +the repository to reduce build dependencies. + +If you have GNU make, you can simply `make`, `make test`, and `make +install`. This calls [cmake] to create a `Makefile` in the `build` +directory, then uses that `Makefile` to create the executable and +library. The binaries can be found in `build/src`. The default +installation prefix is `/usr/local`. To change the installation +prefix, pass the `INSTALL_PREFIX` variable if you run `make` for the +first time: `make INSTALL_PREFIX=path`. + +For a more portable method, you can use [cmake] manually. 
[cmake] knows +how to create build environments for many build systems. For example, +on FreeBSD: + + mkdir build + cd build + cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path + make # executable will be created as build/src/cmark + make test + make install + +Or, to create Xcode project files on OSX: + + mkdir build + cd build + cmake -G Xcode .. + open cmark.xcodeproj + +The GNU Makefile also provides a few other targets for developers. +To run a benchmark: + + make bench + +For more detailed benchmarks: + + make newbench + +To run a test for memory leaks using `valgrind`: + + make leakcheck + +To reformat source code using `clang-format`: + + make format + +To run a "fuzz test" against ten long randomly generated inputs: + + make fuzztest + +To do a more systematic fuzz test with [american fuzzy lop]: + + AFL_PATH=/path/to/afl_directory make afl + +Fuzzing with [libFuzzer] is also supported but, because libFuzzer is still +under active development, may not work with your system-installed version of +clang. Assuming LLVM has been built in `$HOME/src/llvm/build` the fuzzer can be +run with: + + CC="$HOME/src/llvm/build/bin/clang" LIB_FUZZER_PATH="$HOME/src/llvm/lib/Fuzzer/libFuzzer.a" make libFuzzer + +To make a release tarball and zip archive: + + make archive + +Installing (Windows) +-------------------- + +To compile with MSVC and NMAKE: + + nmake + +You can cross-compile a Windows binary and dll on linux if you have the +`mingw32` compiler: + + make mingw + +The binaries will be in `build-mingw/windows/bin`. + +Usage +----- + +Instructions for the use of the command line program and library can +be found in the man pages in the `man` subdirectory. + +Security +-------- + +By default, the library will scrub raw HTML and potentially +dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). + +To allow these, use the option `CMARK_OPT_UNSAFE` (or +`--unsafe`) with the command line program. 
If doing so, we +recommend you use an HTML sanitizer specific to your needs to +protect against [XSS +attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). + +Contributing +------------ + +There is a [forum for discussing +CommonMark](http://talk.commonmark.org); you should use it instead of +GitHub issues for questions and possibly open-ended discussions. +Use the [GitHub issue tracker](http://github.com/commonmark/CommonMark/issues) +only for simple, clear, actionable issues. + +Authors +------- + +John MacFarlane wrote the original library and program. +The block parsing algorithm was worked out together with David +Greenspan. Vicent Marti optimized the C implementation for +performance, increasing its speed tenfold. Kārlis Gaņģis helped +work out a better parsing algorithm for links and emphasis, +eliminating several worst-case performance issues. +Nick Wellnhofer contributed many improvements, including +most of the C library's API and its test harness. + +[benchmarks]: benchmarks.md +[the spec]: http://spec.commonmark.org +[CommonMark]: http://commonmark.org +[cmake]: http://www.cmake.org/download/ +[re2c]: http://re2c.org +[commonmark.js]: https://github.com/commonmark/commonmark.js +[Build Status]: https://img.shields.io/travis/commonmark/cmark/master.svg?style=flat +[Windows Build Status]: https://ci.appveyor.com/api/projects/status/h3fd91vtd1xfmp69?svg=true +[american fuzzy lop]: http://lcamtuf.coredump.cx/afl/ +[libFuzzer]: http://llvm.org/docs/LibFuzzer.html diff --git a/deps/cmark/api_test/CMakeLists.txt b/deps/cmark/api_test/CMakeLists.txt new file mode 100644 index 0000000..8f19ff7 --- /dev/null +++ b/deps/cmark/api_test/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(api_test + cplusplus.cpp + harness.c + harness.h + main.c +) +cmark_add_compile_options(api_test) +if(CMARK_SHARED) + target_link_libraries(api_test cmark) +else() + target_link_libraries(api_test cmark_static) +endif() diff --git a/deps/cmark/api_test/cplusplus.cpp 
b/deps/cmark/api_test/cplusplus.cpp new file mode 100644 index 0000000..5e8f722 --- /dev/null +++ b/deps/cmark/api_test/cplusplus.cpp @@ -0,0 +1,15 @@ +#include + +#include "cmark.h" +#include "cplusplus.h" +#include "harness.h" + +void +test_cplusplus(test_batch_runner *runner) +{ + static const char md[] = "paragraph\n"; + char *html = cmark_markdown_to_html(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "

paragraph

\n", "libcmark works with C++"); + free(html); +} + diff --git a/deps/cmark/api_test/cplusplus.h b/deps/cmark/api_test/cplusplus.h new file mode 100644 index 0000000..1f3dd15 --- /dev/null +++ b/deps/cmark/api_test/cplusplus.h @@ -0,0 +1,16 @@ +#ifndef CMARK_API_TEST_CPLUSPLUS_H +#define CMARK_API_TEST_CPLUSPLUS_H + +#include "harness.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test_cplusplus(test_batch_runner *runner); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/api_test/harness.c b/deps/cmark/api_test/harness.c new file mode 100644 index 0000000..d27e7ca --- /dev/null +++ b/deps/cmark/api_test/harness.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include + +#include "harness.h" + +test_batch_runner *test_batch_runner_new(void) { + return (test_batch_runner *)calloc(1, sizeof(test_batch_runner)); +} + +static void test_result(test_batch_runner *runner, int cond, const char *msg, + va_list ap) { + ++runner->test_num; + + if (cond) { + ++runner->num_passed; + } else { + fprintf(stderr, "FAILED test %d: ", runner->test_num); + vfprintf(stderr, msg, ap); + fprintf(stderr, "\n"); + ++runner->num_failed; + } +} + +void SKIP(test_batch_runner *runner, int num_tests) { + runner->test_num += num_tests; + runner->num_skipped += num_tests; +} + +void OK(test_batch_runner *runner, int cond, const char *msg, ...) { + va_list ap; + va_start(ap, msg); + test_result(runner, cond, msg, ap); + va_end(ap); +} + +void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, + ...) { + int cond = got == expected; + + va_list ap; + va_start(ap, msg); + test_result(runner, cond, msg, ap); + va_end(ap); + + if (!cond) { + fprintf(stderr, " Got: %d\n", got); + fprintf(stderr, " Expected: %d\n", expected); + } +} + +void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, + const char *msg, ...) 
{ + int cond = strcmp(got, expected) == 0; + + va_list ap; + va_start(ap, msg); + test_result(runner, cond, msg, ap); + va_end(ap); + + if (!cond) { + fprintf(stderr, " Got: \"%s\"\n", got); + fprintf(stderr, " Expected: \"%s\"\n", expected); + } +} + +int test_ok(test_batch_runner *runner) { return runner->num_failed == 0; } + +void test_print_summary(test_batch_runner *runner) { + int num_passed = runner->num_passed; + int num_skipped = runner->num_skipped; + int num_failed = runner->num_failed; + + fprintf(stderr, "%d tests passed, %d failed, %d skipped\n", num_passed, + num_failed, num_skipped); + + if (test_ok(runner)) { + fprintf(stderr, "PASS\n"); + } else { + fprintf(stderr, "FAIL\n"); + } +} diff --git a/deps/cmark/api_test/harness.h b/deps/cmark/api_test/harness.h new file mode 100644 index 0000000..f352f07 --- /dev/null +++ b/deps/cmark/api_test/harness.h @@ -0,0 +1,35 @@ +#ifndef CMARK_API_TEST_HARNESS_H +#define CMARK_API_TEST_HARNESS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int test_num; + int num_passed; + int num_failed; + int num_skipped; +} test_batch_runner; + +test_batch_runner *test_batch_runner_new(void); + +void SKIP(test_batch_runner *runner, int num_tests); + +void OK(test_batch_runner *runner, int cond, const char *msg, ...); + +void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, + ...); + +void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, + const char *msg, ...); + +int test_ok(test_batch_runner *runner); + +void test_print_summary(test_batch_runner *runner); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/api_test/main.c b/deps/cmark/api_test/main.c new file mode 100644 index 0000000..c2db737 --- /dev/null +++ b/deps/cmark/api_test/main.c @@ -0,0 +1,1105 @@ +#include +#include +#include + +#define CMARK_NO_SHORT_NAMES +#include "cmark.h" +#include "node.h" + +#include "harness.h" +#include "cplusplus.h" + +#define UTF8_REPL "\xEF\xBF\xBD" 
+ +static const cmark_node_type node_types[] = { + CMARK_NODE_DOCUMENT, CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, + CMARK_NODE_ITEM, CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML_BLOCK, + CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADING, CMARK_NODE_THEMATIC_BREAK, + CMARK_NODE_TEXT, CMARK_NODE_SOFTBREAK, CMARK_NODE_LINEBREAK, + CMARK_NODE_CODE, CMARK_NODE_HTML_INLINE, CMARK_NODE_EMPH, + CMARK_NODE_STRONG, CMARK_NODE_LINK, CMARK_NODE_IMAGE}; +static const int num_node_types = sizeof(node_types) / sizeof(*node_types); + +static void test_md_to_html(test_batch_runner *runner, const char *markdown, + const char *expected_html, const char *msg); + +static void test_content(test_batch_runner *runner, cmark_node_type type, + int allowed_content); + +static void test_char(test_batch_runner *runner, int valid, const char *utf8, + const char *msg); + +static void test_incomplete_char(test_batch_runner *runner, const char *utf8, + const char *msg); + +static void test_continuation_byte(test_batch_runner *runner, const char *utf8); + +static void version(test_batch_runner *runner) { + INT_EQ(runner, cmark_version(), CMARK_VERSION, "cmark_version"); + STR_EQ(runner, cmark_version_string(), CMARK_VERSION_STRING, + "cmark_version_string"); +} + +static void constructor(test_batch_runner *runner) { + for (int i = 0; i < num_node_types; ++i) { + cmark_node_type type = node_types[i]; + cmark_node *node = cmark_node_new(type); + OK(runner, node != NULL, "new type %d", type); + INT_EQ(runner, cmark_node_get_type(node), type, "get_type %d", type); + + switch (node->type) { + case CMARK_NODE_HEADING: + INT_EQ(runner, cmark_node_get_heading_level(node), 1, + "default heading level is 1"); + node->as.heading.level = 1; + break; + + case CMARK_NODE_LIST: + INT_EQ(runner, cmark_node_get_list_type(node), CMARK_BULLET_LIST, + "default is list type is bullet"); + INT_EQ(runner, cmark_node_get_list_delim(node), CMARK_NO_DELIM, + "default is list delim is NO_DELIM"); + INT_EQ(runner, 
cmark_node_get_list_start(node), 0, + "default is list start is 0"); + INT_EQ(runner, cmark_node_get_list_tight(node), 0, + "default is list is loose"); + break; + + default: + break; + } + + cmark_node_free(node); + } +} + +static void accessors(test_batch_runner *runner) { + static const char markdown[] = "## Header\n" + "\n" + "* Item 1\n" + "* Item 2\n" + "\n" + "2. Item 1\n" + "\n" + "3. Item 2\n" + "\n" + "``` lang\n" + "fenced\n" + "```\n" + " code\n" + "\n" + "
html
\n" + "\n" + "[link](url 'title')\n"; + + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + // Getters + + cmark_node *heading = cmark_node_first_child(doc); + INT_EQ(runner, cmark_node_get_heading_level(heading), 2, "get_heading_level"); + + cmark_node *bullet_list = cmark_node_next(heading); + INT_EQ(runner, cmark_node_get_list_type(bullet_list), CMARK_BULLET_LIST, + "get_list_type bullet"); + INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1, + "get_list_tight tight"); + + cmark_node *ordered_list = cmark_node_next(bullet_list); + INT_EQ(runner, cmark_node_get_list_type(ordered_list), CMARK_ORDERED_LIST, + "get_list_type ordered"); + INT_EQ(runner, cmark_node_get_list_delim(ordered_list), CMARK_PERIOD_DELIM, + "get_list_delim ordered"); + INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2, "get_list_start"); + INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0, + "get_list_tight loose"); + + cmark_node *fenced = cmark_node_next(ordered_list); + STR_EQ(runner, cmark_node_get_literal(fenced), "fenced\n", + "get_literal fenced code"); + STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang", "get_fence_info"); + + cmark_node *code = cmark_node_next(fenced); + STR_EQ(runner, cmark_node_get_literal(code), "code\n", + "get_literal indented code"); + + cmark_node *html = cmark_node_next(code); + STR_EQ(runner, cmark_node_get_literal(html), "
html
\n", + "get_literal html"); + + cmark_node *paragraph = cmark_node_next(html); + INT_EQ(runner, cmark_node_get_start_line(paragraph), 17, "get_start_line"); + INT_EQ(runner, cmark_node_get_start_column(paragraph), 1, "get_start_column"); + INT_EQ(runner, cmark_node_get_end_line(paragraph), 17, "get_end_line"); + + cmark_node *link = cmark_node_first_child(paragraph); + STR_EQ(runner, cmark_node_get_url(link), "url", "get_url"); + STR_EQ(runner, cmark_node_get_title(link), "title", "get_title"); + + cmark_node *string = cmark_node_first_child(link); + STR_EQ(runner, cmark_node_get_literal(string), "link", "get_literal string"); + + // Setters + + OK(runner, cmark_node_set_heading_level(heading, 3), "set_heading_level"); + + OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST), + "set_list_type ordered"); + OK(runner, cmark_node_set_list_delim(bullet_list, CMARK_PAREN_DELIM), + "set_list_delim paren"); + OK(runner, cmark_node_set_list_start(bullet_list, 3), "set_list_start"); + OK(runner, cmark_node_set_list_tight(bullet_list, 0), "set_list_tight loose"); + + OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST), + "set_list_type bullet"); + OK(runner, cmark_node_set_list_tight(ordered_list, 1), + "set_list_tight tight"); + + OK(runner, cmark_node_set_literal(code, "CODE\n"), + "set_literal indented code"); + + OK(runner, cmark_node_set_literal(fenced, "FENCED\n"), + "set_literal fenced code"); + OK(runner, cmark_node_set_fence_info(fenced, "LANG"), "set_fence_info"); + + OK(runner, cmark_node_set_literal(html, "
HTML
\n"), + "set_literal html"); + + OK(runner, cmark_node_set_url(link, "URL"), "set_url"); + OK(runner, cmark_node_set_title(link, "TITLE"), "set_title"); + + OK(runner, cmark_node_set_literal(string, "prefix-LINK"), + "set_literal string"); + + // Set literal to suffix of itself (issue #139). + const char *literal = cmark_node_get_literal(string); + OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), + "set_literal suffix"); + + char *rendered_html = cmark_render_html(doc, + CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE); + static const char expected_html[] = + "

Header

\n" + "
    \n" + "
  1. \n" + "

    Item 1

    \n" + "
  2. \n" + "
  3. \n" + "

    Item 2

    \n" + "
  4. \n" + "
\n" + "
    \n" + "
  • Item 1
  • \n" + "
  • Item 2
  • \n" + "
\n" + "
FENCED\n"
+      "
\n" + "
CODE\n"
+      "
\n" + "
HTML
\n" + "

LINK

\n"; + STR_EQ(runner, rendered_html, expected_html, "setters work"); + free(rendered_html); + + // Getter errors + + INT_EQ(runner, cmark_node_get_heading_level(bullet_list), 0, + "get_heading_level error"); + INT_EQ(runner, cmark_node_get_list_type(heading), CMARK_NO_LIST, + "get_list_type error"); + INT_EQ(runner, cmark_node_get_list_start(code), 0, "get_list_start error"); + INT_EQ(runner, cmark_node_get_list_tight(fenced), 0, "get_list_tight error"); + OK(runner, cmark_node_get_literal(ordered_list) == NULL, "get_literal error"); + OK(runner, cmark_node_get_fence_info(paragraph) == NULL, + "get_fence_info error"); + OK(runner, cmark_node_get_url(html) == NULL, "get_url error"); + OK(runner, cmark_node_get_title(heading) == NULL, "get_title error"); + + // Setter errors + + OK(runner, !cmark_node_set_heading_level(bullet_list, 3), + "set_heading_level error"); + OK(runner, !cmark_node_set_list_type(heading, CMARK_ORDERED_LIST), + "set_list_type error"); + OK(runner, !cmark_node_set_list_start(code, 3), "set_list_start error"); + OK(runner, !cmark_node_set_list_tight(fenced, 0), "set_list_tight error"); + OK(runner, !cmark_node_set_literal(ordered_list, "content\n"), + "set_literal error"); + OK(runner, !cmark_node_set_fence_info(paragraph, "lang"), + "set_fence_info error"); + OK(runner, !cmark_node_set_url(html, "url"), "set_url error"); + OK(runner, !cmark_node_set_title(heading, "title"), "set_title error"); + + OK(runner, !cmark_node_set_heading_level(heading, 0), + "set_heading_level too small"); + OK(runner, !cmark_node_set_heading_level(heading, 7), + "set_heading_level too large"); + OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST), + "set_list_type invalid"); + OK(runner, !cmark_node_set_list_start(bullet_list, -1), + "set_list_start negative"); + + cmark_node_free(doc); +} + +static void free_parent(test_batch_runner *runner) { + static const char markdown[] = "text\n"; + + cmark_node *doc = + cmark_parse_document(markdown, 
sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + cmark_node *para = cmark_node_first_child(doc); + cmark_node *text = cmark_node_first_child(para); + cmark_node_unlink(text); + cmark_node_free(doc); + STR_EQ(runner, cmark_node_get_literal(text), "text", + "inline content after freeing parent block"); + cmark_node_free(text); +} + +static void node_check(test_batch_runner *runner) { + // Construct an incomplete tree. + cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); + cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH); + cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH); + doc->first_child = p1; + p1->next = p2; + + INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "node_check fixes tree"); + + cmark_node_free(doc); +} + +static void iterator(test_batch_runner *runner) { + cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10, CMARK_OPT_DEFAULT); + int parnodes = 0; + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(doc); + cmark_node *cur; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (cur->type == CMARK_NODE_PARAGRAPH && ev_type == CMARK_EVENT_ENTER) { + parnodes += 1; + } + } + INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs"); + + cmark_iter_free(iter); + cmark_node_free(doc); +} + +static void iterator_delete(test_batch_runner *runner) { + static const char md[] = "a *b* c\n" + "\n" + "* item1\n" + "* item2\n" + "\n" + "a `b` c\n" + "\n" + "* item1\n" + "* item2\n"; + cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); + cmark_iter *iter = cmark_iter_new(doc); + cmark_event_type ev_type; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cmark_node *node = cmark_iter_get_node(iter); + // Delete list, emph, and code nodes. 
+ if ((ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LIST) || + (ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_EMPH) || + (ev_type == CMARK_EVENT_ENTER && node->type == CMARK_NODE_CODE)) { + cmark_node_free(node); + } + } + + char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + static const char expected[] = "

a c

\n" + "

a c

\n"; + STR_EQ(runner, html, expected, "iterate and delete nodes"); + + cmark_mem *allocator = cmark_get_default_mem_allocator(); + + allocator->free(html); + cmark_iter_free(iter); + cmark_node_free(doc); +} + +static void create_tree(test_batch_runner *runner) { + char *html; + cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); + + cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); + OK(runner, !cmark_node_insert_before(doc, p), "insert before root fails"); + OK(runner, !cmark_node_insert_after(doc, p), "insert after root fails"); + OK(runner, cmark_node_append_child(doc, p), "append1"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent"); + OK(runner, cmark_node_parent(p) == doc, "node_parent"); + + cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH); + OK(runner, cmark_node_prepend_child(p, emph), "prepend1"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent"); + + cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(str1, "Hello, "); + OK(runner, cmark_node_prepend_child(p, str1), "prepend2"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent"); + + cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(str3, "!"); + OK(runner, cmark_node_append_child(p, str3), "append2"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent"); + + cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(str2, "world"); + OK(runner, cmark_node_append_child(emph, str2), "append3"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent"); + + html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "

Hello, world!

\n", "render_html"); + free(html); + + OK(runner, cmark_node_insert_before(str1, str3), "ins before1"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before1 consistent"); + // 31e + OK(runner, cmark_node_first_child(p) == str3, "ins before1 works"); + + OK(runner, cmark_node_insert_before(str1, emph), "ins before2"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before2 consistent"); + // 3e1 + OK(runner, cmark_node_last_child(p) == str1, "ins before2 works"); + + OK(runner, cmark_node_insert_after(str1, str3), "ins after1"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after1 consistent"); + // e13 + OK(runner, cmark_node_next(str1) == str3, "ins after1 works"); + + OK(runner, cmark_node_insert_after(str1, emph), "ins after2"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after2 consistent"); + // 1e3 + OK(runner, cmark_node_previous(emph) == str1, "ins after2 works"); + + cmark_node *str4 = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(str4, "brzz"); + OK(runner, cmark_node_replace(str1, str4), "replace"); + // The replaced node is not freed + cmark_node_free(str1); + + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "replace consistent"); + OK(runner, cmark_node_previous(emph) == str4, "replace works"); + INT_EQ(runner, cmark_node_replace(p, str4), 0, "replace str for p fails"); + + cmark_node_unlink(emph); + + html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "

brzz!

\n", "render_html after shuffling"); + free(html); + + cmark_node_free(doc); + cmark_node_free(emph); +} + +static void custom_nodes(test_batch_runner *runner) { + char *html; + char *man; + cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); + cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); + cmark_node_append_child(doc, p); + cmark_node *ci = cmark_node_new(CMARK_NODE_CUSTOM_INLINE); + cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(str1, "Hello"); + OK(runner, cmark_node_append_child(ci, str1), "append1"); + OK(runner, cmark_node_set_on_enter(ci, ""), "set_on_exit"); + STR_EQ(runner, cmark_node_get_on_enter(ci), "", "get_on_exit"); + cmark_node_append_child(p, ci); + cmark_node *cb = cmark_node_new(CMARK_NODE_CUSTOM_BLOCK); + cmark_node_set_on_enter(cb, "

\n\n CMARK_NODE_LAST_INLINE + ? CMARK_NODE_LAST_BLOCK + : CMARK_NODE_LAST_INLINE; + OK(runner, max_node_type < 32, "all node types < 32"); + + int list_item_flag = 1 << CMARK_NODE_ITEM; + int top_level_blocks = + (1 << CMARK_NODE_BLOCK_QUOTE) | (1 << CMARK_NODE_LIST) | + (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_HTML_BLOCK) | + (1 << CMARK_NODE_PARAGRAPH) | (1 << CMARK_NODE_HEADING) | + (1 << CMARK_NODE_THEMATIC_BREAK); + int all_inlines = (1 << CMARK_NODE_TEXT) | (1 << CMARK_NODE_SOFTBREAK) | + (1 << CMARK_NODE_LINEBREAK) | (1 << CMARK_NODE_CODE) | + (1 << CMARK_NODE_HTML_INLINE) | (1 << CMARK_NODE_EMPH) | + (1 << CMARK_NODE_STRONG) | (1 << CMARK_NODE_LINK) | + (1 << CMARK_NODE_IMAGE); + + test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks); + test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks); + test_content(runner, CMARK_NODE_LIST, list_item_flag); + test_content(runner, CMARK_NODE_ITEM, top_level_blocks); + test_content(runner, CMARK_NODE_CODE_BLOCK, 0); + test_content(runner, CMARK_NODE_HTML_BLOCK, 0); + test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines); + test_content(runner, CMARK_NODE_HEADING, all_inlines); + test_content(runner, CMARK_NODE_THEMATIC_BREAK, 0); + test_content(runner, CMARK_NODE_TEXT, 0); + test_content(runner, CMARK_NODE_SOFTBREAK, 0); + test_content(runner, CMARK_NODE_LINEBREAK, 0); + test_content(runner, CMARK_NODE_CODE, 0); + test_content(runner, CMARK_NODE_HTML_INLINE, 0); + test_content(runner, CMARK_NODE_EMPH, all_inlines); + test_content(runner, CMARK_NODE_STRONG, all_inlines); + test_content(runner, CMARK_NODE_LINK, all_inlines); + test_content(runner, CMARK_NODE_IMAGE, all_inlines); +} + +static void test_content(test_batch_runner *runner, cmark_node_type type, + int allowed_content) { + cmark_node *node = cmark_node_new(type); + + for (int i = 0; i < num_node_types; ++i) { + cmark_node_type child_type = node_types[i]; + cmark_node *child = cmark_node_new(child_type); + + int got = 
cmark_node_append_child(node, child); + int expected = (allowed_content >> child_type) & 1; + + INT_EQ(runner, got, expected, "add %d as child of %d", child_type, type); + + cmark_node_free(child); + } + + cmark_node_free(node); +} + +static void parser(test_batch_runner *runner) { + test_md_to_html(runner, "No newline", "

No newline

\n", + "document without trailing newline"); +} + +static void render_html(test_batch_runner *runner) { + char *html; + + static const char markdown[] = "foo *bar*\n" + "\n" + "paragraph 2\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + cmark_node *paragraph = cmark_node_first_child(doc); + html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "

foo bar

\n", "render single paragraph"); + free(html); + + cmark_node *string = cmark_node_first_child(paragraph); + html = cmark_render_html(string, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "foo ", "render single inline"); + free(html); + + cmark_node *emph = cmark_node_next(string); + html = cmark_render_html(emph, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "bar", "render inline with children"); + free(html); + + cmark_node_free(doc); +} + +static void render_xml(test_batch_runner *runner) { + char *xml; + + static const char markdown[] = "foo *bar*\n" + "\n" + "control -\x0C-\n" + "fffe -\xEF\xBF\xBE-\n" + "ffff -\xEF\xBF\xBF-\n" + "escape <>&\"\n" + "\n" + "```\ncode\n```\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " foo \n" + " \n" + " bar\n" + " \n" + " \n" + " \n" + " control -" UTF8_REPL "-\n" + " \n" + " fffe -" UTF8_REPL "-\n" + " \n" + " ffff -" UTF8_REPL "-\n" + " \n" + " escape <>&"\n" + " \n" + " code\n" + "\n" + "\n", + "render document"); + free(xml); + cmark_node *paragraph = cmark_node_first_child(doc); + xml = cmark_render_xml(paragraph, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " foo \n" + " \n" + " bar\n" + " \n" + "\n", + "render first paragraph with source pos"); + free(xml); + cmark_node_free(doc); +} + +static void render_man(test_batch_runner *runner) { + char *man; + + static const char markdown[] = "foo *bar*\n" + "\n" + "- Lorem ipsum dolor sit amet,\n" + " consectetur adipiscing elit,\n" + "- sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 20); + STR_EQ(runner, man, ".PP\n" + "foo \\f[I]bar\\f[]\n" + ".IP \\[bu] 2\n" + "Lorem ipsum dolor\n" + "sit amet,\n" + 
"consectetur\n" + "adipiscing elit,\n" + ".IP \\[bu] 2\n" + "sed do eiusmod\n" + "tempor incididunt ut\n" + "labore et dolore\n" + "magna aliqua.\n", + "render document with wrapping"); + free(man); + man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, man, ".PP\n" + "foo \\f[I]bar\\f[]\n" + ".IP \\[bu] 2\n" + "Lorem ipsum dolor sit amet,\n" + "consectetur adipiscing elit,\n" + ".IP \\[bu] 2\n" + "sed do eiusmod tempor incididunt\n" + "ut labore et dolore magna aliqua.\n", + "render document without wrapping"); + free(man); + cmark_node_free(doc); +} + +static void render_latex(test_batch_runner *runner) { + char *latex; + + static const char markdown[] = "foo *bar* $%\n" + "\n" + "- Lorem ipsum dolor sit amet,\n" + " consectetur adipiscing elit,\n" + "- sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 20); + STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" + "\n" + "\\begin{itemize}\n" + "\\item Lorem ipsum\n" + "dolor sit amet,\n" + "consectetur\n" + "adipiscing elit,\n" + "\n" + "\\item sed do eiusmod\n" + "tempor incididunt ut\n" + "labore et dolore\n" + "magna aliqua.\n" + "\n" + "\\end{itemize}\n", + "render document with wrapping"); + free(latex); + latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" + "\n" + "\\begin{itemize}\n" + "\\item Lorem ipsum dolor sit amet,\n" + "consectetur adipiscing elit,\n" + "\n" + "\\item sed do eiusmod tempor incididunt\n" + "ut labore et dolore magna aliqua.\n" + "\n" + "\\end{itemize}\n", + "render document without wrapping"); + free(latex); + cmark_node_free(doc); +} + +static void render_commonmark(test_batch_runner *runner) { + char *commonmark; + + static const char markdown[] = "> \\- foo *bar* \\*bar\\*\n" + "\n" + "- Lorem ipsum dolor sit amet,\n" + " consectetur 
adipiscing elit,\n" + "- sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n"; + cmark_node *doc = + cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + + commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 26); + STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" + "\n" + " - Lorem ipsum dolor sit\n" + " amet, consectetur\n" + " adipiscing elit,\n" + " - sed do eiusmod tempor\n" + " incididunt ut labore\n" + " et dolore magna\n" + " aliqua.\n", + "render document with wrapping"); + free(commonmark); + commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" + "\n" + " - Lorem ipsum dolor sit amet,\n" + " consectetur adipiscing elit,\n" + " - sed do eiusmod tempor incididunt\n" + " ut labore et dolore magna aliqua.\n", + "render document without wrapping"); + free(commonmark); + + cmark_node *text = cmark_node_new(CMARK_NODE_TEXT); + cmark_node_set_literal(text, "Hi"); + commonmark = cmark_render_commonmark(text, CMARK_OPT_DEFAULT, 0); + STR_EQ(runner, commonmark, "Hi\n", "render single inline node"); + free(commonmark); + + cmark_node_free(text); + cmark_node_free(doc); +} + +static void utf8(test_batch_runner *runner) { + // Ranges + test_char(runner, 1, "\x01", "valid utf8 01"); + test_char(runner, 1, "\x7F", "valid utf8 7F"); + test_char(runner, 0, "\x80", "invalid utf8 80"); + test_char(runner, 0, "\xBF", "invalid utf8 BF"); + test_char(runner, 0, "\xC0\x80", "invalid utf8 C080"); + test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF"); + test_char(runner, 1, "\xC2\x80", "valid utf8 C280"); + test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF"); + test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080"); + test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF"); + test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080"); + test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF"); + test_char(runner, 0, "\xED\xA0\x80", 
"invalid utf8 EDA080"); + test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF"); + test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080"); + test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF"); + test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080"); + test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF"); + test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080"); + test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF"); + test_char(runner, 0, "\xF8", "invalid utf8 F8"); + test_char(runner, 0, "\xFF", "invalid utf8 FF"); + + // Incomplete byte sequences at end of input + test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0"); + test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080"); + + // Invalid continuation bytes + test_continuation_byte(runner, "\xC2\x80"); + test_continuation_byte(runner, "\xE0\xA0\x80"); + test_continuation_byte(runner, "\xF0\x90\x80\x80"); + + // Test string containing null character + static const char string_with_null[] = "((((\0))))"; + char *html = cmark_markdown_to_html( + string_with_null, sizeof(string_with_null) - 1, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "

((((" UTF8_REPL "))))

\n", "utf8 with U+0000"); + free(html); + + // Test NUL followed by newline + static const char string_with_nul_lf[] = "```\n\0\n```\n"; + html = cmark_markdown_to_html( + string_with_nul_lf, sizeof(string_with_nul_lf) - 1, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "
\xef\xbf\xbd\n
\n", + "utf8 with \\0\\n"); + free(html); +} + +static void test_char(test_batch_runner *runner, int valid, const char *utf8, + const char *msg) { + char buf[20]; + sprintf(buf, "((((%s))))", utf8); + + if (valid) { + char expected[30]; + sprintf(expected, "

((((%s))))

\n", utf8); + test_md_to_html(runner, buf, expected, msg); + } else { + test_md_to_html(runner, buf, "

((((" UTF8_REPL "))))

\n", msg); + } +} + +static void test_incomplete_char(test_batch_runner *runner, const char *utf8, + const char *msg) { + char buf[20]; + sprintf(buf, "----%s", utf8); + test_md_to_html(runner, buf, "

----" UTF8_REPL "

\n", msg); +} + +static void test_continuation_byte(test_batch_runner *runner, + const char *utf8) { + size_t len = strlen(utf8); + + for (size_t pos = 1; pos < len; ++pos) { + char buf[20]; + sprintf(buf, "((((%s))))", utf8); + buf[4 + pos] = '\x20'; + + char expected[50]; + strcpy(expected, "

((((" UTF8_REPL "\x20"); + for (size_t i = pos + 1; i < len; ++i) { + strcat(expected, UTF8_REPL); + } + strcat(expected, "))))

\n"); + + char *html = + cmark_markdown_to_html(buf, strlen(buf), CMARK_OPT_VALIDATE_UTF8); + STR_EQ(runner, html, expected, "invalid utf8 continuation byte %d/%d", pos, + len); + free(html); + } +} + +static void line_endings(test_batch_runner *runner) { + // Test list with different line endings + static const char list_with_endings[] = "- a\n- b\r\n- c\r- d"; + char *html = cmark_markdown_to_html( + list_with_endings, sizeof(list_with_endings) - 1, CMARK_OPT_DEFAULT); + STR_EQ(runner, html, + "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d
  • \n
\n", + "list with different line endings"); + free(html); + + static const char crlf_lines[] = "line\r\nline\r\n"; + html = cmark_markdown_to_html(crlf_lines, sizeof(crlf_lines) - 1, + CMARK_OPT_DEFAULT | CMARK_OPT_HARDBREAKS); + STR_EQ(runner, html, "

line
\nline

\n", + "crlf endings with CMARK_OPT_HARDBREAKS"); + free(html); + html = cmark_markdown_to_html(crlf_lines, sizeof(crlf_lines) - 1, + CMARK_OPT_DEFAULT | CMARK_OPT_NOBREAKS); + STR_EQ(runner, html, "

line line

\n", + "crlf endings with CMARK_OPT_NOBREAKS"); + free(html); + + static const char no_line_ending[] = "```\nline\n```"; + html = cmark_markdown_to_html(no_line_ending, sizeof(no_line_ending) - 1, + CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "
line\n
\n", + "fenced code block with no final newline"); + free(html); +} + +static void numeric_entities(test_batch_runner *runner) { + test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", + "Invalid numeric entity 0"); + test_md_to_html(runner, "퟿", "

\xED\x9F\xBF

\n", + "Valid numeric entity 0xD7FF"); + test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", + "Invalid numeric entity 0xD800"); + test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", + "Invalid numeric entity 0xDFFF"); + test_md_to_html(runner, "", "

\xEE\x80\x80

\n", + "Valid numeric entity 0xE000"); + test_md_to_html(runner, "􏿿", "

\xF4\x8F\xBF\xBF

\n", + "Valid numeric entity 0x10FFFF"); + test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", + "Invalid numeric entity 0x110000"); + test_md_to_html(runner, "�", "

&#x80000000;

\n", + "Invalid numeric entity 0x80000000"); + test_md_to_html(runner, "�", "

&#xFFFFFFFF;

\n", + "Invalid numeric entity 0xFFFFFFFF"); + test_md_to_html(runner, "�", "

&#99999999;

\n", + "Invalid numeric entity 99999999"); + + test_md_to_html(runner, "&#;", "

&#;

\n", + "Min decimal entity length"); + test_md_to_html(runner, "&#x;", "

&#x;

\n", + "Min hexadecimal entity length"); + test_md_to_html(runner, "�", "

&#999999999;

\n", + "Max decimal entity length"); + test_md_to_html(runner, "A", "

&#x000000041;

\n", + "Max hexadecimal entity length"); +} + +static void test_safe(test_batch_runner *runner) { + // Test safe mode + static const char raw_html[] = "
\nhi\n
\n\nhi\n[link](JAVAscript:alert('hi'))\n![image](" + "file:my.js)\n"; + char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, + CMARK_OPT_DEFAULT); + STR_EQ(runner, html, "\n

hi\nlink\n\"image\"

\n", + "input with raw HTML and dangerous links"); + free(html); +} + +static void test_md_to_html(test_batch_runner *runner, const char *markdown, + const char *expected_html, const char *msg) { + char *html = cmark_markdown_to_html(markdown, strlen(markdown), + CMARK_OPT_VALIDATE_UTF8); + STR_EQ(runner, html, expected_html, msg); + free(html); +} + +static void test_feed_across_line_ending(test_batch_runner *runner) { + // See #117 + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + cmark_parser_feed(parser, "line1\r", 6); + cmark_parser_feed(parser, "\nline2\r\n", 8); + cmark_node *document = cmark_parser_finish(parser); + OK(runner, document->first_child->next == NULL, "document has one paragraph"); + cmark_parser_free(parser); + cmark_node_free(document); +} + +static void source_pos(test_batch_runner *runner) { + static const char markdown[] = + "# Hi *there*.\n" + "\n" + "Hello “ \n" + "there `hi` -- [okay](www.google.com (ok)).\n" + "\n" + "> 1. Okay.\n" + "> Sure.\n" + ">\n" + "> 2. 
Yes, okay.\n" + "> ![ok](hi \"yes\")\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " Hi \n" + " \n" + " there\n" + " \n" + " .\n" + " \n" + " \n" + " Hello “ \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " there \n" + " hi\n" + " -- \n" + " \n" + " okay\n" + " \n" + " .\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " Okay.\n" + " \n" + " Sure.\n" + " \n" + " \n" + " \n" + " \n" + " Yes, okay.\n" + " \n" + " \n" + " ok\n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + +static void source_pos_inlines(test_batch_runner *runner) { + { + static const char markdown[] = + "*first*\n" + "second\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " first\n" + " \n" + " \n" + " second\n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "*first\n" + "second*\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " first\n" + " \n" + " second\n" + " \n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "` It is one backtick\n" + "`` They are two backticks\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " ` It is one 
backtick\n" + " \n" + " `` They are two backticks\n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } +} + +static void ref_source_pos(test_batch_runner *runner) { + static const char markdown[] = + "Let's try [reference] links.\n" + "\n" + "[reference]: https://github.com (GitHub)\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " Let's try \n" + " \n" + " reference\n" + " \n" + " links.\n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + +int main(void) { + int retval; + test_batch_runner *runner = test_batch_runner_new(); + + version(runner); + constructor(runner); + accessors(runner); + free_parent(runner); + node_check(runner); + iterator(runner); + iterator_delete(runner); + create_tree(runner); + custom_nodes(runner); + hierarchy(runner); + parser(runner); + render_html(runner); + render_xml(runner); + render_man(runner); + render_latex(runner); + render_commonmark(runner); + utf8(runner); + line_endings(runner); + numeric_entities(runner); + test_cplusplus(runner); + test_safe(runner); + test_feed_across_line_ending(runner); + source_pos(runner); + source_pos_inlines(runner); + ref_source_pos(runner); + + test_print_summary(runner); + retval = test_ok(runner) ? 0 : 1; + free(runner); + + return retval; +} diff --git a/deps/cmark/bench/samples/block-bq-flat.md b/deps/cmark/bench/samples/block-bq-flat.md new file mode 100644 index 0000000..33e382a --- /dev/null +++ b/deps/cmark/bench/samples/block-bq-flat.md @@ -0,0 +1,16 @@ +> the simple example of a blockquote +> the simple example of a blockquote +> the simple example of a blockquote +> the simple example of a blockquote +... continuation +... continuation +... continuation +... 
continuation + +empty blockquote: + +> +> +> +> + diff --git a/deps/cmark/bench/samples/block-bq-nested.md b/deps/cmark/bench/samples/block-bq-nested.md new file mode 100644 index 0000000..7ddcffa --- /dev/null +++ b/deps/cmark/bench/samples/block-bq-nested.md @@ -0,0 +1,13 @@ +>>>>>> deeply nested blockquote +>>>>> deeply nested blockquote +>>>> deeply nested blockquote +>>> deeply nested blockquote +>> deeply nested blockquote +> deeply nested blockquote + +> deeply nested blockquote +>> deeply nested blockquote +>>> deeply nested blockquote +>>>> deeply nested blockquote +>>>>> deeply nested blockquote +>>>>>> deeply nested blockquote diff --git a/deps/cmark/bench/samples/block-code.md b/deps/cmark/bench/samples/block-code.md new file mode 100644 index 0000000..2b1554f --- /dev/null +++ b/deps/cmark/bench/samples/block-code.md @@ -0,0 +1,11 @@ + + an + example + + of + + + + a code + block + diff --git a/deps/cmark/bench/samples/block-fences.md b/deps/cmark/bench/samples/block-fences.md new file mode 100644 index 0000000..5fe8b3c --- /dev/null +++ b/deps/cmark/bench/samples/block-fences.md @@ -0,0 +1,14 @@ + +``````````text +an +example +``` +of + + +a fenced +``` +code +block +`````````` + diff --git a/deps/cmark/bench/samples/block-heading.md b/deps/cmark/bench/samples/block-heading.md new file mode 100644 index 0000000..fd98558 --- /dev/null +++ b/deps/cmark/bench/samples/block-heading.md @@ -0,0 +1,9 @@ +# heading +### heading +##### heading + +# heading # +### heading ### +##### heading \#\#\#\#\###### + +############ not a heading diff --git a/deps/cmark/bench/samples/block-hr.md b/deps/cmark/bench/samples/block-hr.md new file mode 100644 index 0000000..e1bad6f --- /dev/null +++ b/deps/cmark/bench/samples/block-hr.md @@ -0,0 +1,10 @@ + + * * * * * + + - - - - - + + ________ + + + ************************* text + diff --git a/deps/cmark/bench/samples/block-html.md b/deps/cmark/bench/samples/block-html.md new file mode 100644 index 0000000..ff7f8fa --- 
/dev/null +++ b/deps/cmark/bench/samples/block-html.md @@ -0,0 +1,32 @@ +
+ +blah blah + +
+ + + + + +
+ **test** +
+ + + + + + + + + +
+ + test + +
+ + + diff --git a/deps/cmark/bench/samples/block-lheading.md b/deps/cmark/bench/samples/block-lheading.md new file mode 100644 index 0000000..e5c0d99 --- /dev/null +++ b/deps/cmark/bench/samples/block-lheading.md @@ -0,0 +1,8 @@ +heading +--- + +heading +=================================== + +not a heading +----------------------------------- text diff --git a/deps/cmark/bench/samples/block-list-flat.md b/deps/cmark/bench/samples/block-list-flat.md new file mode 100644 index 0000000..14149db --- /dev/null +++ b/deps/cmark/bench/samples/block-list-flat.md @@ -0,0 +1,67 @@ + - tidy + - bullet + - list + + + - loose + + - bullet + + - list + + + 0. ordered + 1. list + 2. example + + + - + - + - + - + + + 1. + 2. + 3. + + + - an example +of a list item + with a continuation + + this part is inside the list + + this part is just a paragraph + + + 1. test + - test + 1. test + - test + + +111111111111111111111111111111111111111111. is this a valid bullet? + + - _________________________ + + - this + - is + + a + + long + - loose + - list + + - with + - some + + tidy + + - list + - items + - in + + - between + - _________________________ diff --git a/deps/cmark/bench/samples/block-list-nested.md b/deps/cmark/bench/samples/block-list-nested.md new file mode 100644 index 0000000..d30aed3 --- /dev/null +++ b/deps/cmark/bench/samples/block-list-nested.md @@ -0,0 +1,36 @@ + + - this + - is + - a + - deeply + - nested + - bullet + - list + + + 1. this + 2. is + 3. a + 4. deeply + 5. nested + 6. unordered + 7. 
list + + + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 6 + - 5 + - 4 + - 3 + - 2 + - 1 + + + - - - - - - - - - deeply-nested one-element item + diff --git a/deps/cmark/bench/samples/block-ref-flat.md b/deps/cmark/bench/samples/block-ref-flat.md new file mode 100644 index 0000000..c83dccb --- /dev/null +++ b/deps/cmark/bench/samples/block-ref-flat.md @@ -0,0 +1,15 @@ +[1] [2] [3] [1] [2] [3] + +[looooooooooooooooooooooooooooooooooooooooooooooooooong label] + + [1]: + [2]: http://something.example.com/foo/bar 'test' + [3]: + http://foo/bar + [ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]: + 111 + 'test' + [[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q + (as long as it is not referenced anywhere) + + [[[[[[[[[[[[[[[[[[[[]: this is not a valid reference diff --git a/deps/cmark/bench/samples/block-ref-nested.md b/deps/cmark/bench/samples/block-ref-nested.md new file mode 100644 index 0000000..1e10a8c --- /dev/null +++ b/deps/cmark/bench/samples/block-ref-nested.md @@ -0,0 +1,17 @@ +[[[[[[[foo]]]]]]] + +[[[[[[[foo]]]]]]]: bar +[[[[[[foo]]]]]]: bar +[[[[[foo]]]]]: bar +[[[[foo]]]]: bar +[[[foo]]]: bar +[[foo]]: bar +[foo]: bar + +[*[*[*[*[foo]*]*]*]*] + +[*[*[*[*[foo]*]*]*]*]: bar +[*[*[*[foo]*]*]*]: bar +[*[*[foo]*]*]: bar +[*[foo]*]: bar +[foo]: bar diff --git a/deps/cmark/bench/samples/inline-autolink.md b/deps/cmark/bench/samples/inline-autolink.md new file mode 100644 index 0000000..0f71482 --- /dev/null +++ b/deps/cmark/bench/samples/inline-autolink.md @@ -0,0 +1,14 @@ +closed (valid) autolinks: + + + + + + +these are not autolinks: + + diff --git a/deps/cmark/bench/samples/inline-backticks.md b/deps/cmark/bench/samples/inline-backticks.md new file mode 100644 index 0000000..a6ec6e1 --- /dev/null +++ b/deps/cmark/bench/samples/inline-backticks.md @@ -0,0 +1,3 @@ +`lots`of`backticks` + +``i``wonder``how``this``will``be``parsed`` diff --git a/deps/cmark/bench/samples/inline-em-flat.md 
b/deps/cmark/bench/samples/inline-em-flat.md new file mode 100644 index 0000000..b7668a5 --- /dev/null +++ b/deps/cmark/bench/samples/inline-em-flat.md @@ -0,0 +1,5 @@ +*this* *is* *your* *basic* *boring* *emphasis* + +_this_ _is_ _your_ _basic_ _boring_ _emphasis_ + +**this** **is** **your** **basic** **boring** **emphasis** diff --git a/deps/cmark/bench/samples/inline-em-nested.md b/deps/cmark/bench/samples/inline-em-nested.md new file mode 100644 index 0000000..6bb0a0d --- /dev/null +++ b/deps/cmark/bench/samples/inline-em-nested.md @@ -0,0 +1,5 @@ +*this *is *a *bunch* of* nested* emphases* + +__this __is __a __bunch__ of__ nested__ emphases__ + +***this ***is ***a ***bunch*** of*** nested*** emphases*** diff --git a/deps/cmark/bench/samples/inline-em-worst.md b/deps/cmark/bench/samples/inline-em-worst.md new file mode 100644 index 0000000..b6d21da --- /dev/null +++ b/deps/cmark/bench/samples/inline-em-worst.md @@ -0,0 +1,5 @@ +*this *is *a *worst *case *for *em *backtracking + +__this __is __a __worst __case __for __em __backtracking + +***this ***is ***a ***worst ***case ***for ***em ***backtracking diff --git a/deps/cmark/bench/samples/inline-entity.md b/deps/cmark/bench/samples/inline-entity.md new file mode 100644 index 0000000..da095ed --- /dev/null +++ b/deps/cmark/bench/samples/inline-entity.md @@ -0,0 +1,11 @@ +entities: + +  & © Æ Ď ¾ ℋ ⅆ ∲ + +# Ӓ Ϡ � + +non-entities: + +&18900987654321234567890; &1234567890098765432123456789009876543212345678987654; + +&qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri; diff --git a/deps/cmark/bench/samples/inline-escape.md b/deps/cmark/bench/samples/inline-escape.md new file mode 100644 index 0000000..4e1bb39 --- /dev/null +++ b/deps/cmark/bench/samples/inline-escape.md @@ -0,0 +1,15 @@ + +\t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s + +\!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\? 
+ +\@ \[ \] \^ \_ \` \{ \| \} \~ \- \' + +\ +\\ +\\\ +\\\\ +\\\\\ + +\ \ \ \ + diff --git a/deps/cmark/bench/samples/inline-html.md b/deps/cmark/bench/samples/inline-html.md new file mode 100644 index 0000000..f6e6341 --- /dev/null +++ b/deps/cmark/bench/samples/inline-html.md @@ -0,0 +1,44 @@ +Taking commonmark tests from the spec for benchmarking here: + + + + + + + + + +<33> <__> + + + + + +foo + +foo + +foo + +foo + +foo &<]]> + + + + + + diff --git a/deps/cmark/bench/samples/inline-links-flat.md b/deps/cmark/bench/samples/inline-links-flat.md new file mode 100644 index 0000000..5117db8 --- /dev/null +++ b/deps/cmark/bench/samples/inline-links-flat.md @@ -0,0 +1,23 @@ +Valid links: + + [this is a link]() + [this is a link]() + [this is a link](http://something.example.com/foo/bar 'test') + ![this is an image]() + ![this is an image]() + ![this is an image](http://something.example.com/foo/bar 'test') + + [escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'') + [escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\)) + +Invalid links: + + [this is not a link + + [this is not a link]( + + [this is not a link](http://something.example.com/foo/bar 'test' + + [this is not a link]((((((((((((((((((((((((((((((((((((((((((((((( + + [this is not a link]((((((((((()))))))))) (((((((((())))))))))) diff --git a/deps/cmark/bench/samples/inline-links-nested.md b/deps/cmark/bench/samples/inline-links-nested.md new file mode 100644 index 0000000..4e7dc85 --- /dev/null +++ b/deps/cmark/bench/samples/inline-links-nested.md @@ -0,0 +1,13 @@ +Valid links: + +[[[[[[[[](test)](test)](test)](test)](test)](test)](test)] + +[ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test) + +Invalid links: + +[[[[[[[[[ + +[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ + +![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![ diff --git a/deps/cmark/bench/samples/inline-newlines.md 
b/deps/cmark/bench/samples/inline-newlines.md new file mode 100644 index 0000000..068a807 --- /dev/null +++ b/deps/cmark/bench/samples/inline-newlines.md @@ -0,0 +1,24 @@ + +this\ +should\ +be\ +separated\ +by\ +newlines + +this +should +be +separated +by +newlines +too + +this +should +not +be +separated +by +newlines + diff --git a/deps/cmark/bench/samples/lorem1.md b/deps/cmark/bench/samples/lorem1.md new file mode 100644 index 0000000..eccb898 --- /dev/null +++ b/deps/cmark/bench/samples/lorem1.md @@ -0,0 +1,13 @@ +Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat. + +Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit. + +Nullam ut tincidunt nunc. 
[Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu. + +Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero. + +Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat. + +Nunc et leo erat. 
Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique. + +[1]: https://github.com/markdown-it diff --git a/deps/cmark/bench/samples/rawtabs.md b/deps/cmark/bench/samples/rawtabs.md new file mode 100644 index 0000000..dc989ea --- /dev/null +++ b/deps/cmark/bench/samples/rawtabs.md @@ -0,0 +1,18 @@ + +this is a test for tab expansion, be careful not to replace them with spaces + +1 4444 +22 333 +333 22 +4444 1 + + + tab-indented line + space-indented line + tab-indented line + + +a lot of spaces in between here + +a lot of tabs in between here + diff --git a/deps/cmark/bench/statistics.py b/deps/cmark/bench/statistics.py new file mode 100644 index 0000000..25a26d4 --- /dev/null +++ b/deps/cmark/bench/statistics.py @@ -0,0 +1,595 @@ +## Module statistics.py +## +## Copyright (c) 2013 Steven D'Aprano . +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + + +""" +Basic statistics module. + +This module provides functions for calculating statistics of data, including +averages, variance, and standard deviation. 
+ +Calculating averages +-------------------- + +================== ============================================= +Function Description +================== ============================================= +mean Arithmetic mean (average) of data. +median Median (middle value) of data. +median_low Low median of data. +median_high High median of data. +median_grouped Median, or 50th percentile, of grouped data. +mode Mode (most common value) of data. +================== ============================================= + +Calculate the arithmetic mean ("the average") of data: + +>>> mean([-1.0, 2.5, 3.25, 5.75]) +2.625 + + +Calculate the standard median of discrete data: + +>>> median([2, 3, 4, 5]) +3.5 + + +Calculate the median, or 50th percentile, of data grouped into class intervals +centred on the data values provided. E.g. if your data points are rounded to +the nearest whole number: + +>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS +2.8333333333... + +This should be interpreted in this way: you have two data points in the class +interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in +the class interval 3.5-4.5. The median of these data points is 2.8333... + + +Calculating variability or spread +--------------------------------- + +================== ============================================= +Function Description +================== ============================================= +pvariance Population variance of data. +variance Sample variance of data. +pstdev Population standard deviation of data. +stdev Sample standard deviation of data. +================== ============================================= + +Calculate the standard deviation of sample data: + +>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS +4.38961843444... 
+ +If you have previously calculated the mean, you can pass it as the optional +second argument to the four "spread" functions to avoid recalculating it: + +>>> data = [1, 2, 2, 4, 4, 4, 5, 6] +>>> mu = mean(data) +>>> pvariance(data, mu) +2.5 + + +Exceptions +---------- + +A single exception is defined: StatisticsError is a subclass of ValueError. + +""" + +__all__ = [ 'StatisticsError', + 'pstdev', 'pvariance', 'stdev', 'variance', + 'median', 'median_low', 'median_high', 'median_grouped', + 'mean', 'mode', + ] + + +import collections +import math + +from fractions import Fraction +from decimal import Decimal + + +# === Exceptions === + +class StatisticsError(ValueError): + pass + + +# === Private utilities === + +def _sum(data, start=0): + """_sum(data [, start]) -> value + + Return a high-precision sum of the given numeric data. If optional + argument ``start`` is given, it is added to the total. If ``data`` is + empty, ``start`` (defaulting to 0) is returned. + + + Examples + -------- + + >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) + 11.0 + + Some sources of round-off error will be avoided: + + >>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero. + 1000.0 + + Fractions and Decimals are also supported: + + >>> from fractions import Fraction as F + >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) + Fraction(63, 20) + + >>> from decimal import Decimal as D + >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] + >>> _sum(data) + Decimal('0.6963') + + Mixed types are currently treated as an error, except that int is + allowed. + """ + # We fail as soon as we reach a value that is not an int or the type of + # the first value which is not an int. E.g. _sum([int, int, float, int]) + # is okay, but sum([int, int, float, Fraction]) is not. + allowed_types = set([int, type(start)]) + n, d = _exact_ratio(start) + partials = {d: n} # map {denominator: sum of numerators} + # Micro-optimizations. 
+ exact_ratio = _exact_ratio + partials_get = partials.get + # Add numerators for each denominator. + for x in data: + _check_type(type(x), allowed_types) + n, d = exact_ratio(x) + partials[d] = partials_get(d, 0) + n + # Find the expected result type. If allowed_types has only one item, it + # will be int; if it has two, use the one which isn't int. + assert len(allowed_types) in (1, 2) + if len(allowed_types) == 1: + assert allowed_types.pop() is int + T = int + else: + T = (allowed_types - set([int])).pop() + if None in partials: + assert issubclass(T, (float, Decimal)) + assert not math.isfinite(partials[None]) + return T(partials[None]) + total = Fraction() + for d, n in sorted(partials.items()): + total += Fraction(n, d) + if issubclass(T, int): + assert total.denominator == 1 + return T(total.numerator) + if issubclass(T, Decimal): + return T(total.numerator)/total.denominator + return T(total) + + +def _check_type(T, allowed): + if T not in allowed: + if len(allowed) == 1: + allowed.add(T) + else: + types = ', '.join([t.__name__ for t in allowed] + [T.__name__]) + raise TypeError("unsupported mixed types: %s" % types) + + +def _exact_ratio(x): + """Convert Real number x exactly to (numerator, denominator) pair. + + >>> _exact_ratio(0.25) + (1, 4) + + x is expected to be an int, Fraction, Decimal or float. 
+ """ + try: + try: + # int, Fraction + return (x.numerator, x.denominator) + except AttributeError: + # float + try: + return x.as_integer_ratio() + except AttributeError: + # Decimal + try: + return _decimal_to_ratio(x) + except AttributeError: + msg = "can't convert type '{}' to numerator/denominator" + raise TypeError(msg.format(type(x).__name__)) from None + except (OverflowError, ValueError): + # INF or NAN + if __debug__: + # Decimal signalling NANs cannot be converted to float :-( + if isinstance(x, Decimal): + assert not x.is_finite() + else: + assert not math.isfinite(x) + return (x, None) + + +# FIXME This is faster than Fraction.from_decimal, but still too slow. +def _decimal_to_ratio(d): + """Convert Decimal d to exact integer ratio (numerator, denominator). + + >>> from decimal import Decimal + >>> _decimal_to_ratio(Decimal("2.6")) + (26, 10) + + """ + sign, digits, exp = d.as_tuple() + if exp in ('F', 'n', 'N'): # INF, NAN, sNAN + assert not d.is_finite() + raise ValueError + num = 0 + for digit in digits: + num = num*10 + digit + if exp < 0: + den = 10**-exp + else: + num *= 10**exp + den = 1 + if sign: + num = -num + return (num, den) + + +def _counts(data): + # Generate a table of sorted (value, frequency) pairs. + table = collections.Counter(iter(data)).most_common() + if not table: + return table + # Extract the values with the highest frequency. + maxfreq = table[0][1] + for i in range(1, len(table)): + if table[i][1] != maxfreq: + table = table[:i] + break + return table + + +# === Measures of central tendency (averages) === + +def mean(data): + """Return the sample arithmetic mean of data. + + >>> mean([1, 2, 3, 4, 4]) + 2.8 + + >>> from fractions import Fraction as F + >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) + Fraction(13, 21) + + >>> from decimal import Decimal as D + >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) + Decimal('0.5625') + + If ``data`` is empty, StatisticsError will be raised. 
+ """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('mean requires at least one data point') + return _sum(data)/n + + +# FIXME: investigate ways to calculate medians without sorting? Quickselect? +def median(data): + """Return the median (middle value) of numeric data. + + When the number of data points is odd, return the middle data point. + When the number of data points is even, the median is interpolated by + taking the average of the two middle values: + + >>> median([1, 3, 5]) + 3 + >>> median([1, 3, 5, 7]) + 4.0 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + i = n//2 + return (data[i - 1] + data[i])/2 + + +def median_low(data): + """Return the low median of numeric data. + + When the number of data points is odd, the middle value is returned. + When it is even, the smaller of the two middle values is returned. + + >>> median_low([1, 3, 5]) + 3 + >>> median_low([1, 3, 5, 7]) + 3 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + return data[n//2 - 1] + + +def median_high(data): + """Return the high median of data. + + When the number of data points is odd, the middle value is returned. + When it is even, the larger of the two middle values is returned. + + >>> median_high([1, 3, 5]) + 3 + >>> median_high([1, 3, 5, 7]) + 5 + + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + return data[n//2] + + +def median_grouped(data, interval=1): + """"Return the 50th percentile (median) of grouped continuous data. + + >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5]) + 3.7 + >>> median_grouped([52, 52, 53, 54]) + 52.5 + + This calculates the median as the 50th percentile, and should be + used when your data is continuous and grouped. 
In the above example, + the values 1, 2, 3, etc. actually represent the midpoint of classes + 0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in + class 3.5-4.5, and interpolation is used to estimate it. + + Optional argument ``interval`` represents the class interval, and + defaults to 1. Changing the class interval naturally will change the + interpolated 50th percentile value: + + >>> median_grouped([1, 3, 3, 5, 7], interval=1) + 3.25 + >>> median_grouped([1, 3, 3, 5, 7], interval=2) + 3.5 + + This function does not check whether the data points are at least + ``interval`` apart. + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + elif n == 1: + return data[0] + # Find the value at the midpoint. Remember this corresponds to the + # centre of the class interval. + x = data[n//2] + for obj in (x, interval): + if isinstance(obj, (str, bytes)): + raise TypeError('expected number but got %r' % obj) + try: + L = x - interval/2 # The lower limit of the median interval. + except TypeError: + # Mixed type. For now we just coerce to float. + L = float(x) - float(interval)/2 + cf = data.index(x) # Number of values below the median interval. + # FIXME The following line could be more efficient for big lists. + f = data.count(x) # Number of data points in the median interval. + return L + interval*(n/2 - cf)/f + + +def mode(data): + """Return the most common data point from discrete or nominal data. + + ``mode`` assumes discrete data, and returns a single value. This is the + standard treatment of the mode as commonly taught in schools: + + >>> mode([1, 1, 2, 3, 3, 3, 3, 4]) + 3 + + This also works with nominal (non-numeric) data: + + >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) + 'red' + + If there is not exactly one most common value, ``mode`` will raise + StatisticsError. + """ + # Generate a table of sorted (value, frequency) pairs. 
+ table = _counts(data) + if len(table) == 1: + return table[0][0] + elif table: + raise StatisticsError( + 'no unique mode; found %d equally common values' % len(table) + ) + else: + raise StatisticsError('no mode for empty data') + + +# === Measures of spread === + +# See http://mathworld.wolfram.com/Variance.html +# http://mathworld.wolfram.com/SampleVariance.html +# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance +# +# Under no circumstances use the so-called "computational formula for +# variance", as that is only suitable for hand calculations with a small +# amount of low-precision data. It has terrible numeric properties. +# +# See a comparison of three computational methods here: +# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ + +def _ss(data, c=None): + """Return sum of square deviations of sequence data. + + If ``c`` is None, the mean is calculated in one pass, and the deviations + from the mean are calculated in a second pass. Otherwise, deviations are + calculated from ``c`` as given. Use the second case with care, as it can + lead to garbage results. + """ + if c is None: + c = mean(data) + ss = _sum((x-c)**2 for x in data) + # The following sum should mathematically equal zero, but due to rounding + # error may not. + ss -= _sum((x-c) for x in data)**2/len(data) + assert not ss < 0, 'negative sum of square deviations: %f' % ss + return ss + + +def variance(data, xbar=None): + """Return the sample variance of data. + + data should be an iterable of Real-valued numbers, with at least two + values. The optional argument xbar, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. + + Use this function when your data is a sample from a population. To + calculate the variance from the entire population, see ``pvariance``. 
+ + Examples: + + >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] + >>> variance(data) + 1.3720238095238095 + + If you have already calculated the mean of your data, you can pass it as + the optional second argument ``xbar`` to avoid recalculating it: + + >>> m = mean(data) + >>> variance(data, m) + 1.3720238095238095 + + This function does not check that ``xbar`` is actually the mean of + ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or + impossible results. + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('31.01875') + + >>> from fractions import Fraction as F + >>> variance([F(1, 6), F(1, 2), F(5, 3)]) + Fraction(67, 108) + + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 2: + raise StatisticsError('variance requires at least two data points') + ss = _ss(data, xbar) + return ss/(n-1) + + +def pvariance(data, mu=None): + """Return the population variance of ``data``. + + data should be an iterable of Real-valued numbers, with at least one + value. The optional argument mu, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. + + Use this function to calculate the variance from the entire population. + To estimate the variance from a sample, the ``variance`` function is + usually a better choice. + + Examples: + + >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] + >>> pvariance(data) + 1.25 + + If you have already calculated the mean of the data, you can pass it as + the optional second argument to avoid recalculating it: + + >>> mu = mean(data) + >>> pvariance(data, mu) + 1.25 + + This function does not check that ``mu`` is actually the mean of ``data``. + Giving arbitrary values for ``mu`` may lead to invalid or impossible + results. 
+ + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('24.815') + + >>> from fractions import Fraction as F + >>> pvariance([F(1, 4), F(5, 4), F(1, 2)]) + Fraction(13, 72) + + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('pvariance requires at least one data point') + ss = _ss(data, mu) + return ss/n + + +def stdev(data, xbar=None): + """Return the square root of the sample variance. + + See ``variance`` for arguments and other details. + + >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 1.0810874155219827 + + """ + var = variance(data, xbar) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var) + + +def pstdev(data, mu=None): + """Return the square root of the population variance. + + See ``pvariance`` for arguments and other details. + + >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 0.986893273527251 + + """ + var = pvariance(data, mu) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var) diff --git a/deps/cmark/bench/stats.py b/deps/cmark/bench/stats.py new file mode 100644 index 0000000..c244b41 --- /dev/null +++ b/deps/cmark/bench/stats.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +import sys +import statistics + +def pairs(l, n): + return zip(*[l[i::n] for i in range(n)]) + +# data comes in pairs: +# n - time for running the program with no input +# m - time for running it with the benchmark input +# we measure (m - n) + +values = [ float(y) - float(x) for (x,y) in pairs(sys.stdin.readlines(),2)] + +print("mean = %.4f, median = %.4f, stdev = %.4f" % + (statistics.mean(values), statistics.median(values), + statistics.stdev(values))) + diff --git a/deps/cmark/benchmarks.md b/deps/cmark/benchmarks.md new file mode 100644 index 0000000..bb5e28e --- /dev/null +++ b/deps/cmark/benchmarks.md @@ -0,0 +1,23 @@ +# Benchmarks + +Here are some benchmarks, 
run on a 2.3GHz 8-core i9 macbook pro. +The input text is a 1106 KB Markdown file built by concatenating +the Markdown sources of all the localizations of the first edition +of [*Pro Git*](https://github.com/progit/progit/tree/master/en) by +Scott Chacon. + +|Implementation | Time (sec)| +|-------------------|-----------:| +| **commonmark.js** | 0.59 | +| **cmark** | 0.12 | +| **md4c** | 0.04 | + +To run these benchmarks, use `make bench PROG=/path/to/program`. + +`time` is used to measure execution speed. The reported +time is the *difference* between the time to run the program +with the benchmark input and the time to run it with no input. +(This procedure ensures that implementations in dynamic languages are +not penalized by startup time.) A median of ten runs is taken. The +process is reniced to a high priority so that the system doesn't +interrupt runs. diff --git a/deps/cmark/changelog.txt b/deps/cmark/changelog.txt new file mode 100644 index 0000000..0f6354f --- /dev/null +++ b/deps/cmark/changelog.txt @@ -0,0 +1,1316 @@ +[0.30.3] + + * Fix quadratic complexity bug with repeated `![[]()`. + Resolves CVE-2023-22486. Add new pathological test. (John MacFarlane) + + * Allow declarations with no space, as per spec (#456, John MacFarlane). + + * Set `enumi*` counter correctly in LaTeX output (#451, John MacFarlane). + + * Allow ` and -->`. + Since the scanner finds the longest match, we had to + move some of the logic outside of the scanner. 
(John MacFarlane) + + * Fix quadratic parsing issue with repeated ` + matches_end_condition = + scan_html_block_end_2(input, parser->first_nonspace); + break; + case 3: + // ?> + matches_end_condition = + scan_html_block_end_3(input, parser->first_nonspace); + break; + case 4: + // > + matches_end_condition = + scan_html_block_end_4(input, parser->first_nonspace); + break; + case 5: + // ]]> + matches_end_condition = + scan_html_block_end_5(input, parser->first_nonspace); + break; + default: + matches_end_condition = 0; + break; + } + + if (matches_end_condition) { + container = finalize(parser, container); + assert(parser->current != NULL); + } + } else if (parser->blank) { + // ??? do nothing + } else if (accepts_lines(S_type(container))) { + if (S_type(container) == CMARK_NODE_HEADING && + container->as.heading.setext == false) { + chop_trailing_hashtags(input); + } + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + false); + add_line(input, parser); + } else { + // create paragraph container for line + container = add_child(parser, container, CMARK_NODE_PARAGRAPH, + parser->first_nonspace + 1); + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + false); + add_line(input, parser); + } + + parser->current = container; + } +} + +/* See http://spec.commonmark.org/0.24/#phase-1-block-structure */ +static void S_process_line(cmark_parser *parser, const unsigned char *buffer, + bufsize_t bytes) { + cmark_node *last_matched_container; + bool all_matched = true; + cmark_node *container; + cmark_chunk input; + + if (parser->options & CMARK_OPT_VALIDATE_UTF8) + cmark_utf8proc_check(&parser->curline, buffer, bytes); + else + cmark_strbuf_put(&parser->curline, buffer, bytes); + + bytes = parser->curline.size; + + // ensure line ends with a newline: + if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1])) + cmark_strbuf_putc(&parser->curline, '\n'); + + parser->offset = 0; + parser->column = 0; + 
parser->first_nonspace = 0; + parser->first_nonspace_column = 0; + parser->thematic_break_kill_pos = 0; + parser->indent = 0; + parser->blank = false; + parser->partially_consumed_tab = false; + + input.data = parser->curline.ptr; + input.len = parser->curline.size; + + parser->line_number++; + + last_matched_container = check_open_blocks(parser, &input, &all_matched); + + if (!last_matched_container) + goto finished; + + container = last_matched_container; + + open_new_blocks(parser, &container, &input, all_matched); + + add_text_to_container(parser, container, last_matched_container, &input); + +finished: + parser->last_line_length = input.len; + if (parser->last_line_length && + input.data[parser->last_line_length - 1] == '\n') + parser->last_line_length -= 1; + if (parser->last_line_length && + input.data[parser->last_line_length - 1] == '\r') + parser->last_line_length -= 1; + + cmark_strbuf_clear(&parser->curline); +} + +cmark_node *cmark_parser_finish(cmark_parser *parser) { + if (parser->linebuf.size) { + S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); + cmark_strbuf_clear(&parser->linebuf); + } + + finalize_document(parser); + + cmark_consolidate_text_nodes(parser->root); + + cmark_strbuf_free(&parser->curline); + +#if CMARK_DEBUG_NODES + if (cmark_node_check(parser->root, stderr)) { + abort(); + } +#endif + return parser->root; +} diff --git a/deps/cmark/src/buffer.c b/deps/cmark/src/buffer.c new file mode 100644 index 0000000..d946493 --- /dev/null +++ b/deps/cmark/src/buffer.c @@ -0,0 +1,278 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "cmark_ctype.h" +#include "buffer.h" + +/* Used as default value for cmark_strbuf->ptr so that people can always + * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. + */ +unsigned char cmark_strbuf__initbuf[1]; + +#ifndef MIN +#define MIN(x, y) ((x < y) ? 
x : y) +#endif + +void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, + bufsize_t initial_size) { + buf->mem = mem; + buf->asize = 0; + buf->size = 0; + buf->ptr = cmark_strbuf__initbuf; + + if (initial_size > 0) + cmark_strbuf_grow(buf, initial_size); +} + +static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { + cmark_strbuf_grow(buf, buf->size + add); +} + +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { + assert(target_size > 0); + + if (target_size < buf->asize) + return; + + if (target_size > (bufsize_t)(INT32_MAX / 2)) { + fprintf(stderr, + "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n", + (INT32_MAX / 2)); + abort(); + } + + /* Oversize the buffer by 50% to guarantee amortized linear time + * complexity on append operations. */ + bufsize_t new_size = target_size + target_size / 2; + new_size += 1; + new_size = (new_size + 7) & ~7; + + buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, + new_size); + buf->asize = new_size; +} + +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } + +void cmark_strbuf_free(cmark_strbuf *buf) { + if (!buf) + return; + + if (buf->ptr != cmark_strbuf__initbuf) + buf->mem->free(buf->ptr); + + cmark_strbuf_init(buf->mem, buf, 0); +} + +void cmark_strbuf_clear(cmark_strbuf *buf) { + buf->size = 0; + + if (buf->asize > 0) + buf->ptr[0] = '\0'; +} + +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, + bufsize_t len) { + if (len <= 0 || data == NULL) { + cmark_strbuf_clear(buf); + } else { + if (data != buf->ptr) { + if (len >= buf->asize) + cmark_strbuf_grow(buf, len); + memmove(buf->ptr, data, len); + } + buf->size = len; + buf->ptr[buf->size] = '\0'; + } +} + +void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { + cmark_strbuf_set(buf, (const unsigned char *)string, + string ? 
strlen(string) : 0); +} + +void cmark_strbuf_putc(cmark_strbuf *buf, int c) { + S_strbuf_grow_by(buf, 1); + buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); + buf->ptr[buf->size] = '\0'; +} + +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, + bufsize_t len) { + if (len <= 0) + return; + + S_strbuf_grow_by(buf, len); + memmove(buf->ptr + buf->size, data, len); + buf->size += len; + buf->ptr[buf->size] = '\0'; +} + +void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { + cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); +} + +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, + const cmark_strbuf *buf) { + bufsize_t copylen; + + assert(buf); + if (!data || datasize <= 0) + return; + + data[0] = '\0'; + + if (buf->size == 0 || buf->asize <= 0) + return; + + copylen = buf->size; + if (copylen > datasize - 1) + copylen = datasize - 1; + memmove(data, buf->ptr, copylen); + data[copylen] = '\0'; +} + +void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { + cmark_strbuf t = *buf_a; + *buf_a = *buf_b; + *buf_b = t; +} + +unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { + unsigned char *data = buf->ptr; + + if (buf->asize == 0) { + /* return an empty string */ + return (unsigned char *)buf->mem->calloc(1, 1); + } + + cmark_strbuf_init(buf->mem, buf, 0); + return data; +} + +int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { + int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); + return (result != 0) ? result + : (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; +} + +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { + if (pos >= buf->size) + return -1; + if (pos < 0) + pos = 0; + + const unsigned char *p = + (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); + if (!p) + return -1; + + return (bufsize_t)(p - (const unsigned char *)buf->ptr); +} + +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { + if (pos < 0 || buf->size == 0) + return -1; + if (pos >= buf->size) + pos = buf->size - 1; + + bufsize_t i; + for (i = pos; i >= 0; i--) { + if (buf->ptr[i] == (unsigned char)c) + return i; + } + + return -1; +} + +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { + if (len < 0) + len = 0; + + if (len < buf->size) { + buf->size = len; + buf->ptr[buf->size] = '\0'; + } +} + +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { + if (n > 0) { + if (n > buf->size) + n = buf->size; + buf->size = buf->size - n; + if (buf->size) + memmove(buf->ptr, buf->ptr + n, buf->size); + + buf->ptr[buf->size] = '\0'; + } +} + +void cmark_strbuf_rtrim(cmark_strbuf *buf) { + if (!buf->size) + return; + + while (buf->size > 0) { + if (!cmark_isspace(buf->ptr[buf->size - 1])) + break; + + buf->size--; + } + + buf->ptr[buf->size] = '\0'; +} + +void cmark_strbuf_trim(cmark_strbuf *buf) { + bufsize_t i = 0; + + if (!buf->size) + return; + + while (i < buf->size && cmark_isspace(buf->ptr[i])) + i++; + + cmark_strbuf_drop(buf, i); + + cmark_strbuf_rtrim(buf); +} + +// Destructively modify string, collapsing consecutive +// space and newline characters into a single space. 
+void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { + bool last_char_was_space = false; + bufsize_t r, w; + + for (r = 0, w = 0; r < s->size; ++r) { + if (cmark_isspace(s->ptr[r])) { + if (!last_char_was_space) { + s->ptr[w++] = ' '; + last_char_was_space = true; + } + } else { + s->ptr[w++] = s->ptr[r]; + last_char_was_space = false; + } + } + + cmark_strbuf_truncate(s, w); +} + +// Destructively unescape a string: remove backslashes before punctuation chars. +extern void cmark_strbuf_unescape(cmark_strbuf *buf) { + bufsize_t r, w; + + for (r = 0, w = 0; r < buf->size; ++r) { + if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) + r++; + + buf->ptr[w++] = buf->ptr[r]; + } + + cmark_strbuf_truncate(buf, w); +} diff --git a/deps/cmark/src/buffer.h b/deps/cmark/src/buffer.h new file mode 100644 index 0000000..55b552f --- /dev/null +++ b/deps/cmark/src/buffer.h @@ -0,0 +1,84 @@ +#ifndef CMARK_BUFFER_H +#define CMARK_BUFFER_H + +#include +#include +#include +#include +#include +#include "config.h" +#include "cmark.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int32_t bufsize_t; + +typedef struct { + cmark_mem *mem; + unsigned char *ptr; + bufsize_t asize, size; +} cmark_strbuf; + +extern unsigned char cmark_strbuf__initbuf[]; + +#define CMARK_BUF_INIT(mem) \ + { mem, cmark_strbuf__initbuf, 0, 0 } + +/** + * Initialize a cmark_strbuf structure. + * + * For the cases where CMARK_BUF_INIT cannot be used to do static + * initialization. + */ +void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, + bufsize_t initial_size); + +/** + * Grow the buffer to hold at least `target_size` bytes. 
+ */ +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); + +void cmark_strbuf_free(cmark_strbuf *buf); +void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); + +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); + +int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); + +unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, + const cmark_strbuf *buf); + +/* +static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { + return (char *)buf->ptr; +} +*/ + +#define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) + +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, + bufsize_t len); +void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); +void cmark_strbuf_putc(cmark_strbuf *buf, int c); +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, + bufsize_t len); +void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); +void cmark_strbuf_clear(cmark_strbuf *buf); + +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); +void cmark_strbuf_rtrim(cmark_strbuf *buf); +void cmark_strbuf_trim(cmark_strbuf *buf); +void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); +void cmark_strbuf_unescape(cmark_strbuf *s); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/case_fold_switch.inc b/deps/cmark/src/case_fold_switch.inc new file mode 100644 index 0000000..f8ad7eb --- /dev/null +++ b/deps/cmark/src/case_fold_switch.inc @@ -0,0 +1,4714 @@ + switch (c) { + case 0x0041: + bufpush(0x0061); + break; + case 0x0042: + bufpush(0x0062); + break; + case 0x0043: + bufpush(0x0063); + break; + case 0x0044: + bufpush(0x0064); + break; + case 0x0045: + bufpush(0x0065); + break; + case 0x0046: + 
bufpush(0x0066); + break; + case 0x0047: + bufpush(0x0067); + break; + case 0x0048: + bufpush(0x0068); + break; + case 0x0049: + bufpush(0x0069); + break; + case 0x004A: + bufpush(0x006A); + break; + case 0x004B: + bufpush(0x006B); + break; + case 0x004C: + bufpush(0x006C); + break; + case 0x004D: + bufpush(0x006D); + break; + case 0x004E: + bufpush(0x006E); + break; + case 0x004F: + bufpush(0x006F); + break; + case 0x0050: + bufpush(0x0070); + break; + case 0x0051: + bufpush(0x0071); + break; + case 0x0052: + bufpush(0x0072); + break; + case 0x0053: + bufpush(0x0073); + break; + case 0x0054: + bufpush(0x0074); + break; + case 0x0055: + bufpush(0x0075); + break; + case 0x0056: + bufpush(0x0076); + break; + case 0x0057: + bufpush(0x0077); + break; + case 0x0058: + bufpush(0x0078); + break; + case 0x0059: + bufpush(0x0079); + break; + case 0x005A: + bufpush(0x007A); + break; + case 0x00B5: + bufpush(0x03BC); + break; + case 0x00C0: + bufpush(0x00E0); + break; + case 0x00C1: + bufpush(0x00E1); + break; + case 0x00C2: + bufpush(0x00E2); + break; + case 0x00C3: + bufpush(0x00E3); + break; + case 0x00C4: + bufpush(0x00E4); + break; + case 0x00C5: + bufpush(0x00E5); + break; + case 0x00C6: + bufpush(0x00E6); + break; + case 0x00C7: + bufpush(0x00E7); + break; + case 0x00C8: + bufpush(0x00E8); + break; + case 0x00C9: + bufpush(0x00E9); + break; + case 0x00CA: + bufpush(0x00EA); + break; + case 0x00CB: + bufpush(0x00EB); + break; + case 0x00CC: + bufpush(0x00EC); + break; + case 0x00CD: + bufpush(0x00ED); + break; + case 0x00CE: + bufpush(0x00EE); + break; + case 0x00CF: + bufpush(0x00EF); + break; + case 0x00D0: + bufpush(0x00F0); + break; + case 0x00D1: + bufpush(0x00F1); + break; + case 0x00D2: + bufpush(0x00F2); + break; + case 0x00D3: + bufpush(0x00F3); + break; + case 0x00D4: + bufpush(0x00F4); + break; + case 0x00D5: + bufpush(0x00F5); + break; + case 0x00D6: + bufpush(0x00F6); + break; + case 0x00D8: + bufpush(0x00F8); + break; + case 0x00D9: + bufpush(0x00F9); + 
break; + case 0x00DA: + bufpush(0x00FA); + break; + case 0x00DB: + bufpush(0x00FB); + break; + case 0x00DC: + bufpush(0x00FC); + break; + case 0x00DD: + bufpush(0x00FD); + break; + case 0x00DE: + bufpush(0x00FE); + break; + case 0x00DF: + bufpush(0x0073); + bufpush(0x0073); + break; + case 0x0100: + bufpush(0x0101); + break; + case 0x0102: + bufpush(0x0103); + break; + case 0x0104: + bufpush(0x0105); + break; + case 0x0106: + bufpush(0x0107); + break; + case 0x0108: + bufpush(0x0109); + break; + case 0x010A: + bufpush(0x010B); + break; + case 0x010C: + bufpush(0x010D); + break; + case 0x010E: + bufpush(0x010F); + break; + case 0x0110: + bufpush(0x0111); + break; + case 0x0112: + bufpush(0x0113); + break; + case 0x0114: + bufpush(0x0115); + break; + case 0x0116: + bufpush(0x0117); + break; + case 0x0118: + bufpush(0x0119); + break; + case 0x011A: + bufpush(0x011B); + break; + case 0x011C: + bufpush(0x011D); + break; + case 0x011E: + bufpush(0x011F); + break; + case 0x0120: + bufpush(0x0121); + break; + case 0x0122: + bufpush(0x0123); + break; + case 0x0124: + bufpush(0x0125); + break; + case 0x0126: + bufpush(0x0127); + break; + case 0x0128: + bufpush(0x0129); + break; + case 0x012A: + bufpush(0x012B); + break; + case 0x012C: + bufpush(0x012D); + break; + case 0x012E: + bufpush(0x012F); + break; + case 0x0130: + bufpush(0x0069); + bufpush(0x0307); + break; + case 0x0132: + bufpush(0x0133); + break; + case 0x0134: + bufpush(0x0135); + break; + case 0x0136: + bufpush(0x0137); + break; + case 0x0139: + bufpush(0x013A); + break; + case 0x013B: + bufpush(0x013C); + break; + case 0x013D: + bufpush(0x013E); + break; + case 0x013F: + bufpush(0x0140); + break; + case 0x0141: + bufpush(0x0142); + break; + case 0x0143: + bufpush(0x0144); + break; + case 0x0145: + bufpush(0x0146); + break; + case 0x0147: + bufpush(0x0148); + break; + case 0x0149: + bufpush(0x02BC); + bufpush(0x006E); + break; + case 0x014A: + bufpush(0x014B); + break; + case 0x014C: + bufpush(0x014D); + break; 
+ case 0x014E: + bufpush(0x014F); + break; + case 0x0150: + bufpush(0x0151); + break; + case 0x0152: + bufpush(0x0153); + break; + case 0x0154: + bufpush(0x0155); + break; + case 0x0156: + bufpush(0x0157); + break; + case 0x0158: + bufpush(0x0159); + break; + case 0x015A: + bufpush(0x015B); + break; + case 0x015C: + bufpush(0x015D); + break; + case 0x015E: + bufpush(0x015F); + break; + case 0x0160: + bufpush(0x0161); + break; + case 0x0162: + bufpush(0x0163); + break; + case 0x0164: + bufpush(0x0165); + break; + case 0x0166: + bufpush(0x0167); + break; + case 0x0168: + bufpush(0x0169); + break; + case 0x016A: + bufpush(0x016B); + break; + case 0x016C: + bufpush(0x016D); + break; + case 0x016E: + bufpush(0x016F); + break; + case 0x0170: + bufpush(0x0171); + break; + case 0x0172: + bufpush(0x0173); + break; + case 0x0174: + bufpush(0x0175); + break; + case 0x0176: + bufpush(0x0177); + break; + case 0x0178: + bufpush(0x00FF); + break; + case 0x0179: + bufpush(0x017A); + break; + case 0x017B: + bufpush(0x017C); + break; + case 0x017D: + bufpush(0x017E); + break; + case 0x017F: + bufpush(0x0073); + break; + case 0x0181: + bufpush(0x0253); + break; + case 0x0182: + bufpush(0x0183); + break; + case 0x0184: + bufpush(0x0185); + break; + case 0x0186: + bufpush(0x0254); + break; + case 0x0187: + bufpush(0x0188); + break; + case 0x0189: + bufpush(0x0256); + break; + case 0x018A: + bufpush(0x0257); + break; + case 0x018B: + bufpush(0x018C); + break; + case 0x018E: + bufpush(0x01DD); + break; + case 0x018F: + bufpush(0x0259); + break; + case 0x0190: + bufpush(0x025B); + break; + case 0x0191: + bufpush(0x0192); + break; + case 0x0193: + bufpush(0x0260); + break; + case 0x0194: + bufpush(0x0263); + break; + case 0x0196: + bufpush(0x0269); + break; + case 0x0197: + bufpush(0x0268); + break; + case 0x0198: + bufpush(0x0199); + break; + case 0x019C: + bufpush(0x026F); + break; + case 0x019D: + bufpush(0x0272); + break; + case 0x019F: + bufpush(0x0275); + break; + case 0x01A0: + 
bufpush(0x01A1); + break; + case 0x01A2: + bufpush(0x01A3); + break; + case 0x01A4: + bufpush(0x01A5); + break; + case 0x01A6: + bufpush(0x0280); + break; + case 0x01A7: + bufpush(0x01A8); + break; + case 0x01A9: + bufpush(0x0283); + break; + case 0x01AC: + bufpush(0x01AD); + break; + case 0x01AE: + bufpush(0x0288); + break; + case 0x01AF: + bufpush(0x01B0); + break; + case 0x01B1: + bufpush(0x028A); + break; + case 0x01B2: + bufpush(0x028B); + break; + case 0x01B3: + bufpush(0x01B4); + break; + case 0x01B5: + bufpush(0x01B6); + break; + case 0x01B7: + bufpush(0x0292); + break; + case 0x01B8: + bufpush(0x01B9); + break; + case 0x01BC: + bufpush(0x01BD); + break; + case 0x01C4: + bufpush(0x01C6); + break; + case 0x01C5: + bufpush(0x01C6); + break; + case 0x01C7: + bufpush(0x01C9); + break; + case 0x01C8: + bufpush(0x01C9); + break; + case 0x01CA: + bufpush(0x01CC); + break; + case 0x01CB: + bufpush(0x01CC); + break; + case 0x01CD: + bufpush(0x01CE); + break; + case 0x01CF: + bufpush(0x01D0); + break; + case 0x01D1: + bufpush(0x01D2); + break; + case 0x01D3: + bufpush(0x01D4); + break; + case 0x01D5: + bufpush(0x01D6); + break; + case 0x01D7: + bufpush(0x01D8); + break; + case 0x01D9: + bufpush(0x01DA); + break; + case 0x01DB: + bufpush(0x01DC); + break; + case 0x01DE: + bufpush(0x01DF); + break; + case 0x01E0: + bufpush(0x01E1); + break; + case 0x01E2: + bufpush(0x01E3); + break; + case 0x01E4: + bufpush(0x01E5); + break; + case 0x01E6: + bufpush(0x01E7); + break; + case 0x01E8: + bufpush(0x01E9); + break; + case 0x01EA: + bufpush(0x01EB); + break; + case 0x01EC: + bufpush(0x01ED); + break; + case 0x01EE: + bufpush(0x01EF); + break; + case 0x01F0: + bufpush(0x006A); + bufpush(0x030C); + break; + case 0x01F1: + bufpush(0x01F3); + break; + case 0x01F2: + bufpush(0x01F3); + break; + case 0x01F4: + bufpush(0x01F5); + break; + case 0x01F6: + bufpush(0x0195); + break; + case 0x01F7: + bufpush(0x01BF); + break; + case 0x01F8: + bufpush(0x01F9); + break; + case 0x01FA: + 
bufpush(0x01FB); + break; + case 0x01FC: + bufpush(0x01FD); + break; + case 0x01FE: + bufpush(0x01FF); + break; + case 0x0200: + bufpush(0x0201); + break; + case 0x0202: + bufpush(0x0203); + break; + case 0x0204: + bufpush(0x0205); + break; + case 0x0206: + bufpush(0x0207); + break; + case 0x0208: + bufpush(0x0209); + break; + case 0x020A: + bufpush(0x020B); + break; + case 0x020C: + bufpush(0x020D); + break; + case 0x020E: + bufpush(0x020F); + break; + case 0x0210: + bufpush(0x0211); + break; + case 0x0212: + bufpush(0x0213); + break; + case 0x0214: + bufpush(0x0215); + break; + case 0x0216: + bufpush(0x0217); + break; + case 0x0218: + bufpush(0x0219); + break; + case 0x021A: + bufpush(0x021B); + break; + case 0x021C: + bufpush(0x021D); + break; + case 0x021E: + bufpush(0x021F); + break; + case 0x0220: + bufpush(0x019E); + break; + case 0x0222: + bufpush(0x0223); + break; + case 0x0224: + bufpush(0x0225); + break; + case 0x0226: + bufpush(0x0227); + break; + case 0x0228: + bufpush(0x0229); + break; + case 0x022A: + bufpush(0x022B); + break; + case 0x022C: + bufpush(0x022D); + break; + case 0x022E: + bufpush(0x022F); + break; + case 0x0230: + bufpush(0x0231); + break; + case 0x0232: + bufpush(0x0233); + break; + case 0x023A: + bufpush(0x2C65); + break; + case 0x023B: + bufpush(0x023C); + break; + case 0x023D: + bufpush(0x019A); + break; + case 0x023E: + bufpush(0x2C66); + break; + case 0x0241: + bufpush(0x0242); + break; + case 0x0243: + bufpush(0x0180); + break; + case 0x0244: + bufpush(0x0289); + break; + case 0x0245: + bufpush(0x028C); + break; + case 0x0246: + bufpush(0x0247); + break; + case 0x0248: + bufpush(0x0249); + break; + case 0x024A: + bufpush(0x024B); + break; + case 0x024C: + bufpush(0x024D); + break; + case 0x024E: + bufpush(0x024F); + break; + case 0x0345: + bufpush(0x03B9); + break; + case 0x0370: + bufpush(0x0371); + break; + case 0x0372: + bufpush(0x0373); + break; + case 0x0376: + bufpush(0x0377); + break; + case 0x037F: + bufpush(0x03F3); + 
break; + case 0x0386: + bufpush(0x03AC); + break; + case 0x0388: + bufpush(0x03AD); + break; + case 0x0389: + bufpush(0x03AE); + break; + case 0x038A: + bufpush(0x03AF); + break; + case 0x038C: + bufpush(0x03CC); + break; + case 0x038E: + bufpush(0x03CD); + break; + case 0x038F: + bufpush(0x03CE); + break; + case 0x0390: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x0391: + bufpush(0x03B1); + break; + case 0x0392: + bufpush(0x03B2); + break; + case 0x0393: + bufpush(0x03B3); + break; + case 0x0394: + bufpush(0x03B4); + break; + case 0x0395: + bufpush(0x03B5); + break; + case 0x0396: + bufpush(0x03B6); + break; + case 0x0397: + bufpush(0x03B7); + break; + case 0x0398: + bufpush(0x03B8); + break; + case 0x0399: + bufpush(0x03B9); + break; + case 0x039A: + bufpush(0x03BA); + break; + case 0x039B: + bufpush(0x03BB); + break; + case 0x039C: + bufpush(0x03BC); + break; + case 0x039D: + bufpush(0x03BD); + break; + case 0x039E: + bufpush(0x03BE); + break; + case 0x039F: + bufpush(0x03BF); + break; + case 0x03A0: + bufpush(0x03C0); + break; + case 0x03A1: + bufpush(0x03C1); + break; + case 0x03A3: + bufpush(0x03C3); + break; + case 0x03A4: + bufpush(0x03C4); + break; + case 0x03A5: + bufpush(0x03C5); + break; + case 0x03A6: + bufpush(0x03C6); + break; + case 0x03A7: + bufpush(0x03C7); + break; + case 0x03A8: + bufpush(0x03C8); + break; + case 0x03A9: + bufpush(0x03C9); + break; + case 0x03AA: + bufpush(0x03CA); + break; + case 0x03AB: + bufpush(0x03CB); + break; + case 0x03B0: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x03C2: + bufpush(0x03C3); + break; + case 0x03CF: + bufpush(0x03D7); + break; + case 0x03D0: + bufpush(0x03B2); + break; + case 0x03D1: + bufpush(0x03B8); + break; + case 0x03D5: + bufpush(0x03C6); + break; + case 0x03D6: + bufpush(0x03C0); + break; + case 0x03D8: + bufpush(0x03D9); + break; + case 0x03DA: + bufpush(0x03DB); + break; + case 0x03DC: + bufpush(0x03DD); + break; + case 0x03DE: + 
bufpush(0x03DF); + break; + case 0x03E0: + bufpush(0x03E1); + break; + case 0x03E2: + bufpush(0x03E3); + break; + case 0x03E4: + bufpush(0x03E5); + break; + case 0x03E6: + bufpush(0x03E7); + break; + case 0x03E8: + bufpush(0x03E9); + break; + case 0x03EA: + bufpush(0x03EB); + break; + case 0x03EC: + bufpush(0x03ED); + break; + case 0x03EE: + bufpush(0x03EF); + break; + case 0x03F0: + bufpush(0x03BA); + break; + case 0x03F1: + bufpush(0x03C1); + break; + case 0x03F4: + bufpush(0x03B8); + break; + case 0x03F5: + bufpush(0x03B5); + break; + case 0x03F7: + bufpush(0x03F8); + break; + case 0x03F9: + bufpush(0x03F2); + break; + case 0x03FA: + bufpush(0x03FB); + break; + case 0x03FD: + bufpush(0x037B); + break; + case 0x03FE: + bufpush(0x037C); + break; + case 0x03FF: + bufpush(0x037D); + break; + case 0x0400: + bufpush(0x0450); + break; + case 0x0401: + bufpush(0x0451); + break; + case 0x0402: + bufpush(0x0452); + break; + case 0x0403: + bufpush(0x0453); + break; + case 0x0404: + bufpush(0x0454); + break; + case 0x0405: + bufpush(0x0455); + break; + case 0x0406: + bufpush(0x0456); + break; + case 0x0407: + bufpush(0x0457); + break; + case 0x0408: + bufpush(0x0458); + break; + case 0x0409: + bufpush(0x0459); + break; + case 0x040A: + bufpush(0x045A); + break; + case 0x040B: + bufpush(0x045B); + break; + case 0x040C: + bufpush(0x045C); + break; + case 0x040D: + bufpush(0x045D); + break; + case 0x040E: + bufpush(0x045E); + break; + case 0x040F: + bufpush(0x045F); + break; + case 0x0410: + bufpush(0x0430); + break; + case 0x0411: + bufpush(0x0431); + break; + case 0x0412: + bufpush(0x0432); + break; + case 0x0413: + bufpush(0x0433); + break; + case 0x0414: + bufpush(0x0434); + break; + case 0x0415: + bufpush(0x0435); + break; + case 0x0416: + bufpush(0x0436); + break; + case 0x0417: + bufpush(0x0437); + break; + case 0x0418: + bufpush(0x0438); + break; + case 0x0419: + bufpush(0x0439); + break; + case 0x041A: + bufpush(0x043A); + break; + case 0x041B: + bufpush(0x043B); + 
break; + case 0x041C: + bufpush(0x043C); + break; + case 0x041D: + bufpush(0x043D); + break; + case 0x041E: + bufpush(0x043E); + break; + case 0x041F: + bufpush(0x043F); + break; + case 0x0420: + bufpush(0x0440); + break; + case 0x0421: + bufpush(0x0441); + break; + case 0x0422: + bufpush(0x0442); + break; + case 0x0423: + bufpush(0x0443); + break; + case 0x0424: + bufpush(0x0444); + break; + case 0x0425: + bufpush(0x0445); + break; + case 0x0426: + bufpush(0x0446); + break; + case 0x0427: + bufpush(0x0447); + break; + case 0x0428: + bufpush(0x0448); + break; + case 0x0429: + bufpush(0x0449); + break; + case 0x042A: + bufpush(0x044A); + break; + case 0x042B: + bufpush(0x044B); + break; + case 0x042C: + bufpush(0x044C); + break; + case 0x042D: + bufpush(0x044D); + break; + case 0x042E: + bufpush(0x044E); + break; + case 0x042F: + bufpush(0x044F); + break; + case 0x0460: + bufpush(0x0461); + break; + case 0x0462: + bufpush(0x0463); + break; + case 0x0464: + bufpush(0x0465); + break; + case 0x0466: + bufpush(0x0467); + break; + case 0x0468: + bufpush(0x0469); + break; + case 0x046A: + bufpush(0x046B); + break; + case 0x046C: + bufpush(0x046D); + break; + case 0x046E: + bufpush(0x046F); + break; + case 0x0470: + bufpush(0x0471); + break; + case 0x0472: + bufpush(0x0473); + break; + case 0x0474: + bufpush(0x0475); + break; + case 0x0476: + bufpush(0x0477); + break; + case 0x0478: + bufpush(0x0479); + break; + case 0x047A: + bufpush(0x047B); + break; + case 0x047C: + bufpush(0x047D); + break; + case 0x047E: + bufpush(0x047F); + break; + case 0x0480: + bufpush(0x0481); + break; + case 0x048A: + bufpush(0x048B); + break; + case 0x048C: + bufpush(0x048D); + break; + case 0x048E: + bufpush(0x048F); + break; + case 0x0490: + bufpush(0x0491); + break; + case 0x0492: + bufpush(0x0493); + break; + case 0x0494: + bufpush(0x0495); + break; + case 0x0496: + bufpush(0x0497); + break; + case 0x0498: + bufpush(0x0499); + break; + case 0x049A: + bufpush(0x049B); + break; + case 0x049C: 
+ bufpush(0x049D); + break; + case 0x049E: + bufpush(0x049F); + break; + case 0x04A0: + bufpush(0x04A1); + break; + case 0x04A2: + bufpush(0x04A3); + break; + case 0x04A4: + bufpush(0x04A5); + break; + case 0x04A6: + bufpush(0x04A7); + break; + case 0x04A8: + bufpush(0x04A9); + break; + case 0x04AA: + bufpush(0x04AB); + break; + case 0x04AC: + bufpush(0x04AD); + break; + case 0x04AE: + bufpush(0x04AF); + break; + case 0x04B0: + bufpush(0x04B1); + break; + case 0x04B2: + bufpush(0x04B3); + break; + case 0x04B4: + bufpush(0x04B5); + break; + case 0x04B6: + bufpush(0x04B7); + break; + case 0x04B8: + bufpush(0x04B9); + break; + case 0x04BA: + bufpush(0x04BB); + break; + case 0x04BC: + bufpush(0x04BD); + break; + case 0x04BE: + bufpush(0x04BF); + break; + case 0x04C0: + bufpush(0x04CF); + break; + case 0x04C1: + bufpush(0x04C2); + break; + case 0x04C3: + bufpush(0x04C4); + break; + case 0x04C5: + bufpush(0x04C6); + break; + case 0x04C7: + bufpush(0x04C8); + break; + case 0x04C9: + bufpush(0x04CA); + break; + case 0x04CB: + bufpush(0x04CC); + break; + case 0x04CD: + bufpush(0x04CE); + break; + case 0x04D0: + bufpush(0x04D1); + break; + case 0x04D2: + bufpush(0x04D3); + break; + case 0x04D4: + bufpush(0x04D5); + break; + case 0x04D6: + bufpush(0x04D7); + break; + case 0x04D8: + bufpush(0x04D9); + break; + case 0x04DA: + bufpush(0x04DB); + break; + case 0x04DC: + bufpush(0x04DD); + break; + case 0x04DE: + bufpush(0x04DF); + break; + case 0x04E0: + bufpush(0x04E1); + break; + case 0x04E2: + bufpush(0x04E3); + break; + case 0x04E4: + bufpush(0x04E5); + break; + case 0x04E6: + bufpush(0x04E7); + break; + case 0x04E8: + bufpush(0x04E9); + break; + case 0x04EA: + bufpush(0x04EB); + break; + case 0x04EC: + bufpush(0x04ED); + break; + case 0x04EE: + bufpush(0x04EF); + break; + case 0x04F0: + bufpush(0x04F1); + break; + case 0x04F2: + bufpush(0x04F3); + break; + case 0x04F4: + bufpush(0x04F5); + break; + case 0x04F6: + bufpush(0x04F7); + break; + case 0x04F8: + bufpush(0x04F9); + 
break; + case 0x04FA: + bufpush(0x04FB); + break; + case 0x04FC: + bufpush(0x04FD); + break; + case 0x04FE: + bufpush(0x04FF); + break; + case 0x0500: + bufpush(0x0501); + break; + case 0x0502: + bufpush(0x0503); + break; + case 0x0504: + bufpush(0x0505); + break; + case 0x0506: + bufpush(0x0507); + break; + case 0x0508: + bufpush(0x0509); + break; + case 0x050A: + bufpush(0x050B); + break; + case 0x050C: + bufpush(0x050D); + break; + case 0x050E: + bufpush(0x050F); + break; + case 0x0510: + bufpush(0x0511); + break; + case 0x0512: + bufpush(0x0513); + break; + case 0x0514: + bufpush(0x0515); + break; + case 0x0516: + bufpush(0x0517); + break; + case 0x0518: + bufpush(0x0519); + break; + case 0x051A: + bufpush(0x051B); + break; + case 0x051C: + bufpush(0x051D); + break; + case 0x051E: + bufpush(0x051F); + break; + case 0x0520: + bufpush(0x0521); + break; + case 0x0522: + bufpush(0x0523); + break; + case 0x0524: + bufpush(0x0525); + break; + case 0x0526: + bufpush(0x0527); + break; + case 0x0528: + bufpush(0x0529); + break; + case 0x052A: + bufpush(0x052B); + break; + case 0x052C: + bufpush(0x052D); + break; + case 0x052E: + bufpush(0x052F); + break; + case 0x0531: + bufpush(0x0561); + break; + case 0x0532: + bufpush(0x0562); + break; + case 0x0533: + bufpush(0x0563); + break; + case 0x0534: + bufpush(0x0564); + break; + case 0x0535: + bufpush(0x0565); + break; + case 0x0536: + bufpush(0x0566); + break; + case 0x0537: + bufpush(0x0567); + break; + case 0x0538: + bufpush(0x0568); + break; + case 0x0539: + bufpush(0x0569); + break; + case 0x053A: + bufpush(0x056A); + break; + case 0x053B: + bufpush(0x056B); + break; + case 0x053C: + bufpush(0x056C); + break; + case 0x053D: + bufpush(0x056D); + break; + case 0x053E: + bufpush(0x056E); + break; + case 0x053F: + bufpush(0x056F); + break; + case 0x0540: + bufpush(0x0570); + break; + case 0x0541: + bufpush(0x0571); + break; + case 0x0542: + bufpush(0x0572); + break; + case 0x0543: + bufpush(0x0573); + break; + case 0x0544: 
+ bufpush(0x0574); + break; + case 0x0545: + bufpush(0x0575); + break; + case 0x0546: + bufpush(0x0576); + break; + case 0x0547: + bufpush(0x0577); + break; + case 0x0548: + bufpush(0x0578); + break; + case 0x0549: + bufpush(0x0579); + break; + case 0x054A: + bufpush(0x057A); + break; + case 0x054B: + bufpush(0x057B); + break; + case 0x054C: + bufpush(0x057C); + break; + case 0x054D: + bufpush(0x057D); + break; + case 0x054E: + bufpush(0x057E); + break; + case 0x054F: + bufpush(0x057F); + break; + case 0x0550: + bufpush(0x0580); + break; + case 0x0551: + bufpush(0x0581); + break; + case 0x0552: + bufpush(0x0582); + break; + case 0x0553: + bufpush(0x0583); + break; + case 0x0554: + bufpush(0x0584); + break; + case 0x0555: + bufpush(0x0585); + break; + case 0x0556: + bufpush(0x0586); + break; + case 0x0587: + bufpush(0x0565); + bufpush(0x0582); + break; + case 0x10A0: + bufpush(0x2D00); + break; + case 0x10A1: + bufpush(0x2D01); + break; + case 0x10A2: + bufpush(0x2D02); + break; + case 0x10A3: + bufpush(0x2D03); + break; + case 0x10A4: + bufpush(0x2D04); + break; + case 0x10A5: + bufpush(0x2D05); + break; + case 0x10A6: + bufpush(0x2D06); + break; + case 0x10A7: + bufpush(0x2D07); + break; + case 0x10A8: + bufpush(0x2D08); + break; + case 0x10A9: + bufpush(0x2D09); + break; + case 0x10AA: + bufpush(0x2D0A); + break; + case 0x10AB: + bufpush(0x2D0B); + break; + case 0x10AC: + bufpush(0x2D0C); + break; + case 0x10AD: + bufpush(0x2D0D); + break; + case 0x10AE: + bufpush(0x2D0E); + break; + case 0x10AF: + bufpush(0x2D0F); + break; + case 0x10B0: + bufpush(0x2D10); + break; + case 0x10B1: + bufpush(0x2D11); + break; + case 0x10B2: + bufpush(0x2D12); + break; + case 0x10B3: + bufpush(0x2D13); + break; + case 0x10B4: + bufpush(0x2D14); + break; + case 0x10B5: + bufpush(0x2D15); + break; + case 0x10B6: + bufpush(0x2D16); + break; + case 0x10B7: + bufpush(0x2D17); + break; + case 0x10B8: + bufpush(0x2D18); + break; + case 0x10B9: + bufpush(0x2D19); + break; + case 0x10BA: + 
bufpush(0x2D1A); + break; + case 0x10BB: + bufpush(0x2D1B); + break; + case 0x10BC: + bufpush(0x2D1C); + break; + case 0x10BD: + bufpush(0x2D1D); + break; + case 0x10BE: + bufpush(0x2D1E); + break; + case 0x10BF: + bufpush(0x2D1F); + break; + case 0x10C0: + bufpush(0x2D20); + break; + case 0x10C1: + bufpush(0x2D21); + break; + case 0x10C2: + bufpush(0x2D22); + break; + case 0x10C3: + bufpush(0x2D23); + break; + case 0x10C4: + bufpush(0x2D24); + break; + case 0x10C5: + bufpush(0x2D25); + break; + case 0x10C7: + bufpush(0x2D27); + break; + case 0x10CD: + bufpush(0x2D2D); + break; + case 0x13F8: + bufpush(0x13F0); + break; + case 0x13F9: + bufpush(0x13F1); + break; + case 0x13FA: + bufpush(0x13F2); + break; + case 0x13FB: + bufpush(0x13F3); + break; + case 0x13FC: + bufpush(0x13F4); + break; + case 0x13FD: + bufpush(0x13F5); + break; + case 0x1C80: + bufpush(0x0432); + break; + case 0x1C81: + bufpush(0x0434); + break; + case 0x1C82: + bufpush(0x043E); + break; + case 0x1C83: + bufpush(0x0441); + break; + case 0x1C84: + bufpush(0x0442); + break; + case 0x1C85: + bufpush(0x0442); + break; + case 0x1C86: + bufpush(0x044A); + break; + case 0x1C87: + bufpush(0x0463); + break; + case 0x1C88: + bufpush(0xA64B); + break; + case 0x1C90: + bufpush(0x10D0); + break; + case 0x1C91: + bufpush(0x10D1); + break; + case 0x1C92: + bufpush(0x10D2); + break; + case 0x1C93: + bufpush(0x10D3); + break; + case 0x1C94: + bufpush(0x10D4); + break; + case 0x1C95: + bufpush(0x10D5); + break; + case 0x1C96: + bufpush(0x10D6); + break; + case 0x1C97: + bufpush(0x10D7); + break; + case 0x1C98: + bufpush(0x10D8); + break; + case 0x1C99: + bufpush(0x10D9); + break; + case 0x1C9A: + bufpush(0x10DA); + break; + case 0x1C9B: + bufpush(0x10DB); + break; + case 0x1C9C: + bufpush(0x10DC); + break; + case 0x1C9D: + bufpush(0x10DD); + break; + case 0x1C9E: + bufpush(0x10DE); + break; + case 0x1C9F: + bufpush(0x10DF); + break; + case 0x1CA0: + bufpush(0x10E0); + break; + case 0x1CA1: + bufpush(0x10E1); + 
break; + case 0x1CA2: + bufpush(0x10E2); + break; + case 0x1CA3: + bufpush(0x10E3); + break; + case 0x1CA4: + bufpush(0x10E4); + break; + case 0x1CA5: + bufpush(0x10E5); + break; + case 0x1CA6: + bufpush(0x10E6); + break; + case 0x1CA7: + bufpush(0x10E7); + break; + case 0x1CA8: + bufpush(0x10E8); + break; + case 0x1CA9: + bufpush(0x10E9); + break; + case 0x1CAA: + bufpush(0x10EA); + break; + case 0x1CAB: + bufpush(0x10EB); + break; + case 0x1CAC: + bufpush(0x10EC); + break; + case 0x1CAD: + bufpush(0x10ED); + break; + case 0x1CAE: + bufpush(0x10EE); + break; + case 0x1CAF: + bufpush(0x10EF); + break; + case 0x1CB0: + bufpush(0x10F0); + break; + case 0x1CB1: + bufpush(0x10F1); + break; + case 0x1CB2: + bufpush(0x10F2); + break; + case 0x1CB3: + bufpush(0x10F3); + break; + case 0x1CB4: + bufpush(0x10F4); + break; + case 0x1CB5: + bufpush(0x10F5); + break; + case 0x1CB6: + bufpush(0x10F6); + break; + case 0x1CB7: + bufpush(0x10F7); + break; + case 0x1CB8: + bufpush(0x10F8); + break; + case 0x1CB9: + bufpush(0x10F9); + break; + case 0x1CBA: + bufpush(0x10FA); + break; + case 0x1CBD: + bufpush(0x10FD); + break; + case 0x1CBE: + bufpush(0x10FE); + break; + case 0x1CBF: + bufpush(0x10FF); + break; + case 0x1E00: + bufpush(0x1E01); + break; + case 0x1E02: + bufpush(0x1E03); + break; + case 0x1E04: + bufpush(0x1E05); + break; + case 0x1E06: + bufpush(0x1E07); + break; + case 0x1E08: + bufpush(0x1E09); + break; + case 0x1E0A: + bufpush(0x1E0B); + break; + case 0x1E0C: + bufpush(0x1E0D); + break; + case 0x1E0E: + bufpush(0x1E0F); + break; + case 0x1E10: + bufpush(0x1E11); + break; + case 0x1E12: + bufpush(0x1E13); + break; + case 0x1E14: + bufpush(0x1E15); + break; + case 0x1E16: + bufpush(0x1E17); + break; + case 0x1E18: + bufpush(0x1E19); + break; + case 0x1E1A: + bufpush(0x1E1B); + break; + case 0x1E1C: + bufpush(0x1E1D); + break; + case 0x1E1E: + bufpush(0x1E1F); + break; + case 0x1E20: + bufpush(0x1E21); + break; + case 0x1E22: + bufpush(0x1E23); + break; + case 0x1E24: 
+ bufpush(0x1E25); + break; + case 0x1E26: + bufpush(0x1E27); + break; + case 0x1E28: + bufpush(0x1E29); + break; + case 0x1E2A: + bufpush(0x1E2B); + break; + case 0x1E2C: + bufpush(0x1E2D); + break; + case 0x1E2E: + bufpush(0x1E2F); + break; + case 0x1E30: + bufpush(0x1E31); + break; + case 0x1E32: + bufpush(0x1E33); + break; + case 0x1E34: + bufpush(0x1E35); + break; + case 0x1E36: + bufpush(0x1E37); + break; + case 0x1E38: + bufpush(0x1E39); + break; + case 0x1E3A: + bufpush(0x1E3B); + break; + case 0x1E3C: + bufpush(0x1E3D); + break; + case 0x1E3E: + bufpush(0x1E3F); + break; + case 0x1E40: + bufpush(0x1E41); + break; + case 0x1E42: + bufpush(0x1E43); + break; + case 0x1E44: + bufpush(0x1E45); + break; + case 0x1E46: + bufpush(0x1E47); + break; + case 0x1E48: + bufpush(0x1E49); + break; + case 0x1E4A: + bufpush(0x1E4B); + break; + case 0x1E4C: + bufpush(0x1E4D); + break; + case 0x1E4E: + bufpush(0x1E4F); + break; + case 0x1E50: + bufpush(0x1E51); + break; + case 0x1E52: + bufpush(0x1E53); + break; + case 0x1E54: + bufpush(0x1E55); + break; + case 0x1E56: + bufpush(0x1E57); + break; + case 0x1E58: + bufpush(0x1E59); + break; + case 0x1E5A: + bufpush(0x1E5B); + break; + case 0x1E5C: + bufpush(0x1E5D); + break; + case 0x1E5E: + bufpush(0x1E5F); + break; + case 0x1E60: + bufpush(0x1E61); + break; + case 0x1E62: + bufpush(0x1E63); + break; + case 0x1E64: + bufpush(0x1E65); + break; + case 0x1E66: + bufpush(0x1E67); + break; + case 0x1E68: + bufpush(0x1E69); + break; + case 0x1E6A: + bufpush(0x1E6B); + break; + case 0x1E6C: + bufpush(0x1E6D); + break; + case 0x1E6E: + bufpush(0x1E6F); + break; + case 0x1E70: + bufpush(0x1E71); + break; + case 0x1E72: + bufpush(0x1E73); + break; + case 0x1E74: + bufpush(0x1E75); + break; + case 0x1E76: + bufpush(0x1E77); + break; + case 0x1E78: + bufpush(0x1E79); + break; + case 0x1E7A: + bufpush(0x1E7B); + break; + case 0x1E7C: + bufpush(0x1E7D); + break; + case 0x1E7E: + bufpush(0x1E7F); + break; + case 0x1E80: + bufpush(0x1E81); + 
break; + case 0x1E82: + bufpush(0x1E83); + break; + case 0x1E84: + bufpush(0x1E85); + break; + case 0x1E86: + bufpush(0x1E87); + break; + case 0x1E88: + bufpush(0x1E89); + break; + case 0x1E8A: + bufpush(0x1E8B); + break; + case 0x1E8C: + bufpush(0x1E8D); + break; + case 0x1E8E: + bufpush(0x1E8F); + break; + case 0x1E90: + bufpush(0x1E91); + break; + case 0x1E92: + bufpush(0x1E93); + break; + case 0x1E94: + bufpush(0x1E95); + break; + case 0x1E96: + bufpush(0x0068); + bufpush(0x0331); + break; + case 0x1E97: + bufpush(0x0074); + bufpush(0x0308); + break; + case 0x1E98: + bufpush(0x0077); + bufpush(0x030A); + break; + case 0x1E99: + bufpush(0x0079); + bufpush(0x030A); + break; + case 0x1E9A: + bufpush(0x0061); + bufpush(0x02BE); + break; + case 0x1E9B: + bufpush(0x1E61); + break; + case 0x1E9E: + bufpush(0x0073); + bufpush(0x0073); + break; + case 0x1EA0: + bufpush(0x1EA1); + break; + case 0x1EA2: + bufpush(0x1EA3); + break; + case 0x1EA4: + bufpush(0x1EA5); + break; + case 0x1EA6: + bufpush(0x1EA7); + break; + case 0x1EA8: + bufpush(0x1EA9); + break; + case 0x1EAA: + bufpush(0x1EAB); + break; + case 0x1EAC: + bufpush(0x1EAD); + break; + case 0x1EAE: + bufpush(0x1EAF); + break; + case 0x1EB0: + bufpush(0x1EB1); + break; + case 0x1EB2: + bufpush(0x1EB3); + break; + case 0x1EB4: + bufpush(0x1EB5); + break; + case 0x1EB6: + bufpush(0x1EB7); + break; + case 0x1EB8: + bufpush(0x1EB9); + break; + case 0x1EBA: + bufpush(0x1EBB); + break; + case 0x1EBC: + bufpush(0x1EBD); + break; + case 0x1EBE: + bufpush(0x1EBF); + break; + case 0x1EC0: + bufpush(0x1EC1); + break; + case 0x1EC2: + bufpush(0x1EC3); + break; + case 0x1EC4: + bufpush(0x1EC5); + break; + case 0x1EC6: + bufpush(0x1EC7); + break; + case 0x1EC8: + bufpush(0x1EC9); + break; + case 0x1ECA: + bufpush(0x1ECB); + break; + case 0x1ECC: + bufpush(0x1ECD); + break; + case 0x1ECE: + bufpush(0x1ECF); + break; + case 0x1ED0: + bufpush(0x1ED1); + break; + case 0x1ED2: + bufpush(0x1ED3); + break; + case 0x1ED4: + 
bufpush(0x1ED5); + break; + case 0x1ED6: + bufpush(0x1ED7); + break; + case 0x1ED8: + bufpush(0x1ED9); + break; + case 0x1EDA: + bufpush(0x1EDB); + break; + case 0x1EDC: + bufpush(0x1EDD); + break; + case 0x1EDE: + bufpush(0x1EDF); + break; + case 0x1EE0: + bufpush(0x1EE1); + break; + case 0x1EE2: + bufpush(0x1EE3); + break; + case 0x1EE4: + bufpush(0x1EE5); + break; + case 0x1EE6: + bufpush(0x1EE7); + break; + case 0x1EE8: + bufpush(0x1EE9); + break; + case 0x1EEA: + bufpush(0x1EEB); + break; + case 0x1EEC: + bufpush(0x1EED); + break; + case 0x1EEE: + bufpush(0x1EEF); + break; + case 0x1EF0: + bufpush(0x1EF1); + break; + case 0x1EF2: + bufpush(0x1EF3); + break; + case 0x1EF4: + bufpush(0x1EF5); + break; + case 0x1EF6: + bufpush(0x1EF7); + break; + case 0x1EF8: + bufpush(0x1EF9); + break; + case 0x1EFA: + bufpush(0x1EFB); + break; + case 0x1EFC: + bufpush(0x1EFD); + break; + case 0x1EFE: + bufpush(0x1EFF); + break; + case 0x1F08: + bufpush(0x1F00); + break; + case 0x1F09: + bufpush(0x1F01); + break; + case 0x1F0A: + bufpush(0x1F02); + break; + case 0x1F0B: + bufpush(0x1F03); + break; + case 0x1F0C: + bufpush(0x1F04); + break; + case 0x1F0D: + bufpush(0x1F05); + break; + case 0x1F0E: + bufpush(0x1F06); + break; + case 0x1F0F: + bufpush(0x1F07); + break; + case 0x1F18: + bufpush(0x1F10); + break; + case 0x1F19: + bufpush(0x1F11); + break; + case 0x1F1A: + bufpush(0x1F12); + break; + case 0x1F1B: + bufpush(0x1F13); + break; + case 0x1F1C: + bufpush(0x1F14); + break; + case 0x1F1D: + bufpush(0x1F15); + break; + case 0x1F28: + bufpush(0x1F20); + break; + case 0x1F29: + bufpush(0x1F21); + break; + case 0x1F2A: + bufpush(0x1F22); + break; + case 0x1F2B: + bufpush(0x1F23); + break; + case 0x1F2C: + bufpush(0x1F24); + break; + case 0x1F2D: + bufpush(0x1F25); + break; + case 0x1F2E: + bufpush(0x1F26); + break; + case 0x1F2F: + bufpush(0x1F27); + break; + case 0x1F38: + bufpush(0x1F30); + break; + case 0x1F39: + bufpush(0x1F31); + break; + case 0x1F3A: + bufpush(0x1F32); + 
break; + case 0x1F3B: + bufpush(0x1F33); + break; + case 0x1F3C: + bufpush(0x1F34); + break; + case 0x1F3D: + bufpush(0x1F35); + break; + case 0x1F3E: + bufpush(0x1F36); + break; + case 0x1F3F: + bufpush(0x1F37); + break; + case 0x1F48: + bufpush(0x1F40); + break; + case 0x1F49: + bufpush(0x1F41); + break; + case 0x1F4A: + bufpush(0x1F42); + break; + case 0x1F4B: + bufpush(0x1F43); + break; + case 0x1F4C: + bufpush(0x1F44); + break; + case 0x1F4D: + bufpush(0x1F45); + break; + case 0x1F50: + bufpush(0x03C5); + bufpush(0x0313); + break; + case 0x1F52: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0300); + break; + case 0x1F54: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0301); + break; + case 0x1F56: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0342); + break; + case 0x1F59: + bufpush(0x1F51); + break; + case 0x1F5B: + bufpush(0x1F53); + break; + case 0x1F5D: + bufpush(0x1F55); + break; + case 0x1F5F: + bufpush(0x1F57); + break; + case 0x1F68: + bufpush(0x1F60); + break; + case 0x1F69: + bufpush(0x1F61); + break; + case 0x1F6A: + bufpush(0x1F62); + break; + case 0x1F6B: + bufpush(0x1F63); + break; + case 0x1F6C: + bufpush(0x1F64); + break; + case 0x1F6D: + bufpush(0x1F65); + break; + case 0x1F6E: + bufpush(0x1F66); + break; + case 0x1F6F: + bufpush(0x1F67); + break; + case 0x1F80: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F81: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F82: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F83: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F84: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F85: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F86: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F87: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F88: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F89: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F8A: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F8B: + 
bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F8C: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F8D: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F8E: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F8F: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F90: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F91: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F92: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F93: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F94: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F95: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F96: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F97: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1F98: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F99: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F9A: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F9B: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F9C: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F9D: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F9E: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F9F: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1FA0: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA1: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FA2: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FA3: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FA4: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FA5: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FA6: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FA7: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FA8: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA9: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FAA: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FAB: + 
bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FAC: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FAD: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FAE: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FAF: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FB2: + bufpush(0x1F70); + bufpush(0x03B9); + break; + case 0x1FB3: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FB4: + bufpush(0x03AC); + bufpush(0x03B9); + break; + case 0x1FB6: + bufpush(0x03B1); + bufpush(0x0342); + break; + case 0x1FB7: + bufpush(0x03B1); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FB8: + bufpush(0x1FB0); + break; + case 0x1FB9: + bufpush(0x1FB1); + break; + case 0x1FBA: + bufpush(0x1F70); + break; + case 0x1FBB: + bufpush(0x1F71); + break; + case 0x1FBC: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FBE: + bufpush(0x03B9); + break; + case 0x1FC2: + bufpush(0x1F74); + bufpush(0x03B9); + break; + case 0x1FC3: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FC4: + bufpush(0x03AE); + bufpush(0x03B9); + break; + case 0x1FC6: + bufpush(0x03B7); + bufpush(0x0342); + break; + case 0x1FC7: + bufpush(0x03B7); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FC8: + bufpush(0x1F72); + break; + case 0x1FC9: + bufpush(0x1F73); + break; + case 0x1FCA: + bufpush(0x1F74); + break; + case 0x1FCB: + bufpush(0x1F75); + break; + case 0x1FCC: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FD2: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FD3: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FD6: + bufpush(0x03B9); + bufpush(0x0342); + break; + case 0x1FD7: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FD8: + bufpush(0x1FD0); + break; + case 0x1FD9: + bufpush(0x1FD1); + break; + case 0x1FDA: + bufpush(0x1F76); + break; + case 0x1FDB: + bufpush(0x1F77); + break; + case 0x1FE2: + bufpush(0x03C5); + bufpush(0x0308); + 
bufpush(0x0300); + break; + case 0x1FE3: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FE4: + bufpush(0x03C1); + bufpush(0x0313); + break; + case 0x1FE6: + bufpush(0x03C5); + bufpush(0x0342); + break; + case 0x1FE7: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FE8: + bufpush(0x1FE0); + break; + case 0x1FE9: + bufpush(0x1FE1); + break; + case 0x1FEA: + bufpush(0x1F7A); + break; + case 0x1FEB: + bufpush(0x1F7B); + break; + case 0x1FEC: + bufpush(0x1FE5); + break; + case 0x1FF2: + bufpush(0x1F7C); + bufpush(0x03B9); + break; + case 0x1FF3: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x1FF4: + bufpush(0x03CE); + bufpush(0x03B9); + break; + case 0x1FF6: + bufpush(0x03C9); + bufpush(0x0342); + break; + case 0x1FF7: + bufpush(0x03C9); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FF8: + bufpush(0x1F78); + break; + case 0x1FF9: + bufpush(0x1F79); + break; + case 0x1FFA: + bufpush(0x1F7C); + break; + case 0x1FFB: + bufpush(0x1F7D); + break; + case 0x1FFC: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x2126: + bufpush(0x03C9); + break; + case 0x212A: + bufpush(0x006B); + break; + case 0x212B: + bufpush(0x00E5); + break; + case 0x2132: + bufpush(0x214E); + break; + case 0x2160: + bufpush(0x2170); + break; + case 0x2161: + bufpush(0x2171); + break; + case 0x2162: + bufpush(0x2172); + break; + case 0x2163: + bufpush(0x2173); + break; + case 0x2164: + bufpush(0x2174); + break; + case 0x2165: + bufpush(0x2175); + break; + case 0x2166: + bufpush(0x2176); + break; + case 0x2167: + bufpush(0x2177); + break; + case 0x2168: + bufpush(0x2178); + break; + case 0x2169: + bufpush(0x2179); + break; + case 0x216A: + bufpush(0x217A); + break; + case 0x216B: + bufpush(0x217B); + break; + case 0x216C: + bufpush(0x217C); + break; + case 0x216D: + bufpush(0x217D); + break; + case 0x216E: + bufpush(0x217E); + break; + case 0x216F: + bufpush(0x217F); + break; + case 0x2183: + bufpush(0x2184); + break; + case 
0x24B6: + bufpush(0x24D0); + break; + case 0x24B7: + bufpush(0x24D1); + break; + case 0x24B8: + bufpush(0x24D2); + break; + case 0x24B9: + bufpush(0x24D3); + break; + case 0x24BA: + bufpush(0x24D4); + break; + case 0x24BB: + bufpush(0x24D5); + break; + case 0x24BC: + bufpush(0x24D6); + break; + case 0x24BD: + bufpush(0x24D7); + break; + case 0x24BE: + bufpush(0x24D8); + break; + case 0x24BF: + bufpush(0x24D9); + break; + case 0x24C0: + bufpush(0x24DA); + break; + case 0x24C1: + bufpush(0x24DB); + break; + case 0x24C2: + bufpush(0x24DC); + break; + case 0x24C3: + bufpush(0x24DD); + break; + case 0x24C4: + bufpush(0x24DE); + break; + case 0x24C5: + bufpush(0x24DF); + break; + case 0x24C6: + bufpush(0x24E0); + break; + case 0x24C7: + bufpush(0x24E1); + break; + case 0x24C8: + bufpush(0x24E2); + break; + case 0x24C9: + bufpush(0x24E3); + break; + case 0x24CA: + bufpush(0x24E4); + break; + case 0x24CB: + bufpush(0x24E5); + break; + case 0x24CC: + bufpush(0x24E6); + break; + case 0x24CD: + bufpush(0x24E7); + break; + case 0x24CE: + bufpush(0x24E8); + break; + case 0x24CF: + bufpush(0x24E9); + break; + case 0x2C00: + bufpush(0x2C30); + break; + case 0x2C01: + bufpush(0x2C31); + break; + case 0x2C02: + bufpush(0x2C32); + break; + case 0x2C03: + bufpush(0x2C33); + break; + case 0x2C04: + bufpush(0x2C34); + break; + case 0x2C05: + bufpush(0x2C35); + break; + case 0x2C06: + bufpush(0x2C36); + break; + case 0x2C07: + bufpush(0x2C37); + break; + case 0x2C08: + bufpush(0x2C38); + break; + case 0x2C09: + bufpush(0x2C39); + break; + case 0x2C0A: + bufpush(0x2C3A); + break; + case 0x2C0B: + bufpush(0x2C3B); + break; + case 0x2C0C: + bufpush(0x2C3C); + break; + case 0x2C0D: + bufpush(0x2C3D); + break; + case 0x2C0E: + bufpush(0x2C3E); + break; + case 0x2C0F: + bufpush(0x2C3F); + break; + case 0x2C10: + bufpush(0x2C40); + break; + case 0x2C11: + bufpush(0x2C41); + break; + case 0x2C12: + bufpush(0x2C42); + break; + case 0x2C13: + bufpush(0x2C43); + break; + case 0x2C14: + 
bufpush(0x2C44); + break; + case 0x2C15: + bufpush(0x2C45); + break; + case 0x2C16: + bufpush(0x2C46); + break; + case 0x2C17: + bufpush(0x2C47); + break; + case 0x2C18: + bufpush(0x2C48); + break; + case 0x2C19: + bufpush(0x2C49); + break; + case 0x2C1A: + bufpush(0x2C4A); + break; + case 0x2C1B: + bufpush(0x2C4B); + break; + case 0x2C1C: + bufpush(0x2C4C); + break; + case 0x2C1D: + bufpush(0x2C4D); + break; + case 0x2C1E: + bufpush(0x2C4E); + break; + case 0x2C1F: + bufpush(0x2C4F); + break; + case 0x2C20: + bufpush(0x2C50); + break; + case 0x2C21: + bufpush(0x2C51); + break; + case 0x2C22: + bufpush(0x2C52); + break; + case 0x2C23: + bufpush(0x2C53); + break; + case 0x2C24: + bufpush(0x2C54); + break; + case 0x2C25: + bufpush(0x2C55); + break; + case 0x2C26: + bufpush(0x2C56); + break; + case 0x2C27: + bufpush(0x2C57); + break; + case 0x2C28: + bufpush(0x2C58); + break; + case 0x2C29: + bufpush(0x2C59); + break; + case 0x2C2A: + bufpush(0x2C5A); + break; + case 0x2C2B: + bufpush(0x2C5B); + break; + case 0x2C2C: + bufpush(0x2C5C); + break; + case 0x2C2D: + bufpush(0x2C5D); + break; + case 0x2C2E: + bufpush(0x2C5E); + break; + case 0x2C2F: + bufpush(0x2C5F); + break; + case 0x2C60: + bufpush(0x2C61); + break; + case 0x2C62: + bufpush(0x026B); + break; + case 0x2C63: + bufpush(0x1D7D); + break; + case 0x2C64: + bufpush(0x027D); + break; + case 0x2C67: + bufpush(0x2C68); + break; + case 0x2C69: + bufpush(0x2C6A); + break; + case 0x2C6B: + bufpush(0x2C6C); + break; + case 0x2C6D: + bufpush(0x0251); + break; + case 0x2C6E: + bufpush(0x0271); + break; + case 0x2C6F: + bufpush(0x0250); + break; + case 0x2C70: + bufpush(0x0252); + break; + case 0x2C72: + bufpush(0x2C73); + break; + case 0x2C75: + bufpush(0x2C76); + break; + case 0x2C7E: + bufpush(0x023F); + break; + case 0x2C7F: + bufpush(0x0240); + break; + case 0x2C80: + bufpush(0x2C81); + break; + case 0x2C82: + bufpush(0x2C83); + break; + case 0x2C84: + bufpush(0x2C85); + break; + case 0x2C86: + bufpush(0x2C87); + 
break; + case 0x2C88: + bufpush(0x2C89); + break; + case 0x2C8A: + bufpush(0x2C8B); + break; + case 0x2C8C: + bufpush(0x2C8D); + break; + case 0x2C8E: + bufpush(0x2C8F); + break; + case 0x2C90: + bufpush(0x2C91); + break; + case 0x2C92: + bufpush(0x2C93); + break; + case 0x2C94: + bufpush(0x2C95); + break; + case 0x2C96: + bufpush(0x2C97); + break; + case 0x2C98: + bufpush(0x2C99); + break; + case 0x2C9A: + bufpush(0x2C9B); + break; + case 0x2C9C: + bufpush(0x2C9D); + break; + case 0x2C9E: + bufpush(0x2C9F); + break; + case 0x2CA0: + bufpush(0x2CA1); + break; + case 0x2CA2: + bufpush(0x2CA3); + break; + case 0x2CA4: + bufpush(0x2CA5); + break; + case 0x2CA6: + bufpush(0x2CA7); + break; + case 0x2CA8: + bufpush(0x2CA9); + break; + case 0x2CAA: + bufpush(0x2CAB); + break; + case 0x2CAC: + bufpush(0x2CAD); + break; + case 0x2CAE: + bufpush(0x2CAF); + break; + case 0x2CB0: + bufpush(0x2CB1); + break; + case 0x2CB2: + bufpush(0x2CB3); + break; + case 0x2CB4: + bufpush(0x2CB5); + break; + case 0x2CB6: + bufpush(0x2CB7); + break; + case 0x2CB8: + bufpush(0x2CB9); + break; + case 0x2CBA: + bufpush(0x2CBB); + break; + case 0x2CBC: + bufpush(0x2CBD); + break; + case 0x2CBE: + bufpush(0x2CBF); + break; + case 0x2CC0: + bufpush(0x2CC1); + break; + case 0x2CC2: + bufpush(0x2CC3); + break; + case 0x2CC4: + bufpush(0x2CC5); + break; + case 0x2CC6: + bufpush(0x2CC7); + break; + case 0x2CC8: + bufpush(0x2CC9); + break; + case 0x2CCA: + bufpush(0x2CCB); + break; + case 0x2CCC: + bufpush(0x2CCD); + break; + case 0x2CCE: + bufpush(0x2CCF); + break; + case 0x2CD0: + bufpush(0x2CD1); + break; + case 0x2CD2: + bufpush(0x2CD3); + break; + case 0x2CD4: + bufpush(0x2CD5); + break; + case 0x2CD6: + bufpush(0x2CD7); + break; + case 0x2CD8: + bufpush(0x2CD9); + break; + case 0x2CDA: + bufpush(0x2CDB); + break; + case 0x2CDC: + bufpush(0x2CDD); + break; + case 0x2CDE: + bufpush(0x2CDF); + break; + case 0x2CE0: + bufpush(0x2CE1); + break; + case 0x2CE2: + bufpush(0x2CE3); + break; + case 0x2CEB: 
+ bufpush(0x2CEC); + break; + case 0x2CED: + bufpush(0x2CEE); + break; + case 0x2CF2: + bufpush(0x2CF3); + break; + case 0xA640: + bufpush(0xA641); + break; + case 0xA642: + bufpush(0xA643); + break; + case 0xA644: + bufpush(0xA645); + break; + case 0xA646: + bufpush(0xA647); + break; + case 0xA648: + bufpush(0xA649); + break; + case 0xA64A: + bufpush(0xA64B); + break; + case 0xA64C: + bufpush(0xA64D); + break; + case 0xA64E: + bufpush(0xA64F); + break; + case 0xA650: + bufpush(0xA651); + break; + case 0xA652: + bufpush(0xA653); + break; + case 0xA654: + bufpush(0xA655); + break; + case 0xA656: + bufpush(0xA657); + break; + case 0xA658: + bufpush(0xA659); + break; + case 0xA65A: + bufpush(0xA65B); + break; + case 0xA65C: + bufpush(0xA65D); + break; + case 0xA65E: + bufpush(0xA65F); + break; + case 0xA660: + bufpush(0xA661); + break; + case 0xA662: + bufpush(0xA663); + break; + case 0xA664: + bufpush(0xA665); + break; + case 0xA666: + bufpush(0xA667); + break; + case 0xA668: + bufpush(0xA669); + break; + case 0xA66A: + bufpush(0xA66B); + break; + case 0xA66C: + bufpush(0xA66D); + break; + case 0xA680: + bufpush(0xA681); + break; + case 0xA682: + bufpush(0xA683); + break; + case 0xA684: + bufpush(0xA685); + break; + case 0xA686: + bufpush(0xA687); + break; + case 0xA688: + bufpush(0xA689); + break; + case 0xA68A: + bufpush(0xA68B); + break; + case 0xA68C: + bufpush(0xA68D); + break; + case 0xA68E: + bufpush(0xA68F); + break; + case 0xA690: + bufpush(0xA691); + break; + case 0xA692: + bufpush(0xA693); + break; + case 0xA694: + bufpush(0xA695); + break; + case 0xA696: + bufpush(0xA697); + break; + case 0xA698: + bufpush(0xA699); + break; + case 0xA69A: + bufpush(0xA69B); + break; + case 0xA722: + bufpush(0xA723); + break; + case 0xA724: + bufpush(0xA725); + break; + case 0xA726: + bufpush(0xA727); + break; + case 0xA728: + bufpush(0xA729); + break; + case 0xA72A: + bufpush(0xA72B); + break; + case 0xA72C: + bufpush(0xA72D); + break; + case 0xA72E: + bufpush(0xA72F); + 
break; + case 0xA732: + bufpush(0xA733); + break; + case 0xA734: + bufpush(0xA735); + break; + case 0xA736: + bufpush(0xA737); + break; + case 0xA738: + bufpush(0xA739); + break; + case 0xA73A: + bufpush(0xA73B); + break; + case 0xA73C: + bufpush(0xA73D); + break; + case 0xA73E: + bufpush(0xA73F); + break; + case 0xA740: + bufpush(0xA741); + break; + case 0xA742: + bufpush(0xA743); + break; + case 0xA744: + bufpush(0xA745); + break; + case 0xA746: + bufpush(0xA747); + break; + case 0xA748: + bufpush(0xA749); + break; + case 0xA74A: + bufpush(0xA74B); + break; + case 0xA74C: + bufpush(0xA74D); + break; + case 0xA74E: + bufpush(0xA74F); + break; + case 0xA750: + bufpush(0xA751); + break; + case 0xA752: + bufpush(0xA753); + break; + case 0xA754: + bufpush(0xA755); + break; + case 0xA756: + bufpush(0xA757); + break; + case 0xA758: + bufpush(0xA759); + break; + case 0xA75A: + bufpush(0xA75B); + break; + case 0xA75C: + bufpush(0xA75D); + break; + case 0xA75E: + bufpush(0xA75F); + break; + case 0xA760: + bufpush(0xA761); + break; + case 0xA762: + bufpush(0xA763); + break; + case 0xA764: + bufpush(0xA765); + break; + case 0xA766: + bufpush(0xA767); + break; + case 0xA768: + bufpush(0xA769); + break; + case 0xA76A: + bufpush(0xA76B); + break; + case 0xA76C: + bufpush(0xA76D); + break; + case 0xA76E: + bufpush(0xA76F); + break; + case 0xA779: + bufpush(0xA77A); + break; + case 0xA77B: + bufpush(0xA77C); + break; + case 0xA77D: + bufpush(0x1D79); + break; + case 0xA77E: + bufpush(0xA77F); + break; + case 0xA780: + bufpush(0xA781); + break; + case 0xA782: + bufpush(0xA783); + break; + case 0xA784: + bufpush(0xA785); + break; + case 0xA786: + bufpush(0xA787); + break; + case 0xA78B: + bufpush(0xA78C); + break; + case 0xA78D: + bufpush(0x0265); + break; + case 0xA790: + bufpush(0xA791); + break; + case 0xA792: + bufpush(0xA793); + break; + case 0xA796: + bufpush(0xA797); + break; + case 0xA798: + bufpush(0xA799); + break; + case 0xA79A: + bufpush(0xA79B); + break; + case 0xA79C: 
+ bufpush(0xA79D); + break; + case 0xA79E: + bufpush(0xA79F); + break; + case 0xA7A0: + bufpush(0xA7A1); + break; + case 0xA7A2: + bufpush(0xA7A3); + break; + case 0xA7A4: + bufpush(0xA7A5); + break; + case 0xA7A6: + bufpush(0xA7A7); + break; + case 0xA7A8: + bufpush(0xA7A9); + break; + case 0xA7AA: + bufpush(0x0266); + break; + case 0xA7AB: + bufpush(0x025C); + break; + case 0xA7AC: + bufpush(0x0261); + break; + case 0xA7AD: + bufpush(0x026C); + break; + case 0xA7AE: + bufpush(0x026A); + break; + case 0xA7B0: + bufpush(0x029E); + break; + case 0xA7B1: + bufpush(0x0287); + break; + case 0xA7B2: + bufpush(0x029D); + break; + case 0xA7B3: + bufpush(0xAB53); + break; + case 0xA7B4: + bufpush(0xA7B5); + break; + case 0xA7B6: + bufpush(0xA7B7); + break; + case 0xA7B8: + bufpush(0xA7B9); + break; + case 0xA7BA: + bufpush(0xA7BB); + break; + case 0xA7BC: + bufpush(0xA7BD); + break; + case 0xA7BE: + bufpush(0xA7BF); + break; + case 0xA7C0: + bufpush(0xA7C1); + break; + case 0xA7C2: + bufpush(0xA7C3); + break; + case 0xA7C4: + bufpush(0xA794); + break; + case 0xA7C5: + bufpush(0x0282); + break; + case 0xA7C6: + bufpush(0x1D8E); + break; + case 0xA7C7: + bufpush(0xA7C8); + break; + case 0xA7C9: + bufpush(0xA7CA); + break; + case 0xA7D0: + bufpush(0xA7D1); + break; + case 0xA7D6: + bufpush(0xA7D7); + break; + case 0xA7D8: + bufpush(0xA7D9); + break; + case 0xA7F5: + bufpush(0xA7F6); + break; + case 0xAB70: + bufpush(0x13A0); + break; + case 0xAB71: + bufpush(0x13A1); + break; + case 0xAB72: + bufpush(0x13A2); + break; + case 0xAB73: + bufpush(0x13A3); + break; + case 0xAB74: + bufpush(0x13A4); + break; + case 0xAB75: + bufpush(0x13A5); + break; + case 0xAB76: + bufpush(0x13A6); + break; + case 0xAB77: + bufpush(0x13A7); + break; + case 0xAB78: + bufpush(0x13A8); + break; + case 0xAB79: + bufpush(0x13A9); + break; + case 0xAB7A: + bufpush(0x13AA); + break; + case 0xAB7B: + bufpush(0x13AB); + break; + case 0xAB7C: + bufpush(0x13AC); + break; + case 0xAB7D: + bufpush(0x13AD); + 
break; + case 0xAB7E: + bufpush(0x13AE); + break; + case 0xAB7F: + bufpush(0x13AF); + break; + case 0xAB80: + bufpush(0x13B0); + break; + case 0xAB81: + bufpush(0x13B1); + break; + case 0xAB82: + bufpush(0x13B2); + break; + case 0xAB83: + bufpush(0x13B3); + break; + case 0xAB84: + bufpush(0x13B4); + break; + case 0xAB85: + bufpush(0x13B5); + break; + case 0xAB86: + bufpush(0x13B6); + break; + case 0xAB87: + bufpush(0x13B7); + break; + case 0xAB88: + bufpush(0x13B8); + break; + case 0xAB89: + bufpush(0x13B9); + break; + case 0xAB8A: + bufpush(0x13BA); + break; + case 0xAB8B: + bufpush(0x13BB); + break; + case 0xAB8C: + bufpush(0x13BC); + break; + case 0xAB8D: + bufpush(0x13BD); + break; + case 0xAB8E: + bufpush(0x13BE); + break; + case 0xAB8F: + bufpush(0x13BF); + break; + case 0xAB90: + bufpush(0x13C0); + break; + case 0xAB91: + bufpush(0x13C1); + break; + case 0xAB92: + bufpush(0x13C2); + break; + case 0xAB93: + bufpush(0x13C3); + break; + case 0xAB94: + bufpush(0x13C4); + break; + case 0xAB95: + bufpush(0x13C5); + break; + case 0xAB96: + bufpush(0x13C6); + break; + case 0xAB97: + bufpush(0x13C7); + break; + case 0xAB98: + bufpush(0x13C8); + break; + case 0xAB99: + bufpush(0x13C9); + break; + case 0xAB9A: + bufpush(0x13CA); + break; + case 0xAB9B: + bufpush(0x13CB); + break; + case 0xAB9C: + bufpush(0x13CC); + break; + case 0xAB9D: + bufpush(0x13CD); + break; + case 0xAB9E: + bufpush(0x13CE); + break; + case 0xAB9F: + bufpush(0x13CF); + break; + case 0xABA0: + bufpush(0x13D0); + break; + case 0xABA1: + bufpush(0x13D1); + break; + case 0xABA2: + bufpush(0x13D2); + break; + case 0xABA3: + bufpush(0x13D3); + break; + case 0xABA4: + bufpush(0x13D4); + break; + case 0xABA5: + bufpush(0x13D5); + break; + case 0xABA6: + bufpush(0x13D6); + break; + case 0xABA7: + bufpush(0x13D7); + break; + case 0xABA8: + bufpush(0x13D8); + break; + case 0xABA9: + bufpush(0x13D9); + break; + case 0xABAA: + bufpush(0x13DA); + break; + case 0xABAB: + bufpush(0x13DB); + break; + case 0xABAC: 
+ bufpush(0x13DC); + break; + case 0xABAD: + bufpush(0x13DD); + break; + case 0xABAE: + bufpush(0x13DE); + break; + case 0xABAF: + bufpush(0x13DF); + break; + case 0xABB0: + bufpush(0x13E0); + break; + case 0xABB1: + bufpush(0x13E1); + break; + case 0xABB2: + bufpush(0x13E2); + break; + case 0xABB3: + bufpush(0x13E3); + break; + case 0xABB4: + bufpush(0x13E4); + break; + case 0xABB5: + bufpush(0x13E5); + break; + case 0xABB6: + bufpush(0x13E6); + break; + case 0xABB7: + bufpush(0x13E7); + break; + case 0xABB8: + bufpush(0x13E8); + break; + case 0xABB9: + bufpush(0x13E9); + break; + case 0xABBA: + bufpush(0x13EA); + break; + case 0xABBB: + bufpush(0x13EB); + break; + case 0xABBC: + bufpush(0x13EC); + break; + case 0xABBD: + bufpush(0x13ED); + break; + case 0xABBE: + bufpush(0x13EE); + break; + case 0xABBF: + bufpush(0x13EF); + break; + case 0xFB00: + bufpush(0x0066); + bufpush(0x0066); + break; + case 0xFB01: + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB02: + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB03: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB04: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB05: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB06: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB13: + bufpush(0x0574); + bufpush(0x0576); + break; + case 0xFB14: + bufpush(0x0574); + bufpush(0x0565); + break; + case 0xFB15: + bufpush(0x0574); + bufpush(0x056B); + break; + case 0xFB16: + bufpush(0x057E); + bufpush(0x0576); + break; + case 0xFB17: + bufpush(0x0574); + bufpush(0x056D); + break; + case 0xFF21: + bufpush(0xFF41); + break; + case 0xFF22: + bufpush(0xFF42); + break; + case 0xFF23: + bufpush(0xFF43); + break; + case 0xFF24: + bufpush(0xFF44); + break; + case 0xFF25: + bufpush(0xFF45); + break; + case 0xFF26: + bufpush(0xFF46); + break; + case 0xFF27: + bufpush(0xFF47); + break; + case 0xFF28: + bufpush(0xFF48); + break; + case 0xFF29: + 
bufpush(0xFF49); + break; + case 0xFF2A: + bufpush(0xFF4A); + break; + case 0xFF2B: + bufpush(0xFF4B); + break; + case 0xFF2C: + bufpush(0xFF4C); + break; + case 0xFF2D: + bufpush(0xFF4D); + break; + case 0xFF2E: + bufpush(0xFF4E); + break; + case 0xFF2F: + bufpush(0xFF4F); + break; + case 0xFF30: + bufpush(0xFF50); + break; + case 0xFF31: + bufpush(0xFF51); + break; + case 0xFF32: + bufpush(0xFF52); + break; + case 0xFF33: + bufpush(0xFF53); + break; + case 0xFF34: + bufpush(0xFF54); + break; + case 0xFF35: + bufpush(0xFF55); + break; + case 0xFF36: + bufpush(0xFF56); + break; + case 0xFF37: + bufpush(0xFF57); + break; + case 0xFF38: + bufpush(0xFF58); + break; + case 0xFF39: + bufpush(0xFF59); + break; + case 0xFF3A: + bufpush(0xFF5A); + break; + case 0x10400: + bufpush(0x10428); + break; + case 0x10401: + bufpush(0x10429); + break; + case 0x10402: + bufpush(0x1042A); + break; + case 0x10403: + bufpush(0x1042B); + break; + case 0x10404: + bufpush(0x1042C); + break; + case 0x10405: + bufpush(0x1042D); + break; + case 0x10406: + bufpush(0x1042E); + break; + case 0x10407: + bufpush(0x1042F); + break; + case 0x10408: + bufpush(0x10430); + break; + case 0x10409: + bufpush(0x10431); + break; + case 0x1040A: + bufpush(0x10432); + break; + case 0x1040B: + bufpush(0x10433); + break; + case 0x1040C: + bufpush(0x10434); + break; + case 0x1040D: + bufpush(0x10435); + break; + case 0x1040E: + bufpush(0x10436); + break; + case 0x1040F: + bufpush(0x10437); + break; + case 0x10410: + bufpush(0x10438); + break; + case 0x10411: + bufpush(0x10439); + break; + case 0x10412: + bufpush(0x1043A); + break; + case 0x10413: + bufpush(0x1043B); + break; + case 0x10414: + bufpush(0x1043C); + break; + case 0x10415: + bufpush(0x1043D); + break; + case 0x10416: + bufpush(0x1043E); + break; + case 0x10417: + bufpush(0x1043F); + break; + case 0x10418: + bufpush(0x10440); + break; + case 0x10419: + bufpush(0x10441); + break; + case 0x1041A: + bufpush(0x10442); + break; + case 0x1041B: + 
bufpush(0x10443); + break; + case 0x1041C: + bufpush(0x10444); + break; + case 0x1041D: + bufpush(0x10445); + break; + case 0x1041E: + bufpush(0x10446); + break; + case 0x1041F: + bufpush(0x10447); + break; + case 0x10420: + bufpush(0x10448); + break; + case 0x10421: + bufpush(0x10449); + break; + case 0x10422: + bufpush(0x1044A); + break; + case 0x10423: + bufpush(0x1044B); + break; + case 0x10424: + bufpush(0x1044C); + break; + case 0x10425: + bufpush(0x1044D); + break; + case 0x10426: + bufpush(0x1044E); + break; + case 0x10427: + bufpush(0x1044F); + break; + case 0x104B0: + bufpush(0x104D8); + break; + case 0x104B1: + bufpush(0x104D9); + break; + case 0x104B2: + bufpush(0x104DA); + break; + case 0x104B3: + bufpush(0x104DB); + break; + case 0x104B4: + bufpush(0x104DC); + break; + case 0x104B5: + bufpush(0x104DD); + break; + case 0x104B6: + bufpush(0x104DE); + break; + case 0x104B7: + bufpush(0x104DF); + break; + case 0x104B8: + bufpush(0x104E0); + break; + case 0x104B9: + bufpush(0x104E1); + break; + case 0x104BA: + bufpush(0x104E2); + break; + case 0x104BB: + bufpush(0x104E3); + break; + case 0x104BC: + bufpush(0x104E4); + break; + case 0x104BD: + bufpush(0x104E5); + break; + case 0x104BE: + bufpush(0x104E6); + break; + case 0x104BF: + bufpush(0x104E7); + break; + case 0x104C0: + bufpush(0x104E8); + break; + case 0x104C1: + bufpush(0x104E9); + break; + case 0x104C2: + bufpush(0x104EA); + break; + case 0x104C3: + bufpush(0x104EB); + break; + case 0x104C4: + bufpush(0x104EC); + break; + case 0x104C5: + bufpush(0x104ED); + break; + case 0x104C6: + bufpush(0x104EE); + break; + case 0x104C7: + bufpush(0x104EF); + break; + case 0x104C8: + bufpush(0x104F0); + break; + case 0x104C9: + bufpush(0x104F1); + break; + case 0x104CA: + bufpush(0x104F2); + break; + case 0x104CB: + bufpush(0x104F3); + break; + case 0x104CC: + bufpush(0x104F4); + break; + case 0x104CD: + bufpush(0x104F5); + break; + case 0x104CE: + bufpush(0x104F6); + break; + case 0x104CF: + bufpush(0x104F7); + 
break; + case 0x104D0: + bufpush(0x104F8); + break; + case 0x104D1: + bufpush(0x104F9); + break; + case 0x104D2: + bufpush(0x104FA); + break; + case 0x104D3: + bufpush(0x104FB); + break; + case 0x10570: + bufpush(0x10597); + break; + case 0x10571: + bufpush(0x10598); + break; + case 0x10572: + bufpush(0x10599); + break; + case 0x10573: + bufpush(0x1059A); + break; + case 0x10574: + bufpush(0x1059B); + break; + case 0x10575: + bufpush(0x1059C); + break; + case 0x10576: + bufpush(0x1059D); + break; + case 0x10577: + bufpush(0x1059E); + break; + case 0x10578: + bufpush(0x1059F); + break; + case 0x10579: + bufpush(0x105A0); + break; + case 0x1057A: + bufpush(0x105A1); + break; + case 0x1057C: + bufpush(0x105A3); + break; + case 0x1057D: + bufpush(0x105A4); + break; + case 0x1057E: + bufpush(0x105A5); + break; + case 0x1057F: + bufpush(0x105A6); + break; + case 0x10580: + bufpush(0x105A7); + break; + case 0x10581: + bufpush(0x105A8); + break; + case 0x10582: + bufpush(0x105A9); + break; + case 0x10583: + bufpush(0x105AA); + break; + case 0x10584: + bufpush(0x105AB); + break; + case 0x10585: + bufpush(0x105AC); + break; + case 0x10586: + bufpush(0x105AD); + break; + case 0x10587: + bufpush(0x105AE); + break; + case 0x10588: + bufpush(0x105AF); + break; + case 0x10589: + bufpush(0x105B0); + break; + case 0x1058A: + bufpush(0x105B1); + break; + case 0x1058C: + bufpush(0x105B3); + break; + case 0x1058D: + bufpush(0x105B4); + break; + case 0x1058E: + bufpush(0x105B5); + break; + case 0x1058F: + bufpush(0x105B6); + break; + case 0x10590: + bufpush(0x105B7); + break; + case 0x10591: + bufpush(0x105B8); + break; + case 0x10592: + bufpush(0x105B9); + break; + case 0x10594: + bufpush(0x105BB); + break; + case 0x10595: + bufpush(0x105BC); + break; + case 0x10C80: + bufpush(0x10CC0); + break; + case 0x10C81: + bufpush(0x10CC1); + break; + case 0x10C82: + bufpush(0x10CC2); + break; + case 0x10C83: + bufpush(0x10CC3); + break; + case 0x10C84: + bufpush(0x10CC4); + break; + case 
0x10C85: + bufpush(0x10CC5); + break; + case 0x10C86: + bufpush(0x10CC6); + break; + case 0x10C87: + bufpush(0x10CC7); + break; + case 0x10C88: + bufpush(0x10CC8); + break; + case 0x10C89: + bufpush(0x10CC9); + break; + case 0x10C8A: + bufpush(0x10CCA); + break; + case 0x10C8B: + bufpush(0x10CCB); + break; + case 0x10C8C: + bufpush(0x10CCC); + break; + case 0x10C8D: + bufpush(0x10CCD); + break; + case 0x10C8E: + bufpush(0x10CCE); + break; + case 0x10C8F: + bufpush(0x10CCF); + break; + case 0x10C90: + bufpush(0x10CD0); + break; + case 0x10C91: + bufpush(0x10CD1); + break; + case 0x10C92: + bufpush(0x10CD2); + break; + case 0x10C93: + bufpush(0x10CD3); + break; + case 0x10C94: + bufpush(0x10CD4); + break; + case 0x10C95: + bufpush(0x10CD5); + break; + case 0x10C96: + bufpush(0x10CD6); + break; + case 0x10C97: + bufpush(0x10CD7); + break; + case 0x10C98: + bufpush(0x10CD8); + break; + case 0x10C99: + bufpush(0x10CD9); + break; + case 0x10C9A: + bufpush(0x10CDA); + break; + case 0x10C9B: + bufpush(0x10CDB); + break; + case 0x10C9C: + bufpush(0x10CDC); + break; + case 0x10C9D: + bufpush(0x10CDD); + break; + case 0x10C9E: + bufpush(0x10CDE); + break; + case 0x10C9F: + bufpush(0x10CDF); + break; + case 0x10CA0: + bufpush(0x10CE0); + break; + case 0x10CA1: + bufpush(0x10CE1); + break; + case 0x10CA2: + bufpush(0x10CE2); + break; + case 0x10CA3: + bufpush(0x10CE3); + break; + case 0x10CA4: + bufpush(0x10CE4); + break; + case 0x10CA5: + bufpush(0x10CE5); + break; + case 0x10CA6: + bufpush(0x10CE6); + break; + case 0x10CA7: + bufpush(0x10CE7); + break; + case 0x10CA8: + bufpush(0x10CE8); + break; + case 0x10CA9: + bufpush(0x10CE9); + break; + case 0x10CAA: + bufpush(0x10CEA); + break; + case 0x10CAB: + bufpush(0x10CEB); + break; + case 0x10CAC: + bufpush(0x10CEC); + break; + case 0x10CAD: + bufpush(0x10CED); + break; + case 0x10CAE: + bufpush(0x10CEE); + break; + case 0x10CAF: + bufpush(0x10CEF); + break; + case 0x10CB0: + bufpush(0x10CF0); + break; + case 0x10CB1: + 
bufpush(0x10CF1); + break; + case 0x10CB2: + bufpush(0x10CF2); + break; + case 0x118A0: + bufpush(0x118C0); + break; + case 0x118A1: + bufpush(0x118C1); + break; + case 0x118A2: + bufpush(0x118C2); + break; + case 0x118A3: + bufpush(0x118C3); + break; + case 0x118A4: + bufpush(0x118C4); + break; + case 0x118A5: + bufpush(0x118C5); + break; + case 0x118A6: + bufpush(0x118C6); + break; + case 0x118A7: + bufpush(0x118C7); + break; + case 0x118A8: + bufpush(0x118C8); + break; + case 0x118A9: + bufpush(0x118C9); + break; + case 0x118AA: + bufpush(0x118CA); + break; + case 0x118AB: + bufpush(0x118CB); + break; + case 0x118AC: + bufpush(0x118CC); + break; + case 0x118AD: + bufpush(0x118CD); + break; + case 0x118AE: + bufpush(0x118CE); + break; + case 0x118AF: + bufpush(0x118CF); + break; + case 0x118B0: + bufpush(0x118D0); + break; + case 0x118B1: + bufpush(0x118D1); + break; + case 0x118B2: + bufpush(0x118D2); + break; + case 0x118B3: + bufpush(0x118D3); + break; + case 0x118B4: + bufpush(0x118D4); + break; + case 0x118B5: + bufpush(0x118D5); + break; + case 0x118B6: + bufpush(0x118D6); + break; + case 0x118B7: + bufpush(0x118D7); + break; + case 0x118B8: + bufpush(0x118D8); + break; + case 0x118B9: + bufpush(0x118D9); + break; + case 0x118BA: + bufpush(0x118DA); + break; + case 0x118BB: + bufpush(0x118DB); + break; + case 0x118BC: + bufpush(0x118DC); + break; + case 0x118BD: + bufpush(0x118DD); + break; + case 0x118BE: + bufpush(0x118DE); + break; + case 0x118BF: + bufpush(0x118DF); + break; + case 0x16E40: + bufpush(0x16E60); + break; + case 0x16E41: + bufpush(0x16E61); + break; + case 0x16E42: + bufpush(0x16E62); + break; + case 0x16E43: + bufpush(0x16E63); + break; + case 0x16E44: + bufpush(0x16E64); + break; + case 0x16E45: + bufpush(0x16E65); + break; + case 0x16E46: + bufpush(0x16E66); + break; + case 0x16E47: + bufpush(0x16E67); + break; + case 0x16E48: + bufpush(0x16E68); + break; + case 0x16E49: + bufpush(0x16E69); + break; + case 0x16E4A: + bufpush(0x16E6A); + 
break; + case 0x16E4B: + bufpush(0x16E6B); + break; + case 0x16E4C: + bufpush(0x16E6C); + break; + case 0x16E4D: + bufpush(0x16E6D); + break; + case 0x16E4E: + bufpush(0x16E6E); + break; + case 0x16E4F: + bufpush(0x16E6F); + break; + case 0x16E50: + bufpush(0x16E70); + break; + case 0x16E51: + bufpush(0x16E71); + break; + case 0x16E52: + bufpush(0x16E72); + break; + case 0x16E53: + bufpush(0x16E73); + break; + case 0x16E54: + bufpush(0x16E74); + break; + case 0x16E55: + bufpush(0x16E75); + break; + case 0x16E56: + bufpush(0x16E76); + break; + case 0x16E57: + bufpush(0x16E77); + break; + case 0x16E58: + bufpush(0x16E78); + break; + case 0x16E59: + bufpush(0x16E79); + break; + case 0x16E5A: + bufpush(0x16E7A); + break; + case 0x16E5B: + bufpush(0x16E7B); + break; + case 0x16E5C: + bufpush(0x16E7C); + break; + case 0x16E5D: + bufpush(0x16E7D); + break; + case 0x16E5E: + bufpush(0x16E7E); + break; + case 0x16E5F: + bufpush(0x16E7F); + break; + case 0x1E900: + bufpush(0x1E922); + break; + case 0x1E901: + bufpush(0x1E923); + break; + case 0x1E902: + bufpush(0x1E924); + break; + case 0x1E903: + bufpush(0x1E925); + break; + case 0x1E904: + bufpush(0x1E926); + break; + case 0x1E905: + bufpush(0x1E927); + break; + case 0x1E906: + bufpush(0x1E928); + break; + case 0x1E907: + bufpush(0x1E929); + break; + case 0x1E908: + bufpush(0x1E92A); + break; + case 0x1E909: + bufpush(0x1E92B); + break; + case 0x1E90A: + bufpush(0x1E92C); + break; + case 0x1E90B: + bufpush(0x1E92D); + break; + case 0x1E90C: + bufpush(0x1E92E); + break; + case 0x1E90D: + bufpush(0x1E92F); + break; + case 0x1E90E: + bufpush(0x1E930); + break; + case 0x1E90F: + bufpush(0x1E931); + break; + case 0x1E910: + bufpush(0x1E932); + break; + case 0x1E911: + bufpush(0x1E933); + break; + case 0x1E912: + bufpush(0x1E934); + break; + case 0x1E913: + bufpush(0x1E935); + break; + case 0x1E914: + bufpush(0x1E936); + break; + case 0x1E915: + bufpush(0x1E937); + break; + case 0x1E916: + bufpush(0x1E938); + break; + case 
0x1E917: + bufpush(0x1E939); + break; + case 0x1E918: + bufpush(0x1E93A); + break; + case 0x1E919: + bufpush(0x1E93B); + break; + case 0x1E91A: + bufpush(0x1E93C); + break; + case 0x1E91B: + bufpush(0x1E93D); + break; + case 0x1E91C: + bufpush(0x1E93E); + break; + case 0x1E91D: + bufpush(0x1E93F); + break; + case 0x1E91E: + bufpush(0x1E940); + break; + case 0x1E91F: + bufpush(0x1E941); + break; + case 0x1E920: + bufpush(0x1E942); + break; + case 0x1E921: + bufpush(0x1E943); + break; + default: + bufpush(c); + } diff --git a/deps/cmark/src/chunk.h b/deps/cmark/src/chunk.h new file mode 100644 index 0000000..bf3c42e --- /dev/null +++ b/deps/cmark/src/chunk.h @@ -0,0 +1,69 @@ +#ifndef CMARK_CHUNK_H +#define CMARK_CHUNK_H + +#include +#include +#include +#include "cmark.h" +#include "buffer.h" +#include "cmark_ctype.h" + +#define CMARK_CHUNK_EMPTY \ + { NULL, 0 } + +typedef struct { + const unsigned char *data; + bufsize_t len; +} cmark_chunk; + +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +static CMARK_INLINE void cmark_chunk_free(cmark_chunk *c) { + c->data = NULL; + c->len = 0; +} + +static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) { + while (c->len && cmark_isspace(c->data[0])) { + c->data++; + c->len--; + } +} + +static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) { + while (c->len > 0) { + if (!cmark_isspace(c->data[c->len - 1])) + break; + + c->len--; + } +} + +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) { + cmark_chunk_ltrim(c); + cmark_chunk_rtrim(c); +} + +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, + bufsize_t offset) { + const unsigned char *p = + (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); + return p ? 
(bufsize_t)(p - ch->data) : ch->len; +} + +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) { + bufsize_t len = data ? (bufsize_t)strlen(data) : 0; + cmark_chunk c = {(unsigned char *)data, len}; + return c; +} + +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, + bufsize_t pos, bufsize_t len) { + cmark_chunk c = {ch->data + pos, len}; + return c; +} + +#endif diff --git a/deps/cmark/src/cmark.c b/deps/cmark/src/cmark.c new file mode 100644 index 0000000..60aedbc --- /dev/null +++ b/deps/cmark/src/cmark.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include "node.h" +#include "houdini.h" +#include "cmark.h" +#include "buffer.h" + +int cmark_version(void) { return CMARK_VERSION; } + +const char *cmark_version_string(void) { return CMARK_VERSION_STRING; } + +static void *xcalloc(size_t nmem, size_t size) { + void *ptr = calloc(nmem, size); + if (!ptr) { + fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n"); + abort(); + } + return ptr; +} + +static void *xrealloc(void *ptr, size_t size) { + void *new_ptr = realloc(ptr, size); + if (!new_ptr) { + fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n"); + abort(); + } + return new_ptr; +} + +cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; + +cmark_mem *cmark_get_default_mem_allocator(void) { + return &DEFAULT_MEM_ALLOCATOR; +} + + +char *cmark_markdown_to_html(const char *text, size_t len, int options) { + cmark_node *doc; + char *result; + + doc = cmark_parse_document(text, len, options); + + result = cmark_render_html(doc, options); + cmark_node_free(doc); + + return result; +} diff --git a/deps/cmark/src/cmark.h b/deps/cmark/src/cmark.h new file mode 100644 index 0000000..d2021f0 --- /dev/null +++ b/deps/cmark/src/cmark.h @@ -0,0 +1,654 @@ +#ifndef CMARK_H +#define CMARK_H + +#include +#include +#include + +#ifdef 
__cplusplus +extern "C" { +#endif + +/** # NAME + * + * **cmark** - CommonMark parsing, manipulating, and rendering + */ + +/** # DESCRIPTION + * + * ## Simple Interface + */ + +/** Convert 'text' (assumed to be a UTF-8 encoded string with length + * 'len') from CommonMark Markdown to HTML, returning a null-terminated, + * UTF-8-encoded string. It is the caller's responsibility + * to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_markdown_to_html(const char *text, size_t len, int options); + +/** ## Node Structure + */ + +typedef enum { + /* Error status */ + CMARK_NODE_NONE, + + /* Block */ + CMARK_NODE_DOCUMENT, + CMARK_NODE_BLOCK_QUOTE, + CMARK_NODE_LIST, + CMARK_NODE_ITEM, + CMARK_NODE_CODE_BLOCK, + CMARK_NODE_HTML_BLOCK, + CMARK_NODE_CUSTOM_BLOCK, + CMARK_NODE_PARAGRAPH, + CMARK_NODE_HEADING, + CMARK_NODE_THEMATIC_BREAK, + + CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, + CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, + + /* Inline */ + CMARK_NODE_TEXT, + CMARK_NODE_SOFTBREAK, + CMARK_NODE_LINEBREAK, + CMARK_NODE_CODE, + CMARK_NODE_HTML_INLINE, + CMARK_NODE_CUSTOM_INLINE, + CMARK_NODE_EMPH, + CMARK_NODE_STRONG, + CMARK_NODE_LINK, + CMARK_NODE_IMAGE, + + CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, + CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE +} cmark_node_type; + +/* For backwards compatibility: */ +#define CMARK_NODE_HEADER CMARK_NODE_HEADING +#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK +#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK +#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE + +typedef enum { + CMARK_NO_LIST, + CMARK_BULLET_LIST, + CMARK_ORDERED_LIST +} cmark_list_type; + +typedef enum { + CMARK_NO_DELIM, + CMARK_PERIOD_DELIM, + CMARK_PAREN_DELIM +} cmark_delim_type; + +typedef struct cmark_node cmark_node; +typedef struct cmark_parser cmark_parser; +typedef struct cmark_iter cmark_iter; + +/** + * ## Custom memory allocator support + */ + +/** Defines the memory allocation functions to be used by CMark + * when parsing and 
allocating a document tree + */ +typedef struct cmark_mem { + void *(*calloc)(size_t, size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); +} cmark_mem; + +/** Returns a pointer to the default memory allocator. + */ +CMARK_EXPORT cmark_mem *cmark_get_default_mem_allocator(void); + +/** + * ## Creating and Destroying Nodes + */ + +/** Creates a new node of type 'type'. Note that the node may have + * other required properties, which it is the caller's responsibility + * to assign. + */ +CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type); + +/** Same as `cmark_node_new`, but explicitly listing the memory + * allocator used to allocate the node. Note: be sure to use the same + * allocator for every node in a tree, or bad things can happen. + */ +CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, + cmark_mem *mem); + +/** Frees the memory allocated for a node and any children. + */ +CMARK_EXPORT void cmark_node_free(cmark_node *node); + +/** + * ## Tree Traversal + */ + +/** Returns the next node in the sequence after 'node', or NULL if + * there is none. + */ +CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node); + +/** Returns the previous node in the sequence before 'node', or NULL if + * there is none. + */ +CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node); + +/** Returns the parent of 'node', or NULL if there is none. + */ +CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node); + +/** Returns the first child of 'node', or NULL if 'node' has no children. + */ +CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node); + +/** Returns the last child of 'node', or NULL if 'node' has no children. + */ +CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node); + +/** + * ## Iterator + * + * An iterator will walk through a tree of nodes, starting from a root + * node, returning one node at a time, together with information about + * whether the node is being entered or exited.
The iterator will + * first descend to a child node, if there is one. When there is no + * child, the iterator will go to the next sibling. When there is no + * next sibling, the iterator will return to the parent (but with + * a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will + * return `CMARK_EVENT_DONE` when it reaches the root node again. + * One natural application is an HTML renderer, where an `ENTER` event + * outputs an open tag and an `EXIT` event outputs a close tag. + * An iterator might also be used to transform an AST in some systematic + * way, for example, turning all level-3 headings into regular paragraphs. + * + * void + * usage_example(cmark_node *root) { + * cmark_event_type ev_type; + * cmark_iter *iter = cmark_iter_new(root); + * + * while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + * cmark_node *cur = cmark_iter_get_node(iter); + * // Do something with `cur` and `ev_type` + * } + * + * cmark_iter_free(iter); + * } + * + * Iterators will never return `EXIT` events for leaf nodes, which are nodes + * of type: + * + * * CMARK_NODE_HTML_BLOCK + * * CMARK_NODE_THEMATIC_BREAK + * * CMARK_NODE_CODE_BLOCK + * * CMARK_NODE_TEXT + * * CMARK_NODE_SOFTBREAK + * * CMARK_NODE_LINEBREAK + * * CMARK_NODE_CODE + * * CMARK_NODE_HTML_INLINE + * + * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for + * leaf nodes. + */ + +typedef enum { + CMARK_EVENT_NONE, + CMARK_EVENT_DONE, + CMARK_EVENT_ENTER, + CMARK_EVENT_EXIT +} cmark_event_type; + +/** Creates a new iterator starting at 'root'. The current node and event + * type are undefined until 'cmark_iter_next' is called for the first time. + * The memory allocated for the iterator should be released using + * 'cmark_iter_free' when it is no longer needed. + */ +CMARK_EXPORT +cmark_iter *cmark_iter_new(cmark_node *root); + +/** Frees the memory allocated for an iterator. 
+ */ +CMARK_EXPORT +void cmark_iter_free(cmark_iter *iter); + +/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`, + * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`). + */ +CMARK_EXPORT +cmark_event_type cmark_iter_next(cmark_iter *iter); + +/** Returns the current node. + */ +CMARK_EXPORT +cmark_node *cmark_iter_get_node(cmark_iter *iter); + +/** Returns the current event type. + */ +CMARK_EXPORT +cmark_event_type cmark_iter_get_event_type(cmark_iter *iter); + +/** Returns the root node. + */ +CMARK_EXPORT +cmark_node *cmark_iter_get_root(cmark_iter *iter); + +/** Resets the iterator so that the current node is 'current' and + * the event type is 'event_type'. The new current node must be a + * descendant of the root node or the root node itself. + */ +CMARK_EXPORT +void cmark_iter_reset(cmark_iter *iter, cmark_node *current, + cmark_event_type event_type); + +/** + * ## Accessors + */ + +/** Returns the user data of 'node'. + */ +CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); + +/** Sets arbitrary user data for 'node'. Returns 1 on success, + * 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); + +/** Returns the type of 'node', or `CMARK_NODE_NONE` on error. + */ +CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); + +/** Like 'cmark_node_get_type', but returns a string representation + of the type, or `"<unknown>"`. + */ +CMARK_EXPORT +const char *cmark_node_get_type_string(cmark_node *node); + +/** Returns the string contents of 'node', or an empty + string if none is set. Returns NULL if called on a + node that does not have string content. + */ +CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node); + +/** Sets the string contents of 'node'. Returns 1 on success, + * 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content); + +/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
+ */ +CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node); + +/* For backwards compatibility */ +#define cmark_node_get_header_level cmark_node_get_heading_level +#define cmark_node_set_header_level cmark_node_set_heading_level + +/** Sets the heading level of 'node', returning 1 on success and 0 on error. + */ +CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level); + +/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node' + * is not a list. + */ +CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); + +/** Sets the list type of 'node', returning 1 on success and 0 on error. + */ +CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, + cmark_list_type type); + +/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node' + * is not a list. + */ +CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node); + +/** Sets the list delimiter type of 'node', returning 1 on success and 0 + * on error. + */ +CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node, + cmark_delim_type delim); + +/** Returns starting number of 'node', if it is an ordered list, otherwise 0. + */ +CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node); + +/** Sets starting number of 'node', if it is an ordered list. Returns 1 + * on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); + +/** Returns 1 if 'node' is a tight list, 0 otherwise. + */ +CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node); + +/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); + +/** Returns the info string from a fenced code block. + */ +CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); + +/** Sets the info string in a fenced code block, returning 1 on + * success and 0 on failure. 
+ */ +CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); + +/** Returns the URL of a link or image 'node', or an empty string + if no URL is set. Returns NULL if called on a node that is + not a link or image. + */ +CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node); + +/** Sets the URL of a link or image 'node'. Returns 1 on success, + * 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); + +/** Returns the title of a link or image 'node', or an empty + string if no title is set. Returns NULL if called on a node + that is not a link or image. + */ +CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node); + +/** Sets the title of a link or image 'node'. Returns 1 on success, + * 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); + +/** Returns the literal "on enter" text for a custom 'node', or + an empty string if no on_enter is set. Returns NULL if called + on a non-custom node. + */ +CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node); + +/** Sets the literal text to render "on enter" for a custom 'node'. + Any children of the node will be rendered after this text. + Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node, + const char *on_enter); + +/** Returns the literal "on exit" text for a custom 'node', or + an empty string if no on_exit is set. Returns NULL if + called on a non-custom node. + */ +CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node); + +/** Sets the literal text to render "on exit" for a custom 'node'. + Any children of the node will be rendered before this text. + Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit); + +/** Returns the line on which 'node' begins.
+ */ +CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node); + +/** Returns the column at which 'node' begins. + */ +CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node); + +/** Returns the line on which 'node' ends. + */ +CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); + +/** Returns the column at which 'node' ends. + */ +CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); + +/** + * ## Tree Manipulation + */ + +/** Unlinks a 'node', removing it from the tree, but not freeing its + * memory. (Use 'cmark_node_free' for that.) + */ +CMARK_EXPORT void cmark_node_unlink(cmark_node *node); + +/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, + cmark_node *sibling); + +/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); + +/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does + * not free its memory). + * Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode); + +/** Adds 'child' to the beginning of the children of 'node'. + * Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); + +/** Adds 'child' to the end of the children of 'node'. + * Returns 1 on success, 0 on failure. + */ +CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); + +/** Consolidates adjacent text nodes. 
+ */ +CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); + +/** + * ## Parsing + * + * Simple interface: + * + * cmark_node *document = cmark_parse_document("Hello *world*", 13, + * CMARK_OPT_DEFAULT); + * + * Streaming interface: + * + * cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + * FILE *fp = fopen("myfile.md", "rb"); + * while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { + * cmark_parser_feed(parser, buffer, bytes); + * if (bytes < sizeof(buffer)) { + * break; + * } + * } + * document = cmark_parser_finish(parser); + * cmark_parser_free(parser); + */ + +/** Creates a new parser object. + */ +CMARK_EXPORT +cmark_parser *cmark_parser_new(int options); + +/** Creates a new parser object with the given memory allocator + */ +CMARK_EXPORT +cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); + +/** Frees memory allocated for a parser object. + */ +CMARK_EXPORT +void cmark_parser_free(cmark_parser *parser); + +/** Feeds a string of length 'len' to 'parser'. + */ +CMARK_EXPORT +void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); + +/** Finish parsing and return a pointer to a tree of nodes. + */ +CMARK_EXPORT +cmark_node *cmark_parser_finish(cmark_parser *parser); + +/** Parse a CommonMark document in 'buffer' of length 'len'. + * Returns a pointer to a tree of nodes. The memory allocated for + * the node tree should be released using 'cmark_node_free' + * when it is no longer needed. + */ +CMARK_EXPORT +cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); + +/** Parse a CommonMark document in file 'f', returning a pointer to + * a tree of nodes. The memory allocated for the node tree should be + * released using 'cmark_node_free' when it is no longer needed. + */ +CMARK_EXPORT +cmark_node *cmark_parse_file(FILE *f, int options); + +/** + * ## Rendering + */ + +/** Render a 'node' tree as XML. It is the caller's responsibility + * to free the returned buffer. 
+ */ +CMARK_EXPORT +char *cmark_render_xml(cmark_node *root, int options); + +/** Render a 'node' tree as an HTML fragment. It is up to the user + * to add an appropriate header and footer. It is the caller's + * responsibility to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_render_html(cmark_node *root, int options); + +/** Render a 'node' tree as a groff man page, without the header. + * It is the caller's responsibility to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_render_man(cmark_node *root, int options, int width); + +/** Render a 'node' tree as a commonmark document. + * It is the caller's responsibility to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_render_commonmark(cmark_node *root, int options, int width); + +/** Render a 'node' tree as a LaTeX document. + * It is the caller's responsibility to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_render_latex(cmark_node *root, int options, int width); + +/** + * ## Options + */ + +/** Default options. + */ +#define CMARK_OPT_DEFAULT 0 + +/** + * ### Options affecting rendering + */ + +/** Include a `data-sourcepos` attribute on all block elements. + */ +#define CMARK_OPT_SOURCEPOS (1 << 1) + +/** Render `softbreak` elements as hard line breaks. + */ +#define CMARK_OPT_HARDBREAKS (1 << 2) + +/** `CMARK_OPT_SAFE` is defined here for API compatibility, + but it no longer has any effect. "Safe" mode is now the default: + set `CMARK_OPT_UNSAFE` to disable it. + */ +#define CMARK_OPT_SAFE (1 << 3) + +/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`, + * `file:`, and `data:`, except for `image/png`, `image/gif`, + * `image/jpeg`, or `image/webp` mime types). By default, + * raw HTML is replaced by a placeholder HTML comment. Unsafe + * links are replaced by empty strings. + */ +#define CMARK_OPT_UNSAFE (1 << 17) + +/** Render `softbreak` elements as spaces. 
+ */ +#define CMARK_OPT_NOBREAKS (1 << 4) + +/** + * ### Options affecting parsing + */ + +/** Legacy option (no effect). + */ +#define CMARK_OPT_NORMALIZE (1 << 8) + +/** Validate UTF-8 in the input before parsing, replacing illegal + * sequences with the replacement character U+FFFD. + */ +#define CMARK_OPT_VALIDATE_UTF8 (1 << 9) + +/** Convert straight quotes to curly, --- to em dashes, -- to en dashes. + */ +#define CMARK_OPT_SMART (1 << 10) + +/** + * ## Version information + */ + +/** The library version as integer for runtime checks. Also available as + * macro CMARK_VERSION for compile time checks. + * + * * Bits 16-23 contain the major version. + * * Bits 8-15 contain the minor version. + * * Bits 0-7 contain the patchlevel. + * + * In hexadecimal format, the number 0x010203 represents version 1.2.3. + */ +CMARK_EXPORT +int cmark_version(void); + +/** The library version string for runtime checks. Also available as + * macro CMARK_VERSION_STRING for compile time checks. + */ +CMARK_EXPORT +const char *cmark_version_string(void); + +/** # AUTHORS + * + * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 
+ */ + +#ifndef CMARK_NO_SHORT_NAMES +#define NODE_DOCUMENT CMARK_NODE_DOCUMENT +#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE +#define NODE_LIST CMARK_NODE_LIST +#define NODE_ITEM CMARK_NODE_ITEM +#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK +#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK +#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK +#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH +#define NODE_HEADING CMARK_NODE_HEADING +#define NODE_HEADER CMARK_NODE_HEADER +#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK +#define NODE_HRULE CMARK_NODE_HRULE +#define NODE_TEXT CMARK_NODE_TEXT +#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK +#define NODE_LINEBREAK CMARK_NODE_LINEBREAK +#define NODE_CODE CMARK_NODE_CODE +#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE +#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE +#define NODE_EMPH CMARK_NODE_EMPH +#define NODE_STRONG CMARK_NODE_STRONG +#define NODE_LINK CMARK_NODE_LINK +#define NODE_IMAGE CMARK_NODE_IMAGE +#define BULLET_LIST CMARK_BULLET_LIST +#define ORDERED_LIST CMARK_ORDERED_LIST +#define PERIOD_DELIM CMARK_PERIOD_DELIM +#define PAREN_DELIM CMARK_PAREN_DELIM +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/cmarkConfig.cmake.in b/deps/cmark/src/cmarkConfig.cmake.in new file mode 100644 index 0000000..3d865ff --- /dev/null +++ b/deps/cmark/src/cmarkConfig.cmake.in @@ -0,0 +1,4 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/cmark-targets.cmake") +check_required_components("cmark") diff --git a/deps/cmark/src/cmark_ctype.c b/deps/cmark/src/cmark_ctype.c new file mode 100644 index 0000000..baa68e9 --- /dev/null +++ b/deps/cmark/src/cmark_ctype.c @@ -0,0 +1,44 @@ +#include + +#include "cmark_ctype.h" + +/** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other + */ +static const uint8_t cmark_ctype_class[256] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 2 */ 1, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, + /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, + /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, + /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + * Returns 1 if c is a "whitespace" character as defined by the spec. + */ +int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; } + +/** + * Returns 1 if c is an ascii punctuation character. + */ +int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; } + +int cmark_isalnum(char c) { + uint8_t result; + result = cmark_ctype_class[(uint8_t)c]; + return (result == 3 || result == 4); +} + +int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; } + +int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; } diff --git a/deps/cmark/src/cmark_ctype.h b/deps/cmark/src/cmark_ctype.h new file mode 100644 index 0000000..9a07618 --- /dev/null +++ b/deps/cmark/src/cmark_ctype.h @@ -0,0 +1,26 @@ +#ifndef CMARK_CMARK_CTYPE_H +#define CMARK_CMARK_CTYPE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** Locale-independent versions of functions from ctype.h. + * We want cmark to behave the same no matter what the system locale. 
+ */ + +int cmark_isspace(char c); + +int cmark_ispunct(char c); + +int cmark_isalnum(char c); + +int cmark_isdigit(char c); + +int cmark_isalpha(char c); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/cmark_version.h.in b/deps/cmark/src/cmark_version.h.in new file mode 100644 index 0000000..41de3ac --- /dev/null +++ b/deps/cmark/src/cmark_version.h.in @@ -0,0 +1,7 @@ +#ifndef CMARK_VERSION_H +#define CMARK_VERSION_H + +#define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 16) | (@PROJECT_VERSION_MINOR@ << 8) | @PROJECT_VERSION_PATCH@) +#define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@" + +#endif diff --git a/deps/cmark/src/commonmark.c b/deps/cmark/src/commonmark.c new file mode 100644 index 0000000..0dde2ea --- /dev/null +++ b/deps/cmark/src/commonmark.c @@ -0,0 +1,468 @@ +#include +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" +#include "scanners.h" +#include "render.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define ENCODED_SIZE 20 +#define LISTMARKER_SIZE 20 + +// Functions to convert cmark_nodes to commonmark strings. + +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, + int32_t c, unsigned char nextc) { + bool needs_escaping = false; + bool follows_digit = + renderer->buffer->size > 0 && + cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); + char encoded[ENCODED_SIZE]; + int options = renderer->options; + + needs_escaping = + c < 0x80 && escape != LITERAL && + ((escape == NORMAL && + (c < 0x20 || + c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || + c == '>' || c == '\\' || c == '`' || c == '!' || + (c == '&' && cmark_isalpha(nextc)) || (c == '!' 
&& nextc == '[') || + ((CMARK_OPT_SMART & options) && + ((c == '-' && nextc == '-') || + (c == '.' && nextc == '.') || + c == '"' || c == '\'')) || + (renderer->begin_content && (c == '-' || c == '+' || c == '=') && + // begin_content doesn't get set to false til we've passed digits + // at the beginning of line, so... + !follows_digit) || + (renderer->begin_content && (c == '.' || c == ')') && follows_digit && + (nextc == 0 || cmark_isspace(nextc))))) || + (escape == URL && + (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || + c == ')' || c == '(')) || + (escape == TITLE && + (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); + + if (needs_escaping) { + if (escape == URL && cmark_isspace(c)) { + // percent-encode whitespace; %02X zero-pads so tab becomes %09, not "% 9" + snprintf(encoded, ENCODED_SIZE, "%%%02X", c); + cmark_strbuf_puts(renderer->buffer, encoded); + renderer->column += 3; + } else if (cmark_ispunct(c)) { + cmark_render_ascii(renderer, "\\"); + cmark_render_code_point(renderer, c); + } else { // render as entity + snprintf(encoded, ENCODED_SIZE, "&#%d;", c); + cmark_strbuf_puts(renderer->buffer, encoded); + renderer->column += strlen(encoded); + } + } else { + cmark_render_code_point(renderer, c); + } +} + +static int longest_backtick_sequence(const char *code) { + int longest = 0; + int current = 0; + size_t i = 0; + size_t code_len = strlen(code); + while (i <= code_len) { + if (code[i] == '`') { + current++; + } else { + if (current > longest) { + longest = current; + } + current = 0; + } + i++; + } + return longest; +} + +static int shortest_unused_backtick_sequence(const char *code) { + // note: if the shortest sequence is >= 32, this returns 32 + // so as not to overflow the bit array.
+ uint32_t used = 1; + int current = 0; + size_t i = 0; + size_t code_len = strlen(code); + while (i <= code_len) { + if (code[i] == '`') { + current++; + } else { + if (current > 0 && current < 32) { + used |= (1U << current); + } + current = 0; + } + i++; + } + // return number of first bit that is 0: + i = 0; + while (i < 32 && used & 1) { + used = used >> 1; + i++; + } + return (int)i; +} + +static bool is_autolink(cmark_node *node) { + const unsigned char *title; + const unsigned char *url; + cmark_node *link_text; + + if (node->type != CMARK_NODE_LINK) { + return false; + } + + url = node->as.link.url; + if (url == NULL || _scan_scheme(url) == 0) { + return false; + } + + title = node->as.link.title; + // if it has a title, we can't treat it as an autolink: + if (title && title[0]) { + return false; + } + + link_text = node->first_child; + if (link_text == NULL) { + return false; + } + cmark_consolidate_text_nodes(link_text); + if (strncmp((const char *)url, "mailto:", 7) == 0) { + url += 7; + } + return link_text->data != NULL && + strcmp((const char *)url, (char *)link_text->data) == 0; +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + cmark_node *tmp; + int list_number; + cmark_delim_type list_delim; + size_t numticks; + bool extra_spaces; + size_t i; + bool entering = (ev_type == CMARK_EVENT_ENTER); + const char *info, *code, *title; + char fencechar[2] = {'\0', '\0'}; + size_t code_len; + char listmarker[LISTMARKER_SIZE]; + const char *emph_delim; + bool first_in_list_item; + bufsize_t marker_width; + bool has_nonspace; + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && + !(CMARK_OPT_HARDBREAKS & options); + + // Don't adjust tight list status til we've started the list. + // Otherwise we lose the blank line between a paragraph and + // a following list. 
+ if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + LIT("> "); + renderer->begin_content = true; + cmark_strbuf_puts(renderer->prefix, "> "); + } else { + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); + BLANKLINE(); + } + break; + + case CMARK_NODE_LIST: + if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { + // this ensures that a following indented code block or list will be + // interpreted correctly. + CR(); + LIT("<!-- end list -->"); + BLANKLINE(); + } + break; + + case CMARK_NODE_ITEM: + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + marker_width = 4; + } else { + list_number = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + // we ensure a width of at least 4 so + // we get nice transition from single digits + // to double + snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, + list_delim == CMARK_PAREN_DELIM ? ")" : ".", + list_number < 10 ?
"  " : " "); + marker_width = strlen(listmarker); + } + if (entering) { + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + LIT("  - "); // 4 columns, must match marker_width above + renderer->begin_content = true; + } else { + LIT(listmarker); + renderer->begin_content = true; + } + for (i = marker_width; i--;) { + cmark_strbuf_putc(renderer->prefix, ' '); + } + } else { + cmark_strbuf_truncate(renderer->prefix, + renderer->prefix->size - marker_width); + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + for (i = cmark_node_get_heading_level(node); i > 0; i--) { + LIT("#"); + } + LIT(" "); + renderer->begin_content = true; + renderer->no_linebreaks = true; + } else { + renderer->no_linebreaks = false; + BLANKLINE(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + + first_in_list_item = node->prev == NULL && node->parent && + node->parent->type == CMARK_NODE_ITEM; + + if (!first_in_list_item) { + BLANKLINE(); + } + info = cmark_node_get_fence_info(node); + fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; + code = cmark_node_get_literal(node); + + numticks = longest_backtick_sequence(code) + 1; + if (numticks < 3) { + numticks = 3; + } + for (i = 0; i < numticks; i++) { + LIT(fencechar); + } + LIT(" "); + OUT(info, false, LITERAL); + CR(); + OUT(cmark_node_get_literal(node), false, LITERAL); + CR(); + for (i = 0; i < numticks; i++) { + LIT(fencechar); + } + + BLANKLINE(); + break; + + case CMARK_NODE_HTML_BLOCK: + BLANKLINE(); + OUT(cmark_node_get_literal(node), false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_CUSTOM_BLOCK: + BLANKLINE(); + OUT(entering ?
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_THEMATIC_BREAK: + BLANKLINE(); + LIT("-----"); + BLANKLINE(); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + BLANKLINE(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + if (!(CMARK_OPT_HARDBREAKS & options)) { + LIT("  "); // two trailing spaces: a single one would parse as a soft break + } + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (CMARK_OPT_HARDBREAKS & options) { + LIT("  "); // rendered as a hard break, so two trailing spaces + CR(); + } else if (!renderer->no_linebreaks && renderer->width == 0 && + !(CMARK_OPT_HARDBREAKS & options) && + !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, LITERAL); + } + break; + + case CMARK_NODE_CODE: + code = cmark_node_get_literal(node); + code_len = strlen(code); + numticks = shortest_unused_backtick_sequence(code); + has_nonspace = false; + for (i=0; i < code_len; i++) { + if (code[i] != ' ') { + has_nonspace = true; + break; + } + } + extra_spaces = code_len == 0 || + code[0] == '`' || code[code_len - 1] == '`' || + (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); + for (i = 0; i < numticks; i++) { + LIT("`"); + } + if (extra_spaces) { + LIT(" "); + } + OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); + if (extra_spaces) { + LIT(" "); + } + for (i = 0; i < numticks; i++) { + LIT("`"); + } + break; + + case CMARK_NODE_HTML_INLINE: + OUT(cmark_node_get_literal(node), false, LITERAL); + break; + + case CMARK_NODE_CUSTOM_INLINE: + OUT(entering ?
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + break; + + case CMARK_NODE_STRONG: + if (entering) { + LIT("**"); + } else { + LIT("**"); + } + break; + + case CMARK_NODE_EMPH: + // If we have EMPH(EMPH(x)), we need to use *_x_* + // because **x** is STRONG(x): + if (node->parent && node->parent->type == CMARK_NODE_EMPH && + node->next == NULL && node->prev == NULL) { + emph_delim = "_"; + } else { + emph_delim = "*"; + } + if (entering) { + LIT(emph_delim); + } else { + LIT(emph_delim); + } + break; + + case CMARK_NODE_LINK: + if (is_autolink(node)) { + if (entering) { + LIT("<"); + if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { + LIT((const char *)cmark_node_get_url(node) + 7); + } else { + LIT((const char *)cmark_node_get_url(node)); + } + LIT(">"); + // return signal to skip contents of node... + return 0; + } + } else { + if (entering) { + LIT("["); + } else { + LIT("]("); + OUT(cmark_node_get_url(node), false, URL); + title = cmark_node_get_title(node); + if (strlen(title) > 0) { + LIT(" \""); + OUT(title, false, TITLE); + LIT("\""); + } + LIT(")"); + } + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + LIT("!["); + } else { + LIT("]("); + OUT(cmark_node_get_url(node), false, URL); + title = cmark_node_get_title(node); + if (strlen(title) > 0) { + OUT(" \"", allow_wrap, LITERAL); + OUT(title, false, TITLE); + LIT("\""); + } + LIT(")"); + } + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_commonmark(cmark_node *root, int options, int width) { + if (options & CMARK_OPT_HARDBREAKS) { + // disable breaking on width, since it has + // a different meaning with OPT_HARDBREAKS + width = 0; + } + return cmark_render(root, options, width, outc, S_render_node); +} diff --git a/deps/cmark/src/config.h.in b/deps/cmark/src/config.h.in new file mode 100644 index 0000000..de1a4dd --- /dev/null +++ b/deps/cmark/src/config.h.in @@ -0,0 +1,76 @@ +#ifndef CMARK_CONFIG_H +#define 
CMARK_CONFIG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#cmakedefine HAVE_STDBOOL_H + +#ifdef HAVE_STDBOOL_H + #include +#elif !defined(__cplusplus) + typedef char bool; +#endif + +#cmakedefine HAVE___BUILTIN_EXPECT + +#cmakedefine HAVE___ATTRIBUTE__ + +#ifdef HAVE___ATTRIBUTE__ + #define CMARK_ATTRIBUTE(list) __attribute__ (list) +#else + #define CMARK_ATTRIBUTE(list) +#endif + +#ifndef CMARK_INLINE + #if defined(_MSC_VER) && !defined(__cplusplus) + #define CMARK_INLINE __inline + #else + #define CMARK_INLINE inline + #endif +#endif + +/* snprintf and vsnprintf fallbacks for MSVC before 2015, + due to Valentin Milea http://stackoverflow.com/questions/2915672/ +*/ + +#if defined(_MSC_VER) && _MSC_VER < 1900 + +#include +#include + +#define snprintf c99_snprintf +#define vsnprintf c99_vsnprintf + +CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap) +{ + int count = -1; + + if (size != 0) + count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); + if (count == -1) + count = _vscprintf(format, ap); + + return count; +} + +CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...) 
+{ + int count; + va_list ap; + + va_start(ap, format); + count = c99_vsnprintf(outBuf, size, format, ap); + va_end(ap); + + return count; +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/entities.inc b/deps/cmark/src/entities.inc new file mode 100644 index 0000000..a7c36e2 --- /dev/null +++ b/deps/cmark/src/entities.inc @@ -0,0 +1,2138 @@ +/* Autogenerated by tools/make_headers_inc.py */ + +struct cmark_entity_node { + unsigned char *entity; + unsigned char bytes[8]; +}; + +#define CMARK_ENTITY_MIN_LENGTH 2 +#define CMARK_ENTITY_MAX_LENGTH 32 +#define CMARK_NUM_ENTITIES 2125 + +static const struct cmark_entity_node cmark_entities[] = { +{(unsigned char*)"AElig", {195, 134, 0}}, +{(unsigned char*)"AMP", {38, 0}}, +{(unsigned char*)"Aacute", {195, 129, 0}}, +{(unsigned char*)"Abreve", {196, 130, 0}}, +{(unsigned char*)"Acirc", {195, 130, 0}}, +{(unsigned char*)"Acy", {208, 144, 0}}, +{(unsigned char*)"Afr", {240, 157, 148, 132, 0}}, +{(unsigned char*)"Agrave", {195, 128, 0}}, +{(unsigned char*)"Alpha", {206, 145, 0}}, +{(unsigned char*)"Amacr", {196, 128, 0}}, +{(unsigned char*)"And", {226, 169, 147, 0}}, +{(unsigned char*)"Aogon", {196, 132, 0}}, +{(unsigned char*)"Aopf", {240, 157, 148, 184, 0}}, +{(unsigned char*)"ApplyFunction", {226, 129, 161, 0}}, +{(unsigned char*)"Aring", {195, 133, 0}}, +{(unsigned char*)"Ascr", {240, 157, 146, 156, 0}}, +{(unsigned char*)"Assign", {226, 137, 148, 0}}, +{(unsigned char*)"Atilde", {195, 131, 0}}, +{(unsigned char*)"Auml", {195, 132, 0}}, +{(unsigned char*)"Backslash", {226, 136, 150, 0}}, +{(unsigned char*)"Barv", {226, 171, 167, 0}}, +{(unsigned char*)"Barwed", {226, 140, 134, 0}}, +{(unsigned char*)"Bcy", {208, 145, 0}}, +{(unsigned char*)"Because", {226, 136, 181, 0}}, +{(unsigned char*)"Bernoullis", {226, 132, 172, 0}}, +{(unsigned char*)"Beta", {206, 146, 0}}, +{(unsigned char*)"Bfr", {240, 157, 148, 133, 0}}, +{(unsigned char*)"Bopf", {240, 157, 148, 185, 0}}, +{(unsigned 
char*)"Breve", {203, 152, 0}}, +{(unsigned char*)"Bscr", {226, 132, 172, 0}}, +{(unsigned char*)"Bumpeq", {226, 137, 142, 0}}, +{(unsigned char*)"CHcy", {208, 167, 0}}, +{(unsigned char*)"COPY", {194, 169, 0}}, +{(unsigned char*)"Cacute", {196, 134, 0}}, +{(unsigned char*)"Cap", {226, 139, 146, 0}}, +{(unsigned char*)"CapitalDifferentialD", {226, 133, 133, 0}}, +{(unsigned char*)"Cayleys", {226, 132, 173, 0}}, +{(unsigned char*)"Ccaron", {196, 140, 0}}, +{(unsigned char*)"Ccedil", {195, 135, 0}}, +{(unsigned char*)"Ccirc", {196, 136, 0}}, +{(unsigned char*)"Cconint", {226, 136, 176, 0}}, +{(unsigned char*)"Cdot", {196, 138, 0}}, +{(unsigned char*)"Cedilla", {194, 184, 0}}, +{(unsigned char*)"CenterDot", {194, 183, 0}}, +{(unsigned char*)"Cfr", {226, 132, 173, 0}}, +{(unsigned char*)"Chi", {206, 167, 0}}, +{(unsigned char*)"CircleDot", {226, 138, 153, 0}}, +{(unsigned char*)"CircleMinus", {226, 138, 150, 0}}, +{(unsigned char*)"CirclePlus", {226, 138, 149, 0}}, +{(unsigned char*)"CircleTimes", {226, 138, 151, 0}}, +{(unsigned char*)"ClockwiseContourIntegral", {226, 136, 178, 0}}, +{(unsigned char*)"CloseCurlyDoubleQuote", {226, 128, 157, 0}}, +{(unsigned char*)"CloseCurlyQuote", {226, 128, 153, 0}}, +{(unsigned char*)"Colon", {226, 136, 183, 0}}, +{(unsigned char*)"Colone", {226, 169, 180, 0}}, +{(unsigned char*)"Congruent", {226, 137, 161, 0}}, +{(unsigned char*)"Conint", {226, 136, 175, 0}}, +{(unsigned char*)"ContourIntegral", {226, 136, 174, 0}}, +{(unsigned char*)"Copf", {226, 132, 130, 0}}, +{(unsigned char*)"Coproduct", {226, 136, 144, 0}}, +{(unsigned char*)"CounterClockwiseContourIntegral", {226, 136, 179, 0}}, +{(unsigned char*)"Cross", {226, 168, 175, 0}}, +{(unsigned char*)"Cscr", {240, 157, 146, 158, 0}}, +{(unsigned char*)"Cup", {226, 139, 147, 0}}, +{(unsigned char*)"CupCap", {226, 137, 141, 0}}, +{(unsigned char*)"DD", {226, 133, 133, 0}}, +{(unsigned char*)"DDotrahd", {226, 164, 145, 0}}, +{(unsigned char*)"DJcy", {208, 130, 0}}, +{(unsigned 
char*)"DScy", {208, 133, 0}}, +{(unsigned char*)"DZcy", {208, 143, 0}}, +{(unsigned char*)"Dagger", {226, 128, 161, 0}}, +{(unsigned char*)"Darr", {226, 134, 161, 0}}, +{(unsigned char*)"Dashv", {226, 171, 164, 0}}, +{(unsigned char*)"Dcaron", {196, 142, 0}}, +{(unsigned char*)"Dcy", {208, 148, 0}}, +{(unsigned char*)"Del", {226, 136, 135, 0}}, +{(unsigned char*)"Delta", {206, 148, 0}}, +{(unsigned char*)"Dfr", {240, 157, 148, 135, 0}}, +{(unsigned char*)"DiacriticalAcute", {194, 180, 0}}, +{(unsigned char*)"DiacriticalDot", {203, 153, 0}}, +{(unsigned char*)"DiacriticalDoubleAcute", {203, 157, 0}}, +{(unsigned char*)"DiacriticalGrave", {96, 0}}, +{(unsigned char*)"DiacriticalTilde", {203, 156, 0}}, +{(unsigned char*)"Diamond", {226, 139, 132, 0}}, +{(unsigned char*)"DifferentialD", {226, 133, 134, 0}}, +{(unsigned char*)"Dopf", {240, 157, 148, 187, 0}}, +{(unsigned char*)"Dot", {194, 168, 0}}, +{(unsigned char*)"DotDot", {226, 131, 156, 0}}, +{(unsigned char*)"DotEqual", {226, 137, 144, 0}}, +{(unsigned char*)"DoubleContourIntegral", {226, 136, 175, 0}}, +{(unsigned char*)"DoubleDot", {194, 168, 0}}, +{(unsigned char*)"DoubleDownArrow", {226, 135, 147, 0}}, +{(unsigned char*)"DoubleLeftArrow", {226, 135, 144, 0}}, +{(unsigned char*)"DoubleLeftRightArrow", {226, 135, 148, 0}}, +{(unsigned char*)"DoubleLeftTee", {226, 171, 164, 0}}, +{(unsigned char*)"DoubleLongLeftArrow", {226, 159, 184, 0}}, +{(unsigned char*)"DoubleLongLeftRightArrow", {226, 159, 186, 0}}, +{(unsigned char*)"DoubleLongRightArrow", {226, 159, 185, 0}}, +{(unsigned char*)"DoubleRightArrow", {226, 135, 146, 0}}, +{(unsigned char*)"DoubleRightTee", {226, 138, 168, 0}}, +{(unsigned char*)"DoubleUpArrow", {226, 135, 145, 0}}, +{(unsigned char*)"DoubleUpDownArrow", {226, 135, 149, 0}}, +{(unsigned char*)"DoubleVerticalBar", {226, 136, 165, 0}}, +{(unsigned char*)"DownArrow", {226, 134, 147, 0}}, +{(unsigned char*)"DownArrowBar", {226, 164, 147, 0}}, +{(unsigned char*)"DownArrowUpArrow", {226, 135, 181, 
0}}, +{(unsigned char*)"DownBreve", {204, 145, 0}}, +{(unsigned char*)"DownLeftRightVector", {226, 165, 144, 0}}, +{(unsigned char*)"DownLeftTeeVector", {226, 165, 158, 0}}, +{(unsigned char*)"DownLeftVector", {226, 134, 189, 0}}, +{(unsigned char*)"DownLeftVectorBar", {226, 165, 150, 0}}, +{(unsigned char*)"DownRightTeeVector", {226, 165, 159, 0}}, +{(unsigned char*)"DownRightVector", {226, 135, 129, 0}}, +{(unsigned char*)"DownRightVectorBar", {226, 165, 151, 0}}, +{(unsigned char*)"DownTee", {226, 138, 164, 0}}, +{(unsigned char*)"DownTeeArrow", {226, 134, 167, 0}}, +{(unsigned char*)"Downarrow", {226, 135, 147, 0}}, +{(unsigned char*)"Dscr", {240, 157, 146, 159, 0}}, +{(unsigned char*)"Dstrok", {196, 144, 0}}, +{(unsigned char*)"ENG", {197, 138, 0}}, +{(unsigned char*)"ETH", {195, 144, 0}}, +{(unsigned char*)"Eacute", {195, 137, 0}}, +{(unsigned char*)"Ecaron", {196, 154, 0}}, +{(unsigned char*)"Ecirc", {195, 138, 0}}, +{(unsigned char*)"Ecy", {208, 173, 0}}, +{(unsigned char*)"Edot", {196, 150, 0}}, +{(unsigned char*)"Efr", {240, 157, 148, 136, 0}}, +{(unsigned char*)"Egrave", {195, 136, 0}}, +{(unsigned char*)"Element", {226, 136, 136, 0}}, +{(unsigned char*)"Emacr", {196, 146, 0}}, +{(unsigned char*)"EmptySmallSquare", {226, 151, 187, 0}}, +{(unsigned char*)"EmptyVerySmallSquare", {226, 150, 171, 0}}, +{(unsigned char*)"Eogon", {196, 152, 0}}, +{(unsigned char*)"Eopf", {240, 157, 148, 188, 0}}, +{(unsigned char*)"Epsilon", {206, 149, 0}}, +{(unsigned char*)"Equal", {226, 169, 181, 0}}, +{(unsigned char*)"EqualTilde", {226, 137, 130, 0}}, +{(unsigned char*)"Equilibrium", {226, 135, 140, 0}}, +{(unsigned char*)"Escr", {226, 132, 176, 0}}, +{(unsigned char*)"Esim", {226, 169, 179, 0}}, +{(unsigned char*)"Eta", {206, 151, 0}}, +{(unsigned char*)"Euml", {195, 139, 0}}, +{(unsigned char*)"Exists", {226, 136, 131, 0}}, +{(unsigned char*)"ExponentialE", {226, 133, 135, 0}}, +{(unsigned char*)"Fcy", {208, 164, 0}}, +{(unsigned char*)"Ffr", {240, 157, 148, 137, 0}}, 
+{(unsigned char*)"FilledSmallSquare", {226, 151, 188, 0}}, +{(unsigned char*)"FilledVerySmallSquare", {226, 150, 170, 0}}, +{(unsigned char*)"Fopf", {240, 157, 148, 189, 0}}, +{(unsigned char*)"ForAll", {226, 136, 128, 0}}, +{(unsigned char*)"Fouriertrf", {226, 132, 177, 0}}, +{(unsigned char*)"Fscr", {226, 132, 177, 0}}, +{(unsigned char*)"GJcy", {208, 131, 0}}, +{(unsigned char*)"GT", {62, 0}}, +{(unsigned char*)"Gamma", {206, 147, 0}}, +{(unsigned char*)"Gammad", {207, 156, 0}}, +{(unsigned char*)"Gbreve", {196, 158, 0}}, +{(unsigned char*)"Gcedil", {196, 162, 0}}, +{(unsigned char*)"Gcirc", {196, 156, 0}}, +{(unsigned char*)"Gcy", {208, 147, 0}}, +{(unsigned char*)"Gdot", {196, 160, 0}}, +{(unsigned char*)"Gfr", {240, 157, 148, 138, 0}}, +{(unsigned char*)"Gg", {226, 139, 153, 0}}, +{(unsigned char*)"Gopf", {240, 157, 148, 190, 0}}, +{(unsigned char*)"GreaterEqual", {226, 137, 165, 0}}, +{(unsigned char*)"GreaterEqualLess", {226, 139, 155, 0}}, +{(unsigned char*)"GreaterFullEqual", {226, 137, 167, 0}}, +{(unsigned char*)"GreaterGreater", {226, 170, 162, 0}}, +{(unsigned char*)"GreaterLess", {226, 137, 183, 0}}, +{(unsigned char*)"GreaterSlantEqual", {226, 169, 190, 0}}, +{(unsigned char*)"GreaterTilde", {226, 137, 179, 0}}, +{(unsigned char*)"Gscr", {240, 157, 146, 162, 0}}, +{(unsigned char*)"Gt", {226, 137, 171, 0}}, +{(unsigned char*)"HARDcy", {208, 170, 0}}, +{(unsigned char*)"Hacek", {203, 135, 0}}, +{(unsigned char*)"Hat", {94, 0}}, +{(unsigned char*)"Hcirc", {196, 164, 0}}, +{(unsigned char*)"Hfr", {226, 132, 140, 0}}, +{(unsigned char*)"HilbertSpace", {226, 132, 139, 0}}, +{(unsigned char*)"Hopf", {226, 132, 141, 0}}, +{(unsigned char*)"HorizontalLine", {226, 148, 128, 0}}, +{(unsigned char*)"Hscr", {226, 132, 139, 0}}, +{(unsigned char*)"Hstrok", {196, 166, 0}}, +{(unsigned char*)"HumpDownHump", {226, 137, 142, 0}}, +{(unsigned char*)"HumpEqual", {226, 137, 143, 0}}, +{(unsigned char*)"IEcy", {208, 149, 0}}, +{(unsigned char*)"IJlig", {196, 178, 0}}, 
+{(unsigned char*)"IOcy", {208, 129, 0}}, +{(unsigned char*)"Iacute", {195, 141, 0}}, +{(unsigned char*)"Icirc", {195, 142, 0}}, +{(unsigned char*)"Icy", {208, 152, 0}}, +{(unsigned char*)"Idot", {196, 176, 0}}, +{(unsigned char*)"Ifr", {226, 132, 145, 0}}, +{(unsigned char*)"Igrave", {195, 140, 0}}, +{(unsigned char*)"Im", {226, 132, 145, 0}}, +{(unsigned char*)"Imacr", {196, 170, 0}}, +{(unsigned char*)"ImaginaryI", {226, 133, 136, 0}}, +{(unsigned char*)"Implies", {226, 135, 146, 0}}, +{(unsigned char*)"Int", {226, 136, 172, 0}}, +{(unsigned char*)"Integral", {226, 136, 171, 0}}, +{(unsigned char*)"Intersection", {226, 139, 130, 0}}, +{(unsigned char*)"InvisibleComma", {226, 129, 163, 0}}, +{(unsigned char*)"InvisibleTimes", {226, 129, 162, 0}}, +{(unsigned char*)"Iogon", {196, 174, 0}}, +{(unsigned char*)"Iopf", {240, 157, 149, 128, 0}}, +{(unsigned char*)"Iota", {206, 153, 0}}, +{(unsigned char*)"Iscr", {226, 132, 144, 0}}, +{(unsigned char*)"Itilde", {196, 168, 0}}, +{(unsigned char*)"Iukcy", {208, 134, 0}}, +{(unsigned char*)"Iuml", {195, 143, 0}}, +{(unsigned char*)"Jcirc", {196, 180, 0}}, +{(unsigned char*)"Jcy", {208, 153, 0}}, +{(unsigned char*)"Jfr", {240, 157, 148, 141, 0}}, +{(unsigned char*)"Jopf", {240, 157, 149, 129, 0}}, +{(unsigned char*)"Jscr", {240, 157, 146, 165, 0}}, +{(unsigned char*)"Jsercy", {208, 136, 0}}, +{(unsigned char*)"Jukcy", {208, 132, 0}}, +{(unsigned char*)"KHcy", {208, 165, 0}}, +{(unsigned char*)"KJcy", {208, 140, 0}}, +{(unsigned char*)"Kappa", {206, 154, 0}}, +{(unsigned char*)"Kcedil", {196, 182, 0}}, +{(unsigned char*)"Kcy", {208, 154, 0}}, +{(unsigned char*)"Kfr", {240, 157, 148, 142, 0}}, +{(unsigned char*)"Kopf", {240, 157, 149, 130, 0}}, +{(unsigned char*)"Kscr", {240, 157, 146, 166, 0}}, +{(unsigned char*)"LJcy", {208, 137, 0}}, +{(unsigned char*)"LT", {60, 0}}, +{(unsigned char*)"Lacute", {196, 185, 0}}, +{(unsigned char*)"Lambda", {206, 155, 0}}, +{(unsigned char*)"Lang", {226, 159, 170, 0}}, +{(unsigned 
char*)"Laplacetrf", {226, 132, 146, 0}}, +{(unsigned char*)"Larr", {226, 134, 158, 0}}, +{(unsigned char*)"Lcaron", {196, 189, 0}}, +{(unsigned char*)"Lcedil", {196, 187, 0}}, +{(unsigned char*)"Lcy", {208, 155, 0}}, +{(unsigned char*)"LeftAngleBracket", {226, 159, 168, 0}}, +{(unsigned char*)"LeftArrow", {226, 134, 144, 0}}, +{(unsigned char*)"LeftArrowBar", {226, 135, 164, 0}}, +{(unsigned char*)"LeftArrowRightArrow", {226, 135, 134, 0}}, +{(unsigned char*)"LeftCeiling", {226, 140, 136, 0}}, +{(unsigned char*)"LeftDoubleBracket", {226, 159, 166, 0}}, +{(unsigned char*)"LeftDownTeeVector", {226, 165, 161, 0}}, +{(unsigned char*)"LeftDownVector", {226, 135, 131, 0}}, +{(unsigned char*)"LeftDownVectorBar", {226, 165, 153, 0}}, +{(unsigned char*)"LeftFloor", {226, 140, 138, 0}}, +{(unsigned char*)"LeftRightArrow", {226, 134, 148, 0}}, +{(unsigned char*)"LeftRightVector", {226, 165, 142, 0}}, +{(unsigned char*)"LeftTee", {226, 138, 163, 0}}, +{(unsigned char*)"LeftTeeArrow", {226, 134, 164, 0}}, +{(unsigned char*)"LeftTeeVector", {226, 165, 154, 0}}, +{(unsigned char*)"LeftTriangle", {226, 138, 178, 0}}, +{(unsigned char*)"LeftTriangleBar", {226, 167, 143, 0}}, +{(unsigned char*)"LeftTriangleEqual", {226, 138, 180, 0}}, +{(unsigned char*)"LeftUpDownVector", {226, 165, 145, 0}}, +{(unsigned char*)"LeftUpTeeVector", {226, 165, 160, 0}}, +{(unsigned char*)"LeftUpVector", {226, 134, 191, 0}}, +{(unsigned char*)"LeftUpVectorBar", {226, 165, 152, 0}}, +{(unsigned char*)"LeftVector", {226, 134, 188, 0}}, +{(unsigned char*)"LeftVectorBar", {226, 165, 146, 0}}, +{(unsigned char*)"Leftarrow", {226, 135, 144, 0}}, +{(unsigned char*)"Leftrightarrow", {226, 135, 148, 0}}, +{(unsigned char*)"LessEqualGreater", {226, 139, 154, 0}}, +{(unsigned char*)"LessFullEqual", {226, 137, 166, 0}}, +{(unsigned char*)"LessGreater", {226, 137, 182, 0}}, +{(unsigned char*)"LessLess", {226, 170, 161, 0}}, +{(unsigned char*)"LessSlantEqual", {226, 169, 189, 0}}, +{(unsigned char*)"LessTilde", {226, 
137, 178, 0}}, +{(unsigned char*)"Lfr", {240, 157, 148, 143, 0}}, +{(unsigned char*)"Ll", {226, 139, 152, 0}}, +{(unsigned char*)"Lleftarrow", {226, 135, 154, 0}}, +{(unsigned char*)"Lmidot", {196, 191, 0}}, +{(unsigned char*)"LongLeftArrow", {226, 159, 181, 0}}, +{(unsigned char*)"LongLeftRightArrow", {226, 159, 183, 0}}, +{(unsigned char*)"LongRightArrow", {226, 159, 182, 0}}, +{(unsigned char*)"Longleftarrow", {226, 159, 184, 0}}, +{(unsigned char*)"Longleftrightarrow", {226, 159, 186, 0}}, +{(unsigned char*)"Longrightarrow", {226, 159, 185, 0}}, +{(unsigned char*)"Lopf", {240, 157, 149, 131, 0}}, +{(unsigned char*)"LowerLeftArrow", {226, 134, 153, 0}}, +{(unsigned char*)"LowerRightArrow", {226, 134, 152, 0}}, +{(unsigned char*)"Lscr", {226, 132, 146, 0}}, +{(unsigned char*)"Lsh", {226, 134, 176, 0}}, +{(unsigned char*)"Lstrok", {197, 129, 0}}, +{(unsigned char*)"Lt", {226, 137, 170, 0}}, +{(unsigned char*)"Map", {226, 164, 133, 0}}, +{(unsigned char*)"Mcy", {208, 156, 0}}, +{(unsigned char*)"MediumSpace", {226, 129, 159, 0}}, +{(unsigned char*)"Mellintrf", {226, 132, 179, 0}}, +{(unsigned char*)"Mfr", {240, 157, 148, 144, 0}}, +{(unsigned char*)"MinusPlus", {226, 136, 147, 0}}, +{(unsigned char*)"Mopf", {240, 157, 149, 132, 0}}, +{(unsigned char*)"Mscr", {226, 132, 179, 0}}, +{(unsigned char*)"Mu", {206, 156, 0}}, +{(unsigned char*)"NJcy", {208, 138, 0}}, +{(unsigned char*)"Nacute", {197, 131, 0}}, +{(unsigned char*)"Ncaron", {197, 135, 0}}, +{(unsigned char*)"Ncedil", {197, 133, 0}}, +{(unsigned char*)"Ncy", {208, 157, 0}}, +{(unsigned char*)"NegativeMediumSpace", {226, 128, 139, 0}}, +{(unsigned char*)"NegativeThickSpace", {226, 128, 139, 0}}, +{(unsigned char*)"NegativeThinSpace", {226, 128, 139, 0}}, +{(unsigned char*)"NegativeVeryThinSpace", {226, 128, 139, 0}}, +{(unsigned char*)"NestedGreaterGreater", {226, 137, 171, 0}}, +{(unsigned char*)"NestedLessLess", {226, 137, 170, 0}}, +{(unsigned char*)"NewLine", {10, 0}}, +{(unsigned char*)"Nfr", {240, 157, 
148, 145, 0}}, +{(unsigned char*)"NoBreak", {226, 129, 160, 0}}, +{(unsigned char*)"NonBreakingSpace", {194, 160, 0}}, +{(unsigned char*)"Nopf", {226, 132, 149, 0}}, +{(unsigned char*)"Not", {226, 171, 172, 0}}, +{(unsigned char*)"NotCongruent", {226, 137, 162, 0}}, +{(unsigned char*)"NotCupCap", {226, 137, 173, 0}}, +{(unsigned char*)"NotDoubleVerticalBar", {226, 136, 166, 0}}, +{(unsigned char*)"NotElement", {226, 136, 137, 0}}, +{(unsigned char*)"NotEqual", {226, 137, 160, 0}}, +{(unsigned char*)"NotEqualTilde", {226, 137, 130, 204, 184, 0}}, +{(unsigned char*)"NotExists", {226, 136, 132, 0}}, +{(unsigned char*)"NotGreater", {226, 137, 175, 0}}, +{(unsigned char*)"NotGreaterEqual", {226, 137, 177, 0}}, +{(unsigned char*)"NotGreaterFullEqual", {226, 137, 167, 204, 184, 0}}, +{(unsigned char*)"NotGreaterGreater", {226, 137, 171, 204, 184, 0}}, +{(unsigned char*)"NotGreaterLess", {226, 137, 185, 0}}, +{(unsigned char*)"NotGreaterSlantEqual", {226, 169, 190, 204, 184, 0}}, +{(unsigned char*)"NotGreaterTilde", {226, 137, 181, 0}}, +{(unsigned char*)"NotHumpDownHump", {226, 137, 142, 204, 184, 0}}, +{(unsigned char*)"NotHumpEqual", {226, 137, 143, 204, 184, 0}}, +{(unsigned char*)"NotLeftTriangle", {226, 139, 170, 0}}, +{(unsigned char*)"NotLeftTriangleBar", {226, 167, 143, 204, 184, 0}}, +{(unsigned char*)"NotLeftTriangleEqual", {226, 139, 172, 0}}, +{(unsigned char*)"NotLess", {226, 137, 174, 0}}, +{(unsigned char*)"NotLessEqual", {226, 137, 176, 0}}, +{(unsigned char*)"NotLessGreater", {226, 137, 184, 0}}, +{(unsigned char*)"NotLessLess", {226, 137, 170, 204, 184, 0}}, +{(unsigned char*)"NotLessSlantEqual", {226, 169, 189, 204, 184, 0}}, +{(unsigned char*)"NotLessTilde", {226, 137, 180, 0}}, +{(unsigned char*)"NotNestedGreaterGreater", {226, 170, 162, 204, 184, 0}}, +{(unsigned char*)"NotNestedLessLess", {226, 170, 161, 204, 184, 0}}, +{(unsigned char*)"NotPrecedes", {226, 138, 128, 0}}, +{(unsigned char*)"NotPrecedesEqual", {226, 170, 175, 204, 184, 0}}, 
+{(unsigned char*)"NotPrecedesSlantEqual", {226, 139, 160, 0}}, +{(unsigned char*)"NotReverseElement", {226, 136, 140, 0}}, +{(unsigned char*)"NotRightTriangle", {226, 139, 171, 0}}, +{(unsigned char*)"NotRightTriangleBar", {226, 167, 144, 204, 184, 0}}, +{(unsigned char*)"NotRightTriangleEqual", {226, 139, 173, 0}}, +{(unsigned char*)"NotSquareSubset", {226, 138, 143, 204, 184, 0}}, +{(unsigned char*)"NotSquareSubsetEqual", {226, 139, 162, 0}}, +{(unsigned char*)"NotSquareSuperset", {226, 138, 144, 204, 184, 0}}, +{(unsigned char*)"NotSquareSupersetEqual", {226, 139, 163, 0}}, +{(unsigned char*)"NotSubset", {226, 138, 130, 226, 131, 146, 0}}, +{(unsigned char*)"NotSubsetEqual", {226, 138, 136, 0}}, +{(unsigned char*)"NotSucceeds", {226, 138, 129, 0}}, +{(unsigned char*)"NotSucceedsEqual", {226, 170, 176, 204, 184, 0}}, +{(unsigned char*)"NotSucceedsSlantEqual", {226, 139, 161, 0}}, +{(unsigned char*)"NotSucceedsTilde", {226, 137, 191, 204, 184, 0}}, +{(unsigned char*)"NotSuperset", {226, 138, 131, 226, 131, 146, 0}}, +{(unsigned char*)"NotSupersetEqual", {226, 138, 137, 0}}, +{(unsigned char*)"NotTilde", {226, 137, 129, 0}}, +{(unsigned char*)"NotTildeEqual", {226, 137, 132, 0}}, +{(unsigned char*)"NotTildeFullEqual", {226, 137, 135, 0}}, +{(unsigned char*)"NotTildeTilde", {226, 137, 137, 0}}, +{(unsigned char*)"NotVerticalBar", {226, 136, 164, 0}}, +{(unsigned char*)"Nscr", {240, 157, 146, 169, 0}}, +{(unsigned char*)"Ntilde", {195, 145, 0}}, +{(unsigned char*)"Nu", {206, 157, 0}}, +{(unsigned char*)"OElig", {197, 146, 0}}, +{(unsigned char*)"Oacute", {195, 147, 0}}, +{(unsigned char*)"Ocirc", {195, 148, 0}}, +{(unsigned char*)"Ocy", {208, 158, 0}}, +{(unsigned char*)"Odblac", {197, 144, 0}}, +{(unsigned char*)"Ofr", {240, 157, 148, 146, 0}}, +{(unsigned char*)"Ograve", {195, 146, 0}}, +{(unsigned char*)"Omacr", {197, 140, 0}}, +{(unsigned char*)"Omega", {206, 169, 0}}, +{(unsigned char*)"Omicron", {206, 159, 0}}, +{(unsigned char*)"Oopf", {240, 157, 149, 134, 
0}}, +{(unsigned char*)"OpenCurlyDoubleQuote", {226, 128, 156, 0}}, +{(unsigned char*)"OpenCurlyQuote", {226, 128, 152, 0}}, +{(unsigned char*)"Or", {226, 169, 148, 0}}, +{(unsigned char*)"Oscr", {240, 157, 146, 170, 0}}, +{(unsigned char*)"Oslash", {195, 152, 0}}, +{(unsigned char*)"Otilde", {195, 149, 0}}, +{(unsigned char*)"Otimes", {226, 168, 183, 0}}, +{(unsigned char*)"Ouml", {195, 150, 0}}, +{(unsigned char*)"OverBar", {226, 128, 190, 0}}, +{(unsigned char*)"OverBrace", {226, 143, 158, 0}}, +{(unsigned char*)"OverBracket", {226, 142, 180, 0}}, +{(unsigned char*)"OverParenthesis", {226, 143, 156, 0}}, +{(unsigned char*)"PartialD", {226, 136, 130, 0}}, +{(unsigned char*)"Pcy", {208, 159, 0}}, +{(unsigned char*)"Pfr", {240, 157, 148, 147, 0}}, +{(unsigned char*)"Phi", {206, 166, 0}}, +{(unsigned char*)"Pi", {206, 160, 0}}, +{(unsigned char*)"PlusMinus", {194, 177, 0}}, +{(unsigned char*)"Poincareplane", {226, 132, 140, 0}}, +{(unsigned char*)"Popf", {226, 132, 153, 0}}, +{(unsigned char*)"Pr", {226, 170, 187, 0}}, +{(unsigned char*)"Precedes", {226, 137, 186, 0}}, +{(unsigned char*)"PrecedesEqual", {226, 170, 175, 0}}, +{(unsigned char*)"PrecedesSlantEqual", {226, 137, 188, 0}}, +{(unsigned char*)"PrecedesTilde", {226, 137, 190, 0}}, +{(unsigned char*)"Prime", {226, 128, 179, 0}}, +{(unsigned char*)"Product", {226, 136, 143, 0}}, +{(unsigned char*)"Proportion", {226, 136, 183, 0}}, +{(unsigned char*)"Proportional", {226, 136, 157, 0}}, +{(unsigned char*)"Pscr", {240, 157, 146, 171, 0}}, +{(unsigned char*)"Psi", {206, 168, 0}}, +{(unsigned char*)"QUOT", {34, 0}}, +{(unsigned char*)"Qfr", {240, 157, 148, 148, 0}}, +{(unsigned char*)"Qopf", {226, 132, 154, 0}}, +{(unsigned char*)"Qscr", {240, 157, 146, 172, 0}}, +{(unsigned char*)"RBarr", {226, 164, 144, 0}}, +{(unsigned char*)"REG", {194, 174, 0}}, +{(unsigned char*)"Racute", {197, 148, 0}}, +{(unsigned char*)"Rang", {226, 159, 171, 0}}, +{(unsigned char*)"Rarr", {226, 134, 160, 0}}, +{(unsigned char*)"Rarrtl", 
{226, 164, 150, 0}}, +{(unsigned char*)"Rcaron", {197, 152, 0}}, +{(unsigned char*)"Rcedil", {197, 150, 0}}, +{(unsigned char*)"Rcy", {208, 160, 0}}, +{(unsigned char*)"Re", {226, 132, 156, 0}}, +{(unsigned char*)"ReverseElement", {226, 136, 139, 0}}, +{(unsigned char*)"ReverseEquilibrium", {226, 135, 139, 0}}, +{(unsigned char*)"ReverseUpEquilibrium", {226, 165, 175, 0}}, +{(unsigned char*)"Rfr", {226, 132, 156, 0}}, +{(unsigned char*)"Rho", {206, 161, 0}}, +{(unsigned char*)"RightAngleBracket", {226, 159, 169, 0}}, +{(unsigned char*)"RightArrow", {226, 134, 146, 0}}, +{(unsigned char*)"RightArrowBar", {226, 135, 165, 0}}, +{(unsigned char*)"RightArrowLeftArrow", {226, 135, 132, 0}}, +{(unsigned char*)"RightCeiling", {226, 140, 137, 0}}, +{(unsigned char*)"RightDoubleBracket", {226, 159, 167, 0}}, +{(unsigned char*)"RightDownTeeVector", {226, 165, 157, 0}}, +{(unsigned char*)"RightDownVector", {226, 135, 130, 0}}, +{(unsigned char*)"RightDownVectorBar", {226, 165, 149, 0}}, +{(unsigned char*)"RightFloor", {226, 140, 139, 0}}, +{(unsigned char*)"RightTee", {226, 138, 162, 0}}, +{(unsigned char*)"RightTeeArrow", {226, 134, 166, 0}}, +{(unsigned char*)"RightTeeVector", {226, 165, 155, 0}}, +{(unsigned char*)"RightTriangle", {226, 138, 179, 0}}, +{(unsigned char*)"RightTriangleBar", {226, 167, 144, 0}}, +{(unsigned char*)"RightTriangleEqual", {226, 138, 181, 0}}, +{(unsigned char*)"RightUpDownVector", {226, 165, 143, 0}}, +{(unsigned char*)"RightUpTeeVector", {226, 165, 156, 0}}, +{(unsigned char*)"RightUpVector", {226, 134, 190, 0}}, +{(unsigned char*)"RightUpVectorBar", {226, 165, 148, 0}}, +{(unsigned char*)"RightVector", {226, 135, 128, 0}}, +{(unsigned char*)"RightVectorBar", {226, 165, 147, 0}}, +{(unsigned char*)"Rightarrow", {226, 135, 146, 0}}, +{(unsigned char*)"Ropf", {226, 132, 157, 0}}, +{(unsigned char*)"RoundImplies", {226, 165, 176, 0}}, +{(unsigned char*)"Rrightarrow", {226, 135, 155, 0}}, +{(unsigned char*)"Rscr", {226, 132, 155, 0}}, +{(unsigned 
char*)"Rsh", {226, 134, 177, 0}}, +{(unsigned char*)"RuleDelayed", {226, 167, 180, 0}}, +{(unsigned char*)"SHCHcy", {208, 169, 0}}, +{(unsigned char*)"SHcy", {208, 168, 0}}, +{(unsigned char*)"SOFTcy", {208, 172, 0}}, +{(unsigned char*)"Sacute", {197, 154, 0}}, +{(unsigned char*)"Sc", {226, 170, 188, 0}}, +{(unsigned char*)"Scaron", {197, 160, 0}}, +{(unsigned char*)"Scedil", {197, 158, 0}}, +{(unsigned char*)"Scirc", {197, 156, 0}}, +{(unsigned char*)"Scy", {208, 161, 0}}, +{(unsigned char*)"Sfr", {240, 157, 148, 150, 0}}, +{(unsigned char*)"ShortDownArrow", {226, 134, 147, 0}}, +{(unsigned char*)"ShortLeftArrow", {226, 134, 144, 0}}, +{(unsigned char*)"ShortRightArrow", {226, 134, 146, 0}}, +{(unsigned char*)"ShortUpArrow", {226, 134, 145, 0}}, +{(unsigned char*)"Sigma", {206, 163, 0}}, +{(unsigned char*)"SmallCircle", {226, 136, 152, 0}}, +{(unsigned char*)"Sopf", {240, 157, 149, 138, 0}}, +{(unsigned char*)"Sqrt", {226, 136, 154, 0}}, +{(unsigned char*)"Square", {226, 150, 161, 0}}, +{(unsigned char*)"SquareIntersection", {226, 138, 147, 0}}, +{(unsigned char*)"SquareSubset", {226, 138, 143, 0}}, +{(unsigned char*)"SquareSubsetEqual", {226, 138, 145, 0}}, +{(unsigned char*)"SquareSuperset", {226, 138, 144, 0}}, +{(unsigned char*)"SquareSupersetEqual", {226, 138, 146, 0}}, +{(unsigned char*)"SquareUnion", {226, 138, 148, 0}}, +{(unsigned char*)"Sscr", {240, 157, 146, 174, 0}}, +{(unsigned char*)"Star", {226, 139, 134, 0}}, +{(unsigned char*)"Sub", {226, 139, 144, 0}}, +{(unsigned char*)"Subset", {226, 139, 144, 0}}, +{(unsigned char*)"SubsetEqual", {226, 138, 134, 0}}, +{(unsigned char*)"Succeeds", {226, 137, 187, 0}}, +{(unsigned char*)"SucceedsEqual", {226, 170, 176, 0}}, +{(unsigned char*)"SucceedsSlantEqual", {226, 137, 189, 0}}, +{(unsigned char*)"SucceedsTilde", {226, 137, 191, 0}}, +{(unsigned char*)"SuchThat", {226, 136, 139, 0}}, +{(unsigned char*)"Sum", {226, 136, 145, 0}}, +{(unsigned char*)"Sup", {226, 139, 145, 0}}, +{(unsigned char*)"Superset", 
{226, 138, 131, 0}}, +{(unsigned char*)"SupersetEqual", {226, 138, 135, 0}}, +{(unsigned char*)"Supset", {226, 139, 145, 0}}, +{(unsigned char*)"THORN", {195, 158, 0}}, +{(unsigned char*)"TRADE", {226, 132, 162, 0}}, +{(unsigned char*)"TSHcy", {208, 139, 0}}, +{(unsigned char*)"TScy", {208, 166, 0}}, +{(unsigned char*)"Tab", {9, 0}}, +{(unsigned char*)"Tau", {206, 164, 0}}, +{(unsigned char*)"Tcaron", {197, 164, 0}}, +{(unsigned char*)"Tcedil", {197, 162, 0}}, +{(unsigned char*)"Tcy", {208, 162, 0}}, +{(unsigned char*)"Tfr", {240, 157, 148, 151, 0}}, +{(unsigned char*)"Therefore", {226, 136, 180, 0}}, +{(unsigned char*)"Theta", {206, 152, 0}}, +{(unsigned char*)"ThickSpace", {226, 129, 159, 226, 128, 138, 0}}, +{(unsigned char*)"ThinSpace", {226, 128, 137, 0}}, +{(unsigned char*)"Tilde", {226, 136, 188, 0}}, +{(unsigned char*)"TildeEqual", {226, 137, 131, 0}}, +{(unsigned char*)"TildeFullEqual", {226, 137, 133, 0}}, +{(unsigned char*)"TildeTilde", {226, 137, 136, 0}}, +{(unsigned char*)"Topf", {240, 157, 149, 139, 0}}, +{(unsigned char*)"TripleDot", {226, 131, 155, 0}}, +{(unsigned char*)"Tscr", {240, 157, 146, 175, 0}}, +{(unsigned char*)"Tstrok", {197, 166, 0}}, +{(unsigned char*)"Uacute", {195, 154, 0}}, +{(unsigned char*)"Uarr", {226, 134, 159, 0}}, +{(unsigned char*)"Uarrocir", {226, 165, 137, 0}}, +{(unsigned char*)"Ubrcy", {208, 142, 0}}, +{(unsigned char*)"Ubreve", {197, 172, 0}}, +{(unsigned char*)"Ucirc", {195, 155, 0}}, +{(unsigned char*)"Ucy", {208, 163, 0}}, +{(unsigned char*)"Udblac", {197, 176, 0}}, +{(unsigned char*)"Ufr", {240, 157, 148, 152, 0}}, +{(unsigned char*)"Ugrave", {195, 153, 0}}, +{(unsigned char*)"Umacr", {197, 170, 0}}, +{(unsigned char*)"UnderBar", {95, 0}}, +{(unsigned char*)"UnderBrace", {226, 143, 159, 0}}, +{(unsigned char*)"UnderBracket", {226, 142, 181, 0}}, +{(unsigned char*)"UnderParenthesis", {226, 143, 157, 0}}, +{(unsigned char*)"Union", {226, 139, 131, 0}}, +{(unsigned char*)"UnionPlus", {226, 138, 142, 0}}, +{(unsigned 
char*)"Uogon", {197, 178, 0}}, +{(unsigned char*)"Uopf", {240, 157, 149, 140, 0}}, +{(unsigned char*)"UpArrow", {226, 134, 145, 0}}, +{(unsigned char*)"UpArrowBar", {226, 164, 146, 0}}, +{(unsigned char*)"UpArrowDownArrow", {226, 135, 133, 0}}, +{(unsigned char*)"UpDownArrow", {226, 134, 149, 0}}, +{(unsigned char*)"UpEquilibrium", {226, 165, 174, 0}}, +{(unsigned char*)"UpTee", {226, 138, 165, 0}}, +{(unsigned char*)"UpTeeArrow", {226, 134, 165, 0}}, +{(unsigned char*)"Uparrow", {226, 135, 145, 0}}, +{(unsigned char*)"Updownarrow", {226, 135, 149, 0}}, +{(unsigned char*)"UpperLeftArrow", {226, 134, 150, 0}}, +{(unsigned char*)"UpperRightArrow", {226, 134, 151, 0}}, +{(unsigned char*)"Upsi", {207, 146, 0}}, +{(unsigned char*)"Upsilon", {206, 165, 0}}, +{(unsigned char*)"Uring", {197, 174, 0}}, +{(unsigned char*)"Uscr", {240, 157, 146, 176, 0}}, +{(unsigned char*)"Utilde", {197, 168, 0}}, +{(unsigned char*)"Uuml", {195, 156, 0}}, +{(unsigned char*)"VDash", {226, 138, 171, 0}}, +{(unsigned char*)"Vbar", {226, 171, 171, 0}}, +{(unsigned char*)"Vcy", {208, 146, 0}}, +{(unsigned char*)"Vdash", {226, 138, 169, 0}}, +{(unsigned char*)"Vdashl", {226, 171, 166, 0}}, +{(unsigned char*)"Vee", {226, 139, 129, 0}}, +{(unsigned char*)"Verbar", {226, 128, 150, 0}}, +{(unsigned char*)"Vert", {226, 128, 150, 0}}, +{(unsigned char*)"VerticalBar", {226, 136, 163, 0}}, +{(unsigned char*)"VerticalLine", {124, 0}}, +{(unsigned char*)"VerticalSeparator", {226, 157, 152, 0}}, +{(unsigned char*)"VerticalTilde", {226, 137, 128, 0}}, +{(unsigned char*)"VeryThinSpace", {226, 128, 138, 0}}, +{(unsigned char*)"Vfr", {240, 157, 148, 153, 0}}, +{(unsigned char*)"Vopf", {240, 157, 149, 141, 0}}, +{(unsigned char*)"Vscr", {240, 157, 146, 177, 0}}, +{(unsigned char*)"Vvdash", {226, 138, 170, 0}}, +{(unsigned char*)"Wcirc", {197, 180, 0}}, +{(unsigned char*)"Wedge", {226, 139, 128, 0}}, +{(unsigned char*)"Wfr", {240, 157, 148, 154, 0}}, +{(unsigned char*)"Wopf", {240, 157, 149, 142, 0}}, +{(unsigned 
char*)"Wscr", {240, 157, 146, 178, 0}}, +{(unsigned char*)"Xfr", {240, 157, 148, 155, 0}}, +{(unsigned char*)"Xi", {206, 158, 0}}, +{(unsigned char*)"Xopf", {240, 157, 149, 143, 0}}, +{(unsigned char*)"Xscr", {240, 157, 146, 179, 0}}, +{(unsigned char*)"YAcy", {208, 175, 0}}, +{(unsigned char*)"YIcy", {208, 135, 0}}, +{(unsigned char*)"YUcy", {208, 174, 0}}, +{(unsigned char*)"Yacute", {195, 157, 0}}, +{(unsigned char*)"Ycirc", {197, 182, 0}}, +{(unsigned char*)"Ycy", {208, 171, 0}}, +{(unsigned char*)"Yfr", {240, 157, 148, 156, 0}}, +{(unsigned char*)"Yopf", {240, 157, 149, 144, 0}}, +{(unsigned char*)"Yscr", {240, 157, 146, 180, 0}}, +{(unsigned char*)"Yuml", {197, 184, 0}}, +{(unsigned char*)"ZHcy", {208, 150, 0}}, +{(unsigned char*)"Zacute", {197, 185, 0}}, +{(unsigned char*)"Zcaron", {197, 189, 0}}, +{(unsigned char*)"Zcy", {208, 151, 0}}, +{(unsigned char*)"Zdot", {197, 187, 0}}, +{(unsigned char*)"ZeroWidthSpace", {226, 128, 139, 0}}, +{(unsigned char*)"Zeta", {206, 150, 0}}, +{(unsigned char*)"Zfr", {226, 132, 168, 0}}, +{(unsigned char*)"Zopf", {226, 132, 164, 0}}, +{(unsigned char*)"Zscr", {240, 157, 146, 181, 0}}, +{(unsigned char*)"aacute", {195, 161, 0}}, +{(unsigned char*)"abreve", {196, 131, 0}}, +{(unsigned char*)"ac", {226, 136, 190, 0}}, +{(unsigned char*)"acE", {226, 136, 190, 204, 179, 0}}, +{(unsigned char*)"acd", {226, 136, 191, 0}}, +{(unsigned char*)"acirc", {195, 162, 0}}, +{(unsigned char*)"acute", {194, 180, 0}}, +{(unsigned char*)"acy", {208, 176, 0}}, +{(unsigned char*)"aelig", {195, 166, 0}}, +{(unsigned char*)"af", {226, 129, 161, 0}}, +{(unsigned char*)"afr", {240, 157, 148, 158, 0}}, +{(unsigned char*)"agrave", {195, 160, 0}}, +{(unsigned char*)"alefsym", {226, 132, 181, 0}}, +{(unsigned char*)"aleph", {226, 132, 181, 0}}, +{(unsigned char*)"alpha", {206, 177, 0}}, +{(unsigned char*)"amacr", {196, 129, 0}}, +{(unsigned char*)"amalg", {226, 168, 191, 0}}, +{(unsigned char*)"amp", {38, 0}}, +{(unsigned char*)"and", {226, 136, 167, 
0}}, +{(unsigned char*)"andand", {226, 169, 149, 0}}, +{(unsigned char*)"andd", {226, 169, 156, 0}}, +{(unsigned char*)"andslope", {226, 169, 152, 0}}, +{(unsigned char*)"andv", {226, 169, 154, 0}}, +{(unsigned char*)"ang", {226, 136, 160, 0}}, +{(unsigned char*)"ange", {226, 166, 164, 0}}, +{(unsigned char*)"angle", {226, 136, 160, 0}}, +{(unsigned char*)"angmsd", {226, 136, 161, 0}}, +{(unsigned char*)"angmsdaa", {226, 166, 168, 0}}, +{(unsigned char*)"angmsdab", {226, 166, 169, 0}}, +{(unsigned char*)"angmsdac", {226, 166, 170, 0}}, +{(unsigned char*)"angmsdad", {226, 166, 171, 0}}, +{(unsigned char*)"angmsdae", {226, 166, 172, 0}}, +{(unsigned char*)"angmsdaf", {226, 166, 173, 0}}, +{(unsigned char*)"angmsdag", {226, 166, 174, 0}}, +{(unsigned char*)"angmsdah", {226, 166, 175, 0}}, +{(unsigned char*)"angrt", {226, 136, 159, 0}}, +{(unsigned char*)"angrtvb", {226, 138, 190, 0}}, +{(unsigned char*)"angrtvbd", {226, 166, 157, 0}}, +{(unsigned char*)"angsph", {226, 136, 162, 0}}, +{(unsigned char*)"angst", {195, 133, 0}}, +{(unsigned char*)"angzarr", {226, 141, 188, 0}}, +{(unsigned char*)"aogon", {196, 133, 0}}, +{(unsigned char*)"aopf", {240, 157, 149, 146, 0}}, +{(unsigned char*)"ap", {226, 137, 136, 0}}, +{(unsigned char*)"apE", {226, 169, 176, 0}}, +{(unsigned char*)"apacir", {226, 169, 175, 0}}, +{(unsigned char*)"ape", {226, 137, 138, 0}}, +{(unsigned char*)"apid", {226, 137, 139, 0}}, +{(unsigned char*)"apos", {39, 0}}, +{(unsigned char*)"approx", {226, 137, 136, 0}}, +{(unsigned char*)"approxeq", {226, 137, 138, 0}}, +{(unsigned char*)"aring", {195, 165, 0}}, +{(unsigned char*)"ascr", {240, 157, 146, 182, 0}}, +{(unsigned char*)"ast", {42, 0}}, +{(unsigned char*)"asymp", {226, 137, 136, 0}}, +{(unsigned char*)"asympeq", {226, 137, 141, 0}}, +{(unsigned char*)"atilde", {195, 163, 0}}, +{(unsigned char*)"auml", {195, 164, 0}}, +{(unsigned char*)"awconint", {226, 136, 179, 0}}, +{(unsigned char*)"awint", {226, 168, 145, 0}}, +{(unsigned char*)"bNot", {226, 
171, 173, 0}}, +{(unsigned char*)"backcong", {226, 137, 140, 0}}, +{(unsigned char*)"backepsilon", {207, 182, 0}}, +{(unsigned char*)"backprime", {226, 128, 181, 0}}, +{(unsigned char*)"backsim", {226, 136, 189, 0}}, +{(unsigned char*)"backsimeq", {226, 139, 141, 0}}, +{(unsigned char*)"barvee", {226, 138, 189, 0}}, +{(unsigned char*)"barwed", {226, 140, 133, 0}}, +{(unsigned char*)"barwedge", {226, 140, 133, 0}}, +{(unsigned char*)"bbrk", {226, 142, 181, 0}}, +{(unsigned char*)"bbrktbrk", {226, 142, 182, 0}}, +{(unsigned char*)"bcong", {226, 137, 140, 0}}, +{(unsigned char*)"bcy", {208, 177, 0}}, +{(unsigned char*)"bdquo", {226, 128, 158, 0}}, +{(unsigned char*)"becaus", {226, 136, 181, 0}}, +{(unsigned char*)"because", {226, 136, 181, 0}}, +{(unsigned char*)"bemptyv", {226, 166, 176, 0}}, +{(unsigned char*)"bepsi", {207, 182, 0}}, +{(unsigned char*)"bernou", {226, 132, 172, 0}}, +{(unsigned char*)"beta", {206, 178, 0}}, +{(unsigned char*)"beth", {226, 132, 182, 0}}, +{(unsigned char*)"between", {226, 137, 172, 0}}, +{(unsigned char*)"bfr", {240, 157, 148, 159, 0}}, +{(unsigned char*)"bigcap", {226, 139, 130, 0}}, +{(unsigned char*)"bigcirc", {226, 151, 175, 0}}, +{(unsigned char*)"bigcup", {226, 139, 131, 0}}, +{(unsigned char*)"bigodot", {226, 168, 128, 0}}, +{(unsigned char*)"bigoplus", {226, 168, 129, 0}}, +{(unsigned char*)"bigotimes", {226, 168, 130, 0}}, +{(unsigned char*)"bigsqcup", {226, 168, 134, 0}}, +{(unsigned char*)"bigstar", {226, 152, 133, 0}}, +{(unsigned char*)"bigtriangledown", {226, 150, 189, 0}}, +{(unsigned char*)"bigtriangleup", {226, 150, 179, 0}}, +{(unsigned char*)"biguplus", {226, 168, 132, 0}}, +{(unsigned char*)"bigvee", {226, 139, 129, 0}}, +{(unsigned char*)"bigwedge", {226, 139, 128, 0}}, +{(unsigned char*)"bkarow", {226, 164, 141, 0}}, +{(unsigned char*)"blacklozenge", {226, 167, 171, 0}}, +{(unsigned char*)"blacksquare", {226, 150, 170, 0}}, +{(unsigned char*)"blacktriangle", {226, 150, 180, 0}}, +{(unsigned 
char*)"blacktriangledown", {226, 150, 190, 0}}, +{(unsigned char*)"blacktriangleleft", {226, 151, 130, 0}}, +{(unsigned char*)"blacktriangleright", {226, 150, 184, 0}}, +{(unsigned char*)"blank", {226, 144, 163, 0}}, +{(unsigned char*)"blk12", {226, 150, 146, 0}}, +{(unsigned char*)"blk14", {226, 150, 145, 0}}, +{(unsigned char*)"blk34", {226, 150, 147, 0}}, +{(unsigned char*)"block", {226, 150, 136, 0}}, +{(unsigned char*)"bne", {61, 226, 131, 165, 0}}, +{(unsigned char*)"bnequiv", {226, 137, 161, 226, 131, 165, 0}}, +{(unsigned char*)"bnot", {226, 140, 144, 0}}, +{(unsigned char*)"bopf", {240, 157, 149, 147, 0}}, +{(unsigned char*)"bot", {226, 138, 165, 0}}, +{(unsigned char*)"bottom", {226, 138, 165, 0}}, +{(unsigned char*)"bowtie", {226, 139, 136, 0}}, +{(unsigned char*)"boxDL", {226, 149, 151, 0}}, +{(unsigned char*)"boxDR", {226, 149, 148, 0}}, +{(unsigned char*)"boxDl", {226, 149, 150, 0}}, +{(unsigned char*)"boxDr", {226, 149, 147, 0}}, +{(unsigned char*)"boxH", {226, 149, 144, 0}}, +{(unsigned char*)"boxHD", {226, 149, 166, 0}}, +{(unsigned char*)"boxHU", {226, 149, 169, 0}}, +{(unsigned char*)"boxHd", {226, 149, 164, 0}}, +{(unsigned char*)"boxHu", {226, 149, 167, 0}}, +{(unsigned char*)"boxUL", {226, 149, 157, 0}}, +{(unsigned char*)"boxUR", {226, 149, 154, 0}}, +{(unsigned char*)"boxUl", {226, 149, 156, 0}}, +{(unsigned char*)"boxUr", {226, 149, 153, 0}}, +{(unsigned char*)"boxV", {226, 149, 145, 0}}, +{(unsigned char*)"boxVH", {226, 149, 172, 0}}, +{(unsigned char*)"boxVL", {226, 149, 163, 0}}, +{(unsigned char*)"boxVR", {226, 149, 160, 0}}, +{(unsigned char*)"boxVh", {226, 149, 171, 0}}, +{(unsigned char*)"boxVl", {226, 149, 162, 0}}, +{(unsigned char*)"boxVr", {226, 149, 159, 0}}, +{(unsigned char*)"boxbox", {226, 167, 137, 0}}, +{(unsigned char*)"boxdL", {226, 149, 149, 0}}, +{(unsigned char*)"boxdR", {226, 149, 146, 0}}, +{(unsigned char*)"boxdl", {226, 148, 144, 0}}, +{(unsigned char*)"boxdr", {226, 148, 140, 0}}, +{(unsigned char*)"boxh", {226, 
148, 128, 0}}, +{(unsigned char*)"boxhD", {226, 149, 165, 0}}, +{(unsigned char*)"boxhU", {226, 149, 168, 0}}, +{(unsigned char*)"boxhd", {226, 148, 172, 0}}, +{(unsigned char*)"boxhu", {226, 148, 180, 0}}, +{(unsigned char*)"boxminus", {226, 138, 159, 0}}, +{(unsigned char*)"boxplus", {226, 138, 158, 0}}, +{(unsigned char*)"boxtimes", {226, 138, 160, 0}}, +{(unsigned char*)"boxuL", {226, 149, 155, 0}}, +{(unsigned char*)"boxuR", {226, 149, 152, 0}}, +{(unsigned char*)"boxul", {226, 148, 152, 0}}, +{(unsigned char*)"boxur", {226, 148, 148, 0}}, +{(unsigned char*)"boxv", {226, 148, 130, 0}}, +{(unsigned char*)"boxvH", {226, 149, 170, 0}}, +{(unsigned char*)"boxvL", {226, 149, 161, 0}}, +{(unsigned char*)"boxvR", {226, 149, 158, 0}}, +{(unsigned char*)"boxvh", {226, 148, 188, 0}}, +{(unsigned char*)"boxvl", {226, 148, 164, 0}}, +{(unsigned char*)"boxvr", {226, 148, 156, 0}}, +{(unsigned char*)"bprime", {226, 128, 181, 0}}, +{(unsigned char*)"breve", {203, 152, 0}}, +{(unsigned char*)"brvbar", {194, 166, 0}}, +{(unsigned char*)"bscr", {240, 157, 146, 183, 0}}, +{(unsigned char*)"bsemi", {226, 129, 143, 0}}, +{(unsigned char*)"bsim", {226, 136, 189, 0}}, +{(unsigned char*)"bsime", {226, 139, 141, 0}}, +{(unsigned char*)"bsol", {92, 0}}, +{(unsigned char*)"bsolb", {226, 167, 133, 0}}, +{(unsigned char*)"bsolhsub", {226, 159, 136, 0}}, +{(unsigned char*)"bull", {226, 128, 162, 0}}, +{(unsigned char*)"bullet", {226, 128, 162, 0}}, +{(unsigned char*)"bump", {226, 137, 142, 0}}, +{(unsigned char*)"bumpE", {226, 170, 174, 0}}, +{(unsigned char*)"bumpe", {226, 137, 143, 0}}, +{(unsigned char*)"bumpeq", {226, 137, 143, 0}}, +{(unsigned char*)"cacute", {196, 135, 0}}, +{(unsigned char*)"cap", {226, 136, 169, 0}}, +{(unsigned char*)"capand", {226, 169, 132, 0}}, +{(unsigned char*)"capbrcup", {226, 169, 137, 0}}, +{(unsigned char*)"capcap", {226, 169, 139, 0}}, +{(unsigned char*)"capcup", {226, 169, 135, 0}}, +{(unsigned char*)"capdot", {226, 169, 128, 0}}, +{(unsigned 
char*)"caps", {226, 136, 169, 239, 184, 128, 0}}, +{(unsigned char*)"caret", {226, 129, 129, 0}}, +{(unsigned char*)"caron", {203, 135, 0}}, +{(unsigned char*)"ccaps", {226, 169, 141, 0}}, +{(unsigned char*)"ccaron", {196, 141, 0}}, +{(unsigned char*)"ccedil", {195, 167, 0}}, +{(unsigned char*)"ccirc", {196, 137, 0}}, +{(unsigned char*)"ccups", {226, 169, 140, 0}}, +{(unsigned char*)"ccupssm", {226, 169, 144, 0}}, +{(unsigned char*)"cdot", {196, 139, 0}}, +{(unsigned char*)"cedil", {194, 184, 0}}, +{(unsigned char*)"cemptyv", {226, 166, 178, 0}}, +{(unsigned char*)"cent", {194, 162, 0}}, +{(unsigned char*)"centerdot", {194, 183, 0}}, +{(unsigned char*)"cfr", {240, 157, 148, 160, 0}}, +{(unsigned char*)"chcy", {209, 135, 0}}, +{(unsigned char*)"check", {226, 156, 147, 0}}, +{(unsigned char*)"checkmark", {226, 156, 147, 0}}, +{(unsigned char*)"chi", {207, 135, 0}}, +{(unsigned char*)"cir", {226, 151, 139, 0}}, +{(unsigned char*)"cirE", {226, 167, 131, 0}}, +{(unsigned char*)"circ", {203, 134, 0}}, +{(unsigned char*)"circeq", {226, 137, 151, 0}}, +{(unsigned char*)"circlearrowleft", {226, 134, 186, 0}}, +{(unsigned char*)"circlearrowright", {226, 134, 187, 0}}, +{(unsigned char*)"circledR", {194, 174, 0}}, +{(unsigned char*)"circledS", {226, 147, 136, 0}}, +{(unsigned char*)"circledast", {226, 138, 155, 0}}, +{(unsigned char*)"circledcirc", {226, 138, 154, 0}}, +{(unsigned char*)"circleddash", {226, 138, 157, 0}}, +{(unsigned char*)"cire", {226, 137, 151, 0}}, +{(unsigned char*)"cirfnint", {226, 168, 144, 0}}, +{(unsigned char*)"cirmid", {226, 171, 175, 0}}, +{(unsigned char*)"cirscir", {226, 167, 130, 0}}, +{(unsigned char*)"clubs", {226, 153, 163, 0}}, +{(unsigned char*)"clubsuit", {226, 153, 163, 0}}, +{(unsigned char*)"colon", {58, 0}}, +{(unsigned char*)"colone", {226, 137, 148, 0}}, +{(unsigned char*)"coloneq", {226, 137, 148, 0}}, +{(unsigned char*)"comma", {44, 0}}, +{(unsigned char*)"commat", {64, 0}}, +{(unsigned char*)"comp", {226, 136, 129, 0}}, 
+{(unsigned char*)"compfn", {226, 136, 152, 0}}, +{(unsigned char*)"complement", {226, 136, 129, 0}}, +{(unsigned char*)"complexes", {226, 132, 130, 0}}, +{(unsigned char*)"cong", {226, 137, 133, 0}}, +{(unsigned char*)"congdot", {226, 169, 173, 0}}, +{(unsigned char*)"conint", {226, 136, 174, 0}}, +{(unsigned char*)"copf", {240, 157, 149, 148, 0}}, +{(unsigned char*)"coprod", {226, 136, 144, 0}}, +{(unsigned char*)"copy", {194, 169, 0}}, +{(unsigned char*)"copysr", {226, 132, 151, 0}}, +{(unsigned char*)"crarr", {226, 134, 181, 0}}, +{(unsigned char*)"cross", {226, 156, 151, 0}}, +{(unsigned char*)"cscr", {240, 157, 146, 184, 0}}, +{(unsigned char*)"csub", {226, 171, 143, 0}}, +{(unsigned char*)"csube", {226, 171, 145, 0}}, +{(unsigned char*)"csup", {226, 171, 144, 0}}, +{(unsigned char*)"csupe", {226, 171, 146, 0}}, +{(unsigned char*)"ctdot", {226, 139, 175, 0}}, +{(unsigned char*)"cudarrl", {226, 164, 184, 0}}, +{(unsigned char*)"cudarrr", {226, 164, 181, 0}}, +{(unsigned char*)"cuepr", {226, 139, 158, 0}}, +{(unsigned char*)"cuesc", {226, 139, 159, 0}}, +{(unsigned char*)"cularr", {226, 134, 182, 0}}, +{(unsigned char*)"cularrp", {226, 164, 189, 0}}, +{(unsigned char*)"cup", {226, 136, 170, 0}}, +{(unsigned char*)"cupbrcap", {226, 169, 136, 0}}, +{(unsigned char*)"cupcap", {226, 169, 134, 0}}, +{(unsigned char*)"cupcup", {226, 169, 138, 0}}, +{(unsigned char*)"cupdot", {226, 138, 141, 0}}, +{(unsigned char*)"cupor", {226, 169, 133, 0}}, +{(unsigned char*)"cups", {226, 136, 170, 239, 184, 128, 0}}, +{(unsigned char*)"curarr", {226, 134, 183, 0}}, +{(unsigned char*)"curarrm", {226, 164, 188, 0}}, +{(unsigned char*)"curlyeqprec", {226, 139, 158, 0}}, +{(unsigned char*)"curlyeqsucc", {226, 139, 159, 0}}, +{(unsigned char*)"curlyvee", {226, 139, 142, 0}}, +{(unsigned char*)"curlywedge", {226, 139, 143, 0}}, +{(unsigned char*)"curren", {194, 164, 0}}, +{(unsigned char*)"curvearrowleft", {226, 134, 182, 0}}, +{(unsigned char*)"curvearrowright", {226, 134, 183, 0}}, 
+{(unsigned char*)"cuvee", {226, 139, 142, 0}}, +{(unsigned char*)"cuwed", {226, 139, 143, 0}}, +{(unsigned char*)"cwconint", {226, 136, 178, 0}}, +{(unsigned char*)"cwint", {226, 136, 177, 0}}, +{(unsigned char*)"cylcty", {226, 140, 173, 0}}, +{(unsigned char*)"dArr", {226, 135, 147, 0}}, +{(unsigned char*)"dHar", {226, 165, 165, 0}}, +{(unsigned char*)"dagger", {226, 128, 160, 0}}, +{(unsigned char*)"daleth", {226, 132, 184, 0}}, +{(unsigned char*)"darr", {226, 134, 147, 0}}, +{(unsigned char*)"dash", {226, 128, 144, 0}}, +{(unsigned char*)"dashv", {226, 138, 163, 0}}, +{(unsigned char*)"dbkarow", {226, 164, 143, 0}}, +{(unsigned char*)"dblac", {203, 157, 0}}, +{(unsigned char*)"dcaron", {196, 143, 0}}, +{(unsigned char*)"dcy", {208, 180, 0}}, +{(unsigned char*)"dd", {226, 133, 134, 0}}, +{(unsigned char*)"ddagger", {226, 128, 161, 0}}, +{(unsigned char*)"ddarr", {226, 135, 138, 0}}, +{(unsigned char*)"ddotseq", {226, 169, 183, 0}}, +{(unsigned char*)"deg", {194, 176, 0}}, +{(unsigned char*)"delta", {206, 180, 0}}, +{(unsigned char*)"demptyv", {226, 166, 177, 0}}, +{(unsigned char*)"dfisht", {226, 165, 191, 0}}, +{(unsigned char*)"dfr", {240, 157, 148, 161, 0}}, +{(unsigned char*)"dharl", {226, 135, 131, 0}}, +{(unsigned char*)"dharr", {226, 135, 130, 0}}, +{(unsigned char*)"diam", {226, 139, 132, 0}}, +{(unsigned char*)"diamond", {226, 139, 132, 0}}, +{(unsigned char*)"diamondsuit", {226, 153, 166, 0}}, +{(unsigned char*)"diams", {226, 153, 166, 0}}, +{(unsigned char*)"die", {194, 168, 0}}, +{(unsigned char*)"digamma", {207, 157, 0}}, +{(unsigned char*)"disin", {226, 139, 178, 0}}, +{(unsigned char*)"div", {195, 183, 0}}, +{(unsigned char*)"divide", {195, 183, 0}}, +{(unsigned char*)"divideontimes", {226, 139, 135, 0}}, +{(unsigned char*)"divonx", {226, 139, 135, 0}}, +{(unsigned char*)"djcy", {209, 146, 0}}, +{(unsigned char*)"dlcorn", {226, 140, 158, 0}}, +{(unsigned char*)"dlcrop", {226, 140, 141, 0}}, +{(unsigned char*)"dollar", {36, 0}}, +{(unsigned 
char*)"dopf", {240, 157, 149, 149, 0}}, +{(unsigned char*)"dot", {203, 153, 0}}, +{(unsigned char*)"doteq", {226, 137, 144, 0}}, +{(unsigned char*)"doteqdot", {226, 137, 145, 0}}, +{(unsigned char*)"dotminus", {226, 136, 184, 0}}, +{(unsigned char*)"dotplus", {226, 136, 148, 0}}, +{(unsigned char*)"dotsquare", {226, 138, 161, 0}}, +{(unsigned char*)"doublebarwedge", {226, 140, 134, 0}}, +{(unsigned char*)"downarrow", {226, 134, 147, 0}}, +{(unsigned char*)"downdownarrows", {226, 135, 138, 0}}, +{(unsigned char*)"downharpoonleft", {226, 135, 131, 0}}, +{(unsigned char*)"downharpoonright", {226, 135, 130, 0}}, +{(unsigned char*)"drbkarow", {226, 164, 144, 0}}, +{(unsigned char*)"drcorn", {226, 140, 159, 0}}, +{(unsigned char*)"drcrop", {226, 140, 140, 0}}, +{(unsigned char*)"dscr", {240, 157, 146, 185, 0}}, +{(unsigned char*)"dscy", {209, 149, 0}}, +{(unsigned char*)"dsol", {226, 167, 182, 0}}, +{(unsigned char*)"dstrok", {196, 145, 0}}, +{(unsigned char*)"dtdot", {226, 139, 177, 0}}, +{(unsigned char*)"dtri", {226, 150, 191, 0}}, +{(unsigned char*)"dtrif", {226, 150, 190, 0}}, +{(unsigned char*)"duarr", {226, 135, 181, 0}}, +{(unsigned char*)"duhar", {226, 165, 175, 0}}, +{(unsigned char*)"dwangle", {226, 166, 166, 0}}, +{(unsigned char*)"dzcy", {209, 159, 0}}, +{(unsigned char*)"dzigrarr", {226, 159, 191, 0}}, +{(unsigned char*)"eDDot", {226, 169, 183, 0}}, +{(unsigned char*)"eDot", {226, 137, 145, 0}}, +{(unsigned char*)"eacute", {195, 169, 0}}, +{(unsigned char*)"easter", {226, 169, 174, 0}}, +{(unsigned char*)"ecaron", {196, 155, 0}}, +{(unsigned char*)"ecir", {226, 137, 150, 0}}, +{(unsigned char*)"ecirc", {195, 170, 0}}, +{(unsigned char*)"ecolon", {226, 137, 149, 0}}, +{(unsigned char*)"ecy", {209, 141, 0}}, +{(unsigned char*)"edot", {196, 151, 0}}, +{(unsigned char*)"ee", {226, 133, 135, 0}}, +{(unsigned char*)"efDot", {226, 137, 146, 0}}, +{(unsigned char*)"efr", {240, 157, 148, 162, 0}}, +{(unsigned char*)"eg", {226, 170, 154, 0}}, +{(unsigned 
char*)"egrave", {195, 168, 0}}, +{(unsigned char*)"egs", {226, 170, 150, 0}}, +{(unsigned char*)"egsdot", {226, 170, 152, 0}}, +{(unsigned char*)"el", {226, 170, 153, 0}}, +{(unsigned char*)"elinters", {226, 143, 167, 0}}, +{(unsigned char*)"ell", {226, 132, 147, 0}}, +{(unsigned char*)"els", {226, 170, 149, 0}}, +{(unsigned char*)"elsdot", {226, 170, 151, 0}}, +{(unsigned char*)"emacr", {196, 147, 0}}, +{(unsigned char*)"empty", {226, 136, 133, 0}}, +{(unsigned char*)"emptyset", {226, 136, 133, 0}}, +{(unsigned char*)"emptyv", {226, 136, 133, 0}}, +{(unsigned char*)"emsp", {226, 128, 131, 0}}, +{(unsigned char*)"emsp13", {226, 128, 132, 0}}, +{(unsigned char*)"emsp14", {226, 128, 133, 0}}, +{(unsigned char*)"eng", {197, 139, 0}}, +{(unsigned char*)"ensp", {226, 128, 130, 0}}, +{(unsigned char*)"eogon", {196, 153, 0}}, +{(unsigned char*)"eopf", {240, 157, 149, 150, 0}}, +{(unsigned char*)"epar", {226, 139, 149, 0}}, +{(unsigned char*)"eparsl", {226, 167, 163, 0}}, +{(unsigned char*)"eplus", {226, 169, 177, 0}}, +{(unsigned char*)"epsi", {206, 181, 0}}, +{(unsigned char*)"epsilon", {206, 181, 0}}, +{(unsigned char*)"epsiv", {207, 181, 0}}, +{(unsigned char*)"eqcirc", {226, 137, 150, 0}}, +{(unsigned char*)"eqcolon", {226, 137, 149, 0}}, +{(unsigned char*)"eqsim", {226, 137, 130, 0}}, +{(unsigned char*)"eqslantgtr", {226, 170, 150, 0}}, +{(unsigned char*)"eqslantless", {226, 170, 149, 0}}, +{(unsigned char*)"equals", {61, 0}}, +{(unsigned char*)"equest", {226, 137, 159, 0}}, +{(unsigned char*)"equiv", {226, 137, 161, 0}}, +{(unsigned char*)"equivDD", {226, 169, 184, 0}}, +{(unsigned char*)"eqvparsl", {226, 167, 165, 0}}, +{(unsigned char*)"erDot", {226, 137, 147, 0}}, +{(unsigned char*)"erarr", {226, 165, 177, 0}}, +{(unsigned char*)"escr", {226, 132, 175, 0}}, +{(unsigned char*)"esdot", {226, 137, 144, 0}}, +{(unsigned char*)"esim", {226, 137, 130, 0}}, +{(unsigned char*)"eta", {206, 183, 0}}, +{(unsigned char*)"eth", {195, 176, 0}}, +{(unsigned char*)"euml", {195, 
171, 0}}, +{(unsigned char*)"euro", {226, 130, 172, 0}}, +{(unsigned char*)"excl", {33, 0}}, +{(unsigned char*)"exist", {226, 136, 131, 0}}, +{(unsigned char*)"expectation", {226, 132, 176, 0}}, +{(unsigned char*)"exponentiale", {226, 133, 135, 0}}, +{(unsigned char*)"fallingdotseq", {226, 137, 146, 0}}, +{(unsigned char*)"fcy", {209, 132, 0}}, +{(unsigned char*)"female", {226, 153, 128, 0}}, +{(unsigned char*)"ffilig", {239, 172, 131, 0}}, +{(unsigned char*)"fflig", {239, 172, 128, 0}}, +{(unsigned char*)"ffllig", {239, 172, 132, 0}}, +{(unsigned char*)"ffr", {240, 157, 148, 163, 0}}, +{(unsigned char*)"filig", {239, 172, 129, 0}}, +{(unsigned char*)"fjlig", {102, 106, 0}}, +{(unsigned char*)"flat", {226, 153, 173, 0}}, +{(unsigned char*)"fllig", {239, 172, 130, 0}}, +{(unsigned char*)"fltns", {226, 150, 177, 0}}, +{(unsigned char*)"fnof", {198, 146, 0}}, +{(unsigned char*)"fopf", {240, 157, 149, 151, 0}}, +{(unsigned char*)"forall", {226, 136, 128, 0}}, +{(unsigned char*)"fork", {226, 139, 148, 0}}, +{(unsigned char*)"forkv", {226, 171, 153, 0}}, +{(unsigned char*)"fpartint", {226, 168, 141, 0}}, +{(unsigned char*)"frac12", {194, 189, 0}}, +{(unsigned char*)"frac13", {226, 133, 147, 0}}, +{(unsigned char*)"frac14", {194, 188, 0}}, +{(unsigned char*)"frac15", {226, 133, 149, 0}}, +{(unsigned char*)"frac16", {226, 133, 153, 0}}, +{(unsigned char*)"frac18", {226, 133, 155, 0}}, +{(unsigned char*)"frac23", {226, 133, 148, 0}}, +{(unsigned char*)"frac25", {226, 133, 150, 0}}, +{(unsigned char*)"frac34", {194, 190, 0}}, +{(unsigned char*)"frac35", {226, 133, 151, 0}}, +{(unsigned char*)"frac38", {226, 133, 156, 0}}, +{(unsigned char*)"frac45", {226, 133, 152, 0}}, +{(unsigned char*)"frac56", {226, 133, 154, 0}}, +{(unsigned char*)"frac58", {226, 133, 157, 0}}, +{(unsigned char*)"frac78", {226, 133, 158, 0}}, +{(unsigned char*)"frasl", {226, 129, 132, 0}}, +{(unsigned char*)"frown", {226, 140, 162, 0}}, +{(unsigned char*)"fscr", {240, 157, 146, 187, 0}}, +{(unsigned 
char*)"gE", {226, 137, 167, 0}}, +{(unsigned char*)"gEl", {226, 170, 140, 0}}, +{(unsigned char*)"gacute", {199, 181, 0}}, +{(unsigned char*)"gamma", {206, 179, 0}}, +{(unsigned char*)"gammad", {207, 157, 0}}, +{(unsigned char*)"gap", {226, 170, 134, 0}}, +{(unsigned char*)"gbreve", {196, 159, 0}}, +{(unsigned char*)"gcirc", {196, 157, 0}}, +{(unsigned char*)"gcy", {208, 179, 0}}, +{(unsigned char*)"gdot", {196, 161, 0}}, +{(unsigned char*)"ge", {226, 137, 165, 0}}, +{(unsigned char*)"gel", {226, 139, 155, 0}}, +{(unsigned char*)"geq", {226, 137, 165, 0}}, +{(unsigned char*)"geqq", {226, 137, 167, 0}}, +{(unsigned char*)"geqslant", {226, 169, 190, 0}}, +{(unsigned char*)"ges", {226, 169, 190, 0}}, +{(unsigned char*)"gescc", {226, 170, 169, 0}}, +{(unsigned char*)"gesdot", {226, 170, 128, 0}}, +{(unsigned char*)"gesdoto", {226, 170, 130, 0}}, +{(unsigned char*)"gesdotol", {226, 170, 132, 0}}, +{(unsigned char*)"gesl", {226, 139, 155, 239, 184, 128, 0}}, +{(unsigned char*)"gesles", {226, 170, 148, 0}}, +{(unsigned char*)"gfr", {240, 157, 148, 164, 0}}, +{(unsigned char*)"gg", {226, 137, 171, 0}}, +{(unsigned char*)"ggg", {226, 139, 153, 0}}, +{(unsigned char*)"gimel", {226, 132, 183, 0}}, +{(unsigned char*)"gjcy", {209, 147, 0}}, +{(unsigned char*)"gl", {226, 137, 183, 0}}, +{(unsigned char*)"glE", {226, 170, 146, 0}}, +{(unsigned char*)"gla", {226, 170, 165, 0}}, +{(unsigned char*)"glj", {226, 170, 164, 0}}, +{(unsigned char*)"gnE", {226, 137, 169, 0}}, +{(unsigned char*)"gnap", {226, 170, 138, 0}}, +{(unsigned char*)"gnapprox", {226, 170, 138, 0}}, +{(unsigned char*)"gne", {226, 170, 136, 0}}, +{(unsigned char*)"gneq", {226, 170, 136, 0}}, +{(unsigned char*)"gneqq", {226, 137, 169, 0}}, +{(unsigned char*)"gnsim", {226, 139, 167, 0}}, +{(unsigned char*)"gopf", {240, 157, 149, 152, 0}}, +{(unsigned char*)"grave", {96, 0}}, +{(unsigned char*)"gscr", {226, 132, 138, 0}}, +{(unsigned char*)"gsim", {226, 137, 179, 0}}, +{(unsigned char*)"gsime", {226, 170, 142, 0}}, 
+{(unsigned char*)"gsiml", {226, 170, 144, 0}}, +{(unsigned char*)"gt", {62, 0}}, +{(unsigned char*)"gtcc", {226, 170, 167, 0}}, +{(unsigned char*)"gtcir", {226, 169, 186, 0}}, +{(unsigned char*)"gtdot", {226, 139, 151, 0}}, +{(unsigned char*)"gtlPar", {226, 166, 149, 0}}, +{(unsigned char*)"gtquest", {226, 169, 188, 0}}, +{(unsigned char*)"gtrapprox", {226, 170, 134, 0}}, +{(unsigned char*)"gtrarr", {226, 165, 184, 0}}, +{(unsigned char*)"gtrdot", {226, 139, 151, 0}}, +{(unsigned char*)"gtreqless", {226, 139, 155, 0}}, +{(unsigned char*)"gtreqqless", {226, 170, 140, 0}}, +{(unsigned char*)"gtrless", {226, 137, 183, 0}}, +{(unsigned char*)"gtrsim", {226, 137, 179, 0}}, +{(unsigned char*)"gvertneqq", {226, 137, 169, 239, 184, 128, 0}}, +{(unsigned char*)"gvnE", {226, 137, 169, 239, 184, 128, 0}}, +{(unsigned char*)"hArr", {226, 135, 148, 0}}, +{(unsigned char*)"hairsp", {226, 128, 138, 0}}, +{(unsigned char*)"half", {194, 189, 0}}, +{(unsigned char*)"hamilt", {226, 132, 139, 0}}, +{(unsigned char*)"hardcy", {209, 138, 0}}, +{(unsigned char*)"harr", {226, 134, 148, 0}}, +{(unsigned char*)"harrcir", {226, 165, 136, 0}}, +{(unsigned char*)"harrw", {226, 134, 173, 0}}, +{(unsigned char*)"hbar", {226, 132, 143, 0}}, +{(unsigned char*)"hcirc", {196, 165, 0}}, +{(unsigned char*)"hearts", {226, 153, 165, 0}}, +{(unsigned char*)"heartsuit", {226, 153, 165, 0}}, +{(unsigned char*)"hellip", {226, 128, 166, 0}}, +{(unsigned char*)"hercon", {226, 138, 185, 0}}, +{(unsigned char*)"hfr", {240, 157, 148, 165, 0}}, +{(unsigned char*)"hksearow", {226, 164, 165, 0}}, +{(unsigned char*)"hkswarow", {226, 164, 166, 0}}, +{(unsigned char*)"hoarr", {226, 135, 191, 0}}, +{(unsigned char*)"homtht", {226, 136, 187, 0}}, +{(unsigned char*)"hookleftarrow", {226, 134, 169, 0}}, +{(unsigned char*)"hookrightarrow", {226, 134, 170, 0}}, +{(unsigned char*)"hopf", {240, 157, 149, 153, 0}}, +{(unsigned char*)"horbar", {226, 128, 149, 0}}, +{(unsigned char*)"hscr", {240, 157, 146, 189, 0}}, +{(unsigned 
char*)"hslash", {226, 132, 143, 0}}, +{(unsigned char*)"hstrok", {196, 167, 0}}, +{(unsigned char*)"hybull", {226, 129, 131, 0}}, +{(unsigned char*)"hyphen", {226, 128, 144, 0}}, +{(unsigned char*)"iacute", {195, 173, 0}}, +{(unsigned char*)"ic", {226, 129, 163, 0}}, +{(unsigned char*)"icirc", {195, 174, 0}}, +{(unsigned char*)"icy", {208, 184, 0}}, +{(unsigned char*)"iecy", {208, 181, 0}}, +{(unsigned char*)"iexcl", {194, 161, 0}}, +{(unsigned char*)"iff", {226, 135, 148, 0}}, +{(unsigned char*)"ifr", {240, 157, 148, 166, 0}}, +{(unsigned char*)"igrave", {195, 172, 0}}, +{(unsigned char*)"ii", {226, 133, 136, 0}}, +{(unsigned char*)"iiiint", {226, 168, 140, 0}}, +{(unsigned char*)"iiint", {226, 136, 173, 0}}, +{(unsigned char*)"iinfin", {226, 167, 156, 0}}, +{(unsigned char*)"iiota", {226, 132, 169, 0}}, +{(unsigned char*)"ijlig", {196, 179, 0}}, +{(unsigned char*)"imacr", {196, 171, 0}}, +{(unsigned char*)"image", {226, 132, 145, 0}}, +{(unsigned char*)"imagline", {226, 132, 144, 0}}, +{(unsigned char*)"imagpart", {226, 132, 145, 0}}, +{(unsigned char*)"imath", {196, 177, 0}}, +{(unsigned char*)"imof", {226, 138, 183, 0}}, +{(unsigned char*)"imped", {198, 181, 0}}, +{(unsigned char*)"in", {226, 136, 136, 0}}, +{(unsigned char*)"incare", {226, 132, 133, 0}}, +{(unsigned char*)"infin", {226, 136, 158, 0}}, +{(unsigned char*)"infintie", {226, 167, 157, 0}}, +{(unsigned char*)"inodot", {196, 177, 0}}, +{(unsigned char*)"int", {226, 136, 171, 0}}, +{(unsigned char*)"intcal", {226, 138, 186, 0}}, +{(unsigned char*)"integers", {226, 132, 164, 0}}, +{(unsigned char*)"intercal", {226, 138, 186, 0}}, +{(unsigned char*)"intlarhk", {226, 168, 151, 0}}, +{(unsigned char*)"intprod", {226, 168, 188, 0}}, +{(unsigned char*)"iocy", {209, 145, 0}}, +{(unsigned char*)"iogon", {196, 175, 0}}, +{(unsigned char*)"iopf", {240, 157, 149, 154, 0}}, +{(unsigned char*)"iota", {206, 185, 0}}, +{(unsigned char*)"iprod", {226, 168, 188, 0}}, +{(unsigned char*)"iquest", {194, 191, 0}}, 
+{(unsigned char*)"iscr", {240, 157, 146, 190, 0}}, +{(unsigned char*)"isin", {226, 136, 136, 0}}, +{(unsigned char*)"isinE", {226, 139, 185, 0}}, +{(unsigned char*)"isindot", {226, 139, 181, 0}}, +{(unsigned char*)"isins", {226, 139, 180, 0}}, +{(unsigned char*)"isinsv", {226, 139, 179, 0}}, +{(unsigned char*)"isinv", {226, 136, 136, 0}}, +{(unsigned char*)"it", {226, 129, 162, 0}}, +{(unsigned char*)"itilde", {196, 169, 0}}, +{(unsigned char*)"iukcy", {209, 150, 0}}, +{(unsigned char*)"iuml", {195, 175, 0}}, +{(unsigned char*)"jcirc", {196, 181, 0}}, +{(unsigned char*)"jcy", {208, 185, 0}}, +{(unsigned char*)"jfr", {240, 157, 148, 167, 0}}, +{(unsigned char*)"jmath", {200, 183, 0}}, +{(unsigned char*)"jopf", {240, 157, 149, 155, 0}}, +{(unsigned char*)"jscr", {240, 157, 146, 191, 0}}, +{(unsigned char*)"jsercy", {209, 152, 0}}, +{(unsigned char*)"jukcy", {209, 148, 0}}, +{(unsigned char*)"kappa", {206, 186, 0}}, +{(unsigned char*)"kappav", {207, 176, 0}}, +{(unsigned char*)"kcedil", {196, 183, 0}}, +{(unsigned char*)"kcy", {208, 186, 0}}, +{(unsigned char*)"kfr", {240, 157, 148, 168, 0}}, +{(unsigned char*)"kgreen", {196, 184, 0}}, +{(unsigned char*)"khcy", {209, 133, 0}}, +{(unsigned char*)"kjcy", {209, 156, 0}}, +{(unsigned char*)"kopf", {240, 157, 149, 156, 0}}, +{(unsigned char*)"kscr", {240, 157, 147, 128, 0}}, +{(unsigned char*)"lAarr", {226, 135, 154, 0}}, +{(unsigned char*)"lArr", {226, 135, 144, 0}}, +{(unsigned char*)"lAtail", {226, 164, 155, 0}}, +{(unsigned char*)"lBarr", {226, 164, 142, 0}}, +{(unsigned char*)"lE", {226, 137, 166, 0}}, +{(unsigned char*)"lEg", {226, 170, 139, 0}}, +{(unsigned char*)"lHar", {226, 165, 162, 0}}, +{(unsigned char*)"lacute", {196, 186, 0}}, +{(unsigned char*)"laemptyv", {226, 166, 180, 0}}, +{(unsigned char*)"lagran", {226, 132, 146, 0}}, +{(unsigned char*)"lambda", {206, 187, 0}}, +{(unsigned char*)"lang", {226, 159, 168, 0}}, +{(unsigned char*)"langd", {226, 166, 145, 0}}, +{(unsigned char*)"langle", {226, 159, 168, 
0}}, +{(unsigned char*)"lap", {226, 170, 133, 0}}, +{(unsigned char*)"laquo", {194, 171, 0}}, +{(unsigned char*)"larr", {226, 134, 144, 0}}, +{(unsigned char*)"larrb", {226, 135, 164, 0}}, +{(unsigned char*)"larrbfs", {226, 164, 159, 0}}, +{(unsigned char*)"larrfs", {226, 164, 157, 0}}, +{(unsigned char*)"larrhk", {226, 134, 169, 0}}, +{(unsigned char*)"larrlp", {226, 134, 171, 0}}, +{(unsigned char*)"larrpl", {226, 164, 185, 0}}, +{(unsigned char*)"larrsim", {226, 165, 179, 0}}, +{(unsigned char*)"larrtl", {226, 134, 162, 0}}, +{(unsigned char*)"lat", {226, 170, 171, 0}}, +{(unsigned char*)"latail", {226, 164, 153, 0}}, +{(unsigned char*)"late", {226, 170, 173, 0}}, +{(unsigned char*)"lates", {226, 170, 173, 239, 184, 128, 0}}, +{(unsigned char*)"lbarr", {226, 164, 140, 0}}, +{(unsigned char*)"lbbrk", {226, 157, 178, 0}}, +{(unsigned char*)"lbrace", {123, 0}}, +{(unsigned char*)"lbrack", {91, 0}}, +{(unsigned char*)"lbrke", {226, 166, 139, 0}}, +{(unsigned char*)"lbrksld", {226, 166, 143, 0}}, +{(unsigned char*)"lbrkslu", {226, 166, 141, 0}}, +{(unsigned char*)"lcaron", {196, 190, 0}}, +{(unsigned char*)"lcedil", {196, 188, 0}}, +{(unsigned char*)"lceil", {226, 140, 136, 0}}, +{(unsigned char*)"lcub", {123, 0}}, +{(unsigned char*)"lcy", {208, 187, 0}}, +{(unsigned char*)"ldca", {226, 164, 182, 0}}, +{(unsigned char*)"ldquo", {226, 128, 156, 0}}, +{(unsigned char*)"ldquor", {226, 128, 158, 0}}, +{(unsigned char*)"ldrdhar", {226, 165, 167, 0}}, +{(unsigned char*)"ldrushar", {226, 165, 139, 0}}, +{(unsigned char*)"ldsh", {226, 134, 178, 0}}, +{(unsigned char*)"le", {226, 137, 164, 0}}, +{(unsigned char*)"leftarrow", {226, 134, 144, 0}}, +{(unsigned char*)"leftarrowtail", {226, 134, 162, 0}}, +{(unsigned char*)"leftharpoondown", {226, 134, 189, 0}}, +{(unsigned char*)"leftharpoonup", {226, 134, 188, 0}}, +{(unsigned char*)"leftleftarrows", {226, 135, 135, 0}}, +{(unsigned char*)"leftrightarrow", {226, 134, 148, 0}}, +{(unsigned char*)"leftrightarrows", {226, 135, 134, 
0}}, +{(unsigned char*)"leftrightharpoons", {226, 135, 139, 0}}, +{(unsigned char*)"leftrightsquigarrow", {226, 134, 173, 0}}, +{(unsigned char*)"leftthreetimes", {226, 139, 139, 0}}, +{(unsigned char*)"leg", {226, 139, 154, 0}}, +{(unsigned char*)"leq", {226, 137, 164, 0}}, +{(unsigned char*)"leqq", {226, 137, 166, 0}}, +{(unsigned char*)"leqslant", {226, 169, 189, 0}}, +{(unsigned char*)"les", {226, 169, 189, 0}}, +{(unsigned char*)"lescc", {226, 170, 168, 0}}, +{(unsigned char*)"lesdot", {226, 169, 191, 0}}, +{(unsigned char*)"lesdoto", {226, 170, 129, 0}}, +{(unsigned char*)"lesdotor", {226, 170, 131, 0}}, +{(unsigned char*)"lesg", {226, 139, 154, 239, 184, 128, 0}}, +{(unsigned char*)"lesges", {226, 170, 147, 0}}, +{(unsigned char*)"lessapprox", {226, 170, 133, 0}}, +{(unsigned char*)"lessdot", {226, 139, 150, 0}}, +{(unsigned char*)"lesseqgtr", {226, 139, 154, 0}}, +{(unsigned char*)"lesseqqgtr", {226, 170, 139, 0}}, +{(unsigned char*)"lessgtr", {226, 137, 182, 0}}, +{(unsigned char*)"lesssim", {226, 137, 178, 0}}, +{(unsigned char*)"lfisht", {226, 165, 188, 0}}, +{(unsigned char*)"lfloor", {226, 140, 138, 0}}, +{(unsigned char*)"lfr", {240, 157, 148, 169, 0}}, +{(unsigned char*)"lg", {226, 137, 182, 0}}, +{(unsigned char*)"lgE", {226, 170, 145, 0}}, +{(unsigned char*)"lhard", {226, 134, 189, 0}}, +{(unsigned char*)"lharu", {226, 134, 188, 0}}, +{(unsigned char*)"lharul", {226, 165, 170, 0}}, +{(unsigned char*)"lhblk", {226, 150, 132, 0}}, +{(unsigned char*)"ljcy", {209, 153, 0}}, +{(unsigned char*)"ll", {226, 137, 170, 0}}, +{(unsigned char*)"llarr", {226, 135, 135, 0}}, +{(unsigned char*)"llcorner", {226, 140, 158, 0}}, +{(unsigned char*)"llhard", {226, 165, 171, 0}}, +{(unsigned char*)"lltri", {226, 151, 186, 0}}, +{(unsigned char*)"lmidot", {197, 128, 0}}, +{(unsigned char*)"lmoust", {226, 142, 176, 0}}, +{(unsigned char*)"lmoustache", {226, 142, 176, 0}}, +{(unsigned char*)"lnE", {226, 137, 168, 0}}, +{(unsigned char*)"lnap", {226, 170, 137, 0}}, 
+{(unsigned char*)"lnapprox", {226, 170, 137, 0}}, +{(unsigned char*)"lne", {226, 170, 135, 0}}, +{(unsigned char*)"lneq", {226, 170, 135, 0}}, +{(unsigned char*)"lneqq", {226, 137, 168, 0}}, +{(unsigned char*)"lnsim", {226, 139, 166, 0}}, +{(unsigned char*)"loang", {226, 159, 172, 0}}, +{(unsigned char*)"loarr", {226, 135, 189, 0}}, +{(unsigned char*)"lobrk", {226, 159, 166, 0}}, +{(unsigned char*)"longleftarrow", {226, 159, 181, 0}}, +{(unsigned char*)"longleftrightarrow", {226, 159, 183, 0}}, +{(unsigned char*)"longmapsto", {226, 159, 188, 0}}, +{(unsigned char*)"longrightarrow", {226, 159, 182, 0}}, +{(unsigned char*)"looparrowleft", {226, 134, 171, 0}}, +{(unsigned char*)"looparrowright", {226, 134, 172, 0}}, +{(unsigned char*)"lopar", {226, 166, 133, 0}}, +{(unsigned char*)"lopf", {240, 157, 149, 157, 0}}, +{(unsigned char*)"loplus", {226, 168, 173, 0}}, +{(unsigned char*)"lotimes", {226, 168, 180, 0}}, +{(unsigned char*)"lowast", {226, 136, 151, 0}}, +{(unsigned char*)"lowbar", {95, 0}}, +{(unsigned char*)"loz", {226, 151, 138, 0}}, +{(unsigned char*)"lozenge", {226, 151, 138, 0}}, +{(unsigned char*)"lozf", {226, 167, 171, 0}}, +{(unsigned char*)"lpar", {40, 0}}, +{(unsigned char*)"lparlt", {226, 166, 147, 0}}, +{(unsigned char*)"lrarr", {226, 135, 134, 0}}, +{(unsigned char*)"lrcorner", {226, 140, 159, 0}}, +{(unsigned char*)"lrhar", {226, 135, 139, 0}}, +{(unsigned char*)"lrhard", {226, 165, 173, 0}}, +{(unsigned char*)"lrm", {226, 128, 142, 0}}, +{(unsigned char*)"lrtri", {226, 138, 191, 0}}, +{(unsigned char*)"lsaquo", {226, 128, 185, 0}}, +{(unsigned char*)"lscr", {240, 157, 147, 129, 0}}, +{(unsigned char*)"lsh", {226, 134, 176, 0}}, +{(unsigned char*)"lsim", {226, 137, 178, 0}}, +{(unsigned char*)"lsime", {226, 170, 141, 0}}, +{(unsigned char*)"lsimg", {226, 170, 143, 0}}, +{(unsigned char*)"lsqb", {91, 0}}, +{(unsigned char*)"lsquo", {226, 128, 152, 0}}, +{(unsigned char*)"lsquor", {226, 128, 154, 0}}, +{(unsigned char*)"lstrok", {197, 130, 0}}, 
+{(unsigned char*)"lt", {60, 0}}, +{(unsigned char*)"ltcc", {226, 170, 166, 0}}, +{(unsigned char*)"ltcir", {226, 169, 185, 0}}, +{(unsigned char*)"ltdot", {226, 139, 150, 0}}, +{(unsigned char*)"lthree", {226, 139, 139, 0}}, +{(unsigned char*)"ltimes", {226, 139, 137, 0}}, +{(unsigned char*)"ltlarr", {226, 165, 182, 0}}, +{(unsigned char*)"ltquest", {226, 169, 187, 0}}, +{(unsigned char*)"ltrPar", {226, 166, 150, 0}}, +{(unsigned char*)"ltri", {226, 151, 131, 0}}, +{(unsigned char*)"ltrie", {226, 138, 180, 0}}, +{(unsigned char*)"ltrif", {226, 151, 130, 0}}, +{(unsigned char*)"lurdshar", {226, 165, 138, 0}}, +{(unsigned char*)"luruhar", {226, 165, 166, 0}}, +{(unsigned char*)"lvertneqq", {226, 137, 168, 239, 184, 128, 0}}, +{(unsigned char*)"lvnE", {226, 137, 168, 239, 184, 128, 0}}, +{(unsigned char*)"mDDot", {226, 136, 186, 0}}, +{(unsigned char*)"macr", {194, 175, 0}}, +{(unsigned char*)"male", {226, 153, 130, 0}}, +{(unsigned char*)"malt", {226, 156, 160, 0}}, +{(unsigned char*)"maltese", {226, 156, 160, 0}}, +{(unsigned char*)"map", {226, 134, 166, 0}}, +{(unsigned char*)"mapsto", {226, 134, 166, 0}}, +{(unsigned char*)"mapstodown", {226, 134, 167, 0}}, +{(unsigned char*)"mapstoleft", {226, 134, 164, 0}}, +{(unsigned char*)"mapstoup", {226, 134, 165, 0}}, +{(unsigned char*)"marker", {226, 150, 174, 0}}, +{(unsigned char*)"mcomma", {226, 168, 169, 0}}, +{(unsigned char*)"mcy", {208, 188, 0}}, +{(unsigned char*)"mdash", {226, 128, 148, 0}}, +{(unsigned char*)"measuredangle", {226, 136, 161, 0}}, +{(unsigned char*)"mfr", {240, 157, 148, 170, 0}}, +{(unsigned char*)"mho", {226, 132, 167, 0}}, +{(unsigned char*)"micro", {194, 181, 0}}, +{(unsigned char*)"mid", {226, 136, 163, 0}}, +{(unsigned char*)"midast", {42, 0}}, +{(unsigned char*)"midcir", {226, 171, 176, 0}}, +{(unsigned char*)"middot", {194, 183, 0}}, +{(unsigned char*)"minus", {226, 136, 146, 0}}, +{(unsigned char*)"minusb", {226, 138, 159, 0}}, +{(unsigned char*)"minusd", {226, 136, 184, 0}}, +{(unsigned 
char*)"minusdu", {226, 168, 170, 0}}, +{(unsigned char*)"mlcp", {226, 171, 155, 0}}, +{(unsigned char*)"mldr", {226, 128, 166, 0}}, +{(unsigned char*)"mnplus", {226, 136, 147, 0}}, +{(unsigned char*)"models", {226, 138, 167, 0}}, +{(unsigned char*)"mopf", {240, 157, 149, 158, 0}}, +{(unsigned char*)"mp", {226, 136, 147, 0}}, +{(unsigned char*)"mscr", {240, 157, 147, 130, 0}}, +{(unsigned char*)"mstpos", {226, 136, 190, 0}}, +{(unsigned char*)"mu", {206, 188, 0}}, +{(unsigned char*)"multimap", {226, 138, 184, 0}}, +{(unsigned char*)"mumap", {226, 138, 184, 0}}, +{(unsigned char*)"nGg", {226, 139, 153, 204, 184, 0}}, +{(unsigned char*)"nGt", {226, 137, 171, 226, 131, 146, 0}}, +{(unsigned char*)"nGtv", {226, 137, 171, 204, 184, 0}}, +{(unsigned char*)"nLeftarrow", {226, 135, 141, 0}}, +{(unsigned char*)"nLeftrightarrow", {226, 135, 142, 0}}, +{(unsigned char*)"nLl", {226, 139, 152, 204, 184, 0}}, +{(unsigned char*)"nLt", {226, 137, 170, 226, 131, 146, 0}}, +{(unsigned char*)"nLtv", {226, 137, 170, 204, 184, 0}}, +{(unsigned char*)"nRightarrow", {226, 135, 143, 0}}, +{(unsigned char*)"nVDash", {226, 138, 175, 0}}, +{(unsigned char*)"nVdash", {226, 138, 174, 0}}, +{(unsigned char*)"nabla", {226, 136, 135, 0}}, +{(unsigned char*)"nacute", {197, 132, 0}}, +{(unsigned char*)"nang", {226, 136, 160, 226, 131, 146, 0}}, +{(unsigned char*)"nap", {226, 137, 137, 0}}, +{(unsigned char*)"napE", {226, 169, 176, 204, 184, 0}}, +{(unsigned char*)"napid", {226, 137, 139, 204, 184, 0}}, +{(unsigned char*)"napos", {197, 137, 0}}, +{(unsigned char*)"napprox", {226, 137, 137, 0}}, +{(unsigned char*)"natur", {226, 153, 174, 0}}, +{(unsigned char*)"natural", {226, 153, 174, 0}}, +{(unsigned char*)"naturals", {226, 132, 149, 0}}, +{(unsigned char*)"nbsp", {194, 160, 0}}, +{(unsigned char*)"nbump", {226, 137, 142, 204, 184, 0}}, +{(unsigned char*)"nbumpe", {226, 137, 143, 204, 184, 0}}, +{(unsigned char*)"ncap", {226, 169, 131, 0}}, +{(unsigned char*)"ncaron", {197, 136, 0}}, +{(unsigned 
char*)"ncedil", {197, 134, 0}}, +{(unsigned char*)"ncong", {226, 137, 135, 0}}, +{(unsigned char*)"ncongdot", {226, 169, 173, 204, 184, 0}}, +{(unsigned char*)"ncup", {226, 169, 130, 0}}, +{(unsigned char*)"ncy", {208, 189, 0}}, +{(unsigned char*)"ndash", {226, 128, 147, 0}}, +{(unsigned char*)"ne", {226, 137, 160, 0}}, +{(unsigned char*)"neArr", {226, 135, 151, 0}}, +{(unsigned char*)"nearhk", {226, 164, 164, 0}}, +{(unsigned char*)"nearr", {226, 134, 151, 0}}, +{(unsigned char*)"nearrow", {226, 134, 151, 0}}, +{(unsigned char*)"nedot", {226, 137, 144, 204, 184, 0}}, +{(unsigned char*)"nequiv", {226, 137, 162, 0}}, +{(unsigned char*)"nesear", {226, 164, 168, 0}}, +{(unsigned char*)"nesim", {226, 137, 130, 204, 184, 0}}, +{(unsigned char*)"nexist", {226, 136, 132, 0}}, +{(unsigned char*)"nexists", {226, 136, 132, 0}}, +{(unsigned char*)"nfr", {240, 157, 148, 171, 0}}, +{(unsigned char*)"ngE", {226, 137, 167, 204, 184, 0}}, +{(unsigned char*)"nge", {226, 137, 177, 0}}, +{(unsigned char*)"ngeq", {226, 137, 177, 0}}, +{(unsigned char*)"ngeqq", {226, 137, 167, 204, 184, 0}}, +{(unsigned char*)"ngeqslant", {226, 169, 190, 204, 184, 0}}, +{(unsigned char*)"nges", {226, 169, 190, 204, 184, 0}}, +{(unsigned char*)"ngsim", {226, 137, 181, 0}}, +{(unsigned char*)"ngt", {226, 137, 175, 0}}, +{(unsigned char*)"ngtr", {226, 137, 175, 0}}, +{(unsigned char*)"nhArr", {226, 135, 142, 0}}, +{(unsigned char*)"nharr", {226, 134, 174, 0}}, +{(unsigned char*)"nhpar", {226, 171, 178, 0}}, +{(unsigned char*)"ni", {226, 136, 139, 0}}, +{(unsigned char*)"nis", {226, 139, 188, 0}}, +{(unsigned char*)"nisd", {226, 139, 186, 0}}, +{(unsigned char*)"niv", {226, 136, 139, 0}}, +{(unsigned char*)"njcy", {209, 154, 0}}, +{(unsigned char*)"nlArr", {226, 135, 141, 0}}, +{(unsigned char*)"nlE", {226, 137, 166, 204, 184, 0}}, +{(unsigned char*)"nlarr", {226, 134, 154, 0}}, +{(unsigned char*)"nldr", {226, 128, 165, 0}}, +{(unsigned char*)"nle", {226, 137, 176, 0}}, +{(unsigned char*)"nleftarrow", 
{226, 134, 154, 0}}, +{(unsigned char*)"nleftrightarrow", {226, 134, 174, 0}}, +{(unsigned char*)"nleq", {226, 137, 176, 0}}, +{(unsigned char*)"nleqq", {226, 137, 166, 204, 184, 0}}, +{(unsigned char*)"nleqslant", {226, 169, 189, 204, 184, 0}}, +{(unsigned char*)"nles", {226, 169, 189, 204, 184, 0}}, +{(unsigned char*)"nless", {226, 137, 174, 0}}, +{(unsigned char*)"nlsim", {226, 137, 180, 0}}, +{(unsigned char*)"nlt", {226, 137, 174, 0}}, +{(unsigned char*)"nltri", {226, 139, 170, 0}}, +{(unsigned char*)"nltrie", {226, 139, 172, 0}}, +{(unsigned char*)"nmid", {226, 136, 164, 0}}, +{(unsigned char*)"nopf", {240, 157, 149, 159, 0}}, +{(unsigned char*)"not", {194, 172, 0}}, +{(unsigned char*)"notin", {226, 136, 137, 0}}, +{(unsigned char*)"notinE", {226, 139, 185, 204, 184, 0}}, +{(unsigned char*)"notindot", {226, 139, 181, 204, 184, 0}}, +{(unsigned char*)"notinva", {226, 136, 137, 0}}, +{(unsigned char*)"notinvb", {226, 139, 183, 0}}, +{(unsigned char*)"notinvc", {226, 139, 182, 0}}, +{(unsigned char*)"notni", {226, 136, 140, 0}}, +{(unsigned char*)"notniva", {226, 136, 140, 0}}, +{(unsigned char*)"notnivb", {226, 139, 190, 0}}, +{(unsigned char*)"notnivc", {226, 139, 189, 0}}, +{(unsigned char*)"npar", {226, 136, 166, 0}}, +{(unsigned char*)"nparallel", {226, 136, 166, 0}}, +{(unsigned char*)"nparsl", {226, 171, 189, 226, 131, 165, 0}}, +{(unsigned char*)"npart", {226, 136, 130, 204, 184, 0}}, +{(unsigned char*)"npolint", {226, 168, 148, 0}}, +{(unsigned char*)"npr", {226, 138, 128, 0}}, +{(unsigned char*)"nprcue", {226, 139, 160, 0}}, +{(unsigned char*)"npre", {226, 170, 175, 204, 184, 0}}, +{(unsigned char*)"nprec", {226, 138, 128, 0}}, +{(unsigned char*)"npreceq", {226, 170, 175, 204, 184, 0}}, +{(unsigned char*)"nrArr", {226, 135, 143, 0}}, +{(unsigned char*)"nrarr", {226, 134, 155, 0}}, +{(unsigned char*)"nrarrc", {226, 164, 179, 204, 184, 0}}, +{(unsigned char*)"nrarrw", {226, 134, 157, 204, 184, 0}}, +{(unsigned char*)"nrightarrow", {226, 134, 155, 0}}, 
+{(unsigned char*)"nrtri", {226, 139, 171, 0}}, +{(unsigned char*)"nrtrie", {226, 139, 173, 0}}, +{(unsigned char*)"nsc", {226, 138, 129, 0}}, +{(unsigned char*)"nsccue", {226, 139, 161, 0}}, +{(unsigned char*)"nsce", {226, 170, 176, 204, 184, 0}}, +{(unsigned char*)"nscr", {240, 157, 147, 131, 0}}, +{(unsigned char*)"nshortmid", {226, 136, 164, 0}}, +{(unsigned char*)"nshortparallel", {226, 136, 166, 0}}, +{(unsigned char*)"nsim", {226, 137, 129, 0}}, +{(unsigned char*)"nsime", {226, 137, 132, 0}}, +{(unsigned char*)"nsimeq", {226, 137, 132, 0}}, +{(unsigned char*)"nsmid", {226, 136, 164, 0}}, +{(unsigned char*)"nspar", {226, 136, 166, 0}}, +{(unsigned char*)"nsqsube", {226, 139, 162, 0}}, +{(unsigned char*)"nsqsupe", {226, 139, 163, 0}}, +{(unsigned char*)"nsub", {226, 138, 132, 0}}, +{(unsigned char*)"nsubE", {226, 171, 133, 204, 184, 0}}, +{(unsigned char*)"nsube", {226, 138, 136, 0}}, +{(unsigned char*)"nsubset", {226, 138, 130, 226, 131, 146, 0}}, +{(unsigned char*)"nsubseteq", {226, 138, 136, 0}}, +{(unsigned char*)"nsubseteqq", {226, 171, 133, 204, 184, 0}}, +{(unsigned char*)"nsucc", {226, 138, 129, 0}}, +{(unsigned char*)"nsucceq", {226, 170, 176, 204, 184, 0}}, +{(unsigned char*)"nsup", {226, 138, 133, 0}}, +{(unsigned char*)"nsupE", {226, 171, 134, 204, 184, 0}}, +{(unsigned char*)"nsupe", {226, 138, 137, 0}}, +{(unsigned char*)"nsupset", {226, 138, 131, 226, 131, 146, 0}}, +{(unsigned char*)"nsupseteq", {226, 138, 137, 0}}, +{(unsigned char*)"nsupseteqq", {226, 171, 134, 204, 184, 0}}, +{(unsigned char*)"ntgl", {226, 137, 185, 0}}, +{(unsigned char*)"ntilde", {195, 177, 0}}, +{(unsigned char*)"ntlg", {226, 137, 184, 0}}, +{(unsigned char*)"ntriangleleft", {226, 139, 170, 0}}, +{(unsigned char*)"ntrianglelefteq", {226, 139, 172, 0}}, +{(unsigned char*)"ntriangleright", {226, 139, 171, 0}}, +{(unsigned char*)"ntrianglerighteq", {226, 139, 173, 0}}, +{(unsigned char*)"nu", {206, 189, 0}}, +{(unsigned char*)"num", {35, 0}}, +{(unsigned char*)"numero", 
{226, 132, 150, 0}}, +{(unsigned char*)"numsp", {226, 128, 135, 0}}, +{(unsigned char*)"nvDash", {226, 138, 173, 0}}, +{(unsigned char*)"nvHarr", {226, 164, 132, 0}}, +{(unsigned char*)"nvap", {226, 137, 141, 226, 131, 146, 0}}, +{(unsigned char*)"nvdash", {226, 138, 172, 0}}, +{(unsigned char*)"nvge", {226, 137, 165, 226, 131, 146, 0}}, +{(unsigned char*)"nvgt", {62, 226, 131, 146, 0}}, +{(unsigned char*)"nvinfin", {226, 167, 158, 0}}, +{(unsigned char*)"nvlArr", {226, 164, 130, 0}}, +{(unsigned char*)"nvle", {226, 137, 164, 226, 131, 146, 0}}, +{(unsigned char*)"nvlt", {60, 226, 131, 146, 0}}, +{(unsigned char*)"nvltrie", {226, 138, 180, 226, 131, 146, 0}}, +{(unsigned char*)"nvrArr", {226, 164, 131, 0}}, +{(unsigned char*)"nvrtrie", {226, 138, 181, 226, 131, 146, 0}}, +{(unsigned char*)"nvsim", {226, 136, 188, 226, 131, 146, 0}}, +{(unsigned char*)"nwArr", {226, 135, 150, 0}}, +{(unsigned char*)"nwarhk", {226, 164, 163, 0}}, +{(unsigned char*)"nwarr", {226, 134, 150, 0}}, +{(unsigned char*)"nwarrow", {226, 134, 150, 0}}, +{(unsigned char*)"nwnear", {226, 164, 167, 0}}, +{(unsigned char*)"oS", {226, 147, 136, 0}}, +{(unsigned char*)"oacute", {195, 179, 0}}, +{(unsigned char*)"oast", {226, 138, 155, 0}}, +{(unsigned char*)"ocir", {226, 138, 154, 0}}, +{(unsigned char*)"ocirc", {195, 180, 0}}, +{(unsigned char*)"ocy", {208, 190, 0}}, +{(unsigned char*)"odash", {226, 138, 157, 0}}, +{(unsigned char*)"odblac", {197, 145, 0}}, +{(unsigned char*)"odiv", {226, 168, 184, 0}}, +{(unsigned char*)"odot", {226, 138, 153, 0}}, +{(unsigned char*)"odsold", {226, 166, 188, 0}}, +{(unsigned char*)"oelig", {197, 147, 0}}, +{(unsigned char*)"ofcir", {226, 166, 191, 0}}, +{(unsigned char*)"ofr", {240, 157, 148, 172, 0}}, +{(unsigned char*)"ogon", {203, 155, 0}}, +{(unsigned char*)"ograve", {195, 178, 0}}, +{(unsigned char*)"ogt", {226, 167, 129, 0}}, +{(unsigned char*)"ohbar", {226, 166, 181, 0}}, +{(unsigned char*)"ohm", {206, 169, 0}}, +{(unsigned char*)"oint", {226, 136, 174, 
0}}, +{(unsigned char*)"olarr", {226, 134, 186, 0}}, +{(unsigned char*)"olcir", {226, 166, 190, 0}}, +{(unsigned char*)"olcross", {226, 166, 187, 0}}, +{(unsigned char*)"oline", {226, 128, 190, 0}}, +{(unsigned char*)"olt", {226, 167, 128, 0}}, +{(unsigned char*)"omacr", {197, 141, 0}}, +{(unsigned char*)"omega", {207, 137, 0}}, +{(unsigned char*)"omicron", {206, 191, 0}}, +{(unsigned char*)"omid", {226, 166, 182, 0}}, +{(unsigned char*)"ominus", {226, 138, 150, 0}}, +{(unsigned char*)"oopf", {240, 157, 149, 160, 0}}, +{(unsigned char*)"opar", {226, 166, 183, 0}}, +{(unsigned char*)"operp", {226, 166, 185, 0}}, +{(unsigned char*)"oplus", {226, 138, 149, 0}}, +{(unsigned char*)"or", {226, 136, 168, 0}}, +{(unsigned char*)"orarr", {226, 134, 187, 0}}, +{(unsigned char*)"ord", {226, 169, 157, 0}}, +{(unsigned char*)"order", {226, 132, 180, 0}}, +{(unsigned char*)"orderof", {226, 132, 180, 0}}, +{(unsigned char*)"ordf", {194, 170, 0}}, +{(unsigned char*)"ordm", {194, 186, 0}}, +{(unsigned char*)"origof", {226, 138, 182, 0}}, +{(unsigned char*)"oror", {226, 169, 150, 0}}, +{(unsigned char*)"orslope", {226, 169, 151, 0}}, +{(unsigned char*)"orv", {226, 169, 155, 0}}, +{(unsigned char*)"oscr", {226, 132, 180, 0}}, +{(unsigned char*)"oslash", {195, 184, 0}}, +{(unsigned char*)"osol", {226, 138, 152, 0}}, +{(unsigned char*)"otilde", {195, 181, 0}}, +{(unsigned char*)"otimes", {226, 138, 151, 0}}, +{(unsigned char*)"otimesas", {226, 168, 182, 0}}, +{(unsigned char*)"ouml", {195, 182, 0}}, +{(unsigned char*)"ovbar", {226, 140, 189, 0}}, +{(unsigned char*)"par", {226, 136, 165, 0}}, +{(unsigned char*)"para", {194, 182, 0}}, +{(unsigned char*)"parallel", {226, 136, 165, 0}}, +{(unsigned char*)"parsim", {226, 171, 179, 0}}, +{(unsigned char*)"parsl", {226, 171, 189, 0}}, +{(unsigned char*)"part", {226, 136, 130, 0}}, +{(unsigned char*)"pcy", {208, 191, 0}}, +{(unsigned char*)"percnt", {37, 0}}, +{(unsigned char*)"period", {46, 0}}, +{(unsigned char*)"permil", {226, 128, 176, 
0}}, +{(unsigned char*)"perp", {226, 138, 165, 0}}, +{(unsigned char*)"pertenk", {226, 128, 177, 0}}, +{(unsigned char*)"pfr", {240, 157, 148, 173, 0}}, +{(unsigned char*)"phi", {207, 134, 0}}, +{(unsigned char*)"phiv", {207, 149, 0}}, +{(unsigned char*)"phmmat", {226, 132, 179, 0}}, +{(unsigned char*)"phone", {226, 152, 142, 0}}, +{(unsigned char*)"pi", {207, 128, 0}}, +{(unsigned char*)"pitchfork", {226, 139, 148, 0}}, +{(unsigned char*)"piv", {207, 150, 0}}, +{(unsigned char*)"planck", {226, 132, 143, 0}}, +{(unsigned char*)"planckh", {226, 132, 142, 0}}, +{(unsigned char*)"plankv", {226, 132, 143, 0}}, +{(unsigned char*)"plus", {43, 0}}, +{(unsigned char*)"plusacir", {226, 168, 163, 0}}, +{(unsigned char*)"plusb", {226, 138, 158, 0}}, +{(unsigned char*)"pluscir", {226, 168, 162, 0}}, +{(unsigned char*)"plusdo", {226, 136, 148, 0}}, +{(unsigned char*)"plusdu", {226, 168, 165, 0}}, +{(unsigned char*)"pluse", {226, 169, 178, 0}}, +{(unsigned char*)"plusmn", {194, 177, 0}}, +{(unsigned char*)"plussim", {226, 168, 166, 0}}, +{(unsigned char*)"plustwo", {226, 168, 167, 0}}, +{(unsigned char*)"pm", {194, 177, 0}}, +{(unsigned char*)"pointint", {226, 168, 149, 0}}, +{(unsigned char*)"popf", {240, 157, 149, 161, 0}}, +{(unsigned char*)"pound", {194, 163, 0}}, +{(unsigned char*)"pr", {226, 137, 186, 0}}, +{(unsigned char*)"prE", {226, 170, 179, 0}}, +{(unsigned char*)"prap", {226, 170, 183, 0}}, +{(unsigned char*)"prcue", {226, 137, 188, 0}}, +{(unsigned char*)"pre", {226, 170, 175, 0}}, +{(unsigned char*)"prec", {226, 137, 186, 0}}, +{(unsigned char*)"precapprox", {226, 170, 183, 0}}, +{(unsigned char*)"preccurlyeq", {226, 137, 188, 0}}, +{(unsigned char*)"preceq", {226, 170, 175, 0}}, +{(unsigned char*)"precnapprox", {226, 170, 185, 0}}, +{(unsigned char*)"precneqq", {226, 170, 181, 0}}, +{(unsigned char*)"precnsim", {226, 139, 168, 0}}, +{(unsigned char*)"precsim", {226, 137, 190, 0}}, +{(unsigned char*)"prime", {226, 128, 178, 0}}, +{(unsigned char*)"primes", {226, 
132, 153, 0}}, +{(unsigned char*)"prnE", {226, 170, 181, 0}}, +{(unsigned char*)"prnap", {226, 170, 185, 0}}, +{(unsigned char*)"prnsim", {226, 139, 168, 0}}, +{(unsigned char*)"prod", {226, 136, 143, 0}}, +{(unsigned char*)"profalar", {226, 140, 174, 0}}, +{(unsigned char*)"profline", {226, 140, 146, 0}}, +{(unsigned char*)"profsurf", {226, 140, 147, 0}}, +{(unsigned char*)"prop", {226, 136, 157, 0}}, +{(unsigned char*)"propto", {226, 136, 157, 0}}, +{(unsigned char*)"prsim", {226, 137, 190, 0}}, +{(unsigned char*)"prurel", {226, 138, 176, 0}}, +{(unsigned char*)"pscr", {240, 157, 147, 133, 0}}, +{(unsigned char*)"psi", {207, 136, 0}}, +{(unsigned char*)"puncsp", {226, 128, 136, 0}}, +{(unsigned char*)"qfr", {240, 157, 148, 174, 0}}, +{(unsigned char*)"qint", {226, 168, 140, 0}}, +{(unsigned char*)"qopf", {240, 157, 149, 162, 0}}, +{(unsigned char*)"qprime", {226, 129, 151, 0}}, +{(unsigned char*)"qscr", {240, 157, 147, 134, 0}}, +{(unsigned char*)"quaternions", {226, 132, 141, 0}}, +{(unsigned char*)"quatint", {226, 168, 150, 0}}, +{(unsigned char*)"quest", {63, 0}}, +{(unsigned char*)"questeq", {226, 137, 159, 0}}, +{(unsigned char*)"quot", {34, 0}}, +{(unsigned char*)"rAarr", {226, 135, 155, 0}}, +{(unsigned char*)"rArr", {226, 135, 146, 0}}, +{(unsigned char*)"rAtail", {226, 164, 156, 0}}, +{(unsigned char*)"rBarr", {226, 164, 143, 0}}, +{(unsigned char*)"rHar", {226, 165, 164, 0}}, +{(unsigned char*)"race", {226, 136, 189, 204, 177, 0}}, +{(unsigned char*)"racute", {197, 149, 0}}, +{(unsigned char*)"radic", {226, 136, 154, 0}}, +{(unsigned char*)"raemptyv", {226, 166, 179, 0}}, +{(unsigned char*)"rang", {226, 159, 169, 0}}, +{(unsigned char*)"rangd", {226, 166, 146, 0}}, +{(unsigned char*)"range", {226, 166, 165, 0}}, +{(unsigned char*)"rangle", {226, 159, 169, 0}}, +{(unsigned char*)"raquo", {194, 187, 0}}, +{(unsigned char*)"rarr", {226, 134, 146, 0}}, +{(unsigned char*)"rarrap", {226, 165, 181, 0}}, +{(unsigned char*)"rarrb", {226, 135, 165, 0}}, 
+{(unsigned char*)"rarrbfs", {226, 164, 160, 0}}, +{(unsigned char*)"rarrc", {226, 164, 179, 0}}, +{(unsigned char*)"rarrfs", {226, 164, 158, 0}}, +{(unsigned char*)"rarrhk", {226, 134, 170, 0}}, +{(unsigned char*)"rarrlp", {226, 134, 172, 0}}, +{(unsigned char*)"rarrpl", {226, 165, 133, 0}}, +{(unsigned char*)"rarrsim", {226, 165, 180, 0}}, +{(unsigned char*)"rarrtl", {226, 134, 163, 0}}, +{(unsigned char*)"rarrw", {226, 134, 157, 0}}, +{(unsigned char*)"ratail", {226, 164, 154, 0}}, +{(unsigned char*)"ratio", {226, 136, 182, 0}}, +{(unsigned char*)"rationals", {226, 132, 154, 0}}, +{(unsigned char*)"rbarr", {226, 164, 141, 0}}, +{(unsigned char*)"rbbrk", {226, 157, 179, 0}}, +{(unsigned char*)"rbrace", {125, 0}}, +{(unsigned char*)"rbrack", {93, 0}}, +{(unsigned char*)"rbrke", {226, 166, 140, 0}}, +{(unsigned char*)"rbrksld", {226, 166, 142, 0}}, +{(unsigned char*)"rbrkslu", {226, 166, 144, 0}}, +{(unsigned char*)"rcaron", {197, 153, 0}}, +{(unsigned char*)"rcedil", {197, 151, 0}}, +{(unsigned char*)"rceil", {226, 140, 137, 0}}, +{(unsigned char*)"rcub", {125, 0}}, +{(unsigned char*)"rcy", {209, 128, 0}}, +{(unsigned char*)"rdca", {226, 164, 183, 0}}, +{(unsigned char*)"rdldhar", {226, 165, 169, 0}}, +{(unsigned char*)"rdquo", {226, 128, 157, 0}}, +{(unsigned char*)"rdquor", {226, 128, 157, 0}}, +{(unsigned char*)"rdsh", {226, 134, 179, 0}}, +{(unsigned char*)"real", {226, 132, 156, 0}}, +{(unsigned char*)"realine", {226, 132, 155, 0}}, +{(unsigned char*)"realpart", {226, 132, 156, 0}}, +{(unsigned char*)"reals", {226, 132, 157, 0}}, +{(unsigned char*)"rect", {226, 150, 173, 0}}, +{(unsigned char*)"reg", {194, 174, 0}}, +{(unsigned char*)"rfisht", {226, 165, 189, 0}}, +{(unsigned char*)"rfloor", {226, 140, 139, 0}}, +{(unsigned char*)"rfr", {240, 157, 148, 175, 0}}, +{(unsigned char*)"rhard", {226, 135, 129, 0}}, +{(unsigned char*)"rharu", {226, 135, 128, 0}}, +{(unsigned char*)"rharul", {226, 165, 172, 0}}, +{(unsigned char*)"rho", {207, 129, 0}}, +{(unsigned 
char*)"rhov", {207, 177, 0}}, +{(unsigned char*)"rightarrow", {226, 134, 146, 0}}, +{(unsigned char*)"rightarrowtail", {226, 134, 163, 0}}, +{(unsigned char*)"rightharpoondown", {226, 135, 129, 0}}, +{(unsigned char*)"rightharpoonup", {226, 135, 128, 0}}, +{(unsigned char*)"rightleftarrows", {226, 135, 132, 0}}, +{(unsigned char*)"rightleftharpoons", {226, 135, 140, 0}}, +{(unsigned char*)"rightrightarrows", {226, 135, 137, 0}}, +{(unsigned char*)"rightsquigarrow", {226, 134, 157, 0}}, +{(unsigned char*)"rightthreetimes", {226, 139, 140, 0}}, +{(unsigned char*)"ring", {203, 154, 0}}, +{(unsigned char*)"risingdotseq", {226, 137, 147, 0}}, +{(unsigned char*)"rlarr", {226, 135, 132, 0}}, +{(unsigned char*)"rlhar", {226, 135, 140, 0}}, +{(unsigned char*)"rlm", {226, 128, 143, 0}}, +{(unsigned char*)"rmoust", {226, 142, 177, 0}}, +{(unsigned char*)"rmoustache", {226, 142, 177, 0}}, +{(unsigned char*)"rnmid", {226, 171, 174, 0}}, +{(unsigned char*)"roang", {226, 159, 173, 0}}, +{(unsigned char*)"roarr", {226, 135, 190, 0}}, +{(unsigned char*)"robrk", {226, 159, 167, 0}}, +{(unsigned char*)"ropar", {226, 166, 134, 0}}, +{(unsigned char*)"ropf", {240, 157, 149, 163, 0}}, +{(unsigned char*)"roplus", {226, 168, 174, 0}}, +{(unsigned char*)"rotimes", {226, 168, 181, 0}}, +{(unsigned char*)"rpar", {41, 0}}, +{(unsigned char*)"rpargt", {226, 166, 148, 0}}, +{(unsigned char*)"rppolint", {226, 168, 146, 0}}, +{(unsigned char*)"rrarr", {226, 135, 137, 0}}, +{(unsigned char*)"rsaquo", {226, 128, 186, 0}}, +{(unsigned char*)"rscr", {240, 157, 147, 135, 0}}, +{(unsigned char*)"rsh", {226, 134, 177, 0}}, +{(unsigned char*)"rsqb", {93, 0}}, +{(unsigned char*)"rsquo", {226, 128, 153, 0}}, +{(unsigned char*)"rsquor", {226, 128, 153, 0}}, +{(unsigned char*)"rthree", {226, 139, 140, 0}}, +{(unsigned char*)"rtimes", {226, 139, 138, 0}}, +{(unsigned char*)"rtri", {226, 150, 185, 0}}, +{(unsigned char*)"rtrie", {226, 138, 181, 0}}, +{(unsigned char*)"rtrif", {226, 150, 184, 0}}, +{(unsigned 
char*)"rtriltri", {226, 167, 142, 0}}, +{(unsigned char*)"ruluhar", {226, 165, 168, 0}}, +{(unsigned char*)"rx", {226, 132, 158, 0}}, +{(unsigned char*)"sacute", {197, 155, 0}}, +{(unsigned char*)"sbquo", {226, 128, 154, 0}}, +{(unsigned char*)"sc", {226, 137, 187, 0}}, +{(unsigned char*)"scE", {226, 170, 180, 0}}, +{(unsigned char*)"scap", {226, 170, 184, 0}}, +{(unsigned char*)"scaron", {197, 161, 0}}, +{(unsigned char*)"sccue", {226, 137, 189, 0}}, +{(unsigned char*)"sce", {226, 170, 176, 0}}, +{(unsigned char*)"scedil", {197, 159, 0}}, +{(unsigned char*)"scirc", {197, 157, 0}}, +{(unsigned char*)"scnE", {226, 170, 182, 0}}, +{(unsigned char*)"scnap", {226, 170, 186, 0}}, +{(unsigned char*)"scnsim", {226, 139, 169, 0}}, +{(unsigned char*)"scpolint", {226, 168, 147, 0}}, +{(unsigned char*)"scsim", {226, 137, 191, 0}}, +{(unsigned char*)"scy", {209, 129, 0}}, +{(unsigned char*)"sdot", {226, 139, 133, 0}}, +{(unsigned char*)"sdotb", {226, 138, 161, 0}}, +{(unsigned char*)"sdote", {226, 169, 166, 0}}, +{(unsigned char*)"seArr", {226, 135, 152, 0}}, +{(unsigned char*)"searhk", {226, 164, 165, 0}}, +{(unsigned char*)"searr", {226, 134, 152, 0}}, +{(unsigned char*)"searrow", {226, 134, 152, 0}}, +{(unsigned char*)"sect", {194, 167, 0}}, +{(unsigned char*)"semi", {59, 0}}, +{(unsigned char*)"seswar", {226, 164, 169, 0}}, +{(unsigned char*)"setminus", {226, 136, 150, 0}}, +{(unsigned char*)"setmn", {226, 136, 150, 0}}, +{(unsigned char*)"sext", {226, 156, 182, 0}}, +{(unsigned char*)"sfr", {240, 157, 148, 176, 0}}, +{(unsigned char*)"sfrown", {226, 140, 162, 0}}, +{(unsigned char*)"sharp", {226, 153, 175, 0}}, +{(unsigned char*)"shchcy", {209, 137, 0}}, +{(unsigned char*)"shcy", {209, 136, 0}}, +{(unsigned char*)"shortmid", {226, 136, 163, 0}}, +{(unsigned char*)"shortparallel", {226, 136, 165, 0}}, +{(unsigned char*)"shy", {194, 173, 0}}, +{(unsigned char*)"sigma", {207, 131, 0}}, +{(unsigned char*)"sigmaf", {207, 130, 0}}, +{(unsigned char*)"sigmav", {207, 130, 0}}, 
+{(unsigned char*)"sim", {226, 136, 188, 0}}, +{(unsigned char*)"simdot", {226, 169, 170, 0}}, +{(unsigned char*)"sime", {226, 137, 131, 0}}, +{(unsigned char*)"simeq", {226, 137, 131, 0}}, +{(unsigned char*)"simg", {226, 170, 158, 0}}, +{(unsigned char*)"simgE", {226, 170, 160, 0}}, +{(unsigned char*)"siml", {226, 170, 157, 0}}, +{(unsigned char*)"simlE", {226, 170, 159, 0}}, +{(unsigned char*)"simne", {226, 137, 134, 0}}, +{(unsigned char*)"simplus", {226, 168, 164, 0}}, +{(unsigned char*)"simrarr", {226, 165, 178, 0}}, +{(unsigned char*)"slarr", {226, 134, 144, 0}}, +{(unsigned char*)"smallsetminus", {226, 136, 150, 0}}, +{(unsigned char*)"smashp", {226, 168, 179, 0}}, +{(unsigned char*)"smeparsl", {226, 167, 164, 0}}, +{(unsigned char*)"smid", {226, 136, 163, 0}}, +{(unsigned char*)"smile", {226, 140, 163, 0}}, +{(unsigned char*)"smt", {226, 170, 170, 0}}, +{(unsigned char*)"smte", {226, 170, 172, 0}}, +{(unsigned char*)"smtes", {226, 170, 172, 239, 184, 128, 0}}, +{(unsigned char*)"softcy", {209, 140, 0}}, +{(unsigned char*)"sol", {47, 0}}, +{(unsigned char*)"solb", {226, 167, 132, 0}}, +{(unsigned char*)"solbar", {226, 140, 191, 0}}, +{(unsigned char*)"sopf", {240, 157, 149, 164, 0}}, +{(unsigned char*)"spades", {226, 153, 160, 0}}, +{(unsigned char*)"spadesuit", {226, 153, 160, 0}}, +{(unsigned char*)"spar", {226, 136, 165, 0}}, +{(unsigned char*)"sqcap", {226, 138, 147, 0}}, +{(unsigned char*)"sqcaps", {226, 138, 147, 239, 184, 128, 0}}, +{(unsigned char*)"sqcup", {226, 138, 148, 0}}, +{(unsigned char*)"sqcups", {226, 138, 148, 239, 184, 128, 0}}, +{(unsigned char*)"sqsub", {226, 138, 143, 0}}, +{(unsigned char*)"sqsube", {226, 138, 145, 0}}, +{(unsigned char*)"sqsubset", {226, 138, 143, 0}}, +{(unsigned char*)"sqsubseteq", {226, 138, 145, 0}}, +{(unsigned char*)"sqsup", {226, 138, 144, 0}}, +{(unsigned char*)"sqsupe", {226, 138, 146, 0}}, +{(unsigned char*)"sqsupset", {226, 138, 144, 0}}, +{(unsigned char*)"sqsupseteq", {226, 138, 146, 0}}, +{(unsigned 
char*)"squ", {226, 150, 161, 0}}, +{(unsigned char*)"square", {226, 150, 161, 0}}, +{(unsigned char*)"squarf", {226, 150, 170, 0}}, +{(unsigned char*)"squf", {226, 150, 170, 0}}, +{(unsigned char*)"srarr", {226, 134, 146, 0}}, +{(unsigned char*)"sscr", {240, 157, 147, 136, 0}}, +{(unsigned char*)"ssetmn", {226, 136, 150, 0}}, +{(unsigned char*)"ssmile", {226, 140, 163, 0}}, +{(unsigned char*)"sstarf", {226, 139, 134, 0}}, +{(unsigned char*)"star", {226, 152, 134, 0}}, +{(unsigned char*)"starf", {226, 152, 133, 0}}, +{(unsigned char*)"straightepsilon", {207, 181, 0}}, +{(unsigned char*)"straightphi", {207, 149, 0}}, +{(unsigned char*)"strns", {194, 175, 0}}, +{(unsigned char*)"sub", {226, 138, 130, 0}}, +{(unsigned char*)"subE", {226, 171, 133, 0}}, +{(unsigned char*)"subdot", {226, 170, 189, 0}}, +{(unsigned char*)"sube", {226, 138, 134, 0}}, +{(unsigned char*)"subedot", {226, 171, 131, 0}}, +{(unsigned char*)"submult", {226, 171, 129, 0}}, +{(unsigned char*)"subnE", {226, 171, 139, 0}}, +{(unsigned char*)"subne", {226, 138, 138, 0}}, +{(unsigned char*)"subplus", {226, 170, 191, 0}}, +{(unsigned char*)"subrarr", {226, 165, 185, 0}}, +{(unsigned char*)"subset", {226, 138, 130, 0}}, +{(unsigned char*)"subseteq", {226, 138, 134, 0}}, +{(unsigned char*)"subseteqq", {226, 171, 133, 0}}, +{(unsigned char*)"subsetneq", {226, 138, 138, 0}}, +{(unsigned char*)"subsetneqq", {226, 171, 139, 0}}, +{(unsigned char*)"subsim", {226, 171, 135, 0}}, +{(unsigned char*)"subsub", {226, 171, 149, 0}}, +{(unsigned char*)"subsup", {226, 171, 147, 0}}, +{(unsigned char*)"succ", {226, 137, 187, 0}}, +{(unsigned char*)"succapprox", {226, 170, 184, 0}}, +{(unsigned char*)"succcurlyeq", {226, 137, 189, 0}}, +{(unsigned char*)"succeq", {226, 170, 176, 0}}, +{(unsigned char*)"succnapprox", {226, 170, 186, 0}}, +{(unsigned char*)"succneqq", {226, 170, 182, 0}}, +{(unsigned char*)"succnsim", {226, 139, 169, 0}}, +{(unsigned char*)"succsim", {226, 137, 191, 0}}, +{(unsigned char*)"sum", {226, 136, 
145, 0}}, +{(unsigned char*)"sung", {226, 153, 170, 0}}, +{(unsigned char*)"sup", {226, 138, 131, 0}}, +{(unsigned char*)"sup1", {194, 185, 0}}, +{(unsigned char*)"sup2", {194, 178, 0}}, +{(unsigned char*)"sup3", {194, 179, 0}}, +{(unsigned char*)"supE", {226, 171, 134, 0}}, +{(unsigned char*)"supdot", {226, 170, 190, 0}}, +{(unsigned char*)"supdsub", {226, 171, 152, 0}}, +{(unsigned char*)"supe", {226, 138, 135, 0}}, +{(unsigned char*)"supedot", {226, 171, 132, 0}}, +{(unsigned char*)"suphsol", {226, 159, 137, 0}}, +{(unsigned char*)"suphsub", {226, 171, 151, 0}}, +{(unsigned char*)"suplarr", {226, 165, 187, 0}}, +{(unsigned char*)"supmult", {226, 171, 130, 0}}, +{(unsigned char*)"supnE", {226, 171, 140, 0}}, +{(unsigned char*)"supne", {226, 138, 139, 0}}, +{(unsigned char*)"supplus", {226, 171, 128, 0}}, +{(unsigned char*)"supset", {226, 138, 131, 0}}, +{(unsigned char*)"supseteq", {226, 138, 135, 0}}, +{(unsigned char*)"supseteqq", {226, 171, 134, 0}}, +{(unsigned char*)"supsetneq", {226, 138, 139, 0}}, +{(unsigned char*)"supsetneqq", {226, 171, 140, 0}}, +{(unsigned char*)"supsim", {226, 171, 136, 0}}, +{(unsigned char*)"supsub", {226, 171, 148, 0}}, +{(unsigned char*)"supsup", {226, 171, 150, 0}}, +{(unsigned char*)"swArr", {226, 135, 153, 0}}, +{(unsigned char*)"swarhk", {226, 164, 166, 0}}, +{(unsigned char*)"swarr", {226, 134, 153, 0}}, +{(unsigned char*)"swarrow", {226, 134, 153, 0}}, +{(unsigned char*)"swnwar", {226, 164, 170, 0}}, +{(unsigned char*)"szlig", {195, 159, 0}}, +{(unsigned char*)"target", {226, 140, 150, 0}}, +{(unsigned char*)"tau", {207, 132, 0}}, +{(unsigned char*)"tbrk", {226, 142, 180, 0}}, +{(unsigned char*)"tcaron", {197, 165, 0}}, +{(unsigned char*)"tcedil", {197, 163, 0}}, +{(unsigned char*)"tcy", {209, 130, 0}}, +{(unsigned char*)"tdot", {226, 131, 155, 0}}, +{(unsigned char*)"telrec", {226, 140, 149, 0}}, +{(unsigned char*)"tfr", {240, 157, 148, 177, 0}}, +{(unsigned char*)"there4", {226, 136, 180, 0}}, +{(unsigned 
char*)"therefore", {226, 136, 180, 0}}, +{(unsigned char*)"theta", {206, 184, 0}}, +{(unsigned char*)"thetasym", {207, 145, 0}}, +{(unsigned char*)"thetav", {207, 145, 0}}, +{(unsigned char*)"thickapprox", {226, 137, 136, 0}}, +{(unsigned char*)"thicksim", {226, 136, 188, 0}}, +{(unsigned char*)"thinsp", {226, 128, 137, 0}}, +{(unsigned char*)"thkap", {226, 137, 136, 0}}, +{(unsigned char*)"thksim", {226, 136, 188, 0}}, +{(unsigned char*)"thorn", {195, 190, 0}}, +{(unsigned char*)"tilde", {203, 156, 0}}, +{(unsigned char*)"times", {195, 151, 0}}, +{(unsigned char*)"timesb", {226, 138, 160, 0}}, +{(unsigned char*)"timesbar", {226, 168, 177, 0}}, +{(unsigned char*)"timesd", {226, 168, 176, 0}}, +{(unsigned char*)"tint", {226, 136, 173, 0}}, +{(unsigned char*)"toea", {226, 164, 168, 0}}, +{(unsigned char*)"top", {226, 138, 164, 0}}, +{(unsigned char*)"topbot", {226, 140, 182, 0}}, +{(unsigned char*)"topcir", {226, 171, 177, 0}}, +{(unsigned char*)"topf", {240, 157, 149, 165, 0}}, +{(unsigned char*)"topfork", {226, 171, 154, 0}}, +{(unsigned char*)"tosa", {226, 164, 169, 0}}, +{(unsigned char*)"tprime", {226, 128, 180, 0}}, +{(unsigned char*)"trade", {226, 132, 162, 0}}, +{(unsigned char*)"triangle", {226, 150, 181, 0}}, +{(unsigned char*)"triangledown", {226, 150, 191, 0}}, +{(unsigned char*)"triangleleft", {226, 151, 131, 0}}, +{(unsigned char*)"trianglelefteq", {226, 138, 180, 0}}, +{(unsigned char*)"triangleq", {226, 137, 156, 0}}, +{(unsigned char*)"triangleright", {226, 150, 185, 0}}, +{(unsigned char*)"trianglerighteq", {226, 138, 181, 0}}, +{(unsigned char*)"tridot", {226, 151, 172, 0}}, +{(unsigned char*)"trie", {226, 137, 156, 0}}, +{(unsigned char*)"triminus", {226, 168, 186, 0}}, +{(unsigned char*)"triplus", {226, 168, 185, 0}}, +{(unsigned char*)"trisb", {226, 167, 141, 0}}, +{(unsigned char*)"tritime", {226, 168, 187, 0}}, +{(unsigned char*)"trpezium", {226, 143, 162, 0}}, +{(unsigned char*)"tscr", {240, 157, 147, 137, 0}}, +{(unsigned char*)"tscy", {209, 
134, 0}}, +{(unsigned char*)"tshcy", {209, 155, 0}}, +{(unsigned char*)"tstrok", {197, 167, 0}}, +{(unsigned char*)"twixt", {226, 137, 172, 0}}, +{(unsigned char*)"twoheadleftarrow", {226, 134, 158, 0}}, +{(unsigned char*)"twoheadrightarrow", {226, 134, 160, 0}}, +{(unsigned char*)"uArr", {226, 135, 145, 0}}, +{(unsigned char*)"uHar", {226, 165, 163, 0}}, +{(unsigned char*)"uacute", {195, 186, 0}}, +{(unsigned char*)"uarr", {226, 134, 145, 0}}, +{(unsigned char*)"ubrcy", {209, 158, 0}}, +{(unsigned char*)"ubreve", {197, 173, 0}}, +{(unsigned char*)"ucirc", {195, 187, 0}}, +{(unsigned char*)"ucy", {209, 131, 0}}, +{(unsigned char*)"udarr", {226, 135, 133, 0}}, +{(unsigned char*)"udblac", {197, 177, 0}}, +{(unsigned char*)"udhar", {226, 165, 174, 0}}, +{(unsigned char*)"ufisht", {226, 165, 190, 0}}, +{(unsigned char*)"ufr", {240, 157, 148, 178, 0}}, +{(unsigned char*)"ugrave", {195, 185, 0}}, +{(unsigned char*)"uharl", {226, 134, 191, 0}}, +{(unsigned char*)"uharr", {226, 134, 190, 0}}, +{(unsigned char*)"uhblk", {226, 150, 128, 0}}, +{(unsigned char*)"ulcorn", {226, 140, 156, 0}}, +{(unsigned char*)"ulcorner", {226, 140, 156, 0}}, +{(unsigned char*)"ulcrop", {226, 140, 143, 0}}, +{(unsigned char*)"ultri", {226, 151, 184, 0}}, +{(unsigned char*)"umacr", {197, 171, 0}}, +{(unsigned char*)"uml", {194, 168, 0}}, +{(unsigned char*)"uogon", {197, 179, 0}}, +{(unsigned char*)"uopf", {240, 157, 149, 166, 0}}, +{(unsigned char*)"uparrow", {226, 134, 145, 0}}, +{(unsigned char*)"updownarrow", {226, 134, 149, 0}}, +{(unsigned char*)"upharpoonleft", {226, 134, 191, 0}}, +{(unsigned char*)"upharpoonright", {226, 134, 190, 0}}, +{(unsigned char*)"uplus", {226, 138, 142, 0}}, +{(unsigned char*)"upsi", {207, 133, 0}}, +{(unsigned char*)"upsih", {207, 146, 0}}, +{(unsigned char*)"upsilon", {207, 133, 0}}, +{(unsigned char*)"upuparrows", {226, 135, 136, 0}}, +{(unsigned char*)"urcorn", {226, 140, 157, 0}}, +{(unsigned char*)"urcorner", {226, 140, 157, 0}}, +{(unsigned char*)"urcrop", 
{226, 140, 142, 0}}, +{(unsigned char*)"uring", {197, 175, 0}}, +{(unsigned char*)"urtri", {226, 151, 185, 0}}, +{(unsigned char*)"uscr", {240, 157, 147, 138, 0}}, +{(unsigned char*)"utdot", {226, 139, 176, 0}}, +{(unsigned char*)"utilde", {197, 169, 0}}, +{(unsigned char*)"utri", {226, 150, 181, 0}}, +{(unsigned char*)"utrif", {226, 150, 180, 0}}, +{(unsigned char*)"uuarr", {226, 135, 136, 0}}, +{(unsigned char*)"uuml", {195, 188, 0}}, +{(unsigned char*)"uwangle", {226, 166, 167, 0}}, +{(unsigned char*)"vArr", {226, 135, 149, 0}}, +{(unsigned char*)"vBar", {226, 171, 168, 0}}, +{(unsigned char*)"vBarv", {226, 171, 169, 0}}, +{(unsigned char*)"vDash", {226, 138, 168, 0}}, +{(unsigned char*)"vangrt", {226, 166, 156, 0}}, +{(unsigned char*)"varepsilon", {207, 181, 0}}, +{(unsigned char*)"varkappa", {207, 176, 0}}, +{(unsigned char*)"varnothing", {226, 136, 133, 0}}, +{(unsigned char*)"varphi", {207, 149, 0}}, +{(unsigned char*)"varpi", {207, 150, 0}}, +{(unsigned char*)"varpropto", {226, 136, 157, 0}}, +{(unsigned char*)"varr", {226, 134, 149, 0}}, +{(unsigned char*)"varrho", {207, 177, 0}}, +{(unsigned char*)"varsigma", {207, 130, 0}}, +{(unsigned char*)"varsubsetneq", {226, 138, 138, 239, 184, 128, 0}}, +{(unsigned char*)"varsubsetneqq", {226, 171, 139, 239, 184, 128, 0}}, +{(unsigned char*)"varsupsetneq", {226, 138, 139, 239, 184, 128, 0}}, +{(unsigned char*)"varsupsetneqq", {226, 171, 140, 239, 184, 128, 0}}, +{(unsigned char*)"vartheta", {207, 145, 0}}, +{(unsigned char*)"vartriangleleft", {226, 138, 178, 0}}, +{(unsigned char*)"vartriangleright", {226, 138, 179, 0}}, +{(unsigned char*)"vcy", {208, 178, 0}}, +{(unsigned char*)"vdash", {226, 138, 162, 0}}, +{(unsigned char*)"vee", {226, 136, 168, 0}}, +{(unsigned char*)"veebar", {226, 138, 187, 0}}, +{(unsigned char*)"veeeq", {226, 137, 154, 0}}, +{(unsigned char*)"vellip", {226, 139, 174, 0}}, +{(unsigned char*)"verbar", {124, 0}}, +{(unsigned char*)"vert", {124, 0}}, +{(unsigned char*)"vfr", {240, 157, 148, 
179, 0}}, +{(unsigned char*)"vltri", {226, 138, 178, 0}}, +{(unsigned char*)"vnsub", {226, 138, 130, 226, 131, 146, 0}}, +{(unsigned char*)"vnsup", {226, 138, 131, 226, 131, 146, 0}}, +{(unsigned char*)"vopf", {240, 157, 149, 167, 0}}, +{(unsigned char*)"vprop", {226, 136, 157, 0}}, +{(unsigned char*)"vrtri", {226, 138, 179, 0}}, +{(unsigned char*)"vscr", {240, 157, 147, 139, 0}}, +{(unsigned char*)"vsubnE", {226, 171, 139, 239, 184, 128, 0}}, +{(unsigned char*)"vsubne", {226, 138, 138, 239, 184, 128, 0}}, +{(unsigned char*)"vsupnE", {226, 171, 140, 239, 184, 128, 0}}, +{(unsigned char*)"vsupne", {226, 138, 139, 239, 184, 128, 0}}, +{(unsigned char*)"vzigzag", {226, 166, 154, 0}}, +{(unsigned char*)"wcirc", {197, 181, 0}}, +{(unsigned char*)"wedbar", {226, 169, 159, 0}}, +{(unsigned char*)"wedge", {226, 136, 167, 0}}, +{(unsigned char*)"wedgeq", {226, 137, 153, 0}}, +{(unsigned char*)"weierp", {226, 132, 152, 0}}, +{(unsigned char*)"wfr", {240, 157, 148, 180, 0}}, +{(unsigned char*)"wopf", {240, 157, 149, 168, 0}}, +{(unsigned char*)"wp", {226, 132, 152, 0}}, +{(unsigned char*)"wr", {226, 137, 128, 0}}, +{(unsigned char*)"wreath", {226, 137, 128, 0}}, +{(unsigned char*)"wscr", {240, 157, 147, 140, 0}}, +{(unsigned char*)"xcap", {226, 139, 130, 0}}, +{(unsigned char*)"xcirc", {226, 151, 175, 0}}, +{(unsigned char*)"xcup", {226, 139, 131, 0}}, +{(unsigned char*)"xdtri", {226, 150, 189, 0}}, +{(unsigned char*)"xfr", {240, 157, 148, 181, 0}}, +{(unsigned char*)"xhArr", {226, 159, 186, 0}}, +{(unsigned char*)"xharr", {226, 159, 183, 0}}, +{(unsigned char*)"xi", {206, 190, 0}}, +{(unsigned char*)"xlArr", {226, 159, 184, 0}}, +{(unsigned char*)"xlarr", {226, 159, 181, 0}}, +{(unsigned char*)"xmap", {226, 159, 188, 0}}, +{(unsigned char*)"xnis", {226, 139, 187, 0}}, +{(unsigned char*)"xodot", {226, 168, 128, 0}}, +{(unsigned char*)"xopf", {240, 157, 149, 169, 0}}, +{(unsigned char*)"xoplus", {226, 168, 129, 0}}, +{(unsigned char*)"xotime", {226, 168, 130, 0}}, +{(unsigned 
char*)"xrArr", {226, 159, 185, 0}}, +{(unsigned char*)"xrarr", {226, 159, 182, 0}}, +{(unsigned char*)"xscr", {240, 157, 147, 141, 0}}, +{(unsigned char*)"xsqcup", {226, 168, 134, 0}}, +{(unsigned char*)"xuplus", {226, 168, 132, 0}}, +{(unsigned char*)"xutri", {226, 150, 179, 0}}, +{(unsigned char*)"xvee", {226, 139, 129, 0}}, +{(unsigned char*)"xwedge", {226, 139, 128, 0}}, +{(unsigned char*)"yacute", {195, 189, 0}}, +{(unsigned char*)"yacy", {209, 143, 0}}, +{(unsigned char*)"ycirc", {197, 183, 0}}, +{(unsigned char*)"ycy", {209, 139, 0}}, +{(unsigned char*)"yen", {194, 165, 0}}, +{(unsigned char*)"yfr", {240, 157, 148, 182, 0}}, +{(unsigned char*)"yicy", {209, 151, 0}}, +{(unsigned char*)"yopf", {240, 157, 149, 170, 0}}, +{(unsigned char*)"yscr", {240, 157, 147, 142, 0}}, +{(unsigned char*)"yucy", {209, 142, 0}}, +{(unsigned char*)"yuml", {195, 191, 0}}, +{(unsigned char*)"zacute", {197, 186, 0}}, +{(unsigned char*)"zcaron", {197, 190, 0}}, +{(unsigned char*)"zcy", {208, 183, 0}}, +{(unsigned char*)"zdot", {197, 188, 0}}, +{(unsigned char*)"zeetrf", {226, 132, 168, 0}}, +{(unsigned char*)"zeta", {206, 182, 0}}, +{(unsigned char*)"zfr", {240, 157, 148, 183, 0}}, +{(unsigned char*)"zhcy", {208, 182, 0}}, +{(unsigned char*)"zigrarr", {226, 135, 157, 0}}, +{(unsigned char*)"zopf", {240, 157, 149, 171, 0}}, +{(unsigned char*)"zscr", {240, 157, 147, 143, 0}}, +{(unsigned char*)"zwj", {226, 128, 141, 0}}, +{(unsigned char*)"zwnj", {226, 128, 140, 0}}, +}; diff --git a/deps/cmark/src/houdini.h b/deps/cmark/src/houdini.h new file mode 100644 index 0000000..f738e82 --- /dev/null +++ b/deps/cmark/src/houdini.h @@ -0,0 +1,51 @@ +#ifndef CMARK_HOUDINI_H +#define CMARK_HOUDINI_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "config.h" +#include "buffer.h" + +#ifdef HAVE___BUILTIN_EXPECT +#define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect((x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#ifdef 
HOUDINI_USE_LOCALE +#define _isxdigit(c) isxdigit(c) +#define _isdigit(c) isdigit(c) +#else +/* + * Helper _isdigit methods -- do not trust the current locale + * */ +#define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) +#define _isdigit(c) ((c) >= '0' && (c) <= '9') +#endif + +#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) +#define HOUDINI_UNESCAPED_SIZE(x) (x) + +extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size); +extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size); +extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size, int secure); +extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size); +extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size); +extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/houdini_href_e.c b/deps/cmark/src/houdini_href_e.c new file mode 100644 index 0000000..959d4bb --- /dev/null +++ b/deps/cmark/src/houdini_href_e.c @@ -0,0 +1,100 @@ +#include +#include +#include + +#include "houdini.h" + +/* + * The following characters will not be escaped: + * + * -_.+!*'(),%#@?=;:/,+&$~ alphanum + * + * Note that this character set is the addition of: + * + * - The characters which are safe to be in an URL + * - The characters which are *not* safe to be in + * an URL because they are RESERVED characters. + * + * We assume (lazily) that any RESERVED char that + * appears inside an URL is actually meant to + * have its native function (i.e. as an URL + * component/separator) and hence needs no escaping. + * + * There are two exceptions: the characters & (amp) + * and ' (single quote) do not appear in the table. + * They are meant to appear in the URL as components, + * yet they require special HTML-entity escaping + * to generate valid HTML markup. 
+ * + * All other characters will be escaped to %XX. + * + */ +static const char HREF_SAFE[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { + static const uint8_t hex_chars[] = "0123456789ABCDEF"; + bufsize_t i = 0, org; + uint8_t hex_str[3]; + + hex_str[0] = '%'; + + while (i < size) { + org = i; + while (i < size && HREF_SAFE[src[i]] != 0) + i++; + + if (likely(i > org)) + cmark_strbuf_put(ob, src + org, i - org); + + /* escaping */ + if (i >= size) + break; + + switch (src[i]) { + /* amp appears all the time in URLs, but needs + * HTML-entity escaping to be inside an href */ + case '&': + cmark_strbuf_puts(ob, "&"); + break; + + /* the single quote is a valid URL character + * according to the standard; it needs HTML + * entity escaping too */ + case '\'': + cmark_strbuf_puts(ob, "'"); + break; + +/* the space can be escaped to %20 or a plus + * sign. we're going with the generic escape + * for now. 
the plus thing is more commonly seen + * when building GET strings */ +#if 0 + case ' ': + cmark_strbuf_putc(ob, '+'); + break; +#endif + + /* every other character goes with a %XX escaping */ + default: + hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; + hex_str[2] = hex_chars[src[i] & 0xF]; + cmark_strbuf_put(ob, hex_str, 3); + } + + i++; + } + + return 1; +} diff --git a/deps/cmark/src/houdini_html_e.c b/deps/cmark/src/houdini_html_e.c new file mode 100644 index 0000000..0e539f0 --- /dev/null +++ b/deps/cmark/src/houdini_html_e.c @@ -0,0 +1,66 @@ +#include +#include +#include + +#include "houdini.h" + +/** + * According to the OWASP rules: + * + * & --> & + * < --> < + * > --> > + * " --> " + * ' --> ' ' is not recommended + * / --> / forward slash is included as it helps end an HTML entity + * + */ +static const char HTML_ESCAPE_TABLE[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const char *HTML_ESCAPES[] = {"", """, "&", "'", + "/", "<", ">"}; + +int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, + int secure) { + bufsize_t i = 0, org, esc = 0; + + while (i < size) { + org = i; + while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) + i++; + + if (i > org) + cmark_strbuf_put(ob, 
src + org, i - org); + + /* escaping */ + if (unlikely(i >= size)) + break; + + /* The forward slash is only escaped in secure mode */ + if ((src[i] == '/' || src[i] == '\'') && !secure) { + cmark_strbuf_putc(ob, src[i]); + } else { + cmark_strbuf_puts(ob, HTML_ESCAPES[esc]); + } + + i++; + } + + return 1; +} + +int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { + return houdini_escape_html0(ob, src, size, 1); +} diff --git a/deps/cmark/src/houdini_html_u.c b/deps/cmark/src/houdini_html_u.c new file mode 100644 index 0000000..3b7466e --- /dev/null +++ b/deps/cmark/src/houdini_html_u.c @@ -0,0 +1,153 @@ +#include +#include +#include + +#include "buffer.h" +#include "houdini.h" +#include "utf8.h" +#include "entities.inc" + +/* Binary tree lookup code for entities added by JGM */ + +static const unsigned char *S_lookup(int i, int low, int hi, + const unsigned char *s, int len) { + int j; + int cmp = + strncmp((const char *)s, (const char *)cmark_entities[i].entity, len); + if (cmp == 0 && cmark_entities[i].entity[len] == 0) { + return (const unsigned char *)cmark_entities[i].bytes; + } else if (cmp <= 0 && i > low) { + j = i - ((i - low) / 2); + if (j == i) + j -= 1; + return S_lookup(j, low, i - 1, s, len); + } else if (cmp > 0 && i < hi) { + j = i + ((hi - i) / 2); + if (j == i) + j += 1; + return S_lookup(j, i + 1, hi, s, len); + } else { + return NULL; + } +} + +static const unsigned char *S_lookup_entity(const unsigned char *s, int len) { + return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len); +} + +bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size) { + bufsize_t i = 0; + + if (size >= 3 && src[0] == '#') { + int codepoint = 0; + int num_digits = 0; + int max_digits = 7; + + if (_isdigit(src[1])) { + for (i = 1; i < size && _isdigit(src[i]); ++i) { + codepoint = (codepoint * 10) + (src[i] - '0'); + + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer 
overflow. + codepoint = 0x110000; + } + } + + num_digits = i - 1; + max_digits = 7; + } + + else if (src[1] == 'x' || src[1] == 'X') { + for (i = 2; i < size && _isxdigit(src[i]); ++i) { + codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); + + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer overflow. + codepoint = 0x110000; + } + } + + num_digits = i - 2; + max_digits = 6; + } + + if (num_digits >= 1 && num_digits <= max_digits && + i < size && src[i] == ';') { + if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || + codepoint >= 0x110000) { + codepoint = 0xFFFD; + } + cmark_utf8proc_encode_char(codepoint, ob); + return i + 1; + } + } + + else { + if (size > CMARK_ENTITY_MAX_LENGTH) + size = CMARK_ENTITY_MAX_LENGTH; + + for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) { + if (src[i] == ' ') + break; + + if (src[i] == ';') { + const unsigned char *entity = S_lookup_entity(src, i); + + if (entity != NULL) { + cmark_strbuf_puts(ob, (const char *)entity); + return i + 1; + } + + break; + } + } + } + + return 0; +} + +int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size) { + bufsize_t i = 0, org, ent; + + while (i < size) { + org = i; + while (i < size && src[i] != '&') + i++; + + if (likely(i > org)) { + if (unlikely(org == 0)) { + if (i >= size) + return 0; + + cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); + } + + cmark_strbuf_put(ob, src + org, i - org); + } + + /* escaping */ + if (i >= size) + break; + + i++; + + ent = houdini_unescape_ent(ob, src + i, size - i); + i += ent; + + /* not really an entity */ + if (ent == 0) + cmark_strbuf_putc(ob, '&'); + } + + return 1; +} + +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, + bufsize_t size) { + if (!houdini_unescape_html(ob, src, size)) + cmark_strbuf_put(ob, src, size); +} diff --git a/deps/cmark/src/html.c b/deps/cmark/src/html.c new file mode 100644 index 0000000..962d795 --- /dev/null +++ 
b/deps/cmark/src/html.c @@ -0,0 +1,341 @@ +#include +#include +#include +#include +#include "cmark_ctype.h" +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "houdini.h" +#include "scanners.h" + +#define BUFFER_SIZE 100 + +// Functions to convert cmark_nodes to HTML strings. + +static void escape_html(cmark_strbuf *dest, const unsigned char *source, + bufsize_t length) { + houdini_escape_html0(dest, source, length, 0); +} + +static CMARK_INLINE void cr(cmark_strbuf *html) { + if (html->size && html->ptr[html->size - 1] != '\n') + cmark_strbuf_putc(html, '\n'); +} + +struct render_state { + cmark_strbuf *html; + cmark_node *plain; +}; + +static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, + int options) { + char buffer[BUFFER_SIZE]; + if (CMARK_OPT_SOURCEPOS & options) { + snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", + cmark_node_get_start_line(node), cmark_node_get_start_column(node), + cmark_node_get_end_line(node), cmark_node_get_end_column(node)); + cmark_strbuf_puts(html, buffer); + } +} + +static int S_render_node(cmark_node *node, cmark_event_type ev_type, + struct render_state *state, int options) { + cmark_node *parent; + cmark_node *grandparent; + cmark_strbuf *html = state->html; + char start_heading[] = "plain == node) { // back at original node + state->plain = NULL; + } + + if (state->plain != NULL) { + switch (node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_INLINE: + escape_html(html, node->data, node->len); + break; + + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: + cmark_strbuf_putc(html, ' '); + break; + + default: + break; + } + return 1; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + cr(html); + cmark_strbuf_puts(html, "\n"); + } else { + cr(html); + cmark_strbuf_puts(html, "\n"); + } + break; + + case CMARK_NODE_LIST: { + cmark_list_type list_type = 
(cmark_list_type)node->as.list.list_type; + int start = node->as.list.start; + + if (entering) { + cr(html); + if (list_type == CMARK_BULLET_LIST) { + cmark_strbuf_puts(html, "\n"); + } else if (start == 1) { + cmark_strbuf_puts(html, "\n"); + } else { + snprintf(buffer, BUFFER_SIZE, "
    \n"); + } + } else { + cmark_strbuf_puts(html, + list_type == CMARK_BULLET_LIST ? "\n" : "
\n"); + } + break; + } + + case CMARK_NODE_ITEM: + if (entering) { + cr(html); + cmark_strbuf_puts(html, "'); + } else { + cmark_strbuf_puts(html, "\n"); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + cr(html); + start_heading[2] = (char)('0' + node->as.heading.level); + cmark_strbuf_puts(html, start_heading); + S_render_sourcepos(node, html, options); + cmark_strbuf_putc(html, '>'); + } else { + end_heading[3] = (char)('0' + node->as.heading.level); + cmark_strbuf_puts(html, end_heading); + cmark_strbuf_puts(html, ">\n"); + } + break; + + case CMARK_NODE_CODE_BLOCK: + cr(html); + + if (node->as.code.info == NULL || node->as.code.info[0] == 0) { + cmark_strbuf_puts(html, ""); + } else { + bufsize_t first_tag = 0; + while (node->as.code.info[first_tag] && + !cmark_isspace(node->as.code.info[first_tag])) { + first_tag += 1; + } + + cmark_strbuf_puts(html, "as.code.info, first_tag); + cmark_strbuf_puts(html, "\">"); + } + + escape_html(html, node->data, node->len); + cmark_strbuf_puts(html, "\n"); + break; + + case CMARK_NODE_HTML_BLOCK: + cr(html); + if (!(options & CMARK_OPT_UNSAFE)) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_put(html, node->data, node->len); + } + cr(html); + break; + + case CMARK_NODE_CUSTOM_BLOCK: { + unsigned char *block = entering ? node->as.custom.on_enter : + node->as.custom.on_exit; + cr(html); + if (block) { + cmark_strbuf_puts(html, (char *)block); + } + cr(html); + break; + } + + case CMARK_NODE_THEMATIC_BREAK: + cr(html); + cmark_strbuf_puts(html, "\n"); + break; + + case CMARK_NODE_PARAGRAPH: + parent = cmark_node_parent(node); + grandparent = cmark_node_parent(parent); + if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) { + tight = grandparent->as.list.tight; + } else { + tight = false; + } + if (!tight) { + if (entering) { + cr(html); + cmark_strbuf_puts(html, "'); + } else { + cmark_strbuf_puts(html, "

\n"); + } + } + break; + + case CMARK_NODE_TEXT: + escape_html(html, node->data, node->len); + break; + + case CMARK_NODE_LINEBREAK: + cmark_strbuf_puts(html, "
\n"); + break; + + case CMARK_NODE_SOFTBREAK: + if (options & CMARK_OPT_HARDBREAKS) { + cmark_strbuf_puts(html, "
\n"); + } else if (options & CMARK_OPT_NOBREAKS) { + cmark_strbuf_putc(html, ' '); + } else { + cmark_strbuf_putc(html, '\n'); + } + break; + + case CMARK_NODE_CODE: + cmark_strbuf_puts(html, ""); + escape_html(html, node->data, node->len); + cmark_strbuf_puts(html, ""); + break; + + case CMARK_NODE_HTML_INLINE: + if (!(options & CMARK_OPT_UNSAFE)) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_put(html, node->data, node->len); + } + break; + + case CMARK_NODE_CUSTOM_INLINE: { + unsigned char *block = entering ? node->as.custom.on_enter : + node->as.custom.on_exit; + if (block) { + cmark_strbuf_puts(html, (char *)block); + } + break; + } + + case CMARK_NODE_STRONG: + if (entering) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + break; + + case CMARK_NODE_LINK: + if (entering) { + cmark_strbuf_puts(html, "
as.link.url && ((options & CMARK_OPT_UNSAFE) || + !(_scan_dangerous_url(node->as.link.url)))) { + houdini_escape_href(html, node->as.link.url, + strlen((char *)node->as.link.url)); + } + if (node->as.link.title) { + cmark_strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, + strlen((char *)node->as.link.title)); + } + cmark_strbuf_puts(html, "\">"); + } else { + cmark_strbuf_puts(html, ""); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + cmark_strbuf_puts(html, "as.link.url && ((options & CMARK_OPT_UNSAFE) || + !(_scan_dangerous_url(node->as.link.url)))) { + houdini_escape_href(html, node->as.link.url, + strlen((char *)node->as.link.url)); + } + cmark_strbuf_puts(html, "\" alt=\""); + state->plain = node; + } else { + if (node->as.link.title) { + cmark_strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, + strlen((char *)node->as.link.title)); + } + + cmark_strbuf_puts(html, "\" />"); + } + break; + + default: + assert(false); + break; + } + + // cmark_strbuf_putc(html, 'x'); + return 1; +} + +char *cmark_render_html(cmark_node *root, int options) { + char *result; + cmark_strbuf html = CMARK_BUF_INIT(root->mem); + cmark_event_type ev_type; + cmark_node *cur; + struct render_state state = {&html, NULL}; + cmark_iter *iter = cmark_iter_new(root); + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state, options); + } + result = (char *)cmark_strbuf_detach(&html); + + cmark_iter_free(iter); + return result; +} diff --git a/deps/cmark/src/inlines.c b/deps/cmark/src/inlines.c new file mode 100644 index 0000000..96f4d45 --- /dev/null +++ b/deps/cmark/src/inlines.c @@ -0,0 +1,1483 @@ +#include +#include +#include + +#include "cmark_ctype.h" +#include "config.h" +#include "node.h" +#include "parser.h" +#include "references.h" +#include "cmark.h" +#include "houdini.h" +#include "utf8.h" +#include "scanners.h" +#include "inlines.h" + 
// UTF-8 byte sequences of the typographic characters that the handlers in
// this file (e.g. handle_hyphen, handle_period, handle_delim) emit in place
// of ASCII punctuation when "smart" processing is requested.
static const char *EMDASH = "\xE2\x80\x94";           // U+2014 EM DASH
static const char *ENDASH = "\xE2\x80\x93";           // U+2013 EN DASH
static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026 HORIZONTAL ELLIPSIS
static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019

// Macros for creating the various kinds of simple (value-less) inline nodes.
// Each one forwards to make_simple() with the matching node type.
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run whose position is cached in subject.backticks;
// scan_to_closing_backticks() gives up on opening runs longer than this.
#define MAXBACKTICKS 1000

// One entry of the delimiter stack, a doubly linked list whose top is
// subject.last_delim (see push_delimiter / remove_delimiter).
typedef struct delimiter {
  struct delimiter *previous; // entry pushed before this one, or NULL
  struct delimiter *next;     // entry pushed after this one, or NULL at the top
  cmark_node *inl_text;       // text node that holds the delimiter characters
  bufsize_t position;         // subject position recorded when pushed
  bufsize_t length;           // inl_text->len at the time of the push
  unsigned char delim_char;   // the delimiter character itself
  bool can_open;              // as computed by scan_delims()
  bool can_close;             // as computed by scan_delims()
} delimiter;

// One entry of the bracket stack, a singly linked list whose top is
// subject.last_bracket (see push_bracket / pop_bracket).
typedef struct bracket {
  struct bracket *previous; // entry pushed before this one, or NULL
  cmark_node *inl_text;     // text node created for the opening bracket
  bufsize_t position;       // subject position recorded when pushed
  bool image;               // `image` argument given to push_bracket()
  bool active;              // set to true by push_bracket()
  bool bracket_after;       // becomes true once a later bracket is pushed
} bracket;

// Bit values for subject.flags. The code that sets and tests them is outside
// this section.
// NOTE(review): presumably each bit records that a scan for the closer of
// that kind of raw HTML construct already failed -- confirm against the
// raw-HTML handling code.
#define FLAG_SKIP_HTML_CDATA (1u << 0)
#define FLAG_SKIP_HTML_DECLARATION (1u << 1)
#define FLAG_SKIP_HTML_PI (1u << 2)
#define FLAG_SKIP_HTML_COMMENT (1u << 3)

// Parser state for the inline content of one block; initialised by
// subject_from_buf().
typedef struct {
  cmark_mem *mem;              // allocator used for nodes and stack entries
  cmark_chunk input;           // text being parsed
  unsigned flags;              // FLAG_SKIP_* bits, initially 0
  int line;                    // current source line, copied into new nodes
  bufsize_t pos;               // current offset into input
  int block_offset;            // added to every column computed by make_literal
  int column_offset;           // likewise; rewritten after a multi-line match
  cmark_reference_map *refmap; // reference map handed to subject_from_buf()
  delimiter *last_delim;       // top of the delimiter stack, or NULL
  bracket *last_bracket;       // top of the bracket stack, or NULL
  // backticks[n] is the start position of the most recently scanned run of
  // exactly n backticks (written by scan_to_closing_backticks).
  bufsize_t backticks[MAXBACKTICKS + 1];
  bool scanned_for_backticks; // true once a scan reached the end of input
  bool no_link_openers;       // starts true; cleared when a non-image
                              // bracket is pushed
} subject;

// True when c is a line feed or a carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  return (c == '\n' || c == '\r');
}

// Forward declarations for functions defined further down in the file.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer);

static int parse_inline(subject *subj, cmark_node *parent, int options);

static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
                             cmark_chunk *chunk, cmark_reference_map *refmap);
+static bufsize_t subject_find_special_char(subject *subj, int options); + +// Create an inline with a literal string value. +static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, + int start_column, int end_column) { + cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); + e->mem = subj->mem; + e->type = (uint16_t)t; + e->start_line = e->end_line = subj->line; + // columns are 1 based. + e->start_column = start_column + 1 + subj->column_offset + subj->block_offset; + e->end_column = end_column + 1 + subj->column_offset + subj->block_offset; + return e; +} + +// Create an inline with no value. +static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) { + cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); + e->mem = mem; + e->type = t; + return e; +} + +static cmark_node *make_str(subject *subj, int sc, int ec, cmark_chunk s) { + cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec); + e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + 1); + if (s.data != NULL) { + memcpy(e->data, s.data, s.len); + } + e->data[s.len] = 0; + e->len = s.len; + return e; +} + +static cmark_node *make_str_from_buf(subject *subj, int sc, int ec, + cmark_strbuf *buf) { + cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec); + e->len = buf->size; + e->data = cmark_strbuf_detach(buf); + return e; +} + +// Like make_str, but parses entities. +static cmark_node *make_str_with_entities(subject *subj, + int start_column, int end_column, + cmark_chunk *content) { + cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem); + + if (houdini_unescape_html(&unescaped, content->data, content->len)) { + return make_str_from_buf(subj, start_column, end_column, &unescaped); + } else { + return make_str(subj, start_column, end_column, *content); + } +} + +// Like cmark_node_append_child but without costly sanity checks. +// Assumes that child was newly created. 
+static void append_child(cmark_node *node, cmark_node *child) { + cmark_node *old_last_child = node->last_child; + + child->next = NULL; + child->prev = old_last_child; + child->parent = node; + node->last_child = child; + + if (old_last_child) { + old_last_child->next = child; + } else { + // Also set first_child if node previously had no children. + node->first_child = child; + } +} + +// Duplicate a chunk by creating a copy of the buffer not by reusing the +// buffer like cmark_chunk_dup does. +static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) { + if (src == NULL) { + return NULL; + } + size_t len = strlen((char *)src); + unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1); + memcpy(data, src, len + 1); + return data; +} + +static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, + int is_email) { + cmark_strbuf buf = CMARK_BUF_INIT(mem); + + cmark_chunk_trim(url); + + if (is_email) + cmark_strbuf_puts(&buf, "mailto:"); + + houdini_unescape_html_f(&buf, url->data, url->len); + return cmark_strbuf_detach(&buf); +} + +static CMARK_INLINE cmark_node *make_autolink(subject *subj, + int start_column, int end_column, + cmark_chunk url, int is_email) { + cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK); + link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); + link->as.link.title = NULL; + link->start_line = link->end_line = subj->line; + link->start_column = start_column + 1; + link->end_column = end_column + 1; + append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); + return link; +} + +static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, + cmark_chunk *chunk, cmark_reference_map *refmap) { + int i; + e->mem = mem; + e->input = *chunk; + e->flags = 0; + e->line = line_number; + e->pos = 0; + e->block_offset = block_offset; + e->column_offset = 0; + e->refmap = refmap; + e->last_delim = NULL; + e->last_bracket = 
NULL; + for (i = 0; i <= MAXBACKTICKS; i++) { + e->backticks[i] = 0; + } + e->scanned_for_backticks = false; + e->no_link_openers = true; +} + +static CMARK_INLINE int isbacktick(int c) { return (c == '`'); } + +static CMARK_INLINE unsigned char peek_char(subject *subj) { + // NULL bytes should have been stripped out by now. If they're + // present, it's a programming error: + assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0)); + return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; +} + +static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) { + return subj->input.data[pos]; +} + +// Return true if there are more characters in the subject. +static CMARK_INLINE int is_eof(subject *subj) { + return (subj->pos >= subj->input.len); +} + +// Advance the subject. Doesn't check for eof. +#define advance(subj) (subj)->pos += 1 + +static CMARK_INLINE bool skip_spaces(subject *subj) { + bool skipped = false; + while (peek_char(subj) == ' ' || peek_char(subj) == '\t') { + advance(subj); + skipped = true; + } + return skipped; +} + +static CMARK_INLINE bool skip_line_end(subject *subj) { + bool seen_line_end_char = false; + if (peek_char(subj) == '\r') { + advance(subj); + seen_line_end_char = true; + } + if (peek_char(subj) == '\n') { + advance(subj); + seen_line_end_char = true; + } + return seen_line_end_char || is_eof(subj); +} + +// Take characters while a predicate holds, and return a string. +static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { + unsigned char c; + bufsize_t startpos = subj->pos; + bufsize_t len = 0; + + while ((c = peek_char(subj)) && (*f)(c)) { + advance(subj); + len++; + } + + return cmark_chunk_dup(&subj->input, startpos, len); +} + +// Return the number of newlines in a given span of text in a subject. If +// the number is greater than zero, also return the number of characters +// between the last newline and the end of the span in `since_newline`. 
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { + int nls = 0; + int since_nl = 0; + + while (len--) { + if (subj->input.data[from++] == '\n') { + ++nls; + since_nl = 0; + } else { + ++since_nl; + } + } + + if (!nls) + return 0; + + *since_newline = since_nl; + return nls; +} + +// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and +// `column_offset` according to the number of newlines in a just-matched span +// of text in `subj`. +static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) { + if (!(options & CMARK_OPT_SOURCEPOS)) { + return; + } + + int since_newline; + int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); + if (newlines) { + subj->line += newlines; + node->end_line += newlines; + node->end_column = since_newline; + subj->column_offset = -subj->pos + since_newline + extra; + } +} + +// Try to process a backtick code span that began with a +// span of ticks of length openticklength length (already +// parsed). Return 0 if you don't find matching closing +// backticks, otherwise return the position in the subject +// after the closing backticks. 
+static bufsize_t scan_to_closing_backticks(subject *subj, + bufsize_t openticklength) { + + bool found = false; + if (openticklength > MAXBACKTICKS) { + // we limit backtick string length because of the array subj->backticks: + return 0; + } + if (subj->scanned_for_backticks && + subj->backticks[openticklength] <= subj->pos) { + // return if we already know there's no closer + return 0; + } + while (!found) { + // read non backticks + unsigned char c; + while ((c = peek_char(subj)) && c != '`') { + advance(subj); + } + if (is_eof(subj)) { + break; + } + bufsize_t numticks = 0; + while (peek_char(subj) == '`') { + advance(subj); + numticks++; + } + // store position of ender + if (numticks <= MAXBACKTICKS) { + subj->backticks[numticks] = subj->pos - numticks; + } + if (numticks == openticklength) { + return (subj->pos); + } + } + // got through whole input without finding closer + subj->scanned_for_backticks = true; + return 0; +} + +// Destructively modify string, converting newlines to +// spaces, then removing a single leading + trailing space, +// unless the code span consists entirely of space characters. +static void S_normalize_code(cmark_strbuf *s) { + bufsize_t r, w; + bool contains_nonspace = false; + + for (r = 0, w = 0; r < s->size; ++r) { + switch (s->ptr[r]) { + case '\r': + if (s->ptr[r + 1] != '\n') { + s->ptr[w++] = ' '; + } + break; + case '\n': + s->ptr[w++] = ' '; + break; + default: + s->ptr[w++] = s->ptr[r]; + } + if (s->ptr[r] != ' ') { + contains_nonspace = true; + } + } + + // begins and ends with space? + if (contains_nonspace && + s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') { + cmark_strbuf_drop(s, 1); + cmark_strbuf_truncate(s, w - 2); + } else { + cmark_strbuf_truncate(s, w); + } + +} + + +// Parse backtick code section or raw backticks, return an inline. +// Assumes that the subject has a backtick at the current position. 
static cmark_node *handle_backticks(subject *subj, int options) {
  bufsize_t initpos = subj->pos; // first backtick of the opening run
  cmark_chunk openticks = take_while(subj, isbacktick);
  bufsize_t startpos = subj->pos; // first byte after the opening run
  // Position just past the matching closing run, or 0 when there is none.
  bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);

  if (endpos == 0) {      // not found
    subj->pos = startpos; // rewind
    // The opening run is emitted as ordinary text.
    return make_str(subj, initpos, initpos + openticks.len - 1, openticks);
  } else {
    cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);

    // Copy what lies between the opening and the closing run ...
    cmark_strbuf_set(&buf, subj->input.data + startpos,
                     endpos - startpos - openticks.len);
    // ... and normalize line endings and surrounding spaces.
    S_normalize_code(&buf);

    cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos,
                                    endpos - openticks.len - 1);
    node->len = buf.size;
    node->data = cmark_strbuf_detach(&buf);
    // The span may cover several lines; fix up line/column bookkeeping.
    adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
    return node;
  }
}


// Scan ***, **, or * and return number scanned, or 0.
// Advances position.
//
// More precisely: consumes the run of delimiter character `c` at the current
// position (a single character only when `c` is a quote), looks at the code
// point before and after the run, and reports through `can_open` and
// `can_close` whether the run may open and/or close emphasis (or a quote
// pair).
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
                       bool *can_close) {
  int numdelims = 0;
  bufsize_t before_char_pos;
  int32_t after_char = 0;
  int32_t before_char = 0;
  int len;
  bool left_flanking, right_flanking;

  if (subj->pos == 0) {
    // Start of input: pretend the preceding character is a line feed (10).
    before_char = 10;
  } else {
    before_char_pos = subj->pos - 1;
    // walk back to the beginning of the UTF_8 sequence:
    // (a byte whose top two bits are 10 is a continuation byte)
    while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
      before_char_pos -= 1;
    }
    len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
                                 subj->pos - before_char_pos, &before_char);
    if (len == -1) {
      // Could not be decoded: fall back to a line feed as well.
      before_char = 10;
    }
  }

  if (c == '\'' || c == '"') {
    numdelims++;
    advance(subj); // limit to 1 delim for quotes
  } else {
    while (peek_char(subj) == c) {
      numdelims++;
      advance(subj);
    }
  }

  // Decode the code point that follows the run; a line feed stands in when
  // decoding fails.
  len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
                               subj->input.len - subj->pos, &after_char);
  if (len == -1) {
    after_char = 10;
  }
  // Left-flanking: not followed by whitespace, and either not followed by
  // punctuation or preceded by whitespace/punctuation.
  left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                  (!cmark_utf8proc_is_punctuation(after_char) ||
                   cmark_utf8proc_is_space(before_char) ||
                   cmark_utf8proc_is_punctuation(before_char));
  // Right-flanking: the mirror image of the above.
  right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
                   (!cmark_utf8proc_is_punctuation(before_char) ||
                    cmark_utf8proc_is_space(after_char) ||
                    cmark_utf8proc_is_punctuation(after_char));
  if (c == '_') {
    // Underscores are stricter: a run that flanks on both sides only counts
    // on the side where it touches punctuation.
    *can_open = left_flanking &&
                (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
    *can_close = right_flanking &&
                 (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
  } else if (c == '\'' || c == '"') {
    // Quotes: a both-flanking quote may open only right after '(' or '[',
    // and a quote never opens right after ']' or ')'.
    *can_open = left_flanking &&
         (!right_flanking || before_char == '(' || before_char == '[') &&
         before_char != ']' && before_char != ')';
    *can_close = right_flanking;
  } else {
    *can_open = left_flanking;
    *can_close = right_flanking;
  }
  return numdelims;
}

/*
static void print_delimiters(subject *subj)
{
  delimiter *delim;
  delim = subj->last_delim;
  while (delim != NULL) {
    printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
           (void*)delim, delim->delim_char,
           delim->can_open, delim->can_close,
           (void*)delim->next, (void*)delim->previous);
    delim = delim->previous;
  }
}
*/

// Unlink `delim` from the delimiter stack and free it. NULL is ignored.
static void remove_delimiter(subject *subj, delimiter *delim) {
  if (delim == NULL)
    return;
  if (delim->next == NULL) {
    // end of list:
    assert(delim == subj->last_delim);
    subj->last_delim = delim->previous;
  } else {
    delim->next->previous = delim->previous;
  }
  if (delim->previous != NULL) {
    delim->previous->next = delim->next;
  }
  subj->mem->free(delim);
}

// Remove and free the top entry of the bracket stack, if there is one.
static void pop_bracket(subject *subj) {
  bracket *b;
  if (subj->last_bracket == NULL)
    return;
  b = subj->last_bracket;
  subj->last_bracket = subj->last_bracket->previous;
  subj->mem->free(b);
}

// Push a new entry for delimiter character `c` onto the delimiter stack.
// `inl_text` is the text node holding the delimiter run; its current length
// and the current subject position are recorded in the entry.
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
                           bool can_close, cmark_node *inl_text) {
  delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
  delim->delim_char = c;
  delim->can_open = can_open;
  delim->can_close = can_close;
  delim->inl_text = inl_text;
  delim->position = subj->pos;
  delim->length = inl_text->len;
  delim->previous = subj->last_delim;
  delim->next = NULL;
  if (delim->previous != NULL) {
    delim->previous->next = delim;
  }
  subj->last_delim = delim;
}

// Push a new, active entry onto the bracket stack. The entry that was on top
// before (if any) is marked as having a bracket after it, and pushing a
// non-image bracket clears subj->no_link_openers.
static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
  bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
  if (subj->last_bracket != NULL) {
    subj->last_bracket->bracket_after = true;
  }
  b->image = image;
  b->active = true;
  b->inl_text = inl_text;
  b->previous = subj->last_bracket;
  b->position = subj->pos;
  b->bracket_after = false;
  subj->last_bracket = b;
  if (!image) {
    subj->no_link_openers = false;
  }
}

// Assumes the subject has a c at the current position.
//
// Consumes the delimiter run, creates the text node for it and, when the run
// can open or close, pushes it onto the delimiter stack. Quote characters
// are only pushed (and only replaced by typographic quotes) when `smart` is
// set.
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
  bufsize_t numdelims;
  cmark_node *inl_text;
  bool can_open, can_close;
  cmark_chunk contents;

  numdelims = scan_delims(subj, c, &can_open, &can_close);

  if (c == '\'' && smart) {
    contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
  } else if (c == '"' && smart) {
    contents =
        cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
  } else {
    // Plain text: the delimiter characters exactly as they were written.
    contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
  }

  inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);

  if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
    push_delimiter(subj, c, can_open, can_close, inl_text);
  }

  return inl_text;
}

// Assumes we have a hyphen at the current position.
+static cmark_node *handle_hyphen(subject *subj, bool smart) {
+  int run_start = subj->pos;
+
+  advance(subj);
+
+  // A lone hyphen, or any hyphen when smart punctuation is off, stays as is.
+  if (!(smart && peek_char(subj) == '-')) {
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
+  }
+
+  // From here on smart is set and at least one more hyphen follows:
+  // swallow the remainder of the run.
+  do {
+    advance(subj);
+  } while (peek_char(subj) == '-');
+
+  int run_len = subj->pos - run_start;
+  int ems = 0;
+  int ens = 0;
+  cmark_strbuf dashes = CMARK_BUF_INIT(subj->mem);
+
+  if (run_len % 3 == 0) {
+    // multiple of three: em dashes only
+    ems = run_len / 3;
+  } else if (run_len % 2 == 0) {
+    // even (and not a multiple of three): en dashes only
+    ens = run_len / 2;
+  } else {
+    // odd and not a multiple of three: finish with one en dash when two
+    // hyphens are left over, with two en dashes when four are left over,
+    // and spend everything before that on em dashes
+    ens = (run_len % 3 == 2) ? 1 : 2;
+    ems = (run_len - 2 * ens) / 3;
+  }
+
+  while (ems-- > 0) {
+    cmark_strbuf_puts(&dashes, EMDASH);
+  }
+
+  while (ens-- > 0) {
+    cmark_strbuf_puts(&dashes, ENDASH);
+  }
+
+  return make_str_from_buf(subj, run_start, subj->pos - 1, &dashes);
+}
+
+// Assumes we have a period at the current position.
+// With `smart` set, "..." becomes an ellipsis and ".." is kept as two
+// literal periods; otherwise a single "." text node is produced.
+static cmark_node *handle_period(subject *subj, bool smart) {
+  advance(subj);
+  if (smart && peek_char(subj) == '.') {
+    advance(subj);
+    if (peek_char(subj) == '.') {
+      advance(subj);
+      return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
+    } else {
+      return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
+    }
+  } else {
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
+  }
+}
+
+// Resolve the emphasis and smart-quote delimiters whose `position` is at or
+// above `stack_bottom`. Closers are visited in list order; for each one the
+// list is searched backwards for a compatible opener. '*' and '_' pairs are
+// turned into emph/strong nodes by S_insert_emph; quote delimiters have
+// their text nodes rewritten to the typographic quote literals. Every
+// delimiter at or above `stack_bottom` is freed before returning.
+static void process_emphasis(subject *subj, bufsize_t stack_bottom) {
+  delimiter *candidate;
+  delimiter *closer = NULL;
+  delimiter *opener;
+  delimiter *old_closer;
+  bool opener_found;
+  int openers_bottom_index = 0;
+  // Per-kind lower bounds for the backwards opener search: slot 0 is '"',
+  // slot 1 is '\'', slot 2 is '_', and slots 3..8 are '*', split by whether
+  // the closer can also open and by its length modulo 3.
+  bufsize_t openers_bottom[9] = {stack_bottom, stack_bottom, stack_bottom,
+                                 stack_bottom, stack_bottom, stack_bottom,
+                                 stack_bottom, stack_bottom, stack_bottom};
+
+  // move back to first relevant delim.
+  candidate = subj->last_delim;
+  while (candidate != NULL && candidate->position >= stack_bottom) {
+    closer = candidate;
+    candidate = candidate->previous;
+  }
+
+  // now move forward, looking for closers, and handling each
+  while (closer != NULL) {
+    if (closer->can_close) {
+      switch (closer->delim_char) {
+      case '"':
+        openers_bottom_index = 0;
+        break;
+      case '\'':
+        openers_bottom_index = 1;
+        break;
+      case '_':
+        openers_bottom_index = 2;
+        break;
+      case '*':
+        openers_bottom_index = 3 +
+                (closer->can_open ? 3 : 0) + (closer->length % 3);
+        break;
+      default:
+        assert(false);
+      }
+
+      // Now look backwards for first matching opener:
+      opener = closer->previous;
+      opener_found = false;
+      while (opener != NULL &&
+             opener->position >= openers_bottom[openers_bottom_index]) {
+        if (opener->can_open && opener->delim_char == closer->delim_char) {
+          // interior closer of size 2 can't match opener of size 1
+          // or of size 1 can't match 2
+          if (!(closer->can_open || opener->can_close) ||
+              closer->length % 3 == 0 ||
+              (opener->length + closer->length) % 3 != 0) {
+            opener_found = true;
+            break;
+          }
+        }
+        opener = opener->previous;
+      }
+      // keep a handle on the current closer: `closer` itself is advanced
+      // (or replaced by S_insert_emph's return value) below
+      old_closer = closer;
+      if (closer->delim_char == '*' || closer->delim_char == '_') {
+        if (opener_found) {
+          closer = S_insert_emph(subj, opener, closer);
+        } else {
+          closer = closer->next;
+        }
+      } else if (closer->delim_char == '\'' || closer->delim_char == '"') {
+        // a quote that can close is always rendered as a right quote,
+        // whether or not an opener was found
+        if (closer->delim_char == '\'') {
+          cmark_node_set_literal(closer->inl_text, RIGHTSINGLEQUOTE);
+        } else {
+          cmark_node_set_literal(closer->inl_text, RIGHTDOUBLEQUOTE);
+        }
+        closer = closer->next;
+        if (opener_found) {
+          if (old_closer->delim_char == '\'') {
+            cmark_node_set_literal(opener->inl_text, LEFTSINGLEQUOTE);
+          } else {
+            cmark_node_set_literal(opener->inl_text, LEFTDOUBLEQUOTE);
+          }
+          remove_delimiter(subj, opener);
+          remove_delimiter(subj, old_closer);
+        }
+      }
+      if (!opener_found) {
+        // set lower bound for future searches for openers
+        openers_bottom[openers_bottom_index] = old_closer->position;
+        if (!old_closer->can_open) {
+          // we can remove a closer that can't be an
+          // opener, once we've seen there's no
+          // matching opener:
+          remove_delimiter(subj, old_closer);
+        }
+      }
+    } else {
+      closer = closer->next;
+    }
+  }
+  // free all delimiters in list until stack_bottom:
+  while (subj->last_delim != NULL &&
+         subj->last_delim->position >= stack_bottom) {
+    remove_delimiter(subj, subj->last_delim);
+  }
+}
+
+// Wrap the inlines between `opener` and `closer` in a new emph node (one
+// delimiter character consumed on each side) or strong node (two on each
+// side, when both text nodes still hold at least two characters).
+// Delimiters lying between the two are freed, and an opener or closer whose
+// text becomes empty is removed together with its text node. Returns the
+// delimiter the caller should continue from: `closer` itself if it still
+// has characters left, otherwise the delimiter that followed it.
+static delimiter *S_insert_emph(subject *subj, delimiter *opener,
+                                delimiter *closer) {
+  delimiter *delim, *tmp_delim;
+  bufsize_t use_delims;
+  cmark_node *opener_inl = opener->inl_text;
+  cmark_node *closer_inl = closer->inl_text;
+  bufsize_t opener_num_chars = opener_inl->len;
+  bufsize_t closer_num_chars = closer_inl->len;
+  cmark_node *tmp, *tmpnext, *emph;
+
+  // calculate the actual number of characters used from this closer
+  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
+
+  // remove used characters from associated inlines.
+  opener_num_chars -= use_delims;
+  closer_num_chars -= use_delims;
+  opener_inl->len = opener_num_chars;
+  opener_inl->data[opener_num_chars] = 0;
+  closer_inl->len = closer_num_chars;
+  closer_inl->data[closer_num_chars] = 0;
+
+  // free delimiters between opener and closer
+  delim = closer->previous;
+  while (delim != NULL && delim != opener) {
+    tmp_delim = delim->previous;
+    remove_delimiter(subj, delim);
+    delim = tmp_delim;
+  }
+
+  // create new emph or strong, and splice it in to our inlines
+  // between the opener and closer
+  emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);
+
+  tmp = opener_inl->next;
+  while (tmp && tmp != closer_inl) {
+    tmpnext = tmp->next;
+    cmark_node_unlink(tmp);
+    append_child(emph, tmp);
+    tmp = tmpnext;
+  }
+  cmark_node_insert_after(opener_inl, emph);
+
+  // the new node takes its source span from the two delimiter text nodes
+  emph->start_line = opener_inl->start_line;
+  emph->end_line = closer_inl->end_line;
+  emph->start_column = opener_inl->start_column;
+  emph->end_column = closer_inl->end_column;
+
+  // if opener has 0 characters, remove it and its associated inline
+  if (opener_num_chars == 0) {
+    cmark_node_free(opener_inl);
+    remove_delimiter(subj, opener);
+  }
+
+  // if closer has 0 characters, remove it and its associated inline
+  if (closer_num_chars == 0) {
+    // remove empty closer inline
+    cmark_node_free(closer_inl);
+    // remove closer from list
+    tmp_delim = closer->next;
+    remove_delimiter(subj, closer);
+    closer = tmp_delim;
+  }
+
+  return closer;
+}
+
+// Parse backslash-escape or just a backslash, returning an inline.
+// A backslash before a character accepted by cmark_ispunct yields that
+// character as text, a backslash before a line ending yields a hard line
+// break, and anything else yields a literal backslash.
+static cmark_node *handle_backslash(subject *subj) {
+  advance(subj);
+  unsigned char nextchar = peek_char(subj);
+  if (cmark_ispunct(
+          nextchar)) { // only ascii symbols and newline can be escaped
+    advance(subj);
+    return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
+  } else if (!is_eof(subj) && skip_line_end(subj)) {
+    return make_linebreak(subj->mem);
+  } else {
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
+  }
+}
+
+// Parse an entity or a regular "&" string.
+// Assumes the subject has an '&' character at the current position.
+static cmark_node *handle_entity(subject *subj) {
+  cmark_strbuf decoded = CMARK_BUF_INIT(subj->mem);
+  bufsize_t amp_pos;
+  bufsize_t consumed;
+
+  advance(subj); // step over the '&'
+  amp_pos = subj->pos - 1;
+
+  consumed = houdini_unescape_ent(&decoded, subj->input.data + subj->pos,
+                                  subj->input.len - subj->pos);
+
+  if (consumed > 0) {
+    // a recognised entity: emit its expansion, spanning '&' through the
+    // last byte that was consumed
+    subj->pos += consumed;
+    return make_str_from_buf(subj, amp_pos, subj->pos - 1, &decoded);
+  }
+
+  // not an entity after all: the '&' is ordinary text
+  return make_str(subj, amp_pos, amp_pos, cmark_chunk_literal("&"));
+}
+
+// Clean a URL: trim the chunk in place, expand HTML entities, then drop the
+// backslashes that escape punctuation. Returns the detached buffer.
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
+  cmark_strbuf cleaned = CMARK_BUF_INIT(mem);
+
+  cmark_chunk_trim(url);
+  houdini_unescape_html_f(&cleaned, url->data, url->len);
+  cmark_strbuf_unescape(&cleaned);
+
+  return cmark_strbuf_detach(&cleaned);
+}
+
+// Clean a link title: strip one pair of surrounding '...', "..." or (...)
+// delimiters if present, expand HTML entities, and drop the backslashes
+// that escape punctuation. Returns NULL for an empty title, otherwise the
+// detached buffer.
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
+  cmark_strbuf cleaned = CMARK_BUF_INIT(mem);
+  const unsigned char *text;
+  bufsize_t text_len;
+  unsigned char open_ch, close_ch;
+  int delimited;
+
+  if (title->len == 0) {
+    return NULL;
+  }
+
+  text = title->data;
+  text_len = title->len;
+  open_ch = text[0];
+  close_ch = text[text_len - 1];
+
+  switch (open_ch) {
+  case '\'':
+  case '"':
+    delimited = (close_ch == open_ch);
+    break;
+  case '(':
+    delimited = (close_ch == ')');
+    break;
+  default:
+    delimited = 0;
+    break;
+  }
+
+  if (delimited) {
+    // skip the opening delimiter and leave out the closing one
+    text += 1;
+    text_len -= 2;
+  }
+
+  houdini_unescape_html_f(&cleaned, text, text_len);
+  cmark_strbuf_unescape(&cleaned);
+
+  return cmark_strbuf_detach(&cleaned);
+}
+
+// Parse an autolink or HTML tag.
+// Assumes the subject has a '<' character at the current position.
+// Tries, in order: a URI autolink, an email autolink, and raw inline HTML
+// (comment, CDATA section, declaration, processing instruction, or tag).
+// When a scan shows that a construct is not terminated in the remaining
+// input, the matching FLAG_SKIP_HTML_* bit is set in subj->flags so later
+// '<' characters skip that scan. Falls back to a literal "<" text node.
+static cmark_node *handle_pointy_brace(subject *subj, int options) {
+  bufsize_t matchlen = 0;
+  cmark_chunk contents;
+
+  advance(subj); // advance past first <
+
+  // first try to match a URL autolink
+  matchlen = scan_autolink_uri(&subj->input, subj->pos);
+  if (matchlen > 0) {
+    // matchlen - 1: the chunk leaves out the last matched byte
+    contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
+    subj->pos += matchlen;
+
+    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
+  }
+
+  // next try to match an email autolink
+  matchlen = scan_autolink_email(&subj->input, subj->pos);
+  if (matchlen > 0) {
+    contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
+    subj->pos += matchlen;
+
+    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
+  }
+
+  // finally, try to match an html tag
+  // NOTE(review): the byte reads at pos+2 .. pos+4 below are only guarded by
+  // this pos + 2 <= len check and by short-circuit evaluation; presumably the
+  // input buffer is NUL-terminated -- confirm against the callers.
+  if (subj->pos + 2 <= subj->input.len) {
+    int c = subj->input.data[subj->pos];
+    // NOTE(review): the whole '!' branch is gated on FLAG_SKIP_HTML_COMMENT,
+    // so once that flag is set CDATA sections and declarations are no longer
+    // attempted either -- confirm this is intended.
+    if (c == '!' && (subj->flags & FLAG_SKIP_HTML_COMMENT) == 0) {
+      c = subj->input.data[subj->pos+1];
+      if (c == '-' && subj->input.data[subj->pos+2] == '-') {
+        // "!-->" and "!--->" are accepted directly, without running the
+        // comment scanner
+        if (subj->input.data[subj->pos+3] == '>') {
+          matchlen = 4;
+        } else if (subj->input.data[subj->pos+3] == '-' &&
+                   subj->input.data[subj->pos+4] == '>') {
+          matchlen = 5;
+        } else {
+          matchlen = scan_html_comment(&subj->input, subj->pos + 1);
+          if (matchlen > 0) {
+            matchlen += 1; // prefix "<"
+          } else { // no match through end of input: set a flag so
+                   // we don't reparse looking for -->:
+            subj->flags |= FLAG_SKIP_HTML_COMMENT;
+          }
+        }
+      } else if (c == '[') {
+        if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
+          matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
+          if (matchlen > 0) {
+            // The regex doesn't require the final "]]>". But if we're not at
+            // the end of input, it must come after the match. Otherwise,
+            // disable subsequent scans to avoid quadratic behavior.
+            matchlen += 5; // prefix "![", suffix "]]>"
+            if (subj->pos + matchlen > subj->input.len) {
+              subj->flags |= FLAG_SKIP_HTML_CDATA;
+              matchlen = 0;
+            }
+          }
+        }
+      } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
+        matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
+        if (matchlen > 0) {
+          matchlen += 2; // prefix "!", suffix ">"
+          if (subj->pos + matchlen > subj->input.len) {
+            subj->flags |= FLAG_SKIP_HTML_DECLARATION;
+            matchlen = 0;
+          }
+        }
+      }
+    } else if (c == '?') {
+      if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
+        // Note that we allow an empty match.
+        matchlen = scan_html_pi(&subj->input, subj->pos + 1);
+        matchlen += 3; // prefix "?", suffix "?>"
+        if (subj->pos + matchlen > subj->input.len) {
+          subj->flags |= FLAG_SKIP_HTML_PI;
+          matchlen = 0;
+        }
+      }
+    } else {
+      matchlen = scan_html_tag(&subj->input, subj->pos);
+    }
+  }
+  if (matchlen > 0) {
+    // copy the raw HTML, including the leading '<' that was already
+    // consumed, into a NUL-terminated buffer owned by the new node
+    const unsigned char *src = subj->input.data + subj->pos - 1;
+    bufsize_t len = matchlen + 1;
+    subj->pos += matchlen;
+    cmark_node *node = make_literal(subj, CMARK_NODE_HTML_INLINE,
+                                    subj->pos - matchlen - 1, subj->pos - 1);
+    node->data = (unsigned char *)subj->mem->realloc(NULL, len + 1);
+    memcpy(node->data, src, len);
+    node->data[len] = 0;
+    node->len = len;
+    adjust_subj_node_newlines(subj, node, matchlen, 1, options);
+    return node;
+  }
+
+  // if nothing matches, just return the opening <:
+  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
+}
+
+// Parse a link label. Returns 1 if successful.
+// Note: unescaped brackets are not allowed in labels.
+// The label begins with `[` and ends with the first `]` character
+// encountered. Backticks in labels do not start code spans.
+static int link_label(subject *subj, cmark_chunk *raw_label) {
+  bufsize_t startpos = subj->pos;
+  int length = 0;
+  unsigned char c;
+
+  // advance past [
+  if (peek_char(subj) == '[') {
+    advance(subj);
+  } else {
+    return 0;
+  }
+
+  while ((c = peek_char(subj)) && c != '[' && c != ']') {
+    if (c == '\\') {
+      // a backslash and the punctuation character it escapes both count
+      // towards the label length
+      advance(subj);
+      length++;
+      if (cmark_ispunct(peek_char(subj))) {
+        advance(subj);
+        length++;
+      }
+    } else {
+      advance(subj);
+      length++;
+    }
+    if (length > MAX_LINK_LABEL_LENGTH) {
+      goto noMatch;
+    }
+  }
+
+  if (c == ']') { // match found
+    *raw_label =
+        cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+    cmark_chunk_trim(raw_label);
+    advance(subj); // advance past ]
+    return 1;
+  }
+
+noMatch:
+  subj->pos = startpos; // rewind
+  return 0;
+}
+
+// Scan an unbracketed link destination starting at `offset`. Backslash
+// escapes of punctuation are skipped, parentheses must balance (more than
+// 32 open parentheses are rejected), and the destination ends at the first
+// whitespace character or unbalanced ')'. On success stores the span in
+// *output and returns its length (possibly 0). Returns -1 on failure,
+// including when the scan runs to the end of the input.
+static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
+                                        cmark_chunk *output) {
+  bufsize_t i = offset;
+  size_t nb_p = 0;
+
+  while (i < input->len) {
+    if (input->data[i] == '\\' &&
+        i + 1 < input-> len &&
+        cmark_ispunct(input->data[i+1]))
+      i += 2;
+    else if (input->data[i] == '(') {
+      ++nb_p;
+      ++i;
+      if (nb_p > 32)
+        return -1;
+    } else if (input->data[i] == ')') {
+      if (nb_p == 0)
+        break;
+      --nb_p;
+      ++i;
+    } else if (cmark_isspace(input->data[i])) {
+      if (i == offset) {
+        return -1;
+      }
+      break;
+    } else {
+      ++i;
+    }
+  }
+
+  if (i >= input->len || nb_p != 0)
+    return -1;
+
+  {
+    cmark_chunk result = {input->data + offset, i - offset};
+    *output = result;
+  }
+  return i - offset;
+}
+
+// Scan a link destination at `offset`. A destination starting with '<' runs
+// to the next unescaped '>' and may not contain a newline or another
+// unescaped '<'; *output then excludes the angle brackets while the
+// returned length includes them. Anything else is delegated to
+// manual_scan_link_url_2. Returns -1 on failure.
+static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
+                                      cmark_chunk *output) {
+  bufsize_t i = offset;
+
+  if (i < input->len && input->data[i] == '<') {
+    ++i;
+    while (i < input->len) {
+      if (input->data[i] == '>') {
+        ++i;
+        break;
+      } else if (input->data[i] == '\\')
+        i += 2;
+      else if (input->data[i] == '\n' || input->data[i] == '<')
+        return -1;
+      else
+        ++i;
+    }
+  } else {
+    return manual_scan_link_url_2(input, offset, output);
+  }
+
+  if (i >= input->len)
+    return -1;
+
+  {
+    cmark_chunk result = {input->data + offset + 1, i - 2 - offset};
+    *output = result;
+  }
+  return i - offset;
+}
+
+// Return a link, an image, or a literal close bracket.
+// On a successful match the link/image node is spliced into the tree in
+// place of the opening bracket and NULL is returned; otherwise a "]" text
+// node is returned for the caller to append.
+static cmark_node *handle_close_bracket(subject *subj) {
+  bufsize_t initial_pos, after_link_text_pos;
+  bufsize_t endurl, starttitle, endtitle, endall;
+  bufsize_t sps, n;
+  cmark_reference *ref = NULL;
+  cmark_chunk url_chunk, title_chunk;
+  unsigned char *url, *title;
+  bracket *opener;
+  cmark_node *inl;
+  cmark_chunk raw_label;
+  int found_label;
+  cmark_node *tmp, *tmpnext;
+  bool is_image;
+
+  advance(subj); // advance past ]
+  initial_pos = subj->pos;
+
+  // get last [ or ![
+  opener = subj->last_bracket;
+
+  if (opener == NULL) {
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
+  }
+
+  // If we got here, we matched a potential link/image text.
+  // Now we check to see if it's a link/image.
+  is_image = opener->image;
+
+  if (!is_image && subj->no_link_openers) {
+    // take delimiter off stack
+    pop_bracket(subj);
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
+  }
+
+  after_link_text_pos = subj->pos;
+
+  // First, look for an inline link.
+  if (peek_char(subj) == '(' &&
+      ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
+                                 &url_chunk)) > -1)) {
+
+    // try to parse an explicit link:
+    endurl = subj->pos + 1 + sps + n;
+    starttitle = endurl + scan_spacechars(&subj->input, endurl);
+
+    // ensure there are spaces btw url and title
+    endtitle = (starttitle == endurl)
+                   ? starttitle
+                   : starttitle + scan_link_title(&subj->input, starttitle);
+
+    endall = endtitle + scan_spacechars(&subj->input, endtitle);
+
+    if (peek_at(subj, endall) == ')') {
+      subj->pos = endall + 1;
+
+      title_chunk =
+          cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+      url = cmark_clean_url(subj->mem, &url_chunk);
+      title = cmark_clean_title(subj->mem, &title_chunk);
+      cmark_chunk_free(&url_chunk);
+      cmark_chunk_free(&title_chunk);
+      goto match;
+
+    } else {
+      // it could still be a shortcut reference link
+      subj->pos = after_link_text_pos;
+    }
+  }
+
+  // Next, look for a following [link label] that matches in refmap.
+  // skip spaces
+  raw_label = cmark_chunk_literal("");
+  found_label = link_label(subj, &raw_label);
+  if (!found_label) {
+    // If we have a shortcut reference link, back up
+    // to before the spaces we skipped.
+    subj->pos = initial_pos;
+  }
+
+  // No usable label follows: fall back to the bracketed text itself as the
+  // label, unless another bracket was opened after this opener.
+  if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
+    cmark_chunk_free(&raw_label);
+    raw_label = cmark_chunk_dup(&subj->input, opener->position,
+                                initial_pos - opener->position - 1);
+    found_label = true;
+  }
+
+  if (found_label) {
+    ref = cmark_reference_lookup(subj->refmap, &raw_label);
+    cmark_chunk_free(&raw_label);
+  }
+
+  if (ref != NULL) { // found
+    url = cmark_strdup(subj->mem, ref->url);
+    title = cmark_strdup(subj->mem, ref->title);
+    goto match;
+  } else {
+    goto noMatch;
+  }
+
+noMatch:
+  // If we fall through to here, it means we didn't match a link:
+  pop_bracket(subj); // remove this opener from delimiter list
+  subj->pos = initial_pos;
+  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
+
+match:
+  inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
+  inl->as.link.url = url;
+  inl->as.link.title = title;
+  inl->start_line = inl->end_line = subj->line;
+  inl->start_column = opener->inl_text->start_column;
+  inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
+  cmark_node_insert_before(opener->inl_text, inl);
+  // Add link text:
+  tmp = opener->inl_text->next;
+  while (tmp) {
+    tmpnext = tmp->next;
+    cmark_node_unlink(tmp);
+    append_child(inl, tmp);
+    tmp = tmpnext;
+  }
+
+  // Free the bracket [:
+  cmark_node_free(opener->inl_text);
+
+  process_emphasis(subj, opener->position);
+  pop_bracket(subj);
+
+  // Now, if we have a link, we also want to deactivate links until
+  // we get a new opener. (This code can be removed if we decide to allow links
+  // inside links.)
+  if (!is_image) {
+    subj->no_link_openers = true;
+  }
+
+  return NULL;
+}
+
+// Parse a hard or soft linebreak, returning an inline.
+// Assumes the subject has a cr or newline at the current position.
+static cmark_node *handle_newline(subject *subj) {
+  bufsize_t nlpos = subj->pos;
+  // skip over cr, crlf, or lf:
+  if (peek_at(subj, subj->pos) == '\r') {
+    advance(subj);
+  }
+  if (peek_at(subj, subj->pos) == '\n') {
+    advance(subj);
+  }
+  ++subj->line;
+  subj->column_offset = -subj->pos;
+  // skip spaces at beginning of line
+  skip_spaces(subj);
+  // two spaces directly before the line ending make the break a hard one
+  if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
+      peek_at(subj, nlpos - 2) == ' ') {
+    return make_linebreak(subj->mem);
+  } else {
+    return make_softbreak(subj->mem);
+  }
+}
+
+// Return the position of the next byte after the current one that can start
+// an inline construct (the bytes parse_inline dispatches on), or the input
+// length if there is none. The smart-punctuation bytes only count when
+// CMARK_OPT_SMART is set in `options`.
+static bufsize_t subject_find_special_char(subject *subj, int options) {
+  // Both lookup tables are indexed by byte value; entries that are not
+  // listed are zero-initialised.
+
+  // "\r\n\\`&_*[]<!"
+  static const char SPECIAL_CHARS[256] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, // 0x00: LF, CR
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
+      0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, // 0x20: '!', '&', '*'
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x30: '<'
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, // 0x50: '[', backslash, ']', '_'
+      1,                                              // 0x60: backtick
+  };
+
+  // " ' . -
+  static const char SMART_PUNCT_CHARS[256] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
+      0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, // 0x20: '"', '\'', '-', '.'
+  };
+
+  bufsize_t n = subj->pos + 1;
+
+  while (n < subj->input.len) {
+    if (SPECIAL_CHARS[subj->input.data[n]])
+      return n;
+    if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
+      return n;
+    n++;
+  }
+
+  return subj->input.len;
+}
+
+// Parse an inline, advancing subject, and add it as a child of parent.
+// Return 0 if no inline can be parsed, 1 otherwise.
+static int parse_inline(subject *subj, cmark_node *parent, int options) {
+  const bool smart = (options & CMARK_OPT_SMART) != 0;
+  cmark_node *inl = NULL;
+  unsigned char c = peek_char(subj);
+
+  // a zero byte means there is nothing left to parse
+  if (c == 0) {
+    return 0;
+  }
+
+  switch (c) {
+  case '\r':
+  case '\n':
+    inl = handle_newline(subj);
+    break;
+  case '`':
+    inl = handle_backticks(subj, options);
+    break;
+  case '\\':
+    inl = handle_backslash(subj);
+    break;
+  case '&':
+    inl = handle_entity(subj);
+    break;
+  case '<':
+    inl = handle_pointy_brace(subj, options);
+    break;
+  case '*':
+  case '_':
+  case '\'':
+  case '"':
+    inl = handle_delim(subj, c, smart);
+    break;
+  case '-':
+    inl = handle_hyphen(subj, smart);
+    break;
+  case '.':
+    inl = handle_period(subj, smart);
+    break;
+  case '[':
+    // link opener: emit the bracket as text and remember it
+    advance(subj);
+    inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
+    push_bracket(subj, false, inl);
+    break;
+  case ']':
+    // may return NULL when a link or image was spliced in directly
+    inl = handle_close_bracket(subj);
+    break;
+  case '!':
+    advance(subj);
+    if (peek_char(subj) != '[') {
+      // a plain exclamation mark
+      inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
+      break;
+    }
+    // image opener: emit "![" as text and remember it
+    advance(subj);
+    inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
+    push_bracket(subj, true, inl);
+    break;
+  default: {
+    // ordinary text: take everything up to the next special character
+    bufsize_t text_start = subj->pos;
+    bufsize_t text_end = subject_find_special_char(subj, options);
+    cmark_chunk text =
+        cmark_chunk_dup(&subj->input, text_start, text_end - text_start);
+
+    subj->pos = text_end;
+
+    // if we're at a newline, strip trailing spaces.
+    if (S_is_line_end_char(peek_char(subj))) {
+      cmark_chunk_rtrim(&text);
+    }
+
+    inl = make_str(subj, text_start, text_end - 1, text);
+    break;
+  }
+  }
+
+  if (inl != NULL) {
+    append_child(parent, inl);
+  }
+
+  return 1;
+}
+
+// Parse inlines from parent's string_content, adding as children of parent.
+void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, + cmark_reference_map *refmap, int options) { + int internal_offset = parent->type == CMARK_NODE_HEADING ? + parent->as.heading.internal_offset : 0; + subject subj; + cmark_chunk content = {parent->data, parent->len}; + subject_from_buf(mem, parent->start_line, parent->start_column - 1 + internal_offset, &subj, &content, refmap); + cmark_chunk_rtrim(&subj.input); + + while (!is_eof(&subj) && parse_inline(&subj, parent, options)) + ; + + process_emphasis(&subj, 0); + // free bracket and delim stack + while (subj.last_delim) { + remove_delimiter(&subj, subj.last_delim); + } + while (subj.last_bracket) { + pop_bracket(&subj); + } +} + +// Parse zero or more space characters, including at most one newline. +static void spnl(subject *subj) { + skip_spaces(subj); + if (skip_line_end(subj)) { + skip_spaces(subj); + } +} + +// Parse reference. Assumes string begins with '[' character. +// Modify refmap if a reference is encountered. +// Return 0 if no reference found, otherwise position of subject +// after reference is parsed. +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, + cmark_reference_map *refmap) { + subject subj; + + cmark_chunk lab; + cmark_chunk url; + cmark_chunk title; + + bufsize_t matchlen = 0; + bufsize_t beforetitle; + + subject_from_buf(mem, -1, 0, &subj, input, NULL); + + // parse label: + if (!link_label(&subj, &lab) || lab.len == 0) + return 0; + + // colon: + if (peek_char(&subj) == ':') { + advance(&subj); + } else { + return 0; + } + + // parse link url: + spnl(&subj); + if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) { + subj.pos += matchlen; + } else { + return 0; + } + + // parse optional link_title + beforetitle = subj.pos; + spnl(&subj); + matchlen = subj.pos == beforetitle ? 
0 : scan_link_title(&subj.input, subj.pos); + if (matchlen) { + title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); + subj.pos += matchlen; + } else { + subj.pos = beforetitle; + title = cmark_chunk_literal(""); + } + + // parse final spaces and newline: + skip_spaces(&subj); + if (!skip_line_end(&subj)) { + if (matchlen) { // try rewinding before title + subj.pos = beforetitle; + skip_spaces(&subj); + if (!skip_line_end(&subj)) { + return 0; + } + } else { + return 0; + } + } + // insert reference into refmap + cmark_reference_create(refmap, &lab, &url, &title); + return subj.pos; +} diff --git a/deps/cmark/src/inlines.h b/deps/cmark/src/inlines.h new file mode 100644 index 0000000..800ed0c --- /dev/null +++ b/deps/cmark/src/inlines.h @@ -0,0 +1,24 @@ +#ifndef CMARK_INLINES_H +#define CMARK_INLINES_H + +#include "chunk.h" +#include "references.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url); +unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title); + +void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, + cmark_reference_map *refmap, int options); + +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, + cmark_reference_map *refmap); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/iterator.c b/deps/cmark/src/iterator.c new file mode 100644 index 0000000..63cbf9e --- /dev/null +++ b/deps/cmark/src/iterator.c @@ -0,0 +1,122 @@ +#include +#include + +#include "config.h" +#include "node.h" +#include "cmark.h" +#include "iterator.h" + +static const int S_leaf_mask = + (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) | + (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | + (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | + (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); + +cmark_iter *cmark_iter_new(cmark_node *root) { + if (root == NULL) { + return NULL; + } + cmark_mem *mem = root->mem; + 
cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter)); + iter->mem = mem; + iter->root = root; + iter->cur.ev_type = CMARK_EVENT_NONE; + iter->cur.node = NULL; + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = root; + return iter; +} + +void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } + +static bool S_is_leaf(cmark_node *node) { + return ((1 << node->type) & S_leaf_mask) != 0; +} + +cmark_event_type cmark_iter_next(cmark_iter *iter) { + cmark_event_type ev_type = iter->next.ev_type; + cmark_node *node = iter->next.node; + + iter->cur.ev_type = ev_type; + iter->cur.node = node; + + if (ev_type == CMARK_EVENT_DONE) { + return ev_type; + } + + /* roll forward to next item, setting both fields */ + if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { + if (node->first_child == NULL) { + /* stay on this node but exit */ + iter->next.ev_type = CMARK_EVENT_EXIT; + } else { + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = node->first_child; + } + } else if (node == iter->root) { + /* don't move past root */ + iter->next.ev_type = CMARK_EVENT_DONE; + iter->next.node = NULL; + } else if (node->next) { + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = node->next; + } else if (node->parent) { + iter->next.ev_type = CMARK_EVENT_EXIT; + iter->next.node = node->parent; + } else { + assert(false); + iter->next.ev_type = CMARK_EVENT_DONE; + iter->next.node = NULL; + } + + return ev_type; +} + +void cmark_iter_reset(cmark_iter *iter, cmark_node *current, + cmark_event_type event_type) { + iter->next.ev_type = event_type; + iter->next.node = current; + cmark_iter_next(iter); +} + +cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; } + +cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) { + return iter->cur.ev_type; +} + +cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; } + +void cmark_consolidate_text_nodes(cmark_node *root) { + if (root == NULL) { + return; 
+ } + cmark_iter *iter = cmark_iter_new(root); + cmark_strbuf buf = CMARK_BUF_INIT(iter->mem); + cmark_event_type ev_type; + cmark_node *cur, *tmp, *next; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT && + cur->next && cur->next->type == CMARK_NODE_TEXT) { + cmark_strbuf_clear(&buf); + cmark_strbuf_put(&buf, cur->data, cur->len); + tmp = cur->next; + while (tmp && tmp->type == CMARK_NODE_TEXT) { + cmark_iter_next(iter); // advance pointer + cmark_strbuf_put(&buf, tmp->data, tmp->len); + cur->end_column = tmp->end_column; + next = tmp->next; + cmark_node_free(tmp); + tmp = next; + } + iter->mem->free(cur->data); + cur->len = buf.size; + cur->data = cmark_strbuf_detach(&buf); + } + } + + cmark_strbuf_free(&buf); + cmark_iter_free(iter); +} diff --git a/deps/cmark/src/iterator.h b/deps/cmark/src/iterator.h new file mode 100644 index 0000000..30ce76f --- /dev/null +++ b/deps/cmark/src/iterator.h @@ -0,0 +1,26 @@ +#ifndef CMARK_ITERATOR_H +#define CMARK_ITERATOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" + +typedef struct { + cmark_event_type ev_type; + cmark_node *node; +} cmark_iter_state; + +struct cmark_iter { + cmark_mem *mem; + cmark_node *root; + cmark_iter_state cur; + cmark_iter_state next; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/latex.c b/deps/cmark/src/latex.c new file mode 100644 index 0000000..2051ed3 --- /dev/null +++ b/deps/cmark/src/latex.c @@ -0,0 +1,453 @@ +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" +#include "scanners.h" +#include "render.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define 
LIST_NUMBER_STRING_SIZE 20 + +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, + int32_t c, unsigned char nextc) { + if (escape == LITERAL) { + cmark_render_code_point(renderer, c); + return; + } + + switch (c) { + case 123: // '{' + case 125: // '}' + case 35: // '#' + case 37: // '%' + case 38: // '&' + cmark_render_ascii(renderer, "\\"); + cmark_render_code_point(renderer, c); + break; + case 36: // '$' + case 95: // '_' + if (escape == NORMAL) { + cmark_render_ascii(renderer, "\\"); + } + cmark_render_code_point(renderer, c); + break; + case 45: // '-' + if (nextc == 45) { // prevent ligature + cmark_render_ascii(renderer, "-{}"); + } else { + cmark_render_ascii(renderer, "-"); + } + break; + case 126: // '~' + if (escape == NORMAL) { + cmark_render_ascii(renderer, "\\textasciitilde{}"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 94: // '^' + cmark_render_ascii(renderer, "\\^{}"); + break; + case 92: // '\\' + if (escape == URL) { + // / acts as path sep even on windows: + cmark_render_ascii(renderer, "/"); + } else { + cmark_render_ascii(renderer, "\\textbackslash{}"); + } + break; + case 124: // '|' + cmark_render_ascii(renderer, "\\textbar{}"); + break; + case 60: // '<' + cmark_render_ascii(renderer, "\\textless{}"); + break; + case 62: // '>' + cmark_render_ascii(renderer, "\\textgreater{}"); + break; + case 91: // '[' + case 93: // ']' + cmark_render_ascii(renderer, "{"); + cmark_render_code_point(renderer, c); + cmark_render_ascii(renderer, "}"); + break; + case 34: // '"' + cmark_render_ascii(renderer, "\\textquotedbl{}"); + // requires \usepackage[T1]{fontenc} + break; + case 39: // '\'' + cmark_render_ascii(renderer, "\\textquotesingle{}"); + // requires \usepackage{textcomp} + break; + case 160: // nbsp + cmark_render_ascii(renderer, "~"); + break; + case 8230: // hellip + cmark_render_ascii(renderer, "\\ldots{}"); + break; + case 8216: // lsquo + if (escape == NORMAL) { + 
cmark_render_ascii(renderer, "`"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 8217: // rsquo + if (escape == NORMAL) { + cmark_render_ascii(renderer, "\'"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 8220: // ldquo + if (escape == NORMAL) { + cmark_render_ascii(renderer, "``"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 8221: // rdquo + if (escape == NORMAL) { + cmark_render_ascii(renderer, "''"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 8212: // emdash + if (escape == NORMAL) { + cmark_render_ascii(renderer, "---"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 8211: // endash + if (escape == NORMAL) { + cmark_render_ascii(renderer, "--"); + } else { + cmark_render_code_point(renderer, c); + } + break; + default: + cmark_render_code_point(renderer, c); + } +} + +typedef enum { + NO_LINK, + URL_AUTOLINK, + EMAIL_AUTOLINK, + NORMAL_LINK, + INTERNAL_LINK +} link_type; + +static link_type get_link_type(cmark_node *node) { + size_t title_len, url_len; + cmark_node *link_text; + char *realurl; + int realurllen; + bool isemail = false; + + if (node->type != CMARK_NODE_LINK) { + return NO_LINK; + } + + const char *url = cmark_node_get_url(node); + cmark_chunk url_chunk = cmark_chunk_literal(url); + + if (url && *url == '#') { + return INTERNAL_LINK; + } + + url_len = strlen(url); + if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { + return NO_LINK; + } + + const char *title = cmark_node_get_title(node); + title_len = strlen(title); + // if it has a title, we can't treat it as an autolink: + if (title_len == 0) { + + link_text = node->first_child; + cmark_consolidate_text_nodes(link_text); + + if (!link_text) + return NO_LINK; + + realurl = (char *)url; + realurllen = (int)url_len; + if (strncmp(realurl, "mailto:", 7) == 0) { + realurl += 7; + realurllen -= 7; + isemail = true; + } + if (realurllen == link_text->len && + 
strncmp(realurl, (char *)link_text->data, + link_text->len) == 0) { + if (isemail) { + return EMAIL_AUTOLINK; + } else { + return URL_AUTOLINK; + } + } + } + + return NORMAL_LINK; +} + +static int S_get_enumlevel(cmark_node *node) { + int enumlevel = 0; + cmark_node *tmp = node; + while (tmp) { + if (tmp->type == CMARK_NODE_LIST && + cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { + enumlevel++; + } + tmp = tmp->parent; + } + return enumlevel; +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + int list_number; + int enumlevel; + char list_number_string[LIST_NUMBER_STRING_SIZE]; + bool entering = (ev_type == CMARK_EVENT_ENTER); + cmark_list_type list_type; + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); + + // avoid warning about unused parameter: + (void)(options); + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + LIT("\\begin{quote}"); + CR(); + } else { + LIT("\\end{quote}"); + BLANKLINE(); + } + break; + + case CMARK_NODE_LIST: + list_type = cmark_node_get_list_type(node); + if (entering) { + LIT("\\begin{"); + LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); + LIT("}"); + CR(); + list_number = cmark_node_get_list_start(node); + if (list_number > 1) { + enumlevel = S_get_enumlevel(node); + // latex normally supports only five levels + if (enumlevel >= 1 && enumlevel <= 5) { + snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d", + list_number - 1); // the next item will increment this + LIT("\\setcounter{enum"); + switch (enumlevel) { + case 1: LIT("i"); break; + case 2: LIT("ii"); break; + case 3: LIT("iii"); break; + case 4: LIT("iv"); break; + case 5: LIT("v"); break; + default: LIT("i"); break; + } + LIT("}{"); + OUT(list_number_string, false, NORMAL); + LIT("}"); + } + CR(); + } + } else { + LIT("\\end{"); + LIT(list_type == CMARK_ORDERED_LIST ? 
"enumerate" : "itemize"); + LIT("}"); + BLANKLINE(); + } + break; + + case CMARK_NODE_ITEM: + if (entering) { + LIT("\\item "); + } else { + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + switch (cmark_node_get_heading_level(node)) { + case 1: + LIT("\\section"); + break; + case 2: + LIT("\\subsection"); + break; + case 3: + LIT("\\subsubsection"); + break; + case 4: + LIT("\\paragraph"); + break; + case 5: + LIT("\\subparagraph"); + break; + } + LIT("{"); + } else { + LIT("}"); + BLANKLINE(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + CR(); + LIT("\\begin{verbatim}"); + CR(); + OUT(cmark_node_get_literal(node), false, LITERAL); + CR(); + LIT("\\end{verbatim}"); + BLANKLINE(); + break; + + case CMARK_NODE_HTML_BLOCK: + break; + + case CMARK_NODE_CUSTOM_BLOCK: + CR(); + OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + CR(); + break; + + case CMARK_NODE_THEMATIC_BREAK: + BLANKLINE(); + LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"); + BLANKLINE(); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + BLANKLINE(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + LIT("\\\\"); + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (options & CMARK_OPT_HARDBREAKS) { + LIT("\\\\"); + CR(); + } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, NORMAL); + } + break; + + case CMARK_NODE_CODE: + LIT("\\texttt{"); + OUT(cmark_node_get_literal(node), false, NORMAL); + LIT("}"); + break; + + case CMARK_NODE_HTML_INLINE: + break; + + case CMARK_NODE_CUSTOM_INLINE: + OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + break; + + case CMARK_NODE_STRONG: + if (entering) { + LIT("\\textbf{"); + } else { + LIT("}"); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + LIT("\\emph{"); + } else { + LIT("}"); + } + break; + + case CMARK_NODE_LINK: + if (entering) { + const char *url = cmark_node_get_url(node); + // requires \usepackage{hyperref} + switch (get_link_type(node)) { + case URL_AUTOLINK: + LIT("\\url{"); + OUT(url, false, URL); + LIT("}"); + return 0; // Don't process further nodes to avoid double-rendering artefacts + case EMAIL_AUTOLINK: + LIT("\\href{"); + OUT(url, false, URL); + LIT("}\\nolinkurl{"); + break; + case NORMAL_LINK: + LIT("\\href{"); + OUT(url, false, URL); + LIT("}{"); + break; + case INTERNAL_LINK: + LIT("\\protect\\hyperlink{"); + OUT(url + 1, false, URL); + LIT("}{"); + break; + case NO_LINK: + LIT("{"); // error? + } + } else { + LIT("}"); + } + + break; + + case CMARK_NODE_IMAGE: + if (entering) { + LIT("\\protect\\includegraphics{"); + // requires \include{graphicx} + OUT(cmark_node_get_url(node), false, URL); + LIT("}"); + return 0; + } + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_latex(cmark_node *root, int options, int width) { + return cmark_render(root, options, width, outc, S_render_node); +} diff --git a/deps/cmark/src/libcmark.pc.in b/deps/cmark/src/libcmark.pc.in new file mode 100644 index 0000000..e259898 --- /dev/null +++ b/deps/cmark/src/libcmark.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: libcmark +Description: CommonMark parsing, rendering, and manipulation +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -lcmark +Cflags: -I${includedir} diff --git a/deps/cmark/src/main.c b/deps/cmark/src/main.c new file mode 100644 index 0000000..e02a3c4 --- /dev/null +++ 
b/deps/cmark/src/main.c @@ -0,0 +1,213 @@ +#include +#include +#include +#include +#include "config.h" +#include "cmark.h" +#include "node.h" + +#if defined(__OpenBSD__) +# include +# if OpenBSD >= 201605 +# define USE_PLEDGE +# include +# endif +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +#include +#include +#endif + +typedef enum { + FORMAT_NONE, + FORMAT_HTML, + FORMAT_XML, + FORMAT_MAN, + FORMAT_COMMONMARK, + FORMAT_LATEX +} writer_format; + +void print_usage(void) { + printf("Usage: cmark [FILE*]\n"); + printf("Options:\n"); + printf(" --to, -t FORMAT Specify output format (html, xml, man, " + "commonmark, latex)\n"); + printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); + printf(" --sourcepos Include source position attribute\n"); + printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --nobreaks Render soft line breaks as spaces\n"); + printf(" --safe Omit raw HTML and dangerous URLs\n"); + printf(" --unsafe Render raw HTML and dangerous URLs\n"); + printf(" --smart Use smart punctuation\n"); + printf(" --validate-utf8 Replace invalid UTF-8 sequences with U+FFFD\n"); + printf(" --help, -h Print usage information\n"); + printf(" --version Print version\n"); +} + +static void print_document(cmark_node *document, writer_format writer, + int options, int width) { + char *result; + + switch (writer) { + case FORMAT_HTML: + result = cmark_render_html(document, options); + break; + case FORMAT_XML: + result = cmark_render_xml(document, options); + break; + case FORMAT_MAN: + result = cmark_render_man(document, options, width); + break; + case FORMAT_COMMONMARK: + result = cmark_render_commonmark(document, options, width); + break; + case FORMAT_LATEX: + result = cmark_render_latex(document, options, width); + break; + default: + fprintf(stderr, "Unknown format %d\n", writer); + exit(1); + } + printf("%s", result); + document->mem->free(result); +} + +int main(int argc, char *argv[]) { + int i, numfps = 0; + int *files; 
+ char buffer[4096]; + cmark_parser *parser; + size_t bytes; + cmark_node *document; + int width = 0; + char *unparsed; + writer_format writer = FORMAT_HTML; + int options = CMARK_OPT_DEFAULT; + +#ifdef USE_PLEDGE + if (pledge("stdio rpath", NULL) != 0) { + perror("pledge"); + return 1; + } +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) + _setmode(_fileno(stdin), _O_BINARY); + _setmode(_fileno(stdout), _O_BINARY); +#endif + + files = (int *)calloc(argc, sizeof(*files)); + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--version") == 0) { + printf("cmark %s", CMARK_VERSION_STRING); + printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); + exit(0); + } else if (strcmp(argv[i], "--sourcepos") == 0) { + options |= CMARK_OPT_SOURCEPOS; + } else if (strcmp(argv[i], "--hardbreaks") == 0) { + options |= CMARK_OPT_HARDBREAKS; + } else if (strcmp(argv[i], "--nobreaks") == 0) { + options |= CMARK_OPT_NOBREAKS; + } else if (strcmp(argv[i], "--smart") == 0) { + options |= CMARK_OPT_SMART; + } else if (strcmp(argv[i], "--safe") == 0) { + options |= CMARK_OPT_SAFE; + } else if (strcmp(argv[i], "--unsafe") == 0) { + options |= CMARK_OPT_UNSAFE; + } else if (strcmp(argv[i], "--validate-utf8") == 0) { + options |= CMARK_OPT_VALIDATE_UTF8; + } else if ((strcmp(argv[i], "--help") == 0) || + (strcmp(argv[i], "-h") == 0)) { + print_usage(); + exit(0); + } else if (strcmp(argv[i], "--width") == 0) { + i += 1; + if (i < argc) { + width = (int)strtol(argv[i], &unparsed, 10); + if (unparsed && strlen(unparsed) > 0) { + fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i], + unparsed); + exit(1); + } + } else { + fprintf(stderr, "--width requires an argument\n"); + exit(1); + } + } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) { + i += 1; + if (i < argc) { + if (strcmp(argv[i], "man") == 0) { + writer = FORMAT_MAN; + } else if (strcmp(argv[i], "html") == 0) { + writer = FORMAT_HTML; + } else if (strcmp(argv[i], "xml") == 0) { 
+ writer = FORMAT_XML; + } else if (strcmp(argv[i], "commonmark") == 0) { + writer = FORMAT_COMMONMARK; + } else if (strcmp(argv[i], "latex") == 0) { + writer = FORMAT_LATEX; + } else { + fprintf(stderr, "Unknown format %s\n", argv[i]); + exit(1); + } + } else { + fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); + exit(1); + } + } else if (*argv[i] == '-') { + print_usage(); + exit(1); + } else { // treat as file argument + files[numfps++] = i; + } + } + + parser = cmark_parser_new(options); + for (i = 0; i < numfps; i++) { + FILE *fp = fopen(argv[files[i]], "rb"); + if (fp == NULL) { + fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], + strerror(errno)); + exit(1); + } + + while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { + cmark_parser_feed(parser, buffer, bytes); + if (bytes < sizeof(buffer)) { + break; + } + } + + fclose(fp); + } + + if (numfps == 0) { + + while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { + cmark_parser_feed(parser, buffer, bytes); + if (bytes < sizeof(buffer)) { + break; + } + } + } + +#ifdef USE_PLEDGE + if (pledge("stdio", NULL) != 0) { + perror("pledge"); + return 1; + } +#endif + + document = cmark_parser_finish(parser); + cmark_parser_free(parser); + + print_document(document, writer, options, width); + + cmark_node_free(document); + + free(files); + + return 0; +} diff --git a/deps/cmark/src/man.c b/deps/cmark/src/man.c new file mode 100644 index 0000000..65e5c79 --- /dev/null +++ b/deps/cmark/src/man.c @@ -0,0 +1,281 @@ +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" +#include "render.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define LIST_NUMBER_SIZE 20 + +// Functions to convert cmark_nodes to groff man 
strings. +static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, + unsigned char nextc) { + (void)(nextc); + + if (escape == LITERAL) { + cmark_render_code_point(renderer, c); + return; + } + + switch (c) { + case 46: + if (renderer->begin_line) { + cmark_render_ascii(renderer, "\\&."); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 39: + if (renderer->begin_line) { + cmark_render_ascii(renderer, "\\&'"); + } else { + cmark_render_code_point(renderer, c); + } + break; + case 45: + cmark_render_ascii(renderer, "\\-"); + break; + case 92: + cmark_render_ascii(renderer, "\\e"); + break; + case 8216: // left single quote + cmark_render_ascii(renderer, "\\[oq]"); + break; + case 8217: // right single quote + cmark_render_ascii(renderer, "\\[cq]"); + break; + case 8220: // left double quote + cmark_render_ascii(renderer, "\\[lq]"); + break; + case 8221: // right double quote + cmark_render_ascii(renderer, "\\[rq]"); + break; + case 8212: // em dash + cmark_render_ascii(renderer, "\\[em]"); + break; + case 8211: // en dash + cmark_render_ascii(renderer, "\\[en]"); + break; + default: + cmark_render_code_point(renderer, c); + } +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + cmark_node *tmp; + int list_number; + bool entering = (ev_type == CMARK_EVENT_ENTER); + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); + struct block_number *new_block_number; + cmark_mem *allocator = cmark_get_default_mem_allocator(); + + // avoid unused parameter error: + (void)(options); + + // indent inside nested lists + if (renderer->block_number_in_list_item && + node->type < CMARK_NODE_FIRST_INLINE) { + if (entering) { + renderer->block_number_in_list_item->number += 1; + if (renderer->block_number_in_list_item->number == 2) { + CR(); + LIT(".RS"); // indent + CR(); + } + } + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case 
CMARK_NODE_BLOCK_QUOTE: + if (entering) { + CR(); + LIT(".RS"); + CR(); + } else { + CR(); + LIT(".RE"); + CR(); + } + break; + + case CMARK_NODE_LIST: + break; + + case CMARK_NODE_ITEM: + if (entering) { + new_block_number = allocator->calloc(1, sizeof(struct block_number)); + new_block_number->number = 0; + new_block_number->parent = renderer->block_number_in_list_item; + renderer->block_number_in_list_item = new_block_number; + CR(); + LIT(".IP "); + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + LIT("\\[bu] 2"); + } else { + list_number = cmark_node_get_list_start(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + char list_number_s[LIST_NUMBER_SIZE]; + snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); + LIT(list_number_s); + } + CR(); + } else { + if (renderer->block_number_in_list_item) { + if (renderer->block_number_in_list_item->number >= 2) { + CR(); + LIT(".RE"); // de-indent + } + new_block_number = renderer->block_number_in_list_item; + renderer->block_number_in_list_item = + renderer->block_number_in_list_item->parent; + allocator->free(new_block_number); + } + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + CR(); + LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS"); + CR(); + } else { + CR(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + CR(); + LIT(".IP\n.nf\n\\f[C]\n"); + OUT(cmark_node_get_literal(node), false, NORMAL); + CR(); + LIT("\\f[]\n.fi"); + CR(); + break; + + case CMARK_NODE_HTML_BLOCK: + break; + + case CMARK_NODE_CUSTOM_BLOCK: + CR(); + OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + CR(); + break; + + case CMARK_NODE_THEMATIC_BREAK: + CR(); + LIT(".PP\n * * * * *"); + CR(); + break; + + case CMARK_NODE_PARAGRAPH: + if (entering) { + // no blank line if first paragraph in list: + if (node->parent && node->parent->type == CMARK_NODE_ITEM && + node->prev == NULL) { + // no blank line or .PP + } else { + CR(); + LIT(".PP"); + CR(); + } + } else { + CR(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + LIT(".PD 0\n.P\n.PD"); + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (options & CMARK_OPT_HARDBREAKS) { + LIT(".PD 0\n.P\n.PD"); + CR(); + } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, LITERAL); + } + break; + + case CMARK_NODE_CODE: + LIT("\\f[C]"); + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + LIT("\\f[]"); + break; + + case CMARK_NODE_HTML_INLINE: + break; + + case CMARK_NODE_CUSTOM_INLINE: + OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + break; + + case CMARK_NODE_STRONG: + if (entering) { + LIT("\\f[B]"); + } else { + LIT("\\f[]"); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + LIT("\\f[I]"); + } else { + LIT("\\f[]"); + } + break; + + case CMARK_NODE_LINK: + if (!entering) { + LIT(" ("); + OUT(cmark_node_get_url(node), allow_wrap, URL); + LIT(")"); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + LIT("[IMAGE: "); + } else { + LIT("]"); + } + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_man(cmark_node *root, int options, int width) { + return cmark_render(root, options, width, S_outc, S_render_node); +} diff --git a/deps/cmark/src/node.c b/deps/cmark/src/node.c new file mode 100644 index 0000000..3b0cf13 --- /dev/null +++ b/deps/cmark/src/node.c @@ -0,0 +1,872 @@ +#include +#include + +#include "config.h" +#include "node.h" + +static void S_node_unlink(cmark_node *node); + +static CMARK_INLINE bool S_is_block(cmark_node *node) { + if (node == NULL) { + return false; + } + return node->type >= CMARK_NODE_FIRST_BLOCK && + node->type <= CMARK_NODE_LAST_BLOCK; +} + +static CMARK_INLINE bool S_is_inline(cmark_node *node) { + if (node == NULL) { + return false; + } + return node->type >= CMARK_NODE_FIRST_INLINE && + node->type <= CMARK_NODE_LAST_INLINE; +} + +static bool S_can_contain(cmark_node *node, cmark_node *child) { + if (node == NULL || child == NULL || node == child) { + return false; + } + + // Verify that child is not an ancestor of node. 
+ if (child->first_child != NULL) { + cmark_node *cur = node->parent; + + while (cur != NULL) { + if (cur == child) { + return false; + } + cur = cur->parent; + } + } + + if (child->type == CMARK_NODE_DOCUMENT) { + return false; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + case CMARK_NODE_BLOCK_QUOTE: + case CMARK_NODE_ITEM: + return S_is_block(child) && child->type != CMARK_NODE_ITEM; + + case CMARK_NODE_LIST: + return child->type == CMARK_NODE_ITEM; + + case CMARK_NODE_CUSTOM_BLOCK: + return true; + + case CMARK_NODE_PARAGRAPH: + case CMARK_NODE_HEADING: + case CMARK_NODE_EMPH: + case CMARK_NODE_STRONG: + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + case CMARK_NODE_CUSTOM_INLINE: + return S_is_inline(child); + + default: + break; + } + + return false; +} + +cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { + cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); + node->mem = mem; + node->type = (uint16_t)type; + + switch (node->type) { + case CMARK_NODE_HEADING: + node->as.heading.level = 1; + break; + + case CMARK_NODE_LIST: { + cmark_list *list = &node->as.list; + list->list_type = CMARK_BULLET_LIST; + list->start = 0; + list->tight = false; + break; + } + + default: + break; + } + + return node; +} + +cmark_node *cmark_node_new(cmark_node_type type) { + extern cmark_mem DEFAULT_MEM_ALLOCATOR; + return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); +} + +// Free a cmark_node list and any children. 
+static void S_free_nodes(cmark_node *e) { + cmark_mem *mem = e->mem; + cmark_node *next; + while (e != NULL) { + switch (e->type) { + case CMARK_NODE_CODE_BLOCK: + mem->free(e->data); + mem->free(e->as.code.info); + break; + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + mem->free(e->data); + break; + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + mem->free(e->as.link.url); + mem->free(e->as.link.title); + break; + case CMARK_NODE_CUSTOM_BLOCK: + case CMARK_NODE_CUSTOM_INLINE: + mem->free(e->as.custom.on_enter); + mem->free(e->as.custom.on_exit); + break; + default: + break; + } + if (e->last_child) { + // Splice children into list + e->last_child->next = e->next; + e->next = e->first_child; + } + next = e->next; + mem->free(e); + e = next; + } +} + +void cmark_node_free(cmark_node *node) { + S_node_unlink(node); + node->next = NULL; + S_free_nodes(node); +} + +cmark_node_type cmark_node_get_type(cmark_node *node) { + if (node == NULL) { + return CMARK_NODE_NONE; + } else { + return (cmark_node_type)node->type; + } +} + +const char *cmark_node_get_type_string(cmark_node *node) { + if (node == NULL) { + return "NONE"; + } + + switch (node->type) { + case CMARK_NODE_NONE: + return "none"; + case CMARK_NODE_DOCUMENT: + return "document"; + case CMARK_NODE_BLOCK_QUOTE: + return "block_quote"; + case CMARK_NODE_LIST: + return "list"; + case CMARK_NODE_ITEM: + return "item"; + case CMARK_NODE_CODE_BLOCK: + return "code_block"; + case CMARK_NODE_HTML_BLOCK: + return "html_block"; + case CMARK_NODE_CUSTOM_BLOCK: + return "custom_block"; + case CMARK_NODE_PARAGRAPH: + return "paragraph"; + case CMARK_NODE_HEADING: + return "heading"; + case CMARK_NODE_THEMATIC_BREAK: + return "thematic_break"; + case CMARK_NODE_TEXT: + return "text"; + case CMARK_NODE_SOFTBREAK: + return "softbreak"; + case CMARK_NODE_LINEBREAK: + return "linebreak"; + case CMARK_NODE_CODE: + return "code"; + case CMARK_NODE_HTML_INLINE: + 
return "html_inline"; + case CMARK_NODE_CUSTOM_INLINE: + return "custom_inline"; + case CMARK_NODE_EMPH: + return "emph"; + case CMARK_NODE_STRONG: + return "strong"; + case CMARK_NODE_LINK: + return "link"; + case CMARK_NODE_IMAGE: + return "image"; + } + + return ""; +} + +cmark_node *cmark_node_next(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->next; + } +} + +cmark_node *cmark_node_previous(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->prev; + } +} + +cmark_node *cmark_node_parent(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->parent; + } +} + +cmark_node *cmark_node_first_child(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->first_child; + } +} + +cmark_node *cmark_node_last_child(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->last_child; + } +} + +static bufsize_t cmark_set_cstr(cmark_mem *mem, unsigned char **dst, + const char *src) { + unsigned char *old = *dst; + bufsize_t len; + + if (src && src[0]) { + len = (bufsize_t)strlen(src); + *dst = (unsigned char *)mem->realloc(NULL, len + 1); + memcpy(*dst, src, len + 1); + } else { + len = 0; + *dst = NULL; + } + if (old) { + mem->free(old); + } + + return len; +} + +void *cmark_node_get_user_data(cmark_node *node) { + if (node == NULL) { + return NULL; + } else { + return node->user_data; + } +} + +int cmark_node_set_user_data(cmark_node *node, void *user_data) { + if (node == NULL) { + return 0; + } + node->user_data = user_data; + return 1; +} + +const char *cmark_node_get_literal(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_CODE_BLOCK: + return node->data ? 
(char *)node->data : ""; + + default: + break; + } + + return NULL; +} + +int cmark_node_set_literal(cmark_node *node, const char *content) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_CODE_BLOCK: + node->len = cmark_set_cstr(node->mem, &node->data, content); + return 1; + + default: + break; + } + + return 0; +} + +int cmark_node_get_heading_level(cmark_node *node) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_HEADING: + return node->as.heading.level; + + default: + break; + } + + return 0; +} + +int cmark_node_set_heading_level(cmark_node *node, int level) { + if (node == NULL || level < 1 || level > 6) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_HEADING: + node->as.heading.level = level; + return 1; + + default: + break; + } + + return 0; +} + +cmark_list_type cmark_node_get_list_type(cmark_node *node) { + if (node == NULL) { + return CMARK_NO_LIST; + } + + if (node->type == CMARK_NODE_LIST) { + return (cmark_list_type)node->as.list.list_type; + } else { + return CMARK_NO_LIST; + } +} + +int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { + if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) { + return 0; + } + + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.list_type = (unsigned char)type; + return 1; + } else { + return 0; + } +} + +cmark_delim_type cmark_node_get_list_delim(cmark_node *node) { + if (node == NULL) { + return CMARK_NO_DELIM; + } + + if (node->type == CMARK_NODE_LIST) { + return (cmark_delim_type)node->as.list.delimiter; + } else { + return CMARK_NO_DELIM; + } +} + +int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) { + if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) { + return 0; + } + + if (node == NULL) { + return 0; + } + + if 
(node->type == CMARK_NODE_LIST) { + node->as.list.delimiter = (unsigned char)delim; + return 1; + } else { + return 0; + } +} + +int cmark_node_get_list_start(cmark_node *node) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + return node->as.list.start; + } else { + return 0; + } +} + +int cmark_node_set_list_start(cmark_node *node, int start) { + if (node == NULL || start < 0) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.start = start; + return 1; + } else { + return 0; + } +} + +int cmark_node_get_list_tight(cmark_node *node) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + return node->as.list.tight; + } else { + return 0; + } +} + +int cmark_node_set_list_tight(cmark_node *node, int tight) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.tight = tight == 1; + return 1; + } else { + return 0; + } +} + +const char *cmark_node_get_fence_info(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + return node->as.code.info ? (char *)node->as.code.info : ""; + } else { + return NULL; + } +} + +int cmark_node_set_fence_info(cmark_node *node, const char *info) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + cmark_set_cstr(node->mem, &node->as.code.info, info); + return 1; + } else { + return 0; + } +} + +const char *cmark_node_get_url(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + return node->as.link.url ? 
(char *)node->as.link.url : ""; + default: + break; + } + + return NULL; +} + +int cmark_node_set_url(cmark_node *node, const char *url) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + cmark_set_cstr(node->mem, &node->as.link.url, url); + return 1; + default: + break; + } + + return 0; +} + +const char *cmark_node_get_title(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + return node->as.link.title ? (char *)node->as.link.title : ""; + default: + break; + } + + return NULL; +} + +int cmark_node_set_title(cmark_node *node, const char *title) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + cmark_set_cstr(node->mem, &node->as.link.title, title); + return 1; + default: + break; + } + + return 0; +} + +const char *cmark_node_get_on_enter(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_CUSTOM_INLINE: + case CMARK_NODE_CUSTOM_BLOCK: + return node->as.custom.on_enter ? (char *)node->as.custom.on_enter : ""; + default: + break; + } + + return NULL; +} + +int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_CUSTOM_INLINE: + case CMARK_NODE_CUSTOM_BLOCK: + cmark_set_cstr(node->mem, &node->as.custom.on_enter, on_enter); + return 1; + default: + break; + } + + return 0; +} + +const char *cmark_node_get_on_exit(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_CUSTOM_INLINE: + case CMARK_NODE_CUSTOM_BLOCK: + return node->as.custom.on_exit ? 
(char *)node->as.custom.on_exit : ""; + default: + break; + } + + return NULL; +} + +int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_CUSTOM_INLINE: + case CMARK_NODE_CUSTOM_BLOCK: + cmark_set_cstr(node->mem, &node->as.custom.on_exit, on_exit); + return 1; + default: + break; + } + + return 0; +} + +int cmark_node_get_start_line(cmark_node *node) { + if (node == NULL) { + return 0; + } + return node->start_line; +} + +int cmark_node_get_start_column(cmark_node *node) { + if (node == NULL) { + return 0; + } + return node->start_column; +} + +int cmark_node_get_end_line(cmark_node *node) { + if (node == NULL) { + return 0; + } + return node->end_line; +} + +int cmark_node_get_end_column(cmark_node *node) { + if (node == NULL) { + return 0; + } + return node->end_column; +} + +// Unlink a node without adjusting its next, prev, and parent pointers. +static void S_node_unlink(cmark_node *node) { + if (node == NULL) { + return; + } + + if (node->prev) { + node->prev->next = node->next; + } + if (node->next) { + node->next->prev = node->prev; + } + + // Adjust first_child and last_child of parent. + cmark_node *parent = node->parent; + if (parent) { + if (parent->first_child == node) { + parent->first_child = node->next; + } + if (parent->last_child == node) { + parent->last_child = node->prev; + } + } +} + +void cmark_node_unlink(cmark_node *node) { + S_node_unlink(node); + + node->next = NULL; + node->prev = NULL; + node->parent = NULL; +} + +int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) { + if (node == NULL || sibling == NULL) { + return 0; + } + + if (!node->parent || !S_can_contain(node->parent, sibling)) { + return 0; + } + + S_node_unlink(sibling); + + cmark_node *old_prev = node->prev; + + // Insert 'sibling' between 'old_prev' and 'node'. 
+ if (old_prev) { + old_prev->next = sibling; + } + sibling->prev = old_prev; + sibling->next = node; + node->prev = sibling; + + // Set new parent. + cmark_node *parent = node->parent; + sibling->parent = parent; + + // Adjust first_child of parent if inserted as first child. + if (parent && !old_prev) { + parent->first_child = sibling; + } + + return 1; +} + +int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) { + if (node == NULL || sibling == NULL) { + return 0; + } + + if (!node->parent || !S_can_contain(node->parent, sibling)) { + return 0; + } + + S_node_unlink(sibling); + + cmark_node *old_next = node->next; + + // Insert 'sibling' between 'node' and 'old_next'. + if (old_next) { + old_next->prev = sibling; + } + sibling->next = old_next; + sibling->prev = node; + node->next = sibling; + + // Set new parent. + cmark_node *parent = node->parent; + sibling->parent = parent; + + // Adjust last_child of parent if inserted as last child. + if (parent && !old_next) { + parent->last_child = sibling; + } + + return 1; +} + +int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) { + if (!cmark_node_insert_before(oldnode, newnode)) { + return 0; + } + cmark_node_unlink(oldnode); + return 1; +} + +int cmark_node_prepend_child(cmark_node *node, cmark_node *child) { + if (!S_can_contain(node, child)) { + return 0; + } + + S_node_unlink(child); + + cmark_node *old_first_child = node->first_child; + + child->next = old_first_child; + child->prev = NULL; + child->parent = node; + node->first_child = child; + + if (old_first_child) { + old_first_child->prev = child; + } else { + // Also set last_child if node previously had no children. 
+ node->last_child = child; + } + + return 1; +} + +int cmark_node_append_child(cmark_node *node, cmark_node *child) { + if (!S_can_contain(node, child)) { + return 0; + } + + S_node_unlink(child); + + cmark_node *old_last_child = node->last_child; + + child->next = NULL; + child->prev = old_last_child; + child->parent = node; + node->last_child = child; + + if (old_last_child) { + old_last_child->next = child; + } else { + // Also set first_child if node previously had no children. + node->first_child = child; + } + + return 1; +} + +static void S_print_error(FILE *out, cmark_node *node, const char *elem) { + if (out == NULL) { + return; + } + fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, + cmark_node_get_type_string(node), node->start_line, + node->start_column); +} + +int cmark_node_check(cmark_node *node, FILE *out) { + cmark_node *cur; + int errors = 0; + + if (!node) { + return 0; + } + + cur = node; + for (;;) { + if (cur->first_child) { + if (cur->first_child->prev != NULL) { + S_print_error(out, cur->first_child, "prev"); + cur->first_child->prev = NULL; + ++errors; + } + if (cur->first_child->parent != cur) { + S_print_error(out, cur->first_child, "parent"); + cur->first_child->parent = cur; + ++errors; + } + cur = cur->first_child; + continue; + } + + next_sibling: + if (cur == node) { + break; + } + if (cur->next) { + if (cur->next->prev != cur) { + S_print_error(out, cur->next, "prev"); + cur->next->prev = cur; + ++errors; + } + if (cur->next->parent != cur->parent) { + S_print_error(out, cur->next, "parent"); + cur->next->parent = cur->parent; + ++errors; + } + cur = cur->next; + continue; + } + + if (cur->parent->last_child != cur) { + S_print_error(out, cur->parent, "last_child"); + cur->parent->last_child = cur; + ++errors; + } + cur = cur->parent; + goto next_sibling; + } + + return errors; +} diff --git a/deps/cmark/src/node.h b/deps/cmark/src/node.h new file mode 100644 index 0000000..1cae5d7 --- /dev/null +++ 
b/deps/cmark/src/node.h @@ -0,0 +1,92 @@ +#ifndef CMARK_NODE_H +#define CMARK_NODE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "config.h" +#include "cmark.h" +#include "buffer.h" + +typedef struct { + int marker_offset; + int padding; + int start; + unsigned char list_type; + unsigned char delimiter; + unsigned char bullet_char; + bool tight; +} cmark_list; + +typedef struct { + unsigned char *info; + uint8_t fence_length; + uint8_t fence_offset; + unsigned char fence_char; + int8_t fenced; +} cmark_code; + +typedef struct { + int internal_offset; + int8_t level; + bool setext; +} cmark_heading; + +typedef struct { + unsigned char *url; + unsigned char *title; +} cmark_link; + +typedef struct { + unsigned char *on_enter; + unsigned char *on_exit; +} cmark_custom; + +enum cmark_node__internal_flags { + CMARK_NODE__OPEN = (1 << 0), + CMARK_NODE__LAST_LINE_BLANK = (1 << 1), + CMARK_NODE__LAST_LINE_CHECKED = (1 << 2), +}; + +struct cmark_node { + cmark_mem *mem; + + struct cmark_node *next; + struct cmark_node *prev; + struct cmark_node *parent; + struct cmark_node *first_child; + struct cmark_node *last_child; + + void *user_data; + + unsigned char *data; + bufsize_t len; + + int start_line; + int start_column; + int end_line; + int end_column; + uint16_t type; + uint16_t flags; + + union { + cmark_list list; + cmark_code code; + cmark_heading heading; + cmark_link link; + cmark_custom custom; + int html_block_type; + } as; +}; + +CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/parser.h b/deps/cmark/src/parser.h new file mode 100644 index 0000000..f546ace --- /dev/null +++ b/deps/cmark/src/parser.h @@ -0,0 +1,42 @@ +#ifndef CMARK_AST_H +#define CMARK_AST_H + +#include +#include "references.h" +#include "node.h" +#include "buffer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LINK_LABEL_LENGTH 1000 + +struct cmark_parser { + struct 
cmark_mem *mem; + struct cmark_reference_map *refmap; + struct cmark_node *root; + struct cmark_node *current; + int line_number; + bufsize_t offset; + bufsize_t column; + bufsize_t first_nonspace; + bufsize_t first_nonspace_column; + bufsize_t thematic_break_kill_pos; + int indent; + bool blank; + bool partially_consumed_tab; + cmark_strbuf curline; + bufsize_t last_line_length; + cmark_strbuf linebuf; + cmark_strbuf content; + int options; + bool last_buffer_ended_with_cr; + unsigned int total_size; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/references.c b/deps/cmark/src/references.c new file mode 100644 index 0000000..39b4f2b --- /dev/null +++ b/deps/cmark/src/references.c @@ -0,0 +1,171 @@ +#include "cmark.h" +#include "utf8.h" +#include "parser.h" +#include "references.h" +#include "inlines.h" +#include "chunk.h" + +static void reference_free(cmark_reference_map *map, cmark_reference *ref) { + cmark_mem *mem = map->mem; + if (ref != NULL) { + mem->free(ref->label); + mem->free(ref->url); + mem->free(ref->title); + mem->free(ref); + } +} + +// normalize reference: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +// Return NULL if the reference name is actually empty (i.e. 
composed +// solely from whitespace) +static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { + cmark_strbuf normalized = CMARK_BUF_INIT(mem); + unsigned char *result; + + if (ref == NULL) + return NULL; + + if (ref->len == 0) + return NULL; + + cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); + cmark_strbuf_trim(&normalized); + cmark_strbuf_normalize_whitespace(&normalized); + + result = cmark_strbuf_detach(&normalized); + assert(result); + + if (result[0] == '\0') { + mem->free(result); + return NULL; + } + + return result; +} + +void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, + cmark_chunk *url, cmark_chunk *title) { + cmark_reference *ref; + unsigned char *reflabel = normalize_reference(map->mem, label); + + /* empty reference name, or composed from only whitespace */ + if (reflabel == NULL) + return; + + assert(map->sorted == NULL); + + ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); + ref->label = reflabel; + ref->url = cmark_clean_url(map->mem, url); + ref->title = cmark_clean_title(map->mem, title); + ref->age = map->size; + ref->next = map->refs; + + if (ref->url != NULL) + ref->size += strlen((char*)ref->url); + if (ref->title != NULL) + ref->size += strlen((char*)ref->title); + + map->refs = ref; + map->size++; +} + +static int +labelcmp(const unsigned char *a, const unsigned char *b) { + return strcmp((const char *)a, (const char *)b); +} + +static int +refcmp(const void *p1, const void *p2) { + cmark_reference *r1 = *(cmark_reference **)p1; + cmark_reference *r2 = *(cmark_reference **)p2; + int res = labelcmp(r1->label, r2->label); + return res ? 
res : ((int)r1->age - (int)r2->age); +} + +static int +refsearch(const void *label, const void *p2) { + cmark_reference *ref = *(cmark_reference **)p2; + return labelcmp((const unsigned char *)label, ref->label); +} + +static void sort_references(cmark_reference_map *map) { + unsigned int i = 0, last = 0, size = map->size; + cmark_reference *r = map->refs, **sorted = NULL; + + sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); + while (r) { + sorted[i++] = r; + r = r->next; + } + + qsort(sorted, size, sizeof(cmark_reference *), refcmp); + + for (i = 1; i < size; i++) { + if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) + sorted[++last] = sorted[i]; + } + map->sorted = sorted; + map->size = last + 1; +} + +// Returns reference if refmap contains a reference with matching +// label, otherwise NULL. +cmark_reference *cmark_reference_lookup(cmark_reference_map *map, + cmark_chunk *label) { + cmark_reference **ref = NULL; + cmark_reference *r = NULL; + unsigned char *norm; + + if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) + return NULL; + + if (map == NULL || !map->size) + return NULL; + + norm = normalize_reference(map->mem, label); + if (norm == NULL) + return NULL; + + if (!map->sorted) + sort_references(map); + + ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), + refsearch); + map->mem->free(norm); + + if (ref != NULL) { + r = ref[0]; + /* Check for expansion limit */ + if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size) + return NULL; + map->ref_size += r->size; + } + + return r; +} + +void cmark_reference_map_free(cmark_reference_map *map) { + cmark_reference *ref; + + if (map == NULL) + return; + + ref = map->refs; + while (ref) { + cmark_reference *next = ref->next; + reference_free(map, ref); + ref = next; + } + + map->mem->free(map->sorted); + map->mem->free(map); +} + +cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) { + 
cmark_reference_map *map = + (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map)); + map->mem = mem; + return map; +} diff --git a/deps/cmark/src/references.h b/deps/cmark/src/references.h new file mode 100644 index 0000000..b069d92 --- /dev/null +++ b/deps/cmark/src/references.h @@ -0,0 +1,43 @@ +#ifndef CMARK_REFERENCES_H +#define CMARK_REFERENCES_H + +#include "chunk.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct cmark_reference { + struct cmark_reference *next; + unsigned char *label; + unsigned char *url; + unsigned char *title; + unsigned int age; + unsigned int size; +}; + +typedef struct cmark_reference cmark_reference; + +struct cmark_reference_map { + cmark_mem *mem; + cmark_reference *refs; + cmark_reference **sorted; + unsigned int size; + unsigned int ref_size; + unsigned int max_ref_size; +}; + +typedef struct cmark_reference_map cmark_reference_map; + +cmark_reference_map *cmark_reference_map_new(cmark_mem *mem); +void cmark_reference_map_free(cmark_reference_map *map); +cmark_reference *cmark_reference_lookup(cmark_reference_map *map, + cmark_chunk *label); +extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, + cmark_chunk *url, cmark_chunk *title); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/render.c b/deps/cmark/src/render.c new file mode 100644 index 0000000..f71b048 --- /dev/null +++ b/deps/cmark/src/render.c @@ -0,0 +1,193 @@ +#include +#include "buffer.h" +#include "cmark.h" +#include "utf8.h" +#include "render.h" +#include "node.h" +#include "cmark_ctype.h" + +static CMARK_INLINE void S_cr(cmark_renderer *renderer) { + if (renderer->need_cr < 1) { + renderer->need_cr = 1; + } +} + +static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { + if (renderer->need_cr < 2) { + renderer->need_cr = 2; + } +} + +static void S_out(cmark_renderer *renderer, const char *source, bool wrap, + cmark_escaping escape) { + int length = strlen(source); + unsigned char nextc; 
+ int32_t c; + int i = 0; + int last_nonspace; + int len; + int k = renderer->buffer->size - 1; + + wrap = wrap && !renderer->no_linebreaks; + + if (renderer->in_tight_list_item && renderer->need_cr > 1) { + renderer->need_cr = 1; + } + while (renderer->need_cr) { + if (k < 0 || renderer->buffer->ptr[k] == '\n') { + k -= 1; + } else { + cmark_strbuf_putc(renderer->buffer, '\n'); + if (renderer->need_cr > 1) { + cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, + renderer->prefix->size); + } + } + renderer->column = 0; + renderer->last_breakable = 0; + renderer->begin_line = true; + renderer->begin_content = true; + renderer->need_cr -= 1; + } + + while (i < length) { + if (renderer->begin_line) { + cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, + renderer->prefix->size); + // note: this assumes prefix is ascii: + renderer->column = renderer->prefix->size; + } + + len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); + if (len == -1) { // error condition + return; // return without rendering rest of string + } + nextc = source[i + len]; + if (c == 32 && wrap) { + if (!renderer->begin_line) { + last_nonspace = renderer->buffer->size; + cmark_strbuf_putc(renderer->buffer, ' '); + renderer->column += 1; + renderer->begin_line = false; + renderer->begin_content = false; + // skip following spaces + while (source[i + 1] == ' ') { + i++; + } + // We don't allow breaks that make a digit the first character + // because this causes problems with commonmark output. + if (!cmark_isdigit(source[i + 1])) { + renderer->last_breakable = last_nonspace; + } + } + + } else if (escape == LITERAL) { + if (c == 10) { + cmark_strbuf_putc(renderer->buffer, '\n'); + renderer->column = 0; + renderer->begin_line = true; + renderer->begin_content = true; + renderer->last_breakable = 0; + } else { + cmark_render_code_point(renderer, c); + renderer->begin_line = false; + // we don't set 'begin_content' to false til we've + // finished parsing a digit. 
Reason: in commonmark + // we need to escape a potential list marker after + // a digit: + renderer->begin_content = + renderer->begin_content && cmark_isdigit(c) == 1; + } + } else { + (renderer->outc)(renderer, escape, c, nextc); + renderer->begin_line = false; + renderer->begin_content = + renderer->begin_content && cmark_isdigit(c) == 1; + } + + // If adding the character went beyond width, look for an + // earlier place where the line could be broken: + if (renderer->width > 0 && renderer->column > renderer->width && + !renderer->begin_line && renderer->last_breakable > 0) { + + // copy from last_breakable to remainder + unsigned char *src = renderer->buffer->ptr + + renderer->last_breakable + 1; + bufsize_t remainder_len = renderer->buffer->size - + renderer->last_breakable - 1; + unsigned char *remainder = + (unsigned char *)renderer->mem->realloc(NULL, remainder_len); + memcpy(remainder, src, remainder_len); + // truncate at last_breakable + cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); + // add newline, prefix, and remainder + cmark_strbuf_putc(renderer->buffer, '\n'); + cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, + renderer->prefix->size); + cmark_strbuf_put(renderer->buffer, remainder, remainder_len); + renderer->column = renderer->prefix->size + remainder_len; + renderer->mem->free(remainder); + renderer->last_breakable = 0; + renderer->begin_line = false; + renderer->begin_content = false; + } + + i += len; + } +} + +// Assumes no newlines, assumes ascii content: +void cmark_render_ascii(cmark_renderer *renderer, const char *s) { + int origsize = renderer->buffer->size; + cmark_strbuf_puts(renderer->buffer, s); + renderer->column += renderer->buffer->size - origsize; +} + +void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { + cmark_utf8proc_encode_char(c, renderer->buffer); + renderer->column += 1; +} + +char *cmark_render(cmark_node *root, int options, int width, + void (*outc)(cmark_renderer *, 
cmark_escaping, int32_t, + unsigned char), + int (*render_node)(cmark_renderer *renderer, + cmark_node *node, + cmark_event_type ev_type, int options)) { + cmark_mem *mem = root->mem; + cmark_strbuf pref = CMARK_BUF_INIT(mem); + cmark_strbuf buf = CMARK_BUF_INIT(mem); + cmark_node *cur; + cmark_event_type ev_type; + char *result; + cmark_iter *iter = cmark_iter_new(root); + + cmark_renderer renderer = {options, + mem, &buf, &pref, 0, width, + 0, 0, true, true, false, + false, NULL, + outc, S_cr, S_blankline, S_out}; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (!render_node(&renderer, cur, ev_type, options)) { + // a false value causes us to skip processing + // the node's contents. this is used for + // autolinks. + cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); + } + } + + // ensure final newline + if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { + cmark_strbuf_putc(renderer.buffer, '\n'); + } + + result = (char *)cmark_strbuf_detach(renderer.buffer); + + cmark_iter_free(iter); + cmark_strbuf_free(renderer.prefix); + cmark_strbuf_free(renderer.buffer); + + return result; +} diff --git a/deps/cmark/src/render.h b/deps/cmark/src/render.h new file mode 100644 index 0000000..db60a5d --- /dev/null +++ b/deps/cmark/src/render.h @@ -0,0 +1,55 @@ +#ifndef CMARK_RENDER_H +#define CMARK_RENDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "buffer.h" + +typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping; + +struct block_number { + int number; + struct block_number *parent; +}; + +struct cmark_renderer { + int options; + cmark_mem *mem; + cmark_strbuf *buffer; + cmark_strbuf *prefix; + int column; + int width; + int need_cr; + bufsize_t last_breakable; + bool begin_line; + bool begin_content; + bool no_linebreaks; + bool in_tight_list_item; + struct block_number *block_number_in_list_item; + void (*outc)(struct cmark_renderer *, 
cmark_escaping, int32_t, unsigned char); + void (*cr)(struct cmark_renderer *); + void (*blankline)(struct cmark_renderer *); + void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping); +}; + +typedef struct cmark_renderer cmark_renderer; + +void cmark_render_ascii(cmark_renderer *renderer, const char *s); + +void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); + +char *cmark_render(cmark_node *root, int options, int width, + void (*outc)(cmark_renderer *, cmark_escaping, int32_t, + unsigned char), + int (*render_node)(cmark_renderer *renderer, + cmark_node *node, + cmark_event_type ev_type, int options)); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/scanners.c b/deps/cmark/src/scanners.c new file mode 100644 index 0000000..5d8950f --- /dev/null +++ b/deps/cmark/src/scanners.c @@ -0,0 +1,13634 @@ +/* Generated by re2c 3.0 */ +#include +#include "chunk.h" +#include "scanners.h" + +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, + bufsize_t offset) { + bufsize_t res; + unsigned char *ptr = (unsigned char *)c->data; + + if (ptr == NULL || offset > c->len) { + return 0; + } else { + unsigned char lim = ptr[c->len]; + + ptr[c->len] = '\0'; + res = scanner(ptr + offset); + ptr[c->len] = lim; + } + + return res; +} + +// Try to match a scheme including colon. 
+bufsize_t _scan_scheme(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + yych = *p; + if (yych <= '@') + goto yy1; + if (yych <= 'Z') + goto yy3; + if (yych <= '`') + goto yy1; + if (yych <= 'z') + goto yy3; + yy1: + ++p; + yy2 : { return 0; } + yy3: + yych = *(marker = ++p); + if (yych <= '/') { + if (yych <= '+') { + if (yych <= '*') + goto yy2; + } else { + if (yych <= ',') + goto yy2; + if (yych >= '/') + goto yy2; + } + } else { + if (yych <= 'Z') { + if (yych <= '9') + goto yy4; + if (yych <= '@') + goto yy2; + } else { + if (yych <= '`') + goto yy2; + if (yych >= '{') + goto yy2; + } + } + yy4: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') + goto yy6; + } else { + if (yych != '/') + goto yy6; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych >= 'A') + goto yy6; + } else { + if (yych <= '`') + goto yy5; + if (yych <= 'z') + goto yy6; + } + } + yy5: + p = marker; + goto yy2; + yy6: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') + goto yy8; + goto yy5; + } else { + if (yych == '/') + goto yy5; + goto yy8; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + goto yy8; + } else { + if (yych <= '`') + goto yy5; + if (yych <= 'z') + goto yy8; + goto yy5; + } + } + yy7: + ++p; + { return (bufsize_t)(p - start); } + yy8: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { 
+ if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') 
+ goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + 
goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') 
{ + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy5; + } else { + if (yych == '/') + goto yy5; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy7; + 
if (yych <= '@') + goto yy5; + } else { + if (yych <= '`') + goto yy5; + if (yych >= '{') + goto yy5; + } + } + yych = *++p; + if (yych == ':') + goto yy7; + goto yy5; + } +} + +// Try to match URI autolink after first <, returning number of chars matched. +bufsize_t _scan_autolink_uri(const unsigned char *p) { /* Goto-driven matcher over the bytes at p: a letter, further scheme bytes [A-Za-z0-9+.-], ':' and a body, ended by '>'. Returns the number of bytes consumed through '>' (p - start), or 0 when the input does not match. */ + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { /* 256-entry byte table: 128 is set for 0x21-0x7F except '<' and '>' */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '@') + goto yy10; + if (yych <= 'Z') + goto yy12; + if (yych <= '`') + goto yy10; + if (yych <= 'z') + goto yy12; + yy10: + ++p; + yy11 : { return 0; } /* shared "no match" exit; every failure path ends here */ + yy12: + yych = *(marker = ++p); /* remember this position; yy14 rewinds p to it before failing */ + if (yych <= '/') { + if (yych <= '+') { + if (yych <= '*') + goto yy11; + } else { + if (yych <= ',') + goto yy11; + if (yych >= '/') + goto yy11; + } + } else { + if (yych <= 'Z') { + if (yych <= '9') + goto yy13; + if (yych <= '@') + goto yy11; + } else { + if (yych <= '`') + goto yy11; + if (yych >= '{') + goto yy11; + } + } +
yy13: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') + goto yy15; + } else { + if (yych != '/') + goto yy15; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych >= 'A') + goto yy15; + } else { + if (yych <= '`') + goto yy14; + if (yych <= 'z') + goto yy15; + } + } + yy14: + p = marker; + goto yy11; + yy15: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') + goto yy17; + goto yy14; + } else { + if (yych == '/') + goto yy14; + goto yy17; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + goto yy17; + } else { + if (yych <= '`') + goto yy14; + if (yych <= 'z') + goto yy17; + goto yy14; + } + } + yy16: + yych = *++p; /* body after ':' - keep looping while yybm flags the byte */ + if (yybm[0 + yych] & 128) { + goto yy16; + } + if (yych <= 0xEC) { /* '>' ends the match; bytes 0xC2-0xF4 branch to the multi-byte checks below; anything else fails */ + if (yych <= 0xC1) { + if (yych <= '<') + goto yy14; + if (yych <= '>') + goto yy18; + goto yy14; + } else { + if (yych <= 0xDF) + goto yy19; + if (yych <= 0xE0) + goto yy20; + goto yy21; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy22; + if (yych <= 0xEF) + goto yy21; + goto yy23; + } else { + if (yych <= 0xF3) + goto yy24; + if (yych <= 0xF4) + goto yy25; + goto yy14; + } + } + yy17: + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych == '+') + goto yy26; + goto yy14; + } else { + if (yych == '/') + goto yy14; + goto yy26; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + goto yy26; + } else { + if (yych <= '`') + goto yy14; + if (yych <= 'z') + goto yy26; + goto yy14; + } + } + yy18: + ++p; /* reached on '>': step past it so it is included in the length returned below */ + { return (bufsize_t)(p - start); } + yy19: /* needs one trailing byte in 0x80-0xBF, then back to the body loop */ + yych = *++p; + if (yych <= 0x7F) + goto yy14; + if (yych <= 0xBF) + goto yy16; + goto yy14; + yy20: + yych = *++p; + if (yych <= 0x9F) + goto yy14; + if (yych <= 0xBF) + goto yy19; + goto yy14; + yy21: + yych = *++p; + if (yych <= 0x7F) + goto yy14; + if (yych <= 0xBF) + goto yy19; + goto yy14; + yy22: + yych = *++p; + if (yych <=
0x7F) + goto yy14; + if (yych <= 0x9F) + goto yy19; + goto yy14; + yy23: + yych = *++p; + if (yych <= 0x8F) + goto yy14; + if (yych <= 0xBF) + goto yy21; + goto yy14; + yy24: + yych = *++p; + if (yych <= 0x7F) + goto yy14; + if (yych <= 0xBF) + goto yy21; + goto yy14; + yy25: + yych = *++p; + if (yych <= 0x7F) + goto yy14; + if (yych <= 0x8F) + goto yy21; + goto yy14; + yy26: /* from here on: one unrolled check per further scheme byte; a scheme byte falls through to the next check, ':' jumps to the body loop (yy16), anything else fails via yy14 */ + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if
(yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { +
if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <=
'`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <=
',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; + if (yych <= '9') { + if (yych <= ',') { + if (yych != '+') + goto yy14; + } else { + if (yych == '/') + goto yy14; + } + } else { + if (yych <= 'Z') { + if (yych <= ':') + goto yy16; + if (yych <= '@') + goto yy14; + } else { + if (yych <= '`') + goto yy14; + if (yych >= '{') + goto yy14; + } + } + yych = *++p; /* unrolled scheme checks are exhausted: only ':' is accepted at this position */ + if (yych == ':') + goto yy16; + goto yy14; + } +} + +// Try to match email autolink after first <, returning num of chars matched.
+bufsize_t _scan_autolink_email(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 128, 0, 128, 128, 128, 128, 128, 0, 0, + 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 128, 0, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '9') { + if (yych <= '\'') { + if (yych == '!') + goto yy30; + if (yych >= '#') + goto yy30; + } else { + if (yych <= ')') + goto yy28; + if (yych != ',') + goto yy30; + } + } else { + if (yych <= '?') { + if (yych == '=') + goto yy30; + if (yych >= '?') + goto yy30; + } else { + if (yych <= 'Z') { + if (yych >= 'A') + goto yy30; + } else { + if (yych <= ']') + goto yy28; + if (yych <= '~') + goto yy30; + } + } + } + yy28: + ++p; + yy29 : { return 0; } + yy30: + yych = *(marker = ++p); + if (yych <= ',') { + if (yych <= '"') { + if (yych == '!') + goto yy32; + goto yy29; + } else { + if (yych <= '\'') + goto yy32; + if (yych <= ')') + goto yy29; + if (yych <= '+') + goto yy32; + goto yy29; + } + } else { + if (yych <= '>') { + if (yych <= '9') + goto yy32; + if (yych == '=') + goto yy32; 
+ goto yy29; + } else { + if (yych <= 'Z') + goto yy32; + if (yych <= ']') + goto yy29; + if (yych <= '~') + goto yy32; + goto yy29; + } + } + yy31: + yych = *++p; + yy32: + if (yybm[0 + yych] & 128) { + goto yy31; + } + if (yych <= '>') + goto yy33; + if (yych <= '@') + goto yy34; + yy33: + p = marker; + goto yy29; + yy34: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') + goto yy33; + if (yych >= ':') + goto yy33; + } else { + if (yych <= 'Z') + goto yy35; + if (yych <= '`') + goto yy33; + if (yych >= '{') + goto yy33; + } + yy35: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy36; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy36; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy36; + goto yy33; + } + } + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy38; + if (yych <= '/') + goto yy33; + goto yy39; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy39; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy39; + goto yy33; + } + } + yy36: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych <= '-') + goto yy38; + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy39; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy39; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy39; + goto yy33; + } + } + yy37: + ++p; + { return (bufsize_t)(p - start); } + yy38: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy40; + if (yych <= '/') + goto yy33; + goto yy41; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy41; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') 
+ goto yy41; + goto yy33; + } + } + yy39: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy41; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy41; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy41; + goto yy33; + } + } + yy40: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy42; + if (yych <= '/') + goto yy33; + goto yy43; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy43; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy43; + goto yy33; + } + } + yy41: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy43; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy43; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy43; + goto yy33; + } + } + yy42: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy44; + if (yych <= '/') + goto yy33; + goto yy45; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy45; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy45; + goto yy33; + } + } + yy43: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy45; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy45; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy45; + goto yy33; + } + } + yy44: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy46; + if (yych <= '/') + 
goto yy33; + goto yy47; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy47; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy47; + goto yy33; + } + } + yy45: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy47; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy47; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy47; + goto yy33; + } + } + yy46: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy48; + if (yych <= '/') + goto yy33; + goto yy49; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy49; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy49; + goto yy33; + } + } + yy47: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy49; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy49; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy49; + goto yy33; + } + } + yy48: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy50; + if (yych <= '/') + goto yy33; + goto yy51; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy51; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy51; + goto yy33; + } + } + yy49: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy51; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy51; + } else { + if (yych <= '`') + 
goto yy33; + if (yych <= 'z') + goto yy51; + goto yy33; + } + } + yy50: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy52; + if (yych <= '/') + goto yy33; + goto yy53; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy53; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy53; + goto yy33; + } + } + yy51: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy53; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy53; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy53; + goto yy33; + } + } + yy52: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy54; + if (yych <= '/') + goto yy33; + goto yy55; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy55; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy55; + goto yy33; + } + } + yy53: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy55; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy55; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy55; + goto yy33; + } + } + yy54: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy56; + if (yych <= '/') + goto yy33; + goto yy57; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy57; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy57; + goto yy33; + } + } + yy55: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto 
yy57; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy57; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy57; + goto yy33; + } + } + yy56: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy58; + if (yych <= '/') + goto yy33; + goto yy59; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy59; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy59; + goto yy33; + } + } + yy57: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy59; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy59; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy59; + goto yy33; + } + } + yy58: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy60; + if (yych <= '/') + goto yy33; + goto yy61; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy61; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy61; + goto yy33; + } + } + yy59: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy61; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy61; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy61; + goto yy33; + } + } + yy60: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy62; + if (yych <= '/') + goto yy33; + goto yy63; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy63; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy63; + goto yy33; + } + } + yy61: + yych = *++p; + if (yych <= 
'=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy63; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy63; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy63; + goto yy33; + } + } + yy62: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy64; + if (yych <= '/') + goto yy33; + goto yy65; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy65; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy65; + goto yy33; + } + } + yy63: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy65; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy65; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy65; + goto yy33; + } + } + yy64: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy66; + if (yych <= '/') + goto yy33; + goto yy67; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy67; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy67; + goto yy33; + } + } + yy65: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy67; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy67; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy67; + goto yy33; + } + } + yy66: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy68; + if (yych <= '/') + goto yy33; + goto yy69; + } else { + if (yych <= 'Z') { + if (yych <= 
'@') + goto yy33; + goto yy69; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy69; + goto yy33; + } + } + yy67: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy69; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy69; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy69; + goto yy33; + } + } + yy68: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy70; + if (yych <= '/') + goto yy33; + goto yy71; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy71; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy71; + goto yy33; + } + } + yy69: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy71; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy71; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy71; + goto yy33; + } + } + yy70: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy72; + if (yych <= '/') + goto yy33; + goto yy73; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy73; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy73; + goto yy33; + } + } + yy71: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy73; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy73; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy73; + goto yy33; + } + } + 
yy72: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy74; + if (yych <= '/') + goto yy33; + goto yy75; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy75; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy75; + goto yy33; + } + } + yy73: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy75; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy75; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy75; + goto yy33; + } + } + yy74: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy76; + if (yych <= '/') + goto yy33; + goto yy77; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy77; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy77; + goto yy33; + } + } + yy75: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy77; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy77; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy77; + goto yy33; + } + } + yy76: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy78; + if (yych <= '/') + goto yy33; + goto yy79; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy79; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy79; + goto yy33; + } + } + yy77: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy79; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= 
'>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy79; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy79; + goto yy33; + } + } + yy78: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy80; + if (yych <= '/') + goto yy33; + goto yy81; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy81; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy81; + goto yy33; + } + } + yy79: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy81; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy81; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy81; + goto yy33; + } + } + yy80: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy82; + if (yych <= '/') + goto yy33; + goto yy83; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy83; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy83; + goto yy33; + } + } + yy81: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy83; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy83; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy83; + goto yy33; + } + } + yy82: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy84; + if (yych <= '/') + goto yy33; + goto yy85; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy85; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy85; + goto yy33; + } + } + yy83: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if 
(yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy85; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy85; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy85; + goto yy33; + } + } + yy84: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy86; + if (yych <= '/') + goto yy33; + goto yy87; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy87; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy87; + goto yy33; + } + } + yy85: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy87; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy87; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy87; + goto yy33; + } + } + yy86: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy88; + if (yych <= '/') + goto yy33; + goto yy89; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy89; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy89; + goto yy33; + } + } + yy87: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy89; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy89; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy89; + goto yy33; + } + } + yy88: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy90; + if (yych <= '/') + goto yy33; + goto yy91; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy91; + } else { + if (yych <= '`') + goto 
yy33; + if (yych <= 'z') + goto yy91; + goto yy33; + } + } + yy89: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy91; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy91; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy91; + goto yy33; + } + } + yy90: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy92; + if (yych <= '/') + goto yy33; + goto yy93; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy93; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy93; + goto yy33; + } + } + yy91: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy93; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy93; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy93; + goto yy33; + } + } + yy92: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy94; + if (yych <= '/') + goto yy33; + goto yy95; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy95; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy95; + goto yy33; + } + } + yy93: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy95; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy95; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy95; + goto yy33; + } + } + yy94: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto 
yy96; + if (yych <= '/') + goto yy33; + goto yy97; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy97; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy97; + goto yy33; + } + } + yy95: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy97; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy97; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy97; + goto yy33; + } + } + yy96: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy98; + if (yych <= '/') + goto yy33; + goto yy99; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy99; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy99; + goto yy33; + } + } + yy97: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy99; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy99; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy99; + goto yy33; + } + } + yy98: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy100; + if (yych <= '/') + goto yy33; + goto yy101; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy101; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy101; + goto yy33; + } + } + yy99: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy101; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy101; 
+ } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy101; + goto yy33; + } + } + yy100: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy102; + if (yych <= '/') + goto yy33; + goto yy103; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy103; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy103; + goto yy33; + } + } + yy101: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy103; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy103; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy103; + goto yy33; + } + } + yy102: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy104; + if (yych <= '/') + goto yy33; + goto yy105; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy105; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy105; + goto yy33; + } + } + yy103: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy105; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy105; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy105; + goto yy33; + } + } + yy104: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy106; + if (yych <= '/') + goto yy33; + goto yy107; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy107; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy107; + goto yy33; + } + } + yy105: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if 
(yych <= '/') + goto yy33; + if (yych <= '9') + goto yy107; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy107; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy107; + goto yy33; + } + } + yy106: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy108; + if (yych <= '/') + goto yy33; + goto yy109; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy109; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy109; + goto yy33; + } + } + yy107: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy109; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy109; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy109; + goto yy33; + } + } + yy108: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy110; + if (yych <= '/') + goto yy33; + goto yy111; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy111; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy111; + goto yy33; + } + } + yy109: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy111; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy111; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy111; + goto yy33; + } + } + yy110: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy112; + if (yych <= '/') + goto yy33; + goto yy113; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy113; + } else { + if (yych <= '`') + goto yy33; + if (yych 
<= 'z') + goto yy113; + goto yy33; + } + } + yy111: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy113; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy113; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy113; + goto yy33; + } + } + yy112: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy114; + if (yych <= '/') + goto yy33; + goto yy115; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy115; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy115; + goto yy33; + } + } + yy113: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy115; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy115; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy115; + goto yy33; + } + } + yy114: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy116; + if (yych <= '/') + goto yy33; + goto yy117; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy117; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy117; + goto yy33; + } + } + yy115: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy117; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy117; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy117; + goto yy33; + } + } + yy116: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + 
goto yy118; + if (yych <= '/') + goto yy33; + goto yy119; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy119; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy119; + goto yy33; + } + } + yy117: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy119; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy119; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy119; + goto yy33; + } + } + yy118: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy120; + if (yych <= '/') + goto yy33; + goto yy121; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy121; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy121; + goto yy33; + } + } + yy119: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy121; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy121; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy121; + goto yy33; + } + } + yy120: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy122; + if (yych <= '/') + goto yy33; + goto yy123; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy123; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy123; + goto yy33; + } + } + yy121: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy123; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + 
goto yy33; + goto yy123; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy123; + goto yy33; + } + } + yy122: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy124; + if (yych <= '/') + goto yy33; + goto yy125; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy125; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy125; + goto yy33; + } + } + yy123: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy125; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy125; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy125; + goto yy33; + } + } + yy124: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy126; + if (yych <= '/') + goto yy33; + goto yy127; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy127; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy127; + goto yy33; + } + } + yy125: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy127; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy127; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy127; + goto yy33; + } + } + yy126: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy128; + if (yych <= '/') + goto yy33; + goto yy129; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy129; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy129; + goto yy33; + } + } + yy127: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + 
goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy129; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy129; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy129; + goto yy33; + } + } + yy128: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy130; + if (yych <= '/') + goto yy33; + goto yy131; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy131; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy131; + goto yy33; + } + } + yy129: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy131; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy131; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy131; + goto yy33; + } + } + yy130: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy132; + if (yych <= '/') + goto yy33; + goto yy133; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy133; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy133; + goto yy33; + } + } + yy131: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy133; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy133; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy133; + goto yy33; + } + } + yy132: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy134; + if (yych <= '/') + goto yy33; + goto yy135; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy135; + } else { + if (yych <= 
'`') + goto yy33; + if (yych <= 'z') + goto yy135; + goto yy33; + } + } + yy133: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy135; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy135; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy135; + goto yy33; + } + } + yy134: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy136; + if (yych <= '/') + goto yy33; + goto yy137; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy137; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy137; + goto yy33; + } + } + yy135: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy137; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy137; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy137; + goto yy33; + } + } + yy136: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy138; + if (yych <= '/') + goto yy33; + goto yy139; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy139; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy139; + goto yy33; + } + } + yy137: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy139; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy139; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy139; + goto yy33; + } + } + yy138: + yych = *++p; + if (yych <= 
'9') { + if (yych == '-') + goto yy140; + if (yych <= '/') + goto yy33; + goto yy141; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy141; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy141; + goto yy33; + } + } + yy139: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy141; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy141; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy141; + goto yy33; + } + } + yy140: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy142; + if (yych <= '/') + goto yy33; + goto yy143; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy143; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy143; + goto yy33; + } + } + yy141: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy143; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy143; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy143; + goto yy33; + } + } + yy142: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy144; + if (yych <= '/') + goto yy33; + goto yy145; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy145; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy145; + goto yy33; + } + } + yy143: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy145; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto 
yy37; + if (yych <= '@') + goto yy33; + goto yy145; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy145; + goto yy33; + } + } + yy144: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy146; + if (yych <= '/') + goto yy33; + goto yy147; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy147; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy147; + goto yy33; + } + } + yy145: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy147; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy147; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy147; + goto yy33; + } + } + yy146: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy148; + if (yych <= '/') + goto yy33; + goto yy149; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy149; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy149; + goto yy33; + } + } + yy147: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy149; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy149; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy149; + goto yy33; + } + } + yy148: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy150; + if (yych <= '/') + goto yy33; + goto yy151; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy151; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy151; + goto yy33; + } + } + yy149: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto 
yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy151; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy151; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy151; + goto yy33; + } + } + yy150: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy152; + if (yych <= '/') + goto yy33; + goto yy153; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy153; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy153; + goto yy33; + } + } + yy151: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy153; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy153; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy153; + goto yy33; + } + } + yy152: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy154; + if (yych <= '/') + goto yy33; + goto yy155; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy155; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy155; + goto yy33; + } + } + yy153: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy155; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy155; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy155; + goto yy33; + } + } + yy154: + yych = *++p; + if (yych <= '9') { + if (yych == '-') + goto yy156; + if (yych <= '/') + goto yy33; + goto yy157; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy33; + goto yy157; + 
} else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy157; + goto yy33; + } + } + yy155: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= ',') + goto yy33; + if (yych >= '.') + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy157; + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + goto yy157; + } else { + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy157; + goto yy33; + } + } + yy156: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') + goto yy33; + if (yych <= '9') + goto yy158; + goto yy33; + } else { + if (yych <= 'Z') + goto yy158; + if (yych <= '`') + goto yy33; + if (yych <= 'z') + goto yy158; + goto yy33; + } + yy157: + yych = *++p; + if (yych <= '=') { + if (yych <= '.') { + if (yych <= '-') + goto yy33; + goto yy34; + } else { + if (yych <= '/') + goto yy33; + if (yych >= ':') + goto yy33; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy37; + if (yych <= '@') + goto yy33; + } else { + if (yych <= '`') + goto yy33; + if (yych >= '{') + goto yy33; + } + } + yy158: + yych = *++p; + if (yych == '.') + goto yy34; + if (yych == '>') + goto yy37; + goto yy33; + } +} + +// Try to match an HTML tag after first <, returning num of chars matched. 
+bufsize_t _scan_html_tag(const unsigned char *p) { /* Tries to match the remainder of an HTML open tag or closing tag, with p pointing just past the opening less-than sign. Returns the number of bytes matched, measured from p, or 0 when the input does not form a complete tag. No length argument exists: scanning ends at the first byte that cannot extend the match, and a 0x00 byte is rejected in every state. NOTE(review): presumably callers pass a NUL-terminated buffer, confirm at the call sites. */ + const unsigned char *marker = NULL; /* set right after the first byte is consumed; the failure state yy165 rewinds p to it before returning 0 */ + const unsigned char *start = p; /* kept so the accept state can compute the match length as p minus start */ + + { + unsigned char yych; /* the input byte most recently read */ + static const unsigned char yybm[] = { /* character-class bits indexed by byte value and tested with the masks below. 4: tag-name byte (ASCII letter, digit, hyphen). 8: whitespace (0x09 to 0x0D, and space). 16: attribute-name byte (letter, digit, underscore, period, colon, hyphen). 32: byte allowed in an unquoted attribute value. 64: byte allowed inside a double-quoted value. 128: byte allowed inside a single-quoted value. All entries from 0x80 upward are 0, so non-ASCII bytes are handled by the explicit range checks in the states. */ + 0, 224, 224, 224, 224, 224, 224, 224, 224, 200, 200, 200, 200, 200, + 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 200, 224, 128, 224, 224, 224, 224, 64, 224, 224, + 224, 224, 224, 244, 240, 224, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 240, 224, 192, 192, 192, 224, 224, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 224, 224, 224, 224, 240, 192, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 224, 224, 224, + 224, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; /* dispatch on the first byte: a slash starts a closing tag (yy162), an ASCII letter starts an open tag (yy163), anything else falls into yy160 and returns 0 */ + if (yych <= '@') { + if (yych == '/') + goto yy162; + } else { + if (yych <= 'Z') + goto yy163; + if (yych <= '`') + goto yy160; + if (yych <= 'z') + goto yy163; + } + yy160: /* no match on the first byte; falls through to yy161 */ + ++p; + yy161 : { return 0; } + yy162: /* closing tag: the byte after the slash must be an ASCII letter */ + yych = *(marker = ++p); + if (yych <= '@') + goto yy161; + if (yych <= 'Z') + goto yy164; + if (yych <= '`') + goto yy161; + if (yych <= 'z') + goto yy164; + goto yy161; + yy163: /* open tag: first name letter consumed; only a byte that can continue an open tag (whitespace, hyphen, slash, digit, greater-than, letter) proceeds to yy168, anything else returns 0 */ + yych = *(marker = ++p); + if (yych <= '.') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy161; + if (yych <= '\r') + goto yy168; + goto yy161; + } else { + if (yych <= ' ') + goto yy168; + if (yych == '-') + goto yy168; + goto yy161; + } + } else { + if (yych <= '@') { + if (yych <= '9') + goto yy168; + if (yych == '>') +
goto yy168; + goto yy161; + } else { + if (yych <= 'Z') + goto yy168; + if (yych <= '`') + goto yy161; + if (yych <= 'z') + goto yy168; + goto yy161; + } + } + yy164: /* closing-tag name: loop over tag-name bytes (mask 4), then allow whitespace (yy171) or the final greater-than (yy170); anything else falls into yy165 */ + yych = *++p; + if (yybm[0 + yych] & 4) { + goto yy164; + } + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy165; + if (yych <= '\r') + goto yy171; + } else { + if (yych <= ' ') + goto yy171; + if (yych == '>') + goto yy170; + } + yy165: /* shared failure exit: rewind p to marker and return 0; p is a by-value parameter, so the rewind is not visible to the caller */ + p = marker; + goto yy161; + yy166: /* whitespace inside an open tag: skip the run (mask 8), then expect a slash (yy169), the start of an attribute name, which is a colon, letter or underscore (yy172), or the final greater-than (yy170) */ + yych = *++p; + if (yybm[0 + yych] & 8) { + goto yy166; + } + if (yych <= '>') { + if (yych <= '9') { + if (yych == '/') + goto yy169; + goto yy165; + } else { + if (yych <= ':') + goto yy172; + if (yych <= '=') + goto yy165; + goto yy170; + } + } else { + if (yych <= '^') { + if (yych <= '@') + goto yy165; + if (yych <= 'Z') + goto yy172; + goto yy165; + } else { + if (yych == '`') + goto yy165; + if (yych <= 'z') + goto yy172; + goto yy165; + } + } + yy167: /* open-tag name continuation: read the next byte and classify it in yy168 */ + yych = *++p; + yy168: /* classify a byte following open-tag name bytes: whitespace goes to yy166, hyphen, digit or letter stays in the name (yy167), slash goes to yy169, greater-than accepts (yy170) */ + if (yybm[0 + yych] & 8) { + goto yy166; + } + if (yych <= '=') { + if (yych <= '.') { + if (yych == '-') + goto yy167; + goto yy165; + } else { + if (yych <= '/') + goto yy169; + if (yych <= '9') + goto yy167; + goto yy165; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy170; + if (yych <= '@') + goto yy165; + goto yy167; + } else { + if (yych <= '`') + goto yy165; + if (yych <= 'z') + goto yy167; + goto yy165; + } + } + yy169: /* a slash was seen inside an open tag: the very next byte must be greater-than */ + yych = *++p; + if (yych != '>') + goto yy165; + yy170: /* accept: step past the closing greater-than and return the match length */ + ++p; + { return (bufsize_t)(p - start); } + yy171: /* whitespace after a closing-tag name: only more whitespace or the final greater-than is allowed */ + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy165; + if (yych <= '\r') + goto yy171; + goto yy165; + } else { + if (yych <= ' ') + goto yy171; + if (yych == '>') + goto yy170; + goto yy165; + } + yy172: /* attribute name: loop over attribute-name bytes (mask 16); afterwards whitespace falls through to yy173, slash goes to yy169, equals goes to yy174, greater-than accepts */ + yych = *++p; + if (yybm[0 + yych] & 16) { + goto yy172; + } + if (yych <= ',') { + if (yych <= '\r') { + if (yych <= 0x08) + goto yy165; + } else { + if (yych != ' ') + goto yy165; + } + } else { + if (yych <= '<') { + if (yych <= '/') + goto yy169; + goto yy165; + } else { + if
(yych <= '=') + goto yy174; + if (yych <= '>') + goto yy170; + goto yy165; + } + } + yy173: /* whitespace after an attribute name: skip further whitespace, then accept a slash (yy169), an equals sign (yy174), the start of another attribute name (yy172) or the final greater-than (yy170) */ + yych = *++p; + if (yych <= '<') { + if (yych <= ' ') { + if (yych <= 0x08) + goto yy165; + if (yych <= '\r') + goto yy173; + if (yych <= 0x1F) + goto yy165; + goto yy173; + } else { + if (yych <= '/') { + if (yych <= '.') + goto yy165; + goto yy169; + } else { + if (yych == ':') + goto yy172; + goto yy165; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '=') + goto yy174; + if (yych <= '>') + goto yy170; + if (yych <= '@') + goto yy165; + goto yy172; + } else { + if (yych <= '_') { + if (yych <= '^') + goto yy165; + goto yy172; + } else { + if (yych <= '`') + goto yy165; + if (yych <= 'z') + goto yy172; + goto yy165; + } + } + } + yy174: /* after the equals sign: skip whitespace, then start an unquoted value (mask 32, yy175), a double-quoted value (yy176), a single-quoted value (yy177), or a multi-byte sequence of an unquoted value (yy178 to yy184); a value is mandatory, so greater-than fails here */ + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy175; + } + if (yych <= 0xE0) { + if (yych <= '"') { + if (yych <= 0x00) + goto yy165; + if (yych <= ' ') + goto yy174; + goto yy176; + } else { + if (yych <= '\'') + goto yy177; + if (yych <= 0xC1) + goto yy165; + if (yych <= 0xDF) + goto yy178; + goto yy179; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy181; + goto yy180; + } else { + if (yych <= 0xF0) + goto yy182; + if (yych <= 0xF3) + goto yy183; + if (yych <= 0xF4) + goto yy184; + goto yy165; + } + } + yy175: /* unquoted attribute value: loop over mask 32 bytes; whitespace ends the value (yy166), greater-than accepts (yy170), lead bytes 0xC2 to 0xF4 enter the multi-byte states, everything else fails */ + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy175; + } + if (yych <= 0xE0) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy165; + if (yych <= ' ') + goto yy166; + goto yy165; + } else { + if (yych <= '>') + goto yy170; + if (yych <= 0xC1) + goto yy165; + if (yych <= 0xDF) + goto yy178; + goto yy179; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy181; + goto yy180; + } else { + if (yych <= 0xF0) + goto yy182; + if (yych <= 0xF3) + goto yy183; + if (yych <= 0xF4) + goto yy184; + goto yy165; + } + } + yy176: /* double-quoted attribute value: loop over mask 64 bytes; the closing double quote goes to yy185, lead bytes 0xC2 to 0xF4 enter yy186 to yy192, a 0x00 byte or any other high byte fails */ + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy176; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy165; + if (yych <= '"') + goto
yy185; + goto yy165; + } else { + if (yych <= 0xDF) + goto yy186; + if (yych <= 0xE0) + goto yy187; + goto yy188; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy189; + if (yych <= 0xEF) + goto yy188; + goto yy190; + } else { + if (yych <= 0xF3) + goto yy191; + if (yych <= 0xF4) + goto yy192; + goto yy165; + } + } + yy177: /* single-quoted attribute value: loop over mask 128 bytes; the closing single quote goes to yy185, lead bytes 0xC2 to 0xF4 enter yy193 to yy199, a 0x00 byte or any other high byte fails */ + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy177; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy165; + if (yych <= '\'') + goto yy185; + goto yy165; + } else { + if (yych <= 0xDF) + goto yy193; + if (yych <= 0xE0) + goto yy194; + goto yy195; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy196; + if (yych <= 0xEF) + goto yy195; + goto yy197; + } else { + if (yych <= 0xF3) + goto yy198; + if (yych <= 0xF4) + goto yy199; + goto yy165; + } + } + yy178: /* yy178 to yy184: trailing bytes of a multi-byte sequence inside an unquoted value; each state range-checks one byte and the last one returns to yy175. NOTE(review): the lead and trailing byte ranges look like the well-formed UTF-8 table, confirm against the scanner grammar */ + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy175; + goto yy165; + yy179: + yych = *++p; + if (yych <= 0x9F) + goto yy165; + if (yych <= 0xBF) + goto yy178; + goto yy165; + yy180: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy178; + goto yy165; + yy181: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x9F) + goto yy178; + goto yy165; + yy182: + yych = *++p; + if (yych <= 0x8F) + goto yy165; + if (yych <= 0xBF) + goto yy180; + goto yy165; + yy183: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy180; + goto yy165; + yy184: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x8F) + goto yy180; + goto yy165; + yy185: /* just past the closing quote of a quoted value: whitespace (yy166), a slash (yy169) or the final greater-than (yy170) must follow */ + yych = *++p; + if (yybm[0 + yych] & 8) { + goto yy166; + } + if (yych == '/') + goto yy169; + if (yych == '>') + goto yy170; + goto yy165; + yy186: /* yy186 to yy192: same trailing-byte checks as yy178 to yy184, for a double-quoted value; success returns to yy176 */ + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy176; + goto yy165; + yy187: + yych = *++p; + if (yych <= 0x9F) + goto yy165; + if (yych <= 0xBF) + goto yy186; + goto yy165; + yy188: + yych = *++p; + if (yych <= 0x7F) + goto
yy165; + if (yych <= 0xBF) + goto yy186; + goto yy165; + yy189: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x9F) + goto yy186; + goto yy165; + yy190: + yych = *++p; + if (yych <= 0x8F) + goto yy165; + if (yych <= 0xBF) + goto yy188; + goto yy165; + yy191: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy188; + goto yy165; + yy192: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x8F) + goto yy188; + goto yy165; + yy193: /* yy193 to yy199: same trailing-byte checks as yy178 to yy184, for a single-quoted value; success returns to yy177 */ + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy177; + goto yy165; + yy194: + yych = *++p; + if (yych <= 0x9F) + goto yy165; + if (yych <= 0xBF) + goto yy193; + goto yy165; + yy195: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy193; + goto yy165; + yy196: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x9F) + goto yy193; + goto yy165; + yy197: + yych = *++p; + if (yych <= 0x8F) + goto yy165; + if (yych <= 0xBF) + goto yy195; + goto yy165; + yy198: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0xBF) + goto yy195; + goto yy165; + yy199: + yych = *++p; + if (yych <= 0x7F) + goto yy165; + if (yych <= 0x8F) + goto yy195; + goto yy165; + } +} + +bufsize_t _scan_html_comment(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych == '-') + goto yy202; + ++p; + yy201 : { return 0; } + yy202: + yych = *(marker = ++p); + if (yych != '-') + goto yy201; + yy203: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy203; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy204; + if (yych <= '-') + goto yy205; + } else { + if (yych <= 0xDF) + goto yy206; + if (yych <= 0xE0) + goto yy207; + goto yy208; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy209; + if (yych <= 0xEF) + goto yy208; + goto yy210; + } else { + if (yych <= 0xF3) + goto yy211; + if (yych <= 0xF4) + goto yy212; + } + } + yy204: + p = marker; + goto yy201; + yy205: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy203; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy204; + if (yych <= '-') + goto yy213; + goto yy204; + } else { + if (yych <= 0xDF) + goto yy206; + if (yych <= 0xE0) + goto yy207; + goto yy208; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy209; + if (yych <= 0xEF) + goto yy208; + goto yy210; + } else { + if (yych <= 0xF3) + goto yy211; + if (yych <= 0xF4) + goto yy212; + goto yy204; + } + } + yy206: + yych = *++p; + if (yych <= 0x7F) + goto yy204; + if (yych <= 0xBF) + goto yy203; + goto yy204; + yy207: + yych = *++p; + if (yych <= 0x9F) + goto yy204; + if (yych <= 0xBF) + goto yy206; + goto yy204; + yy208: + yych = *++p; + if (yych <= 0x7F) + goto yy204; + if (yych <= 0xBF) + goto yy206; + goto yy204; + yy209: + yych = 
*++p; + if (yych <= 0x7F) + goto yy204; + if (yych <= 0x9F) + goto yy206; + goto yy204; + yy210: + yych = *++p; + if (yych <= 0x8F) + goto yy204; + if (yych <= 0xBF) + goto yy208; + goto yy204; + yy211: + yych = *++p; + if (yych <= 0x7F) + goto yy204; + if (yych <= 0xBF) + goto yy208; + goto yy204; + yy212: + yych = *++p; + if (yych <= 0x7F) + goto yy204; + if (yych <= 0x8F) + goto yy208; + goto yy204; + yy213: /* "--" seen: '>' falls through to the accepting return at the end of this state */ + yych = *++p; + if (yych <= 0xE0) { + if (yych <= '>') { + if (yych <= 0x00) + goto yy204; + if (yych <= '=') + goto yy203; + } else { + if (yych <= 0x7F) + goto yy203; + if (yych <= 0xC1) + goto yy204; + if (yych <= 0xDF) + goto yy206; + goto yy207; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy209; + goto yy208; + } else { + if (yych <= 0xF0) + goto yy210; + if (yych <= 0xF3) + goto yy211; + if (yych <= 0xF4) + goto yy212; + goto yy204; + } + } + ++p; + { return (bufsize_t)(p - start); } + } +} + +/* Scan processing-instruction content (going by the function name) at p. ASCII bytes other than NUL and '?' are consumed through the yybm bitmap and multi-byte UTF-8 sequences are validated. A '?' is consumed together with the byte after it unless that byte is '>', NUL, or the start of a malformed UTF-8 sequence, in which case the match ends just before the '?'. Note that in "??>" the two '?' are consumed as a pair, so the '>' is then treated as content. Returns the number of bytes matched, which excludes a terminating "?>"; returns 0 when not even one character can be matched at p. */ bufsize_t _scan_html_pi(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy217; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy215; + if (yych <= '?') + goto yy220; + } else { + if (yych <= 0xDF) + goto yy221; + if (yych <= 0xE0) + goto yy222; + goto yy223; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy224; + if (yych <= 0xEF) + goto yy223; + goto yy225; + } else { + if (yych <= 0xF3) + goto yy226; + if (yych <= 0xF4) + goto yy227; + } + } + yy215: + ++p; + yy216 : /* no match */ { return 0; } + yy217: /* content loop; marker records the end of the content accepted so far */ + yyaccept = 0; + yych = *(marker = ++p); + yy218: + if (yybm[0 + yych] & 128) { + goto yy217; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy219; + if (yych <= '?') + goto yy228; + } else { + if (yych <= 0xDF) + goto yy230; + if (yych <= 0xE0) + goto yy231; + goto yy232; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy233; + if (yych <= 0xEF) + goto yy232; + goto yy234; + } else { + if (yych <= 0xF3) + goto yy235; + if (yych <= 0xF4) + goto yy236; + } + } + yy219 : /* accept: length of the content matched so far */ { return (bufsize_t)(p - start); } + yy220: /* the very first byte was '?' */ + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= '?') { + if (yych <= 0x00) + goto yy216; + if (yych <= '=') + goto yy218; + if (yych <= '>') + goto yy216; + goto yy217; + } else { + if (yych <= 0x7F) + goto yy218; + if (yych <= 0xC1) + goto yy216; + if (yych <= 0xF4) + goto yy218; + goto yy216; + } + yy221: + yych = *++p; + if (yych <= 0x7F) + goto yy216; + if (yych <= 0xBF) + goto yy217; + goto yy216; + yy222: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy216; + if (yych <= 0xBF) + goto yy230; + goto yy216; + yy223: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy216; + if (yych <= 0xBF) + goto yy230; + goto yy216; + yy224: + 
yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy216; + if (yych <= 0x9F) + goto yy230; + goto yy216; + yy225: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy216; + if (yych <= 0xBF) + goto yy232; + goto yy216; + yy226: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy216; + if (yych <= 0xBF) + goto yy232; + goto yy216; + yy227: + yyaccept = 1; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy216; + if (yych <= 0x8F) + goto yy232; + goto yy216; + yy228: /* '?' seen inside content: look at the following byte */ + yych = *++p; + if (yych <= 0xE0) { + if (yych <= '>') { + if (yych <= 0x00) + goto yy229; + if (yych <= '=') + goto yy217; + } else { + if (yych <= 0x7F) + goto yy217; + if (yych <= 0xC1) + goto yy229; + if (yych <= 0xDF) + goto yy230; + goto yy231; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy233; + goto yy232; + } else { + if (yych <= 0xF0) + goto yy234; + if (yych <= 0xF3) + goto yy235; + if (yych <= 0xF4) + goto yy236; + } + } + yy229: /* backtrack to marker; yyaccept selects the accepting (0) or the failing (non-zero) return */ + p = marker; + if (yyaccept == 0) { + goto yy219; + } else { + goto yy216; + } + yy230: + yych = *++p; + if (yych <= 0x7F) + goto yy229; + if (yych <= 0xBF) + goto yy217; + goto yy229; + yy231: + yych = *++p; + if (yych <= 0x9F) + goto yy229; + if (yych <= 0xBF) + goto yy230; + goto yy229; + yy232: + yych = *++p; + if (yych <= 0x7F) + goto yy229; + if (yych <= 0xBF) + goto yy230; + goto yy229; + yy233: + yych = *++p; + if (yych <= 0x7F) + goto yy229; + if (yych <= 0x9F) + goto yy230; + goto yy229; + yy234: + yych = *++p; + if (yych <= 0x8F) + goto yy229; + if (yych <= 0xBF) + goto yy232; + goto yy229; + yy235: + yych = *++p; + if (yych <= 0x7F) + goto yy229; + if (yych <= 0xBF) + goto yy232; + goto yy229; + yy236: + yych = *++p; + if (yych <= 0x7F) + goto yy229; + if (yych <= 0x8F) + goto yy232; + goto yy229; + } +} + +/* Scan a declaration-like run at p. The first byte must be an ASCII uppercase letter 'A'..'Z', otherwise 0 is returned. After that every byte is consumed up to, but not including, the first '>' or NUL; ASCII bytes go through the yybm bitmap and multi-byte UTF-8 sequences are validated, with a malformed sequence ending the match just before its lead byte. Returns the matched length in bytes. */ bufsize_t _scan_html_declaration(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static 
const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '@') + goto yy238; + if (yych <= 'Z') + goto yy239; + yy238: /* first byte is not 'A'..'Z': no match */ + ++p; + { return 0; } + yy239: /* consume bytes other than NUL and '>'; marker tracks the end of the accepted run */ + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy239; + } + if (yych <= 0xED) { + if (yych <= 0xDF) { + if (yych >= 0xC2) + goto yy241; + } else { + if (yych <= 0xE0) + goto yy243; + if (yych <= 0xEC) + goto yy244; + goto yy245; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy244; + goto yy246; + } else { + if (yych <= 0xF3) + goto yy247; + if (yych <= 0xF4) + goto yy248; + } + } + yy240 : /* accept: length of the run matched so far */ { return (bufsize_t)(p - start); } + yy241: + yych = *++p; + if (yych <= 0x7F) + goto yy242; + if (yych <= 0xBF) + goto yy239; + yy242: /* malformed UTF-8: rewind to marker (the lead byte) and accept what precedes it */ + p = marker; + goto yy240; + yy243: + yych = *++p; + if (yych <= 0x9F) + goto yy242; + if (yych <= 0xBF) + goto yy241; + goto yy242; + yy244: + yych = *++p; + if (yych <= 0x7F) + goto yy242; + if (yych <= 0xBF) + goto 
yy241; + goto yy242; + yy245: + yych = *++p; + if (yych <= 0x7F) + goto yy242; + if (yych <= 0x9F) + goto yy241; + goto yy242; + yy246: + yych = *++p; + if (yych <= 0x8F) + goto yy242; + if (yych <= 0xBF) + goto yy244; + goto yy242; + yy247: + yych = *++p; + if (yych <= 0x7F) + goto yy242; + if (yych <= 0xBF) + goto yy244; + goto yy242; + yy248: + yych = *++p; + if (yych <= 0x7F) + goto yy242; + if (yych <= 0x8F) + goto yy244; + goto yy242; + } +} + +bufsize_t _scan_html_cdata(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych == 'C') + goto yy251; + if (yych == 'c') + goto yy251; + ++p; + yy250 : { return 0; } + yy251: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'D') + goto yy252; + if (yych != 'd') + goto yy250; + yy252: + yych = *++p; + if 
(yych == 'A') + goto yy254; + if (yych == 'a') + goto yy254; + yy253: + p = marker; + if (yyaccept == 0) { + goto yy250; + } else { + goto yy258; + } + yy254: + yych = *++p; + if (yych == 'T') + goto yy255; + if (yych != 't') + goto yy253; + yy255: + yych = *++p; + if (yych == 'A') + goto yy256; + if (yych != 'a') + goto yy253; + yy256: + yych = *++p; + if (yych != '[') + goto yy253; + yy257: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy257; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy258; + if (yych <= ']') + goto yy259; + } else { + if (yych <= 0xDF) + goto yy260; + if (yych <= 0xE0) + goto yy261; + goto yy262; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy263; + if (yych <= 0xEF) + goto yy262; + goto yy264; + } else { + if (yych <= 0xF3) + goto yy265; + if (yych <= 0xF4) + goto yy266; + } + } + yy258 : { return (bufsize_t)(p - start); } + yy259: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy257; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy253; + if (yych <= ']') + goto yy267; + goto yy253; + } else { + if (yych <= 0xDF) + goto yy260; + if (yych <= 0xE0) + goto yy261; + goto yy262; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy263; + if (yych <= 0xEF) + goto yy262; + goto yy264; + } else { + if (yych <= 0xF3) + goto yy265; + if (yych <= 0xF4) + goto yy266; + goto yy253; + } + } + yy260: + yych = *++p; + if (yych <= 0x7F) + goto yy253; + if (yych <= 0xBF) + goto yy257; + goto yy253; + yy261: + yych = *++p; + if (yych <= 0x9F) + goto yy253; + if (yych <= 0xBF) + goto yy260; + goto yy253; + yy262: + yych = *++p; + if (yych <= 0x7F) + goto yy253; + if (yych <= 0xBF) + goto yy260; + goto yy253; + yy263: + yych = *++p; + if (yych <= 0x7F) + goto yy253; + if (yych <= 0x9F) + goto yy260; + goto yy253; + yy264: + yych = *++p; + if (yych <= 0x8F) + goto yy253; + if (yych <= 0xBF) + goto yy262; + goto yy253; + yy265: + yych = 
*++p; + if (yych <= 0x7F) + goto yy253; + if (yych <= 0xBF) + goto yy262; + goto yy253; + yy266: + yych = *++p; + if (yych <= 0x7F) + goto yy253; + if (yych <= 0x8F) + goto yy262; + goto yy253; + yy267: + yych = *++p; + if (yych <= 0xE0) { + if (yych <= '>') { + if (yych <= 0x00) + goto yy253; + if (yych <= '=') + goto yy257; + goto yy253; + } else { + if (yych <= 0x7F) + goto yy257; + if (yych <= 0xC1) + goto yy253; + if (yych <= 0xDF) + goto yy260; + goto yy261; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy263; + goto yy262; + } else { + if (yych <= 0xF0) + goto yy264; + if (yych <= 0xF3) + goto yy265; + if (yych <= 0xF4) + goto yy266; + goto yy253; + } + } + } +} + +// Try to match an HTML block tag start line, returning +// an integer code for the type of block (1-6, matching the spec). +// #7 is handled by a separate function, below. +bufsize_t _scan_html_block_start(const unsigned char *p) { + const unsigned char *marker = NULL; + + { + unsigned char yych; + yych = *p; + if (yych == '<') + goto yy270; + ++p; + yy269 : { return 0; } + yy270: + yych = *(marker = ++p); + switch (yych) { + case '!': + goto yy271; + case '/': + goto yy273; + case '?': + goto yy274; + case 'A': + case 'a': + goto yy275; + case 'B': + case 'b': + goto yy276; + case 'C': + case 'c': + goto yy277; + case 'D': + case 'd': + goto yy278; + case 'F': + case 'f': + goto yy279; + case 'H': + case 'h': + goto yy280; + case 'I': + case 'i': + goto yy281; + case 'L': + case 'l': + goto yy282; + case 'M': + case 'm': + goto yy283; + case 'N': + case 'n': + goto yy284; + case 'O': + case 'o': + goto yy285; + case 'P': + case 'p': + goto yy286; + case 'S': + case 's': + goto yy287; + case 'T': + case 't': + goto yy288; + case 'U': + case 'u': + goto yy289; + default: + goto yy269; + } + yy271: + yych = *++p; + if (yych <= 'Z') { + if (yych == '-') + goto yy290; + if (yych >= 'A') + goto yy291; + } else { + if (yych <= '[') + goto yy292; + if (yych <= '`') + goto yy272; + if 
(yych <= 'z') + goto yy291; + } + yy272: + p = marker; + goto yy269; + yy273: + yych = *++p; + switch (yych) { + case 'A': + case 'a': + goto yy275; + case 'B': + case 'b': + goto yy276; + case 'C': + case 'c': + goto yy277; + case 'D': + case 'd': + goto yy278; + case 'F': + case 'f': + goto yy279; + case 'H': + case 'h': + goto yy280; + case 'I': + case 'i': + goto yy281; + case 'L': + case 'l': + goto yy282; + case 'M': + case 'm': + goto yy283; + case 'N': + case 'n': + goto yy284; + case 'O': + case 'o': + goto yy285; + case 'P': + case 'p': + goto yy293; + case 'S': + case 's': + goto yy294; + case 'T': + case 't': + goto yy295; + case 'U': + case 'u': + goto yy289; + default: + goto yy272; + } + yy274: + ++p; + { return 3; } + yy275: + yych = *++p; + if (yych <= 'S') { + if (yych <= 'D') { + if (yych <= 'C') + goto yy272; + goto yy296; + } else { + if (yych <= 'Q') + goto yy272; + if (yych <= 'R') + goto yy297; + goto yy298; + } + } else { + if (yych <= 'q') { + if (yych == 'd') + goto yy296; + goto yy272; + } else { + if (yych <= 'r') + goto yy297; + if (yych <= 's') + goto yy298; + goto yy272; + } + } + yy276: + yych = *++p; + if (yych <= 'O') { + if (yych <= 'K') { + if (yych == 'A') + goto yy299; + goto yy272; + } else { + if (yych <= 'L') + goto yy300; + if (yych <= 'N') + goto yy272; + goto yy301; + } + } else { + if (yych <= 'k') { + if (yych == 'a') + goto yy299; + goto yy272; + } else { + if (yych <= 'l') + goto yy300; + if (yych == 'o') + goto yy301; + goto yy272; + } + } + yy277: + yych = *++p; + if (yych <= 'O') { + if (yych <= 'D') { + if (yych == 'A') + goto yy302; + goto yy272; + } else { + if (yych <= 'E') + goto yy303; + if (yych <= 'N') + goto yy272; + goto yy304; + } + } else { + if (yych <= 'd') { + if (yych == 'a') + goto yy302; + goto yy272; + } else { + if (yych <= 'e') + goto yy303; + if (yych == 'o') + goto yy304; + goto yy272; + } + } + yy278: + yych = *++p; + switch (yych) { + case 'D': + case 'L': + case 'T': + case 'd': + case 
'l': + case 't': + goto yy305; + case 'E': + case 'e': + goto yy306; + case 'I': + case 'i': + goto yy307; + default: + goto yy272; + } + yy279: + yych = *++p; + if (yych <= 'R') { + if (yych <= 'N') { + if (yych == 'I') + goto yy308; + goto yy272; + } else { + if (yych <= 'O') + goto yy309; + if (yych <= 'Q') + goto yy272; + goto yy310; + } + } else { + if (yych <= 'n') { + if (yych == 'i') + goto yy308; + goto yy272; + } else { + if (yych <= 'o') + goto yy309; + if (yych == 'r') + goto yy310; + goto yy272; + } + } + yy280: + yych = *++p; + if (yych <= 'S') { + if (yych <= 'D') { + if (yych <= '0') + goto yy272; + if (yych <= '6') + goto yy305; + goto yy272; + } else { + if (yych <= 'E') + goto yy311; + if (yych == 'R') + goto yy305; + goto yy272; + } + } else { + if (yych <= 'q') { + if (yych <= 'T') + goto yy312; + if (yych == 'e') + goto yy311; + goto yy272; + } else { + if (yych <= 'r') + goto yy305; + if (yych == 't') + goto yy312; + goto yy272; + } + } + yy281: + yych = *++p; + if (yych == 'F') + goto yy313; + if (yych == 'f') + goto yy313; + goto yy272; + yy282: + yych = *++p; + if (yych <= 'I') { + if (yych == 'E') + goto yy314; + if (yych <= 'H') + goto yy272; + goto yy315; + } else { + if (yych <= 'e') { + if (yych <= 'd') + goto yy272; + goto yy314; + } else { + if (yych == 'i') + goto yy315; + goto yy272; + } + } + yy283: + yych = *++p; + if (yych <= 'E') { + if (yych == 'A') + goto yy316; + if (yych <= 'D') + goto yy272; + goto yy317; + } else { + if (yych <= 'a') { + if (yych <= '`') + goto yy272; + goto yy316; + } else { + if (yych == 'e') + goto yy317; + goto yy272; + } + } + yy284: + yych = *++p; + if (yych <= 'O') { + if (yych == 'A') + goto yy318; + if (yych <= 'N') + goto yy272; + goto yy319; + } else { + if (yych <= 'a') { + if (yych <= '`') + goto yy272; + goto yy318; + } else { + if (yych == 'o') + goto yy319; + goto yy272; + } + } + yy285: + yych = *++p; + if (yych <= 'P') { + if (yych == 'L') + goto yy305; + if (yych <= 'O') + goto yy272; 
+ goto yy320; + } else { + if (yych <= 'l') { + if (yych <= 'k') + goto yy272; + goto yy305; + } else { + if (yych == 'p') + goto yy320; + goto yy272; + } + } + yy286: + yych = *++p; + if (yych <= '>') { + if (yych <= ' ') { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + if (yych <= 0x1F) + goto yy272; + goto yy321; + } else { + if (yych == '/') + goto yy322; + if (yych <= '=') + goto yy272; + goto yy321; + } + } else { + if (yych <= 'R') { + if (yych == 'A') + goto yy323; + if (yych <= 'Q') + goto yy272; + goto yy324; + } else { + if (yych <= 'a') { + if (yych <= '`') + goto yy272; + goto yy323; + } else { + if (yych == 'r') + goto yy324; + goto yy272; + } + } + } + yy287: + yych = *++p; + switch (yych) { + case 'C': + case 'c': + goto yy325; + case 'E': + case 'e': + goto yy326; + case 'O': + case 'o': + goto yy327; + case 'T': + case 't': + goto yy328; + case 'U': + case 'u': + goto yy329; + default: + goto yy272; + } + yy288: + yych = *++p; + switch (yych) { + case 'A': + case 'a': + goto yy330; + case 'B': + case 'b': + goto yy331; + case 'D': + case 'd': + goto yy305; + case 'E': + case 'e': + goto yy332; + case 'F': + case 'f': + goto yy333; + case 'H': + case 'h': + goto yy334; + case 'I': + case 'i': + goto yy335; + case 'R': + case 'r': + goto yy336; + default: + goto yy272; + } + yy289: + yych = *++p; + if (yych == 'L') + goto yy305; + if (yych == 'l') + goto yy305; + goto yy272; + yy290: + yych = *++p; + if (yych == '-') + goto yy337; + goto yy272; + yy291: + ++p; + { return 4; } + yy292: + yych = *++p; + if (yych == 'C') + goto yy338; + if (yych == 'c') + goto yy338; + goto yy272; + yy293: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= '@') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'A') + 
goto yy323; + if (yych == 'a') + goto yy323; + goto yy272; + } + } + yy294: + yych = *++p; + if (yych <= 'U') { + if (yych <= 'N') { + if (yych == 'E') + goto yy326; + goto yy272; + } else { + if (yych <= 'O') + goto yy327; + if (yych <= 'T') + goto yy272; + goto yy329; + } + } else { + if (yych <= 'n') { + if (yych == 'e') + goto yy326; + goto yy272; + } else { + if (yych <= 'o') + goto yy327; + if (yych == 'u') + goto yy329; + goto yy272; + } + } + yy295: + yych = *++p; + switch (yych) { + case 'A': + case 'a': + goto yy330; + case 'B': + case 'b': + goto yy331; + case 'D': + case 'd': + goto yy305; + case 'F': + case 'f': + goto yy333; + case 'H': + case 'h': + goto yy334; + case 'I': + case 'i': + goto yy335; + case 'R': + case 'r': + goto yy336; + default: + goto yy272; + } + yy296: + yych = *++p; + if (yych == 'D') + goto yy339; + if (yych == 'd') + goto yy339; + goto yy272; + yy297: + yych = *++p; + if (yych == 'T') + goto yy340; + if (yych == 't') + goto yy340; + goto yy272; + yy298: + yych = *++p; + if (yych == 'I') + goto yy341; + if (yych == 'i') + goto yy341; + goto yy272; + yy299: + yych = *++p; + if (yych == 'S') + goto yy342; + if (yych == 's') + goto yy342; + goto yy272; + yy300: + yych = *++p; + if (yych == 'O') + goto yy343; + if (yych == 'o') + goto yy343; + goto yy272; + yy301: + yych = *++p; + if (yych == 'D') + goto yy344; + if (yych == 'd') + goto yy344; + goto yy272; + yy302: + yych = *++p; + if (yych == 'P') + goto yy345; + if (yych == 'p') + goto yy345; + goto yy272; + yy303: + yych = *++p; + if (yych == 'N') + goto yy346; + if (yych == 'n') + goto yy346; + goto yy272; + yy304: + yych = *++p; + if (yych == 'L') + goto yy347; + if (yych == 'l') + goto yy347; + goto yy272; + yy305: + yych = *++p; + if (yych <= ' ') { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + if (yych <= 0x1F) + goto yy272; + goto yy321; + } else { + if (yych <= '/') { + if (yych <= '.') + goto yy272; + goto yy322; + } else { + if (yych == '>') + 
goto yy321; + goto yy272; + } + } + yy306: + yych = *++p; + if (yych == 'T') + goto yy348; + if (yych == 't') + goto yy348; + goto yy272; + yy307: + yych = *++p; + if (yych <= 'V') { + if (yych <= 'Q') { + if (yych == 'A') + goto yy349; + goto yy272; + } else { + if (yych <= 'R') + goto yy305; + if (yych <= 'U') + goto yy272; + goto yy305; + } + } else { + if (yych <= 'q') { + if (yych == 'a') + goto yy349; + goto yy272; + } else { + if (yych <= 'r') + goto yy305; + if (yych == 'v') + goto yy305; + goto yy272; + } + } + yy308: + yych = *++p; + if (yych <= 'G') { + if (yych == 'E') + goto yy350; + if (yych <= 'F') + goto yy272; + goto yy351; + } else { + if (yych <= 'e') { + if (yych <= 'd') + goto yy272; + goto yy350; + } else { + if (yych == 'g') + goto yy351; + goto yy272; + } + } + yy309: + yych = *++p; + if (yych <= 'R') { + if (yych == 'O') + goto yy346; + if (yych <= 'Q') + goto yy272; + goto yy352; + } else { + if (yych <= 'o') { + if (yych <= 'n') + goto yy272; + goto yy346; + } else { + if (yych == 'r') + goto yy352; + goto yy272; + } + } + yy310: + yych = *++p; + if (yych == 'A') + goto yy353; + if (yych == 'a') + goto yy353; + goto yy272; + yy311: + yych = *++p; + if (yych == 'A') + goto yy354; + if (yych == 'a') + goto yy354; + goto yy272; + yy312: + yych = *++p; + if (yych == 'M') + goto yy289; + if (yych == 'm') + goto yy289; + goto yy272; + yy313: + yych = *++p; + if (yych == 'R') + goto yy355; + if (yych == 'r') + goto yy355; + goto yy272; + yy314: + yych = *++p; + if (yych == 'G') + goto yy356; + if (yych == 'g') + goto yy356; + goto yy272; + yy315: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'M') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'N') + goto yy357; + if (yych == 'n') + goto yy357; + goto yy272; + 
} + } + yy316: + yych = *++p; + if (yych == 'I') + goto yy358; + if (yych == 'i') + goto yy358; + goto yy272; + yy317: + yych = *++p; + if (yych == 'N') + goto yy359; + if (yych == 'n') + goto yy359; + goto yy272; + yy318: + yych = *++p; + if (yych == 'V') + goto yy305; + if (yych == 'v') + goto yy305; + goto yy272; + yy319: + yych = *++p; + if (yych == 'F') + goto yy360; + if (yych == 'f') + goto yy360; + goto yy272; + yy320: + yych = *++p; + if (yych == 'T') + goto yy361; + if (yych == 't') + goto yy361; + goto yy272; + yy321: + ++p; + { return 6; } + yy322: + yych = *++p; + if (yych == '>') + goto yy321; + goto yy272; + yy323: + yych = *++p; + if (yych == 'R') + goto yy362; + if (yych == 'r') + goto yy362; + goto yy272; + yy324: + yych = *++p; + if (yych == 'E') + goto yy363; + if (yych == 'e') + goto yy363; + goto yy272; + yy325: + yych = *++p; + if (yych == 'R') + goto yy364; + if (yych == 'r') + goto yy364; + goto yy272; + yy326: + yych = *++p; + if (yych == 'C') + goto yy345; + if (yych == 'c') + goto yy345; + goto yy272; + yy327: + yych = *++p; + if (yych == 'U') + goto yy365; + if (yych == 'u') + goto yy365; + goto yy272; + yy328: + yych = *++p; + if (yych == 'Y') + goto yy366; + if (yych == 'y') + goto yy366; + goto yy272; + yy329: + yych = *++p; + if (yych == 'M') + goto yy367; + if (yych == 'm') + goto yy367; + goto yy272; + yy330: + yych = *++p; + if (yych == 'B') + goto yy368; + if (yych == 'b') + goto yy368; + goto yy272; + yy331: + yych = *++p; + if (yych == 'O') + goto yy301; + if (yych == 'o') + goto yy301; + goto yy272; + yy332: + yych = *++p; + if (yych == 'X') + goto yy369; + if (yych == 'x') + goto yy369; + goto yy272; + yy333: + yych = *++p; + if (yych == 'O') + goto yy370; + if (yych == 'o') + goto yy370; + goto yy272; + yy334: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; 
+ goto yy322; + } + } else { + if (yych <= 'D') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'E') + goto yy371; + if (yych == 'e') + goto yy371; + goto yy272; + } + } + yy335: + yych = *++p; + if (yych == 'T') + goto yy368; + if (yych == 't') + goto yy368; + goto yy272; + yy336: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= '@') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'A') + goto yy372; + if (yych == 'a') + goto yy372; + goto yy272; + } + } + yy337: + ++p; + { return 2; } + yy338: + yych = *++p; + if (yych == 'D') + goto yy373; + if (yych == 'd') + goto yy373; + goto yy272; + yy339: + yych = *++p; + if (yych == 'R') + goto yy374; + if (yych == 'r') + goto yy374; + goto yy272; + yy340: + yych = *++p; + if (yych == 'I') + goto yy375; + if (yych == 'i') + goto yy375; + goto yy272; + yy341: + yych = *++p; + if (yych == 'D') + goto yy376; + if (yych == 'd') + goto yy376; + goto yy272; + yy342: + yych = *++p; + if (yych == 'E') + goto yy377; + if (yych == 'e') + goto yy377; + goto yy272; + yy343: + yych = *++p; + if (yych == 'C') + goto yy378; + if (yych == 'c') + goto yy378; + goto yy272; + yy344: + yych = *++p; + if (yych == 'Y') + goto yy305; + if (yych == 'y') + goto yy305; + goto yy272; + yy345: + yych = *++p; + if (yych == 'T') + goto yy379; + if (yych == 't') + goto yy379; + goto yy272; + yy346: + yych = *++p; + if (yych == 'T') + goto yy380; + if (yych == 't') + goto yy380; + goto yy272; + yy347: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'F') { + if (yych == '>') + goto yy321; 
+ goto yy272; + } else { + if (yych <= 'G') + goto yy381; + if (yych == 'g') + goto yy381; + goto yy272; + } + } + yy348: + yych = *++p; + if (yych == 'A') + goto yy382; + if (yych == 'a') + goto yy382; + goto yy272; + yy349: + yych = *++p; + if (yych == 'L') + goto yy383; + if (yych == 'l') + goto yy383; + goto yy272; + yy350: + yych = *++p; + if (yych == 'L') + goto yy384; + if (yych == 'l') + goto yy384; + goto yy272; + yy351: + yych = *++p; + if (yych <= 'U') { + if (yych == 'C') + goto yy385; + if (yych <= 'T') + goto yy272; + goto yy386; + } else { + if (yych <= 'c') { + if (yych <= 'b') + goto yy272; + goto yy385; + } else { + if (yych == 'u') + goto yy386; + goto yy272; + } + } + yy352: + yych = *++p; + if (yych == 'M') + goto yy305; + if (yych == 'm') + goto yy305; + goto yy272; + yy353: + yych = *++p; + if (yych == 'M') + goto yy387; + if (yych == 'm') + goto yy387; + goto yy272; + yy354: + yych = *++p; + if (yych == 'D') + goto yy388; + if (yych == 'd') + goto yy388; + goto yy272; + yy355: + yych = *++p; + if (yych == 'A') + goto yy389; + if (yych == 'a') + goto yy389; + goto yy272; + yy356: + yych = *++p; + if (yych == 'E') + goto yy390; + if (yych == 'e') + goto yy390; + goto yy272; + yy357: + yych = *++p; + if (yych == 'K') + goto yy305; + if (yych == 'k') + goto yy305; + goto yy272; + yy358: + yych = *++p; + if (yych == 'N') + goto yy305; + if (yych == 'n') + goto yy305; + goto yy272; + yy359: + yych = *++p; + if (yych == 'U') + goto yy391; + if (yych == 'u') + goto yy391; + goto yy272; + yy360: + yych = *++p; + if (yych == 'R') + goto yy392; + if (yych == 'r') + goto yy392; + goto yy272; + yy361: + yych = *++p; + if (yych <= 'I') { + if (yych == 'G') + goto yy381; + if (yych <= 'H') + goto yy272; + goto yy393; + } else { + if (yych <= 'g') { + if (yych <= 'f') + goto yy272; + goto yy381; + } else { + if (yych == 'i') + goto yy393; + goto yy272; + } + } + yy362: + yych = *++p; + if (yych == 'A') + goto yy352; + if (yych == 'a') + goto yy352; + goto 
yy272; + yy363: + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy394; + goto yy272; + } else { + if (yych <= ' ') + goto yy394; + if (yych == '>') + goto yy394; + goto yy272; + } + yy364: + yych = *++p; + if (yych == 'I') + goto yy395; + if (yych == 'i') + goto yy395; + goto yy272; + yy365: + yych = *++p; + if (yych == 'R') + goto yy396; + if (yych == 'r') + goto yy396; + goto yy272; + yy366: + yych = *++p; + if (yych == 'L') + goto yy324; + if (yych == 'l') + goto yy324; + goto yy272; + yy367: + yych = *++p; + if (yych == 'M') + goto yy397; + if (yych == 'm') + goto yy397; + goto yy272; + yy368: + yych = *++p; + if (yych == 'L') + goto yy376; + if (yych == 'l') + goto yy376; + goto yy272; + yy369: + yych = *++p; + if (yych == 'T') + goto yy398; + if (yych == 't') + goto yy398; + goto yy272; + yy370: + yych = *++p; + if (yych == 'O') + goto yy399; + if (yych == 'o') + goto yy399; + goto yy272; + yy371: + yych = *++p; + if (yych == 'A') + goto yy400; + if (yych == 'a') + goto yy400; + goto yy272; + yy372: + yych = *++p; + if (yych == 'C') + goto yy357; + if (yych == 'c') + goto yy357; + goto yy272; + yy373: + yych = *++p; + if (yych == 'A') + goto yy401; + if (yych == 'a') + goto yy401; + goto yy272; + yy374: + yych = *++p; + if (yych == 'E') + goto yy402; + if (yych == 'e') + goto yy402; + goto yy272; + yy375: + yych = *++p; + if (yych == 'C') + goto yy368; + if (yych == 'c') + goto yy368; + goto yy272; + yy376: + yych = *++p; + if (yych == 'E') + goto yy305; + if (yych == 'e') + goto yy305; + goto yy272; + yy377: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'E') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'F') + goto yy403; + if (yych == 'f') + goto yy403; + goto yy272; + } 
+ } + yy378: + yych = *++p; + if (yych == 'K') + goto yy404; + if (yych == 'k') + goto yy404; + goto yy272; + yy379: + yych = *++p; + if (yych == 'I') + goto yy393; + if (yych == 'i') + goto yy393; + goto yy272; + yy380: + yych = *++p; + if (yych == 'E') + goto yy405; + if (yych == 'e') + goto yy405; + goto yy272; + yy381: + yych = *++p; + if (yych == 'R') + goto yy406; + if (yych == 'r') + goto yy406; + goto yy272; + yy382: + yych = *++p; + if (yych == 'I') + goto yy407; + if (yych == 'i') + goto yy407; + goto yy272; + yy383: + yych = *++p; + if (yych == 'O') + goto yy408; + if (yych == 'o') + goto yy408; + goto yy272; + yy384: + yych = *++p; + if (yych == 'D') + goto yy409; + if (yych == 'd') + goto yy409; + goto yy272; + yy385: + yych = *++p; + if (yych == 'A') + goto yy302; + if (yych == 'a') + goto yy302; + goto yy272; + yy386: + yych = *++p; + if (yych == 'R') + goto yy376; + if (yych == 'r') + goto yy376; + goto yy272; + yy387: + yych = *++p; + if (yych == 'E') + goto yy410; + if (yych == 'e') + goto yy410; + goto yy272; + yy388: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'D') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'E') + goto yy405; + if (yych == 'e') + goto yy405; + goto yy272; + } + } + yy389: + yych = *++p; + if (yych == 'M') + goto yy376; + if (yych == 'm') + goto yy376; + goto yy272; + yy390: + yych = *++p; + if (yych == 'N') + goto yy400; + if (yych == 'n') + goto yy400; + goto yy272; + yy391: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'H') { + if (yych == '>') + goto yy321; + goto yy272; + 
} else { + if (yych <= 'I') + goto yy411; + if (yych == 'i') + goto yy411; + goto yy272; + } + } + yy392: + yych = *++p; + if (yych == 'A') + goto yy412; + if (yych == 'a') + goto yy412; + goto yy272; + yy393: + yych = *++p; + if (yych == 'O') + goto yy358; + if (yych == 'o') + goto yy358; + goto yy272; + yy394: + ++p; + { return 1; } + yy395: + yych = *++p; + if (yych == 'P') + goto yy413; + if (yych == 'p') + goto yy413; + goto yy272; + yy396: + yych = *++p; + if (yych == 'C') + goto yy376; + if (yych == 'c') + goto yy376; + goto yy272; + yy397: + yych = *++p; + if (yych == 'A') + goto yy414; + if (yych == 'a') + goto yy414; + goto yy272; + yy398: + yych = *++p; + if (yych == 'A') + goto yy415; + if (yych == 'a') + goto yy415; + goto yy272; + yy399: + yych = *++p; + if (yych == 'T') + goto yy305; + if (yych == 't') + goto yy305; + goto yy272; + yy400: + yych = *++p; + if (yych == 'D') + goto yy305; + if (yych == 'd') + goto yy305; + goto yy272; + yy401: + yych = *++p; + if (yych == 'T') + goto yy416; + if (yych == 't') + goto yy416; + goto yy272; + yy402: + yych = *++p; + if (yych == 'S') + goto yy417; + if (yych == 's') + goto yy417; + goto yy272; + yy403: + yych = *++p; + if (yych == 'O') + goto yy418; + if (yych == 'o') + goto yy418; + goto yy272; + yy404: + yych = *++p; + if (yych == 'Q') + goto yy419; + if (yych == 'q') + goto yy419; + goto yy272; + yy405: + yych = *++p; + if (yych == 'R') + goto yy305; + if (yych == 'r') + goto yy305; + goto yy272; + yy406: + yych = *++p; + if (yych == 'O') + goto yy420; + if (yych == 'o') + goto yy420; + goto yy272; + yy407: + yych = *++p; + if (yych == 'L') + goto yy417; + if (yych == 'l') + goto yy417; + goto yy272; + yy408: + yych = *++p; + if (yych == 'G') + goto yy305; + if (yych == 'g') + goto yy305; + goto yy272; + yy409: + yych = *++p; + if (yych == 'S') + goto yy421; + if (yych == 's') + goto yy421; + goto yy272; + yy410: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy272; + 
if (yych <= '\r') + goto yy321; + goto yy272; + } else { + if (yych <= ' ') + goto yy321; + if (yych <= '.') + goto yy272; + goto yy322; + } + } else { + if (yych <= 'R') { + if (yych == '>') + goto yy321; + goto yy272; + } else { + if (yych <= 'S') + goto yy421; + if (yych == 's') + goto yy421; + goto yy272; + } + } + yy411: + yych = *++p; + if (yych == 'T') + goto yy422; + if (yych == 't') + goto yy422; + goto yy272; + yy412: + yych = *++p; + if (yych == 'M') + goto yy423; + if (yych == 'm') + goto yy423; + goto yy272; + yy413: + yych = *++p; + if (yych == 'T') + goto yy363; + if (yych == 't') + goto yy363; + goto yy272; + yy414: + yych = *++p; + if (yych == 'R') + goto yy344; + if (yych == 'r') + goto yy344; + goto yy272; + yy415: + yych = *++p; + if (yych == 'R') + goto yy424; + if (yych == 'r') + goto yy424; + goto yy272; + yy416: + yych = *++p; + if (yych == 'A') + goto yy425; + if (yych == 'a') + goto yy425; + goto yy272; + yy417: + yych = *++p; + if (yych == 'S') + goto yy305; + if (yych == 's') + goto yy305; + goto yy272; + yy418: + yych = *++p; + if (yych == 'N') + goto yy399; + if (yych == 'n') + goto yy399; + goto yy272; + yy419: + yych = *++p; + if (yych == 'U') + goto yy426; + if (yych == 'u') + goto yy426; + goto yy272; + yy420: + yych = *++p; + if (yych == 'U') + goto yy427; + if (yych == 'u') + goto yy427; + goto yy272; + yy421: + yych = *++p; + if (yych == 'E') + goto yy399; + if (yych == 'e') + goto yy399; + goto yy272; + yy422: + yych = *++p; + if (yych == 'E') + goto yy352; + if (yych == 'e') + goto yy352; + goto yy272; + yy423: + yych = *++p; + if (yych == 'E') + goto yy417; + if (yych == 'e') + goto yy417; + goto yy272; + yy424: + yych = *++p; + if (yych == 'E') + goto yy428; + if (yych == 'e') + goto yy428; + goto yy272; + yy425: + yych = *++p; + if (yych == '[') + goto yy429; + goto yy272; + yy426: + yych = *++p; + if (yych == 'O') + goto yy430; + if (yych == 'o') + goto yy430; + goto yy272; + yy427: + yych = *++p; + if (yych == 'P') + goto 
yy305; + if (yych == 'p') + goto yy305; + goto yy272; + yy428: + yych = *++p; + if (yych == 'A') + goto yy363; + if (yych == 'a') + goto yy363; + goto yy272; + yy429: + ++p; + { return 5; } + yy430: + yych = *++p; + if (yych == 'T') + goto yy376; + if (yych == 't') + goto yy376; + goto yy272; + } +} + +// Try to match an HTML block tag start line of type 7, returning +// 7 if successful, 0 if not. +bufsize_t _scan_html_block_start_7(const unsigned char *p) { + const unsigned char *marker = NULL; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 224, 224, 224, 224, 224, 224, 224, 224, 198, 210, 194, 198, 194, + 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 198, 224, 128, 224, 224, 224, 224, 64, 224, 224, + 224, 224, 224, 233, 232, 224, 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 232, 224, 192, 192, 192, 224, 224, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 224, 224, 224, 224, 232, 192, 233, + 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, + 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 224, 224, 224, + 224, 224, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych == '<') + goto yy433; + ++p; + yy432 : { return 0; } + yy433: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '@') { + if (yych != '/') + goto yy432; + } else { + if (yych <= 'Z') + goto yy435; + if (yych <= '`') + goto yy432; + if (yych <= 'z') + goto yy435; + goto yy432; + } + yych = *++p; + if (yych 
<= '@') + goto yy434; + if (yych <= 'Z') + goto yy436; + if (yych <= '`') + goto yy434; + if (yych <= 'z') + goto yy436; + yy434: + p = marker; + if (yyaccept == 0) { + goto yy432; + } else { + goto yy443; + } + yy435: + yych = *++p; + if (yybm[0 + yych] & 2) { + goto yy437; + } + if (yych <= '=') { + if (yych <= '.') { + if (yych == '-') + goto yy435; + goto yy434; + } else { + if (yych <= '/') + goto yy438; + if (yych <= '9') + goto yy435; + goto yy434; + } + } else { + if (yych <= 'Z') { + if (yych <= '>') + goto yy439; + if (yych <= '@') + goto yy434; + goto yy435; + } else { + if (yych <= '`') + goto yy434; + if (yych <= 'z') + goto yy435; + goto yy434; + } + } + yy436: + yych = *++p; + if (yych <= '/') { + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy434; + if (yych <= '\r') + goto yy440; + goto yy434; + } else { + if (yych <= ' ') + goto yy440; + if (yych == '-') + goto yy436; + goto yy434; + } + } else { + if (yych <= '@') { + if (yych <= '9') + goto yy436; + if (yych == '>') + goto yy439; + goto yy434; + } else { + if (yych <= 'Z') + goto yy436; + if (yych <= '`') + goto yy434; + if (yych <= 'z') + goto yy436; + goto yy434; + } + } + yy437: + yych = *++p; + if (yybm[0 + yych] & 2) { + goto yy437; + } + if (yych <= '>') { + if (yych <= '9') { + if (yych != '/') + goto yy434; + } else { + if (yych <= ':') + goto yy441; + if (yych <= '=') + goto yy434; + goto yy439; + } + } else { + if (yych <= '^') { + if (yych <= '@') + goto yy434; + if (yych <= 'Z') + goto yy441; + goto yy434; + } else { + if (yych == '`') + goto yy434; + if (yych <= 'z') + goto yy441; + goto yy434; + } + } + yy438: + yych = *++p; + if (yych != '>') + goto yy434; + yy439: + yych = *++p; + if (yybm[0 + yych] & 4) { + goto yy439; + } + if (yych <= 0x08) + goto yy434; + if (yych <= '\n') + goto yy442; + if (yych <= '\v') + goto yy434; + if (yych <= '\r') + goto yy444; + goto yy434; + yy440: + yych = *++p; + if (yych <= 0x1F) { + if (yych <= 0x08) + goto yy434; + if (yych <= '\r') + goto 
yy440; + goto yy434; + } else { + if (yych <= ' ') + goto yy440; + if (yych == '>') + goto yy439; + goto yy434; + } + yy441: + yych = *++p; + if (yybm[0 + yych] & 8) { + goto yy441; + } + if (yych <= ',') { + if (yych <= '\r') { + if (yych <= 0x08) + goto yy434; + goto yy445; + } else { + if (yych == ' ') + goto yy445; + goto yy434; + } + } else { + if (yych <= '<') { + if (yych <= '/') + goto yy438; + goto yy434; + } else { + if (yych <= '=') + goto yy446; + if (yych <= '>') + goto yy439; + goto yy434; + } + } + yy442: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 4) { + goto yy439; + } + if (yych <= 0x08) + goto yy443; + if (yych <= '\n') + goto yy442; + if (yych <= '\v') + goto yy443; + if (yych <= '\r') + goto yy444; + yy443 : { return 7; } + yy444: + ++p; + goto yy443; + yy445: + yych = *++p; + if (yych <= '<') { + if (yych <= ' ') { + if (yych <= 0x08) + goto yy434; + if (yych <= '\r') + goto yy445; + if (yych <= 0x1F) + goto yy434; + goto yy445; + } else { + if (yych <= '/') { + if (yych <= '.') + goto yy434; + goto yy438; + } else { + if (yych == ':') + goto yy441; + goto yy434; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '=') + goto yy446; + if (yych <= '>') + goto yy439; + if (yych <= '@') + goto yy434; + goto yy441; + } else { + if (yych <= '_') { + if (yych <= '^') + goto yy434; + goto yy441; + } else { + if (yych <= '`') + goto yy434; + if (yych <= 'z') + goto yy441; + goto yy434; + } + } + } + yy446: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy447; + } + if (yych <= 0xE0) { + if (yych <= '"') { + if (yych <= 0x00) + goto yy434; + if (yych <= ' ') + goto yy446; + goto yy448; + } else { + if (yych <= '\'') + goto yy449; + if (yych <= 0xC1) + goto yy434; + if (yych <= 0xDF) + goto yy450; + goto yy451; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy453; + goto yy452; + } else { + if (yych <= 0xF0) + goto yy454; + if (yych <= 0xF3) + goto yy455; + if (yych <= 0xF4) + goto yy456; + goto yy434; + } + } 
+ yy447: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy447; + } + if (yych <= 0xE0) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy434; + if (yych <= ' ') + goto yy437; + goto yy434; + } else { + if (yych <= '>') + goto yy439; + if (yych <= 0xC1) + goto yy434; + if (yych <= 0xDF) + goto yy450; + goto yy451; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy453; + goto yy452; + } else { + if (yych <= 0xF0) + goto yy454; + if (yych <= 0xF3) + goto yy455; + if (yych <= 0xF4) + goto yy456; + goto yy434; + } + } + yy448: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy448; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy434; + if (yych <= '"') + goto yy457; + goto yy434; + } else { + if (yych <= 0xDF) + goto yy458; + if (yych <= 0xE0) + goto yy459; + goto yy460; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy461; + if (yych <= 0xEF) + goto yy460; + goto yy462; + } else { + if (yych <= 0xF3) + goto yy463; + if (yych <= 0xF4) + goto yy464; + goto yy434; + } + } + yy449: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy449; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy434; + if (yych <= '\'') + goto yy457; + goto yy434; + } else { + if (yych <= 0xDF) + goto yy465; + if (yych <= 0xE0) + goto yy466; + goto yy467; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy468; + if (yych <= 0xEF) + goto yy467; + goto yy469; + } else { + if (yych <= 0xF3) + goto yy470; + if (yych <= 0xF4) + goto yy471; + goto yy434; + } + } + yy450: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy447; + goto yy434; + yy451: + yych = *++p; + if (yych <= 0x9F) + goto yy434; + if (yych <= 0xBF) + goto yy450; + goto yy434; + yy452: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy450; + goto yy434; + yy453: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x9F) + goto yy450; + goto yy434; + yy454: + 
yych = *++p; + if (yych <= 0x8F) + goto yy434; + if (yych <= 0xBF) + goto yy452; + goto yy434; + yy455: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy452; + goto yy434; + yy456: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x8F) + goto yy452; + goto yy434; + yy457: + yych = *++p; + if (yybm[0 + yych] & 2) { + goto yy437; + } + if (yych == '/') + goto yy438; + if (yych == '>') + goto yy439; + goto yy434; + yy458: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy448; + goto yy434; + yy459: + yych = *++p; + if (yych <= 0x9F) + goto yy434; + if (yych <= 0xBF) + goto yy458; + goto yy434; + yy460: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy458; + goto yy434; + yy461: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x9F) + goto yy458; + goto yy434; + yy462: + yych = *++p; + if (yych <= 0x8F) + goto yy434; + if (yych <= 0xBF) + goto yy460; + goto yy434; + yy463: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy460; + goto yy434; + yy464: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x8F) + goto yy460; + goto yy434; + yy465: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy449; + goto yy434; + yy466: + yych = *++p; + if (yych <= 0x9F) + goto yy434; + if (yych <= 0xBF) + goto yy465; + goto yy434; + yy467: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy465; + goto yy434; + yy468: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x9F) + goto yy465; + goto yy434; + yy469: + yych = *++p; + if (yych <= 0x8F) + goto yy434; + if (yych <= 0xBF) + goto yy467; + goto yy434; + yy470: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0xBF) + goto yy467; + goto yy434; + yy471: + yych = *++p; + if (yych <= 0x7F) + goto yy434; + if (yych <= 0x8F) + goto yy467; + goto yy434; + } +} + +// Try to match an HTML block end line of type 1 
+bufsize_t _scan_html_block_end_1(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= ';') { + if (yych <= 0x00) + goto yy473; + if (yych != '\n') + goto yy475; + } else { + if (yych <= '<') + goto yy476; + if (yych <= 0x7F) + goto yy475; + if (yych >= 0xC2) + goto yy477; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy478; + if (yych == 0xED) + goto yy480; + goto yy479; + } else { + if (yych <= 0xF0) + goto yy481; + if (yych <= 0xF3) + goto yy482; + if (yych <= 0xF4) + goto yy483; + } + } + yy473: + ++p; + yy474 : { return 0; } + yy475: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy474; + if (yych <= '\t') + goto yy485; + goto yy474; + } else { + if (yych <= 0x7F) + goto yy485; + if (yych <= 0xC1) + goto yy474; + if (yych <= 0xF4) + goto yy485; + goto yy474; + } + yy476: + yyaccept = 0; + yych = 
*(marker = ++p); + if (yych <= '.') { + if (yych <= 0x00) + goto yy474; + if (yych == '\n') + goto yy474; + goto yy485; + } else { + if (yych <= 0x7F) { + if (yych <= '/') + goto yy495; + goto yy485; + } else { + if (yych <= 0xC1) + goto yy474; + if (yych <= 0xF4) + goto yy485; + goto yy474; + } + } + yy477: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy474; + if (yych <= 0xBF) + goto yy484; + goto yy474; + yy478: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy474; + if (yych <= 0xBF) + goto yy488; + goto yy474; + yy479: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy474; + if (yych <= 0xBF) + goto yy488; + goto yy474; + yy480: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy474; + if (yych <= 0x9F) + goto yy488; + goto yy474; + yy481: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy474; + if (yych <= 0xBF) + goto yy490; + goto yy474; + yy482: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy474; + if (yych <= 0xBF) + goto yy490; + goto yy474; + yy483: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy474; + if (yych <= 0x8F) + goto yy490; + goto yy474; + yy484: + yych = *++p; + yy485: + if (yybm[0 + yych] & 64) { + goto yy484; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy486; + if (yych <= '<') + goto yy487; + } else { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + goto yy490; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy491; + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + } + } + yy486: + p = marker; + if (yyaccept == 0) { + goto yy474; + } else { + goto yy508; + } + yy487: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xDF) { + if (yych <= '.') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + 
goto yy484; + } else { + if (yych <= '/') + goto yy495; + if (yych <= 0x7F) + goto yy484; + if (yych <= 0xC1) + goto yy486; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy489; + if (yych == 0xED) + goto yy491; + goto yy490; + } else { + if (yych <= 0xF0) + goto yy492; + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + yy488: + yych = *++p; + if (yych <= 0x7F) + goto yy486; + if (yych <= 0xBF) + goto yy484; + goto yy486; + yy489: + yych = *++p; + if (yych <= 0x9F) + goto yy486; + if (yych <= 0xBF) + goto yy488; + goto yy486; + yy490: + yych = *++p; + if (yych <= 0x7F) + goto yy486; + if (yych <= 0xBF) + goto yy488; + goto yy486; + yy491: + yych = *++p; + if (yych <= 0x7F) + goto yy486; + if (yych <= 0x9F) + goto yy488; + goto yy486; + yy492: + yych = *++p; + if (yych <= 0x8F) + goto yy486; + if (yych <= 0xBF) + goto yy490; + goto yy486; + yy493: + yych = *++p; + if (yych <= 0x7F) + goto yy486; + if (yych <= 0xBF) + goto yy490; + goto yy486; + yy494: + yych = *++p; + if (yych <= 0x7F) + goto yy486; + if (yych <= 0x8F) + goto yy490; + goto yy486; + yy495: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 's') { + if (yych <= 'R') { + if (yych <= '\n') { + if (yych <= 0x00) + goto yy486; + if (yych <= '\t') + goto yy484; + goto yy486; + } else { + if (yych != 'P') + goto yy484; + } + } else { + if (yych <= 'o') { + if (yych <= 'S') + goto yy497; + if (yych <= 'T') + goto yy498; + goto yy484; + } else { + if (yych <= 'p') + goto yy496; + if (yych <= 'r') + goto yy484; + goto yy497; + } + } + } else { + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 't') + goto yy498; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } else { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + goto yy490; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy491; + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if 
(yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy496: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'Q') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'q') { + if (yych <= 'R') + goto yy499; + goto yy484; + } else { + if (yych <= 'r') + goto yy499; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy497: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 't') { + if (yych <= 'C') { + if (yych <= '\t') { + if (yych <= 0x00) + goto yy486; + goto yy484; + } else { + if (yych <= '\n') + goto yy486; + if (yych <= 'B') + goto yy484; + goto yy500; + } + } else { + if (yych <= 'b') { + if (yych == 'T') + goto yy501; + goto yy484; + } else { + if (yych <= 'c') + goto yy500; + if (yych <= 's') + goto yy484; + goto yy501; + } + } + } else { + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x7F) + goto yy484; + goto yy486; + } else { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + goto yy490; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy491; + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy498: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'D') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'd') { + if (yych <= 'E') + goto yy502; + goto yy484; + } else { + if (yych <= 'e') + goto yy502; + if (yych <= 0x7F) + goto yy484; + 
goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy499: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'D') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'd') { + if (yych <= 'E') + goto yy503; + goto yy484; + } else { + if (yych <= 'e') + goto yy503; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy500: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'Q') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'q') { + if (yych <= 'R') + goto yy504; + goto yy484; + } else { + if (yych <= 'r') + goto yy504; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy501: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'X') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 
'x') { + if (yych <= 'Y') + goto yy505; + goto yy484; + } else { + if (yych <= 'y') + goto yy505; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy502: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'W') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'w') { + if (yych <= 'X') + goto yy506; + goto yy484; + } else { + if (yych <= 'x') + goto yy506; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy503: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= '>') + goto yy507; + if (yych <= 0x7F) + goto yy484; + if (yych <= 0xC1) + goto yy486; + goto yy488; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy489; + if (yych == 0xED) + goto yy491; + goto yy490; + } else { + if (yych <= 0xF0) + goto yy492; + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + yy504: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'H') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 
'h') { + if (yych <= 'I') + goto yy509; + goto yy484; + } else { + if (yych <= 'i') + goto yy509; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy505: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'K') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'k') { + if (yych <= 'L') + goto yy499; + goto yy484; + } else { + if (yych <= 'l') + goto yy499; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy506: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'S') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 's') { + if (yych <= 'T') + goto yy510; + goto yy484; + } else { + if (yych <= 't') + goto yy510; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy507: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto 
yy484; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy508; + if (yych <= '<') + goto yy487; + } else { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + goto yy490; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy491; + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + } + } + yy508 : { return (bufsize_t)(p - start); } + yy509: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'O') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'o') { + if (yych <= 'P') + goto yy511; + goto yy484; + } else { + if (yych <= 'p') + goto yy511; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy510: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= '@') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= '`') { + if (yych <= 'A') + goto yy512; + goto yy484; + } else { + if (yych <= 'a') + goto yy512; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy511: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 
'S') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 's') { + if (yych <= 'T') + goto yy503; + goto yy484; + } else { + if (yych <= 't') + goto yy503; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy512: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'Q') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'q') { + if (yych >= 'S') + goto yy484; + } else { + if (yych <= 'r') + goto yy513; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + yy513: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= 'D') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= 'd') { + if (yych >= 'F') + goto yy484; + } else { + if (yych <= 'e') + goto yy514; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + 
yy514: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy487; + } + if (yych <= 0xC1) { + if (yych <= '@') { + if (yych <= 0x00) + goto yy486; + if (yych == '\n') + goto yy486; + goto yy484; + } else { + if (yych <= '`') { + if (yych <= 'A') + goto yy503; + goto yy484; + } else { + if (yych <= 'a') + goto yy503; + if (yych <= 0x7F) + goto yy484; + goto yy486; + } + } + } else { + if (yych <= 0xED) { + if (yych <= 0xDF) + goto yy488; + if (yych <= 0xE0) + goto yy489; + if (yych <= 0xEC) + goto yy490; + goto yy491; + } else { + if (yych <= 0xF0) { + if (yych <= 0xEF) + goto yy490; + goto yy492; + } else { + if (yych <= 0xF3) + goto yy493; + if (yych <= 0xF4) + goto yy494; + goto yy486; + } + } + } + } +} + +// Try to match an HTML block end line of type 2 +bufsize_t _scan_html_block_end_2(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= ',') { + if (yych <= 
0x00) + goto yy516; + if (yych != '\n') + goto yy518; + } else { + if (yych <= '-') + goto yy519; + if (yych <= 0x7F) + goto yy518; + if (yych >= 0xC2) + goto yy520; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy521; + if (yych == 0xED) + goto yy523; + goto yy522; + } else { + if (yych <= 0xF0) + goto yy524; + if (yych <= 0xF3) + goto yy525; + if (yych <= 0xF4) + goto yy526; + } + } + yy516: + ++p; + yy517 : { return 0; } + yy518: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy517; + if (yych <= '\t') + goto yy528; + goto yy517; + } else { + if (yych <= 0x7F) + goto yy528; + if (yych <= 0xC1) + goto yy517; + if (yych <= 0xF4) + goto yy528; + goto yy517; + } + yy519: + yyaccept = 0; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy538; + } + if (yych <= '\n') { + if (yych <= 0x00) + goto yy517; + if (yych <= '\t') + goto yy528; + goto yy517; + } else { + if (yych <= 0x7F) + goto yy528; + if (yych <= 0xC1) + goto yy517; + if (yych <= 0xF4) + goto yy528; + goto yy517; + } + yy520: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy517; + if (yych <= 0xBF) + goto yy527; + goto yy517; + yy521: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy517; + if (yych <= 0xBF) + goto yy531; + goto yy517; + yy522: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy517; + if (yych <= 0xBF) + goto yy531; + goto yy517; + yy523: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy517; + if (yych <= 0x9F) + goto yy531; + goto yy517; + yy524: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy517; + if (yych <= 0xBF) + goto yy533; + goto yy517; + yy525: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy517; + if (yych <= 0xBF) + goto yy533; + goto yy517; + yy526: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy517; + if (yych <= 0x8F) + goto yy533; + goto yy517; + 
yy527: + yych = *++p; + yy528: + if (yybm[0 + yych] & 64) { + goto yy527; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy529; + if (yych <= '-') + goto yy530; + } else { + if (yych <= 0xDF) + goto yy531; + if (yych <= 0xE0) + goto yy532; + goto yy533; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy534; + if (yych <= 0xEF) + goto yy533; + goto yy535; + } else { + if (yych <= 0xF3) + goto yy536; + if (yych <= 0xF4) + goto yy537; + } + } + yy529: + p = marker; + if (yyaccept == 0) { + goto yy517; + } else { + goto yy540; + } + yy530: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy527; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy529; + if (yych <= '-') + goto yy538; + goto yy529; + } else { + if (yych <= 0xDF) + goto yy531; + if (yych <= 0xE0) + goto yy532; + goto yy533; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy534; + if (yych <= 0xEF) + goto yy533; + goto yy535; + } else { + if (yych <= 0xF3) + goto yy536; + if (yych <= 0xF4) + goto yy537; + goto yy529; + } + } + yy531: + yych = *++p; + if (yych <= 0x7F) + goto yy529; + if (yych <= 0xBF) + goto yy527; + goto yy529; + yy532: + yych = *++p; + if (yych <= 0x9F) + goto yy529; + if (yych <= 0xBF) + goto yy531; + goto yy529; + yy533: + yych = *++p; + if (yych <= 0x7F) + goto yy529; + if (yych <= 0xBF) + goto yy531; + goto yy529; + yy534: + yych = *++p; + if (yych <= 0x7F) + goto yy529; + if (yych <= 0x9F) + goto yy531; + goto yy529; + yy535: + yych = *++p; + if (yych <= 0x8F) + goto yy529; + if (yych <= 0xBF) + goto yy533; + goto yy529; + yy536: + yych = *++p; + if (yych <= 0x7F) + goto yy529; + if (yych <= 0xBF) + goto yy533; + goto yy529; + yy537: + yych = *++p; + if (yych <= 0x7F) + goto yy529; + if (yych <= 0x8F) + goto yy533; + goto yy529; + yy538: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy538; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy529; + if (yych == '\n') + 
goto yy529; + goto yy527; + } else { + if (yych <= '>') + goto yy539; + if (yych <= 0x7F) + goto yy527; + if (yych <= 0xC1) + goto yy529; + goto yy531; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy532; + if (yych == 0xED) + goto yy534; + goto yy533; + } else { + if (yych <= 0xF0) + goto yy535; + if (yych <= 0xF3) + goto yy536; + if (yych <= 0xF4) + goto yy537; + goto yy529; + } + } + yy539: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy527; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy540; + if (yych <= '-') + goto yy530; + } else { + if (yych <= 0xDF) + goto yy531; + if (yych <= 0xE0) + goto yy532; + goto yy533; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy534; + if (yych <= 0xEF) + goto yy533; + goto yy535; + } else { + if (yych <= 0xF3) + goto yy536; + if (yych <= 0xF4) + goto yy537; + } + } + yy540 : { return (bufsize_t)(p - start); } + } +} + +// Try to match an HTML block end line of type 3 +bufsize_t _scan_html_block_end_3(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= '>') { + if (yych <= 0x00) + goto yy542; + if (yych != '\n') + goto yy544; + } else { + if (yych <= '?') + goto yy545; + if (yych <= 0x7F) + goto yy544; + if (yych >= 0xC2) + goto yy546; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy547; + if (yych == 0xED) + goto yy549; + goto yy548; + } else { + if (yych <= 0xF0) + goto yy550; + if (yych <= 0xF3) + goto yy551; + if (yych <= 0xF4) + goto yy552; + } + } + yy542: + ++p; + yy543 : { return 0; } + yy544: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy543; + if (yych <= '\t') + goto yy554; + goto yy543; + } else { + if (yych <= 0x7F) + goto yy554; + if (yych <= 0xC1) + goto yy543; + if (yych <= 0xF4) + goto yy554; + goto yy543; + } + yy545: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '=') { + if (yych <= 0x00) + goto yy543; + if (yych == '\n') + goto yy543; + goto yy554; + } else { + if (yych <= 0x7F) { + if (yych <= '>') + goto yy564; + goto yy554; + } else { + if (yych <= 0xC1) + goto yy543; + if (yych <= 0xF4) + goto yy554; + goto yy543; + } + } + yy546: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy543; + if (yych <= 0xBF) + goto yy553; + goto yy543; + yy547: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy543; + if (yych <= 0xBF) + goto yy557; + goto yy543; + yy548: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy543; + if (yych <= 0xBF) + goto yy557; + goto yy543; + yy549: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy543; + if (yych <= 0x9F) + goto yy557; + goto yy543; + yy550: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy543; + if (yych <= 0xBF) + goto yy559; + goto yy543; + yy551: + 
yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy543; + if (yych <= 0xBF) + goto yy559; + goto yy543; + yy552: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy543; + if (yych <= 0x8F) + goto yy559; + goto yy543; + yy553: + yych = *++p; + yy554: + if (yybm[0 + yych] & 64) { + goto yy553; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy555; + if (yych <= '?') + goto yy556; + } else { + if (yych <= 0xDF) + goto yy557; + if (yych <= 0xE0) + goto yy558; + goto yy559; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy560; + if (yych <= 0xEF) + goto yy559; + goto yy561; + } else { + if (yych <= 0xF3) + goto yy562; + if (yych <= 0xF4) + goto yy563; + } + } + yy555: + p = marker; + if (yyaccept == 0) { + goto yy543; + } else { + goto yy565; + } + yy556: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy556; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy555; + if (yych == '\n') + goto yy555; + goto yy553; + } else { + if (yych <= '>') + goto yy564; + if (yych <= 0x7F) + goto yy553; + if (yych <= 0xC1) + goto yy555; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy558; + if (yych == 0xED) + goto yy560; + goto yy559; + } else { + if (yych <= 0xF0) + goto yy561; + if (yych <= 0xF3) + goto yy562; + if (yych <= 0xF4) + goto yy563; + goto yy555; + } + } + yy557: + yych = *++p; + if (yych <= 0x7F) + goto yy555; + if (yych <= 0xBF) + goto yy553; + goto yy555; + yy558: + yych = *++p; + if (yych <= 0x9F) + goto yy555; + if (yych <= 0xBF) + goto yy557; + goto yy555; + yy559: + yych = *++p; + if (yych <= 0x7F) + goto yy555; + if (yych <= 0xBF) + goto yy557; + goto yy555; + yy560: + yych = *++p; + if (yych <= 0x7F) + goto yy555; + if (yych <= 0x9F) + goto yy557; + goto yy555; + yy561: + yych = *++p; + if (yych <= 0x8F) + goto yy555; + if (yych <= 0xBF) + goto yy559; + goto yy555; + yy562: + yych = *++p; + if (yych <= 0x7F) + goto yy555; + if (yych <= 
0xBF) + goto yy559; + goto yy555; + yy563: + yych = *++p; + if (yych <= 0x7F) + goto yy555; + if (yych <= 0x8F) + goto yy559; + goto yy555; + yy564: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy553; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy565; + if (yych <= '?') + goto yy556; + } else { + if (yych <= 0xDF) + goto yy557; + if (yych <= 0xE0) + goto yy558; + goto yy559; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy560; + if (yych <= 0xEF) + goto yy559; + goto yy561; + } else { + if (yych <= 0xF3) + goto yy562; + if (yych <= 0xF4) + goto yy563; + } + } + yy565 : { return (bufsize_t)(p - start); } + } +} + +// Try to match an HTML block end line of type 4 +bufsize_t _scan_html_block_end_4(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 64, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy570; + } + if (yych <= 0xE0) { + if (yych <= '\n') { + if (yych <= 0x00) + goto yy567; + if (yych <= '\t') + goto yy569; + } else { + if (yych <= 0x7F) + goto yy569; + if (yych <= 0xC1) + goto yy567; + if (yych <= 0xDF) + goto yy572; + goto yy573; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy575; + goto yy574; + } else { + if (yych <= 0xF0) + goto yy576; + if (yych <= 0xF3) + goto yy577; + if (yych <= 0xF4) + goto yy578; + } + } + yy567: + ++p; + yy568 : { return 0; } + yy569: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy568; + if (yych <= '\t') + goto yy580; + goto yy568; + } else { + if (yych <= 0x7F) + goto yy580; + if (yych <= 0xC1) + goto yy568; + if (yych <= 0xF4) + goto yy580; + goto yy568; + } + yy570: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy579; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy571; + if (yych <= '>') + goto yy570; + } else { + if (yych <= 0xDF) + goto yy582; + if (yych <= 0xE0) + goto yy583; + goto yy584; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy585; + if (yych <= 0xEF) + goto yy584; + goto yy586; + } else { + if (yych <= 0xF3) + goto yy587; + if (yych <= 0xF4) + goto yy588; + } + } + yy571 : { return (bufsize_t)(p - start); } + yy572: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy568; + if (yych <= 0xBF) + goto yy579; + goto yy568; + yy573: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy568; + if (yych <= 0xBF) + goto yy582; + goto yy568; + yy574: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy568; + if (yych <= 0xBF) + goto yy582; + goto yy568; + yy575: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy568; + if (yych <= 0x9F) + goto yy582; + goto yy568; + yy576: + yyaccept = 0; + yych = 
*(marker = ++p); + if (yych <= 0x8F) + goto yy568; + if (yych <= 0xBF) + goto yy584; + goto yy568; + yy577: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy568; + if (yych <= 0xBF) + goto yy584; + goto yy568; + yy578: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy568; + if (yych <= 0x8F) + goto yy584; + goto yy568; + yy579: + yych = *++p; + yy580: + if (yybm[0 + yych] & 128) { + goto yy579; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy581; + if (yych <= '>') + goto yy570; + } else { + if (yych <= 0xDF) + goto yy582; + if (yych <= 0xE0) + goto yy583; + goto yy584; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy585; + if (yych <= 0xEF) + goto yy584; + goto yy586; + } else { + if (yych <= 0xF3) + goto yy587; + if (yych <= 0xF4) + goto yy588; + } + } + yy581: + p = marker; + if (yyaccept == 0) { + goto yy568; + } else { + goto yy571; + } + yy582: + yych = *++p; + if (yych <= 0x7F) + goto yy581; + if (yych <= 0xBF) + goto yy579; + goto yy581; + yy583: + yych = *++p; + if (yych <= 0x9F) + goto yy581; + if (yych <= 0xBF) + goto yy582; + goto yy581; + yy584: + yych = *++p; + if (yych <= 0x7F) + goto yy581; + if (yych <= 0xBF) + goto yy582; + goto yy581; + yy585: + yych = *++p; + if (yych <= 0x7F) + goto yy581; + if (yych <= 0x9F) + goto yy582; + goto yy581; + yy586: + yych = *++p; + if (yych <= 0x8F) + goto yy581; + if (yych <= 0xBF) + goto yy584; + goto yy581; + yy587: + yych = *++p; + if (yych <= 0x7F) + goto yy581; + if (yych <= 0xBF) + goto yy584; + goto yy581; + yy588: + yych = *++p; + if (yych <= 0x7F) + goto yy581; + if (yych <= 0x8F) + goto yy584; + goto yy581; + } +} + +// Try to match an HTML block end line of type 5 +bufsize_t _scan_html_block_end_5(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 
64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xDF) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy590; + if (yych != '\n') + goto yy592; + } else { + if (yych <= ']') + goto yy593; + if (yych <= 0x7F) + goto yy592; + if (yych >= 0xC2) + goto yy594; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy595; + if (yych == 0xED) + goto yy597; + goto yy596; + } else { + if (yych <= 0xF0) + goto yy598; + if (yych <= 0xF3) + goto yy599; + if (yych <= 0xF4) + goto yy600; + } + } + yy590: + ++p; + yy591 : { return 0; } + yy592: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy591; + if (yych <= '\t') + goto yy602; + goto yy591; + } else { + if (yych <= 0x7F) + goto yy602; + if (yych <= 0xC1) + goto yy591; + if (yych <= 0xF4) + goto yy602; + goto yy591; + } + yy593: + yyaccept = 0; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy612; + } + if (yych <= '\n') { + if (yych <= 0x00) + goto yy591; + if (yych <= '\t') + goto yy602; + goto yy591; + } else { + if (yych <= 0x7F) + goto yy602; + if (yych <= 0xC1) + goto yy591; + if 
(yych <= 0xF4) + goto yy602; + goto yy591; + } + yy594: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy591; + if (yych <= 0xBF) + goto yy601; + goto yy591; + yy595: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy591; + if (yych <= 0xBF) + goto yy605; + goto yy591; + yy596: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy591; + if (yych <= 0xBF) + goto yy605; + goto yy591; + yy597: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy591; + if (yych <= 0x9F) + goto yy605; + goto yy591; + yy598: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy591; + if (yych <= 0xBF) + goto yy607; + goto yy591; + yy599: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy591; + if (yych <= 0xBF) + goto yy607; + goto yy591; + yy600: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy591; + if (yych <= 0x8F) + goto yy607; + goto yy591; + yy601: + yych = *++p; + yy602: + if (yybm[0 + yych] & 64) { + goto yy601; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy603; + if (yych <= ']') + goto yy604; + } else { + if (yych <= 0xDF) + goto yy605; + if (yych <= 0xE0) + goto yy606; + goto yy607; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy608; + if (yych <= 0xEF) + goto yy607; + goto yy609; + } else { + if (yych <= 0xF3) + goto yy610; + if (yych <= 0xF4) + goto yy611; + } + } + yy603: + p = marker; + if (yyaccept == 0) { + goto yy591; + } else { + goto yy614; + } + yy604: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy601; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy603; + if (yych <= ']') + goto yy612; + goto yy603; + } else { + if (yych <= 0xDF) + goto yy605; + if (yych <= 0xE0) + goto yy606; + goto yy607; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy608; + if (yych <= 0xEF) + goto yy607; + goto yy609; + } else { + if (yych <= 0xF3) + 
goto yy610; + if (yych <= 0xF4) + goto yy611; + goto yy603; + } + } + yy605: + yych = *++p; + if (yych <= 0x7F) + goto yy603; + if (yych <= 0xBF) + goto yy601; + goto yy603; + yy606: + yych = *++p; + if (yych <= 0x9F) + goto yy603; + if (yych <= 0xBF) + goto yy605; + goto yy603; + yy607: + yych = *++p; + if (yych <= 0x7F) + goto yy603; + if (yych <= 0xBF) + goto yy605; + goto yy603; + yy608: + yych = *++p; + if (yych <= 0x7F) + goto yy603; + if (yych <= 0x9F) + goto yy605; + goto yy603; + yy609: + yych = *++p; + if (yych <= 0x8F) + goto yy603; + if (yych <= 0xBF) + goto yy607; + goto yy603; + yy610: + yych = *++p; + if (yych <= 0x7F) + goto yy603; + if (yych <= 0xBF) + goto yy607; + goto yy603; + yy611: + yych = *++p; + if (yych <= 0x7F) + goto yy603; + if (yych <= 0x8F) + goto yy607; + goto yy603; + yy612: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy612; + } + if (yych <= 0xDF) { + if (yych <= '=') { + if (yych <= 0x00) + goto yy603; + if (yych == '\n') + goto yy603; + goto yy601; + } else { + if (yych <= '>') + goto yy613; + if (yych <= 0x7F) + goto yy601; + if (yych <= 0xC1) + goto yy603; + goto yy605; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy606; + if (yych == 0xED) + goto yy608; + goto yy607; + } else { + if (yych <= 0xF0) + goto yy609; + if (yych <= 0xF3) + goto yy610; + if (yych <= 0xF4) + goto yy611; + goto yy603; + } + } + yy613: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy601; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy614; + if (yych <= ']') + goto yy604; + } else { + if (yych <= 0xDF) + goto yy605; + if (yych <= 0xE0) + goto yy606; + goto yy607; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy608; + if (yych <= 0xEF) + goto yy607; + goto yy609; + } else { + if (yych <= 0xF3) + goto yy610; + if (yych <= 0xF4) + goto yy611; + } + } + yy614 : { return (bufsize_t)(p - start); } + } +} + +// Try to match a link title (in single quotes, 
in double quotes, or +// in parentheses), returning number of chars matched. Allow one +// level of internal nesting (quotes within quotes). +bufsize_t _scan_link_title(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 192, 208, 208, 208, 208, 144, 80, 80, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 32, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '&') { + if (yych == '"') + goto yy617; + } else { + if (yych <= '\'') + goto yy618; + if (yych <= '(') + goto yy619; + } + ++p; + yy616 : { return 0; } + yy617: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x00) + goto yy616; + if (yych <= 0x7F) + goto yy621; + if (yych <= 0xC1) + goto yy616; + if (yych <= 0xF4) + goto yy621; + goto yy616; + yy618: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x00) + goto yy616; + if (yych <= 0x7F) + goto yy634; + if (yych <= 0xC1) + goto yy616; + if (yych <= 0xF4) + goto yy634; + 
goto yy616; + yy619: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '(') { + if (yych <= 0x00) + goto yy616; + if (yych <= '\'') + goto yy646; + goto yy616; + } else { + if (yych <= 0x7F) + goto yy646; + if (yych <= 0xC1) + goto yy616; + if (yych <= 0xF4) + goto yy646; + goto yy616; + } + yy620: + yych = *++p; + yy621: + if (yybm[0 + yych] & 16) { + goto yy620; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy622; + if (yych <= '"') + goto yy623; + goto yy625; + } else { + if (yych <= 0xC1) + goto yy622; + if (yych <= 0xDF) + goto yy626; + goto yy627; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy629; + goto yy628; + } else { + if (yych <= 0xF0) + goto yy630; + if (yych <= 0xF3) + goto yy631; + if (yych <= 0xF4) + goto yy632; + } + } + yy622: + p = marker; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy616; + } else { + goto yy624; + } + } else { + if (yyaccept == 2) { + goto yy636; + } else { + goto yy648; + } + } + yy623: + ++p; + yy624 : { return (bufsize_t)(p - start); } + yy625: + yych = *++p; + if (yybm[0 + yych] & 16) { + goto yy620; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy622; + if (yych <= '"') + goto yy657; + goto yy625; + } else { + if (yych <= 0xC1) + goto yy622; + if (yych >= 0xE0) + goto yy627; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy629; + goto yy628; + } else { + if (yych <= 0xF0) + goto yy630; + if (yych <= 0xF3) + goto yy631; + if (yych <= 0xF4) + goto yy632; + goto yy622; + } + } + yy626: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy620; + goto yy622; + yy627: + yych = *++p; + if (yych <= 0x9F) + goto yy622; + if (yych <= 0xBF) + goto yy626; + goto yy622; + yy628: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy626; + goto yy622; + yy629: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x9F) + goto yy626; + goto yy622; + yy630: + yych = *++p; + 
if (yych <= 0x8F) + goto yy622; + if (yych <= 0xBF) + goto yy628; + goto yy622; + yy631: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy628; + goto yy622; + yy632: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x8F) + goto yy628; + goto yy622; + yy633: + yych = *++p; + yy634: + if (yybm[0 + yych] & 64) { + goto yy633; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy622; + if (yych >= '(') + goto yy637; + } else { + if (yych <= 0xC1) + goto yy622; + if (yych <= 0xDF) + goto yy638; + goto yy639; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy641; + goto yy640; + } else { + if (yych <= 0xF0) + goto yy642; + if (yych <= 0xF3) + goto yy643; + if (yych <= 0xF4) + goto yy644; + goto yy622; + } + } + yy635: + ++p; + yy636 : { return (bufsize_t)(p - start); } + yy637: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy633; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy622; + if (yych <= '\'') + goto yy658; + goto yy637; + } else { + if (yych <= 0xC1) + goto yy622; + if (yych >= 0xE0) + goto yy639; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy641; + goto yy640; + } else { + if (yych <= 0xF0) + goto yy642; + if (yych <= 0xF3) + goto yy643; + if (yych <= 0xF4) + goto yy644; + goto yy622; + } + } + yy638: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy633; + goto yy622; + yy639: + yych = *++p; + if (yych <= 0x9F) + goto yy622; + if (yych <= 0xBF) + goto yy638; + goto yy622; + yy640: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy638; + goto yy622; + yy641: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x9F) + goto yy638; + goto yy622; + yy642: + yych = *++p; + if (yych <= 0x8F) + goto yy622; + if (yych <= 0xBF) + goto yy640; + goto yy622; + yy643: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy640; + goto yy622; + yy644: + 
yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x8F) + goto yy640; + goto yy622; + yy645: + yych = *++p; + yy646: + if (yybm[0 + yych] & 128) { + goto yy645; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= '(') + goto yy622; + if (yych >= '*') + goto yy649; + } else { + if (yych <= 0xC1) + goto yy622; + if (yych <= 0xDF) + goto yy650; + goto yy651; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy653; + goto yy652; + } else { + if (yych <= 0xF0) + goto yy654; + if (yych <= 0xF3) + goto yy655; + if (yych <= 0xF4) + goto yy656; + goto yy622; + } + } + yy647: + ++p; + yy648 : { return (bufsize_t)(p - start); } + yy649: + yych = *++p; + if (yych <= 0xDF) { + if (yych <= '[') { + if (yych <= 0x00) + goto yy622; + if (yych == ')') + goto yy659; + goto yy645; + } else { + if (yych <= '\\') + goto yy649; + if (yych <= 0x7F) + goto yy645; + if (yych <= 0xC1) + goto yy622; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy651; + if (yych == 0xED) + goto yy653; + goto yy652; + } else { + if (yych <= 0xF0) + goto yy654; + if (yych <= 0xF3) + goto yy655; + if (yych <= 0xF4) + goto yy656; + goto yy622; + } + } + yy650: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy645; + goto yy622; + yy651: + yych = *++p; + if (yych <= 0x9F) + goto yy622; + if (yych <= 0xBF) + goto yy650; + goto yy622; + yy652: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy650; + goto yy622; + yy653: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x9F) + goto yy650; + goto yy622; + yy654: + yych = *++p; + if (yych <= 0x8F) + goto yy622; + if (yych <= 0xBF) + goto yy652; + goto yy622; + yy655: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0xBF) + goto yy652; + goto yy622; + yy656: + yych = *++p; + if (yych <= 0x7F) + goto yy622; + if (yych <= 0x8F) + goto yy652; + goto yy622; + yy657: + yyaccept = 1; + yych = *(marker = ++p); + if (yybm[0 + yych] & 16) { + 
goto yy620; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy624; + if (yych <= '"') + goto yy623; + goto yy625; + } else { + if (yych <= 0xC1) + goto yy624; + if (yych <= 0xDF) + goto yy626; + goto yy627; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy629; + goto yy628; + } else { + if (yych <= 0xF0) + goto yy630; + if (yych <= 0xF3) + goto yy631; + if (yych <= 0xF4) + goto yy632; + goto yy624; + } + } + yy658: + yyaccept = 2; + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy633; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= 0x00) + goto yy636; + if (yych <= '\'') + goto yy635; + goto yy637; + } else { + if (yych <= 0xC1) + goto yy636; + if (yych <= 0xDF) + goto yy638; + goto yy639; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy641; + goto yy640; + } else { + if (yych <= 0xF0) + goto yy642; + if (yych <= 0xF3) + goto yy643; + if (yych <= 0xF4) + goto yy644; + goto yy636; + } + } + yy659: + yyaccept = 3; + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy645; + } + if (yych <= 0xE0) { + if (yych <= '\\') { + if (yych <= '(') + goto yy648; + if (yych <= ')') + goto yy647; + goto yy649; + } else { + if (yych <= 0xC1) + goto yy648; + if (yych <= 0xDF) + goto yy650; + goto yy651; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy653; + goto yy652; + } else { + if (yych <= 0xF0) + goto yy654; + if (yych <= 0xF3) + goto yy655; + if (yych <= 0xF4) + goto yy656; + goto yy648; + } + } + } +} + +// Match space characters, including newlines. 
+bufsize_t _scan_spacechars(const unsigned char *p) { + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy661; + } + ++p; + { return 0; } + yy661: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy661; + } + { return (bufsize_t)(p - start); } + } +} + +// Match ATX heading start. 
+bufsize_t _scan_atx_heading_start(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '#') + goto yy664; + ++p; + yy663 : { return 0; } + yy664: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy663; + if (yych <= '\n') + goto yy667; + goto yy663; + } else { + if (yych <= '\r') + goto yy667; + if (yych == '#') + goto yy668; + goto yy663; + } + yy665: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + yy666 : { return (bufsize_t)(p - start); } + yy667: + ++p; + goto yy666; + yy668: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy669; + if (yych <= '\n') + goto yy667; + } else { + if (yych <= '\r') + goto yy667; + if (yych == '#') + goto yy670; + } + yy669: + p = marker; + goto yy663; + yy670: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy669; + if (yych <= '\n') + goto yy667; + goto yy669; + } else { + if (yych <= '\r') + 
goto yy667; + if (yych != '#') + goto yy669; + } + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy669; + if (yych <= '\n') + goto yy667; + goto yy669; + } else { + if (yych <= '\r') + goto yy667; + if (yych != '#') + goto yy669; + } + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy669; + if (yych <= '\n') + goto yy667; + goto yy669; + } else { + if (yych <= '\r') + goto yy667; + if (yych != '#') + goto yy669; + } + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy665; + } + if (yych <= 0x08) + goto yy669; + if (yych <= '\n') + goto yy667; + if (yych == '\r') + goto yy667; + goto yy669; + } +} + +// Match setext heading line. Return 1 for level-1 heading, +// 2 for level-2, 0 for no match. +bufsize_t _scan_setext_heading_line(const unsigned char *p) { + const unsigned char *marker = NULL; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '-') + goto yy673; + if (yych == '=') + goto yy674; + ++p; + yy672 : { return 0; } + yy673: + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy679; + } + if 
(yych <= '\f') { + if (yych <= 0x08) + goto yy672; + if (yych <= '\n') + goto yy676; + goto yy672; + } else { + if (yych <= '\r') + goto yy676; + if (yych == ' ') + goto yy676; + goto yy672; + } + yy674: + yych = *(marker = ++p); + if (yybm[0 + yych] & 128) { + goto yy683; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy672; + if (yych <= '\n') + goto yy681; + goto yy672; + } else { + if (yych <= '\r') + goto yy681; + if (yych == ' ') + goto yy681; + goto yy672; + } + yy675: + yych = *++p; + yy676: + if (yybm[0 + yych] & 32) { + goto yy675; + } + if (yych <= 0x08) + goto yy677; + if (yych <= '\n') + goto yy678; + if (yych == '\r') + goto yy678; + yy677: + p = marker; + goto yy672; + yy678: + ++p; + { return 2; } + yy679: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy675; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy677; + if (yych <= '\n') + goto yy678; + goto yy677; + } else { + if (yych <= '\r') + goto yy678; + if (yych == '-') + goto yy679; + goto yy677; + } + yy680: + yych = *++p; + yy681: + if (yych <= '\f') { + if (yych <= 0x08) + goto yy677; + if (yych <= '\t') + goto yy680; + if (yych >= '\v') + goto yy677; + } else { + if (yych <= '\r') + goto yy682; + if (yych == ' ') + goto yy680; + goto yy677; + } + yy682: + ++p; + { return 1; } + yy683: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy683; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy677; + if (yych <= '\t') + goto yy680; + if (yych <= '\n') + goto yy682; + goto yy677; + } else { + if (yych <= '\r') + goto yy682; + if (yych == ' ') + goto yy680; + goto yy677; + } + } +} + +// Scan an opening code fence. 
+bufsize_t _scan_open_code_fence(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 192, 192, 0, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 144, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 224, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych == '`') + goto yy686; + if (yych == '~') + goto yy687; + ++p; + yy685 : { return 0; } + yy686: + yych = *(marker = ++p); + if (yych == '`') + goto yy688; + goto yy685; + yy687: + yych = *(marker = ++p); + if (yych == '~') + goto yy690; + goto yy685; + yy688: + yych = *++p; + if (yybm[0 + yych] & 16) { + goto yy691; + } + yy689: + p = marker; + goto yy685; + yy690: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy692; + } + goto yy689; + yy691: + yych = *++p; + if (yybm[0 + yych] & 16) { + goto yy691; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych <= 0x00) + goto yy689; + if (yych == '\n') { + marker = p; + goto yy694; + } + marker = p; + goto yy693; + } else { + if (yych <= '\r') { + marker 
= p; + goto yy694; + } + if (yych <= 0x7F) { + marker = p; + goto yy693; + } + if (yych <= 0xC1) + goto yy689; + marker = p; + goto yy695; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) { + marker = p; + goto yy696; + } + if (yych == 0xED) { + marker = p; + goto yy698; + } + marker = p; + goto yy697; + } else { + if (yych <= 0xF0) { + marker = p; + goto yy699; + } + if (yych <= 0xF3) { + marker = p; + goto yy700; + } + if (yych <= 0xF4) { + marker = p; + goto yy701; + } + goto yy689; + } + } + yy692: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy692; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych <= 0x00) + goto yy689; + if (yych == '\n') { + marker = p; + goto yy703; + } + marker = p; + goto yy702; + } else { + if (yych <= '\r') { + marker = p; + goto yy703; + } + if (yych <= 0x7F) { + marker = p; + goto yy702; + } + if (yych <= 0xC1) + goto yy689; + marker = p; + goto yy704; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) { + marker = p; + goto yy705; + } + if (yych == 0xED) { + marker = p; + goto yy707; + } + marker = p; + goto yy706; + } else { + if (yych <= 0xF0) { + marker = p; + goto yy708; + } + if (yych <= 0xF3) { + marker = p; + goto yy709; + } + if (yych <= 0xF4) { + marker = p; + goto yy710; + } + goto yy689; + } + } + yy693: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy693; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy689; + if (yych >= 0x0E) + goto yy689; + } else { + if (yych <= 0xDF) + goto yy695; + if (yych <= 0xE0) + goto yy696; + goto yy697; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy698; + if (yych <= 0xEF) + goto yy697; + goto yy699; + } else { + if (yych <= 0xF3) + goto yy700; + if (yych <= 0xF4) + goto yy701; + goto yy689; + } + } + yy694: + ++p; + p = marker; + { return (bufsize_t)(p - start); } + yy695: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy693; + goto yy689; + yy696: + yych = *++p; + if (yych <= 0x9F) 
+ goto yy689; + if (yych <= 0xBF) + goto yy695; + goto yy689; + yy697: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy695; + goto yy689; + yy698: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0x9F) + goto yy695; + goto yy689; + yy699: + yych = *++p; + if (yych <= 0x8F) + goto yy689; + if (yych <= 0xBF) + goto yy697; + goto yy689; + yy700: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy697; + goto yy689; + yy701: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0x8F) + goto yy697; + goto yy689; + yy702: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy702; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= 0x00) + goto yy689; + if (yych >= 0x0E) + goto yy689; + } else { + if (yych <= 0xDF) + goto yy704; + if (yych <= 0xE0) + goto yy705; + goto yy706; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy707; + if (yych <= 0xEF) + goto yy706; + goto yy708; + } else { + if (yych <= 0xF3) + goto yy709; + if (yych <= 0xF4) + goto yy710; + goto yy689; + } + } + yy703: + ++p; + p = marker; + { return (bufsize_t)(p - start); } + yy704: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy702; + goto yy689; + yy705: + yych = *++p; + if (yych <= 0x9F) + goto yy689; + if (yych <= 0xBF) + goto yy704; + goto yy689; + yy706: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy704; + goto yy689; + yy707: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0x9F) + goto yy704; + goto yy689; + yy708: + yych = *++p; + if (yych <= 0x8F) + goto yy689; + if (yych <= 0xBF) + goto yy706; + goto yy689; + yy709: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0xBF) + goto yy706; + goto yy689; + yy710: + yych = *++p; + if (yych <= 0x7F) + goto yy689; + if (yych <= 0x8F) + goto yy706; + goto yy689; + } +} + +// Scan a closing code fence with length at least len. 
+bufsize_t _scan_close_code_fence(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych == '`') + goto yy713; + if (yych == '~') + goto yy714; + ++p; + yy712 : { return 0; } + yy713: + yych = *(marker = ++p); + if (yych == '`') + goto yy715; + goto yy712; + yy714: + yych = *(marker = ++p); + if (yych == '~') + goto yy717; + goto yy712; + yy715: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy718; + } + yy716: + p = marker; + goto yy712; + yy717: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy719; + } + goto yy716; + yy718: + yych = *++p; + if (yybm[0 + yych] & 32) { + goto yy718; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy716; + if (yych <= '\t') { + marker = p; + goto yy720; + } + if (yych <= '\n') { + marker = p; + goto yy721; + } + goto yy716; + } else { + if (yych <= '\r') { + marker = p; + goto yy721; + } + if (yych == ' ') { + marker = p; + goto yy720; + } + goto yy716; + } + yy719: + yych = *++p; + if (yybm[0 + yych] & 64) { + goto yy719; + } + if (yych <= '\f') { + if (yych <= 0x08) + goto yy716; + if (yych <= 
'\t') { + marker = p; + goto yy722; + } + if (yych <= '\n') { + marker = p; + goto yy723; + } + goto yy716; + } else { + if (yych <= '\r') { + marker = p; + goto yy723; + } + if (yych == ' ') { + marker = p; + goto yy722; + } + goto yy716; + } + yy720: + yych = *++p; + if (yybm[0 + yych] & 128) { + goto yy720; + } + if (yych <= 0x08) + goto yy716; + if (yych <= '\n') + goto yy721; + if (yych != '\r') + goto yy716; + yy721: + ++p; + p = marker; + { return (bufsize_t)(p - start); } + yy722: + yych = *++p; + if (yych <= '\f') { + if (yych <= 0x08) + goto yy716; + if (yych <= '\t') + goto yy722; + if (yych >= '\v') + goto yy716; + } else { + if (yych <= '\r') + goto yy723; + if (yych == ' ') + goto yy722; + goto yy716; + } + yy723: + ++p; + p = marker; + { return (bufsize_t)(p - start); } + } +} + +// Scans an entity. +// Returns number of chars matched. +bufsize_t _scan_entity(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + yych = *p; + if (yych == '&') + goto yy726; + ++p; + yy725 : { return 0; } + yy726: + yych = *(marker = ++p); + if (yych <= '@') { + if (yych != '#') + goto yy725; + } else { + if (yych <= 'Z') + goto yy728; + if (yych <= '`') + goto yy725; + if (yych <= 'z') + goto yy728; + goto yy725; + } + yych = *++p; + if (yych <= 'W') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy729; + } else { + if (yych <= 'X') + goto yy730; + if (yych == 'x') + goto yy730; + } + yy727: + p = marker; + goto yy725; + yy728: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy731; + goto yy727; + } else { + if (yych <= 'Z') + goto yy731; + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy731; + goto yy727; + } + yy729: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy732; + if (yych == ';') + goto yy733; + goto yy727; + yy730: + yych = *++p; + if (yych <= '@') { + if (yych <= '/') + goto yy727; + if 
(yych <= '9') + goto yy734; + goto yy727; + } else { + if (yych <= 'F') + goto yy734; + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy734; + goto yy727; + } + yy731: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy735; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy735; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy735; + goto yy727; + } + } + yy732: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy736; + if (yych != ';') + goto yy727; + yy733: + ++p; + { return (bufsize_t)(p - start); } + yy734: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy737; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'F') { + if (yych <= '@') + goto yy727; + goto yy737; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy737; + goto yy727; + } + } + yy735: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy738; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy738; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy738; + goto yy727; + } + } + yy736: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy739; + if (yych == ';') + goto yy733; + goto yy727; + yy737: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy740; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'F') { + if (yych <= '@') + goto yy727; + goto yy740; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy740; + goto yy727; + } + } + yy738: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy741; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + 
if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy741; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy741; + goto yy727; + } + } + yy739: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy742; + if (yych == ';') + goto yy733; + goto yy727; + yy740: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy743; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'F') { + if (yych <= '@') + goto yy727; + goto yy743; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy743; + goto yy727; + } + } + yy741: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy744; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy744; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy744; + goto yy727; + } + } + yy742: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy745; + if (yych == ';') + goto yy733; + goto yy727; + yy743: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy746; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'F') { + if (yych <= '@') + goto yy727; + goto yy746; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy746; + goto yy727; + } + } + yy744: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy747; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy747; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy747; + goto yy727; + } + } + yy745: + yych = *++p; + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy748; + if (yych == ';') + goto yy733; + goto yy727; + yy746: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + 
if (yych <= '9') + goto yy748; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'F') { + if (yych <= '@') + goto yy727; + goto yy748; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'f') + goto yy748; + goto yy727; + } + } + yy747: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy749; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy749; + } else { + if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy749; + goto yy727; + } + } + yy748: + yych = *++p; + if (yych == ';') + goto yy733; + goto yy727; + yy749: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy750; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy750: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy751; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy751: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy752; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy752: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy753; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy753: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto 
yy754; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy754: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy755; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy755: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy756; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy756: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy757; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy757: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy758; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy758: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy759; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy759: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy760; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + 
goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy760: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy761; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy761: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy762; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy762: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy763; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy763: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy764; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy764: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy765; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy765: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy766; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy766: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + 
goto yy767; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy767: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy768; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy768: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy769; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy769: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy770; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy770: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy771; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy771: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy772; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + } else { + if (yych <= '`') + goto yy727; + if (yych >= '{') + goto yy727; + } + } + yy772: + yych = *++p; + if (yych <= ';') { + if (yych <= '/') + goto yy727; + if (yych <= '9') + goto yy748; + if (yych <= ':') + goto yy727; + goto yy733; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy727; + goto yy748; + } else { + 
if (yych <= '`') + goto yy727; + if (yych <= 'z') + goto yy748; + goto yy727; + } + } + } +} + +// Returns positive value if a URL begins in a way that is potentially +// dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0. +bufsize_t _scan_dangerous_url(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + yych = *p; + if (yych <= 'V') { + if (yych <= 'F') { + if (yych == 'D') + goto yy775; + if (yych >= 'F') + goto yy776; + } else { + if (yych == 'J') + goto yy777; + if (yych >= 'V') + goto yy778; + } + } else { + if (yych <= 'f') { + if (yych == 'd') + goto yy775; + if (yych >= 'f') + goto yy776; + } else { + if (yych <= 'j') { + if (yych >= 'j') + goto yy777; + } else { + if (yych == 'v') + goto yy778; + } + } + } + ++p; + yy774 : { return 0; } + yy775: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'A') + goto yy779; + if (yych == 'a') + goto yy779; + goto yy774; + yy776: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'I') + goto yy781; + if (yych == 'i') + goto yy781; + goto yy774; + yy777: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'A') + goto yy782; + if (yych == 'a') + goto yy782; + goto yy774; + yy778: + yyaccept = 0; + yych = *(marker = ++p); + if (yych == 'B') + goto yy783; + if (yych == 'b') + goto yy783; + goto yy774; + yy779: + yych = *++p; + if (yych == 'T') + goto yy784; + if (yych == 't') + goto yy784; + yy780: + p = marker; + if (yyaccept == 0) { + goto yy774; + } else { + goto yy792; + } + yy781: + yych = *++p; + if (yych == 'L') + goto yy785; + if (yych == 'l') + goto yy785; + goto yy780; + yy782: + yych = *++p; + if (yych == 'V') + goto yy786; + if (yych == 'v') + goto yy786; + goto yy780; + yy783: + yych = *++p; + if (yych == 'S') + goto yy787; + if (yych == 's') + goto yy787; + goto yy780; + yy784: + yych = *++p; + if (yych == 'A') + goto yy788; + if (yych == 'a') + goto yy788; + goto yy780; + 
yy785: + yych = *++p; + if (yych == 'E') + goto yy789; + if (yych == 'e') + goto yy789; + goto yy780; + yy786: + yych = *++p; + if (yych == 'A') + goto yy783; + if (yych == 'a') + goto yy783; + goto yy780; + yy787: + yych = *++p; + if (yych == 'C') + goto yy790; + if (yych == 'c') + goto yy790; + goto yy780; + yy788: + yych = *++p; + if (yych == ':') + goto yy791; + goto yy780; + yy789: + yych = *++p; + if (yych == ':') + goto yy793; + goto yy780; + yy790: + yych = *++p; + if (yych == 'R') + goto yy794; + if (yych == 'r') + goto yy794; + goto yy780; + yy791: + yyaccept = 1; + yych = *(marker = ++p); + if (yych == 'I') + goto yy795; + if (yych == 'i') + goto yy795; + yy792 : { return (bufsize_t)(p - start); } + yy793: + ++p; + goto yy792; + yy794: + yych = *++p; + if (yych == 'I') + goto yy796; + if (yych == 'i') + goto yy796; + goto yy780; + yy795: + yych = *++p; + if (yych == 'M') + goto yy797; + if (yych == 'm') + goto yy797; + goto yy780; + yy796: + yych = *++p; + if (yych == 'P') + goto yy798; + if (yych == 'p') + goto yy798; + goto yy780; + yy797: + yych = *++p; + if (yych == 'A') + goto yy799; + if (yych == 'a') + goto yy799; + goto yy780; + yy798: + yych = *++p; + if (yych == 'T') + goto yy789; + if (yych == 't') + goto yy789; + goto yy780; + yy799: + yych = *++p; + if (yych == 'G') + goto yy800; + if (yych != 'g') + goto yy780; + yy800: + yych = *++p; + if (yych == 'E') + goto yy801; + if (yych != 'e') + goto yy780; + yy801: + yych = *++p; + if (yych != '/') + goto yy780; + yych = *++p; + if (yych <= 'W') { + if (yych <= 'J') { + if (yych == 'G') + goto yy802; + if (yych <= 'I') + goto yy780; + goto yy803; + } else { + if (yych == 'P') + goto yy804; + if (yych <= 'V') + goto yy780; + goto yy805; + } + } else { + if (yych <= 'j') { + if (yych == 'g') + goto yy802; + if (yych <= 'i') + goto yy780; + goto yy803; + } else { + if (yych <= 'p') { + if (yych <= 'o') + goto yy780; + goto yy804; + } else { + if (yych == 'w') + goto yy805; + goto yy780; + } + } + } + 
yy802: + yych = *++p; + if (yych == 'I') + goto yy806; + if (yych == 'i') + goto yy806; + goto yy780; + yy803: + yych = *++p; + if (yych == 'P') + goto yy807; + if (yych == 'p') + goto yy807; + goto yy780; + yy804: + yych = *++p; + if (yych == 'N') + goto yy808; + if (yych == 'n') + goto yy808; + goto yy780; + yy805: + yych = *++p; + if (yych == 'E') + goto yy809; + if (yych == 'e') + goto yy809; + goto yy780; + yy806: + yych = *++p; + if (yych == 'F') + goto yy810; + if (yych == 'f') + goto yy810; + goto yy780; + yy807: + yych = *++p; + if (yych == 'E') + goto yy808; + if (yych != 'e') + goto yy780; + yy808: + yych = *++p; + if (yych == 'G') + goto yy810; + if (yych == 'g') + goto yy810; + goto yy780; + yy809: + yych = *++p; + if (yych == 'B') + goto yy811; + if (yych == 'b') + goto yy811; + goto yy780; + yy810: + ++p; + { return 0; } + yy811: + yych = *++p; + if (yych == 'P') + goto yy810; + if (yych == 'p') + goto yy810; + goto yy780; + } +} diff --git a/deps/cmark/src/scanners.h b/deps/cmark/src/scanners.h new file mode 100644 index 0000000..92f654d --- /dev/null +++ b/deps/cmark/src/scanners.h @@ -0,0 +1,61 @@ +#include "cmark.h" +#include "chunk.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, + bufsize_t offset); +bufsize_t _scan_scheme(const unsigned char *p); +bufsize_t _scan_autolink_uri(const unsigned char *p); +bufsize_t _scan_autolink_email(const unsigned char *p); +bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_html_comment(const unsigned char *p); +bufsize_t _scan_html_pi(const unsigned char *p); +bufsize_t _scan_html_declaration(const unsigned char *p); +bufsize_t _scan_html_cdata(const unsigned char *p); +bufsize_t _scan_html_block_start(const unsigned char *p); +bufsize_t _scan_html_block_start_7(const unsigned char *p); +bufsize_t _scan_html_block_end_1(const unsigned char *p); +bufsize_t _scan_html_block_end_2(const unsigned char *p); +bufsize_t 
_scan_html_block_end_3(const unsigned char *p); +bufsize_t _scan_html_block_end_4(const unsigned char *p); +bufsize_t _scan_html_block_end_5(const unsigned char *p); +bufsize_t _scan_link_title(const unsigned char *p); +bufsize_t _scan_spacechars(const unsigned char *p); +bufsize_t _scan_atx_heading_start(const unsigned char *p); +bufsize_t _scan_setext_heading_line(const unsigned char *p); +bufsize_t _scan_open_code_fence(const unsigned char *p); +bufsize_t _scan_close_code_fence(const unsigned char *p); +bufsize_t _scan_entity(const unsigned char *p); +bufsize_t _scan_dangerous_url(const unsigned char *p); + +#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) +#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) +#define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) +#define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) +#define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n) +#define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n) +#define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n) +#define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n) +#define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) +#define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) +#define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) +#define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n) +#define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n) +#define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n) +#define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n) +#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n) +#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n) +#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n) +#define scan_setext_heading_line(c, n) \ + _scan_at(&_scan_setext_heading_line, c, n) +#define 
scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n) +#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) +#define scan_entity(c, n) _scan_at(&_scan_entity, c, n) +#define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) + +#ifdef __cplusplus +} +#endif diff --git a/deps/cmark/src/scanners.re b/deps/cmark/src/scanners.re new file mode 100644 index 0000000..c7dd4f5 --- /dev/null +++ b/deps/cmark/src/scanners.re @@ -0,0 +1,344 @@ +#include +#include "chunk.h" +#include "scanners.h" + +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) +{ + bufsize_t res; + unsigned char *ptr = (unsigned char *)c->data; + + if (ptr == NULL || offset > c->len) { + return 0; + } else { + unsigned char lim = ptr[c->len]; + + ptr[c->len] = '\0'; + res = scanner(ptr + offset); + ptr[c->len] = lim; + } + + return res; +} + +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = marker; + re2c:define:YYCTXMARKER = marker; + re2c:yyfill:enable = 0; + + wordchar = [^\x00-\x20]; + + spacechar = [ \t\v\f\r\n]; + + reg_char = [^\\()\x00-\x20]; + + escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-]; + + tagname = [A-Za-z][A-Za-z0-9-]*; + + blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; + + attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; + + unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+; + singlequotedvalue = ['][^'\x00]*[']; + doublequotedvalue = ["][^"\x00]*["]; + + attributevalue = unquotedvalue | 
singlequotedvalue | doublequotedvalue; + + attributevaluespec = spacechar* [=] spacechar* attributevalue; + + attribute = spacechar+ attributename attributevaluespec?; + + opentag = tagname attribute* spacechar* [/]? [>]; + closetag = [/] tagname spacechar* [>]; + + htmlcomment = "--" ([^\x00-]+ | "-" [^\x00-] | "--" [^\x00>])* "-->"; + + processinginstruction = ([^?>\x00]+ | [?][^>\x00] | [>])+; + + declaration = [A-Z]+ [^>\x00]*; + + cdata = "CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])*; + + htmltag = opentag | closetag; + + in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)]; + + in_double_quotes = ["] (escaped_char|[^"\x00])* ["]; + in_single_quotes = ['] (escaped_char|[^'\x00])* [']; + in_parens = [(] (escaped_char|[^)\x00])* [)]; + + scheme = [A-Za-z][A-Za-z0-9.+-]{1,31}; +*/ + +// Try to match a scheme including colon. +bufsize_t _scan_scheme(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + scheme [:] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match URI autolink after first <, returning number of chars matched. +bufsize_t _scan_autolink_uri(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match email autolink after first <, returning num of chars matched. +bufsize_t _scan_autolink_email(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ + [@] + [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* + [>] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match an HTML tag after first <, returning num of chars matched. 
+bufsize_t _scan_html_tag(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + htmltag { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +bufsize_t _scan_html_comment(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + htmlcomment { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +bufsize_t _scan_html_pi(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + processinginstruction { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +bufsize_t _scan_html_declaration(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + declaration { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +bufsize_t _scan_html_cdata(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + cdata { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match an HTML block tag start line, returning +// an integer code for the type of block (1-6, matching the spec). +// #7 is handled by a separate function, below. 
+bufsize_t _scan_html_block_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; +/*!re2c + [<] ('script'|'pre'|'textarea'|'style') (spacechar | [>]) { return 1; } + '' { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match an HTML block end line of type 3 +bufsize_t _scan_html_block_end_3(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [^\n\x00]* '?>' { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match an HTML block end line of type 4 +bufsize_t _scan_html_block_end_4(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [^\n\x00]* '>' { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match an HTML block end line of type 5 +bufsize_t _scan_html_block_end_5(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [^\n\x00]* ']]>' { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Try to match a link title (in single quotes, in double quotes, or +// in parentheses), returning number of chars matched. Allow one +// level of internal nesting (quotes within quotes). +bufsize_t _scan_link_title(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } + ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } + [(] (escaped_char|[^()\x00])* [)] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Match space characters, including newlines. +bufsize_t _scan_spacechars(const unsigned char *p) +{ + const unsigned char *start = p; \ +/*!re2c + [ \t\v\f\r\n]+ { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Match ATX heading start. 
+bufsize_t _scan_atx_heading_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Match setext heading line. Return 1 for level-1 heading, +// 2 for level-2, 0 for no match. +bufsize_t _scan_setext_heading_line(const unsigned char *p) +{ + const unsigned char *marker = NULL; +/*!re2c + [=]+ [ \t]* [\r\n] { return 1; } + [-]+ [ \t]* [\r\n] { return 2; } + * { return 0; } +*/ +} + +// Scan an opening code fence. +bufsize_t _scan_open_code_fence(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Scan a closing code fence with length at least len. +bufsize_t _scan_close_code_fence(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Scans an entity. +// Returns number of chars matched. +bufsize_t _scan_entity(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] + { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + +// Returns positive value if a URL begins in a way that is potentially +// dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0. 
+bufsize_t _scan_dangerous_url(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + 'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; } + 'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); } + * { return 0; } +*/ +} + diff --git a/deps/cmark/src/utf8.c b/deps/cmark/src/utf8.c new file mode 100644 index 0000000..c29bbf7 --- /dev/null +++ b/deps/cmark/src/utf8.c @@ -0,0 +1,317 @@ +#include +#include +#include + +#include "cmark_ctype.h" +#include "utf8.h" + +static const int8_t utf8proc_utf8class[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}; + +static void encode_unknown(cmark_strbuf *buf) { + static const uint8_t repl[] = {239, 191, 189}; + cmark_strbuf_put(buf, repl, 3); +} + +static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { + int length, i; + + if (!str_len) + return 0; + + length = utf8proc_utf8class[str[0]]; + + if (!length) + return -1; + + if (str_len >= 0 && (bufsize_t)length > str_len) + return -str_len; + + for (i = 1; i < length; i++) { + if ((str[i] & 0xC0) != 0x80) + return -i; + } + + return length; +} + +// Validate a single UTF-8 character according to RFC 3629. 
+static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { + int length = utf8proc_utf8class[str[0]]; + + if (!length) + return -1; + + if ((bufsize_t)length > str_len) + return -str_len; + + switch (length) { + case 2: + if ((str[1] & 0xC0) != 0x80) + return -1; + if (str[0] < 0xC2) { + // Overlong + return -length; + } + break; + + case 3: + if ((str[1] & 0xC0) != 0x80) + return -1; + if ((str[2] & 0xC0) != 0x80) + return -2; + if (str[0] == 0xE0) { + if (str[1] < 0xA0) { + // Overlong + return -length; + } + } else if (str[0] == 0xED) { + if (str[1] >= 0xA0) { + // Surrogate + return -length; + } + } + break; + + case 4: + if ((str[1] & 0xC0) != 0x80) + return -1; + if ((str[2] & 0xC0) != 0x80) + return -2; + if ((str[3] & 0xC0) != 0x80) + return -3; + if (str[0] == 0xF0) { + if (str[1] < 0x90) { + // Overlong + return -length; + } + } else if (str[0] >= 0xF4) { + if (str[0] > 0xF4 || str[1] >= 0x90) { + // Above 0x10FFFF + return -length; + } + } + break; + } + + return length; +} + +void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, + bufsize_t size) { + bufsize_t i = 0; + + while (i < size) { + bufsize_t org = i; + int charlen = 0; + + while (i < size) { + if (line[i] < 0x80 && line[i] != 0) { + i++; + } else if (line[i] >= 0x80) { + charlen = utf8proc_valid(line + i, size - i); + if (charlen < 0) { + charlen = -charlen; + break; + } + i += charlen; + } else if (line[i] == 0) { + // ASCII NUL is technically valid but rejected + // for security reasons. 
+ charlen = 1; + break; + } + } + + if (i > org) { + cmark_strbuf_put(ob, line + org, i - org); + } + + if (i >= size) { + break; + } else { + // Invalid UTF-8 + encode_unknown(ob); + i += charlen; + } + } +} + +int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, + int32_t *dst) { + int length; + int32_t uc = -1; + + *dst = -1; + length = utf8proc_charlen(str, str_len); + if (length < 0) + return -1; + + switch (length) { + case 1: + uc = str[0]; + break; + case 2: + uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); + if (uc < 0x80) + uc = -1; + break; + case 3: + uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F); + if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) + uc = -1; + break; + case 4: + uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) + + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); + if (uc < 0x10000 || uc >= 0x110000) + uc = -1; + break; + } + + if (uc < 0) + return -1; + + *dst = uc; + return length; +} + +void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { + uint8_t dst[4]; + bufsize_t len = 0; + + assert(uc >= 0); + + if (uc < 0x80) { + dst[0] = (uint8_t)(uc); + len = 1; + } else if (uc < 0x800) { + dst[0] = (uint8_t)(0xC0 + (uc >> 6)); + dst[1] = 0x80 + (uc & 0x3F); + len = 2; + } else if (uc == 0xFFFF) { + dst[0] = 0xFF; + len = 1; + } else if (uc == 0xFFFE) { + dst[0] = 0xFE; + len = 1; + } else if (uc < 0x10000) { + dst[0] = (uint8_t)(0xE0 + (uc >> 12)); + dst[1] = 0x80 + ((uc >> 6) & 0x3F); + dst[2] = 0x80 + (uc & 0x3F); + len = 3; + } else if (uc < 0x110000) { + dst[0] = (uint8_t)(0xF0 + (uc >> 18)); + dst[1] = 0x80 + ((uc >> 12) & 0x3F); + dst[2] = 0x80 + ((uc >> 6) & 0x3F); + dst[3] = 0x80 + (uc & 0x3F); + len = 4; + } else { + encode_unknown(buf); + return; + } + + cmark_strbuf_put(buf, dst, len); +} + +void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, + bufsize_t len) { + int32_t c; + +#define bufpush(x) cmark_utf8proc_encode_char(x, dest) + + while (len > 0) { + bufsize_t 
char_len = cmark_utf8proc_iterate(str, len, &c); + + if (char_len >= 0) { +#include "case_fold_switch.inc" + } else { + encode_unknown(dest); + char_len = -char_len; + } + + str += char_len; + len -= char_len; + } +} + +// matches anything in the Zs class, plus LF, CR, TAB, FF. +int cmark_utf8proc_is_space(int32_t uc) { + return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 || + uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 || + uc == 8287 || uc == 12288); +} + +// matches anything in the P[cdefios] classes. +int cmark_utf8proc_is_punctuation(int32_t uc) { + return ( + (uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 || + uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 || + uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 || + uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 || + uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 || + uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 || + (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) || + (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 || + uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 || + uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) || + uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 || + (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 || + (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) || + uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 || + (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 || + (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) || + (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 || + uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) || + (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) || + (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 || + (uc >= 7360 && uc <= 
7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) || + (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) || + (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 || + uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 || + (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 || + (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) || + (uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 || + (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 || + uc == 11632 || (uc >= 11776 && uc <= 11822) || + (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) || + (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) || + uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 || + uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 || + uc == 42622 || (uc >= 42738 && uc <= 42743) || + (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 || + (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 || + uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 || + uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 || + uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 || + uc == 64831 || (uc >= 65040 && uc <= 65049) || + (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) || + uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 || + (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) || + (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 || + uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) || + uc == 65343 || uc == 65371 || uc == 65373 || + (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) || + uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 || + uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 || + (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) || + (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) || + uc == 69819 || 
uc == 69820 || (uc >= 69822 && uc <= 69825) || + (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 || + (uc >= 70085 && uc <= 70088) || uc == 70093 || + (uc >= 70200 && uc <= 70205) || uc == 70854 || + (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) || + (uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 || + uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 || + uc == 113823); +} diff --git a/deps/cmark/src/utf8.h b/deps/cmark/src/utf8.h new file mode 100644 index 0000000..8e45714 --- /dev/null +++ b/deps/cmark/src/utf8.h @@ -0,0 +1,24 @@ +#ifndef CMARK_UTF8_H +#define CMARK_UTF8_H + +#include +#include "buffer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, + bufsize_t len); +void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); +int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); +void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, + bufsize_t size); +int cmark_utf8proc_is_space(int32_t uc); +int cmark_utf8proc_is_punctuation(int32_t uc); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/cmark/src/xml.c b/deps/cmark/src/xml.c new file mode 100644 index 0000000..45589b9 --- /dev/null +++ b/deps/cmark/src/xml.c @@ -0,0 +1,229 @@ +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" + +#define BUFFER_SIZE 100 +#define MAX_INDENT 40 + +// Functions to convert cmark_nodes to XML strings. + +// C0 control characters, U+FFFE and U+FFF aren't allowed in XML. 
+static const char XML_ESCAPE_TABLE[256] = { + /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, + /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0, + /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, + /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// U+FFFD Replacement Character encoded in UTF-8 +#define UTF8_REPL "\xEF\xBF\xBD" + +static const char *XML_ESCAPES[] = { + "", UTF8_REPL, """, "&", "<", ">" +}; + +static void escape_xml(cmark_strbuf *ob, const unsigned char *src, + bufsize_t size) { + bufsize_t i = 0, org, esc = 0; + + while (i < size) { + org = i; + while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0) + i++; + + if (i > org) + cmark_strbuf_put(ob, src + org, i - org); + + if (i >= size) + break; + + if (esc == 9) { + // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to + // be changed. + // We know that src[i] is 0xBE or 0xBF. 
+ if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { + cmark_strbuf_putc(ob, 0xBD); + } else { + cmark_strbuf_putc(ob, src[i]); + } + } else { + cmark_strbuf_puts(ob, XML_ESCAPES[esc]); + } + + i++; + } +} + +static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) { + if (source) + escape_xml(dest, source, strlen((char *)source)); +} + +struct render_state { + cmark_strbuf *xml; + int indent; +}; + +static CMARK_INLINE void indent(struct render_state *state) { + int i; + for (i = 0; i < state->indent && i < MAX_INDENT; i++) { + cmark_strbuf_putc(state->xml, ' '); + } +} + +static int S_render_node(cmark_node *node, cmark_event_type ev_type, + struct render_state *state, int options) { + cmark_strbuf *xml = state->xml; + bool literal = false; + cmark_delim_type delim; + bool entering = (ev_type == CMARK_EVENT_ENTER); + char buffer[BUFFER_SIZE]; + + if (entering) { + indent(state); + cmark_strbuf_putc(xml, '<'); + cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); + + if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { + snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", + node->start_line, node->start_column, node->end_line, + node->end_column); + cmark_strbuf_puts(xml, buffer); + } + + literal = false; + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); + break; + case CMARK_NODE_TEXT: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_HTML_INLINE: + cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); + escape_xml(xml, node->data, node->len); + cmark_strbuf_puts(xml, "as.heading.level); + cmark_strbuf_puts(xml, buffer); + break; + case CMARK_NODE_CODE_BLOCK: + if (node->as.code.info) { + cmark_strbuf_puts(xml, " info=\""); + escape_xml_str(xml, node->as.code.info); + cmark_strbuf_putc(xml, '"'); + } + cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); + escape_xml(xml, node->data, node->len); + cmark_strbuf_puts(xml, 
"as.custom.on_enter); + cmark_strbuf_putc(xml, '"'); + cmark_strbuf_puts(xml, " on_exit=\""); + escape_xml_str(xml, node->as.custom.on_exit); + cmark_strbuf_putc(xml, '"'); + break; + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + cmark_strbuf_puts(xml, " destination=\""); + escape_xml_str(xml, node->as.link.url); + cmark_strbuf_putc(xml, '"'); + if (node->as.link.title) { + cmark_strbuf_puts(xml, " title=\""); + escape_xml_str(xml, node->as.link.title); + cmark_strbuf_putc(xml, '"'); + } + break; + default: + break; + } + if (node->first_child) { + state->indent += 2; + } else if (!literal) { + cmark_strbuf_puts(xml, " /"); + } + cmark_strbuf_puts(xml, ">\n"); + + } else if (node->first_child) { + state->indent -= 2; + indent(state); + cmark_strbuf_puts(xml, "\n"); + } + + return 1; +} + +char *cmark_render_xml(cmark_node *root, int options) { + char *result; + cmark_strbuf xml = CMARK_BUF_INIT(root->mem); + cmark_event_type ev_type; + cmark_node *cur; + struct render_state state = {&xml, 0}; + + cmark_iter *iter = cmark_iter_new(root); + + cmark_strbuf_puts(state.xml, "\n"); + cmark_strbuf_puts(state.xml, + "\n"); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state, options); + } + result = (char *)cmark_strbuf_detach(&xml); + + cmark_iter_free(iter); + return result; +} diff --git a/deps/cmark/test/CMakeLists.txt b/deps/cmark/test/CMakeLists.txt new file mode 100755 index 0000000..5c07fb7 --- /dev/null +++ b/deps/cmark/test/CMakeLists.txt @@ -0,0 +1,80 @@ +# To get verbose output: cmake --build build --target "test" -- ARGS='-V' + +# By default, we run the spec tests only if python3 is available. 
+# To require the spec tests, compile with -DSPEC_TESTS=1 + +if (SPEC_TESTS) + find_package(PythonInterp 3 REQUIRED) +else(SPEC_TESTS) + find_package(PythonInterp 3) +endif(SPEC_TESTS) + +if (CMARK_SHARED OR CMARK_STATIC) + add_test(NAME api_test COMMAND api_test) +endif() + +if (WIN32) + file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR) + set(NEWPATH "${WIN_DLL_DIR};$ENV{PATH}") + string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}") + set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}") + set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat") +else(WIN32) + set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh") +endif(WIN32) + +IF (PYTHONINTERP_FOUND) + + add_test(html_normalization + ${PYTHON_EXECUTABLE} "-m" "doctest" + "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py" + ) + + if (CMARK_SHARED) + add_test(spectest_library + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" + "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" + ) + + add_test(pathological_tests_library + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py" + "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" + ) + + add_test(roundtriptest_library + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" + "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" + "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" + ) + + add_test(entity_library + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" + "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" + ) + endif() + + add_test(spectest_executable + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + ) + + add_test(smartpuncttest_executable + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" 
"${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart" + ) + + add_test(regressiontest_executable + ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" + "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" "--program" + "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" + ) + +ELSE(PYTHONINTERP_FOUND) + + message("\n*** A python 3 interpreter is required to run the spec tests.\n") + add_test(skipping_spectests + echo "Skipping spec tests, because no python 3 interpreter is available.") + +ENDIF(PYTHONINTERP_FOUND) + diff --git a/deps/cmark/test/afl_test_cases/test.md b/deps/cmark/test/afl_test_cases/test.md new file mode 100644 index 0000000..27eee00 --- /dev/null +++ b/deps/cmark/test/afl_test_cases/test.md @@ -0,0 +1,36 @@ +# H1 + +H2 +-- + +t ☺ +*b* **em** `c` +≥\&\ +\_e\_ + +4) I1 + +5) I2 + > [l](/u "t") + > + > - [f] + > - ![a](/u "t") + > + >> + >> + +~~~ l☺ +cb +~~~ + + c1 + c2 + +*** + +
+x +
+ +[f]: /u "t" + diff --git a/deps/cmark/test/cmark-fuzz.c b/deps/cmark/test/cmark-fuzz.c new file mode 100644 index 0000000..b078e3d --- /dev/null +++ b/deps/cmark/test/cmark-fuzz.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include "cmark.h" + +int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + struct __attribute__((packed)) { + int options; + int width; + } fuzz_config; + + if (size >= sizeof(fuzz_config)) { + /* The beginning of `data` is treated as fuzzer configuration */ + memcpy(&fuzz_config, data, sizeof(fuzz_config)); + + /* Mask off valid option bits */ + fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); + + /* Remainder of input is the markdown */ + const char *markdown = (const char *)(data + sizeof(fuzz_config)); + const size_t markdown_size = size - sizeof(fuzz_config); + cmark_node *doc = cmark_parse_document(markdown, markdown_size, fuzz_config.options); + + free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_html(doc, fuzz_config.options)); + free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width)); + free(cmark_render_xml(doc, fuzz_config.options)); + + cmark_node_free(doc); + } + return 0; +} diff --git a/deps/cmark/test/cmark.py b/deps/cmark/test/cmark.py new file mode 100644 index 0000000..38d2f59 --- /dev/null +++ b/deps/cmark/test/cmark.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p +from subprocess import * +import platform +import os + +def pipe_through_prog(prog, text): + p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE) + [result, err] = p1.communicate(input=text.encode('utf-8')) + return [p1.returncode, result.decode('utf-8'), err] + +def to_html(lib, text): + markdown = 
lib.cmark_markdown_to_html + markdown.restype = c_char_p + markdown.argtypes = [c_char_p, c_size_t, c_int] + textbytes = text.encode('utf-8') + textlen = len(textbytes) + # 1 << 17 == CMARK_OPT_UNSAFE + result = markdown(textbytes, textlen, 1 << 17).decode('utf-8') + return [0, result, ''] + +def to_commonmark(lib, text): + textbytes = text.encode('utf-8') + textlen = len(textbytes) + parse_document = lib.cmark_parse_document + parse_document.restype = c_void_p + parse_document.argtypes = [c_char_p, c_size_t, c_int] + render_commonmark = lib.cmark_render_commonmark + render_commonmark.restype = c_char_p + render_commonmark.argtypes = [c_void_p, c_int, c_int] + node = parse_document(textbytes, textlen, 0) + result = render_commonmark(node, 0, 0).decode('utf-8') + return [0, result, ''] + +class CMark: + def __init__(self, prog=None, library_dir=None): + self.prog = prog + if prog: + prog += ' --unsafe' + self.to_html = lambda x: pipe_through_prog(prog, x) + self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x) + else: + sysname = platform.system() + if sysname == 'Darwin': + libnames = [ "libcmark.dylib" ] + elif sysname == 'Windows': + libnames = [ "cmark.dll", "libcmark.dll" ] + else: + libnames = [ "libcmark.so" ] + if not library_dir: + library_dir = os.path.join("build", "src") + for libname in libnames: + candidate = os.path.join(library_dir, libname) + if os.path.isfile(candidate): + libpath = candidate + break + cmark = CDLL(libpath) + self.to_html = lambda x: to_html(cmark, x) + self.to_commonmark = lambda x: to_commonmark(cmark, x) + diff --git a/deps/cmark/test/entity_tests.py b/deps/cmark/test/entity_tests.py new file mode 100644 index 0000000..27b70e6 --- /dev/null +++ b/deps/cmark/test/entity_tests.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import re +import os +import argparse +import sys +import platform +import html +from cmark import CMark + +def get_entities(): + regex = r'^{\(unsigned 
char\*\)"([^"]+)", \{([^}]+)\}' + with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f: + code = f.read() + entities = [] + for entity, utf8 in re.findall(regex, code, re.MULTILINE): + utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8') + entities.append((entity, utf8)) + return entities + +parser = argparse.ArgumentParser(description='Run cmark tests.') +parser.add_argument('--program', dest='program', nargs='?', default=None, + help='program to test') +parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') +args = parser.parse_args(sys.argv[1:]) + +cmark = CMark(prog=args.program, library_dir=args.library_dir) + +entities = get_entities() + +passed = 0 +errored = 0 +failed = 0 + +exceptions = { + 'quot': '"', + 'QUOT': '"', + + # These are broken, but I'm not too worried about them. + 'nvlt': '<⃒', + 'nvgt': '>⃒', +} + +print("Testing entities:") +for entity, utf8 in entities: + [rc, actual, err] = cmark.to_html("&{};".format(entity)) + check = exceptions.get(entity, utf8) + + if rc != 0: + errored += 1 + print(entity, '[ERRORED (return code {})]'.format(rc)) + print(err) + elif check in actual: + # print(entity, '[PASSED]') # omit noisy success output + passed += 1 + else: + print(entity, '[FAILED]') + print(repr(actual)) + failed += 1 + +print("{} passed, {} failed, {} errored".format(passed, failed, errored)) +if failed == 0 and errored == 0: + exit(0) +else: + exit(1) diff --git a/deps/cmark/test/fuzzing_dictionary b/deps/cmark/test/fuzzing_dictionary new file mode 100644 index 0000000..b06783c --- /dev/null +++ b/deps/cmark/test/fuzzing_dictionary @@ -0,0 +1,49 @@ +asterisk="*" +attr_generic=" a=\"1\"" +attr_href=" href=\"1\"" +attr_xml_lang=" xml:lang=\"1\"" +attr_xmlns=" xmlns=\"1\"" +backslash="\\" +backtick="`" +colon=":" +dashes="---" +double_quote="\"" +entity_builtin="<" +entity_decimal="" +entity_external="&a;" +entity_hex="" 
+equals="===" +exclamation="!" +greater_than=">" +hash="#" +hyphen="-" +indent=" " +left_bracket="[" +left_paren="(" +less_than="<" +plus="+" +right_bracket="]" +right_paren=")" +single_quote="'" +string_any="ANY" +string_brackets="[]" +string_cdata="CDATA" +string_dashes="--" +string_empty_dblquotes="\"\"" +string_empty_quotes="''" +string_idrefs="IDREFS" +string_parentheses="()" +string_pcdata="#PCDATA" +tag_cdata="" +tag_doctype="" +tag_open_close="" +tag_open_exclamation="" +tag_xml_q="" +underscore="_" diff --git a/deps/cmark/test/normalize.py b/deps/cmark/test/normalize.py new file mode 100644 index 0000000..f8ece18 --- /dev/null +++ b/deps/cmark/test/normalize.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +from html.parser import HTMLParser +import urllib + +try: + from html.parser import HTMLParseError +except ImportError: + # HTMLParseError was removed in Python 3.5. It could never be + # thrown, so we define a placeholder instead. + class HTMLParseError(Exception): + pass + +from html.entities import name2codepoint +import sys +import re +import html + +# Normalization code, adapted from +# https://github.com/karlcow/markdown-testsuite/ +significant_attrs = ["alt", "href", "src", "title"] +whitespace_re = re.compile('\s+') +class MyHTMLParser(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.convert_charrefs = False + self.last = "starttag" + self.in_pre = False + self.output = "" + self.last_tag = "" + def handle_data(self, data): + after_tag = self.last == "endtag" or self.last == "starttag" + after_block_tag = after_tag and self.is_block_tag(self.last_tag) + if after_tag and self.last_tag == "br": + data = data.lstrip('\n') + if not self.in_pre: + data = whitespace_re.sub(' ', data) + if after_block_tag and not self.in_pre: + if self.last == "starttag": + data = data.lstrip() + elif self.last == "endtag": + data = data.strip() + self.output += data + self.last = "data" + def handle_endtag(self, tag): + if tag == "pre": + self.in_pre 
= False + elif self.is_block_tag(tag): + self.output = self.output.rstrip() + self.output += "" + self.last_tag = tag + self.last = "endtag" + def handle_starttag(self, tag, attrs): + if tag == "pre": + self.in_pre = True + if self.is_block_tag(tag): + self.output = self.output.rstrip() + self.output += "<" + tag + # For now we don't strip out 'extra' attributes, because of + # raw HTML test cases. + # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs) + if attrs: + attrs.sort() + for (k,v) in attrs: + self.output += " " + k + if v in ['href','src']: + self.output += ("=" + '"' + + urllib.quote(urllib.unquote(v), safe='/') + '"') + elif v != None: + self.output += ("=" + '"' + html.escape(v,quote=True) + '"') + self.output += ">" + self.last_tag = tag + self.last = "starttag" + def handle_startendtag(self, tag, attrs): + """Ignore closing tag for self-closing """ + self.handle_starttag(tag, attrs) + self.last_tag = tag + self.last = "endtag" + def handle_comment(self, data): + self.output += '' + self.last = "comment" + def handle_decl(self, data): + self.output += '' + self.last = "decl" + def unknown_decl(self, data): + self.output += '' + self.last = "decl" + def handle_pi(self,data): + self.output += '' + self.last = "pi" + def handle_entityref(self, name): + try: + c = chr(name2codepoint[name]) + except KeyError: + c = None + self.output_char(c, '&' + name + ';') + self.last = "ref" + def handle_charref(self, name): + try: + if name.startswith("x"): + c = chr(int(name[1:], 16)) + else: + c = chr(int(name)) + except ValueError: + c = None + self.output_char(c, '&' + name + ';') + self.last = "ref" + # Helpers. 
+ def output_char(self, c, fallback): + if c == '<': + self.output += "<" + elif c == '>': + self.output += ">" + elif c == '&': + self.output += "&" + elif c == '"': + self.output += """ + elif c == None: + self.output += fallback + else: + self.output += c + + def is_block_tag(self,tag): + return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote', + 'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas', + 'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd', + 'progress', 'div', 'section', 'dl', 'table', 'td', 'dt', + 'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption', + 'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul', + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style']) + +def normalize_html(html): + r""" + Return normalized form of HTML which ignores insignificant output + differences: + + Multiple inner whitespaces are collapsed to a single space (except + in pre tags): + + >>> normalize_html("

a \t b

") + '

a b

' + + >>> normalize_html("

a \t\nb

") + '

a b

' + + * Whitespace surrounding block-level tags is removed. + + >>> normalize_html("

a b

") + '

a b

' + + >>> normalize_html("

a b

") + '

a b

' + + >>> normalize_html("

a b

") + '

a b

' + + >>> normalize_html("\n\t

\n\t\ta b\t\t

\n\t") + '

a b

' + + >>> normalize_html("a b ") + 'a b ' + + * Self-closing tags are converted to open tags. + + >>> normalize_html("
") + '
' + + * Attributes are sorted and lowercased. + + >>> normalize_html('
x') + 'x' + + * References are converted to unicode, except that '<', '>', '&', and + '"' are rendered using entities. + + >>> normalize_html("∀&><"") + '\u2200&><"' + + """ + html_chunk_re = re.compile("(\|\<[^>]*\>|[^<]+)") + try: + parser = MyHTMLParser() + # We work around HTMLParser's limitations parsing CDATA + # by breaking the input into chunks and passing CDATA chunks + # through verbatim. + for chunk in re.finditer(html_chunk_re, html): + if chunk.group(0)[:8] == "\[%s\]

\n){%d}" % (bad_key, COUNT-1)) + + +# list of pairs consisting of input and a regex that must match the output. +pathological = { + # note - some pythons have limit of 65535 for {num-matches} in re. + "nested strong emph": + (("*a **a " * 32500) + "b" + (" a** a*" * 32500), + re.compile("(a a ){32500}b( a a){32500}")), + "many emph closers with no openers": + (("a_ " * 32500), + re.compile("(a[_] ){32499}a_")), + "many emph openers with no closers": + (("_a " * 32500), + re.compile("(_a ){32499}_a")), + "many link closers with no openers": + (("a]" * 32500), + re.compile("(a\]){32500}")), + "many link openers with no closers": + (("[a" * 32500), + re.compile("(\[a){32500}")), + "mismatched openers and closers": + (("*a_ " * 25000), + re.compile("([*]a[_] ){24999}[*]a_")), + "issue #389": + (("*a " * 20000 + "_a*_ " * 20000), + re.compile("(a ){20000}(_a<\/em>_ ?){20000}")), + "openers and closers multiple of 3": + (("a**b" + ("c* " * 25000)), + re.compile("a[*][*]b(c[*] ){24999}c[*]")), + "link openers and emph closers": + (("[ a_" * 25000), + re.compile("(\[ a_){25000}")), + "pattern [ (]( repeated": + (("[ (](" * 40000), + re.compile("(\[ \(\]\(){40000}")), + "pattern ![[]() repeated": + ("![[]()" * 160000, + re.compile("(!\[){160000}")), + "hard link/emph case": + ("**x [a*b**c*](d)", + re.compile("\\*\\*x ab\\*\\*c")), + "nested brackets": + (("[" * 25000) + "a" + ("]" * 25000), + re.compile("\[{25000}a\]{25000}")), + "nested block quotes": + ((("> " * 25000) + "a"), + re.compile("(
\n){25000}")), + "deeply nested lists": + ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))), + re.compile("
    \n(
  • a\n
      \n){499}
    • a
    • \n
    \n(
  • \n
\n){499}")), + "U+0000 in input": + ("abc\u0000de\u0000", + re.compile("abc\ufffd?de\ufffd?")), + "backticks": + ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))), + re.compile("^

[e`]*

\n$")), + "unclosed links A": + ("[a](line1

+

line2

+```````````````````````````````` + +Issue #114: cmark skipping first character in line +(Important: the blank lines around "Repeatedly" contain a tab.) + +```````````````````````````````` example +By taking it apart + +- alternative solutions +→ +Repeatedly solving +→ +- how techniques +. +

By taking it apart

+
    +
  • alternative solutions
  • +
+

Repeatedly solving

+
    +
  • how techniques
  • +
+```````````````````````````````` + +Issue jgm/CommonMark#430: h2..h6 not recognized as block tags. + +```````````````````````````````` example +

lorem

+ +

lorem

+ +

lorem

+ +

lorem

+ +
lorem
+ +
lorem
+. +

lorem

+

lorem

+

lorem

+

lorem

+
lorem
+
lorem
+```````````````````````````````` + +Issue jgm/commonmark.js#109 - tabs after setext header line + + +```````````````````````````````` example +hi +--→ +. +

hi

+```````````````````````````````` + +Issue #177 - incorrect emphasis parsing + +```````````````````````````````` example +a***b* c* +. +

a*b c

+```````````````````````````````` + +Issue #193 - unescaped left angle brackets in link destination + +```````````````````````````````` example +[a] + +[a]: +. +

[a]

+

[a]: <te

+```````````````````````````````` + +Issue #192 - escaped spaces in link destination + + +```````````````````````````````` example +[a](te\ st) +. +

[a](te\ st)

+```````````````````````````````` + +Issue #527 - meta tags in inline contexts + +```````````````````````````````` example +City: + + + +. +

City: + + +

+```````````````````````````````` + +Issue #530 - link parsing corner cases + +```````````````````````````````` example +[a](\ b) + +[a](<[a](\ b)

+

[a](<<b)

+

[a](<b +)

+```````````````````````````````` + +Issue commonmark#526 - unescaped ( in link title + +```````````````````````````````` example +[link](url ((title)) +. +

[link](url ((title))

+```````````````````````````````` + +Issue commonmark#517 - script, pre, style close tag without +opener. + +```````````````````````````````` example + + + + + +. + + + +```````````````````````````````` + +Issue #289. + +```````````````````````````````` example +[a]( +. +

[a](<b) c>

+```````````````````````````````` + +Issue #334 - UTF-8 BOM + +```````````````````````````````` example +# Hi +. +

Hi

+```````````````````````````````` + +Issue commonmark.js#213 - type 7 blocks can't interrupt +paragraph + +```````````````````````````````` example +- +. +
    +
  • +
  • +
+```````````````````````````````` + +Issue #383 - emphasis parsing. + +```````````````````````````````` example +*****Hello*world**** +. +

**Helloworld

+```````````````````````````````` + +Issue #424 - emphasis before links + +```````````````````````````````` example +*text* [link](#section) +. +

text link

+```````````````````````````````` + +` +. + +```````````````````````````````` + +Declarations don't need spaces, according to the spec +```````````````````````````````` example +x +. +

x

+```````````````````````````````` + + diff --git a/deps/cmark/test/roundtrip_tests.py b/deps/cmark/test/roundtrip_tests.py new file mode 100644 index 0000000..13444b1 --- /dev/null +++ b/deps/cmark/test/roundtrip_tests.py @@ -0,0 +1,46 @@ +import re +import sys +from spec_tests import get_tests, do_test +from cmark import CMark +import argparse + +parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.') +parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, + help='program to test') +parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', + help='path to spec') +parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', + default=None, help='limit to sections matching regex pattern') +parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') +parser.add_argument('--no-normalize', dest='normalize', + action='store_const', const=False, default=True, + help='do not normalize HTML') +parser.add_argument('-n', '--number', type=int, default=None, + help='only consider the test with the given number') +args = parser.parse_args(sys.argv[1:]) + +spec = sys.argv[1] + +def converter(md): + cmark = CMark(prog=args.program, library_dir=args.library_dir) + [ec, result, err] = cmark.to_commonmark(md) + if ec == 0: + [ec, html, err] = cmark.to_html(result) + if ec == 0: + # In the commonmark writer we insert dummy HTML + # comments between lists, and between lists and code + # blocks. 
Strip these out, since the spec uses + # two blank lines instead: + return [ec, re.sub('\n', '', html), ''] + else: + return [ec, html, err] + else: + return [ec, result, err] + +tests = get_tests(args.spec) +result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0} +for test in tests: + do_test(converter, test, args.normalize, result_counts) + +exit(result_counts['fail'] + result_counts['error']) diff --git a/deps/cmark/test/run-cmark-fuzz b/deps/cmark/test/run-cmark-fuzz new file mode 100755 index 0000000..75100b8 --- /dev/null +++ b/deps/cmark/test/run-cmark-fuzz @@ -0,0 +1,4 @@ +#!/bin/bash -eu +CMARK_FUZZ="$1" +shift +ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1" "${CMARK_FUZZ}" -max_len=256 -timeout=1 -dict=test/fuzzing_dictionary "$@" diff --git a/deps/cmark/test/smart_punct.txt b/deps/cmark/test/smart_punct.txt new file mode 100644 index 0000000..fd55e62 --- /dev/null +++ b/deps/cmark/test/smart_punct.txt @@ -0,0 +1,177 @@ +## Smart punctuation + +Open quotes are matched with closed quotes. +The same method is used for matching openers and closers +as is used in emphasis parsing: + +```````````````````````````````` example +"Hello," said the spider. +"'Shelob' is my name." +. +

“Hello,” said the spider. +“‘Shelob’ is my name.”

+```````````````````````````````` + +```````````````````````````````` example +'A', 'B', and 'C' are letters. +. +

‘A’, ‘B’, and ‘C’ are letters.

+```````````````````````````````` + +```````````````````````````````` example +'Oak,' 'elm,' and 'beech' are names of trees. +So is 'pine.' +. +

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. +So is ‘pine.’

+```````````````````````````````` + +```````````````````````````````` example +'He said, "I want to go."' +. +

‘He said, “I want to go.”’

+```````````````````````````````` + +A single quote that isn't an open quote matched +with a close quote will be treated as an +apostrophe: + +```````````````````````````````` example +Were you alive in the 70's? +. +

Were you alive in the 70’s?

+```````````````````````````````` + +```````````````````````````````` example +Here is some quoted '`code`' and a "[quoted link](url)". +. +

Here is some quoted ‘code’ and a “quoted link”.

+```````````````````````````````` + +Here the first `'` is treated as an apostrophe, not +an open quote, because the final single quote is matched +by the single quote before `jolly`: + +```````````````````````````````` example +'tis the season to be 'jolly' +. +

’tis the season to be ‘jolly’

+```````````````````````````````` + +Multiple apostrophes should not be marked as open/closing quotes. + +```````````````````````````````` example +'We'll use Jane's boat and John's truck,' Jenna said. +. +

‘We’ll use Jane’s boat and John’s truck,’ Jenna said.

+```````````````````````````````` + +An unmatched double quote will be interpreted as a +left double quote, to facilitate this style: + +```````````````````````````````` example +"A paragraph with no closing quote. + +"Second paragraph by same speaker, in fiction." +. +

“A paragraph with no closing quote.

+

“Second paragraph by same speaker, in fiction.”

+```````````````````````````````` + +A quote following a `]` or `)` character cannot +be an open quote: + +```````````````````````````````` example +[a]'s b' +. +

[a]’s b’

+```````````````````````````````` + +Quotes that are escaped come out as literal straight +quotes: + +```````````````````````````````` example +\"This is not smart.\" +This isn\'t either. +5\'8\" +. +

"This is not smart." +This isn't either. +5'8"

+```````````````````````````````` + +Two hyphens form an en-dash, three an em-dash. + +```````````````````````````````` example +Some dashes: em---em +en--en +em --- em +en -- en +2--3 +. +

Some dashes: em—em +en–en +em — em +en – en +2–3

+```````````````````````````````` + +A sequence of more than three hyphens is +parsed as a sequence of em and/or en dashes, +with no hyphens. If possible, a homogeneous +sequence of dashes is used (so, 10 hyphens += 5 en dashes, and 9 hyphens = 3 em dashes). +When a heterogeneous sequence must be used, +the em dashes come first, followed by the en +dashes, and as few en dashes as possible are +used (so, 7 hyphens = 2 em dashes and 1 en +dash). + +```````````````````````````````` example +one- +two-- +three--- +four---- +five----- +six------ +seven------- +eight-------- +nine--------- +thirteen-------------. +. +

one- +two– +three— +four–– +five—– +six—— +seven—–– +eight–––– +nine——— +thirteen———––.

+```````````````````````````````` + +Hyphens can be escaped: + +```````````````````````````````` example +Escaped hyphens: \-- \-\-\-. +. +

Escaped hyphens: -- ---.

+```````````````````````````````` + +Three periods form an ellipsis: + +```````````````````````````````` example +Ellipses...and...and.... +. +

Ellipses…and…and….

+```````````````````````````````` + +Periods can be escaped if ellipsis-formation +is not wanted: + +```````````````````````````````` example +No ellipses\.\.\. +. +

No ellipses...

+```````````````````````````````` diff --git a/deps/cmark/test/spec.txt b/deps/cmark/test/spec.txt new file mode 100644 index 0000000..43247fe --- /dev/null +++ b/deps/cmark/test/spec.txt @@ -0,0 +1,9742 @@ +--- +title: CommonMark Spec +author: John MacFarlane +version: '0.30' +date: '2021-06-19' +license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +... + +# Introduction + +## What is Markdown? + +Markdown is a plain text format for writing structured documents, +based on conventions for indicating formatting in email +and usenet posts. It was developed by John Gruber (with +help from Aaron Swartz) and released in 2004 in the form of a +[syntax description](http://daringfireball.net/projects/markdown/syntax) +and a Perl script (`Markdown.pl`) for converting Markdown to +HTML. In the next decade, dozens of implementations were +developed in many languages. Some extended the original +Markdown syntax with conventions for footnotes, tables, and +other document elements. Some allowed Markdown documents to be +rendered in formats other than HTML. Websites like Reddit, +StackOverflow, and GitHub had millions of people using Markdown. +And Markdown started to be used beyond the web, to author books, +articles, slide shows, letters, and lecture notes. + +What distinguishes Markdown from many other lightweight markup +syntaxes, which are often easier to write, is its readability. +As Gruber writes: + +> The overriding design goal for Markdown's formatting syntax is +> to make it as readable as possible. The idea is that a +> Markdown-formatted document should be publishable as-is, as +> plain text, without looking like it's been marked up with tags +> or formatting instructions. +> () + +The point can be illustrated by comparing a sample of +[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +an equivalent sample of Markdown. Here is a sample of +AsciiDoc from the AsciiDoc manual: + +``` +1. List item one. 
++ +List item one continued with a second paragraph followed by an +Indented block. ++ +................. +$ ls *.sh +$ mv *.sh ~/tmp +................. ++ +List item continued with a third paragraph. + +2. List item two continued with an open block. ++ +-- +This paragraph is part of the preceding list item. + +a. This list is nested and does not require explicit item +continuation. ++ +This paragraph is part of the preceding list item. + +b. List item b. + +This paragraph belongs to item two of the outer list. +-- +``` + +And here is the equivalent in Markdown: +``` +1. List item one. + + List item one continued with a second paragraph followed by an + Indented block. + + $ ls *.sh + $ mv *.sh ~/tmp + + List item continued with a third paragraph. + +2. List item two continued with an open block. + + This paragraph is part of the preceding list item. + + 1. This list is nested and does not require explicit item continuation. + + This paragraph is part of the preceding list item. + + 2. List item b. + + This paragraph belongs to item two of the outer list. +``` + +The AsciiDoc version is, arguably, easier to write. You don't need +to worry about indentation. But the Markdown version is much easier +to read. The nesting of list items is apparent to the eye in the +source, not just in the processed document. + +## Why is a spec needed? + +John Gruber's [canonical description of Markdown's +syntax](http://daringfireball.net/projects/markdown/syntax) +does not specify the syntax unambiguously. Here are some examples of +questions it does not answer: + +1. How much indentation is needed for a sublist? The spec says that + continuation paragraphs need to be indented four spaces, but is + not fully explicit about sublists. It is natural to think that + they, too, must be indented four spaces, but `Markdown.pl` does + not require that. 
This is hardly a "corner case," and divergences + between implementations on this issue often lead to surprises for + users in real documents. (See [this comment by John + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) + +2. Is a blank line needed before a block quote or heading? + Most implementations do not require the blank line. However, + this can lead to unexpected results in hard-wrapped text, and + also to ambiguities in parsing (note that some implementations + put the heading inside the blockquote, while others do not). + (John Gruber has also spoken [in favor of requiring the blank + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) + +3. Is a blank line needed before an indented code block? + (`Markdown.pl` requires it, but this is not mentioned in the + documentation, and some implementations do not require it.) + + ``` markdown + paragraph + code? + ``` + +4. What is the exact rule for determining when list items get + wrapped in `

` tags? Can a list be partially "loose" and partially + "tight"? What should we do with a list like this? + + ``` markdown + 1. one + + 2. two + 3. three + ``` + + Or this? + + ``` markdown + 1. one + - a + + - b + 2. two + ``` + + (There are some relevant comments by John Gruber + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) + +5. Can list markers be indented? Can ordered list markers be right-aligned? + + ``` markdown + 8. item 1 + 9. item 2 + 10. item 2a + ``` + +6. Is this one list with a thematic break in its second item, + or two lists separated by a thematic break? + + ``` markdown + * a + * * * * * + * b + ``` + +7. When list markers change from numbers to bullets, do we have + two lists or one? (The Markdown syntax description suggests two, + but the perl scripts and many other implementations produce one.) + + ``` markdown + 1. fee + 2. fie + - foe + - fum + ``` + +8. What are the precedence rules for the markers of inline structure? + For example, is the following a valid link, or does the code span + take precedence ? + + ``` markdown + [a backtick (`)](/url) and [another backtick (`)](/url). + ``` + +9. What are the precedence rules for markers of emphasis and strong + emphasis? For example, how should the following be parsed? + + ``` markdown + *foo *bar* baz* + ``` + +10. What are the precedence rules between block-level and inline-level + structure? For example, how should the following be parsed? + + ``` markdown + - `a long code span can contain a hyphen like this + - and it can screw things up` + ``` + +11. Can list items include section headings? (`Markdown.pl` does not + allow this, but does allow blockquotes to include headings.) + + ``` markdown + - # Heading + ``` + +12. Can list items be empty? + + ``` markdown + * a + * + * b + ``` + +13. Can link references be defined inside block quotes or list items? + + ``` markdown + > Blockquote [foo]. + > + > [foo]: /url + ``` + +14. 
If there are multiple definitions for the same reference, which takes + precedence? + + ``` markdown + [foo]: /url1 + [foo]: /url2 + + [foo][] + ``` + +In the absence of a spec, early implementers consulted `Markdown.pl` +to resolve these ambiguities. But `Markdown.pl` was quite buggy, and +gave manifestly bad results in many cases, so it was not a +satisfactory replacement for a spec. + +Because there is no unambiguous spec, implementations have diverged +considerably. As a result, users are often surprised to find that +a document that renders one way on one system (say, a GitHub wiki) +renders differently on another (say, converting to docbook using +pandoc). To make matters worse, because nothing in Markdown counts +as a "syntax error," the divergence often isn't discovered right away. + +## About this document + +This document attempts to specify Markdown syntax unambiguously. +It contains many examples with side-by-side Markdown and +HTML. These are intended to double as conformance tests. An +accompanying script `spec_tests.py` can be used to run the tests +against any Markdown program: + + python test/spec_tests.py --spec spec.txt --program PROGRAM + +Since this document describes how Markdown is to be parsed into +an abstract syntax tree, it would have made sense to use an abstract +representation of the syntax tree instead of HTML. But HTML is capable +of representing the structural distinctions we need to make, and the +choice of HTML for the tests makes it possible to run the tests against +an implementation without writing an abstract syntax tree renderer. + +Note that not every feature of the HTML samples is mandated by +the spec. For example, the spec says what counts as a link +destination, but it doesn't mandate that non-ASCII characters in +the URL be percent-encoded. To use the automatic tests, +implementers will need to provide a renderer that conforms to +the expectations of the spec examples (percent-encoding +non-ASCII characters in URLs). 
But a conforming implementation +can use a different renderer and may choose not to +percent-encode non-ASCII characters in URLs. + +This document is generated from a text file, `spec.txt`, written +in Markdown with a small extension for the side-by-side tests. +The script `tools/makespec.py` can be used to convert `spec.txt` into +HTML or CommonMark (which can then be converted into other formats). + +In the examples, the `→` character is used to represent tabs. + +# Preliminaries + +## Characters and lines + +Any sequence of [characters] is a valid CommonMark +document. + +A [character](@) is a Unicode code point. Although some +code points (for example, combining accents) do not correspond to +characters in an intuitive sense, all code points count as characters +for purposes of this spec. + +This spec does not specify an encoding; it thinks of lines as composed +of [characters] rather than bytes. A conforming parser may be limited +to a certain encoding. + +A [line](@) is a sequence of zero or more [characters] +other than line feed (`U+000A`) or carriage return (`U+000D`), +followed by a [line ending] or by the end of file. + +A [line ending](@) is a line feed (`U+000A`), a carriage return +(`U+000D`) not followed by a line feed, or a carriage return and a +following line feed. + +A line containing no characters, or a line containing only spaces +(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). + +The following definitions of character classes will be used in this spec: + +A [Unicode whitespace character](@) is +any code point in the Unicode `Zs` general category, or a tab (`U+0009`), +line feed (`U+000A`), form feed (`U+000C`), or carriage return (`U+000D`). + +[Unicode whitespace](@) is a sequence of one or more +[Unicode whitespace characters]. + +A [tab](@) is `U+0009`. + +A [space](@) is `U+0020`. + +An [ASCII control character](@) is a character between `U+0000–1F` (both +including) or `U+007F`. 
+ +An [ASCII punctuation character](@) +is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, +`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), +`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040), +`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), +`{`, `|`, `}`, or `~` (U+007B–007E). + +A [Unicode punctuation character](@) is an [ASCII +punctuation character] or anything in +the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. + +## Tabs + +Tabs in lines are not expanded to [spaces]. However, +in contexts where spaces help to define block structure, +tabs behave as if they were replaced by spaces with a tab stop +of 4 characters. + +Thus, for example, a tab can be used instead of four spaces +in an indented code block. (Note, however, that internal +tabs are passed through as literal tabs, not expanded to +spaces.) + +```````````````````````````````` example +→foo→baz→→bim +. +

foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + →foo→baz→→bim +. +
foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + a→a + ὐ→a +. +
a→a
+ὐ→a
+
+```````````````````````````````` + +In the following example, a continuation paragraph of a list +item is indented with a tab; this has exactly the same effect +as indentation with four spaces would: + +```````````````````````````````` example + - foo + +→bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +- foo + +→→bar +. +
    +
  • +

    foo

    +
      bar
    +
    +
  • +
+```````````````````````````````` + +Normally the `>` that begins a block quote may be followed +optionally by a space, which is not considered part of the +content. In the following case `>` is followed by a tab, +which is treated as if it were expanded into three spaces. +Since one of these spaces is considered part of the +delimiter, `foo` is considered to be indented six spaces +inside the block quote context, so we get an indented +code block starting with two spaces. + +```````````````````````````````` example +>→→foo +. +
+
  foo
+
+
+```````````````````````````````` + +```````````````````````````````` example +-→→foo +. +
    +
  • +
      foo
    +
    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example + foo +→bar +. +
foo
+bar
+
+```````````````````````````````` + +```````````````````````````````` example + - foo + - bar +→ - baz +. +
    +
  • foo +
      +
    • bar +
        +
      • baz
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +#→Foo +. +

Foo

+```````````````````````````````` + +```````````````````````````````` example +*→*→*→ +. +
+```````````````````````````````` + + +## Insecure characters + +For security reasons, the Unicode character `U+0000` must be replaced +with the REPLACEMENT CHARACTER (`U+FFFD`). + + +## Backslash escapes + +Any ASCII punctuation character may be backslash-escaped: + +```````````````````````````````` example +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ +. +

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+```````````````````````````````` + + +Backslashes before other characters are treated as literal +backslashes: + +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

\→\A\a\ \3\φ\«

+```````````````````````````````` + + +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: + +```````````````````````````````` example +\*not emphasized* +\
not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +\ö not a character entity +. +

*not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference" +&ouml; not a character entity

+```````````````````````````````` + + +If a backslash is itself escaped, the following character is not: + +```````````````````````````````` example +\\*emphasis* +. +

\emphasis

+```````````````````````````````` + + +A backslash at the end of the line is a [hard line break]: + +```````````````````````````````` example +foo\ +bar +. +

foo
+bar

+```````````````````````````````` + + +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: + +```````````````````````````````` example +`` \[\` `` +. +

\[\`

+```````````````````````````````` + + +```````````````````````````````` example + \[\] +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +\[\] +~~~ +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://example.com?find=\*

+```````````````````````````````` + + +```````````````````````````````` example + +. + +```````````````````````````````` + + +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: + +```````````````````````````````` example +[foo](/bar\* "ti\*tle") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /bar\* "ti\*tle" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` foo\+bar +foo +``` +. +
foo
+
+```````````````````````````````` + + +## Entity and numeric character references + +Valid HTML entity references and numeric character references +can be used in place of the corresponding Unicode character, +with the following exceptions: + +- Entity and character references are not recognized in code + blocks and code spans. + +- Entity and character references cannot stand in place of + special characters that define structural elements in + CommonMark. For example, although `*` can be used + in place of a literal `*` character, `*` cannot replace + `*` in emphasis delimiters, bullet list markers, or thematic + breaks. + +Conforming CommonMark parsers need not store information about +whether a particular character was represented in the source +using a Unicode character or an entity reference. + +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document +is used as an authoritative source for the valid entity +references and their corresponding code points. + +```````````````````````````````` example +  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸ +. +

  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

+```````````````````````````````` + + +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--7 arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. + +```````````````````````````````` example +&#35; &#1234; &#992; &#0; +. +

# Ӓ Ϡ �

+```````````````````````````````` + + +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1--6 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). + +```````````````````````````````` example +&#X22; &#XD06; &#xcab; +. +

" ആ ಫ

+```````````````````````````````` + + +Here are some nonentities: + +```````````````````````````````` example +&nbsp &x; &#; &#x; +&#87654321; +&#abcdef0; +&ThisIsNotDefined; &hi?; +. +

&nbsp &x; &#; &#x; +&#87654321; +&#abcdef0; +&ThisIsNotDefined; &hi?;

+```````````````````````````````` + + +Although HTML5 does accept some entity references +without a trailing semicolon (such as `&copy`), these are not +recognized here, because it makes the grammar too ambiguous: + +```````````````````````````````` example +&copy +. +

&copy

+```````````````````````````````` + + +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: + +```````````````````````````````` example +&MadeUpEntity; +. +

&MadeUpEntity;

+```````````````````````````````` + + +Entity and numeric character references are recognized in any +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: + +```````````````````````````````` example +<a href="&ouml;&ouml;.html"> +. +<a href="&ouml;&ouml;.html"> +```````````````````````````````` + + +```````````````````````````````` example +[foo](/f&ouml;&ouml; "f&ouml;&ouml;") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /f&ouml;&ouml; "f&ouml;&ouml;" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` f&ouml;&ouml; +foo +``` +. +
foo
+
+```````````````````````````````` + + +Entity and numeric character references are treated as literal +text in code spans and code blocks: + +```````````````````````````````` example +`f&ouml;&ouml;` +. +

f&ouml;&ouml;

+```````````````````````````````` + + +```````````````````````````````` example +    f&ouml;f&ouml; +. +
f&ouml;f&ouml;
+
+```````````````````````````````` + + +Entity and numeric character references cannot be used +in place of symbols indicating structure in CommonMark +documents. + +```````````````````````````````` example +&#42;foo&#42; +*foo* +. +

*foo* +foo

+```````````````````````````````` + +```````````````````````````````` example +&#42; foo + +* foo +. +

* foo

+
    +
  • foo
  • +
+```````````````````````````````` + +```````````````````````````````` example +foo&#10;&#10;bar +. +

foo + +bar

+```````````````````````````````` + +```````````````````````````````` example +&#9;foo +. +

→foo

+```````````````````````````````` + + +```````````````````````````````` example +[a](url &quot;tit&quot;) +. +

[a](url "tit")

+```````````````````````````````` + + + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code spans, and so on. + +## Precedence + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: + +```````````````````````````````` example +- `one +- two` +. +
    +
  • `one
  • +
  • two`
  • +
+```````````````````````````````` + + +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + +## Container blocks and leaf blocks + +We can divide blocks into two types: +[container blocks](#container-blocks), +which can contain other blocks, and [leaf blocks](#leaf-blocks), +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + +## Thematic breaks + +A line consisting of optionally up to three spaces of indentation, followed by a +sequence of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces or tabs, forms a +[thematic break](@). + +```````````````````````````````` example +*** +--- +___ +. +
+
+
+```````````````````````````````` + + +Wrong characters: + +```````````````````````````````` example ++++ +. +

+++

+```````````````````````````````` + + +```````````````````````````````` example +=== +. +

===

+```````````````````````````````` + + +Not enough characters: + +```````````````````````````````` example +-- +** +__ +. +

-- +** +__

+```````````````````````````````` + + +Up to three spaces of indentation are allowed: + +```````````````````````````````` example + *** + *** + *** +. +
+
+
+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + *** +. +
***
+
+```````````````````````````````` + + +```````````````````````````````` example +Foo + *** +. +

Foo +***

+```````````````````````````````` + + +More than three characters may be used: + +```````````````````````````````` example +_____________________________________ +. +
+```````````````````````````````` + + +Spaces and tabs are allowed between the characters: + +```````````````````````````````` example + - - - +. +
+```````````````````````````````` + + +```````````````````````````````` example + ** * ** * ** * ** +. +
+```````````````````````````````` + + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +Spaces and tabs are allowed at the end: + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +However, no other characters may occur in the line: + +```````````````````````````````` example +_ _ _ _ a + +a------ + +---a--- +. +

_ _ _ _ a

+

a------

+

---a---

+```````````````````````````````` + + +It is required that all of the characters other than spaces or tabs be the same. +So, this is not a thematic break: + +```````````````````````````````` example + *-* +. +

-

+```````````````````````````````` + + +Thematic breaks do not need blank lines before or after: + +```````````````````````````````` example +- foo +*** +- bar +. +
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +Thematic breaks can interrupt a paragraph: + +```````````````````````````````` example +Foo +*** +bar +. +

Foo

+
+

bar

+```````````````````````````````` + + +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: + +```````````````````````````````` example +Foo +--- +bar +. +

Foo

+

bar

+```````````````````````````````` + + +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: + +```````````````````````````````` example +* Foo +* * * +* Bar +. +
    +
  • Foo
  • +
+
+
    +
  • Bar
  • +
+```````````````````````````````` + + +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. +
    +
  • Foo
  • +
  • +
    +
  • +
+```````````````````````````````` + + +## ATX headings + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by spaces or tabs, or +by the end of line. The optional closing sequence of `#`s must be preceded by +spaces or tabs and may be followed by spaces or tabs only. The opening +`#` character may be preceded by up to three spaces of indentation. The raw +contents of the heading are stripped of leading and trailing space or tabs +before being parsed as inline content. The heading level is equal to the number +of `#` characters in the opening sequence. + +Simple headings: + +```````````````````````````````` example +# foo +## foo +### foo +#### foo +##### foo +###### foo +. +

foo

+

foo

+

foo

+

foo

+
foo
+
foo
+```````````````````````````````` + + +More than six `#` characters is not a heading: + +```````````````````````````````` example +####### foo +. +

####### foo

+```````````````````````````````` + + +At least one space or tab is required between the `#` characters and the +heading's contents, unless the heading is empty. Note that many +implementations currently do not require the space. However, the +space was required by the +[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), +and it helps prevent things like the following from being parsed as +headings: + +```````````````````````````````` example +#5 bolt + +#hashtag +. +

#5 bolt

+

#hashtag

+```````````````````````````````` + + +This is not a heading, because the first `#` is escaped: + +```````````````````````````````` example +\## foo +. +

## foo

+```````````````````````````````` + + +Contents are parsed as inlines: + +```````````````````````````````` example +# foo *bar* \*baz\* +. +

foo bar *baz*

+```````````````````````````````` + + +Leading and trailing spaces or tabs are ignored in parsing inline content: + +```````````````````````````````` example +# foo +. +

foo

+```````````````````````````````` + + +Up to three spaces of indentation are allowed: + +```````````````````````````````` example + ### foo + ## foo + # foo +. +

foo

+

foo

+

foo

+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + # foo +. +
# foo
+
+```````````````````````````````` + + +```````````````````````````````` example +foo + # bar +. +

foo +# bar

+```````````````````````````````` + + +A closing sequence of `#` characters is optional: + +```````````````````````````````` example +## foo ## + ### bar ### +. +

foo

+

bar

+```````````````````````````````` + + +It need not be the same length as the opening sequence: + +```````````````````````````````` example +# foo ################################## +##### foo ## +. +

foo

+
foo
+```````````````````````````````` + + +Spaces or tabs are allowed after the closing sequence: + +```````````````````````````````` example +### foo ### +. +

foo

+```````````````````````````````` + + +A sequence of `#` characters with anything but spaces or tabs following it +is not a closing sequence, but counts as part of the contents of the +heading: + +```````````````````````````````` example +### foo ### b +. +

foo ### b

+```````````````````````````````` + + +The closing sequence must be preceded by a space or tab: + +```````````````````````````````` example +# foo# +. +

foo#

+```````````````````````````````` + + +Backslash-escaped `#` characters do not count as part +of the closing sequence: + +```````````````````````````````` example +### foo \### +## foo #\## +# foo \# +. +

foo ###

+

foo ###

+

foo #

+```````````````````````````````` + + +ATX headings need not be separated from surrounding content by blank +lines, and they can interrupt paragraphs: + +```````````````````````````````` example +**** +## foo +**** +. +
+

foo

+
+```````````````````````````````` + + +```````````````````````````````` example +Foo bar +# baz +Bar foo +. +

Foo bar

+

baz

+

Bar foo

+```````````````````````````````` + + +ATX headings can be empty: + +```````````````````````````````` example +## +# +### ### +. +

+

+

+```````````````````````````````` + + +## Setext headings + +A [setext heading](@) consists of one or more +lines of text, not interrupted by a blank line, of which the first line does not +have more than 3 spaces of indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. + +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces of indentation and any number of trailing spaces or tabs. + +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. + +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. + +Simple examples: + +```````````````````````````````` example +Foo *bar* +========= + +Foo *bar* +--------- +. +

Foo bar

+

Foo bar

+```````````````````````````````` + + +The content of the heading may span more than one line: + +```````````````````````````````` example +Foo *bar +baz* +==== +. +

Foo bar +baz

+```````````````````````````````` + +The contents are the result of parsing the heading's raw +content as inlines. The heading's raw content is formed by +concatenating the lines and removing initial and final +spaces or tabs. + +```````````````````````````````` example + Foo *bar +baz*→ +==== +. +

Foo bar +baz

+```````````````````````````````` + + +The underlining can be any length: + +```````````````````````````````` example +Foo +------------------------- + +Foo += +. +

Foo

+

Foo

+```````````````````````````````` + + +The heading content can be preceded by up to three spaces of indentation, and +need not line up with the underlining: + +```````````````````````````````` example + Foo +--- + + Foo +----- + + Foo + === +. +

Foo

+

Foo

+

Foo

+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + Foo + --- + + Foo +--- +. +
Foo
+---
+
+Foo
+
+
+```````````````````````````````` + + +The setext heading underline can be preceded by up to three spaces of +indentation, and may have trailing spaces or tabs: + +```````````````````````````````` example +Foo + ---- +. +

Foo

+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example +Foo + --- +. +

Foo +---

+```````````````````````````````` + + +The setext heading underline cannot contain internal spaces or tabs: + +```````````````````````````````` example +Foo += = + +Foo +--- - +. +

Foo += =

+

Foo

+
+```````````````````````````````` + + +Trailing spaces or tabs in the content line do not cause a hard line break: + +```````````````````````````````` example +Foo +----- +. +

Foo

+```````````````````````````````` + + +Nor does a backslash at the end: + +```````````````````````````````` example +Foo\ +---- +. +

Foo\

+```````````````````````````````` + + +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: + +```````````````````````````````` example +`Foo +---- +` + +<a title="a lot +--- +of dashes"/> +. +

`Foo

+

`

+

<a title="a lot

+

of dashes"/>

+```````````````````````````````` + + +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: + +```````````````````````````````` example +> Foo +--- +. +
+

Foo

+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +bar +=== +. +
+

foo +bar +===

+
+```````````````````````````````` + + +```````````````````````````````` example +- Foo +--- +. +
    +
  • Foo
  • +
+
+```````````````````````````````` + + +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: + +```````````````````````````````` example +Foo +Bar +--- +. +

Foo +Bar

+```````````````````````````````` + + +But in general a blank line is not required before or after +setext headings: + +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
+

Foo

+

Bar

+

Baz

+```````````````````````````````` + + +Setext headings cannot be empty: + +```````````````````````````````` example + +==== +. +

====

+```````````````````````````````` + + +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: + +```````````````````````````````` example +--- +--- +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +- foo +----- +. +
    +
  • foo
  • +
+
+```````````````````````````````` + + +```````````````````````````````` example + foo +--- +. +
foo
+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +----- +. +
+

foo

+
+
+```````````````````````````````` + + +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: + +```````````````````````````````` example +\> foo +------ +. +

> foo

+```````````````````````````````` + + +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret + +``` markdown +Foo +bar +--- +baz +``` + +One can find four different interpretations: + +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" + +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: + +```````````````````````````````` example +Foo + +bar +--- +baz +. +

Foo

+

bar

+

baz

+```````````````````````````````` + + +Authors who want interpretation 2 can put blank lines around +the thematic break, + +```````````````````````````````` example +Foo +bar + +--- + +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +or use a thematic break that cannot count as a [setext heading +underline], such as + +```````````````````````````````` example +Foo +bar +* * * +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +Authors who want interpretation 3 can use backslash escapes: + +```````````````````````````````` example +Foo +bar +\--- +baz +. +

Foo +bar +--- +baz

+```````````````````````````````` + + +## Indented code blocks + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each preceded by four or more spaces of indentation. The contents of the code +block are the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) + +```````````````````````````````` example + a simple + indented code block +. +
a simple
+  indented code block
+
+```````````````````````````````` + + +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + +```````````````````````````````` example + - foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo + + - bar +. +
    +
  1. +

    foo

    +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + + +The contents of a code block are literal text, and do not get parsed +as Markdown: + +```````````````````````````````` example +
+ *hi* + + - one +. +
<a/>
+*hi*
+
+- one
+
+```````````````````````````````` + + +Here we have three chunks separated by blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 + + + + chunk3 +. +
chunk1
+
+chunk2
+
+
+
+chunk3
+
+```````````````````````````````` + + +Any initial spaces or tabs beyond four spaces of indentation will be included in +the content, even in interior blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 +. +
chunk1
+  
+  chunk2
+
+```````````````````````````````` + + +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) + +```````````````````````````````` example +Foo + bar + +. +

Foo +bar

+```````````````````````````````` + + +However, any non-blank line with fewer than four spaces of indentation ends +the code block immediately. So a paragraph may occur immediately +after indented code: + +```````````````````````````````` example + foo +bar +. +
foo
+
+

bar

+```````````````````````````````` + + +And indented code can occur immediately before and after other kinds of +blocks: + +```````````````````````````````` example +# Heading + foo +Heading +------ + foo +---- +. +

Heading

+
foo
+
+

Heading

+
foo
+
+
+```````````````````````````````` + + +The first line can be preceded by more than four spaces of indentation: + +```````````````````````````````` example + foo + bar +. +
    foo
+bar
+
+```````````````````````````````` + + +Blank lines preceding or following an indented code block +are not included in it: + +```````````````````````````````` example + + + foo + + +. +
foo
+
+```````````````````````````````` + + +Trailing spaces or tabs are included in the code block's content: + +```````````````````````````````` example + foo +. +
foo  
+
+```````````````````````````````` + + + +## Fenced code blocks + +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, preceded by up to three spaces of indentation. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +spaces or tabs and called the [info string](@). If the [info string] comes +after a backtick fence, it may not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +preceded by N spaces of indentation, then up to N spaces of indentation are +removed from each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented N spaces or less, all +of the indentation is removed.) + +The closing code fence may be preceded by up to three spaces of indentation, and +may be followed only by spaces or tabs, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real downside to the +behavior described here.) + +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. 
+ +The content of a code fence is treated as literal text, not parsed +as inlines. The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: + +```````````````````````````````` example +``` +< + > +``` +. +
<
+ >
+
+```````````````````````````````` + + +With tildes: + +```````````````````````````````` example +~~~ +< + > +~~~ +. +
<
+ >
+
+```````````````````````````````` + +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + +The closing code fence must use the same character as the opening +fence: + +```````````````````````````````` example +``` +aaa +~~~ +``` +. +
aaa
+~~~
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
aaa
+```
+
+```````````````````````````````` + + +The closing code fence must be at least as long as the opening fence: + +```````````````````````````````` example +```` +aaa +``` +`````` +. +
aaa
+```
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
aaa
+~~~
+
+```````````````````````````````` + + +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): + +```````````````````````````````` example +``` +. +
+```````````````````````````````` + + +```````````````````````````````` example +````` + +``` +aaa +. +

+```
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +> aaa + +bbb +. +
+
aaa
+
+
+

bbb

+```````````````````````````````` + + +A code block can have all empty lines as its content: + +```````````````````````````````` example +``` + + +``` +. +

+  
+
+```````````````````````````````` + + +A code block can be empty: + +```````````````````````````````` example +``` +``` +. +
+```````````````````````````````` + + +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: + +```````````````````````````````` example + ``` + aaa +aaa +``` +. +
aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + aaa +aaa + ``` +. +
aaa
+aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` + aaa + aaa + aaa + ``` +. +
aaa
+ aaa
+aaa
+
+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + ``` + aaa + ``` +. +
```
+aaa
+```
+
+```````````````````````````````` + + +Closing fences may be preceded by up to three spaces of indentation, and their +indentation need not match that of the opening fence: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +This is not a closing fence, because it is indented 4 spaces: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+    ```
+
+```````````````````````````````` + + + +Code fences (opening and closing) cannot contain internal spaces or tabs: + +```````````````````````````````` example +``` ``` +aaa +. +

+aaa

+```````````````````````````````` + + +```````````````````````````````` example +~~~~~~ +aaa +~~~ ~~ +. +
aaa
+~~~ ~~
+
+```````````````````````````````` + + +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: + +```````````````````````````````` example +foo +``` +bar +``` +baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: + +```````````````````````````````` example +foo +--- +~~~ +bar +~~~ +# baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +An [info string] can be provided after the opening code fence. +Although this spec doesn't mandate any particular treatment of +the info string, the first word is typically used to specify +the language of the code block. In HTML output, the language is +normally indicated by adding a class to the `code` element consisting +of `language-` followed by the language name. + +```````````````````````````````` example +```ruby +def foo(x) + return 3 +end +``` +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +````; +```` +. +
+```````````````````````````````` + + +[Info strings] for backtick code blocks cannot contain backticks: + +```````````````````````````````` example +``` aa ``` +foo +. +

aa +foo

+```````````````````````````````` + + +[Info strings] for tilde code blocks can contain backticks and tildes: + +```````````````````````````````` example +~~~ aa ``` ~~~ +foo +~~~ +. +
foo
+
+```````````````````````````````` + + +Closing code fences cannot have [info strings]: + +```````````````````````````````` example +``` +``` aaa +``` +. +
``` aaa
+
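+```````````````````````````````` + + + +## HTML blocks + +An [HTML block](@) is a group of lines that is treated +as raw HTML (and will not be escaped in HTML output). + +There are seven kinds of [HTML block], which can be defined by their +start and end conditions. The block begins with a line that meets a +[start condition](@) (after up to three optional spaces of indentation). +It ends with the first subsequent line that meets a matching +[end condition](@), or the last line of the document, or the last line of +the [container block](#container-blocks) containing the current HTML +block, if no line is encountered that meets the [end condition]. If +the first line meets both the [start condition] and the [end +condition], the block will contain just that line. + +1. **Start condition:** line begins with the string `<pre`, +`<script`, `<style`, or `<textarea` (case-insensitive), followed by a space, +a tab, the string `>`, or the end of the line.\ +**End condition:** line contains an end tag +`</pre>`, `</script>`, `</style>`, or `</textarea>` (case-insensitive; it +need not match the start tag). + +2. **Start condition:** line begins with the string `<!--`.\ +**End condition:** line contains the string `-->`. + +3. **Start condition:** line begins with the string `<?`.\ +**End condition:** line contains the string `?>`. + +4. **Start condition:** line begins with the string `<!` +followed by an ASCII letter.\ +**End condition:** line contains the character `>`. + +5. **Start condition:** line begins with the string +`<![CDATA[`.\ +**End condition:** line contains the string `]]>`. + +6. **Start condition:** line begins with the string `<` or `</` +followed by one of the strings (case-insensitive) `address`, +`article`, `aside`, `base`, `basefont`, `blockquote`, `body`, +`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`, +`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`, +`footer`, `form`, `frame`, `frameset`, +`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, +`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`, +`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, +`section`, `source`, `summary`, `table`, `tbody`, `td`, +`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed +by a space, a tab, the end of the line, the string `>`, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. + +7. **Start condition:** line begins with a complete [open tag] +(with any [tag name] other than `pre`, `script`, +`style`, or `textarea`) or a complete [closing tag], +followed by zero or more spaces and tabs, followed by the end of the line.\ +**End condition:** line is followed by a [blank line]. + +HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container +block](#container-blocks).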
This means any HTML **within an HTML +block** that might otherwise be recognised as a start condition will +be ignored by the parser and passed through as-is, without changing +the parser's state. + +For instance, `<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
+
+```````````````````````````````` example
+
+
+**Hello**,
+
+_world_.
+
+
+. +
+
+**Hello**,
+

world. +

+
+```````````````````````````````` + +In this case, the HTML block is terminated by the blank line — the `**Hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. + +All types of [HTML blocks] except type 7 may interrupt +a paragraph. Blocks of type 7 may not interrupt a paragraph. +(This restriction is intended to prevent unwanted interpretation +of long tags inside a wrapped paragraph as starting HTML blocks.) + +Some simple examples follow. Here are some basic HTML blocks +of type 6: + +```````````````````````````````` example + + + + +
+ hi +
+ +okay. +. + + + + +
+ hi +
+

okay.

+```````````````````````````````` + + +```````````````````````````````` example +
+*foo* +```````````````````````````````` + + +Here we have two HTML blocks with a Markdown paragraph between them: + +```````````````````````````````` example +
+ +*Markdown* + +
+. +
+

Markdown

+
+```````````````````````````````` + + +The tag on the first line can be partial, as long +as it is split where there would be whitespace: + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +An open tag need not be closed: +```````````````````````````````` example +
+*foo* + +*bar* +. +
+*foo* +

bar

+```````````````````````````````` + + + +A partial tag need not even be completed (garbage +in, garbage out): + +```````````````````````````````` example +
+. + +```````````````````````````````` + + +```````````````````````````````` example +
+foo +
+. +
+foo +
+```````````````````````````````` + + +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +example, what looks like a Markdown code block +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: + +```````````````````````````````` example +
+``` c +int x = 33; +``` +. +
+``` c +int x = 33; +``` +```````````````````````````````` + + +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): + +```````````````````````````````` example +<a href="foo"> +*bar* +</a> +. +<a href="foo"> +*bar* +</a> +```````````````````````````````` + + +In type 7 blocks, the [tag name] can be anything: + +```````````````````````````````` example +<Warning> +*bar* +</Warning> +. +<Warning> +*bar* +</Warning> +```````````````````````````````` + + +```````````````````````````````` example +<i class="foo"> +*bar* +</i> +. +<i class="foo"> +*bar* +</i> +```````````````````````````````` + + +```````````````````````````````` example +</ins> +*bar* +. +</ins> +*bar* +```````````````````````````````` + + +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `<del>` tag is a nice example. We can surround content with +`<del>` tags in three different ways. In this case, we get a raw +HTML block, because the `<del>` tag is on a line by itself: + +```````````````````````````````` example +<del> +*foo* +</del> +. +<del> +*foo* +</del> +```````````````````````````````` + + +In this case, we get a raw HTML block that just includes +the `<del>` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: + +```````````````````````````````` example +<del> + +*foo* + +</del> +. +<del> +

foo

+
+```````````````````````````````` + + +Finally, in this case, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + +```````````````````````````````` example +*foo* +. +

foo

+```````````````````````````````` + + +HTML tags designed to contain literal content +(`pre`, `script`, `style`, `textarea`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: + +A pre tag (type 1): + +```````````````````````````````` example +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+okay +. +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+

okay

+```````````````````````````````` + + +A script tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +A textarea tag (type 1): + +```````````````````````````````` example + +. + +```````````````````````````````` + +A style tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + +```````````````````````````````` example + +*foo* +. + +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*bar* +*baz* +. +*bar* +

baz

+```````````````````````````````` + + +Note that anything on the last line after the +end tag will be included in the [HTML block]: + +```````````````````````````````` example +1. *bar* +. +1. *bar* +```````````````````````````````` + + +A comment (type 2): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + + +A processing instruction (type 3): + +```````````````````````````````` example +'; + +?> +okay +. +'; + +?> +

okay

+```````````````````````````````` + + +A declaration (type 4): + +```````````````````````````````` example + +. + +```````````````````````````````` + + +CDATA (type 5): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +The opening tag can be preceded by up to three spaces of indentation, but not +four: + +```````````````````````````````` example + + + +. + +
<!-- foo -->
+
+```````````````````````````````` + + +```````````````````````````````` example +
+ +
+. +
+
<div>
+
+```````````````````````````````` + + +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. + +```````````````````````````````` example +Foo +
+bar +
+. +

Foo

+
+bar +
+```````````````````````````````` + + +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, [above][HTML +block]: + +```````````````````````````````` example +
+bar +
+*foo* +. +
+bar +
+*foo* +```````````````````````````````` + + +HTML blocks of type 7 cannot interrupt a paragraph: + +```````````````````````````````` example +Foo + +baz +. +

Foo + +baz

+```````````````````````````````` + + +This rule differs from John Gruber's original Markdown syntax +specification, which says: + +> The only restrictions are that block-level HTML elements — +> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with spaces or tabs. + +In some ways Gruber's rule is more restrictive than the one given +here: + +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. + +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. + +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: + +Compare: + +```````````````````````````````` example +

+ +*Emphasized* text. + +
+. +
+

Emphasized text.

+
+```````````````````````````````` + + +```````````````````````````````` example +
+*Emphasized* text. +
+. +
+*Emphasized* text. +
+```````````````````````````````` + + +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. + +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: + +```````````````````````````````` example +
+ + + + + + + +
+Hi +
+. + + + + +
+Hi +
+```````````````````````````````` + + +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: + +```````````````````````````````` example + + + + + + + + +
+ Hi +
+. + + +
<td>
+  Hi
+</td>
+
+ +
+```````````````````````````````` + + +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `<pre>` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
+
+## Link reference definitions
+
+A [link reference definition](@)
+consists of a [link label], optionally preceded by up to three spaces of
+indentation, followed
+by a colon (`:`), optional spaces or tabs (including up to one
+[line ending]), a [link destination],
+optional spaces or tabs (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by spaces or tabs.
+No further character may occur.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
+
+```````````````````````````````` example
+[foo]: /url "title"
+
+[foo]
+.
+

foo

+```````````````````````````````` + + +```````````````````````````````` example + [foo]: + /url + 'the title' + +[foo] +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] +. +

Foo*bar]

+```````````````````````````````` + + +```````````````````````````````` example +[Foo bar]: + +'title' + +[Foo bar] +. +

Foo bar

+```````````````````````````````` + + +The title may extend over multiple lines: + +```````````````````````````````` example +[foo]: /url ' +title +line1 +line2 +' + +[foo] +. +

foo

+```````````````````````````````` + + +However, it may not contain a [blank line]: + +```````````````````````````````` example +[foo]: /url 'title + +with blank line' + +[foo] +. +

[foo]: /url 'title

+

with blank line'

+

[foo]

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[foo]: +/url + +[foo] +. +

foo

+```````````````````````````````` + + +The link destination may not be omitted: + +```````````````````````````````` example +[foo]: + +[foo] +. +

[foo]:

+

[foo]

+```````````````````````````````` + + However, an empty link destination may be specified using + angle brackets: + +```````````````````````````````` example +[foo]: <> + +[foo] +. +

foo

+```````````````````````````````` + +The title must be separated from the link destination by +spaces or tabs: + +```````````````````````````````` example +[foo]: (baz) + +[foo] +. +

[foo]: (baz)

+

[foo]

+```````````````````````````````` + + +Both title and destination can contain backslash escapes +and literal backslashes: + +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" + +[foo] +. +

foo

+```````````````````````````````` + + +A link can come before its corresponding definition: + +```````````````````````````````` example +[foo] + +[foo]: url +. +

foo

+```````````````````````````````` + + +If there are several matching definitions, the first one takes +precedence: + +```````````````````````````````` example +[foo] + +[foo]: first +[foo]: second +. +

foo

+```````````````````````````````` + + +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

Foo

+```````````````````````````````` + + +```````````````````````````````` example +[ΑΓΩ]: /φου + +[αγω] +. +

αγω

+```````````````````````````````` + + +Whether something is a [link reference definition] is +independent of whether the link reference it defines is +used in the document. Thus, for example, the following +document contains just a link reference definition, and +no visible content: + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +Here is another one: + +```````````````````````````````` example +[ +foo +]: /url +bar +. +

bar

+```````````````````````````````` + + +This is not a link reference definition, because there are +characters other than spaces or tabs after the title: + +```````````````````````````````` example +[foo]: /url "title" ok +. +

[foo]: /url "title" ok

+```````````````````````````````` + + +This is a link reference definition, but it has no title: + +```````````````````````````````` example +[foo]: /url +"title" ok +. +

"title" ok

+```````````````````````````````` + + +This is not a link reference definition, because it is indented +four spaces: + +```````````````````````````````` example + [foo]: /url "title" + +[foo] +. +
[foo]: /url "title"
+
+

[foo]

+```````````````````````````````` + + +This is not a link reference definition, because it occurs inside +a code block: + +```````````````````````````````` example +``` +[foo]: /url +``` + +[foo] +. +
[foo]: /url
+
+

[foo]

+```````````````````````````````` + + +A [link reference definition] cannot interrupt a paragraph. + +```````````````````````````````` example +Foo +[bar]: /baz + +[bar] +. +

Foo +[bar]: /baz

+

[bar]

+```````````````````````````````` + + +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. + +```````````````````````````````` example +# [Foo] +[foo]: /url +> bar +. +

Foo

+
+

bar

+
+```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +bar +=== +[foo] +. +

bar

+

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +=== +[foo] +. +

=== +foo

+```````````````````````````````` + + +Several [link reference definitions] +can occur one after another, without intervening blank lines. + +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] +. +

foo, +bar, +baz

+```````````````````````````````` + + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: + +```````````````````````````````` example +[foo] + +> [foo]: /url +. +

foo

+
+
+```````````````````````````````` + + +## Paragraphs + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +spaces or tabs. + +A simple example with two paragraphs: + +```````````````````````````````` example +aaa + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Paragraphs can contain multiple lines, but no blank lines: + +```````````````````````````````` example +aaa +bbb + +ccc +ddd +. +

aaa +bbb

+

ccc +ddd

+```````````````````````````````` + + +Multiple blank lines between paragraphs have no effect: + +```````````````````````````````` example +aaa + + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Leading spaces or tabs are skipped: + +```````````````````````````````` example + aaa + bbb +. +

aaa +bbb

+```````````````````````````````` + + +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. + +```````````````````````````````` example +aaa + bbb + ccc +. +

aaa +bbb +ccc

+```````````````````````````````` + + +However, the first line may be preceded by up to three spaces of indentation. +Four spaces of indentation is too many: + +```````````````````````````````` example + aaa +bbb +. +

aaa +bbb

+```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
aaa
+
+

bbb

+```````````````````````````````` + + +Final spaces or tabs are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

aaa
+bbb

+```````````````````````````````` + + +## Blank lines + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

aaa

+

aaa

+```````````````````````````````` + + + +# Container blocks + +A [container block](#container-blocks) is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + +## Block quotes + +A [block quote marker](@), +optionally preceded by up to three spaces of indentation, +consists of (a) the character `>` together with a following space of +indentation, or (b) a single character `>` not followed by a space of +indentation. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. **Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next character other than a space or tab after the + [block quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. 
**Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. + +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo +> bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The space or tab after the `>` characters can be omitted: + +```````````````````````````````` example +># Foo +>bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The `>` characters can be preceded by up to three spaces of indentation: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
> # Foo
+> bar
+> baz
+
+```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: + +```````````````````````````````` example +> # Foo +> bar +baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +A block quote can contain some lazy and some non-lazy +continuation lines: + +```````````````````````````````` example +> bar +baz +> foo +. +
+

bar +baz +foo

+
+```````````````````````````````` + + +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: + +```````````````````````````````` example +> foo +--- +. +
+

foo

+
+
+```````````````````````````````` + + +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + +```````````````````````````````` example +> - foo +- bar +. +
+
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: + +```````````````````````````````` example +> foo + bar +. +
+
foo
+
+
+
bar
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +. +
+
+
+

foo

+
+```````````````````````````````` + + +Note that in the following case, we have a [lazy +continuation line]: + +```````````````````````````````` example +> foo + - bar +. +
+

foo +- bar

+
+```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
+
+```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
+

foo

+
+```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
+

foo

+
+
+

bar

+
+```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
+

foo +bar

+
+```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
+

foo

+

bar

+
+```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

foo

+
+

bar

+
+```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
+

aaa

+
+
+
+

bbb

+
+```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
+

bar +baz

+
+```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
+
+
+

foo +bar

+
+
+
+```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
+
+
+

foo +bar +baz

+
+
+
+```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space of indentation. So *five spaces* are needed +after the `>`: + +```````````````````````````````` example +> code + +> not code +. +
+
code
+
+
+
+

not code

+
+```````````````````````````````` + + + +## List items + +A [list marker](@) is a +[bullet list marker] or an [ordered list marker]. + +A [bullet list marker](@) +is a `-`, `+`, or `*` character. + +An [ordered list marker](@) +is a sequence of 1--9 arabic digits (`0-9`), followed by either a +`.` character or a `)` character. (The reason for the length +limit is that with 10 digits we start seeing integer overflows +in some browsers.) + +The following rules define [list items]: + +1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of + blocks *Bs* starting with a character other than a space or tab, and *M* is + a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, + then the result of prepending *M* and the following spaces to the first line + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + list item with *Bs* as its contents. The type of the list item + (bullet or ordered) is determined by the type of its list marker. + If the list item is ordered, then it is also assigned a start + number, based on the ordered list marker. + + Exceptions: + + 1. When the first list item in a [list] interrupts + a paragraph---that is, when it starts on a line that would + otherwise count as [paragraph continuation text]---then (a) + the lines *Ls* must not begin with a blank line, and (b) if + the list item is ordered, the start number must be 1. + 2. If any line is a [thematic break][thematic breaks] then + that line is not a list item. + +For example, let *Ls* be the lines + +```````````````````````````````` example +A paragraph +with two lines. + + indented code + +> A block quote. +. +

A paragraph +with two lines.

+
indented code
+
+
+

A block quote.

+
+```````````````````````````````` + + +And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says +that the following is an ordered list item with start number 1, +and the same contents as *Ls*: + +```````````````````````````````` example +1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +The most important thing to notice is that the position of +the text after the list marker determines how much indentation +is needed in subsequent blocks in the list item. If the list +marker takes up two spaces of indentation, and there are three spaces between +the list marker and the next character other than a space or tab, then blocks +must be indented five spaces in order to fall under the list +item. + +Here are some examples showing how far content must be indented to be +put under the list item: + +```````````````````````````````` example +- one + + two +. +
    +
  • one
  • +
+

two

+```````````````````````````````` + + +```````````````````````````````` example +- one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • one
  • +
+
 two
+
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +It is tempting to think of this in terms of columns: the continuation +blocks must be indented at least to the column of the first character other than +a space or tab after the list marker. However, that is not quite right. +The spaces of indentation after the list marker determine how much relative +indentation is needed. Which column this indentation reaches will depend on +how the list item is embedded in other constructions, as shown by +this example: + +```````````````````````````````` example + > > 1. one +>> +>> two +. +
+
+
    +
  1. +

    one

    +

    two

    +
  2. +
+
+
+```````````````````````````````` + + +Here `two` occurs in the same column as the list marker `1.`, +but is actually contained in the list item, because there is +sufficient indentation after the last containing blockquote marker. + +The converse is also possible. In the following example, the word `two` +occurs far to the right of the initial text of the list item, `one`, but +it is not considered part of the list item, because it is not indented +far enough past the blockquote marker: + +```````````````````````````````` example +>>- one +>> + > > two +. +
+
+
    +
  • one
  • +
+

two

+
+
+```````````````````````````````` + + +Note that at least one space or tab is needed between the list marker and +any following content, so these are not list items: + +```````````````````````````````` example +-one + +2.two +. +

-one

+

2.two

+```````````````````````````````` + + +A list item may contain blocks that are separated by more than +one blank line. + +```````````````````````````````` example +- foo + + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +A list item may contain any kind of block: + +```````````````````````````````` example +1. foo + + ``` + bar + ``` + + baz + + > bam +. +
    +
  1. +

    foo

    +
    bar
    +
    +

    baz

    +
    +

    bam

    +
    +
  2. +
+```````````````````````````````` + + +A list item that contains an indented code block will preserve +empty lines within the code block verbatim. + +```````````````````````````````` example +- Foo + + bar + + + baz +. +
    +
  • +

    Foo

    +
    bar
    +
    +
    +baz
    +
    +
  • +
+```````````````````````````````` + +Note that ordered list start numbers must be nine digits or less: + +```````````````````````````````` example +123456789. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +1234567890. not ok +. +

1234567890. not ok

+```````````````````````````````` + + +A start number may begin with 0s: + +```````````````````````````````` example +0. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +003. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +A start number may not be negative: + +```````````````````````````````` example +-1. not ok +. +

-1. not ok

+```````````````````````````````` + + + +2. **Item starting with indented code.** If a sequence of lines *Ls* + constitute a sequence of blocks *Bs* starting with an indented code + block, and *M* is a list marker of width *W* followed by + one space of indentation, then the result of prepending *M* and the + following space to the first line of *Ls*, and indenting subsequent lines + of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +An indented code block will have to be preceded by four spaces of indentation +beyond the edge of the region where text will be included in the list item. +In the following case that is 6 spaces: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +
    bar
    +
    +
  • +
+```````````````````````````````` + + +And in this case it is 11 spaces: + +```````````````````````````````` example + 10. foo + + bar +. +
    +
  1. +

    foo

    +
    bar
    +
    +
  2. +
+```````````````````````````````` + + +If the *first* block in the list item is an indented code block, +then by rule #2, the contents must be preceded by *one* space of indentation +after the list marker: + +```````````````````````````````` example + indented code + +paragraph + + more code +. +
indented code
+
+

paragraph

+
more code
+
+```````````````````````````````` + + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
    indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that an additional space of indentation is interpreted as space +inside the code block: + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
     indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that rules #1 and #2 only apply to two cases: (a) cases +in which the lines to be included in a list item begin with a +character other than a space or tab, and (b) cases in which +they begin with an indented code +block. In a case like the following, where the first block begins with +three spaces of indentation, the rules do not allow us to form a list item by +indenting the whole thing and prepending a list marker: + +```````````````````````````````` example + foo + +bar +. +

foo

+

bar

+```````````````````````````````` + + +```````````````````````````````` example +- foo + + bar +. +
    +
  • foo
  • +
+

bar

+```````````````````````````````` + + +This is not a significant restriction, because when a block is preceded by up to +three spaces of indentation, the indentation can always be removed without +a change in interpretation, allowing rule #1 to be applied. So, in +the above case: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +3. **Item starting with a blank line.** If a sequence of lines *Ls* + starting with a single [blank line] constitute a (possibly empty) + sequence of blocks *Bs*, and *M* is a list marker of width *W*, + then the result of prepending *M* to the first line of *Ls*, and + preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a + list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +Here are some list items that start with a blank line but are not empty: + +```````````````````````````````` example +- + foo +- + ``` + bar + ``` +- + baz +. +
    +
  • foo
  • +
  • +
    bar
    +
    +
  • +
  • +
    baz
    +
    +
  • +
+```````````````````````````````` + +When the list item starts with a blank line, the number of spaces +following the list marker doesn't change the required indentation: + +```````````````````````````````` example +- + foo +. +
    +
  • foo
  • +
+```````````````````````````````` + + +A list item can begin with at most one blank line. +In the following example, `foo` is not part of the list +item: + +```````````````````````````````` example +- + + foo +. +
    +
  • +
+

foo

+```````````````````````````````` + + +Here is an empty bullet list item: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +It does not matter whether there are spaces or tabs following the [list marker]: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +Here is an empty ordered list item: + +```````````````````````````````` example +1. foo +2. +3. bar +. +
    +
  1. foo
  2. +
  3. +
  4. bar
  5. +
+```````````````````````````````` + + +A list may start or end with an empty list item: + +```````````````````````````````` example +* +. +
    +
  • +
+```````````````````````````````` + +However, an empty list item cannot interrupt a paragraph: + +```````````````````````````````` example +foo +* + +foo +1. +. +

foo +*

+

foo +1.

+```````````````````````````````` + + +4. **Indentation.** If a sequence of lines *Ls* constitutes a list item + according to rule #1, #2, or #3, then the result of preceding each line + of *Ls* by up to three spaces of indentation (the same for each line) also + constitutes a list item with the same contents and attributes. If a line is + empty, then it need not be indented. + +Indented one space: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented two spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented three spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Four spaces indent gives a code block: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
1.  A paragraph
+    with two lines.
+
+        indented code
+
+    > A block quote.
+
+```````````````````````````````` + + + +5. **Laziness.** If a string of lines *Ls* constitute a [list + item](#list-items) with contents *Bs*, then the result of deleting + some or all of the indentation from one or more lines in which the + next character other than a space or tab after the indentation is + [paragraph continuation text] is a + list item with the same contents and attributes. The unindented + lines are called + [lazy continuation line](@)s. + +Here is an example with [lazy continuation lines]: + +```````````````````````````````` example + 1. A paragraph +with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indentation can be partially deleted: + +```````````````````````````````` example + 1. A paragraph + with two lines. +. +
    +
  1. A paragraph +with two lines.
  2. +
+```````````````````````````````` + + +These examples show how laziness can work in nested structures: + +```````````````````````````````` example +> 1. > Blockquote +continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> 1. > Blockquote +> continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + + +6. **That's all.** Nothing that is not counted as a list item by rules + #1--5 counts as a [list item](#list-items). + +The rules for sublists follow from the general rules +[above][List items]. A sublist must be indented the same number +of spaces of indentation a paragraph would need to be in order to be included +in the list item. + +So, in this case we need two spaces indent: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo +
      +
    • bar +
        +
      • baz +
          +
        • boo
        • +
        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +One is not enough: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo
  • +
  • bar
  • +
  • baz
  • +
  • boo
  • +
+```````````````````````````````` + + +Here we need four, because the list marker is wider: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + +Three is not enough: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo
  2. +
+
    +
  • bar
  • +
+```````````````````````````````` + + +A list may be the first block in a list item: + +```````````````````````````````` example +- - foo +. +
    +
  • +
      +
    • foo
    • +
    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. - 2. foo +. +
    +
  1. +
      +
    • +
        +
      1. foo
      2. +
      +
    • +
    +
  2. +
+```````````````````````````````` + + +A list item can contain a heading: + +```````````````````````````````` example +- # Foo +- Bar + --- + baz +. +
    +
  • +

    Foo

    +
  • +
  • +

    Bar

    +baz
  • +
+```````````````````````````````` + + +### Motivation + +John Gruber's Markdown spec says the following about list items: + +1. "List markers typically start at the left margin, but may be indented + by up to three spaces. List markers must be followed by one or more + spaces or a tab." + +2. "To make lists look nice, you can wrap items with hanging indents.... + But if you don't want to, you don't have to." + +3. "List items may consist of multiple paragraphs. Each subsequent + paragraph in a list item must be indented by either 4 spaces or one + tab." + +4. "It looks nice if you indent every line of the subsequent paragraphs, + but here again, Markdown will allow you to be lazy." + +5. "To put a blockquote within a list item, the blockquote's `>` + delimiters need to be indented." + +6. "To put a code block within a list item, the code block needs to be + indented twice — 8 spaces or two tabs." + +These rules specify that a paragraph under a list item must be indented +four spaces (presumably, from the left margin, rather than the start of +the list marker, but this is not said), and that code under a list item +must be indented eight spaces instead of the usual four. They also say +that a block quote must be indented, but not by how much; however, the +example given has four spaces indentation. Although nothing is said +about other kinds of block-level content, it is certainly reasonable to +infer that *all* block elements under a list item, including other +lists, must be indented four spaces. This principle has been called the +*four-space rule*. + +The four-space rule is clear and principled, and if the reference +implementation `Markdown.pl` had followed it, it probably would have +become the standard. However, `Markdown.pl` allowed paragraphs and +sublists to start with only two spaces indentation, at least on the +outer level. 
Worse, its behavior was inconsistent: a sublist of an +outer-level list needed two spaces indentation, but a sublist of this +sublist needed three spaces. It is not surprising, then, that different +implementations of Markdown have developed very different rules for +determining what comes under a list item. (Pandoc and python-Markdown, +for example, stuck with Gruber's syntax description and the four-space +rule, while discount, redcarpet, marked, PHP Markdown, and others +followed `Markdown.pl`'s behavior more closely.) + +Unfortunately, given the divergences between implementations, there +is no way to give a spec for list items that will be guaranteed not +to break any existing documents. However, the spec given here should +correctly handle lists formatted with either the four-space rule or +the more forgiving `Markdown.pl` behavior, provided they are laid out +in a way that is natural for a human to read. + +The strategy here is to let the width and indentation of the list marker +determine the indentation necessary for blocks to fall under the list +item, rather than having a fixed and arbitrary number. The writer can +think of the body of the list item as a unit which gets indented to the +right enough to fit the list marker (and any indentation on the list +marker). (The laziness rule, #5, then allows continuation lines to be +unindented if needed.) + +This rule is superior, we claim, to any rule requiring a fixed level of +indentation from the margin. The four-space rule is clear but +unnatural. It is quite unintuitive that + +``` markdown +- foo + + bar + + - baz +``` + +should be parsed as two lists with an intervening paragraph, + +``` html +
    +
  • foo
  • +
+

bar

+
    +
  • baz
  • +
+``` + +as the four-space rule demands, rather than a single list, + +``` html +
    +
  • +

    foo

    +

    bar

    +
      +
    • baz
    • +
    +
  • +
+``` + +The choice of four spaces is arbitrary. It can be learned, but it is +not likely to be guessed, and it trips up beginners regularly. + +Would it help to adopt a two-space rule? The problem is that such +a rule, together with the rule allowing up to three spaces of indentation for +the initial list marker, allows text that is indented *less than* the +original list marker to be included in the list item. For example, +`Markdown.pl` parses + +``` markdown + - one + + two +``` + +as a single list item, with `two` a continuation paragraph: + +``` html +
    +
  • +

    one

    +

    two

    +
  • +
+``` + +and similarly + +``` markdown +> - one +> +> two +``` + +as + +``` html +
+
    +
  • +

    one

    +

    two

    +
  • +
+
+``` + +This is extremely unintuitive. + +Rather than requiring a fixed indent from the margin, we could require +a fixed indent (say, two spaces, or even one space) from the list marker (which +may itself be indented). This proposal would remove the last anomaly +discussed. Unlike the spec presented above, it would count the following +as a list item with a subparagraph, even though the paragraph `bar` +is not indented as far as the first paragraph `foo`: + +``` markdown + 10. foo + + bar +``` + +Arguably this text does read like a list item with `bar` as a subparagraph, +which may count in favor of the proposal. However, on this proposal indented +code would have to be indented six spaces after the list marker. And this +would break a lot of existing Markdown, which has the pattern: + +``` markdown +1. foo + + indented code +``` + +where the code is indented eight spaces. The spec above, by contrast, will +parse this text as expected, since the code block's indentation is measured +from the beginning of `foo`. + +The one case that needs special treatment is a list item that *starts* +with indented code. How much indentation is required in that case, since +we don't have a "first paragraph" to measure from? Rule #2 simply stipulates +that in such cases, we require one space indentation from the list marker +(and then the normal four spaces for the indented code). This will match the +four-space rule in cases where the list marker plus its initial indentation +takes four spaces (a common case), but diverge in other cases. + +## Lists + +A [list](@) is a sequence of one or more +list items [of the same type]. The list items +may be separated by any number of blank lines. + +Two list items are [of the same type](@) +if they begin with a [list marker] of the same type. 
+Two list markers are of the +same type if (a) they are bullet list markers using the same character +(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same +delimiter (either `.` or `)`). + +A list is an [ordered list](@) +if its constituent list items begin with +[ordered list markers], and a +[bullet list](@) if its constituent list +items begin with [bullet list markers]. + +The [start number](@) +of an [ordered list] is determined by the list number of +its initial list item. The numbers of subsequent list items are +disregarded. + +A list is [loose](@) if any of its constituent +list items are separated by blank lines, or if any of its constituent +list items directly contain two block-level elements with a blank line +between them. Otherwise a list is [tight](@). +(The difference in HTML output is that paragraphs in a loose list are +wrapped in `<p>` tags, while paragraphs in a tight list are not.) + +Changing the bullet or ordered list delimiter starts a new list: + +```````````````````````````````` example +- foo +- bar ++ baz +. +

    +
  • foo
  • +
  • bar
  • +
+
    +
  • baz
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo +2. bar +3) baz +. +
    +
  1. foo
  2. +
  3. bar
  4. +
+
    +
  1. baz
  2. +
+```````````````````````````````` + + +In CommonMark, a list can interrupt a paragraph. That is, +no blank line is needed to separate a paragraph from a following +list: + +```````````````````````````````` example +Foo +- bar +- baz +. +

Foo

+
    +
  • bar
  • +
  • baz
  • +
+```````````````````````````````` + +`Markdown.pl` does not allow this, through fear of triggering a list +via a numeral in a hard-wrapped line: + +``` markdown +The number of windows in my house is +14. The number of doors is 6. +``` + +Oddly, though, `Markdown.pl` *does* allow a blockquote to +interrupt a paragraph, even though the same considerations might +apply. + +In CommonMark, we do allow lists to interrupt paragraphs, for +two reasons. First, it is natural and not uncommon for people +to start lists without blank lines: + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +Second, we are attracted to a + +> [principle of uniformity](@): +> if a chunk of text has a certain +> meaning, it will continue to have the same meaning when put into a +> container block (such as a list item or blockquote). + +(Indeed, the spec for [list items] and [block quotes] presupposes +this principle.) This principle implies that if + +``` markdown + * I need to buy + - new shoes + - a coat + - a plane ticket +``` + +is a list item containing a paragraph followed by a nested sublist, +as all Markdown implementations agree it is (though the paragraph +may be rendered without `<p>` tags, since the list is "tight"), +then + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +by itself should be a paragraph followed by a nested sublist. + +Since it is well established Markdown practice to allow lists to +interrupt paragraphs inside list items, the [principle of +uniformity] requires us to allow this outside list items as +well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +takes a different approach, requiring blank lines before lists +even inside other list items.) + +In order to solve the problem of unwanted lists in paragraphs with +hard-wrapped numerals, we allow only lists starting with `1` to +interrupt paragraphs. Thus, + +```````````````````````````````` example +The number of windows in my house is +14. The number of doors is 6. +. +

The number of windows in my house is +14. The number of doors is 6.

+```````````````````````````````` + +We may still get an unintended result in cases like + +```````````````````````````````` example +The number of windows in my house is +1. The number of doors is 6. +. +

The number of windows in my house is

+
    +
  1. The number of doors is 6.
  2. +
+```````````````````````````````` + +but this rule should prevent most spurious list captures. + +There can be any number of blank lines between items: + +```````````````````````````````` example +- foo + +- bar + + +- baz +. +
    +
  • +

    foo

    +
  • +
  • +

    bar

    +
  • +
  • +

    baz

    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +- foo + - bar + - baz + + + bim +. +
    +
  • foo +
      +
    • bar +
        +
      • +

        baz

        +

        bim

        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +To separate consecutive lists of the same type, or to separate a +list from an indented code block that would otherwise be parsed +as a subparagraph of the final list item, you can insert a blank HTML +comment: + +```````````````````````````````` example +- foo +- bar + +<!-- --> + +- baz +- bim +. +
    +
  • foo
  • +
  • bar
  • +
+ +
    +
  • baz
  • +
  • bim
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- foo + + notcode + +- foo + +<!-- --> + + code +. +
    +
  • +

    foo

    +

    notcode

    +
  • +
  • +

    foo

    +
  • +
+ +
code
+
+```````````````````````````````` + + +List items need not be indented to the same level. The following +list items will be treated as items at the same list level, +since none is indented enough to belong to the previous list +item: + +```````````````````````````````` example +- a + - b + - c + - d + - e + - f +- g +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
  • e
  • +
  • f
  • +
  • g
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
  5. +

    c

    +
  6. +
+```````````````````````````````` + +Note, however, that list items may not be preceded by more than +three spaces of indentation. Here `- e` is treated as a paragraph continuation +line, because it is indented more than three spaces: + +```````````````````````````````` example +- a + - b + - c + - d + - e +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d +- e
  • +
+```````````````````````````````` + +And here, `3. c` is treated as an indented code block, +because it is indented four spaces and preceded by a +blank line. + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
+
3. c
+
+```````````````````````````````` + + +This is a loose list, because there is a blank line between +two of the list items: + +```````````````````````````````` example +- a +- b + +- c +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +So is this, with an empty second item: + +```````````````````````````````` example +* a +* + +* c +. +
    +
  • +

    a

    +
  • +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +These are loose lists, even though there are no blank lines between the items, +because one of the items directly contains two block-level elements +with a blank line between them: + +```````````````````````````````` example +- a +- b + + c +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +

    c

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a +- b + + [ref]: /url +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +This is a tight list, because the blank lines are in a code block: + +```````````````````````````````` example +- a +- ``` + b + + + ``` +- c +. +
    +
  • a
  • +
  • +
    b
    +
    +
    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: + +```````````````````````````````` example +- a + - b + + c +- d +. +
    +
  • a +
      +
    • +

      b

      +

      c

      +
    • +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is inside the +block quote: + +```````````````````````````````` example +* a + > b + > +* c +. +
    +
  • a +
    +

    b

    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This list is tight, because the consecutive block elements +are not separated by blank lines: + +```````````````````````````````` example +- a + > b + ``` + c + ``` +- d +. +
    +
  • a +
    +

    b

    +
    +
    c
    +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +A single-paragraph list is tight: + +```````````````````````````````` example +- a +. +
    +
  • a
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b +. +
    +
  • a +
      +
    • b
    • +
    +
  • +
+```````````````````````````````` + + +This list is loose, because of the blank line between the +two block elements in the list item: + +```````````````````````````````` example +1. ``` + foo + ``` + + bar +. +
    +
  1. +
    foo
    +
    +

    bar

    +
  2. +
+```````````````````````````````` + + +Here the outer list is loose, the inner list tight: + +```````````````````````````````` example +* foo + * bar + + baz +. +
    +
  • +

    foo

    +
      +
    • bar
    • +
    +

    baz

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b + - c + +- d + - e + - f +. +
    +
  • +

    a

    +
      +
    • b
    • +
    • c
    • +
    +
  • +
  • +

    d

    +
      +
    • e
    • +
    • f
    • +
    +
  • +
+```````````````````````````````` + + +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in + +```````````````````````````````` example +`hi`lo` +. +

hilo`

+```````````````````````````````` + +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. + + + +## Code spans + +A [backtick string](@) +is a string of one or more backtick characters (`` ` ``) that is neither +preceded nor followed by a backtick. + +A [code span](@) begins with a backtick string and ends with +a backtick string of equal length. The contents of the code span are +the characters between these two backtick strings, normalized in the +following ways: + +- First, [line endings] are converted to [spaces]. +- If the resulting string both begins *and* ends with a [space] + character, but does not consist entirely of [space] + characters, a single [space] character is removed from the + front and back. This allows you to include code that begins + or ends with backtick characters, which must be separated by + whitespace from the opening or closing backtick strings. + +This is a simple code span: + +```````````````````````````````` example +`foo` +. +

foo

+```````````````````````````````` + + +Here two backticks are used, because the code contains a backtick. +This example also illustrates stripping of a single leading and +trailing space: + +```````````````````````````````` example +`` foo ` bar `` +. +

foo ` bar

+```````````````````````````````` + + +This example shows the motivation for stripping leading and trailing +spaces: + +```````````````````````````````` example +` `` ` +. +

``

+```````````````````````````````` + +Note that only *one* space is stripped: + +```````````````````````````````` example +`  ``  ` +. +

``

+```````````````````````````````` + +The stripping only happens if the space is on both +sides of the string: + +```````````````````````````````` example +` a` +. +

a

+```````````````````````````````` + +Only [spaces], and not [unicode whitespace] in general, are +stripped in this way: + +```````````````````````````````` example +` b ` +. +

 b 

+```````````````````````````````` + +No stripping occurs if the code span contains only spaces: + +```````````````````````````````` example +` ` +` ` +. +

  +

+```````````````````````````````` + + +[Line endings] are treated like spaces: + +```````````````````````````````` example +`` +foo +bar +baz +`` +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + + +Interior spaces are not collapsed: + +```````````````````````````````` example +`foo bar +baz` +. +

foo bar baz

+```````````````````````````````` + +Note that browsers will typically collapse consecutive spaces +when rendering `<code>` elements, so it is recommended that +the following CSS be used: + + code{white-space: pre-wrap;} + + +Note that backslash escapes do not work in code spans. All backslashes +are treated literally: + +```````````````````````````````` example +`foo\`bar` +. +

foo\bar`

+```````````````````````````````` + + +Backslash escapes are never needed, because one can always choose a +string of *n* backtick characters as delimiters, where the code does +not contain any strings of exactly *n* backtick characters. + +```````````````````````````````` example +``foo`bar`` +. +

foo`bar

+```````````````````````````````` + +```````````````````````````````` example +` foo `` bar ` +. +

foo `` bar

+```````````````````````````````` + + +Code span backticks have higher precedence than any other inline +constructs except HTML tags and autolinks. Thus, for example, this is +not parsed as emphasized text, since the second `*` is part of a code +span: + +```````````````````````````````` example +*foo`*` +. +

*foo*

+```````````````````````````````` + + +And this is not parsed as a link: + +```````````````````````````````` example +[not a `link](/foo`) +. +

[not a link](/foo)

+```````````````````````````````` + + +Code spans, HTML tags, and autolinks have the same precedence. +Thus, this is code: + +```````````````````````````````` example +`` +. +

<a href="">`

+```````````````````````````````` + + +But this is an HTML tag: + +```````````````````````````````` example +
` +. +

`

+```````````````````````````````` + + +And this is code: + +```````````````````````````````` example +`` +. +

<http://foo.bar.baz>`

+```````````````````````````````` + + +But this is an autolink: + +```````````````````````````````` example +` +. +

http://foo.bar.`baz`

+```````````````````````````````` + + +When a backtick string is not closed by a matching backtick string, +we just have literal backticks: + +```````````````````````````````` example +```foo`` +. +

```foo``

+```````````````````````````````` + + +```````````````````````````````` example +`foo +. +

`foo

+```````````````````````````````` + +The following case also illustrates the need for opening and +closing backtick strings to be equal in length: + +```````````````````````````````` example +`foo``bar`` +. +

`foobar

+```````````````````````````````` + + +## Emphasis and strong emphasis + +John Gruber's original [Markdown syntax +description](http://daringfireball.net/projects/markdown/syntax#em) says: + +> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML +> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` +> tag. + +This is enough for most users, but these rules leave much undecided, +especially when it comes to nested emphasis. The original +`Markdown.pl` test suite makes it clear that triple `***` and +`___` delimiters can be used for strong emphasis, and most +implementations have also allowed the following patterns: + +``` markdown +***strong emph*** +***strong** in emph* +***emph* in strong** +**in strong *emph*** +*in emph **strong*** +``` + +The following patterns are less widely supported, but the intent +is clear and they are useful (especially in contexts like bibliography +entries): + +``` markdown +*emph *with emph* in it* +**strong **with strong** in it** +``` + +Many implementations have also restricted intraword emphasis to +the `*` forms, to avoid unwanted emphasis in words containing +internal underscores. (It is best practice to put these in code +spans, but users often do not.) + +``` markdown +internal emphasis: foo*bar*baz +no emphasis: foo_bar_baz +``` + +The rules given below capture all of these patterns, while allowing +for efficient parsing strategies that do not backtrack. + +First, some definitions. A [delimiter run](@) is either +a sequence of one or more `*` characters that is not preceded or +followed by a non-backslash-escaped `*` character, or a sequence +of one or more `_` characters that is not preceded or followed by +a non-backslash-escaped `_` character. 
+ +A [left-flanking delimiter run](@) is +a [delimiter run] that is (1) not followed by [Unicode whitespace], +and either (2a) not followed by a [Unicode punctuation character], or +(2b) followed by a [Unicode punctuation character] and +preceded by [Unicode whitespace] or a [Unicode punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +A [right-flanking delimiter run](@) is +a [delimiter run] that is (1) not preceded by [Unicode whitespace], +and either (2a) not preceded by a [Unicode punctuation character], or +(2b) preceded by a [Unicode punctuation character] and +followed by [Unicode whitespace] or a [Unicode punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +Here are some examples of delimiter runs. + + - left-flanking but not right-flanking: + + ``` + ***abc + _abc + **"abc" + _"abc" + ``` + + - right-flanking but not left-flanking: + + ``` + abc*** + abc_ + "abc"** + "abc"_ + ``` + + - Both left and right-flanking: + + ``` + abc***def + "abc"_"def" + ``` + + - Neither left nor right-flanking: + + ``` + abc *** def + a _ b + ``` + +(The idea of distinguishing left-flanking and right-flanking +delimiter runs based on the character before and the character +after comes from Roopesh Chander's +[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +vfmd uses the terminology "emphasis indicator string" instead of "delimiter +run," and its rules for distinguishing left- and right-flanking runs +are a bit more complex than the ones given here.) + +The following rules define emphasis and strong emphasis: + +1. A single `*` character [can open emphasis](@) + iff (if and only if) it is part of a [left-flanking delimiter run]. + +2. 
A single `_` character [can open emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by a [Unicode punctuation character]. + +3. A single `*` character [can close emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +4. A single `_` character [can close emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by a [Unicode punctuation character]. + +5. A double `**` [can open strong emphasis](@) + iff it is part of a [left-flanking delimiter run]. + +6. A double `__` [can open strong emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by a [Unicode punctuation character]. + +7. A double `**` [can close strong emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +8. A double `__` [can close strong emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by a [Unicode punctuation character]. + +9. Emphasis begins with a delimiter that [can open emphasis] and ends + with a delimiter that [can close emphasis], and that uses the same + character (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both + open and close emphasis, then the sum of the lengths of the + delimiter runs containing the opening and closing delimiters + must not be a multiple of 3 unless both lengths are + multiples of 3. + +10. 
Strong emphasis begins with a delimiter that + [can open strong emphasis] and ends with a delimiter that + [can close strong emphasis], and that uses the same character + (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both open + and close strong emphasis, then the sum of the lengths of + the delimiter runs containing the opening and closing + delimiters must not be a multiple of 3 unless both lengths + are multiples of 3. + +11. A literal `*` character cannot occur at the beginning or end of + `*`-delimited emphasis or `**`-delimited strong emphasis, unless it + is backslash-escaped. + +12. A literal `_` character cannot occur at the beginning or end of + `_`-delimited emphasis or `__`-delimited strong emphasis, unless it + is backslash-escaped. + +Where rules 1--12 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +13. The number of nestings should be minimized. Thus, for example, + an interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +14. An interpretation `<em><strong>...</strong></em>` is always + preferred to `<strong><em>...</em></strong>`. + +15. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first takes precedence. Thus, for example, + `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather + than `*foo <em>bar* baz</em>`. + +16. When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) takes precedence. Thus, for example, + `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` + rather than `<strong>foo **bar baz</strong>`. + +17. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. 
Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. + +These rules can be illustrated through a series of examples. + +Rule 1: + +```````````````````````````````` example +*foo bar* +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is followed by +whitespace, and hence not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a * foo bar* +. +

a * foo bar*

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a*"foo"* +. +

a*"foo"*

+```````````````````````````````` + + +Unicode nonbreaking spaces count as whitespace, too: + +```````````````````````````````` example +* a * +. +

* a *

+```````````````````````````````` + + +Intraword emphasis with `*` is permitted: + +```````````````````````````````` example +foo*bar* +. +

foobar

+```````````````````````````````` + + +```````````````````````````````` example +5*6*78 +. +

5678

+```````````````````````````````` + + +Rule 2: + +```````````````````````````````` example +_foo bar_ +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is followed by +whitespace: + +```````````````````````````````` example +_ foo bar_ +. +

_ foo bar_

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a_"foo"_ +. +

a_"foo"_

+```````````````````````````````` + + +Emphasis with `_` is not allowed inside words: + +```````````````````````````````` example +foo_bar_ +. +

foo_bar_

+```````````````````````````````` + + +```````````````````````````````` example +5_6_78 +. +

5_6_78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням_стремятся_ +. +

пристаням_стремятся_

+```````````````````````````````` + + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +```````````````````````````````` example +aa_"bb"_cc +. +

aa_"bb"_cc

+```````````````````````````````` + + +This is emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-_(bar)_ +. +

foo-(bar)

+```````````````````````````````` + + +Rule 3: + +This is not emphasis, because the closing delimiter does +not match the opening delimiter: + +```````````````````````````````` example +_foo* +. +

_foo*

+```````````````````````````````` + + +This is not emphasis, because the closing `*` is preceded by +whitespace: + +```````````````````````````````` example +*foo bar * +. +

*foo bar *

+```````````````````````````````` + + +A line ending also counts as whitespace: + +```````````````````````````````` example +*foo bar +* +. +

*foo bar +*

+```````````````````````````````` + + +This is not emphasis, because the second `*` is +preceded by punctuation and followed by an alphanumeric +(hence it is not part of a [right-flanking delimiter run]): + +```````````````````````````````` example +*(*foo) +. +

*(*foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +*(*foo*)* +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis with `*` is allowed: + +```````````````````````````````` example +*foo*bar +. +

foobar

+```````````````````````````````` + + + +Rule 4: + +This is not emphasis, because the closing `_` is preceded by +whitespace: + +```````````````````````````````` example +_foo bar _ +. +

_foo bar _

+```````````````````````````````` + + +This is not emphasis, because the second `_` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +_(_foo) +. +

_(_foo)

+```````````````````````````````` + + +This is emphasis within emphasis: + +```````````````````````````````` example +_(_foo_)_ +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis is disallowed for `_`: + +```````````````````````````````` example +_foo_bar +. +

_foo_bar

+```````````````````````````````` + + +```````````````````````````````` example +_пристаням_стремятся +. +

_пристаням_стремятся

+```````````````````````````````` + + +```````````````````````````````` example +_foo_bar_baz_ +. +

foo_bar_baz

+```````````````````````````````` + + +This is emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +_(bar)_. +. +

(bar).

+```````````````````````````````` + + +Rule 5: + +```````````````````````````````` example +**foo bar** +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +** foo bar** +. +

** foo bar**

+```````````````````````````````` + + +This is not strong emphasis, because the opening `**` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a**"foo"** +. +

a**"foo"**

+```````````````````````````````` + + +Intraword strong emphasis with `**` is permitted: + +```````````````````````````````` example +foo**bar** +. +

foobar

+```````````````````````````````` + + +Rule 6: + +```````````````````````````````` example +__foo bar__ +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +__ foo bar__ +. +

__ foo bar__

+```````````````````````````````` + + +A line ending counts as whitespace: +```````````````````````````````` example +__ +foo bar__ +. +

__ +foo bar__

+```````````````````````````````` + + +This is not strong emphasis, because the opening `__` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a__"foo"__ +. +

a__"foo"__

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +foo__bar__ +. +

foo__bar__

+```````````````````````````````` + + +```````````````````````````````` example +5__6__78 +. +

5__6__78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням__стремятся__ +. +

пристаням__стремятся__

+```````````````````````````````` + + +```````````````````````````````` example +__foo, __bar__, baz__ +. +

foo, bar, baz

+```````````````````````````````` + + +This is strong emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-__(bar)__ +. +

foo-(bar)

+```````````````````````````````` + + + +Rule 7: + +This is not strong emphasis, because the closing delimiter is preceded +by whitespace: + +```````````````````````````````` example +**foo bar ** +. +

**foo bar **

+```````````````````````````````` + + +(Nor can it be interpreted as an emphasized `*foo bar *`, because of +Rule 11.) + +This is not strong emphasis, because the second `**` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +**(**foo) +. +

**(**foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with these examples: + +```````````````````````````````` example +*(**foo**)* +. +

(foo)

+```````````````````````````````` + + +```````````````````````````````` example +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** +. +

Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

+```````````````````````````````` + + +```````````````````````````````` example +**foo "*bar*" foo** +. +

foo "bar" foo

+```````````````````````````````` + + +Intraword emphasis: + +```````````````````````````````` example +**foo**bar +. +

foobar

+```````````````````````````````` + + +Rule 8: + +This is not strong emphasis, because the closing delimiter is +preceded by whitespace: + +```````````````````````````````` example +__foo bar __ +. +

__foo bar __

+```````````````````````````````` + + +This is not strong emphasis, because the second `__` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +__(__foo) +. +

__(__foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +_(__foo__)_ +. +

(foo)

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +__foo__bar +. +

__foo__bar

+```````````````````````````````` + + +```````````````````````````````` example +__пристаням__стремятся +. +

__пристаням__стремятся

+```````````````````````````````` + + +```````````````````````````````` example +__foo__bar__baz__ +. +

foo__bar__baz

+```````````````````````````````` + + +This is strong emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +__(bar)__. +. +

(bar).

+```````````````````````````````` + + +Rule 9: + +Any nonempty sequence of inline elements can be the contents of an +emphasized span. + +```````````````````````````````` example +*foo [bar](/url)* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo +bar* +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside emphasis: + +```````````````````````````````` example +_foo __bar__ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +_foo _bar_ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ bar_ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar** baz* +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +*foo**bar**baz* +. +

foobarbaz

+```````````````````````````````` + +Note that in the preceding case, the interpretation + +``` markdown +

foobarbaz

+``` + + +is precluded by the condition that a delimiter that +can both open and close (like the `*` after `foo`) +cannot form emphasis if the sum of the lengths of +the delimiter runs containing the opening and +closing delimiters is a multiple of 3 unless +both lengths are multiples of 3. + + +For the same reason, we don't get two consecutive +emphasis sections in this example: + +```````````````````````````````` example +*foo**bar* +. +

foo**bar

+```````````````````````````````` + + +The same condition ensures that the following +cases are all strong emphasis nested inside +emphasis, even when the interior whitespace is +omitted: + + +```````````````````````````````` example +***foo** bar* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar*** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo**bar*** +. +

foobar

+```````````````````````````````` + + +When the lengths of the interior closing and opening +delimiter runs are *both* multiples of 3, though, +they can match to create emphasis: + +```````````````````````````````` example +foo***bar***baz +. +

foobarbaz

+```````````````````````````````` + +```````````````````````````````` example +foo******bar*********baz +. +

foobar***baz

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +*foo **bar *baz* bim** bop* +. +

foo bar baz bim bop

+```````````````````````````````` + + +```````````````````````````````` example +*foo [*bar*](/url)* +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +** is not an empty emphasis +. +

** is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +**** is not an empty strong emphasis +. +

**** is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 10: + +Any nonempty sequence of inline elements can be the contents of a +strongly emphasized span. + +```````````````````````````````` example +**foo [bar](/url)** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo +bar** +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside strong emphasis: + +```````````````````````````````` example +__foo _bar_ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo __bar__ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +____foo__ bar__ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo **bar**** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar* baz** +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +**foo*bar*baz** +. +

foobarbaz

+```````````````````````````````` + + +```````````````````````````````` example +***foo* bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar*** +. +

foo bar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +**foo *bar **baz** +bim* bop** +. +

foo bar baz +bim bop

+```````````````````````````````` + + +```````````````````````````````` example +**foo [*bar*](/url)** +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +__ is not an empty emphasis +. +

__ is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +____ is not an empty strong emphasis +. +

____ is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 11: + +```````````````````````````````` example +foo *** +. +

foo ***

+```````````````````````````````` + + +```````````````````````````````` example +foo *\** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo *_* +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo ***** +. +

foo *****

+```````````````````````````````` + + +```````````````````````````````` example +foo **\*** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo **_** +. +

foo _

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 11 determines +that the excess literal `*` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +**foo* +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +*foo** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +***foo** +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +****foo* +. +

***foo

+```````````````````````````````` + + +```````````````````````````````` example +**foo*** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +*foo**** +. +

foo***

+```````````````````````````````` + + + +Rule 12: + +```````````````````````````````` example +foo ___ +. +

foo ___

+```````````````````````````````` + + +```````````````````````````````` example +foo _\__ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo _*_ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo _____ +. +

foo _____

+```````````````````````````````` + + +```````````````````````````````` example +foo __\___ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo __*__ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ +. +

_foo

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 12 determines +that the excess literal `_` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +_foo__ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +___foo__ +. +

_foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo_ +. +

___foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo___ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +_foo____ +. +

foo___

+```````````````````````````````` + + +Rule 13 implies that if you want emphasis nested directly inside +emphasis, you must use different delimiters: + +```````````````````````````````` example +**foo** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*_foo_* +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo__ +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_*foo*_ +. +

foo

+```````````````````````````````` + + +However, strong emphasis within strong emphasis is possible without +switching delimiters: + +```````````````````````````````` example +****foo**** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo____ +. +

foo

+```````````````````````````````` + + + +Rule 13 can be applied to arbitrarily long sequences of +delimiters: + +```````````````````````````````` example +******foo****** +. +

foo

+```````````````````````````````` + + +Rule 14: + +```````````````````````````````` example +***foo*** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_____foo_____ +. +

foo

+```````````````````````````````` + + +Rule 15: + +```````````````````````````````` example +*foo _bar* baz_ +. +

foo _bar baz_

+```````````````````````````````` + + +```````````````````````````````` example +*foo __bar *baz bim__ bam* +. +

foo bar *baz bim bam

+```````````````````````````````` + + +Rule 16: + +```````````````````````````````` example +**foo **bar baz** +. +

**foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar baz* +. +

*foo bar baz

+```````````````````````````````` + + +Rule 17: + +```````````````````````````````` example +*[bar*](/url) +. +

*bar*

+```````````````````````````````` + + +```````````````````````````````` example +_foo [bar_](/url) +. +

_foo bar_

+```````````````````````````````` + + +```````````````````````````````` example +* +. +

*

+```````````````````````````````` + + +```````````````````````````````` example +** +. +

**

+```````````````````````````````` + + +```````````````````````````````` example +__ +. +

__

+```````````````````````````````` + + +```````````````````````````````` example +*a `*`* +. +

a *

+```````````````````````````````` + + +```````````````````````````````` example +_a `_`_ +. +

a _

+```````````````````````````````` + + +```````````````````````````````` example +**a +. +

**ahttp://foo.bar/?q=**

+```````````````````````````````` + + +```````````````````````````````` example +__a +. +

__ahttp://foo.bar/?q=__

+```````````````````````````````` + + + +## Links + +A link contains [link text] (the visible text), a [link destination] +(the URI that is the link destination), and optionally a [link title]. +There are two basic kinds of links in Markdown. In [inline links] the +destination and title are given immediately after the link text. In +[reference links] the destination and title are defined elsewhere in +the document. + +A [link text](@) consists of a sequence of zero or more +inline elements enclosed by square brackets (`[` and `]`). The +following rules apply: + +- Links may not contain other links, at any level of nesting. If + multiple otherwise valid link definitions appear nested inside each + other, the inner-most definition is used. + +- Brackets are allowed in the [link text] only if (a) they + are backslash-escaped or (b) they appear as a matched pair of brackets, + with an open bracket `[`, a sequence of zero or more inlines, and + a close bracket `]`. + +- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly + than the brackets in link text. Thus, for example, + `` [foo`]` `` could not be a link text, since the second `]` + is part of a code span. + +- The brackets in link text bind more tightly than markers for + [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. + +A [link destination](@) consists of either + +- a sequence of zero or more characters between an opening `<` and a + closing `>` that contains no line endings or unescaped + `<` or `>` characters, or + +- a nonempty sequence of characters that does not start with `<`, + does not include [ASCII control characters][ASCII control character] + or [space] character, and includes parentheses only if (a) they are + backslash-escaped or (b) they are part of a balanced pair of + unescaped parentheses. + (Implementations may impose limits on parentheses nesting to + avoid performance issues, but at least three levels of nesting + should be supported.) 
+ +A [link title](@) consists of either + +- a sequence of zero or more characters between straight double-quote + characters (`"`), including a `"` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between straight single-quote + characters (`'`), including a `'` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between matching parentheses + (`(...)`), including a `(` or `)` character only if it is + backslash-escaped. + +Although [link titles] may span multiple lines, they may not contain +a [blank line]. + +An [inline link](@) consists of a [link text] followed immediately +by a left parenthesis `(`, an optional [link destination], an optional +[link title], and a right parenthesis `)`. +These four components may be separated by spaces, tabs, and up to one line +ending. +If both [link destination] and [link title] are present, they *must* be +separated by spaces, tabs, and up to one line ending. + +The link's text consists of the inlines contained +in the [link text] (excluding the enclosing square brackets). +The link's URI consists of the link destination, excluding enclosing +`<...>` if present, with backslash-escapes in effect as described +above. The link's title consists of the link title, excluding its +enclosing delimiters, with backslash-escapes in effect as described +above. + +Here is a simple inline link: + +```````````````````````````````` example +[link](/uri "title") +. +

link

+```````````````````````````````` + + +The title, the link text and even +the destination may be omitted: + +```````````````````````````````` example +[link](/uri) +. +

link

+```````````````````````````````` + +```````````````````````````````` example +[](./target.md) +. +

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link](<>) +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[]() +. +

+```````````````````````````````` + +The destination can only contain spaces if it is +enclosed in pointy brackets: + +```````````````````````````````` example +[link](/my uri) +. +

[link](/my uri)

+```````````````````````````````` + +```````````````````````````````` example +[link](
) +. +

link

+```````````````````````````````` + +The destination cannot contain line endings, +even if enclosed in pointy brackets: + +```````````````````````````````` example +[link](foo +bar) +. +

[link](foo +bar)

+```````````````````````````````` + +```````````````````````````````` example +[link]() +. +

[link]()

+```````````````````````````````` + +The destination can contain `)` if it is enclosed +in pointy brackets: + +```````````````````````````````` example +[a]() +. +

a

+```````````````````````````````` + +Pointy brackets that enclose links must be unescaped: + +```````````````````````````````` example +[link]() +. +

[link](<foo>)

+```````````````````````````````` + +These are not links, because the opening pointy bracket +is not matched properly: + +```````````````````````````````` example +[a]( +[a](c) +. +

[a](<b)c +[a](<b)c> +[a](c)

+```````````````````````````````` + +Parentheses inside the link destination may be escaped: + +```````````````````````````````` example +[link](\(foo\)) +. +

link

+```````````````````````````````` + +Any number of parentheses are allowed without escaping, as long as they are +balanced: + +```````````````````````````````` example +[link](foo(and(bar))) +. +

link

+```````````````````````````````` + +However, if you have unbalanced parentheses, you need to escape or use the +`<...>` form: + +```````````````````````````````` example +[link](foo(and(bar)) +. +

[link](foo(and(bar))

+```````````````````````````````` + + +```````````````````````````````` example +[link](foo\(and\(bar\)) +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +Parentheses and other symbols can also be escaped, as usual +in Markdown: + +```````````````````````````````` example +[link](foo\)\:) +. +

link

+```````````````````````````````` + + +A link can contain fragment identifiers and queries: + +```````````````````````````````` example +[link](#fragment) + +[link](http://example.com#fragment) + +[link](http://example.com?foo=3#frag) +. +

link

+

link

+

link

+```````````````````````````````` + + +Note that a backslash before a non-escapable character is +just a backslash: + +```````````````````````````````` example +[link](foo\bar) +. +

link

+```````````````````````````````` + + +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. Entity and +numerical character references in the destination will be parsed +into the corresponding Unicode code points, as usual. These may +be optionally URL-escaped when written as HTML, but this spec +does not enforce any particular policy for rendering URLs in +HTML or other formats. Renderers may make different decisions +about how to escape or normalize URLs in the output. + +```````````````````````````````` example +[link](foo%20bä) +. +

link

+```````````````````````````````` + + +Note that, because titles can often be parsed as destinations, +if you try to omit the destination and keep the title, you'll +get unexpected results: + +```````````````````````````````` example +[link]("title") +. +

link

+```````````````````````````````` + + +Titles may be in single quotes, double quotes, or parentheses: + +```````````````````````````````` example +[link](/url "title") +[link](/url 'title') +[link](/url (title)) +. +

link +link +link

+```````````````````````````````` + + +Backslash escapes and entity and numeric character references +may be used in titles: + +```````````````````````````````` example +[link](/url "title \""") +. +

link

+```````````````````````````````` + + +Titles must be separated from the link using spaces, tabs, and up to one line +ending. +Other [Unicode whitespace] like non-breaking space doesn't work. + +```````````````````````````````` example +[link](/url "title") +. +

link

+```````````````````````````````` + + +Nested balanced quotes are not allowed without escaping: + +```````````````````````````````` example +[link](/url "title "and" title") +. +

[link](/url "title "and" title")

+```````````````````````````````` + + +But it is easy to work around this by using a different quote type: + +```````````````````````````````` example +[link](/url 'title "and" title') +. +

link

+```````````````````````````````` + + +(Note: `Markdown.pl` did allow double quotes inside a double-quoted +title, and its test suite included a test demonstrating this. +But it is hard to see a good rationale for the extra complexity this +brings, since there are already many ways---backslash escaping, +entity and numeric character references, or using a different +quote type for the enclosing title---to write titles containing +double quotes. `Markdown.pl`'s handling of titles has a number +of other strange features. For example, it allows single-quoted +titles in inline links, but not reference links. And, in +reference links but not inline links, it allows a title to begin +with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows +titles with no closing quotation mark, though 1.0.2b8 does not. +It seems preferable to adopt a simple, rational rule that works +the same way in inline links and link reference definitions.) + +Spaces, tabs, and up to one line ending are allowed around the destination and +title: + +```````````````````````````````` example +[link]( /uri + "title" ) +. +

link

+```````````````````````````````` + + +But it is not allowed between the link text and the +following parenthesis: + +```````````````````````````````` example +[link] (/uri) +. +

[link] (/uri)

+```````````````````````````````` + + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]](/uri) +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link] bar](/uri) +. +

[link] bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link [bar](/uri) +. +

[link bar

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar](/uri) +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*](/uri) +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)](/uri) +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)](/uri) +. +

[foo bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo *[bar [baz](/uri)](/uri)*](/uri) +. +

[foo [bar baz](/uri)](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +![[[foo](uri1)](uri2)](uri3) +. +

[foo](uri2)

+```````````````````````````````` + + +These cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*](/uri) +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar](baz*) +. +

foo *bar

+```````````````````````````````` + + +Note that brackets that *aren't* part of links do not take +precedence: + +```````````````````````````````` example +*foo [bar* baz] +. +

foo [bar baz]

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`](/uri)` +. +

[foo](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo +. +

[foohttp://example.com/?search=](uri)

+```````````````````````````````` + + +There are three kinds of [reference link](@)s: +[full](#full-reference-link), [collapsed](#collapsed-reference-link), +and [shortcut](#shortcut-reference-link). + +A [full reference link](@) +consists of a [link text] immediately followed by a [link label] +that [matches] a [link reference definition] elsewhere in the document. + +A [link label](@) begins with a left bracket (`[`) and ends +with the first right bracket (`]`) that is not backslash-escaped. +Between these brackets there must be at least one character that is not a space, +tab, or line ending. +Unescaped square bracket characters are not allowed inside the +opening and closing square brackets of [link labels]. A link +label can have at most 999 characters inside the square +brackets. + +One label [matches](@) +another just in case their normalized forms are equal. To normalize a +label, strip off the opening and closing brackets, +perform the *Unicode case fold*, strip leading and trailing +spaces, tabs, and line endings, and collapse consecutive internal +spaces, tabs, and line endings to a single space. If there are multiple +matching reference link definitions, the one that comes first in the +document is used. (It is desirable in such cases to emit a warning.) + +The link's URI and title are provided by the matching [link +reference definition]. + +Here is a simple example: + +```````````````````````````````` example +[foo][bar] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +The rules for the [link text] are the same as with +[inline links]. Thus: + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]][ref] + +[ref]: /uri +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar][ref] + +[ref]: /uri +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*][ref] + +[ref]: /uri +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)][ref] + +[ref]: /uri +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)][ref] + +[ref]: /uri +. +

[foo bar]ref

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar [baz][ref]*][ref] + +[ref]: /uri +. +

[foo bar baz]ref

+```````````````````````````````` + + +(In the examples above, we have two [shortcut reference links] +instead of one [full reference link].) + +The following cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*][ref] + +[ref]: /uri +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar][ref]* + +[ref]: /uri +. +

foo *bar*

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`][ref]` + +[ref]: /uri +. +

[foo][ref]

+```````````````````````````````` + + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foohttp://example.com/?search=][ref]

+```````````````````````````````` + + +Matching is case-insensitive: + +```````````````````````````````` example +[foo][BaR] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +Unicode case fold is used: + +```````````````````````````````` example +[ẞ] + +[SS]: /url +. +

+```````````````````````````````` + + +Consecutive internal spaces, tabs, and line endings are treated as one space for +purposes of determining matching: + +```````````````````````````````` example +[Foo + bar]: /url + +[Baz][Foo bar] +. +

Baz

+```````````````````````````````` + + +No spaces, tabs, or line endings are allowed between the [link text] and the +[link label]: + +```````````````````````````````` example +[foo] [bar] + +[bar]: /url "title" +. +

[foo] bar

+```````````````````````````````` + + +```````````````````````````````` example +[foo] +[bar] + +[bar]: /url "title" +. +

[foo] +bar

+```````````````````````````````` + + +This is a departure from John Gruber's original Markdown syntax +description, which explicitly allows whitespace between the link +text and the link label. It brings reference links in line with +[inline links], which (according to both original Markdown and +this spec) cannot have whitespace after the link text. More +importantly, it prevents inadvertent capture of consecutive +[shortcut reference links]. If whitespace is allowed between the +link text and the link label, then in the following we will have +a single reference link, not two shortcut reference links, as +intended: + +``` markdown +[foo] +[bar] + +[foo]: /url1 +[bar]: /url2 +``` + +(Note that [shortcut reference links] were introduced by Gruber +himself in a beta version of `Markdown.pl`, but never included +in the official syntax description. Without shortcut reference +links, it is harmless to allow space between the link text and +link label; but once shortcut references are introduced, it is +too dangerous to allow this, as it frequently leads to +unintended results.) + +When there are multiple matching [link reference definitions], +the first is used: + +```````````````````````````````` example +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] +. +

bar

+```````````````````````````````` + + +Note that matching is performed on normalized strings, not parsed +inline content. So the following does not match, even though the +labels define equivalent inline content: + +```````````````````````````````` example +[bar][foo\!] + +[foo!]: /url +. +

[bar][foo!]

+```````````````````````````````` + + +[Link labels] cannot contain brackets, unless they are +backslash-escaped: + +```````````````````````````````` example +[foo][ref[] + +[ref[]: /uri +. +

[foo][ref[]

+

[ref[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref[bar]] + +[ref[bar]]: /uri +. +

[foo][ref[bar]]

+

[ref[bar]]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[[[foo]]] + +[[[foo]]]: /url +. +

[[[foo]]]

+

[[[foo]]]: /url

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref\[] + +[ref\[]: /uri +. +

foo

+```````````````````````````````` + + +Note that in this example `]` is not backslash-escaped: + +```````````````````````````````` example +[bar\\]: /uri + +[bar\\] +. +

bar\

+```````````````````````````````` + + +A [link label] must contain at least one character that is not a space, tab, or +line ending: + +```````````````````````````````` example +[] + +[]: /uri +. +

[]

+

[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[ + ] + +[ + ]: /uri +. +

[ +]

+

[ +]: /uri

+```````````````````````````````` + + +A [collapsed reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document, followed by the string `[]`. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title are +provided by the matching reference link definition. Thus, +`[foo][]` is equivalent to `[foo][foo]`. + +```````````````````````````````` example +[foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + + +As with full reference links, spaces, tabs, or line endings are not +allowed between the two sets of brackets: + +```````````````````````````````` example +[foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +A [shortcut reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document and is not followed by `[]` or a link label. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title +are provided by the matching link reference definition. +Thus, `[foo]` is equivalent to `[foo][]`. + +```````````````````````````````` example +[foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +[[*foo* bar]] + +[*foo* bar]: /url "title" +. +

[foo bar]

+```````````````````````````````` + + +```````````````````````````````` example +[[bar [foo] + +[foo]: /url +. +

[[bar foo

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +A space after the link text should be preserved: + +```````````````````````````````` example +[foo] bar + +[foo]: /url +. +

foo bar

+```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening bracket to avoid links: + +```````````````````````````````` example +\[foo] + +[foo]: /url "title" +. +

[foo]

+```````````````````````````````` + + +Note that this is a link, because a link label ends with the first +following closing bracket: + +```````````````````````````````` example +[foo*]: /url + +*[foo*] +. +

*foo*

+```````````````````````````````` + + +Full and collapsed references take precedence over shortcut +references: + +```````````````````````````````` example +[foo][bar] + +[foo]: /url1 +[bar]: /url2 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo][] + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +Inline links also take precedence: + +```````````````````````````````` example +[foo]() + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo](not a link) + +[foo]: /url1 +. +

foo(not a link)

+```````````````````````````````` + +In the following case `[bar][baz]` is parsed as a reference, +`[foo]` as normal text: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url +. +

[foo]bar

+```````````````````````````````` + + +Here, though, `[foo][bar]` is parsed as a reference, since +`[bar]` is defined: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 +. +

foobaz

+```````````````````````````````` + + +Here `[foo]` is not parsed as a shortcut reference, because it +is followed by a link label (even though `[bar]` is not defined): + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 +. +

[foo]bar

+```````````````````````````````` + + + +## Images + +Syntax for images is like the syntax for links, with one +difference. Instead of [link text], we have an +[image description](@). The rules for this are the +same as for [link text], except that (a) an +image description starts with `![` rather than `[`, and +(b) an image description may contain links. +An image description has inline elements +as its contents. When an image is rendered to HTML, +this is standardly used as the image's `alt` attribute. + +```````````````````````````````` example +![foo](/url "title") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo ![bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo [bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +Though this spec is concerned with parsing, not rendering, it is +recommended that in rendering to HTML, only the plain string content +of the [image description] be used. Note that in +the above example, the alt attribute's value is `foo bar`, not `foo +[bar](/url)` or `foo bar`. Only the plain string +content is rendered, without formatting. + +```````````````````````````````` example +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo](train.jpg) +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +My ![foo bar](/path/to/train.jpg "title" ) +. +

My foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo]() +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![](/url) +. +

+```````````````````````````````` + + +Reference-style: + +```````````````````````````````` example +![foo][bar] + +[bar]: /url +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo][bar] + +[BAR]: /url +. +

foo

+```````````````````````````````` + + +Collapsed: + +```````````````````````````````` example +![foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The labels are case-insensitive: + +```````````````````````````````` example +![Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +As with reference links, spaces, tabs, and line endings are not allowed +between the two sets of brackets: + +```````````````````````````````` example +![foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +Shortcut: + +```````````````````````````````` example +![foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +Note that link labels cannot contain unescaped brackets: + +```````````````````````````````` example +![[foo]] + +[[foo]]: /url "title" +. +

![[foo]]

+

[[foo]]: /url "title"

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +![Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +If you just want a literal `!` followed by bracketed text, you can +backslash-escape the opening `[`: + +```````````````````````````````` example +!\[foo] + +[foo]: /url "title" +. +

![foo]

+```````````````````````````````` + + +If you want a link after a literal `!`, backslash-escape the +`!`: + +```````````````````````````````` example +\![foo] + +[foo]: /url "title" +. +

!foo

+```````````````````````````````` + + +## Autolinks + +[Autolink](@)s are absolute URIs and email addresses inside +`<` and `>`. They are parsed as links, with the URL or email address +as the link label. + +A [URI autolink](@) consists of `<`, followed by an +[absolute URI] followed by `>`. It is parsed as +a link to the URI, with the URI as the link's label. + +An [absolute URI](@), +for these purposes, consists of a [scheme] followed by a colon (`:`) +followed by zero or more characters other than [ASCII control +characters][ASCII control character], [space], `<`, and `>`. +If the URI includes these characters, they must be percent-encoded +(e.g. `%20` for a space). + +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). + +Here are some valid autolinks: + +```````````````````````````````` example + +. +

http://foo.bar.baz

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://foo.bar.baz/test?q=hello&id=22&boolean

+```````````````````````````````` + + +```````````````````````````````` example + +. +

irc://foo.bar:2233/baz

+```````````````````````````````` + + +Uppercase is also fine: + +```````````````````````````````` example + +. +

MAILTO:FOO@BAR.BAZ

+```````````````````````````````` + + +Note that many strings that count as [absolute URIs] for +purposes of this spec are not valid URIs, because their +schemes are not registered or because of other problems +with their syntax: + +```````````````````````````````` example + +. +

a+b+c:d

+```````````````````````````````` + + +```````````````````````````````` example + +. +

made-up-scheme://foo,bar

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://../

+```````````````````````````````` + + +```````````````````````````````` example + +. +

localhost:5001/foo

+```````````````````````````````` + + +Spaces are not allowed in autolinks: + +```````````````````````````````` example + +. +

<http://foo.bar/baz bim>

+```````````````````````````````` + + +Backslash-escapes do not work inside autolinks: + +```````````````````````````````` example + +. +

http://example.com/\[\

+```````````````````````````````` + + +An [email autolink](@) +consists of `<`, followed by an [email address], +followed by `>`. The link's label is the email address, +and the URL is `mailto:` followed by the email address. + +An [email address](@), +for these purposes, is anything that matches +the [non-normative regex from the HTML5 +spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): + + /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + +Examples of email autolinks: + +```````````````````````````````` example + +. +

foo@bar.example.com

+```````````````````````````````` + + +```````````````````````````````` example + +. +

foo+special@Bar.baz-bar0.com

+```````````````````````````````` + + +Backslash-escapes do not work inside email autolinks: + +```````````````````````````````` example + +. +

<foo+@bar.example.com>

+```````````````````````````````` + + +These are not autolinks: + +```````````````````````````````` example +<> +. +

<>

+```````````````````````````````` + + +```````````````````````````````` example +< http://foo.bar > +. +

< http://foo.bar >

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<m:abc>

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<foo.bar.baz>

+```````````````````````````````` + + +```````````````````````````````` example +http://example.com +. +

http://example.com

+```````````````````````````````` + + +```````````````````````````````` example +foo@bar.example.com +. +

foo@bar.example.com

+```````````````````````````````` + + +## Raw HTML + +Text between `<` and `>` that looks like an HTML tag is parsed as a +raw HTML tag and will be rendered in HTML without escaping. +Tag and attribute names are not limited to current HTML tags, +so custom tags (and even, say, DocBook tags) may be used. + +Here is the grammar for tags: + +A [tag name](@) consists of an ASCII letter +followed by zero or more ASCII letters, digits, or +hyphens (`-`). + +An [attribute](@) consists of spaces, tabs, and up to one line ending, +an [attribute name], and an optional +[attribute value specification]. + +An [attribute name](@) +consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII +letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML +specification restricted to ASCII. HTML5 is laxer.) + +An [attribute value specification](@) +consists of optional spaces, tabs, and up to one line ending, +a `=` character, optional spaces, tabs, and up to one line ending, +and an [attribute value]. + +An [attribute value](@) +consists of an [unquoted attribute value], +a [single-quoted attribute value], or a [double-quoted attribute value]. + +An [unquoted attribute value](@) +is a nonempty string of characters not +including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``. + +A [single-quoted attribute value](@) +consists of `'`, zero or more +characters not including `'`, and a final `'`. + +A [double-quoted attribute value](@) +consists of `"`, zero or more +characters not including `"`, and a final `"`. + +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional spaces, tabs, and up to one line ending, +an optional `/` character, and a `>` character. + +A [closing tag](@) consists of the string `</`, a +[tag name], optional spaces, tabs, and up to one line ending, and the character +`>`. + +An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of +characters not including the string `-->`, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). 
+ +A [processing instruction](@) +consists of the string `<?`, a string +of characters not including the string `?>`, and the string +`?>`. + +A [declaration](@) consists of the string `<!`, an ASCII letter, zero or more +characters not including the character `>`, and the character `>`. + +A [CDATA section](@) consists of +the string `<![CDATA[`, a string of characters not including the string +`]]>`, and the string `]]>`. + +An [HTML tag](@) consists of an [open tag], a [closing tag], +an [HTML comment], a [processing instruction], a [declaration], +or a [CDATA section]. + +Here are some simple open tags: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Empty elements: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Whitespace is allowed: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +With attributes: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Custom tag names can be used: + +```````````````````````````````` example +Foo +. +

Foo

+```````````````````````````````` + + +Illegal tag names, not parsed as HTML: + +```````````````````````````````` example +<33> <__> +. +

<33> <__>

+```````````````````````````````` + + +Illegal attribute names: + +```````````````````````````````` example +
+. +

<a h*#ref="hi">

+```````````````````````````````` + + +Illegal attribute values: + +```````````````````````````````` example +
+. +

</a href="foo">

+```````````````````````````````` + + +Comments: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +foo foo --> + +foo foo --> +. +

foo foo -->

+

foo foo -->

+```````````````````````````````` + + +Processing instructions: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +Declarations: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +CDATA sections: + +```````````````````````````````` example +foo &<]]> +. +

foo &<]]>

+```````````````````````````````` + + +Entity and numeric character references are preserved in HTML +attributes: + +```````````````````````````````` example +foo
+. +

foo

+```````````````````````````````` + + +Backslash escapes do not work in HTML attributes: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<a href=""">

+```````````````````````````````` + + +## Hard line breaks + +A line ending (not in a code span or HTML tag) that is preceded +by two or more spaces and does not occur at the end of a block +is parsed as a [hard line break](@) (rendered +in HTML as a `<br />` tag): + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +For a more visible alternative, a backslash before the +[line ending] may be used instead of two or more spaces: + +```````````````````````````````` example +foo\ +baz +. +

foo
+baz

+```````````````````````````````` + + +More than two spaces can be used: + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +Leading spaces at the beginning of the next line are ignored: + +```````````````````````````````` example +foo + bar +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +foo\ + bar +. +

foo
+bar

+```````````````````````````````` + + +Hard line breaks can occur inside emphasis, links, and other constructs +that allow inline content: + +```````````````````````````````` example +*foo +bar* +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo\ +bar* +. +

foo
+bar

+```````````````````````````````` + + +Hard line breaks do not occur inside code spans + +```````````````````````````````` example +`code +span` +. +

code span

+```````````````````````````````` + + +```````````````````````````````` example +`code\ +span` +. +

code\ span

+```````````````````````````````` + + +or HTML tags: + +```````````````````````````````` example +
+. +

+```````````````````````````````` + + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Hard line breaks are for separating inline content within a block. +Neither syntax for hard line breaks works at the end of a paragraph or +other block element: + +```````````````````````````````` example +foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +### foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +### foo +. +

foo

+```````````````````````````````` + + +## Soft line breaks + +A regular line ending (not in a code span or HTML tag) that is not +preceded by two or more spaces or a backslash is parsed as a +[softbreak](@). (A soft line break may be rendered in HTML either as a +[line ending] or as a space. The result will be the same in +browsers. In the examples here, a [line ending] will be used.) + +```````````````````````````````` example +foo +baz +. +

foo +baz

+```````````````````````````````` + + +Spaces at the end of the line and beginning of the next line are +removed: + +```````````````````````````````` example +foo + baz +. +

foo +baz

+```````````````````````````````` + + +A conforming parser may render a soft line break in HTML either as a +line ending or as a space. + +A renderer may also provide an option to render soft line breaks +as hard line breaks. + +## Textual content + +Any characters not given an interpretation by the above rules will +be parsed as plain textual content. + +```````````````````````````````` example +hello $.;'there +. +

hello $.;'there

+```````````````````````````````` + + +```````````````````````````````` example +Foo χρῆν +. +

Foo χρῆν

+```````````````````````````````` + + +Internal spaces are preserved verbatim: + +```````````````````````````````` example +Multiple spaces +. +

Multiple spaces

+```````````````````````````````` + + + + +# Appendix: A parsing strategy + +In this appendix we describe some features of the parsing strategy +used in the CommonMark reference implementations. + +## Overview + +Parsing has two phases: + +1. In the first phase, lines of input are consumed and the block +structure of the document---its division into paragraphs, block quotes, +list items, and so on---is constructed. Text is assigned to these +blocks but not parsed. Link reference definitions are parsed and a +map of links is constructed. + +2. In the second phase, the raw text contents of paragraphs and headings +are parsed into sequences of Markdown inline elements (strings, +code spans, links, emphasis, and so on), using the map of link +references constructed in phase 1. + +At each point in processing, the document is represented as a tree of +**blocks**. The root of the tree is a `document` block. The `document` +may have any number of other blocks as **children**. These children +may, in turn, have other blocks as children. The last child of a block +is normally considered **open**, meaning that subsequent lines of input +can alter its contents. (Blocks that are not open are **closed**.) +Here, for example, is a possible document tree, with the open blocks +marked by arrows: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 1: block structure + +Each line that is processed has an effect on this tree. The line is +analyzed and, depending on its contents, the document may be altered +in one or more of the following ways: + +1. One or more open blocks may be closed. +2. One or more new blocks may be created as children of the + last open block. +3. Text may be added to the last (deepest) open block remaining + on the tree. 
+ +Once a line has been incorporated into the tree in this way, +it can be discarded, so input can be read in a stream. + +For each line, we follow this procedure: + +1. First we iterate through the open blocks, starting with the +root document, and descending through last children down to the last +open block. Each block imposes a condition that the line must satisfy +if the block is to remain open. For example, a block quote requires a +`>` character. A paragraph requires a non-blank line. +In this phase we may match all or just some of the open +blocks. But we cannot close unmatched blocks yet, because we may have a +[lazy continuation line]. + +2. Next, after consuming the continuation markers for existing +blocks, we look for new block starts (e.g. `>` for a block quote). +If we encounter a new block start, we close any blocks unmatched +in step 1 before creating the new block as a child of the last +matched container block. + +3. Finally, we look at the remainder of the line (after block +markers like `>`, list markers, and indentation have been consumed). +This is text that can be incorporated into the last open +block (a paragraph, code block, heading, or raw HTML). + +Setext headings are formed when we see a line of a paragraph +that is a [setext heading underline]. + +Reference link definitions are detected when a paragraph is closed; +the accumulated text lines are parsed to see if they begin with +one or more reference link definitions. Any remainder becomes a +normal paragraph. + +We can see how this works by considering how the tree above is +generated by four lines of Markdown: + +``` markdown +> Lorem ipsum dolor +sit amet. 
+> - Qui *quodsi iracundia* +> - aliquando id +``` + +At the outset, our document model is just + +``` tree +-> document +``` + +The first line of our text, + +``` markdown +> Lorem ipsum dolor +``` + +causes a `block_quote` block to be created as a child of our +open `document` block, and a `paragraph` block as a child of +the `block_quote`. Then the text is added to the last open +block, the `paragraph`: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor" +``` + +The next line, + +``` markdown +sit amet. +``` + +is a "lazy continuation" of the open `paragraph`, so it gets added +to the paragraph's text: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor\nsit amet." +``` + +The third line, + +``` markdown +> - Qui *quodsi iracundia* +``` + +causes the `paragraph` block to be closed, and a new `list` block +opened as a child of the `block_quote`. A `list_item` is also +added as a child of the `list`, and a `paragraph` as a child of +the `list_item`. The text is then added to the new `paragraph`: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + -> list_item + -> paragraph + "Qui *quodsi iracundia*" +``` + +The fourth line, + +``` markdown +> - aliquando id +``` + +causes the `list_item` (and its child the `paragraph`) to be closed, +and a new `list_item` opened up as child of the `list`. A `paragraph` +is added as a child of the new `list_item`, to contain the text. +We thus obtain the final tree: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 2: inline structure + +Once all of the input has been parsed, all open blocks are closed. 
+ +We then "walk the tree," visiting every node, and parse raw +string contents of paragraphs and headings as inlines. At this +point we have seen all the link reference definitions, so we can +resolve reference links as we go. + +``` tree +document + block_quote + paragraph + str "Lorem ipsum dolor" + softbreak + str "sit amet." + list (type=bullet tight=true bullet_char=-) + list_item + paragraph + str "Qui " + emph + str "quodsi iracundia" + list_item + paragraph + str "aliquando id" +``` + +Notice how the [line ending] in the first paragraph has +been parsed as a `softbreak`, and the asterisks in the first list item +have become an `emph`. + +### An algorithm for parsing nested emphasis and links + +By far the trickiest part of inline parsing is handling emphasis, +strong emphasis, links, and images. This is done using the following +algorithm. + +When we're parsing inlines and we hit either + +- a run of `*` or `_` characters, or +- a `[` or `![` + +we insert a text node with these symbols as its literal content, and we +add a pointer to this text node to the [delimiter stack](@). + +The [delimiter stack] is a doubly linked list. Each +element contains a pointer to a text node, plus information about + +- the type of delimiter (`[`, `![`, `*`, `_`) +- the number of delimiters, +- whether the delimiter is "active" (all are active to start), and +- whether the delimiter is a potential opener, a potential closer, + or both (which depends on what sort of characters precede + and follow the delimiters). + +When we hit a `]` character, we call the *look for link or image* +procedure (see below). + +When we hit the end of the input, we call the *process emphasis* +procedure (see below), with `stack_bottom` = NULL. + +#### *look for link or image* + +Starting at the top of the delimiter stack, we look backwards +through the stack for an opening `[` or `![` delimiter. + +- If we don't find one, we return a literal text node `]`. 
+ +- If we do find one, but it's not *active*, we remove the inactive + delimiter from the stack, and return a literal text node `]`. + +- If we find one and it's active, then we parse ahead to see if + we have an inline link/image, reference link/image, compact reference + link/image, or shortcut reference link/image. + + + If we don't, then we remove the opening delimiter from the + delimiter stack and return a literal text node `]`. + + + If we do, then + + * We return a link or image node whose children are the inlines + after the text node pointed to by the opening delimiter. + + * We run *process emphasis* on these inlines, with the `[` opener + as `stack_bottom`. + + * We remove the opening delimiter. + + * If we have a link (and not an image), we also set all + `[` delimiters before the opening delimiter to *inactive*. (This + will prevent us from getting links within links.) + +#### *process emphasis* + +Parameter `stack_bottom` sets a lower bound to how far we +descend in the [delimiter stack]. If it is NULL, we can +go all the way to the bottom. Otherwise, we stop before +visiting `stack_bottom`. + +Let `current_position` point to the element on the [delimiter stack] +just above `stack_bottom` (or the first element if `stack_bottom` +is NULL). + +We keep track of the `openers_bottom` for each delimiter +type (`*`, `_`), indexed to the length of the closing delimiter run +(modulo 3) and to whether the closing delimiter can also be an +opener. Initialize this to `stack_bottom`. + +Then we repeat the following until we run out of potential +closers: + +- Move `current_position` forward in the delimiter stack (if needed) + until we find the first potential closer with delimiter `*` or `_`. + (This will be the potential closer closest + to the beginning of the input -- the first one in parse order.) 
+ +- Now, look back in the stack (staying above `stack_bottom` and + the `openers_bottom` for this delimiter type) for the + first matching potential opener ("matching" means same delimiter). + +- If one is found: + + + Figure out whether we have emphasis or strong emphasis: + if both closer and opener spans have length >= 2, we have + strong, otherwise regular. + + + Insert an emph or strong emph node accordingly, after + the text node corresponding to the opener. + + + Remove any delimiters between the opener and closer from + the delimiter stack. + + + Remove 1 (for regular emph) or 2 (for strong emph) delimiters + from the opening and closing text nodes. If they become empty + as a result, remove them and remove the corresponding element + of the delimiter stack. If the closing node is removed, reset + `current_position` to the next element in the stack. + +- If none is found: + + + Set `openers_bottom` to the element before `current_position`. + (We know that there are no openers for this kind of closer up to and + including this point, so this puts a lower bound on future searches.) + + + If the closer at `current_position` is not a potential opener, + remove it from the delimiter stack (since we know it can't + be a closer either). + + + Advance `current_position` to the next element in the stack. + +After we're done, we remove all delimiters above `stack_bottom` from the +delimiter stack. 
diff --git a/deps/cmark/test/spec_tests.py b/deps/cmark/test/spec_tests.py new file mode 100755 index 0000000..1b00c90 --- /dev/null +++ b/deps/cmark/test/spec_tests.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import sys +from difflib import unified_diff +import argparse +import re +import json +from cmark import CMark +from normalize import normalize_html + +parser = argparse.ArgumentParser(description='Run cmark tests.') +parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, + help='program to test') +parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', + help='path to spec') +parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', + default=None, help='limit to sections matching regex pattern') +parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') +parser.add_argument('--no-normalize', dest='normalize', + action='store_const', const=False, default=True, + help='do not normalize HTML') +parser.add_argument('-d', '--dump-tests', dest='dump_tests', + action='store_const', const=True, default=False, + help='dump tests in JSON format') +parser.add_argument('--debug-normalization', dest='debug_normalization', + action='store_const', const=True, + default=False, help='filter stdin through normalizer for testing') +parser.add_argument('-n', '--number', type=int, default=None, + help='only consider the test with the given number') +args = parser.parse_args(sys.argv[1:]) + +def out(str): + sys.stdout.buffer.write(str.encode('utf-8')) + +def print_test_header(headertext, example_number, start_line, end_line): + out("Example %d (lines %d-%d) %s\n" % (example_number,start_line,end_line,headertext)) + +def do_test(converter, test, normalize, result_counts): + [retcode, actual_html, err] = converter(test['markdown']) + if retcode == 0: + expected_html = test['html'] + unicode_error = None + if normalize: + try: 
+ passed = normalize_html(actual_html) == normalize_html(expected_html) + except UnicodeDecodeError as e: + unicode_error = e + passed = False + else: + passed = actual_html == expected_html + if passed: + result_counts['pass'] += 1 + else: + print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) + out(test['markdown'] + '\n') + if unicode_error: + out("Unicode error: " + str(unicode_error) + '\n') + out("Expected: " + repr(expected_html) + '\n') + out("Got: " + repr(actual_html) + '\n') + else: + expected_html_lines = expected_html.splitlines(True) + actual_html_lines = actual_html.splitlines(True) + for diffline in unified_diff(expected_html_lines, actual_html_lines, + "expected HTML", "actual HTML"): + out(diffline) + out('\n') + result_counts['fail'] += 1 + else: + print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) + out("program returned error code %d\n" % retcode) + sys.stdout.buffer.write(err) + result_counts['error'] += 1 + +def get_tests(specfile): + line_number = 0 + start_line = 0 + end_line = 0 + example_number = 0 + markdown_lines = [] + html_lines = [] + state = 0 # 0 regular text, 1 markdown example, 2 html output + headertext = '' + tests = [] + + header_re = re.compile('#+ ') + + with open(specfile, 'r', encoding='utf-8', newline='\n') as specf: + for line in specf: + line_number = line_number + 1 + l = line.strip() + if l == "`" * 32 + " example": + state = 1 + elif l == "`" * 32: + state = 0 + example_number = example_number + 1 + end_line = line_number + tests.append({ + "markdown":''.join(markdown_lines).replace('→',"\t"), + "html":''.join(html_lines).replace('→',"\t"), + "example": example_number, + "start_line": start_line, + "end_line": end_line, + "section": headertext}) + start_line = 0 + markdown_lines = [] + html_lines = [] + elif l == ".": + state = 2 + elif state == 1: + if start_line == 0: + start_line = line_number - 1 + markdown_lines.append(line) + elif state 
== 2: + html_lines.append(line) + elif state == 0 and re.match(header_re, line): + headertext = header_re.sub('', line).strip() + return tests + +if __name__ == "__main__": + if args.debug_normalization: + out(normalize_html(sys.stdin.read())) + exit(0) + + all_tests = get_tests(args.spec) + if args.pattern: + pattern_re = re.compile(args.pattern, re.IGNORECASE) + else: + pattern_re = re.compile('.') + tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ] + if args.dump_tests: + out(json.dumps(tests, ensure_ascii=False, indent=2)) + exit(0) + else: + skipped = len(all_tests) - len(tests) + converter = CMark(prog=args.program, library_dir=args.library_dir).to_html + result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped} + for test in tests: + do_test(converter, test, args.normalize, result_counts) + out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts)) + exit(result_counts['fail'] + result_counts['error']) diff --git a/deps/cmark/toolchain-mingw32.cmake b/deps/cmark/toolchain-mingw32.cmake new file mode 100644 index 0000000..ad84aad --- /dev/null +++ b/deps/cmark/toolchain-mingw32.cmake @@ -0,0 +1,17 @@ +# the name of the target operating system +SET(CMAKE_SYSTEM_NAME Windows) + +# which compilers to use for C and C++ +SET(CMAKE_C_COMPILER i686-w64-mingw32-gcc) +SET(CMAKE_CXX_COMPILER i686-w64-mingw32-g++) +SET(CMAKE_RC_COMPILER i686-w64-mingw32-windres) + +# here is the target environment located +SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32/ "${CMAKE_SOURCE_DIR}/windows") + +# adjust the default behaviour of the FIND_XXX() commands: +# search headers and libraries in the target environment, search +# programs in the host environment +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/deps/cmark/tools/appveyor-build.bat 
b/deps/cmark/tools/appveyor-build.bat new file mode 100644 index 0000000..73d555b --- /dev/null +++ b/deps/cmark/tools/appveyor-build.bat @@ -0,0 +1,13 @@ +@echo off + +if "%MSVC_VERSION%" == "10" goto msvc10 + +call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64 +goto build + +:msvc10 +call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 + +:build +nmake + diff --git a/deps/cmark/tools/make_entities_inc.py b/deps/cmark/tools/make_entities_inc.py new file mode 100644 index 0000000..7b8ee41 --- /dev/null +++ b/deps/cmark/tools/make_entities_inc.py @@ -0,0 +1,32 @@ +# Creates C data structures for binary lookup table of entities, +# using python's html5 entity data. +# Usage: python3 tools/make_entities_inc.py > src/entities.inc + +import html + +entities5 = html.entities.html5 + +# remove keys without semicolons. For some reason the list +# has duplicates of a few things, like auml, one with and one +# without a semicolon. +entities = sorted([(k[:-1], entities5[k].encode('utf-8')) for k in entities5.keys() if k[-1] == ';']) + +# Print out the header: +print("""/* Autogenerated by tools/make_headers_inc.py */ + +struct cmark_entity_node { + unsigned char *entity; + unsigned char bytes[8]; +}; + +#define CMARK_ENTITY_MIN_LENGTH 2 +#define CMARK_ENTITY_MAX_LENGTH 32""") + +print("#define CMARK_NUM_ENTITIES " + str(len(entities))); + +print("\nstatic const struct cmark_entity_node cmark_entities[] = {"); + +for (ent, bs) in entities: + print('{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},') + +print("};") diff --git a/deps/cmark/tools/mkcasefold.pl b/deps/cmark/tools/mkcasefold.pl new file mode 100755 index 0000000..740ce77 --- /dev/null +++ b/deps/cmark/tools/mkcasefold.pl @@ -0,0 +1,22 @@ +binmode STDOUT; +print(" switch (c) {\n"); +my $lastchar = ""; +while () { + if (/^[A-F0-9]/ and / [CF]; /) { + my ($char, $type, $subst) = m/([A-F0-9]+); ([CF]); ([^;]+)/; + if ($char eq $lastchar) 
{ + break; + } + my @subst = $subst =~ m/(\w+)/g; + printf(" case 0x%s:\n", $char); + foreach (@subst) { + printf(" bufpush(0x%s);\n", $_); + } + printf(" break;\n"); + $lastchar = $char; + } +} +printf(" default:\n"); +printf(" bufpush(c);\n"); +print(" }\n"); + diff --git a/deps/cmark/tools/xml2md.xsl b/deps/cmark/tools/xml2md.xsl new file mode 100644 index 0000000..0122e5f --- /dev/null +++ b/deps/cmark/tools/xml2md.xsl @@ -0,0 +1,319 @@ + + + + + + + + + + + + + + + + Unsupported element '' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + *** + + + + + + + + + + + + + - + + + + . + ) + + + + + + + + + + + + + + + + + + + + + + + > + + + + + > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \. + + + + + + + \) + + + + + + + \ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + * + + * + + + + ** + + ** + + + + + + + + + + + + + + + + + + + + + ! + [ + + ]( + + + + + + " + + + + + " + + ) + + + + + + + + + + + + + + + + + + + + + \ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/deps/cmark/why-cmark-and-not-x.md b/deps/cmark/why-cmark-and-not-x.md new file mode 100644 index 0000000..3fdb4e5 --- /dev/null +++ b/deps/cmark/why-cmark-and-not-x.md @@ -0,0 +1,104 @@ +Why use `cmark` and not X? +========================== + +`hoedown` +--------- + +`hoedown` (which derives from `sundown`) is slightly faster +than `cmark` in our benchmarks (0.21s vs. 0.29s). But both +are much faster than any other available implementations. + +`hoedown` boasts of including "protection against all possible +DOS attacks," but there are some chinks in the armor: + + % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | cmark + ... + user 0m0.073s + % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | hoedown + ... + 0m17.84s + +`hoedown` has many parsing bugs. Here is a selection (as of +v3.0.3): + + % hoedown + - one + - two + 1. 
three + ^D +
    +
  • one + +
      +
    • two
    • +
    • three
    • +
  • +
+ + + % hoedown + ## hi\### + ^D +

hi\

+ + + % hoedown + [ΑΓΩ]: /φου + + [αγω] + ^D +

[αγω]

+ + + % hoedown + ``` + [foo]: /url + ``` + + [foo] + ^D +

```

+ +

```

+ +

foo

+ + + % hoedown + [foo](url "ti\*tle") + ^D +

foo

+ + + % ./hoedown + - one + - two + - three + - four + ^D +
    +
  • one + +
      +
    • two
    • +
    • three
    • +
    • four
    • +
  • +
+ + +`discount` +---------- + +`cmark` is about six times faster. + +`kramdown` +---------- + +`cmark` is about a hundred times faster. + +`kramdown` also gets tied in knots by pathological input like + + python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' + + diff --git a/deps/cmark/wrappers/wrapper.py b/deps/cmark/wrappers/wrapper.py new file mode 100755 index 0000000..98e7f2b --- /dev/null +++ b/deps/cmark/wrappers/wrapper.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +# Example for using the shared library from python +# Will work with either python 2 or python 3 +# Requires cmark library to be installed + +from ctypes import CDLL, c_char_p, c_long +import sys +import platform + +sysname = platform.system() + +if sysname == 'Darwin': + libname = "libcmark.dylib" +elif sysname == 'Windows': + libname = "cmark.dll" +else: + libname = "libcmark.so" +cmark = CDLL(libname) + +markdown = cmark.cmark_markdown_to_html +markdown.restype = c_char_p +markdown.argtypes = [c_char_p, c_long, c_long] + +opts = 0 # defaults + +def md2html(text): + if sys.version_info >= (3,0): + textbytes = text.encode('utf-8') + textlen = len(textbytes) + return markdown(textbytes, textlen, opts).decode('utf-8') + else: + textbytes = text + textlen = len(text) + return markdown(textbytes, textlen, opts) + +sys.stdout.write(md2html(sys.stdin.read())) diff --git a/deps/cmark/wrappers/wrapper.rb b/deps/cmark/wrappers/wrapper.rb new file mode 100755 index 0000000..2359366 --- /dev/null +++ b/deps/cmark/wrappers/wrapper.rb @@ -0,0 +1,15 @@ +#!/usr/bin/env ruby +require 'ffi' + +module CMark + extend FFI::Library + ffi_lib ['libcmark', 'cmark'] + attach_function :cmark_markdown_to_html, [:string, :int, :int], :string +end + +def markdown_to_html(s) + len = s.bytesize + CMark::cmark_markdown_to_html(s, len, 0) +end + +STDOUT.write(markdown_to_html(ARGF.read())) diff --git a/deps/cmark/wrappers/wrapper.rkt b/deps/cmark/wrappers/wrapper.rkt new file mode 100644 index 0000000..776adb4 --- /dev/null 
+++ b/deps/cmark/wrappers/wrapper.rkt
@@ -0,0 +1,210 @@
+#lang racket/base
+
+;; requires racket >= 5.3 because of submodules
+
+;; Lowlevel interface
+
+(module low-level racket/base
+
+  (require ffi/unsafe ffi/unsafe/define)
+
+  (provide (all-defined-out))
+
+  (define-ffi-definer defcmark (ffi-lib "libcmark"))
+
+  (define _cmark_node_type
+    (_enum '(;; Error status
+             none
+             ;; Block
+             document block-quote list item code-block
+             html-block custom-block
+             paragraph heading thematic-break
+             ;; ?? first-block = document
+             ;; ?? last-block = thematic-break
+             ;; Inline
+             text softbreak linebreak code html-inline custom-inline
+             emph strong link image
+             ;; ?? first-inline = text
+             ;; ?? last-inline = image
+             )))
+  (define _cmark_list_type
+    (_enum '(no_list bullet_list ordered_list)))
+  (define _cmark_delim_type
+    (_enum '(no_delim period_delim paren_delim)))
+  (define _cmark_opts
+    (let ([opts '([sourcepos 1] ; include sourcepos attribute on block elements
+                  [hardbreaks 2] ; render `softbreak` elements as hard line breaks
+                  [safe 3] ; defined here for API compatibility (on by default)
+                  [unsafe 17] ; render raw HTML and unsafe links
+                  [nobreaks 4] ; render `softbreak` elements as spaces
+                  [normalize 8] ; legacy (no effect)
+                  [validate-utf8 9] ; validate UTF-8 in the input
+                  [smart 10] ; straight quotes to curly, ---/-- to em/en dashes
+                  )])
+      (_bitmask (apply append (map (λ(o) `(,(car o) = ,(expt 2 (cadr o))))
+                                   opts)))))
+
+  (define-cpointer-type _node)
+
+  (defcmark cmark_markdown_to_html
+    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
+          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
+
+  (defcmark cmark_parse_document
+    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
+          -> _node))
+
+  (defcmark cmark_render_html
+    (_fun _node _cmark_opts
+          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
+
+  (defcmark cmark_node_new (_fun _cmark_node_type -> _node))
+  (defcmark cmark_node_free (_fun _node -> _void))
+
+  (defcmark cmark_node_next (_fun _node -> _node/null))
+  (defcmark cmark_node_previous (_fun _node -> _node/null))
+  (defcmark cmark_node_parent (_fun _node -> _node/null))
+  (defcmark cmark_node_first_child (_fun _node -> _node/null))
+  (defcmark cmark_node_last_child (_fun _node -> _node/null))
+
+  (defcmark cmark_node_get_user_data (_fun _node -> _racket))
+  (defcmark cmark_node_set_user_data (_fun _node _racket -> _bool))
+  (defcmark cmark_node_get_type (_fun _node -> _cmark_node_type))
+  (defcmark cmark_node_get_type_string (_fun _node -> _bytes))
+  (defcmark cmark_node_get_literal (_fun _node -> _string))
+  (defcmark cmark_node_set_literal (_fun _node _string -> _bool))
+  (defcmark cmark_node_get_heading_level (_fun _node -> _int))
+  (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool))
+  (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type))
+  (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool))
+  (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type))
+  (defcmark cmark_node_set_list_delim (_fun _node _cmark_delim_type -> _bool))
+  (defcmark cmark_node_get_list_start (_fun _node -> _int))
+  (defcmark cmark_node_set_list_start (_fun _node _int -> _bool))
+  (defcmark cmark_node_get_list_tight (_fun _node -> _bool))
+  (defcmark cmark_node_set_list_tight (_fun _node _bool -> _bool))
+  (defcmark cmark_node_get_fence_info (_fun _node -> _string))
+  (defcmark cmark_node_set_fence_info (_fun _node _string -> _bool))
+  (defcmark cmark_node_get_url (_fun _node -> _string))
+  (defcmark cmark_node_set_url (_fun _node _string -> _bool))
+  (defcmark cmark_node_get_title (_fun _node -> _string))
+  (defcmark cmark_node_set_title (_fun _node _string -> _bool))
+  (defcmark cmark_node_get_start_line (_fun _node -> _int))
+  (defcmark cmark_node_get_start_column (_fun _node -> _int))
+  (defcmark cmark_node_get_end_line (_fun _node -> _int))
+  (defcmark cmark_node_get_end_column (_fun _node -> _int))
+
+  (defcmark cmark_node_unlink (_fun _node -> _void))
+  (defcmark cmark_node_insert_before (_fun _node _node -> _bool))
+  (defcmark cmark_node_insert_after (_fun _node _node -> _bool))
+  (defcmark cmark_node_prepend_child (_fun _node _node -> _bool))
+  (defcmark cmark_node_append_child (_fun _node _node -> _bool))
+  (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))
+
+  (defcmark cmark_version (_fun -> _int))
+  (defcmark cmark_version_string (_fun -> _string))
+
+  )
+
+;; Rackety interface
+
+(module high-level racket/base
+
+  (require (submod ".." low-level) ffi/unsafe)
+
+  (provide cmark-markdown-to-html)
+  (define (cmark-markdown-to-html str [options '(normalize smart)])
+    (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
+                            options))
+
+  (require (for-syntax racket/base racket/syntax))
+  (define-syntax (make-getter+setter stx)
+    (syntax-case stx ()
+      [(_ name) (with-syntax ([(getter setter)
+                               (map (λ(op) (format-id #'name "cmark_node_~a_~a"
+                                                      op #'name))
+                                    '(get set))])
+                  #'(cons getter setter))]))
+  (define-syntax-rule (define-getters+setters name [type field ...] ...)
+    (define name (list (list 'type (make-getter+setter field) ...) ...)))
+  (define-getters+setters getters+setters
+    [heading heading_level] [code-block fence_info]
+    [link url title] [image url title]
+    [list list_type list_delim list_start list_tight])
+
+  (provide cmark->sexpr)
+  (define (cmark->sexpr node)
+    (define text (cmark_node_get_literal node))
+    (define type (cmark_node_get_type node))
+    (define children
+      (let loop ([node (cmark_node_first_child node)])
+        (if (not node) '()
+            (cons (cmark->sexpr node) (loop (cmark_node_next node))))))
+    (define info
+      (cond [(assq type getters+setters)
+             => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))]
+            [else '()]))
+    (define (assert-no what-not b)
+      (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type)))
+    (cond [(memq type '(document paragraph heading block-quote list item
+                        emph strong link image))
+           (assert-no 'text text)
+           (list type info children)]
+          [(memq type '(text code code-block html-block html-inline
+                        softbreak linebreak thematic-break))
+           (assert-no 'children (pair? children))
+           (list type info text)]
+          [else (error 'cmark->sexpr "unknown type: ~s" type)]))
+
+  (provide sexpr->cmark)
+  (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above
+    (define (loop sexpr) ; builds one node and its subtree; registers no finalizer
+      (define type (car sexpr))
+      (define info (cadr sexpr))
+      (define data (caddr sexpr))
+      (define node (cmark_node_new type))
+      (let ([gss (assq type getters+setters)])
+        (when gss
+          (unless (= (length (cdr gss)) (length info))
+            (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
+          (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info)))
+      (cond [(string? data) (cmark_node_set_literal node data)]
+            [(not data) (void)]
+            [(list? data)
+             (for ([child (in-list data)])
+               (cmark_node_append_child node (loop child)))] ; recurse via loop, not sexpr->cmark: a child must not get its own finalizer
+            [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
+      node)
+    (define root (loop sexpr))
+    (register-finalizer root cmark_node_free) ; only the root is finalized
+    root)
+
+  ;; Registers a `cmark_node_free` finalizer
+  (provide cmark-parse-document)
+  (define (cmark-parse-document str [options '(normalize smart)])
+    (define root (cmark_parse_document
+                  (if (bytes? str) str (string->bytes/utf-8 str))
+                  options))
+    (register-finalizer root cmark_node_free)
+    root)
+
+  (provide cmark-render-html)
+  (define (cmark-render-html root [options '(normalize smart)])
+    (cmark_render_html root options)))
+
+#; ;; sample use
+(begin
+  (require 'high-level racket/string)
+  (cmark-render-html
+   (cmark-parse-document
+    (string-join '("foo"
+                   "==="
+                   ""
+                   "> blah"
+                   ">"
+                   "> blah *blah* `bar()` blah:"
+                   ">"
+                   "> function foo() {"
+                   "> bar();"
+                   "> }")
+                 "\n"))))