subrepo: subdir: "deps/cmark" merged: "5ba25ff" upstream: origin: "https://github.com/commonmark/cmark.git" branch: "0.30.3" commit: "5ba25ff" git-subrepo: version: "0.4.6" commit: "d4444b563"
33 lines
957 B
Python
33 lines
957 B
Python
# Creates C data structures for binary lookup table of entities,
# using Python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc

# Import the submodule explicitly; a bare `import html` only exposes
# `html.entities` as a side effect of the package's own internal imports.
import html.entities

# Fixed preamble of the generated file: the banner comment, the node struct
# definition and the entity-name length limits.
_HEADER = """/* Autogenerated by tools/make_entities_inc.py */

struct cmark_entity_node {
unsigned char *entity;
unsigned char bytes[8];
};

#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 32"""


def _entity_rows(html5_table):
    """Return sorted ``(name, utf8_bytes)`` pairs for ';'-terminated keys.

    The html5 table lists a few entities twice (auml, for example), once
    with and once without the trailing semicolon.  Only the
    semicolon-terminated spelling is kept, with the semicolon stripped from
    the name.  The replacement text is encoded as UTF-8.
    """
    return sorted(
        (key[:-1], text.encode('utf-8'))
        for key, text in html5_table.items()
        if key.endswith(';')
    )


def generate_entities_inc(html5_table=None):
    """Build the full text of the generated entities include file.

    Args:
        html5_table: mapping of entity name (normally ending in ';') to its
            replacement string.  Defaults to ``html.entities.html5``.

    Returns:
        The generated C source as a single string without a trailing
        newline (``print`` supplies it).
    """
    if html5_table is None:
        html5_table = html.entities.html5
    rows = _entity_rows(html5_table)

    lines = [
        _HEADER,
        "#define CMARK_NUM_ENTITIES " + str(len(rows)),
        "",
        "static const struct cmark_entity_node cmark_entities[] = {",
    ]
    for name, utf8_bytes in rows:
        # Iterating a bytes object yields ints, so this renders the UTF-8
        # encoding as decimal byte values; a trailing 0 is appended.
        byte_list = ', '.join(map(str, utf8_bytes))
        lines.append('{(unsigned char*)"%s", {%s, 0}},' % (name, byte_list))
    lines.append("};")
    return "\n".join(lines)


def main():
    """Write the generated include file to standard output."""
    print(generate_entities_inc())


if __name__ == "__main__":
    main()