cmark-zig/deps/cmark/tools/make_entities_inc.py
torque 24810cbbbd
git subrepo clone --branch=0.30.3 https://github.com/commonmark/cmark.git deps/cmark
subrepo:
  subdir:   "deps/cmark"
  merged:   "5ba25ff"
upstream:
  origin:   "https://github.com/commonmark/cmark.git"
  branch:   "0.30.3"
  commit:   "5ba25ff"
git-subrepo:
  version:  "0.4.6"
  commit:   "d4444b563"
2023-09-04 15:42:46 -07:00

33 lines
957 B
Python

# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc
import html.entities  # import the submodule explicitly; `import html` alone does not promise it

# Fixed preamble of the generated file. The first line names this script so
# that a reader of the generated file can find its generator.
HEADER = """/* Autogenerated by tools/make_entities_inc.py */
struct cmark_entity_node {
unsigned char *entity;
unsigned char bytes[8];
};
#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 32"""


def collect_entities(table=None):
    """Return the sorted ``(name, utf8_bytes)`` pairs to emit.

    ``table`` maps entity names to their replacement text and defaults to
    ``html.entities.html5``. Only names ending in ';' are kept, with the
    semicolon stripped: the table lists a few entities (like ``auml``) both
    with and without the semicolon, and keeping only one form avoids
    duplicate names in the output. Sorting gives the ordered table that the
    binary lookup mentioned at the top of this file needs.
    """
    if table is None:
        table = html.entities.html5
    return sorted(
        (name[:-1], text.encode('utf-8'))
        for name, text in table.items()
        if name.endswith(';')
    )


def render_entities_inc(entities):
    """Return the full text of the generated include file.

    ``entities`` is a sequence of ``(name, utf8_bytes)`` pairs as produced by
    ``collect_entities``. Each pair becomes one C initializer whose byte list
    is written as decimal values followed by a terminating 0.
    """
    lines = [
        HEADER,
        "#define CMARK_NUM_ENTITIES " + str(len(entities)),
        "",  # blank line between the defines and the table
        "static const struct cmark_entity_node cmark_entities[] = {",
    ]
    for ent, bs in entities:
        # Iterating over bytes yields ints, so each byte prints as a decimal.
        byte_list = ', '.join(str(b) for b in (*bs, 0))
        lines.append('{(unsigned char*)"' + ent + '", {' + byte_list + '}},')
    lines.append("};")
    return "\n".join(lines) + "\n"


def main():
    """Write the generated include file to standard output."""
    print(render_entities_inc(collect_entities()), end="")


if __name__ == "__main__":
    main()