#! /usr/bin/env python
import os
from lxml.html import parse, etree, tostring
link_svg = """"""
# Every path used below is relative to the exported HTML directory.
os.chdir('export/html')
# Page names are the exported file names with the '.html' suffix removed.
# The filter matches the full '.html' suffix (dot included): a name that
# merely ends in the letters 'html' (e.g. 'notes.xhtml') would otherwise
# pass the filter and be reduced to an empty page name by rpartition.
filenames = [f.rpartition('.html')[0] for f in os.listdir() if f.endswith('.html')]
# Lookup tables keyed by page name (file name without extension):
dl = {}  # dedicated links found in each file
hl = {}  # headline links found in each file
cl = {}  # not filled in this part of the script; presumably child-section links -- TODO confirm
# Scan every exported page and record, per file, the ids of its dedicated
# links (dl) and the tag/id/text of its headlines (hl).  Problems are
# reported with print() and scanning continues.
for filename in filenames:
    dl[filename] = []
    hl[filename] = []
    # hl[filename] stores dicts, so the bare headline ids are tracked in a
    # separate set to make duplicate detection actually work.
    seen_headline_ids = set()
    tree = parse(f'{filename}.html')
    for el in tree.iter():
        # Dedicated links are anchors of the form <a id="...">
        # (per the original author, they are the only links with an id).
        if el.tag == 'a' and 'id' in el.attrib:
            link_id = el.attrib['id']
            # Raise a flag if the id coincides with a file name.
            if link_id in filenames:
                print("** Error: dedicated link name clashes with "
                      f"headline CUSTOM_ID {link_id} **")
            # Raise a flag if this id was already seen in this file;
            # otherwise record it.
            if link_id in dl[filename]:
                print(f"** Error: multiply-defined label {link_id} **")
            else:
                dl[filename].append(link_id)
        # Headlines are the h1..h6 elements that carry an id.
        if el.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6') and 'id' in el.attrib:
            headline_id = el.attrib['id']
            if headline_id in seen_headline_ids:
                print(f"** Error: multiply-defined headline {headline_id} **")
            else:
                seen_headline_ids.add(headline_id)
                hl[filename].append({
                    'tag': el.tag,
                    'id': headline_id,
                    # The text is read from the first child, which the
                    # original author notes is the <a> tag (only child).
                    # A headline without any child element falls back to
                    # its own text instead of raising IndexError.
                    'text': el[0].text if len(el) else el.text,
                })
# find all the child section links inside `