113 lines
5.3 KiB
Python
Executable File
113 lines
5.3 KiB
Python
Executable File
#! /usr/bin/env python
|
|
|
|
import os
|
|
from lxml.html import parse, etree, tostring
|
|
|
|
# Inline 16x16 SVG "link" icon (class "bi bi-link").  It is inserted verbatim
# into the generated permalink anchors further down in this script.
link_svg = "\n".join([
    '<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-link" viewBox="0 0 16 16">',
    '<path d="M6.354 5.5H4a3 3 0 0 0 0 6h3a3 3 0 0 0 2.83-4H9c-.086 0-.17.01-.25.031A2 2 0 0 1 7 10.5H4a2 2 0 1 1 0-4h1.535c.218-.376.495-.714.82-1z"/>',
    '<path d="M9 5.5a3 3 0 0 0-2.83 4h1.098A2 2 0 0 1 9 6.5h3a2 2 0 1 1 0 4h-1.535a4.02 4.02 0 0 1-.82 1H12a3 3 0 1 0 0-6H9z"/>',
    '</svg>',
])
|
|
|
|
|
|
# Every path used below is relative to the HTML export directory.
os.chdir('export/html')

# Base names (extension removed) of all exported pages.
# Match the complete '.html' suffix: a bare 'html' test would also accept a
# name such as 'xhtml', for which rpartition('.html') returns an empty base
# name.  Sorting makes the processing order independent of the arbitrary
# order in which os.listdir() returns entries.
filenames = sorted(f[:-len('.html')] for f in os.listdir()
                   if f.endswith('.html'))
|
|
|
|
# Build, per file, the tables of dedicated links and headline links:
#   dl[filename] -> list of the ids of all `<a id="...">` anchors in that file
#   hl[filename] -> list of {'tag', 'id', 'text'} dicts, one per `<h1>`..`<h6>`
#                   element that carries an id
#   cl           -> placeholder for child-section links; not filled in (see
#                   the TODO at the end of the loop)
dl = {}
hl = {}
cl = {}
for filename in filenames:
    dl[filename] = []
    hl[filename] = []
    # hl[filename] holds dicts, so a plain id can never be "in" it; track the
    # ids already seen separately to make the duplicate check effective.
    seen_headline_ids = set()
    tree = parse(f'{filename}.html')
    for el in tree.iter():
        # Dedicated links are of the form `<a id="...">`
        # (they are the only links with an id).
        if el.tag == 'a' and 'id' in el.attrib:
            link_id = el.attrib['id']
            # raise flag if the id coincides with a filename:
            if link_id in filenames:
                print("** Error: dedicated link name clashes with "
                      f"headline CUSTOM_ID {link_id} **")
            # raise flag if this id was already recorded for this file,
            # otherwise add the dedicated link to our dictionary:
            if link_id in dl[filename]:
                print(f"** Error: multiply-defined label {link_id} **")
            else:
                dl[filename].append(link_id)

        # Headline links are of the form `<h[1-6] id="...">`.
        if (el.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
                and 'id' in el.attrib):
            headline_id = el.attrib['id']
            if headline_id in seen_headline_ids:
                print(f"** Error: multiply-defined headline {headline_id} **")
            elif len(el) == 0:
                # The headline text is read from the first child (expected to
                # be the `<a>` tag); without a child there is nothing to
                # record, so report it instead of failing with IndexError.
                print(f"** Error: headline {headline_id} has no child link;"
                      " skipped **")
            else:
                seen_headline_ids.add(headline_id)
                hl[filename].append({
                    'tag': el.tag,
                    'id': headline_id,
                    'text': el[0].text,  # el[0] is the a tag (only child)
                })

        # TODO: child section links live inside `<ul class="child-link-list">`
        # and are of the form `<li><a href="....html">`; collecting them into
        # `cl` was started but never finished:
        # if (el.tag == 'ul' and 'class' in el.attrib and
        #         el.attrib['class'] == 'child-link-list'):
        #     for c in el:
        #         cl[c[0].attrib['href'].partition('.')[0] ] =
|
|
|
|
|
|
# Perform all substitutions: every exported page is read, rewritten with
# plain string replacements, and written back in place.
for filename in filenames:
    # Read and write with an explicit encoding instead of the locale default,
    # so the text compared below does not depend on the machine's locale.
    # NOTE(review): assumes the exported pages are UTF-8 encoded -- confirm.
    with open(f'{filename}.html', 'r', encoding='utf-8') as file:
        content = file.read()

    # cleanup any stray type in script tags coming from old-fashioned org export
    content = content.replace('style type="text/css"', 'style ')
    content = content.replace('script type="text/javascript"', 'script ')

    # remove validation link if present
    content = content.replace(
        '<a href="https://validator.w3.org/check?uri=referer">Validate</a>',
        '')

    # section link substitutions:
    # link directly to the headline at `#{val}` even if it's top-level
    for val in filenames:
        content = content.replace(
            f'href="#{val}"', f'href="./{val}.html#{val}"')

    # equation (dedicated) link substitutions: `key` is the file that defines
    # the anchor `val`, so in-page references become links into that file
    for key, vals in dl.items():
        for val in vals:
            content = content.replace(
                f'href="#{val}"', f'href="./{key}.html#{val}"')
            # add a permalink icon right after the (empty) anchor itself
            content = content.replace(
                f'<a id="{val}"></a>',
                f'<a id="{val}"></a><a href="./{key}.html#{val}">{link_svg}</a>'
            )

    # add permalinks to headlines
    for key, vals in hl.items():
        for val in vals:
            el_tag = val['tag']
            el_id = val['id']
            el_text = val['text']
            # Label displayed next to the links: the id with '_' shown as '.'.
            # It is identical for the three replacements below, so build it
            # once.
            id_span = (f'<span class="headline-id">'
                       f'{el_id.replace("_", ".")}</span>')

            # headline whose whole content is a link to itself: turn the link
            # text into plain text and append the permalink icon and the label
            content = content.replace(
                (f'<{el_tag} id="{el_id}">'
                 f'<a href="./{key}.html#{el_id}">{el_text}</a></{el_tag}>'),
                (f'<{el_tag} id="{el_id}">{el_text}'
                 f'<a class="headline-permalink" href="./{key}.html#{el_id}">'
                 f'{link_svg}</a>'
                 f'{id_span}'
                 f'</{el_tag}>')
            )

            # add section ids to all `child-link-list`s
            content = content.replace(
                f'<li><a href="{el_id}.html">{el_text}</a></li>',
                (f'<li><a href="{el_id}.html">{el_text}</a>'
                 f'{id_span}</li>')
            )

            # add section ids to items in the toc
            content = content.replace(
                f'<a class="toc-a" href="./{key}.html#{el_id}">{el_text}</a>',
                (f'<a class="toc-a" href="./{key}.html#{el_id}">{el_text}</a>'
                 f'{id_span}')
            )

    # rewrite file
    with open(f'{filename}.html', 'w', encoding='utf-8') as outfile:
        outfile.write(content)
|