320 lines
11 KiB
Python
Executable File
320 lines
11 KiB
Python
Executable File
#! /usr/bin/env python
|
|
|
|
import os
|
|
from lxml.html import parse, etree, tostring
|
|
|
|
# HTML snippet with the Creative Commons BY 4.0 badge and licence sentence.
# It is written verbatim near the bottom of every generated page.
copyright_string = (
    '\n'
    '<div class="license">\n'
    '<a rel="license noopener" href="https://creativecommons.org/licenses/by/4.0/"\n'
    'target="_blank" class="m-2">\n'
    '<img alt="Creative Commons License" style="border-width:0"\n'
    'src="https://licensebuttons.net/l/by/4.0/80x15.png"/>\n'
    '</a>\n'
    'Except where otherwise noted, all content is licensed under a\n'
    '<a rel="license noopener" href="https://creativecommons.org/licenses/by/4.0/"\n'
    'target="_blank">Creative Commons Attribution 4.0 International License</a>.\n'
    '</div>\n'
)
|
|
|
|
# Paths are relative to the directory the script is run from.
BASEDIR = "./"
SRCDIR = BASEDIR + "src/"

# Parse the single-page HTML source that is split into per-section pages.
tree = parse(SRCDIR + "index.html")
|
|
|
|
# Landmark elements of the source document; each stays None if not found.
el_head = el_header = el_title = el_nav = None
el_ttoc = el_content = el_postamble = None

# Every element whose class attribute starts with 'outline-2'.
sections_2 = []

# Single pass over the document; when several elements match the same
# criterion, the last one in document order wins.
for el in tree.iter():
    # Landmarks recognised by tag name (mutually exclusive).
    if el.tag == 'head':
        el_head = el
    elif el.tag == 'header':
        el_header = el
    elif el.tag == 'nav':
        el_nav = el

    # Landmark recognised by an exact class attribute.
    if el.attrib.get('class') == 'title':
        el_title = el

    # Landmarks recognised by id (mutually exclusive).
    if el.attrib.get('id') == 'text-table-of-contents':
        el_ttoc = el
    elif el.attrib.get('id') == 'content':
        el_content = el
    elif el.attrib.get('id') == 'postamble':
        el_postamble = el

    if el.attrib.get('class', '').startswith('outline-2'):
        sections_2.append(el)
|
|
|
|
# Output directory for the generated pages.  `exist_ok=True` creates it when
# missing and is a no-op when it already exists, without the check-then-create
# race of a separate `os.path.isdir` test.
export_dir = f"{BASEDIR}export/html"
os.makedirs(export_dir, exist_ok=True)
|
|
|
|
|
|
def ordered_pages(el):
    """
    Collect the pages linked from the table of contents, in document order.

    `el` should be the text-table-of-contents element.

    Returns a pair of parallel lists `(page_ids, page_texts)`: for every `a`
    element found under `el`, its `href` with leading '#' characters removed,
    and its text.  Positions in these lists are what the previous / next / up
    helpers work from.

    Raises KeyError if an `a` element has no `href` attribute.
    """
    anchors = [node for node in el.iter() if node.tag == 'a']
    ids = [anchor.attrib['href'].lstrip('#') for anchor in anchors]
    texts = [anchor.text for anchor in anchors]
    return ids, texts
|
|
|
|
# Parallel lookup tables built from the table of contents, in document order:
# page_ids[i] is the target id of the i-th TOC link, page_texts[i] its text.
page_ids, page_texts = ordered_pages(el_ttoc)
|
|
|
|
def head_at_location(el_head, page_id):
    """
    Serialize the `head` element for the page identified by `page_id`.

    For any page other than the first table-of-contents entry, the page's
    text is appended to the document title as ': <page text>' by rewriting
    every '</title>' in the serialized head.  Unknown page ids and the first
    entry get the head unchanged.
    """
    head_html = tostring(el_head, pretty_print=True, encoding='unicode')

    # Treat ids missing from the TOC like the first entry: title untouched.
    position = page_ids.index(page_id) if page_id in page_ids else 0
    if position == 0:
        return head_html

    title_suffix = f': {page_texts[position]}</title>'
    return head_html.replace('</title>', title_suffix)
|
|
|
|
def breadcrumbs(page_id):
    """
    Build the breadcrumb trail (an HTML `ul`) leading to `page_id`.

    The trail lists every ancestor as a link, outermost first, followed by
    the current page as plain text.  Ancestors are found by repeatedly
    dropping the last '_'-separated component of the id, except that:

    - ids starting with "b-" (books) start their ancestry at "l_b";
    - ids whose first four characters are digits (articles) start at
      "l_a_<those four characters>".

    Returns '' for the first table-of-contents entry and for ids that are
    not in the table of contents.

    Raises ValueError if an ancestor id is not in the table of contents.
    """
    try:
        index = page_ids.index(page_id)
    except ValueError:
        return ''
    if index == 0:  # main page, no need for breadcrumbs
        return ''

    # If book or article, take shortcut
    if page_id.startswith("b-"):  # this is a book
        ancestor_id = "l_b"
    elif page_id[:4].isdigit():  # this is an article
        ancestor_id = f"l_a_{page_id[:4]}"
    else:
        ancestor_id = page_ids[index].rpartition('_')[0]

    # Collected innermost-first while walking up, reversed when rendering.
    crumbs = [f'<li>{page_texts[index]}</li>']
    while ancestor_id:
        ancestor_index = page_ids.index(ancestor_id)
        crumbs.append(
            # give class to link to prevent auto addition of ids from
            # cleanup_links script; note the space separating the class and
            # href attributes, which HTML requires
            '<li><a class="breadcrumb-link" '
            f'href="{page_ids[ancestor_index]}.html">'
            f'{page_texts[ancestor_index]}</a></li>'
        )
        ancestor_id = ancestor_id.rpartition('_')[0]

    return f'<ul class="breadcrumbs">{"".join(reversed(crumbs))}</ul>'
|
|
|
|
def link_previous(page_id):
    """
    Return an HTML link to the table-of-contents entry just before `page_id`.

    Returns None when `page_id` is not in the table of contents, is its
    first entry, or has depth 1 (no '_' in the id).  The link text is
    followed by the target id in brackets, with '_' shown as '.'.
    """
    if page_id not in page_ids:
        return None
    position = page_ids.index(page_id)

    is_first = position == 0
    is_depth_one = '_' not in page_id  # depth-1 pages get no previous link
    if is_first or is_depth_one:
        return None

    target_id = page_ids[position - 1]
    target_text = page_texts[position - 1]
    dotted_id = target_id.replace("_", ".")
    return f'<a href="{target_id}.html">{target_text} <small>[{dotted_id}]</small></a>'
|
|
|
|
def link_next(page_id):
    """
    Return an HTML link to the table-of-contents entry just after `page_id`.

    Returns None when `page_id` is not in the table of contents or is its
    last entry.  The link text is followed by the target id in brackets,
    with '_' shown as '.'.
    """
    if page_id not in page_ids:
        return None

    following = page_ids.index(page_id) + 1
    if following == len(page_ids):  # last, no next
        return None

    target_id = page_ids[following]
    target_text = page_texts[following]
    dotted_id = target_id.replace("_", ".")
    return f'<a href="{target_id}.html">{target_text} <small>[{dotted_id}]</small></a>'
|
|
|
|
def link_up(page_id):
    """
    Return an HTML link to the parent of `page_id`.

    The parent id is `page_id` with its last '_'-separated component
    removed.  Returns None when `page_id` is not in the table of contents
    or contains no '_'.

    Raises ValueError if the parent id is not in the table of contents.
    """
    if page_id not in page_ids:
        return None

    parent_id, separator, _ = page_id.rpartition('_')
    if not separator:  # no '_' in the id, so nothing to point up to
        return None

    parent_pos = page_ids.index(parent_id)
    target_id = page_ids[parent_pos]
    target_text = page_texts[parent_pos]
    dotted_id = target_id.replace("_", ".")
    return f'<a href="{target_id}.html">{target_text} <small>[{dotted_id}]</small></a>'
|
|
|
|
def navigation_links(location):
    """
    Build the Prev / Next / Up navigation list for the page `location`.

    Each of the three links is included only when the corresponding helper
    returns one.  Returns '' when none of them does.
    """
    candidates = (
        ('Prev', link_previous(location)),
        ('Next', link_next(location)),
        ('Up', link_up(location)),
    )
    items = [f'<li>{label}: {link}</li>' for label, link in candidates if link]
    if not items:
        return ''
    return '<ul class="navigation-links">' + ''.join(items) + '</ul>'
|
|
|
|
def search_box():
    """
    Return the HTML for the search form.

    The form submits the query by GET to https://duckduckgo.com/ in a new
    tab, with a hidden `sites` field set to integrability.org.  Its submit
    button is hidden, so the query is sent by pressing Enter.
    """
    form_lines = (
        '',
        '<form style="float: right; padding-right: 0;" method="get" id="search" action="https://duckduckgo.com/" target="_blank">',
        '<input type="hidden" name="sites" value="integrability.org"/>',
        '<input class="search" type="text" name="q" maxlength="300" placeholder="Search"/>',
        '<input type="submit" value="Search" style="visibility: hidden; width: 0;" /></form>',
        '',
    )
    return '\n'.join(form_lines)
|
|
|
|
def list_to_details_recursive(el):
    """
    Render a table-of-contents node as nested details/summary HTML.

    `el` must have one of three shapes:

    - a single `a` child: the link is output as is;
    - a single `ul` child: the list is wrapped in a details element whose
      summary is 'Table of contents' followed by the search box;
    - an `a` followed by a `ul`: the list is wrapped in a details element
      whose summary is the link.

    Each `li` of the list is rendered recursively.  The exact line breaks
    in the output matter: `toc_at_location` matches on them.

    Raises ValueError when `el` has any other shape, or when a child of the
    `ul` is not an `li`.
    """
    child_count = len(el)
    if child_count not in (1, 2):
        raise ValueError(f'el must have either 1 or 2 children, found {len(el)}')

    first = el[0]
    if first.tag not in ('a', 'ul'):
        raise ValueError(f'el[0] must be an a or ul tag, but found a {el[0].tag}')
    if child_count == 2 and (first.tag != 'a' or el[1].tag != 'ul'):
        raise ValueError(f'for len(el) == 2, el[1] must be ul, but found {el[1].tag}')

    # Leaf entry: a lone link needs no wrapper.
    if child_count == 1 and first.tag == 'a':
        return tostring(first, pretty_print=True, encoding='unicode')

    # Otherwise build a details/summary around the list.
    if child_count == 1:
        summary_text = 'Table of contents' + search_box()
        ul = first
    else:
        summary_text = tostring(first, pretty_print=True, encoding='unicode')
        ul = el[1]

    pieces = [f'\n<details>\n<summary>\n{summary_text}\n</summary>\n<ul>\n']
    for li in ul:
        if li.tag != 'li':
            raise ValueError('child of ul should be li')
        pieces.append('<li>\n')
        pieces.append(list_to_details_recursive(li))
        pieces.append('\n</li>\n')
    pieces.append('\n</ul>\n</details>')
    return ''.join(pieces)
|
|
|
|
|
|
|
|
# Fully collapsed table of contents, rendered once and then specialised
# for each page by toc_at_location.
collapsed_toc = ''.join((
    '\n<nav id="collapsed-table-of-contents">',
    list_to_details_recursive(el_ttoc),
    '\n</nav>\n',
))
|
|
|
|
|
|
def toc_at_location(collapsed_toc, location):
    """
    Specialise a collapsed table of contents for the page `location`.

    Given the collapsed toc (a string) and a location (filename without
    extension), return a copy in which:

    - the details of every ancestor of `location` are open, with their
      summary given the class 'toc-open';
    - the entry for `location` itself gets the class 'toc-currentpage',
      whether it is a summary (opened as well) or a plain list item;
    - every link gets the class 'toc-a'.

    For location 'index' only the last step is applied.
    """
    # Ordered (old, new) substitutions, applied one after the other.
    substitutions = []

    if location != 'index':
        # open all ancestors
        ancestor = location.rpartition('_')[0]
        while ancestor:
            substitutions.append((
                f'<details>\n<summary>\n<a href="#{ancestor}"',
                f'<details open="">\n<summary class="toc-open">\n<a href="#{ancestor}"',
            ))
            ancestor = ancestor.rpartition('_')[0]

        # highlight the current location, whether it's a summary ...
        substitutions.append((
            f'<details>\n<summary>\n<a href="#{location}"',
            f'<details open="">\n<summary class="toc-currentpage">\n<a href="#{location}"',
        ))
        # ... or a plain list item
        substitutions.append((
            f'<li>\n<a href="#{location}"',
            f'<li class="toc-currentpage">\n<a href="#{location}"',
        ))

        # but close all details which contain deeper levels than location
        substitutions.append((
            f'<details open="">\n<summary>\n<a href="#{location}_',
            f'<details>\n<summary>\n<a href="#{location}_',
        ))

    # Tag every link last, so the patterns above still see plain '<a href'.
    substitutions.append(('<a href', '<a class="toc-a" href'))

    marked = collapsed_toc
    for old, new in substitutions:
        marked = marked.replace(old, new)
    return marked
|
|
|
|
def write_file_start(_file, location):
    """
    Write everything that precedes a page's own content to `_file`.

    In order: doctype and opening `html` tag, the head for `location`, the
    opening of the content div, a header whose title links to ./index.html,
    the table of contents opened at `location`, the breadcrumbs and the
    navigation links.
    """
    emit = _file.write

    emit('<!DOCTYPE html>\n<html lang="en">\n')
    emit(head_at_location(el_head, location))
    emit('<div id="content">\n')

    # The site title doubles as a link back to the home page.
    emit('<header>\n<h1 class="title">\n'
         '<a href="./index.html" class="homepage-link">')
    emit(f'{el_title.text}</a>\n</h1>\n</header>')

    emit(toc_at_location(collapsed_toc, location))
    emit(breadcrumbs(location))
    emit(navigation_links(location))
|
|
|
|
def write_file_end(_file, location):
    """
    Write everything that follows a page's own content to `_file`.

    In order: the navigation links for `location` (repeated from the top of
    the page, for convenience), a rule, the licence notice, the postamble
    element, and the closing tags of the content div and the document.
    """
    emit = _file.write

    emit('\n<br>')
    emit(navigation_links(location))  # repeat, for convenience
    emit('\n<br>')
    emit('\n<hr>')

    emit(copyright_string)
    emit(tostring(el_postamble, pretty_print=True, encoding='unicode'))

    emit('\n</div>\n</html>')
|
|
|
|
|
|
def write_files_recursive(name, el, levelmax=5):
    """
    Recursively extract outlines, writing one HTML file per outline element.

    Writes `{BASEDIR}export/html/{name}.html` for `el`, then recurses into
    the outline elements one level below it.

    `name` is the output file name without extension; it is also the
    location used for the page's navigation.
    `el` is either the element with id 'content' (level 1) or an element
    whose class starts with 'outline-', the character after that prefix
    being its level.
    `levelmax` is the level at which an element is written out in full
    instead of being split further; it applies to the whole recursion.

    If `el` is None or is not such an element, a message is printed and
    nothing is written.

    Raises StopIteration if a child outline has no heading of the expected
    level, or that heading contains no `a` element.
    """
    # Check for a missing element before touching its attributes.
    if el is None:
        print(f'Element name {name} has no outline class, no files written.')
        return

    el_class = el.attrib.get('class', '')
    if el.attrib.get('id') == 'content':
        level = 1
    elif el_class.startswith('outline-'):
        level = int(el_class.partition('outline-')[2][0])
    else:
        print(f'Element name {name} has no outline class, no files written.')
        return

    child_class = f'outline-{level + 1}'

    # The context manager closes the file even if writing or recursing fails.
    with open(f'{BASEDIR}export/html/{name}.html', 'w') as _file:
        write_file_start(_file, name)

        # Count outline children for establishing output format
        children = [
            child for child in el.iter()
            if child.attrib.get('class', '').startswith(child_class)
        ]

        # if no children or if we're on levelmax, write everything
        if not children or level == levelmax:
            _file.write(tostring(el, pretty_print=True, encoding='unicode'))
        # otherwise write anything above the first next-level headline,
        # and then replace the next-level headlines by links to their files
        else:
            if el.text:
                _file.write(el.text)
            for child in el:
                if child.attrib.get('class', '').startswith(child_class):
                    break  # break out once we hit the first next-level headline
                if child.tag != "nav":  # don't write the table-of-contents
                    _file.write(
                        tostring(child, pretty_print=True, encoding='unicode'))

            # now print the list of children
            _file.write(f'<h{level+1}>In this section:</h{level+1}>')
            _file.write('\n<ul class="child-links-list">')
            for child in children:
                child_h = next(child.iter(f'h{level + 1}'))
                child_label = child_h.attrib['id']
                child_text = next(child_h.iter('a')).text
                _file.write(
                    f'\n<li><a href="{child_label}.html">{child_text}</a></li>')
                # Forward levelmax so a caller's limit holds at every depth.
                write_files_recursive(
                    name=child_label, el=child, levelmax=levelmax)
            _file.write('\n</ul>\n')

        write_file_end(_file, name)
|
|
|
|
|
|
# Rewrite the index.html file:
|
|
# Start from the element with id 'content', written out as index.html;
# every nested section is reached through the function's own recursion.
write_files_recursive('index', el_content) # the only call needed
|