320 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			320 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| import os
 | |
| from lxml.html import parse, etree, tostring
 | |
| 
 | |
# HTML fragment with the CC BY 4.0 license notice; write_file_end() writes it
# into every generated page just before the postamble.
copyright_string = (
    '\n'
    '<div class="license">\n'
    '<a rel="license noopener" href="https://creativecommons.org/licenses/by/4.0/"\n'
    'target="_blank" class="m-2">\n'
    '<img alt="Creative Commons License" style="border-width:0"\n'
    'src="https://licensebuttons.net/l/by/4.0/80x15.png"/>\n'
    '</a>\n'
    'Except where otherwise noted, all content is licensed under a\n'
    '<a rel="license noopener" href="https://creativecommons.org/licenses/by/4.0/"\n'
    'target="_blank">Creative Commons Attribution 4.0 International License</a>.\n'
    '</div>\n'
)
 | |
| 
 | |
# Project root, and the directory holding the single-page HTML source.
BASEDIR = "./"
SRCDIR = BASEDIR + "src/org/"

# Parse the monolithic source page once; everything below works on this tree.
tree = parse(SRCDIR + "index.html")

# Landmark elements located by the scan below (None until found).
el_head = el_header = el_title = el_nav = None
el_ttoc = el_content = el_postamble = None

# Every element whose class attribute starts with 'outline-2'.
sections_2 = []

# Single pass over the document. The tag, id and class tests are independent
# of each other, so one element may satisfy more than one of them; when
# several elements match the same test, the last one in document order wins.
for el in tree.iter():
    if el.tag == 'head':
        el_head = el
    elif el.tag == 'header':
        el_header = el
    elif el.tag == 'nav':
        el_nav = el

    if el.attrib.get('id') == 'text-table-of-contents':
        el_ttoc = el
    elif el.attrib.get('id') == 'content':
        el_content = el
    elif el.attrib.get('id') == 'postamble':
        el_postamble = el

    if el.attrib.get('class') == 'title':
        el_title = el
    elif el.attrib.get('class', '').startswith('outline-2'):
        sections_2.append(el)

# Make sure the output directory exists before any page is written.
export_dir = f"{BASEDIR}export/html"
os.makedirs(export_dir, exist_ok=True)
 | |
| 
 | |
| 
 | |
def ordered_pages(el):
    """
    Collect the table-of-contents entries in document order.

    `el` should be the text-table-of-contents element.

    Returns two parallel lists: the `href` of every `a` element found while
    iterating `el` (with leading '#' characters stripped), and the `text` of
    each of those anchors.
    """
    anchors = [node for node in el.iter() if node.tag == 'a']
    page_ids = [anchor.attrib['href'].lstrip('#') for anchor in anchors]
    page_texts = [anchor.text for anchor in anchors]
    return page_ids, page_texts
 | |
| 
 | |
# Parallel lists of page ids and link texts, in table-of-contents order.
page_ids, page_texts = ordered_pages(el=el_ttoc)
 | |
| 
 | |
def head_at_location(el_head, page_id):
    """
    Serialize `el_head`, appending the page's text to the document title.

    For a `page_id` found in `page_ids` at any position other than 0, every
    '</title>' in the serialized head is replaced by ': <page text></title>'.
    Unknown ids and the page at position 0 get the head unchanged.
    """
    head_html = tostring(el_head, pretty_print=True, encoding='unicode')
    if page_id not in page_ids:
        return head_html
    position = page_ids.index(page_id)
    if position == 0:
        return head_html
    title_end = f': {page_texts[position]}</title>'
    return head_html.replace('</title>', title_end)
 | |
| 
 | |
def breadcrumbs(page_id):
    """
    Build the breadcrumb trail (as an HTML `ul`) leading to `page_id`.

    Returns '' when `page_id` is not in `page_ids` or is the page at
    position 0 (the main page needs no breadcrumbs). Otherwise returns
    '<ul class="breadcrumbs">...</ul>' holding one linked `li` per ancestor,
    outermost first, followed by an unlinked `li` for the page itself.

    Ancestors are found by repeatedly dropping the last '_'-separated
    component of the id. Two id shapes take a shortcut for their first
    ancestor: ids starting with "b-" hang under "l_b", and ids whose first
    four characters are digits hang under "l_a_<those four characters>".

    Raises ValueError if an ancestor id is not present in `page_ids`.
    """
    try:
        index = page_ids.index(page_id)
    except ValueError:
        return ''
    if index == 0:  # main page, no need for breadcrumbs
        return ''

    # If book or article, take shortcut
    if page_id.startswith("b-"):  # this is a book
        ancestor_id = "l_b"
    elif page_id[:4].isdigit():  # this is an article
        ancestor_id = f"l_a_{page_id[:4]}"
    else:
        ancestor_id = page_id.rpartition('_')[0]

    # Collected innermost-first, reversed when the list is assembled.
    crumbs = [f'<li>{page_texts[index]}</li>']
    while ancestor_id:
        ancestor_index = page_ids.index(ancestor_id)
        crumbs.append(
            # give class to link to prevent auto addition of ids from
            # cleanup_links script; the space before href keeps the two
            # attributes separated in the emitted markup
            '<li><a class="breadcrumb-link" '
            f'href="{page_ids[ancestor_index]}.html">'
            f'{page_texts[ancestor_index]}</a></li>'
        )
        ancestor_id = ancestor_id.rpartition('_')[0]
    return '<ul class="breadcrumbs">' + ''.join(reversed(crumbs)) + '</ul>'
 | |
| 
 | |
def link_previous(page_id):
    """
    Return an HTML link to the page listed just before `page_id`, or None.

    None is returned when `page_id` is unknown, is the first page, or has no
    '_' in its id (depth 1 pages get no "previous" link).
    """
    if page_id not in page_ids:
        return None
    position = page_ids.index(page_id)
    # The first page and depth-1 pages have no previous link.
    if position == 0 or '_' not in page_id:
        return None
    prev_id = page_ids[position - 1]
    prev_text = page_texts[position - 1]
    dotted = prev_id.replace("_", ".")
    return f'<a href="{prev_id}.html">{prev_text} <small>[{dotted}]</small></a>'
 | |
| 
 | |
def link_next(page_id):
    """
    Return an HTML link to the page listed just after `page_id`, or None.

    None is returned when `page_id` is unknown or is the last page.
    """
    if page_id not in page_ids:
        return None
    following = page_ids.index(page_id) + 1
    if following == len(page_ids):  # last, no next
        return None
    next_id = page_ids[following]
    next_text = page_texts[following]
    dotted = next_id.replace("_", ".")
    return f'<a href="{next_id}.html">{next_text} <small>[{dotted}]</small></a>'
 | |
| 
 | |
def link_up(page_id):
    """
    Return an HTML link to the parent of `page_id`, or None.

    The parent id is `page_id` with its last '_'-separated component
    removed. None is returned when `page_id` is unknown or contains no '_'.
    Raises ValueError if the parent id is not present in `page_ids`.
    """
    if page_id not in page_ids:
        return None
    if '_' not in page_id:
        return None
    # Strip one '_' component from the id and point there.
    parent_index = page_ids.index(page_id.rpartition('_')[0])
    parent_id = page_ids[parent_index]
    parent_text = page_texts[parent_index]
    dotted = parent_id.replace("_", ".")
    return f'<a href="{parent_id}.html">{parent_text} <small>[{dotted}]</small></a>'
 | |
| 
 | |
def navigation_links(location):
    """
    Return the Prev / Next / Up navigation list for `location` as HTML.

    Each link that exists becomes one `li` inside a
    '<ul class="navigation-links">'. When none of the three links exists,
    the empty string is returned instead of an empty list.
    """
    previous = link_previous(location)
    nxt = link_next(location)
    up = link_up(location)

    labelled = (('Prev', previous), ('Next', nxt), ('Up', up))
    items = [f'<li>{label}: {link}</li>' for label, link in labelled if link]
    if not items:
        return ''
    return '<ul class="navigation-links">' + ''.join(items) + '</ul>'
 | |
| 
 | |
def search_box():
    """Return the HTML search form, which submits the query to DuckDuckGo."""
    form_lines = (
        '',
        '    <form style="float: right; padding-right: 0;" method="get" id="search" action="https://duckduckgo.com/" target="_blank">',
        '    <input type="hidden" name="sites" value="integrability.org"/>',
        '    <input class="search" type="text" name="q" maxlength="300" placeholder="Search"/>',
        '    <input type="submit" value="Search" style="visibility: hidden; width: 0;" /></form>',
        '    ',
    )
    return '\n'.join(form_lines)
 | |
| 
 | |
def list_to_details_recursive(el):
    """
    Convert a nested link list into nested details/summary markup.

    `el` must have one of three shapes:
      * a single `a` child: the serialized link is returned as is;
      * an `a` followed by a `ul`: the link becomes the summary and the
        items of the `ul` are converted recursively;
      * a single `ul` child: as above, but the summary is the text
        'Table of contents' followed by the search box.

    Raises ValueError for any other shape, and for a `ul` child that is not
    an `li`.
    """
    n_children = len(el)
    if n_children not in (1, 2):
        raise ValueError(f'el must have either 1 or 2 children, found {n_children}')
    first = el[0]
    if first.tag not in ('a', 'ul'):
        raise ValueError(f'el[0] must be an a or ul tag, but found a {first.tag}')
    if n_children == 2 and (first.tag != 'a' or el[1].tag != 'ul'):
        raise ValueError(f'for len(el) == 2, el[1] must be ul, but found {el[1].tag}')

    # A lone link is a leaf: output as is.
    if n_children == 1 and first.tag == 'a':
        return tostring(first, pretty_print=True, encoding='unicode')

    # Otherwise build a details/summary around the list.
    if n_children == 1:
        summary_text = 'Table of contents' + search_box()
        ul = first
    else:
        summary_text = tostring(first, pretty_print=True, encoding='unicode')
        ul = el[1]

    pieces = ['\n<details>', f'\n<summary>\n{summary_text}', '\n</summary>\n<ul>\n']
    for li in ul:
        if li.tag != 'li':
            raise ValueError('child of ul should be li')
        pieces.append('<li>\n')
        pieces.append(list_to_details_recursive(li))
        pieces.append('\n</li>\n')
    pieces.append('\n</ul>\n</details>')
    return ''.join(pieces)
 | |
| 
 | |
| 
 | |
| 
 | |
# Fully collapsed table of contents, wrapped in its own <nav>; each page
# opens the relevant branches of it through toc_at_location().
collapsed_toc = (
    '\n<nav id="collapsed-table-of-contents">'
    + list_to_details_recursive(el_ttoc)
    + '\n</nav>\n'
)
 | |
| 
 | |
| 
 | |
def toc_at_location(collapsed_toc, location):
    """
    Return `collapsed_toc` with the branch leading to `location` opened.

    `location` is the page's filename stem. For anything but 'index', the
    details element of every ancestor (the id with trailing '_' components
    dropped one at a time) is marked open with summary class 'toc-open',
    and the entry for `location` itself gets class 'toc-currentpage',
    whether it is a summary or a plain list item. Finally every '<a href'
    in the result receives class 'toc-a'.
    """
    html = collapsed_toc
    if location != 'index':
        # Ancestor ids, nearest first.
        ancestors = []
        parent = location.rpartition('_')[0]
        while parent:
            ancestors.append(parent)
            parent = parent.rpartition('_')[0]

        # (old, new) pairs, applied strictly in this order.
        substitutions = [
            (f'<details>\n<summary>\n<a href="#{ancestor}"',
             f'<details open="">\n<summary class="toc-open">\n<a href="#{ancestor}"')
            for ancestor in ancestors
        ]
        substitutions.extend([
            # highlight the current location when it is a summary ...
            (f'<details>\n<summary>\n<a href="#{location}"',
             f'<details open="">\n<summary class="toc-currentpage">\n<a href="#{location}"'),
            # ... or when it is a plain list item
            (f'<li>\n<a href="#{location}"',
             f'<li class="toc-currentpage">\n<a href="#{location}"'),
            # but close all details which contain deeper levels than location
            (f'<details open="">\n<summary>\n<a href="#{location}_',
             f'<details>\n<summary>\n<a href="#{location}_'),
        ])
        for old, new in substitutions:
            html = html.replace(old, new)
    return html.replace('<a href', '<a class="toc-a" href')
 | |
| 
 | |
def write_file_start(_file, location):
    """
    Write everything that precedes a page's own content to `_file`.

    In order: the doctype and opening html tag, the head for `location`, the
    opening of the content div, a header whose title links to ./index.html,
    the table of contents opened at `location`, the breadcrumbs and the
    navigation links.
    """
    emit = _file.write
    emit('<!DOCTYPE html>\n<html lang="en">\n')
    emit(head_at_location(el_head, location))
    emit('<div id="content">\n')
    emit('<header>\n<h1 class="title">\n<a href="./index.html" class="homepage-link">')
    emit(f'{el_title.text}</a>\n</h1>\n</header>')
    for fragment_for in (
        lambda: toc_at_location(collapsed_toc, location),
        lambda: breadcrumbs(location),
        lambda: navigation_links(location),
    ):
        # Each fragment is computed right before it is written.
        emit(fragment_for())
 | |
| 
 | |
def write_file_end(_file, location):
    """
    Write everything that follows a page's own content to `_file`.

    In order: a line break, the navigation links for `location` (repeated
    from the top of the page for convenience), another line break and a
    rule, the license notice, the serialized postamble, and the closing
    div and html tags.
    """
    emit = _file.write
    emit('\n<br>')
    emit(navigation_links(location))
    emit('\n<br>')
    emit('\n<hr>')
    emit(copyright_string)
    postamble_html = tostring(el_postamble, pretty_print=True, encoding='unicode')
    emit(postamble_html)
    emit('\n</div>\n</html>')
 | |
| 
 | |
| 
 | |
def write_files_recursive(name, el, levelmax=5):
    """
    Recursively extract outlines, writing one HTML file per section.

    `name` is the output filename stem; the page is written to
    `{BASEDIR}export/html/{name}.html`. `el` is either the element with
    id 'content' (treated as level 1) or an element whose class starts with
    'outline-' followed by its level digit. `levelmax` is the deepest level
    that still gets split into child pages; it is passed on unchanged to
    every recursive call.

    A section without next-level outline descendants, or one at `levelmax`,
    is written out in full. Otherwise only the content preceding the first
    next-level outline is written, followed by a list of links to the
    children, each of which is written to its own file by recursion.

    If `el` is None, or is neither the content element nor an outline
    element, a message is printed and nothing is written.
    """
    # Check for a missing element before touching its attributes.
    if el is None:
        print(f'Element name {name} has no outline class, no files written.')
        return
    css_class = el.attrib.get('class', '')
    if el.attrib.get('id') == 'content':
        level = 1
    elif css_class.startswith('outline-'):
        level = int(css_class.partition('outline-')[2][0])
    else:
        print(f'Element name {name} has no outline class, no files written.')
        return

    # Outline descendants one level down decide the output format.
    child_prefix = f'outline-{level + 1}'
    children = [
        child for child in el.iter()
        if child.attrib.get('class', '').startswith(child_prefix)
    ]

    # The context manager closes the file even if a write or a nested call
    # raises. NOTE(review): the encoding is pinned to UTF-8 rather than left
    # to the locale; this assumes the exported <head> declares UTF-8.
    with open(f'{BASEDIR}export/html/{name}.html', 'w', encoding='utf-8') as _file:
        write_file_start(_file, name)

        # if no children or if we're on levelmax, write everything
        if not children or level == levelmax:
            _file.write(tostring(el, pretty_print=True, encoding='unicode'))
        # otherwise write anything above the first next-level headline,
        # and then replace the next-level headlines by links to their files
        else:
            if el.text:
                _file.write(el.text)
            for child in el:
                if child.attrib.get('class', '').startswith(child_prefix):
                    break  # stop once we hit the first next-level headline
                if child.tag != "nav":  # don't write the table-of-contents
                    _file.write(tostring(child, pretty_print=True, encoding='unicode'))
            # now print the list of children
            heading = f'h{level + 1}'
            _file.write(f'<{heading}>In this section:</{heading}>')
            _file.write('\n<ul class="child-links-list">')
            for child in children:
                child_h = next(child.iter(heading))
                child_label = child_h.attrib['id']
                child_text = next(child_h.iter('a')).text
                _file.write(f'\n<li><a href="{child_label}.html">{child_text}</a></li>')
                write_files_recursive(name=child_label, el=child, levelmax=levelmax)
            _file.write('\n</ul>\n')

        write_file_end(_file, name)
 | |
| 
 | |
| 
 | |
# Rewrite the index.html file. The function recurses into every child
# section itself, so this is the only call needed.
write_files_recursive(name='index', el=el_content)
 |