#!/usr/bin/env python3
#
# Provenance: Python content of the patch
#   commit  81201c5a21412e795a744ddd599fb70669b7cddb
#   author  Amy Gale Ruth Bowersox
#   date    Fri, 2 Aug 2024 22:29:57 -0600
#   subject got footnotes to work, including inline footnotes
#   file    src/dragonglass/mparse.py
# The patch also touched dragonglass.iml; that hunk's content was not recoverable and is omitted here.
# Only the regions of mparse.py that the patch shows are reproduced; elided regions are marked below.

import hashlib
import re
import xml.etree.ElementTree as etree  # noqa: N813
from typing import Any
# ... (original lines 6-8 of the import section are only partly shown by the patch) ...
from urllib.parse import urlparse

import markdown
from markdown.extensions import Extension
from markdown.extensions.footnotes import (
    FootnoteBlockProcessor,
    FootnoteExtension,
    FootnoteInlineProcessor,
    FootnotePostprocessor,
    FootnotePostTreeprocessor,
    FootnoteTreeprocessor,
)
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
from markdown.preprocessors import Preprocessor
from markdown.util import STX, ETX

from .config import Context

# ... (module-level definitions not shown by the patch) ...

GENERICIMAGE_PATTERN = r'!\[(.*?)\]\((.*?)\)'
OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
GENERICLINK_PATTERN = r'\[(.*?)\]\((.*?)\)'
# Placeholder written into the block text where an inline footnote used to be: STX + tag + footnote id + ETX.
INLINE_FOOTNOTE_REF_PREFIX = STX + "erbosoft_fnref:"
INLINE_FOOTNOTE_REF_PATTERN = INLINE_FOOTNOTE_REF_PREFIX + "{}" + ETX

# ... (is_proper_url, sanitize_reference: bodies not shown by the patch) ...


def find_extension(md: markdown.Markdown, cls: type[Extension]) -> Extension | None:
    """
    Locate a registered extension in the Markdown parser.

    Args:
        md (markdown.Markdown): The Markdown parser to look through.
        cls (type): The class of the extension to be retrieved.

    Returns:
        Extension: The first registered extension that is an instance of ``cls``, or ``None`` if there is none.
    """
    return next((ext for ext in md.registeredExtensions if isinstance(ext, cls)), None)


# ... (MetaStripper, ObsidianImages, ObsidianLinks, ObsidianInlines: not shown in full by the patch) ...


class ObsidianStyleFootnotes(FootnoteExtension):
    """
    An extension of the standard Markdown processor footnote extension to support Obsidian-style inline footnotes
    (``^[footnote text]``) in addition to standard ones.

    Inline footnotes are handled in two stages: the block processor stores the footnote text under a hash-derived
    id and leaves an ``STX``/``ETX``-delimited placeholder in the text, and the inline processor later turns that
    placeholder into the superscript reference link.
    """

    class BlockProcessor(FootnoteBlockProcessor):
        """A block processor extended to handle Obsidian inline footnotes."""

        # Building blocks for RE_INLINE. The footnote text may contain bracketed spans up to two levels deep
        # (e.g. ``[text](url)`` or ``[[Page]]``) but must stay on one line.  Balancing the brackets, instead of
        # a greedy ``.+``, stops the match at the footnote's own closing bracket rather than at the last ``]``
        # on the line, so several inline footnotes (or a later link) on one line are handled correctly.
        _PLAIN = r'[^\[\]\n]'
        _NESTED_1 = rf'\[{_PLAIN}*\]'
        _NESTED_2 = rf'\[(?:{_PLAIN}|{_NESTED_1})*\]'
        RE_INLINE = re.compile(rf'\^\[((?:{_PLAIN}|{_NESTED_2})+)\]')

        def run(self, parent: etree.Element, blocks: list[str]) -> bool:
            """
            Find, set, and remove footnote definitions, including inline ones.

            Exactly one construct is consumed per call: whichever of a standard definition (``self.RE``) or an
            inline footnote (``RE_INLINE``) starts first in the block.  The remainder is pushed back onto
            ``blocks`` so it is seen again on a later pass.

            Args:
                parent (etree.Element): The parent element of the block (not used here).
                blocks (list[str]): The remaining blocks; the first one is examined and may be replaced.

            Returns:
                bool: ``True`` if a footnote was found and recorded, ``False`` if the block was left untouched.
            """
            block = blocks.pop(0)
            m_block = self.RE.search(block)
            m_inline = self.RE_INLINE.search(block)

            if m_block is not None and m_inline is not None and m_block.start() > m_inline.start():
                m_block = None  # inline goes first

            if m_block:
                self._store_definition(block, m_block, blocks)
                return True
            if m_inline:
                self._store_inline(block, m_inline, blocks)
                return True

            # No match. Restore block.
            blocks.insert(0, block)
            return False

        def _store_definition(self, block: str, m_block: re.Match[str], blocks: list[str]) -> None:
            """Record a standard ``[^id]: text`` definition and push unconsumed text back onto ``blocks``."""
            fn_id = m_block.group(1)
            fn_blocks = [m_block.group(2)]

            # Handle rest of block
            therest = block[m_block.end():].lstrip('\n')
            m_next = self.RE.search(therest)
            if m_next:
                # Another footnote exists in the rest of this block.
                # Any content before match is continuation of this footnote, which may be lazily indented.
                before = therest[:m_next.start()].rstrip('\n')
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
                # Add back to blocks everything from beginning of match forward for next iteration.
                blocks.insert(0, therest[m_next.start():])
            else:
                # All remaining lines of block are continuation of this footnote, which may be lazily indented.
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')

                # Check for child elements in remaining blocks.
                fn_blocks.extend(self.detectTabbed(blocks))

            footnote = "\n\n".join(fn_blocks)
            self.footnotes.setFootnote(fn_id, footnote.rstrip())

            leading = block[:m_block.start()]
            if leading.strip():
                # Add any content before match back to blocks as separate block
                blocks.insert(0, leading.rstrip('\n'))

        def _store_inline(self, block: str, m_inline: re.Match[str], blocks: list[str]) -> None:
            """Record an inline ``^[text]`` footnote and replace it in the block with a reference placeholder."""
            footnote = m_inline.group(1).strip()
            # The id is derived from the footnote text, so identical inline footnotes share one id.
            # SHA-1 is used only to build an identifier, not for security.
            fn_id = hashlib.sha1(footnote.encode('utf-8'), usedforsecurity=False).hexdigest()
            self.footnotes.setFootnote(fn_id, footnote)
            blocks.insert(0, block[:m_inline.start(0)].rstrip()
                          + INLINE_FOOTNOTE_REF_PATTERN.format(fn_id) + block[m_inline.end(0):])

    class InlineReplacer(InlineProcessor):
        """An inline processor that turns inline-footnote placeholders into superscript reference links."""

        def __init__(self, pattern: str, extref: Any) -> None:
            """
            Initialize the processor.

            Args:
                pattern (str): The pattern matching the start of a placeholder.
                extref (Any): The footnote extension that owns the footnote table and id generation.
            """
            super().__init__(pattern)
            self._extref = extref

        def handleMatch(  # noqa: N802
            self, m: re.Match[str], data: str
        ) -> tuple[etree.Element | None, int | None, int | None]:
            """
            Build the ``<sup><a>`` reference element for one placeholder.

            Args:
                m (re.Match): The match of the placeholder prefix.
                data (str): The text being scanned; the footnote id runs from the end of the match to ``ETX``.

            Returns:
                tuple: The new element plus the start and end offsets of the text it replaces, or
                ``(None, None, None)`` if the placeholder is malformed or names an unknown footnote.
            """
            etx_at = data.find(ETX, m.end(0))
            if etx_at <= m.end(0):
                # ETX missing (find() returned -1) or the id is empty: not one of our placeholders.
                return None, None, None

            fn_id = data[m.end(0):etx_at]
            known_ids = list(self._extref.footnotes.keys())
            if fn_id not in known_ids:
                return None, None, None

            sup = etree.Element('sup')
            a = etree.SubElement(sup, 'a')
            sup.set('id', self._extref.makeFootnoteRefId(fn_id, found=True))
            a.set('href', '#' + self._extref.makeFootnoteId(fn_id))
            a.set('class', 'footnote-ref')
            # The visible number is the footnote's 1-based position in the footnote table.
            a.text = self._extref.getConfig('SUPERSCRIPT_TEXT').format(known_ids.index(fn_id) + 1)
            return sup, m.start(0), etx_at + 1

    def extendMarkdown(self, md: markdown.Markdown) -> None:  # noqa: N802
        """
        Registers the footnote processor with the Markdown parser.

        Args:
            md (markdown.Markdown): The Markdown parser to register the footnote processor with.
        """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(ObsidianStyleFootnotes.BlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        footnote_re = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(footnote_re, self), 'footnote', 175)
        md.inlinePatterns.register(ObsidianStyleFootnotes.InlineReplacer(INLINE_FOOTNOTE_REF_PREFIX, self),
                                   'footnote2', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)


def create_markdown_parser(context: Context) -> markdown.Markdown:
    """
    Creates a Markdown parser with all our extensions loaded.

    Args:
        context (Context): Passed to the ``ObsidianImages`` and ``ObsidianLinks`` extensions.

    Returns:
        markdown.Markdown: The new Markdown parser.
    """
    return markdown.Markdown(extensions=[MetaStripper(),
                                         ObsidianStyleFootnotes(SUPERSCRIPT_TEXT='[{}]', SEPARATOR='-'),
                                         ObsidianImages(context),
                                         ObsidianLinks(context),
                                         ObsidianInlines()])