got footnotes to work, including inline footnotes

This commit is contained in:
Amy G. Bowersox 2024-08-02 22:29:57 -06:00
parent 4facd16452
commit 81201c5a21
2 changed files with 141 additions and 1 deletions

View File

@ -2,7 +2,9 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import hashlib
import re
import xml.etree.ElementTree as etree # noqa: N813
from typing import Any
@ -9,8 +10,11 @@ from urllib.parse import urlparse
import markdown
from markdown.extensions import Extension
from markdown.extensions.footnotes import (FootnoteExtension, FootnoteBlockProcessor, FootnoteInlineProcessor,
FootnoteTreeprocessor, FootnotePostTreeprocessor, FootnotePostprocessor)
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
from markdown.preprocessors import Preprocessor
from markdown.util import STX, ETX
from .config import Context
@ -24,6 +28,8 @@ GENERICIMAGE_PATTERN = r'!\[(.*?)\]\((.*?)\)'
OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
GENERICLINK_PATTERN = r'\[(.*?)\]\((.*?)\)'
# Placeholder for an inline-footnote reference, laid out as: STX + "erbosoft_fnref:" + <footnote id> + ETX.
# The footnote block processor writes the formatted placeholder into the block text where the inline footnote
# appeared; an inline processor registered on the prefix later replaces it with the rendered reference.
INLINE_FOOTNOTE_REF_PREFIX = STX + "erbosoft_fnref:"
INLINE_FOOTNOTE_REF_PATTERN = INLINE_FOOTNOTE_REF_PREFIX + "{}" + ETX
def is_proper_url(s: str) -> bool:
"""
@ -55,6 +61,23 @@ def sanitize_reference(s: str) -> str:
return rc
def find_extension(md: markdown.Markdown, cls: type[Extension]) -> Extension | None:
    """
    Look up the first extension of a given class registered with a Markdown parser.

    Args:
        md (markdown.Markdown): The Markdown parser whose registered extensions are searched.
        cls (type): The extension class to match; instances of subclasses match as well.

    Returns:
        Extension: The first matching extension in registration order, or ``None`` if there is no match.
    """
    matches = (candidate for candidate in md.registeredExtensions if isinstance(candidate, cls))
    return next(matches, None)
class MetaStripper(Extension):
"""
An extension that strips the metadata off the front of Obsidian pages, as it's already been parsed in an
@ -299,6 +322,120 @@ class ObsidianInlines(Extension):
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
class ObsidianStyleFootnotes(FootnoteExtension):
    """
    An extension of the standard Markdown processor footnote extension to support Obsidian-style inline footnotes
    (``^[footnote text]``) in addition to standard ones.
    """

    class BlockProcessor(FootnoteBlockProcessor):
        """A block processor extended to handle Obsidian inline footnotes."""

        # Matches ``^[text]`` on a single line. The text may contain one level of balanced square brackets
        # (e.g. a Markdown link). The match ends at the first closing bracket that balances the opening one,
        # so several inline footnotes, or a later link on the same line, are not swallowed into one footnote.
        RE_INLINE = re.compile(r'\^\[((?:[^\[\]\n]|\[[^\[\]\n]*\])+)\]')

        def run(self, parent: etree.Element, blocks: list[str]) -> bool:
            """
            Find, set, and remove footnote definitions, and convert inline footnotes into placeholders.

            Only the earliest footnote construct in the block is handled per call; the remainder is pushed back
            onto ``blocks`` so that it is examined again.

            Args:
                parent (etree.Element): The parent element of the block being processed (not used here).
                blocks (list[str]): The remaining blocks of the document; modified in place.

            Returns:
                bool: ``True`` if a footnote definition or inline footnote was processed, ``False`` otherwise.
            """
            block = blocks.pop(0)
            m_block = self.RE.search(block)
            m_inline = self.RE_INLINE.search(block)

            if m_block is not None and m_inline is not None and m_block.start() > m_inline.start():
                m_block = None  # inline goes first

            if m_block is not None:
                fn_id = m_block.group(1)
                fn_blocks = [m_block.group(2)]

                # Handle rest of block
                therest = block[m_block.end():].lstrip('\n')
                m2 = self.RE.search(therest)
                if m2:
                    # Another footnote exists in the rest of this block.
                    # Any content before match is continuation of this footnote, which may be lazily indented.
                    before = therest[:m2.start()].rstrip('\n')
                    fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
                    # Add back to blocks everything from beginning of match forward for next iteration.
                    blocks.insert(0, therest[m2.start():])
                else:
                    # All remaining lines of block are continuation of this footnote, which may be lazily indented.
                    fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')

                    # Check for child elements in remaining blocks.
                    fn_blocks.extend(self.detectTabbed(blocks))

                footnote = "\n\n".join(fn_blocks)
                self.footnotes.setFootnote(fn_id, footnote.rstrip())

                leading = block[:m_block.start()]
                if leading.strip():
                    # Add any content before match back to blocks as separate block
                    blocks.insert(0, leading.rstrip('\n'))
                return True

            if m_inline is not None:
                footnote = m_inline.group(1).strip()
                # Inline footnotes carry no label, so the ID is derived from the footnote text itself.
                fn_id = hashlib.sha1(bytes(footnote, 'utf-8')).hexdigest()
                self.footnotes.setFootnote(fn_id, footnote)
                # Swap the inline footnote for a placeholder and requeue the block, so that any further
                # footnotes in it are picked up on the next pass.
                blocks.insert(0, block[:m_inline.start(0)].rstrip()
                              + INLINE_FOOTNOTE_REF_PATTERN.format(fn_id) + block[m_inline.end(0):])
                return True

            # No match. Restore block.
            blocks.insert(0, block)
            return False

    class InlineReplacer(InlineProcessor):
        """An inline processor that turns inline-footnote placeholders into footnote reference links."""

        def __init__(self, pattern: str, extref: Any) -> None:
            """
            Initialize the inline processor.

            Args:
                pattern (str): The regular expression matching the start of a placeholder.
                extref (Any): The footnote extension that holds the footnote table and configuration.
            """
            super().__init__(pattern)
            self._extref = extref

        def handleMatch(  # noqa: N802
            self, m: re.Match[str], data: str
        ) -> tuple[etree.Element | None, int | None, int | None]:
            """
            Replace one placeholder with a ``<sup>`` element containing the footnote reference link.

            Args:
                m (re.Match[str]): The match of the placeholder prefix.
                data (str): The text currently being scanned.

            Returns:
                tuple: The new element plus the start and end offsets of the text it replaces, or
                    ``(None, None, None)`` if the placeholder is malformed or names an unknown footnote, in
                    which case the text is left untouched.
            """
            id_start = m.end(0)
            etx_at = data.find(ETX, id_start)
            if etx_at <= id_start:
                # No terminator, or an empty ID: not one of our placeholders.
                return None, None, None
            fn_id = data[id_start:etx_at]
            footnotes = self._extref.footnotes
            if fn_id not in footnotes:
                return None, None, None

            sup = etree.Element('sup')
            a = etree.SubElement(sup, 'a')
            sup.set('id', self._extref.makeFootnoteRefId(fn_id, found=True))
            a.set('href', '#' + self._extref.makeFootnoteId(fn_id))
            a.set('class', 'footnote-ref')
            # The displayed number is the 1-based position of the footnote in the footnote table.
            a.text = self._extref.getConfig('SUPERSCRIPT_TEXT').format(list(footnotes).index(fn_id) + 1)
            return sup, m.start(0), etx_at + 1

    def extendMarkdown(self, md: markdown.Markdown) -> None:  # noqa: N802
        """
        Registers the footnote processor with the Markdown parser.

        Args:
            md (markdown.Markdown): The Markdown parser to register the footnote processor with.
        """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a `blockprocessor` before `ReferencePreprocessor`
        md.parser.blockprocessors.register(ObsidianStyleFootnotes.BlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before `ImageReferencePattern`
        footnote_re = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(footnote_re, self), 'footnote', 175)
        md.inlinePatterns.register(ObsidianStyleFootnotes.InlineReplacer(INLINE_FOOTNOTE_REF_PREFIX, self),
                                   'footnote2', 175)

        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other tree-processors (i.e., `inline` and
        # `codehilite`) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional `backrefs` to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
def create_markdown_parser(context: Context) -> markdown.Markdown:
"""
Creates a Markdown parser with all our extensions loaded.
@ -310,6 +447,7 @@ def create_markdown_parser(context: Context) -> markdown.Markdown:
markdown.Markdown: The new Markdown parser.
"""
return markdown.Markdown(extensions=[MetaStripper(),
ObsidianStyleFootnotes(SUPERSCRIPT_TEXT='[{}]', SEPARATOR='-'),
ObsidianImages(context),
ObsidianLinks(context),
ObsidianInlines()])