added support for parsing standard Markdown links that happen to be internal

This commit is contained in:
Amy G. Bowersox 2024-03-13 21:24:42 -06:00
parent 5cadb535dc
commit c0025f9513

View File

@ -2,10 +2,44 @@
import markdown
import xml.etree.ElementTree as etree
from urllib.parse import urlparse
from urllib.parse import unquote as urlunquote
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
from markdown.preprocessors import Preprocessor
# Every processor registered by this module uses PRIO_BASE (plus a small offset)
# as its registry priority. Python-Markdown runs registered processors in
# descending priority order; the large value is presumably chosen so that these
# processors run before the library's built-in ones -- confirm against the
# Python-Markdown version in use.
PRIO_BASE = 10000 # priority base for our extensions
def is_proper_url(s):
    """
    Checks to see if a string is a "proper" URL, i.e. one that carries an explicit scheme.

    Anything without a scheme (for example ``Some%20Page.md`` or ``folder/page``) is not
    considered a proper URL.

    Args:
        s (str): The string to be parsed.

    Returns:
        bool: ``True`` if the string is a proper URL, ``False`` if not.
    """
    # urlparse() yields an empty scheme for relative references, so the
    # truthiness of the scheme is the whole test.
    return bool(urlparse(s).scheme)
def sanitize_reference(s):
    """
    Sanitizes an internal reference to a file by removing URL-quoted characters and any Markdown suffix.

    The reference is URL-unquoted first, so a suffix that only appears after unquoting
    (e.g. ``page%2Emd``) is stripped as well. Only a single trailing ``.md`` is removed.

    Args:
        s (str): The reference to be sanitized.

    Returns:
        str: The sanitized reference.
    """
    suffix = '.md'
    rc = urlunquote(s)
    if rc.endswith(suffix):
        rc = rc[:-len(suffix)]
    return rc
class MetaStripper(Extension):
"""
@ -23,57 +57,97 @@ class MetaStripper(Extension):
return lines
def extendMarkdown(self, md):
    """
    Registers the metadata-stripping preprocessor with the Markdown instance.

    Args:
        md: The Markdown instance being extended.
    """
    # Register exactly once, at the module's base priority. Registering the same
    # name a second time would only replace the first entry.
    md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
class ObsidianLinks(Extension):
    """
    An extension that processes Obsidian internal links in the [[page name]] format, as well as overrides the standard
    Markdown link processing to handle Obsidian internal links as well as external links.

    Both inline processors hold a reference back to this extension and use it to resolve
    references and to build the resulting elements, so the two link syntaxes render identically.
    """

    def __init__(self, context, **kwargs):
        """
        Args:
            context: Object supplying ``src_index.lookup(name)`` and ``url_prefix``, which are
                used to resolve internal references.
            **kwargs: Passed through to ``Extension.__init__``.
        """
        super().__init__(**kwargs)
        self._context = context

    @property
    def obsidian_link_classname(self):
        """str: CSS class set on anchors that point at a resolved internal page."""
        return 'obsidian-link'

    @property
    def invalid_reference_classname(self):
        """str: CSS class set on the span emitted for an unresolvable internal reference."""
        return 'invalid-reference'

    def _parse_reference(self, contents):
        """
        Resolves an internal reference of the form ``target`` or ``target|display text``.

        Args:
            contents (str): The reference text (without any surrounding brackets).

        Returns:
            tuple: ``(link, text)`` where ``link`` is the resolved link target, or ``None`` if the
            target is not found in the source index, and ``text`` is the display text (the
            target itself when no non-empty display text was given).
        """
        parts = contents.split('|')
        target = parts[0]
        text = parts[1] if len(parts) > 1 else None
        # lookup() also returns a link type, which is not needed here.
        node, _linktype = self._context.src_index.lookup(target)
        if not text:
            text = target
        if node:
            return node.link_target(self._context.url_prefix), text
        return None, text

    def _internal_element(self, link, text):
        """
        Builds the element for an internal reference.

        Args:
            link (str): The resolved link target, or ``None`` if the reference was not resolved.
            text (str): The display text for the element.

        Returns:
            etree.Element: An ``<a>`` carrying the Obsidian link class when ``link`` is given,
            otherwise a ``<span>`` carrying the invalid-reference class.
        """
        if link is None:
            el = etree.Element('span')
            el.set('class', self.invalid_reference_classname)
        else:
            el = etree.Element('a')
            el.set('href', link)
            el.set('class', self.obsidian_link_classname)
        el.text = text
        return el

    class ObsidianLinksProc(InlineProcessor):
        """Inline processor for Obsidian ``[[target]]`` / ``[[target|text]]`` links."""

        def __init__(self, pattern, md, extref):
            """
            Args:
                pattern (str): Regular expression whose group 1 is the reference text.
                md: The Markdown instance.
                extref (ObsidianLinks): The owning extension, used to resolve references.
            """
            super().__init__(pattern, md)
            self._extref = extref

        def handleMatch(self, m, data):
            """Returns the element for the matched link and the span of text it replaces."""
            link, text = self._extref._parse_reference(m.group(1))
            return self._extref._internal_element(link, text), m.start(0), m.end(0)

    class GenericLinksProc(InlineProcessor):
        """Inline processor for standard ``[text](link)`` links, internal or external."""

        def __init__(self, pattern, md, extref):
            """
            Args:
                pattern (str): Regular expression whose group 1 is the link text and group 2 the link.
                md: The Markdown instance.
                extref (ObsidianLinks): The owning extension, used to resolve references.
            """
            super().__init__(pattern, md)
            self._extref = extref

        def handleMatch(self, m, data):
            """Returns the element for the matched link and the span of text it replaces."""
            text = m.group(1)
            link = m.group(2)
            if is_proper_url(link):
                # External link: emit a plain anchor with no Obsidian-specific class.
                el = etree.Element('a')
                el.set('href', link)
                el.text = text
            else:
                # Internal link: the display text always comes from the brackets, so the
                # text returned by _parse_reference is ignored.
                newlink, _ = self._extref._parse_reference(sanitize_reference(link))
                el = self._extref._internal_element(newlink, text)
            return el, m.start(0), m.end(0)

    def extendMarkdown(self, md):
        """
        Registers both link processors with the Markdown instance.

        Args:
            md: The Markdown instance being extended.
        """
        OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
        # NOTE(review): this pattern also matches the "[alt](src)" tail of image syntax
        # "![alt](src)" -- confirm that images are handled before this processor runs.
        GENERICLINK_PATTERN = r'\[(.*?)\]\((.*?)\)'
        # The [[...]] processor gets the higher priority so that it is tried before
        # the generic [text](link) processor.
        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self),
                                   'obsidian_links', PRIO_BASE + 110)
        md.inlinePatterns.register(ObsidianLinks.GenericLinksProc(GENERICLINK_PATTERN, md, self),
                                   'obsidian_generic_links', PRIO_BASE + 100)
class ObsidianInlines(Extension):
    """An extension that handles the special Obsidian markdown format sequences."""

    def extendMarkdown(self, md):
        """
        Registers the ``~~text~~`` (rendered as ``<del>``) and ``==text==`` (rendered as ``<ins>``)
        inline processors with the Markdown instance.

        Args:
            md: The Markdown instance being extended.
        """
        # Each name is registered exactly once, at a PRIO_BASE-relative priority.
        # The empty leading group "()" makes the wrapped text land in group 2 of each pattern.
        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE)
        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
def create_markdown_parser(context):