added support for parsing standard Markdown links that happen to be internal

This commit is contained in:
Amy G. Bowersox 2024-03-13 21:24:42 -06:00
parent 5cadb535dc
commit c0025f9513

View File

@ -2,10 +2,44 @@
import markdown import markdown
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from urllib.parse import urlparse
from urllib.parse import unquote as urlunquote
from markdown.extensions import Extension from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
from markdown.preprocessors import Preprocessor from markdown.preprocessors import Preprocessor
# Base value for the priorities our extensions register with; every registration in this
# module uses this value or a small offset from it.
# NOTE(review): presumably chosen to outrank Python-Markdown's built-in processors - confirm.
PRIO_BASE = 10000 # priority base for our extensions
def is_proper_url(s):
    """
    Checks to see if a string is a "proper" URL, i.e. one that carries a scheme (``https:``, ``mailto:``, ...).

    Args:
        s (str): The string to be parsed.

    Returns:
        bool: ``True`` if the string is a proper URL, ``False`` if not. A string that cannot be parsed
            at all is reported as ``False`` rather than raising.
    """
    try:
        return bool(urlparse(s).scheme)
    except ValueError:
        # urlparse() rejects some malformed inputs (for example an unbalanced "[" in the host part).
        # Link targets come straight from document text, so treat those as "not a URL" instead of
        # letting the exception abort the whole render.
        return False
def sanitize_reference(s):
    """
    Turns the target of a Markdown link into a plain internal reference.

    URL-quoted characters are decoded first; after that a trailing ``.md`` suffix, if present, is dropped.

    Args:
        s (str): The reference to be sanitized.

    Returns:
        str: The sanitized reference.
    """
    decoded = urlunquote(s)
    return decoded[:-3] if decoded.endswith('.md') else decoded
class MetaStripper(Extension): class MetaStripper(Extension):
""" """
@ -23,57 +57,97 @@ class MetaStripper(Extension):
return lines return lines
def extendMarkdown(self, md): def extendMarkdown(self, md):
md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', 0) md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
class ObsidianLinks(Extension):
    """
    An extension that processes Obsidian internal links in the [[page name]] format, and also overrides the
    standard Markdown link processing so that ``[text](target)`` links can point at internal pages as well
    as external URLs.

    Args:
        context: The conversion context. It must provide ``src_index.lookup()`` for resolving references
            and ``url_prefix`` for building link targets.
        **kwargs: Passed through to the ``Extension`` base class.
    """

    def __init__(self, context, **kwargs):
        super().__init__(**kwargs)
        self._context = context

    @property
    def obsidian_link_classname(self):
        """str: The CSS class set on ``<a>`` elements generated for resolved internal links."""
        return 'obsidian-link'

    @property
    def invalid_reference_classname(self):
        """str: The CSS class set on ``<span>`` elements generated for references that did not resolve."""
        return 'invalid-reference'

    def _parse_reference(self, contents):
        """
        Resolves an internal reference of the form ``target`` or ``target|display text``.

        Args:
            contents (str): The reference, without any surrounding brackets.

        Returns:
            tuple: ``(link, text)``, where ``link`` is the link target for the referenced node, or ``None``
                if the source index does not know the target, and ``text`` is the display text (the target
                itself when no display text was given).
        """
        text = None
        t = contents.split('|')
        if len(t) > 1:
            text = t[1]
            contents = t[0]
        node, _ = self._context.src_index.lookup(contents)
        if not text:
            text = contents
        if node:
            return node.link_target(self._context.url_prefix), text
        return None, text

    def _make_element(self, link, text):
        """
        Builds the element for an internal reference that has already been resolved.

        Args:
            link (str): The resolved link target, or ``None`` if the reference could not be resolved.
            text (str): The text to display.

        Returns:
            xml.etree.ElementTree.Element: An ``<a>`` element for a resolved link, or a ``<span>`` marked
                as an invalid reference otherwise.
        """
        if link is None:
            el = etree.Element('span')
            el.set('class', self.invalid_reference_classname)
        else:
            el = etree.Element('a')
            el.set('href', link)
            el.set('class', self.obsidian_link_classname)
        el.text = text
        return el

    class ObsidianLinksProc(InlineProcessor):
        """Inline processor for ``[[target]]`` and ``[[target|text]]`` links."""

        def __init__(self, pattern, md, extref):
            super().__init__(pattern, md)
            self._extref = extref  # the owning ObsidianLinks extension

        def handleMatch(self, m, data):
            link, text = self._extref._parse_reference(m.group(1))
            return self._extref._make_element(link, text), m.start(0), m.end(0)

    class GenericLinksProc(InlineProcessor):
        """Inline processor for standard ``[text](target)`` links, internal or external."""

        def __init__(self, pattern, md, extref):
            super().__init__(pattern, md)
            self._extref = extref  # the owning ObsidianLinks extension

        def handleMatch(self, m, data):
            text = m.group(1)
            link = m.group(2)
            if is_proper_url(link):
                # External link: emit a plain anchor, without the internal-link CSS class.
                el = etree.Element('a')
                el.set('href', link)
                el.text = text
                return el, m.start(0), m.end(0)
            # No scheme: treat the target as a reference to an internal page. Only the resolved
            # target is used; the display text always comes from the link itself.
            newlink, _ = self._extref._parse_reference(sanitize_reference(link))
            return self._extref._make_element(newlink, text), m.start(0), m.end(0)

    def extendMarkdown(self, md):
        """
        Registers both link processors with the Markdown parser.

        Args:
            md (markdown.Markdown): The parser instance being extended.
        """
        OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
        # The lookbehind keeps this pattern from claiming the "[alt](src)" part of image syntax
        # ("![alt](src)"). Our priority is far above the built-in image pattern, so without it an
        # image would be turned into a stray "!" followed by a link or an invalid-reference span.
        GENERICLINK_PATTERN = r'(?<!!)\[(.*?)\]\((.*?)\)'
        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self),
                                   'obsidian_links', PRIO_BASE + 110)
        md.inlinePatterns.register(ObsidianLinks.GenericLinksProc(GENERICLINK_PATTERN, md, self),
                                   'obsidian_generic_links', PRIO_BASE + 100)
class ObsidianInlines(Extension):
    """
    An extension that handles the special Obsidian markdown format sequences: ``~~text~~`` is wrapped in a
    ``del`` element and ``==text==`` in an ``ins`` element.
    """

    def extendMarkdown(self, md):
        """
        Registers one simple-tag inline processor per Obsidian format sequence.

        Args:
            md (markdown.Markdown): The parser instance being extended.
        """
        # (pattern, tag, registered name, priority)
        simple_tags = (
            (r'()~~(.*?)~~', 'del', 'strikeout', PRIO_BASE),
            (r'()\=\=(.*?)\=\=', 'ins', 'highlight', PRIO_BASE + 1),
        )
        for pattern, tag, name, priority in simple_tags:
            md.inlinePatterns.register(SimpleTagInlineProcessor(pattern, tag), name, priority)
def create_markdown_parser(context): def create_markdown_parser(context):