diff --git a/src/dragonglass/mparse.py b/src/dragonglass/mparse.py
index 54993a5..5c726f1 100644
--- a/src/dragonglass/mparse.py
+++ b/src/dragonglass/mparse.py
@@ -2,10 +2,44 @@
 
 import markdown
 import xml.etree.ElementTree as etree
+from urllib.parse import urlparse
+from urllib.parse import unquote as urlunquote
 from markdown.extensions import Extension
 from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
 from markdown.preprocessors import Preprocessor
 
+PRIO_BASE = 10000  # priority base for our extensions
+
+
+def is_proper_url(s):
+    """
+    Checks to see if a string is a "proper" URL, i.e. one that parses with a non-empty scheme.
+
+    Args:
+        s (str): The string to be parsed.
+
+    Returns:
+        bool: ``True`` if the string is a proper URL, ``False`` if not.
+    """
+    parseout = urlparse(s)
+    return bool(parseout.scheme)
+
+
+def sanitize_reference(s):
+    """
+    Sanitizes an internal reference to a file by removing URL-quoted characters and any Markdown suffix.
+
+    Args:
+        s (str): The reference to be sanitized.
+
+    Returns:
+        str: The sanitized reference.
+    """
+    rc = urlunquote(s)
+    if rc.endswith('.md'):
+        rc = rc[:-3]
+    return rc
+
 
 class MetaStripper(Extension):
     """
@@ -23,57 +57,111 @@ class MetaStripper(Extension):
             return lines
 
     def extendMarkdown(self, md):
-        md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', 0)
+        md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
 
 
 class ObsidianLinks(Extension):
-    """An extension that processes Obsidian internal links in the [[page name]] format."""
+    """
+    An extension that processes Obsidian internal links in the [[page name]] format, and also overrides the standard
+    Markdown link processing to handle Obsidian internal links as well as external links.
+    """
     def __init__(self, context, **kwargs):
         super(ObsidianLinks, self).__init__(**kwargs)
         self._context = context
 
+    @property
+    def obsidian_link_classname(self):
+        """str: The CSS class set on anchors generated for internal references that were resolved."""
+        return 'obsidian-link'
+
+    @property
+    def invalid_reference_classname(self):
+        """str: The CSS class set on the span emitted for an internal reference that could not be resolved."""
+        return 'invalid-reference'
+
+    def _parse_reference(self, contents):
+        """
+        Resolves the contents of an internal reference by looking it up in the context's source index.
+
+        Args:
+            contents (str): The reference contents, either ``target`` or ``target|display text``.
+
+        Returns:
+            tuple: The link target (``None`` if the lookup returned no node), and the text to be displayed.
+        """
+        text = None
+        t = contents.split('|', 1)  # split once only, so display text containing '|' is kept whole
+        if len(t) > 1:
+            text = t[1]
+            contents = t[0]
+
+        node, _ = self._context.src_index.lookup(contents)
+        if not text:
+            text = contents
+        if node:
+            return node.link_target(self._context.url_prefix), text
+        return None, text
+
     class ObsidianLinksProc(InlineProcessor):
-        def __init__(self, pattern, md, context):
+        def __init__(self, pattern, md, extref):
             super(ObsidianLinks.ObsidianLinksProc, self).__init__(pattern, md)
-            self._context = context
-
-        def parse_reference(self, contents):
-            text = None
-            t = contents.split('|')
-            if len(t) > 1:
-                text = t[1]
-                contents = t[0]
-
-            node, linktype = self._context.src_index.lookup(contents)
-            if not text:
-                text = contents
-            if node:
-                return node.link_target(self._context.url_prefix), text
-            return None, text
+            self._extref = extref
 
         def handleMatch(self, m, data):
-            link, text = self.parse_reference(m.group(0)[2:-2])
+            link, text = self._extref._parse_reference(m.group(1))
             if link is None:
                 el = etree.Element('span')
-                el.set('class', 'invalid-reference')
+                el.set('class', self._extref.invalid_reference_classname)
                 el.text = text
             else:
                 el = etree.Element('a')
                 el.set('href', link)
+                el.set('class', self._extref.obsidian_link_classname)
                 el.text = text
             return el, m.start(0), m.end(0)
 
+    class GenericLinksProc(InlineProcessor):
+        """Handles standard ``[text](target)`` links, resolving targets that are not proper URLs as internal references."""
+        def __init__(self, pattern, md, extref):
+            super(ObsidianLinks.GenericLinksProc, self).__init__(pattern, md)
+            self._extref = extref
+
+        def handleMatch(self, m, data):
+            text = m.group(1)
+            link = m.group(2)
+            if is_proper_url(link):
+                el = etree.Element('a')
+                el.set('href', link)
+                el.text = text
+            else:
+                newlink, _ = self._extref._parse_reference(sanitize_reference(link))
+                if newlink is None:
+                    el = etree.Element('span')
+                    el.set('class', self._extref.invalid_reference_classname)
+                    el.text = text
+                else:
+                    el = etree.Element('a')
+                    el.set('href', newlink)
+                    el.set('class', self._extref.obsidian_link_classname)
+                    el.text = text
+            return el, m.start(0), m.end(0)
+
     def extendMarkdown(self, md):
         OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
-        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self._context),
-                                   'obsidian_links', 0)
+        # The (?<!\!) lookbehind keeps this pattern from consuming image syntax, ![alt](src), which it would otherwise
+        # match first because it is registered at a far higher priority than the built-in image processor.
+        GENERICLINK_PATTERN = r'(?<!\!)\[(.*?)\]\((.*?)\)'
+        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self),
+                                   'obsidian_links', PRIO_BASE + 110)
+        md.inlinePatterns.register(ObsidianLinks.GenericLinksProc(GENERICLINK_PATTERN, md, self),
+                                   'obsidian_generic_links', PRIO_BASE + 100)
 
 
 class ObsidianInlines(Extension):
     """An extension that handles the special Obsidian markdown format sequences."""
     def extendMarkdown(self, md):
-        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', 0)
-        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', 0)
+        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE)
+        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
 
 
 def create_markdown_parser(context):