added support for parsing standard Markdown links that happen to be internal

2024-03-13 21:24:42 -06:00 · 2024-03-13 21:24:42 -06:00 · c0025f9513
commit c0025f9513
parent 5cadb535dc
1 changed files with 98 additions and 24 deletions
--- a/src/dragonglass/mparse.py
+++ b/src/dragonglass/mparse.py
@ -2,10 +2,44 @@
 import markdown
 import xml.etree.ElementTree as etree
 from urllib.parse import urlparse
 from urllib.parse import unquote as urlunquote
 from markdown.extensions import Extension
 from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
 from markdown.preprocessors import Preprocessor
 PRIO_BASE = 10000  # priority base for our extensions
 def is_proper_url(s):
    """
    Determines whether a string counts as a "proper" URL, i.e. one in which ``urlparse`` finds a scheme.
    A string with no scheme (for example a relative file reference) is not considered a proper URL.
    Args:
        s (str): The string to be examined.
    Returns:
        bool: ``True`` if the string has a URL scheme, ``False`` if not.
    """
    return bool(urlparse(s).scheme)
 def sanitize_reference(s):
    """
    Cleans up an internal file reference taken from a link.
    URL-quoted (percent-encoded) characters are decoded first; afterwards a single trailing ``.md`` suffix is
    dropped if one is present. The suffix check is case-sensitive.
    Args:
        s (str): The reference to be sanitized.
    Returns:
        str: The sanitized reference.
    """
    decoded = urlunquote(s)
    return decoded[:-3] if decoded.endswith('.md') else decoded
 class MetaStripper(Extension):
    """
@ -23,57 +57,97 @@ class MetaStripper(Extension):
            return lines
    def extendMarkdown(self, md):
-        md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', 0)
+        md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
 class ObsidianLinks(Extension):
-    """An extension that processes Obsidian internal links in the [[page name]] format."""
+    """
    An extension that processes Obsidian internal links in the [[page name]] format, as well as overrides the standard
    Markdown link processing to handle Obsidian internal links as well as external links.
    """
    def __init__(self, context, **kwargs):
        """
        Initializes the extension.
        Args:
            context: The object consulted for ``src_index`` and ``url_prefix`` when references are resolved.
            **kwargs: Passed through unchanged to the base class initializer.
        """
        super().__init__(**kwargs)
        self._context = context
    @property
    def obsidian_link_classname(self):
        """str: The ``class`` attribute value set on ``<a>`` elements generated for resolved internal links."""
        return 'obsidian-link'
    @property
    def invalid_reference_classname(self):
        """str: The ``class`` attribute value set on ``<span>`` elements generated for unresolved references."""
        return 'invalid-reference'
    def _parse_reference(self, contents):
        """
        Resolves the contents of an internal reference into a link target and the text to display for it.
        The contents may take the form ``target|text``. Only the first two ``|``-separated fields are used; if the
        text field is missing or empty, the target itself is used as the text.
        Args:
            contents (str): The reference contents, e.g. the text between ``[[`` and ``]]``.
        Returns:
            tuple: ``(link, text)``, where ``link`` is the result of the found node's ``link_target()`` for the
            context's URL prefix, or ``None`` if the lookup in the context's source index yields no node.
        """
        fields = contents.split('|')
        target = fields[0]
        node, _linktype = self._context.src_index.lookup(target)
        # An absent or empty display field falls back to the target name.
        label = (fields[1] if len(fields) > 1 else None) or target
        if not node:
            return None, label
        return node.link_target(self._context.url_prefix), label
    class ObsidianLinksProc(InlineProcessor):
-        def __init__(self, pattern, md, context):
+        def __init__(self, pattern, md, extref):
            super(ObsidianLinks.ObsidianLinksProc, self).__init__(pattern, md)
-            self._context = context
+            self._extref = extref
        def parse_reference(self, contents):
            text = None
            t = contents.split('|')
            if len(t) > 1:
                text = t[1]
                contents = t[0]
            node, linktype = self._context.src_index.lookup(contents)
            if not text:
                text = contents
            if node:
                return node.link_target(self._context.url_prefix), text
            return None, text
        def handleMatch(self, m, data):
-            link, text = self.parse_reference(m.group(0)[2:-2])
+            link, text = self._extref._parse_reference(m.group(1))
            if link is None:
                el = etree.Element('span')
-                el.set('class', 'invalid-reference')
+                el.set('class', self._extref.invalid_reference_classname)
                el.text = text
            else:
                el = etree.Element('a')
                el.set('href', link)
                el.set('class', self._extref.obsidian_link_classname)
                el.text = text
            return el, m.start(0), m.end(0)
    class GenericLinksProc(InlineProcessor):
        """
        Inline processor for standard Markdown links, where match group 1 is the link text and group 2 the link.
        Links that are proper URLs become plain ``<a>`` elements. Anything else is treated as an internal reference
        and resolved through the owning extension.
        """
        def __init__(self, pattern, md, extref):
            """
            Initializes the processor.
            Args:
                pattern (str): The regular expression pattern handed to the base ``InlineProcessor``.
                md: The Markdown instance handed to the base ``InlineProcessor``.
                extref: The owning extension, used for reference resolution and CSS class names.
            """
            super().__init__(pattern, md)
            self._extref = extref
        def handleMatch(self, m, data):
            """
            Builds the element that replaces a matched link.
            Args:
                m: The regular expression match; group 1 is the link text, group 2 is the link itself.
                data (str): The text being processed (unused here).
            Returns:
                tuple: The new element, plus the start and end offsets of the full match.
            """
            text, link = m.group(1), m.group(2)
            if is_proper_url(link):
                # External link: passed through untouched, with no extra class.
                tag, attrs = 'a', {'href': link}
            else:
                owner = self._extref
                target, _ = owner._parse_reference(sanitize_reference(link))
                if target is None:
                    # Unresolvable internal reference: show the text only, flagged as invalid.
                    tag, attrs = 'span', {'class': owner.invalid_reference_classname}
                else:
                    tag, attrs = 'a', {'href': target, 'class': owner.obsidian_link_classname}
            el = etree.Element(tag, attrs)
            el.text = text
            return el, m.start(0), m.end(0)
    def extendMarkdown(self, md):
        OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
-        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self._context),
+        GENERICLINK_PATTERN = r'\[(.*?)\]\((.*?)\)'
-                                   'obsidian_links', 0)
+        md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self),
                                   'obsidian_links', PRIO_BASE + 110)
        md.inlinePatterns.register(ObsidianLinks.GenericLinksProc(GENERICLINK_PATTERN, md, self),
                                   'obsidian_generic_links', PRIO_BASE + 100)
 class ObsidianInlines(Extension):
    """An extension that handles the special Obsidian markdown format sequences."""
    def extendMarkdown(self, md):
-        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', 0)
+        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE)
-        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', 0)
+        md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
 def create_markdown_parser(context):