added support for parsing standard Markdown links that happen to be internal
This commit is contained in:
parent
5cadb535dc
commit
c0025f9513
|
@ -2,10 +2,44 @@
|
||||||
|
|
||||||
import markdown
|
import markdown
|
||||||
import xml.etree.ElementTree as etree
|
import xml.etree.ElementTree as etree
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from urllib.parse import unquote as urlunquote
|
||||||
from markdown.extensions import Extension
|
from markdown.extensions import Extension
|
||||||
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
|
from markdown.inlinepatterns import InlineProcessor, SimpleTagInlineProcessor
|
||||||
from markdown.preprocessors import Preprocessor
|
from markdown.preprocessors import Preprocessor
|
||||||
|
|
||||||
|
PRIO_BASE = 10000 # priority base for our extensions
|
||||||
|
|
||||||
|
|
||||||
|
def is_proper_url(s):
    """
    Checks to see if a string is a "proper" URL, i.e. one that carries an explicit scheme.

    Args:
        s (str): The string to be parsed.

    Returns:
        bool: ``True`` if the string is a proper URL, ``False`` if not (including when it cannot be parsed at all).
    """
    try:
        parseout = urlparse(s)
    except ValueError:
        # urlparse() raises on malformed input such as unbalanced IPv6 brackets ("http://[bad");
        # a string that cannot even be parsed is certainly not a proper URL.
        return False
    return bool(parseout.scheme)
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_reference(s):
    """
    Sanitizes an internal reference to a file by removing URL-quoted characters and any Markdown suffix.

    Args:
        s (str): The reference to be sanitized.

    Returns:
        str: The sanitized reference.
    """
    # Decode first, so that a percent-encoded suffix is recognized as well.
    rc = urlunquote(s)
    # Only a single trailing, lower-case ".md" is removed; the match is case-sensitive.
    if rc.endswith('.md'):
        rc = rc[:-3]
    return rc
|
||||||
|
|
||||||
|
|
||||||
class MetaStripper(Extension):
|
class MetaStripper(Extension):
|
||||||
"""
|
"""
|
||||||
|
@ -23,57 +57,97 @@ class MetaStripper(Extension):
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
def extendMarkdown(self, md):
|
def extendMarkdown(self, md):
|
||||||
md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', 0)
|
md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
|
||||||
|
|
||||||
|
|
||||||
class ObsidianLinks(Extension):
|
class ObsidianLinks(Extension):
|
||||||
"""An extension that processes Obsidian internal links in the [[page name]] format."""
|
"""
|
||||||
|
An extension that processes Obsidian internal links in the [[page name]] format, as well as overrides the standard
|
||||||
|
Markdown link processing to handle Obsidian internal links as well as external links.
|
||||||
|
"""
|
||||||
def __init__(self, context, **kwargs):
|
def __init__(self, context, **kwargs):
|
||||||
super(ObsidianLinks, self).__init__(**kwargs)
|
super(ObsidianLinks, self).__init__(**kwargs)
|
||||||
self._context = context
|
self._context = context
|
||||||
|
|
||||||
|
@property
|
||||||
|
def obsidian_link_classname(self):
|
||||||
|
return 'obsidian-link'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def invalid_reference_classname(self):
|
||||||
|
return 'invalid-reference'
|
||||||
|
|
||||||
|
def _parse_reference(self, contents):
|
||||||
|
text = None
|
||||||
|
t = contents.split('|')
|
||||||
|
if len(t) > 1:
|
||||||
|
text = t[1]
|
||||||
|
contents = t[0]
|
||||||
|
|
||||||
|
node, linktype = self._context.src_index.lookup(contents)
|
||||||
|
if not text:
|
||||||
|
text = contents
|
||||||
|
if node:
|
||||||
|
return node.link_target(self._context.url_prefix), text
|
||||||
|
return None, text
|
||||||
|
|
||||||
class ObsidianLinksProc(InlineProcessor):
|
class ObsidianLinksProc(InlineProcessor):
|
||||||
def __init__(self, pattern, md, context):
|
def __init__(self, pattern, md, extref):
|
||||||
super(ObsidianLinks.ObsidianLinksProc, self).__init__(pattern, md)
|
super(ObsidianLinks.ObsidianLinksProc, self).__init__(pattern, md)
|
||||||
self._context = context
|
self._extref = extref
|
||||||
|
|
||||||
def parse_reference(self, contents):
|
|
||||||
text = None
|
|
||||||
t = contents.split('|')
|
|
||||||
if len(t) > 1:
|
|
||||||
text = t[1]
|
|
||||||
contents = t[0]
|
|
||||||
|
|
||||||
node, linktype = self._context.src_index.lookup(contents)
|
|
||||||
if not text:
|
|
||||||
text = contents
|
|
||||||
if node:
|
|
||||||
return node.link_target(self._context.url_prefix), text
|
|
||||||
return None, text
|
|
||||||
|
|
||||||
def handleMatch(self, m, data):
|
def handleMatch(self, m, data):
|
||||||
link, text = self.parse_reference(m.group(0)[2:-2])
|
link, text = self._extref._parse_reference(m.group(1))
|
||||||
if link is None:
|
if link is None:
|
||||||
el = etree.Element('span')
|
el = etree.Element('span')
|
||||||
el.set('class', 'invalid-reference')
|
el.set('class', self._extref.invalid_reference_classname)
|
||||||
el.text = text
|
el.text = text
|
||||||
else:
|
else:
|
||||||
el = etree.Element('a')
|
el = etree.Element('a')
|
||||||
el.set('href', link)
|
el.set('href', link)
|
||||||
|
el.set('class', self._extref.obsidian_link_classname)
|
||||||
el.text = text
|
el.text = text
|
||||||
return el, m.start(0), m.end(0)
|
return el, m.start(0), m.end(0)
|
||||||
|
|
||||||
|
class GenericLinksProc(InlineProcessor):
|
||||||
|
def __init__(self, pattern, md, extref):
|
||||||
|
super(ObsidianLinks.GenericLinksProc, self).__init__(pattern, md)
|
||||||
|
self._extref = extref
|
||||||
|
|
||||||
|
def handleMatch(self, m, data):
|
||||||
|
text = m.group(1)
|
||||||
|
link = m.group(2)
|
||||||
|
if is_proper_url(link):
|
||||||
|
el = etree.Element('a')
|
||||||
|
el.set('href', link)
|
||||||
|
el.text = text
|
||||||
|
else:
|
||||||
|
newlink, _ = self._extref._parse_reference(sanitize_reference(link))
|
||||||
|
if newlink is None:
|
||||||
|
el = etree.Element('span')
|
||||||
|
el.set('class', self._extref.invalid_reference_classname)
|
||||||
|
el.text = text
|
||||||
|
else:
|
||||||
|
el = etree.Element('a')
|
||||||
|
el.set('href', newlink)
|
||||||
|
el.set('class', self._extref.obsidian_link_classname)
|
||||||
|
el.text = text
|
||||||
|
return el, m.start(0), m.end(0)
|
||||||
|
|
||||||
def extendMarkdown(self, md):
|
def extendMarkdown(self, md):
|
||||||
OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
|
OBSLINK_PATTERN = r'\[\[(.*?)\]\]'
|
||||||
md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self._context),
|
GENERICLINK_PATTERN = r'\[(.*?)\]\((.*?)\)'
|
||||||
'obsidian_links', 0)
|
md.inlinePatterns.register(ObsidianLinks.ObsidianLinksProc(OBSLINK_PATTERN, md, self),
|
||||||
|
'obsidian_links', PRIO_BASE + 110)
|
||||||
|
md.inlinePatterns.register(ObsidianLinks.GenericLinksProc(GENERICLINK_PATTERN, md, self),
|
||||||
|
'obsidian_generic_links', PRIO_BASE + 100)
|
||||||
|
|
||||||
|
|
||||||
class ObsidianInlines(Extension):
|
class ObsidianInlines(Extension):
|
||||||
"""An extension that handles the special Obsidian markdown format sequences."""
|
"""An extension that handles the special Obsidian markdown format sequences."""
|
||||||
def extendMarkdown(self, md):
|
def extendMarkdown(self, md):
|
||||||
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', 0)
|
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE)
|
||||||
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', 0)
|
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
|
||||||
|
|
||||||
|
|
||||||
def create_markdown_parser(context):
|
def create_markdown_parser(context):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user