handle links which use the <> construct to encode blank spaces

This commit is contained in:
Amy G. Bowersox 2024-07-30 22:12:33 -06:00
parent b293684fb7
commit 4facd16452
2 changed files with 56 additions and 1 deletions

View File

@ -31,7 +31,6 @@ def main() -> int:
return 1
context.load_config(args)
tenv = create_template_environment(context)
nodes = generate_list(context.source_dir)
for node in nodes:
@ -57,6 +56,7 @@ def main() -> int:
if not dest_dir.exists():
dest_dir.mkdir()
tenv = create_template_environment(context)
for node in nodes:
p = node.target_file(dest_dir)

View File

@ -4,6 +4,7 @@ import re
import xml.etree.ElementTree as etree # noqa: N813
from typing import Any
from urllib.parse import unquote as urlunquote
from urllib.parse import quote as urlquote
from urllib.parse import urlparse
import markdown
@ -61,7 +62,17 @@ class MetaStripper(Extension):
"""
class MetaStripperProc(Preprocessor):
"""The actual metadata stripper."""
def run(self, lines: list[str]) -> list[str]:
"""
Removes the metadata lines from the front of the array of lines.
Args:
lines (list[str]): The Markdown lines read in.
Returns:
list[str]: The Markdown lines after the metadata has been removed.
"""
if lines[0] == '---':
lines.pop(0)
while lines[0] != '---':
@ -70,6 +81,12 @@ class MetaStripper(Extension):
return lines
def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
    """
    Hooks the metadata-stripping preprocessor into a Markdown parser.

    Args:
        md (markdown.Markdown): The Markdown parser that receives the preprocessor.
    """
    # Build the preprocessor first, then register it under the 'metastripper' name.
    stripper = MetaStripper.MetaStripperProc(md)
    md.preprocessors.register(stripper, 'metastripper', PRIO_BASE)
@ -78,14 +95,35 @@ class ObsidianImages(Extension):
DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?')
def __init__(self, context: Context, **kwargs: Any) -> None:
    """
    Initialize the image processor class.

    Args:
        context (Context): The dragonglass context, containing the configuration.
        **kwargs (Any): Other keyword arguments, forwarded unchanged to the
            base Extension initializer.
    """
    # A ``**kwargs`` annotation describes the type of each individual value,
    # so ``Any`` is correct here; ``dict[str, Any]`` would claim every keyword
    # argument is itself a dictionary.
    super().__init__(**kwargs)
    self._context = context
@property
def invalid_reference_classname(self) -> str:
"""
Returns the CSS class name for an invalid reference in the text.

Returns:
str: The fixed class name 'invalid-reference'.
"""
return 'invalid-reference'
def _parse_dimensions(self, s: str) -> tuple[str, int, int]:
"""
Parses an image reference looking for image dimensions.
Args:
s (str): The image reference to be parsed.
Returns:
str: The actual image reference.
int: The image width in pixels, or -1 if not specified.
int: The image height in pixels, or -1 if not specified.
"""
m = self.DIMS.match(s)
if m:
width = int(m.group(2))
@ -222,6 +260,8 @@ class ObsidianLinks(Extension):
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802
text = m.group(1)
link = m.group(2)
if link.startswith('<') and link.endswith('>'): # handle whitespace encoding
link = urlquote(link[1:-1].strip(), safe=':/')
if is_proper_url(link):
el = etree.Element('a')
el.set('href', link)
@ -249,11 +289,26 @@ class ObsidianLinks(Extension):
class ObsidianInlines(Extension):
    """An extension providing Obsidian's special inline markdown format sequences."""

    def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
        """
        Installs the inline patterns on the given Markdown parser.

        Args:
            md (markdown.Markdown): The Markdown parser that receives the patterns.
        """
        # Each entry: (regex, HTML tag emitted, registry name, priority).
        # Registration order matches the table order.
        # NOTE(review): '==text==' is emitted as <ins>; confirm this is the
        # intended tag for highlights.
        inline_specs = (
            (r'()~~(.*?)~~', 'del', 'strikeout', PRIO_BASE),
            (r'()\=\=(.*?)\=\=', 'ins', 'highlight', PRIO_BASE + 1),
        )
        for pattern, tag, name, priority in inline_specs:
            processor = SimpleTagInlineProcessor(pattern, tag)
            md.inlinePatterns.register(processor, name, priority)
def create_markdown_parser(context: Context) -> markdown.Markdown:
"""
Creates a Markdown parser with all our extensions loaded.
Args:
context (Context): The dragonglass context, that contains the configuration.
Returns:
markdown.Markdown: The new Markdown parser.
"""
return markdown.Markdown(extensions=[MetaStripper(),
ObsidianImages(context),
ObsidianLinks(context),