handle links which use the <> construct to encode blank spaces

This commit is contained in:
Amy G. Bowersox 2024-07-30 22:12:33 -06:00
parent b293684fb7
commit 4facd16452
2 changed files with 56 additions and 1 deletions

View File

@ -31,7 +31,6 @@ def main() -> int:
return 1 return 1
context.load_config(args) context.load_config(args)
tenv = create_template_environment(context)
nodes = generate_list(context.source_dir) nodes = generate_list(context.source_dir)
for node in nodes: for node in nodes:
@ -57,6 +56,7 @@ def main() -> int:
if not dest_dir.exists(): if not dest_dir.exists():
dest_dir.mkdir() dest_dir.mkdir()
tenv = create_template_environment(context)
for node in nodes: for node in nodes:
p = node.target_file(dest_dir) p = node.target_file(dest_dir)

View File

@ -4,6 +4,7 @@ import re
import xml.etree.ElementTree as etree # noqa: N813 import xml.etree.ElementTree as etree # noqa: N813
from typing import Any from typing import Any
from urllib.parse import unquote as urlunquote from urllib.parse import unquote as urlunquote
from urllib.parse import quote as urlquote
from urllib.parse import urlparse from urllib.parse import urlparse
import markdown import markdown
@ -61,7 +62,17 @@ class MetaStripper(Extension):
""" """
class MetaStripperProc(Preprocessor): class MetaStripperProc(Preprocessor):
"""The actual metadata stripper."""
def run(self, lines: list[str]) -> list[str]: def run(self, lines: list[str]) -> list[str]:
"""
Removes the metadata lines from the front of the array of lines.
Args:
lines (list[str]): The Markdown lines read in.
Returns:
list[str]: The Markdown lines after the metadata has been removed.
"""
if lines[0] == '---': if lines[0] == '---':
lines.pop(0) lines.pop(0)
while lines[0] != '---': while lines[0] != '---':
@ -70,6 +81,12 @@ class MetaStripper(Extension):
return lines return lines
def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802 def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
"""
Registers the metadata stripper with the Markdown parser.
Args:
md (markdown.Markdown): The Markdown parser to register the stripper with.
"""
md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE) md.preprocessors.register(MetaStripper.MetaStripperProc(md), 'metastripper', PRIO_BASE)
@ -78,14 +95,35 @@ class ObsidianImages(Extension):
DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?') DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?')
def __init__(self, context: Context, **kwargs: dict[str, Any]) -> None: def __init__(self, context: Context, **kwargs: dict[str, Any]) -> None:
"""
Initialize the image processor class.
Args:
context (Context): The dragonglass context, containing the configuration.
**kwargs (dict): Other keyword arguments to the image tag processor.
"""
super(ObsidianImages, self).__init__(**kwargs) super(ObsidianImages, self).__init__(**kwargs)
self._context = context self._context = context
@property @property
def invalid_reference_classname(self) -> str: def invalid_reference_classname(self) -> str:
"""
Returns the CSS class name for an invalid reference in the text.
"""
return 'invalid-reference' return 'invalid-reference'
def _parse_dimensions(self, s: str) -> tuple[str, int, int]: def _parse_dimensions(self, s: str) -> tuple[str, int, int]:
"""
Parses an image reference looking for image dimensions.
Args:
s (str): The image reference to be parsed.
Returns:
str: The actual image reference.
int: The image width in pixels, or -1 if not specified.
int: The image height in pixels, or -1 if not specified.
"""
m = self.DIMS.match(s) m = self.DIMS.match(s)
if m: if m:
width = int(m.group(2)) width = int(m.group(2))
@ -222,6 +260,8 @@ class ObsidianLinks(Extension):
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802 def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802
text = m.group(1) text = m.group(1)
link = m.group(2) link = m.group(2)
if link.startswith('<') and link.endswith('>'): # handle whitespace encoding
link = urlquote(link[1:-1].strip(), safe=':/')
if is_proper_url(link): if is_proper_url(link):
el = etree.Element('a') el = etree.Element('a')
el.set('href', link) el.set('href', link)
@ -249,11 +289,26 @@ class ObsidianLinks(Extension):
class ObsidianInlines(Extension): class ObsidianInlines(Extension):
"""An extension that handles the special Obsidian markdown format sequences.""" """An extension that handles the special Obsidian markdown format sequences."""
def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802 def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
"""
Registers the inline patterns with the Markdown parser.
Args:
md (markdown.Markdown): The Markdown parser to register the patterns with.
"""
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE) md.inlinePatterns.register(SimpleTagInlineProcessor(r'()~~(.*?)~~', 'del'), 'strikeout', PRIO_BASE)
md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1) md.inlinePatterns.register(SimpleTagInlineProcessor(r'()\=\=(.*?)\=\=', 'ins'), 'highlight', PRIO_BASE + 1)
def create_markdown_parser(context: Context) -> markdown.Markdown: def create_markdown_parser(context: Context) -> markdown.Markdown:
"""
Creates a Markdown parser with all our extensions loaded.
Args:
context (Context): The dragonglass context, that contains the configuration.
Returns:
markdown.Markdown: The new Markdown parser.
"""
return markdown.Markdown(extensions=[MetaStripper(), return markdown.Markdown(extensions=[MetaStripper(),
ObsidianImages(context), ObsidianImages(context),
ObsidianLinks(context), ObsidianLinks(context),