From 1842d3335825b32b16ad44dd604710e65751f3c3 Mon Sep 17 00:00:00 2001 From: Amy Gale Ruth Bowersox Date: Fri, 9 Aug 2024 17:43:56 -0600 Subject: [PATCH] added a lot of docstrings and general cleanup --- pyproject.toml | 2 +- src/dragonglass/config.py | 38 ++++++++- src/dragonglass/dragonglass.py | 137 +++++++++++++++++++-------------- src/dragonglass/mparse.py | 75 ++++++++++++++++-- src/dragonglass/style.py | 12 +++ src/dragonglass/template.py | 32 ++++++++ src/dragonglass/tree.py | 111 +++++++++++++++++++++++--- 7 files changed, 330 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8bbc9cc..289ebb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "pygments~=2.18.0", ] readme = "README.md" -requires-python = ">= 3.8" +requires-python = ">= 3.12" [project.scripts] 'dragonglass' = "dragonglass:main" diff --git a/src/dragonglass/config.py b/src/dragonglass/config.py index 5055a0e..df5c18d 100644 --- a/src/dragonglass/config.py +++ b/src/dragonglass/config.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +"""Configuration and context information for dragonglass""" + from argparse import Namespace from pathlib import Path from typing import Any @@ -9,12 +11,27 @@ import tomllib from .tree import SourceIndex, SourceNode +"""The default configuration file name.""" +DEFAULT_CONFIG_FILE = ".dragonglass" +"""The default template directory name.""" DEFAULT_TEMPLATE_DIRECTORY = ".dragonglass.tmpl" +"""The default template name.""" DEFAULT_TEMPLATE_NAME = "default.html" class Context: + """ + The configuration and context information for dragonglass. + + Attributes: + source_dir (Path): The source directory (Obsidian vault) for the operation. + template_dir (Path): The directory in which templates may be found. + config (dict[str, Any]): The configuration read in from the default configuration file. + src_index (SourceIndex): The index of all nodes, used to look up targets for internal links. 
+ current_node (SourceNode): The current node being processed. + """ def __init__(self) -> None: + """Initialize the Context.""" self.source_dir: Path | None = None self.template_dir: Path | None = None self.config: dict[str, Any] = {} @@ -23,7 +40,13 @@ class Context: self._default_template_name: str | None = None def load_config(self, args: Namespace) -> None: - config_filename: str = str(args.config) if args.config else ".dragonglass" + """ + Loads the configuration for dragonglass. + + Args: + args (Namespace): The command line arguments to dragonglass. + """ + config_filename: str = str(args.config) if args.config else DEFAULT_CONFIG_FILE assert self.source_dir is not None config_path = self.source_dir / config_filename if config_path.exists() and config_path.is_file(): @@ -35,16 +58,29 @@ class Context: @property def url_prefix(self) -> str: + """Returns the configured URL prefix for all URLs generated for internal links.""" links_section = self.config.get("links", {}) rc = links_section.get("prefix", "/") return rc if rc.endswith("/") else rc + '/' @property def relative_links(self) -> bool: + """ + Returns ``True`` if we generate relative links for internal links, ``False`` if we generate absolute links. + """ links_section = self.config.get("links", {}) return links_section.get("relative", False) def get_template_name_for_node(self, node: SourceNode) -> str: + """ + Returns the name of a template to be used to render the current node. + + Args: + node (SourceNode): The current node being processed. + + Returns: + str: The name of the template to be used to render the node.
+ """ if not self.template_dir.is_dir(): return DEFAULT_TEMPLATE_NAME return self._default_template_name diff --git a/src/dragonglass/dragonglass.py b/src/dragonglass/dragonglass.py index 12717cd..d311d61 100644 --- a/src/dragonglass/dragonglass.py +++ b/src/dragonglass/dragonglass.py @@ -1,5 +1,12 @@ #!/usr/bin/env python3 +""" +Dragonglass - a utility for processing Obsidian vaults into directories of Web pages. + +Add a configuration file named .dragonglass to the root folder of the Obsidian vault, and a template subdirectory +named .dragonglass.tmpl. +""" + import argparse import logging import shutil @@ -23,67 +30,79 @@ parser.add_argument('--dump', action='store_true', help='Dumps the parsed data b def main() -> int: - args = parser.parse_args() + """ + Main function of dragonglass: parses the Obsidian vault and outputs it as a formatted Web page directory. - if args.debug: - logging.basicConfig(level=logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) + Returns: + int: 0 on success, nonzero value on error. 
+ """ + try: + args = parser.parse_args() - context = Context() - context.source_dir = Path(args.source_dir).resolve() - if not (context.source_dir.exists() and context.source_dir.is_dir()): - logger.error(f"{context.source_dir} is not a valid directory") - return 1 - - dest_dir = Path(args.dest_dir).resolve() - if dest_dir.exists() and not dest_dir.is_dir(): - logger.error(f"{dest_dir} exists but is not a valid directory") - return 1 - - context.load_config(args) - - nodes = generate_list(context.source_dir) - for node in nodes: - logger.info(f"Loading metadata for {node}") - context.current_node = node - node.load_metadata(context.source_dir) - - context.src_index = SourceIndex(nodes) - - mdparse = create_markdown_parser(context) - for node in nodes: - context.current_node = node - logger.info(f"Parsing {node}") - node.parse_markdown(context.source_dir, mdparse) - - # TEMP - if args.dump: - for node in nodes: - print(node) - if node.metadata: - print(f"Metadata: {node.metadata}") - if node.text: - print("----- BEGIN TEXT -----") - print(node.text) - print("------ END TEXT ------") - - if not dest_dir.exists(): - dest_dir.mkdir() - tenv = create_template_environment(context) - - for node in nodes: - p = node.target_file(dest_dir) - if node.is_dir: - p.mkdir(exist_ok=True) - elif node.is_md: - context.current_node = node - tmpl = tenv.get_template(context.get_template_name_for_node(node)) - data = tmpl.render(template_vars(node, context)) - with p.open("wt") as f: - f.write(data) + if args.debug: + logging.basicConfig(level=logging.DEBUG) else: - shutil.copyfile(context.source_dir / node.path, p) + logging.basicConfig(level=logging.INFO) + + context = Context() + context.source_dir = Path(args.source_dir).resolve() + if not (context.source_dir.exists() and context.source_dir.is_dir()): + logger.error(f"{context.source_dir} is not a valid directory") + return 1 + + dest_dir = Path(args.dest_dir).resolve() + if dest_dir.exists() and not dest_dir.is_dir(): + 
logger.error(f"{dest_dir} exists but is not a valid directory") + return 1 + + context.load_config(args) + + nodes = generate_list(context.source_dir) + for node in nodes: + logger.info(f"Loading metadata for {node}") + context.current_node = node + node.load_metadata() + + context.src_index = SourceIndex(nodes) + + mdparse = create_markdown_parser(context) + for node in nodes: + logger.info(f"Parsing {node}") + context.current_node = node + node.parse_markdown(mdparse) + + # TEMP + if args.dump: + for node in nodes: + print(node) + if node.metadata: + print(f"Metadata: {node.metadata}") + if node.text: + print("----- BEGIN TEXT -----") + print(node.text) + print("------ END TEXT ------") + + if not dest_dir.exists(): + dest_dir.mkdir() + tenv = create_template_environment(context) + + for node in nodes: + p = node.target_file(dest_dir) + if node.is_dir: + p.mkdir(exist_ok=True) + elif node.is_md: + context.current_node = node + tmpl = tenv.get_template(context.get_template_name_for_node(node)) + data = tmpl.render(template_vars(node, context)) + with p.open("wt") as f: + f.write(data) + else: + shutil.copyfile(context.source_dir / node.path, p) + + write_default_stylesheet(tenv, dest_dir) + + except: + logger.exception("Unexpected error in processing") + return 1 - write_default_stylesheet(tenv, dest_dir) return 0 diff --git a/src/dragonglass/mparse.py b/src/dragonglass/mparse.py index b01b0cd..23c3592 100644 --- a/src/dragonglass/mparse.py +++ b/src/dragonglass/mparse.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +"""The Markdown parser and its extensions.""" + import hashlib import re import xml.etree.ElementTree as etree # noqa: N813 @@ -120,7 +122,7 @@ class MetaStripper(Extension): class ObsidianImages(Extension): """An extension that supports image tags the way Obsidian handles them.""" - DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?') + __DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?') def __init__(self, context: Context, **kwargs: dict[str, Any]) -> None: """ @@ 
-153,7 +155,7 @@ class ObsidianImages(Extension): int: The image height in pixels, or -1 if not specified. """ s = s.replace(r'\|', '|') # handle case where we're inside tables - m = self.DIMS.match(s) + m = self.__DIMS.match(s) if m: width = int(m.group(2)) height = int(m.group(3)) if m.group(3) else -1 @@ -162,20 +164,51 @@ class ObsidianImages(Extension): return s, -1, -1 def _lookup_image_reference(self, name: str) -> str | None: + """ + Looks up the image reference in the source index and returns its link target value if it exists. + + Args: + name (str): The image name to be looked up. + + Returns: + str: The image's link target value, or ``None`` if the image was not found. + """ assert self._context.src_index is not None node, _ = self._context.src_index.lookup(name) if node: return node.link_target(self._context.url_prefix, - self._context.current_node if self._context.relative_links else None, - root_path=self._context.source_dir) + self._context.current_node if self._context.relative_links else None) return None class ObsidianImageProc(InlineProcessor): + """The actual inline processor for Obsidian image references.""" def __init__(self, pattern: str, md: markdown.Markdown, extref: Any) -> None: + """ + Initialize the ObsidianImageProc processor. + + Args: + pattern (str): The pattern to be matched to find the image reference. + md (markdown.Markdown): Backreference to the Markdown processor. + extref (ObsidianImages): External reference to the enclosing ObsidianImages object. + """ super(ObsidianImages.ObsidianImageProc, self).__init__(pattern, md) self._extref = extref def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802 + """ + Handles a match on the reference for this processor. + + Args: + m (re.Match[str]): The regular expression match data. + data (str): The entire block of text surrounding the pattern, as a multi-line string. 
+ + Returns: + el (etree.Element): The new HTML element being added to the tree, or ``None`` if the match was rejected. + int: The index of the first character in ``data`` that was "consumed" by the pattern, or ``None`` + if the match was rejected, + int: The index of the first character in ``data`` that was *not* consumed by the pattern, or ``None`` + if the match was rejected. + """ name, width, height = self._extref._parse_dimensions(m.group(1)) link = self._extref._lookup_image_reference(name) if link is None: @@ -193,11 +226,34 @@ class ObsidianImages(Extension): return el, m.start(0), m.end(0) class GenericImageProc(InlineProcessor): + """The actual inline processor for generic image references.""" def __init__(self, pattern: str, md: markdown.Markdown, extref: Any) -> None: + """ + Initialize the GenericImageProc processor. + + Args: + pattern (str): The pattern to be matched to find the image reference. + md (markdown.Markdown): Backreference to the Markdown processor. + extref (ObsidianImages): External reference to the enclosing ObsidianImages object. + """ super(ObsidianImages.GenericImageProc, self).__init__(pattern, md) self._extref = extref def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802 + """ + Handles a match on the reference for this processor. + + Args: + m (re.Match[str]): The regular expression match data. + data (str): The entire block of text surrounding the pattern, as a multi-line string. + + Returns: + el (etree.Element): The new HTML element being added to the tree, or ``None`` if the match was rejected. + int: The index of the first character in ``data`` that was "consumed" by the pattern, or ``None`` + if the match was rejected, + int: The index of the first character in ``data`` that was *not* consumed by the pattern, or ``None`` + if the match was rejected. 
+ """ name, width, height = self._extref._parse_dimensions(m.group(1)) link = m.group(2) if is_proper_url(link): @@ -227,6 +283,12 @@ class ObsidianImages(Extension): return el, m.start(0), m.end(0) def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802 + """ + Registers the image processor with the Markdown parser. + + Args: + md (markdown.Markdown): The Markdown parser to register the image processors with. + """ md.inlinePatterns.register(ObsidianImages.ObsidianImageProc(OBSIMAGE_PATTERN, md, self), 'obsidian_images', PRIO_BASE + 1010) md.inlinePatterns.register(ObsidianImages.GenericImageProc(GENERICIMAGE_PATTERN, md, self), @@ -264,8 +326,7 @@ class ObsidianLinks(Extension): text = contents if node: return node.link_target(self._context.url_prefix, - self._context.current_node if self._context.relative_links else None, - root_path=self._context.source_dir), text + self._context.current_node if self._context.relative_links else None), text return None, text class ObsidianLinksProc(InlineProcessor): @@ -342,7 +403,7 @@ class ObsidianComments(Extension): Removes the comments from the array of lines. Args: - lines (list[str]): The Markdown liens read in. + lines (list[str]): The Markdown lines read in. Returns: list[str]: The Markdown lines after the comments have been removed.
diff --git a/src/dragonglass/style.py b/src/dragonglass/style.py index 8e11233..6b5c6f5 100644 --- a/src/dragonglass/style.py +++ b/src/dragonglass/style.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 +"""Default stylesheet data and processing for dragonglass.""" + from pathlib import Path from jinja2 import Environment +"""Name of the default stylesheet.""" STYLESHEET_NAME = "dragonglass.css" + +"""Template data for the default stylesheet.""" STYLESHEET_DATA = """/* Dragonglass default CSS file - ensure all generated HTML pages reference this */ a { color: #8a5cf5; @@ -21,6 +26,13 @@ ins { def write_default_stylesheet(tenv: Environment, dest_dir: Path) -> None: + """ + Writes the default stylesheet data to the destination directory. + + Args: + tenv (Environment): Template engine used to render the default stylesheet data. + dest_dir (Path): The destination directory to write the stylesheet to. + """ to_file = dest_dir / STYLESHEET_NAME tmpl = tenv.from_string(STYLESHEET_DATA) data = tmpl.render({}) diff --git a/src/dragonglass/template.py b/src/dragonglass/template.py index a0c49d5..0e1fb23 100644 --- a/src/dragonglass/template.py +++ b/src/dragonglass/template.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +"""The template engine used to render the data to the destination.""" + from jinja2 import Environment, BaseLoader, ChoiceLoader, FunctionLoader, FileSystemLoader from typing import Any from urllib.parse import quote as urlquote @@ -8,6 +10,8 @@ from .config import Context, DEFAULT_TEMPLATE_NAME from .style import STYLESHEET_NAME from .tree import SourceNode + +"""The default template used to render Markdown data.""" DEFAULT_TEMPLATE = """ @@ -23,6 +27,15 @@ DEFAULT_TEMPLATE = """ def _create_loader(ctxt: Context) -> BaseLoader: + """ + Create the loader used to find templates. + + Args: + ctxt (Context): The context providing the configuration data for dragonglass. + + Returns: + BaseLoader: A loader to be passed to the template engine. 
+ """ return ChoiceLoader([ FileSystemLoader(ctxt.template_dir), FunctionLoader(lambda n: DEFAULT_TEMPLATE if n == DEFAULT_TEMPLATE_NAME else None) @@ -30,10 +43,29 @@ def _create_loader(ctxt: Context) -> BaseLoader: def create_template_environment(ctxt: Context) -> Environment: + """ + Create the template environment used to render the data to the destination. + + Args: + ctxt (Context): The context providing the configuration data for dragonglass. + + Returns: + Environment: The new template environment. + """ return Environment(loader=_create_loader(ctxt)) def template_vars(node: SourceNode, ctxt: Context) -> dict[str, Any]: + """ + Create the template variables for the specified node. + + Args: + node (SourceNode): The node being processed. + ctxt (Context): The context providing the configuration data for dragonglass. + + Returns: + dict[str, Any]: The template variables to be used to render the node contents. + """ tvars = node.make_vars() if ctxt.relative_links: diff --git a/src/dragonglass/tree.py b/src/dragonglass/tree.py index 8a9d656..8e0d6ba 100644 --- a/src/dragonglass/tree.py +++ b/src/dragonglass/tree.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +"""The files and directories of the source Obsidian vault.""" + from pathlib import Path from typing import Any from urllib.parse import quote as urlquote @@ -10,57 +12,107 @@ import yaml logger = logging.getLogger(__name__) -# The paths that are always to be ignored. +"""The paths that are always to be ignored.""" STATIC_IGNORE = [ '.obsidian', '.dragonglass' ] +"""The path matching Markdown files.""" MARKDOWN_PAT = '*.md' class SourceNode: + """ + Represents a file or directory in the source Obsidian vault. + + Attributes: + metadata (dict[str, Any]): The metadata from the current node. + text (str): The parsed HTML text of the current node. + """ def __init__(self, root: Path, path: Path, is_dir: bool) -> None: + """ + Initialize the SourceNode. + + Args: + root (Path): Root path the node lives under. 
+ path (Path): Relative path from the root to this node. + is_dir (bool): ``True`` if the node is a directory, ``False`` if not. + """ self._root = root self._path = path self._is_dir = is_dir self._is_md = path.match(MARKDOWN_PAT) - self.metadata: dict[str, Any] | None = None + self.metadata: dict[str, Any] = {} self.text: str | None = None def __str__(self) -> str: + """Returns the string representation of the node.""" return f"SourceNode({self._path}, {self._is_dir}) [is_md={self._is_md}]" @property def is_dir(self) -> bool: + """Returns ``True`` if this node refers to a directory, ``False`` if it refers to an ordinary file.""" return self._is_dir @property def is_md(self) -> bool: + """Returns ``True`` if this node refers to a Markdown file, ``False`` if not.""" return self._is_md @property def path(self) -> Path: + """Returns the path, relative to the root path, of this node.""" return self._path + @property + def root_path(self) -> Path: + """Returns the root path this node is under.""" + return self._root + def target_file(self, dest_dir: Path) -> Path: + """ + Computes the path of the target file as it will be written to the destination directory. + + Args: + dest_dir (Path): The destination directory. + + Returns: + Path: The path of the target file to be written. + """ p = self._path if self._is_md: p = p.with_suffix('.html') return dest_dir / p - def link_target(self, prefix: str = "/", rel_to: Any = None, root_path: Path = None) -> str: + def link_target(self, prefix: str = "/", rel_to: Any = None) -> str: + """ + Returns the link target string for this node. + + Args: + prefix (str): The prefix to apply to all absolute URLs. Default is "/". + rel_to (SourceNode): If this is not ``None``, the path is to be computed as a relative path to the + specified node. + + Returns: + str: The link target, as a string.
+ """ xpath = self._path.with_suffix('.html') if self._is_md else self._path if rel_to is not None: - xpath = root_path / xpath - rel_path = root_path / rel_to.path + xpath = self._root / xpath + rel_path = rel_to.root_path / rel_to.path logger.debug(f"*** Computing path of {xpath} relative to {rel_path}") return urlquote(xpath.relative_to(rel_path.parent, walk_up=True).as_posix()) return urlquote(prefix + xpath.as_posix()) - def load_metadata(self, source_dir: Path) -> None: + def load_metadata(self) -> None: + """ + Loads the metadata for this particular node and saves it in the "metadata" attribute. + + The metadata is YAML at the beginning of the Markdown file, and is set via Obsidian "properties." + """ if self._is_md and not self._is_dir: - with open(source_dir / self._path, "r", encoding="utf-8") as f: + with open(self._root / self._path, "r", encoding="utf-8") as f: cur_line = f.readline() if cur_line == '---\n': metalines: list[str] = [] @@ -70,13 +122,25 @@ class SourceNode: cur_line = f.readline() self.metadata = yaml.full_load(''.join(metalines)) - def parse_markdown(self, source_dir: Path, markdown_parser: markdown.Markdown) -> None: + def parse_markdown(self, markdown_parser: markdown.Markdown) -> None: + """ + Parses the Markdown in this file into HTML, saving the HTML into the "text" attribute. + + Args: + markdown_parser (markdown.Markdown): Instance of the Markdown parser to use in the parsing. + """ if self._is_md and not self._is_dir: markdown_parser.reset() - with open(source_dir / self._path, "r", encoding="utf-8") as f: + with open(self._root / self._path, "r", encoding="utf-8") as f: self.text = markdown_parser.convert(f.read()) def make_vars(self) -> dict[str, Any]: + """ + Creates the template variables to use for this node. + + Returns: + dict[str, Any]: The template variables, as a dict. 
+ """ return { "text": self.text, "title": self._path.stem @@ -84,6 +148,15 @@ class SourceNode: def generate_list(source_root: Path) -> list[SourceNode]: + """ + Generates the list of source nodes from the source path (Obsidian vault). + + Args: + source_root (Path): The root directory (Obsidian vault) to load nodes from. + + Returns: + list[SourceNode]: The list of nodes (files and directories) under the root, as a preorder traversal. + """ nodes: list[SourceNode] = [] dirs = [source_root] while len(dirs) > 0: @@ -103,7 +176,17 @@ def generate_list(source_root: Path) -> list[SourceNode]: class SourceIndex: + """ + The index of all nodes in the Obsidian vault. Nodes are listed by their name, by their relative path name, + and by any aliases specified in their metadata. + """ def __init__(self, nodelist: list[SourceNode]) -> None: + """ + Initialize the source index. + + Args: + nodelist (list[SourceNode]): The list of nodes to be indexed. + """ self._byname: dict[str, SourceNode] = {} self._byalias: dict[str, SourceNode] = {} for node in nodelist: @@ -127,6 +210,16 @@ class SourceIndex: self._byname[node.path.as_posix()] = node def lookup(self, reference: str) -> tuple[SourceNode | None, str | None]: + """ + Looks up a reference in the index and returns the corresponding node if it exists. + + Args: + reference (str): The reference to look up. + + Returns: + SourceNode: The node that was found, or ``None`` if the node was not found. + str: Indicates whether the match was on "NAME" or "ALIAS". Returns ``None`` if the node was not found. + """ if reference in self._byname: return self._byname[reference], 'NAME' elif reference in self._byalias: