added a lot of docstrings and general cleanup

This commit is contained in:
Amy G. Bowersox 2024-08-09 17:43:56 -06:00
parent b8b2d5ebee
commit 1842d33358
7 changed files with 330 additions and 77 deletions

View File

@ -13,7 +13,7 @@ dependencies = [
"pygments~=2.18.0",
]
readme = "README.md"
requires-python = ">= 3.8"
requires-python = ">= 3.12"
[project.scripts]
'dragonglass' = "dragonglass:main"

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3
"""Configuration and context information for dragonglass"""
from argparse import Namespace
from pathlib import Path
from typing import Any
@ -9,12 +11,27 @@ import tomllib
from .tree import SourceIndex, SourceNode
"""The default configuration file name."""
DEFAULT_CONFIG_FILE = ".dragonglass"
"""The default template directory name."""
DEFAULT_TEMPLATE_DIRECTORY = ".dragonglass.tmpl"
"""The default template name."""
DEFAULT_TEMPLATE_NAME = "default.html"
class Context:
"""
The configuration and context information for dragonglass.
Attributes:
source_dir (Path): The source directory (Obsidian vault) for the operation.
template_dir (Path): The directory in which templates may be found.
config (dict[str, Any]): The configuration read in from the default configuration file.
src_index (SourceIndex): The index of all nodes, used to look up targets for internal links.
current_node (SourceNode): The current node being processed.
"""
def __init__(self) -> None:
"""Initialize the Context."""
self.source_dir: Path | None = None
self.template_dir: Path | None = None
self.config: dict[str, Any] = {}
@ -23,7 +40,13 @@ class Context:
self._default_template_name: str | None = None
def load_config(self, args: Namespace) -> None:
config_filename: str = str(args.config) if args.config else ".dragonglass"
"""
Loads the configuration for dragonglass.
Args:
args (Namespace): The command line arguments to dragonglass.
"""
config_filename: str = str(args.config) if args.config else DEFAULT_CONFIG_FILE
assert self.source_dir is not None
config_path = self.source_dir / config_filename
if config_path.exists() and config_path.is_file():
@ -35,16 +58,29 @@ class Context:
@property
def url_prefix(self) -> str:
    """
    The URL prefix applied to URLs generated for internal links.

    Read from the ``prefix`` key of the ``links`` configuration section, defaulting to "/".
    The returned value always ends with a slash.
    """
    configured = self.config.get("links", {}).get("prefix", "/")
    if configured.endswith("/"):
        return configured
    return configured + '/'
@property
def relative_links(self) -> bool:
    """
    Whether internal links are generated as relative links.

    Read from the ``relative`` key of the ``links`` configuration section; ``False`` (meaning absolute
    links) when the key is absent.
    """
    return self.config.get("links", {}).get("relative", False)
def get_template_name_for_node(self, node: SourceNode) -> str:
    """
    Returns the name of a template to be used to render the current node.

    Args:
        node (SourceNode): The current node being processed.

    Returns:
        str: The name of the template to be used to render the node.
    """
    # template_dir is declared as ``Path | None``; treat "not set" the same as "not a directory"
    # rather than failing with AttributeError on None.
    if self.template_dir is None or not self.template_dir.is_dir():
        return DEFAULT_TEMPLATE_NAME
    # _default_template_name starts out as None; fall back to the built-in name so the declared
    # ``str`` return type always holds.
    return self._default_template_name or DEFAULT_TEMPLATE_NAME

View File

@ -1,5 +1,12 @@
#!/usr/bin/env python3
"""
Dragonglass - a utility for processing Obsidian vaults into directories of Web pages.
Add a configuration file named .dragonglass to the root folder of the Obsidian vault, and a template subdirectory
named .dragonglass.tmpl.
"""
import argparse
import logging
import shutil
@ -23,67 +30,79 @@ parser.add_argument('--dump', action='store_true', help='Dumps the parsed data b
def main() -> int:
    """
    Main function of dragonglass: parses the Obsidian vault and outputs it as a formatted Web page directory.

    Returns:
        int: 0 on success, nonzero value on error.
    """
    try:
        args = parser.parse_args()

        if args.debug:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)

        context = Context()
        context.source_dir = Path(args.source_dir).resolve()
        if not (context.source_dir.exists() and context.source_dir.is_dir()):
            logger.error(f"{context.source_dir} is not a valid directory")
            return 1
        dest_dir = Path(args.dest_dir).resolve()
        if dest_dir.exists() and not dest_dir.is_dir():
            logger.error(f"{dest_dir} exists but is not a valid directory")
            return 1
        context.load_config(args)

        # First pass: load metadata for every node so the index can be built before any parsing.
        nodes = generate_list(context.source_dir)
        for node in nodes:
            logger.info(f"Loading metadata for {node}")
            context.current_node = node
            node.load_metadata()
        context.src_index = SourceIndex(nodes)

        # Second pass: parse the Markdown now that the index is available to the parser's context.
        mdparse = create_markdown_parser(context)
        for node in nodes:
            logger.info(f"Parsing {node}")
            context.current_node = node
            node.parse_markdown(mdparse)

        # TEMP
        if args.dump:
            for node in nodes:
                print(node)
                if node.metadata:
                    print(f"Metadata: {node.metadata}")
                if node.text:
                    print("----- BEGIN TEXT -----")
                    print(node.text)
                    print("------ END TEXT ------")

        if not dest_dir.exists():
            dest_dir.mkdir()

        # Third pass: write each node to the destination.
        tenv = create_template_environment(context)
        for node in nodes:
            p = node.target_file(dest_dir)
            if node.is_dir:
                p.mkdir(exist_ok=True)
            elif node.is_md:
                context.current_node = node
                tmpl = tenv.get_template(context.get_template_name_for_node(node))
                data = tmpl.render(template_vars(node, context))
                # The sources are read as UTF-8, so write the rendered page as UTF-8 too instead of
                # depending on the platform default encoding.
                with p.open("wt", encoding="utf-8") as f:
                    f.write(data)
            else:
                shutil.copyfile(context.source_dir / node.path, p)

        write_default_stylesheet(tenv, dest_dir)
    except Exception:
        # Deliberately not a bare ``except:``: SystemExit (raised by argparse for --help and usage
        # errors) and KeyboardInterrupt must propagate instead of being logged as unexpected errors.
        logger.exception("Unexpected error in processing")
        return 1
    return 0

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3
"""The Markdown parser and its extensions."""
import hashlib
import re
import xml.etree.ElementTree as etree # noqa: N813
@ -120,7 +122,7 @@ class MetaStripper(Extension):
class ObsidianImages(Extension):
"""An extension that supports image tags the way Obsidian handles them."""
DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?')
__DIMS = re.compile(r'(.*)\|(\d+)(?:x(\d+))?')
def __init__(self, context: Context, **kwargs: dict[str, Any]) -> None:
"""
@ -153,7 +155,7 @@ class ObsidianImages(Extension):
int: The image height in pixels, or -1 if not specified.
"""
s = s.replace(r'\|', '|') # handle case where we're inside tables
m = self.DIMS.match(s)
m = self.__DIMS.match(s)
if m:
width = int(m.group(2))
height = int(m.group(3)) if m.group(3) else -1
@ -162,20 +164,51 @@ class ObsidianImages(Extension):
return s, -1, -1
def _lookup_image_reference(self, name: str) -> str | None:
"""
Looks up the image reference in the source index and returns its link target value if it exists.
Args:
name (str): The image name to be looked up.
Returns:
str: The image's link target value, or ``None`` if the image was not found.
"""
assert self._context.src_index is not None
node, _ = self._context.src_index.lookup(name)
if node:
return node.link_target(self._context.url_prefix,
self._context.current_node if self._context.relative_links else None,
root_path=self._context.source_dir)
self._context.current_node if self._context.relative_links else None)
return None
class ObsidianImageProc(InlineProcessor):
"""The actual inline processor for Obsidian image references."""
def __init__(self, pattern: str, md: markdown.Markdown, extref: Any) -> None:
    """
    Set up the inline processor for Obsidian image references.

    Args:
        pattern (str): The pattern to be matched to find the image reference.
        md (markdown.Markdown): Backreference to the Markdown processor.
        extref (ObsidianImages): External reference to the enclosing ObsidianImages object.
    """
    super().__init__(pattern, md)
    # Kept so handleMatch can call back into the enclosing extension's helpers.
    self._extref = extref
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802
"""
Handles a match on the reference for this processor.
Args:
m (re.Match[str]): The regular expression match data.
data (str): The entire block of text surrounding the pattern, as a multi-line string.
Returns:
el (etree.Element): The new HTML element being added to the tree, or ``None`` if the match was rejected.
int: The index of the first character in ``data`` that was "consumed" by the pattern, or ``None``
if the match was rejected,
int: The index of the first character in ``data`` that was *not* consumed by the pattern, or ``None``
if the match was rejected.
"""
name, width, height = self._extref._parse_dimensions(m.group(1))
link = self._extref._lookup_image_reference(name)
if link is None:
@ -193,11 +226,34 @@ class ObsidianImages(Extension):
return el, m.start(0), m.end(0)
class GenericImageProc(InlineProcessor):
"""The actual inline processor for generic image references."""
def __init__(self, pattern: str, md: markdown.Markdown, extref: Any) -> None:
    """
    Set up the inline processor for generic image references.

    Args:
        pattern (str): The pattern to be matched to find the image reference.
        md (markdown.Markdown): Backreference to the Markdown processor.
        extref (ObsidianImages): External reference to the enclosing ObsidianImages object.
    """
    super().__init__(pattern, md)
    # Kept so handleMatch can call back into the enclosing extension's helpers.
    self._extref = extref
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # noqa: N802
"""
Handles a match on the reference for this processor.
Args:
m (re.Match[str]): The regular expression match data.
data (str): The entire block of text surrounding the pattern, as a multi-line string.
Returns:
el (etree.Element): The new HTML element being added to the tree, or ``None`` if the match was rejected.
int: The index of the first character in ``data`` that was "consumed" by the pattern, or ``None``
if the match was rejected,
int: The index of the first character in ``data`` that was *not* consumed by the pattern, or ``None``
if the match was rejected.
"""
name, width, height = self._extref._parse_dimensions(m.group(1))
link = m.group(2)
if is_proper_url(link):
@ -227,6 +283,12 @@ class ObsidianImages(Extension):
return el, m.start(0), m.end(0)
def extendMarkdown(self, md: markdown.Markdown) -> None: # noqa: N802
"""
Registers the image processor with the Markdown parser.
Args:
md (markdown.Markdown): The Markdown parser to register the image processors with.
"""
md.inlinePatterns.register(ObsidianImages.ObsidianImageProc(OBSIMAGE_PATTERN, md, self),
'obsidian_images', PRIO_BASE + 1010)
md.inlinePatterns.register(ObsidianImages.GenericImageProc(GENERICIMAGE_PATTERN, md, self),
@ -264,8 +326,7 @@ class ObsidianLinks(Extension):
text = contents
if node:
return node.link_target(self._context.url_prefix,
self._context.current_node if self._context.relative_links else None,
root_path=self._context.source_dir), text
self._context.current_node if self._context.relative_links else None), text
return None, text
class ObsidianLinksProc(InlineProcessor):
@ -342,7 +403,7 @@ class ObsidianComments(Extension):
Removes the comments from the array of lines.
Args:
lines (list[str]): The Markdown liens read in.
lines (list[str]): The Markdown lines read in.
Returns:
list[str]: The Markdown lines after the comments have been removed.

View File

@ -1,9 +1,14 @@
#!/usr/bin/env python3
"""Default stylesheet data and processing for dragonglass."""
from pathlib import Path
from jinja2 import Environment
"""Name of the default stylesheet."""
STYLESHEET_NAME = "dragonglass.css"
"""Template data for the default stylesheet."""
STYLESHEET_DATA = """/* Dragonglass default CSS file - ensure all generated HTML pages reference this */
a {
color: #8a5cf5;
@ -21,6 +26,13 @@ ins {
def write_default_stylesheet(tenv: Environment, dest_dir: Path) -> None:
"""
Writes the default stylesheet data to the destination directory.
Args:
tenv (Environment): Template engine used to render the default stylesheet data.
dest_dir (Path): The destination directory to write the stylesheet to.
"""
to_file = dest_dir / STYLESHEET_NAME
tmpl = tenv.from_string(STYLESHEET_DATA)
data = tmpl.render({})

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3
"""The template engine used to render the data to the destination."""
from jinja2 import Environment, BaseLoader, ChoiceLoader, FunctionLoader, FileSystemLoader
from typing import Any
from urllib.parse import quote as urlquote
@ -8,6 +10,8 @@ from .config import Context, DEFAULT_TEMPLATE_NAME
from .style import STYLESHEET_NAME
from .tree import SourceNode
"""The default template used to render Markdown data."""
DEFAULT_TEMPLATE = """
<html>
<head>
@ -23,6 +27,15 @@ DEFAULT_TEMPLATE = """
def _create_loader(ctxt: Context) -> BaseLoader:
"""
Create the loader used to find templates.
Args:
ctxt (Context): The context providing the configuration data for dragonglass.
Returns:
BaseLoader: A loader to be passed to the template engine.
"""
return ChoiceLoader([
FileSystemLoader(ctxt.template_dir),
FunctionLoader(lambda n: DEFAULT_TEMPLATE if n == DEFAULT_TEMPLATE_NAME else None)
@ -30,10 +43,29 @@ def _create_loader(ctxt: Context) -> BaseLoader:
def create_template_environment(ctxt: Context) -> Environment:
    """
    Build the template environment used to render the data to the destination.

    Args:
        ctxt (Context): The context providing the configuration data for dragonglass.

    Returns:
        Environment: A new template environment whose loader comes from ``_create_loader``.
    """
    template_loader = _create_loader(ctxt)
    return Environment(loader=template_loader)
def template_vars(node: SourceNode, ctxt: Context) -> dict[str, Any]:
"""
Create the template variables for the specified node.
Args:
node (SourceNode): The node being processed.
ctxt (Context): The context providing the configuration data for dragonglass.
Returns:
dict[str, Any]: The template variables to be used to render the node contents.
"""
tvars = node.make_vars()
if ctxt.relative_links:

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python
"""The files and directories of the source Obsidian vault."""
from pathlib import Path
from typing import Any
from urllib.parse import quote as urlquote
@ -10,57 +12,107 @@ import yaml
logger = logging.getLogger(__name__)
# The paths that are always to be ignored.
"""The paths that are always to be ignored."""
STATIC_IGNORE = [
'.obsidian',
'.dragonglass'
]
"""The path matching Markdown files."""
MARKDOWN_PAT = '*.md'
class SourceNode:
"""
Represents a file or directory in the source Obsidian vault.
Attributes:
metadata (dict[str, Any]): The metadata from the current node.
text (str): The parsed HTML text of the current node.
"""
def __init__(self, root: Path, path: Path, is_dir: bool) -> None:
"""
Initialize the SourceNode.
Args:
root (Path): Root path the node lives under.
path (Path): Relative path from the root to this node.
is_dir (bool): ``True`` if the node is a directory, ``False`` if not.
"""
self._root = root
self._path = path
self._is_dir = is_dir
self._is_md = path.match(MARKDOWN_PAT)
self.metadata: dict[str, Any] | None = None
self.metadata: dict[str, Any] = {}
self.text: str | None = None
def __str__(self) -> str:
"""Returns the string representation of the node."""
return f"SourceNode({self._path}, {self._is_dir}) [is_md={self._is_md}]"
@property
def is_dir(self) -> bool:
"""Returns ``True`` if this node refers to a directory, ``False`` if it refers to an ordinary file."""
return self._is_dir
@property
def is_md(self) -> bool:
"""Returns ``True`` if this node refers to a Markdown file, ``False`` if not."""
return self._is_md
@property
def path(self) -> Path:
"""Returns the path, relative to the root path, of this node."""
return self._path
@property
def root_path(self) -> Path:
"""Returns the root path this node is under."""
return self._root
def target_file(self, dest_dir: Path) -> Path:
    """
    Computes where this node will be written under the destination directory.

    Markdown nodes have their suffix replaced with ``.html``; every other node keeps its relative
    path unchanged.

    Args:
        dest_dir (Path): The destination directory.

    Returns:
        Path: The path of the target file to be written.
    """
    relative = self._path.with_suffix('.html') if self._is_md else self._path
    return dest_dir / relative
def link_target(self, prefix: str = "/", rel_to: Any = None) -> str:
    """
    Returns the link target string for this node.

    Args:
        prefix (str): The prefix to apply to all absolute URLs. Default is "/". It is emitted verbatim.
        rel_to (SourceNode): If this is not ``None``, the path is to be computed as a relative path to the
            specified node.

    Returns:
        str: The link target, as a string.
    """
    xpath = self._path.with_suffix('.html') if self._is_md else self._path
    if rel_to is not None:
        xpath = self._root / xpath
        rel_path = rel_to.root_path / rel_to.path
        logger.debug(f"*** Computing path of {xpath} relative to {rel_path}")
        # Relative to the directory containing rel_to; walk_up permits ".." components.
        return urlquote(xpath.relative_to(rel_path.parent, walk_up=True).as_posix())
    # Quote only the node's path. Quoting the prefix as well would percent-encode the ":" of a
    # prefix that carries a scheme (e.g. "https://host/"), producing an unusable link.
    return prefix + urlquote(xpath.as_posix())
def load_metadata(self, source_dir: Path) -> None:
def load_metadata(self) -> None:
"""
Loads the metadata for this particular node and saves it in the "metadata" attribute.
The metadata is YAML at the beginning of the Markdown file, and is set via Obsidian "properties."
"""
if self._is_md and not self._is_dir:
with open(source_dir / self._path, "r", encoding="utf-8") as f:
with open(self._root / self._path, "r", encoding="utf-8") as f:
cur_line = f.readline()
if cur_line == '---\n':
metalines: list[str] = []
@ -70,13 +122,25 @@ class SourceNode:
cur_line = f.readline()
self.metadata = yaml.full_load(''.join(metalines))
def parse_markdown(self, source_dir: Path, markdown_parser: markdown.Markdown) -> None:
def parse_markdown(self, markdown_parser: markdown.Markdown) -> None:
"""
Parses the Markdown in this file into HTML, saving the HTML into the "text" attribute.
Args:
markdown_parser (markdown.Markdown): Instance of the Markdown parser to use in the parsing.
"""
if self._is_md and not self._is_dir:
markdown_parser.reset()
with open(source_dir / self._path, "r", encoding="utf-8") as f:
with open(self._root / self._path, "r", encoding="utf-8") as f:
self.text = markdown_parser.convert(f.read())
def make_vars(self) -> dict[str, Any]:
"""
Creates the template variables to use for this node.
Returns:
dict[str, Any]: The template variables, as a dict.
"""
return {
"text": self.text,
"title": self._path.stem
@ -84,6 +148,15 @@ class SourceNode:
def generate_list(source_root: Path) -> list[SourceNode]:
"""
Generates the list of source nodes from the source path (Obsidian vault).
Args:
source_root (Path): The root directory (Obsidian vault) to load nodes from.
Returns:
list[SourceNode]: The list of nodes (files and directories) under the root, as a preorder traversal.
"""
nodes: list[SourceNode] = []
dirs = [source_root]
while len(dirs) > 0:
@ -103,7 +176,17 @@ def generate_list(source_root: Path) -> list[SourceNode]:
class SourceIndex:
"""
The index of all nodes in the Obsidian vault. Nodes are listed by their name, by their relative path name,
and by any aliases specified in their metadata.
"""
def __init__(self, nodelist: list[SourceNode]) -> None:
"""
Initialize the source index.
Args:
nodelist (list[SourceNode]): The list of nodes to be indexed.
"""
self._byname: dict[str, SourceNode] = {}
self._byalias: dict[str, SourceNode] = {}
for node in nodelist:
@ -127,6 +210,16 @@ class SourceIndex:
self._byname[node.path.as_posix()] = node
def lookup(self, reference: str) -> tuple[SourceNode | None, str | None]:
"""
Looks up a reference in the index and returns the corresponding node if it exists.
Args:
reference (str): The reference to look up.
Returns:
SourceNode: The node that was found, or ``None`` if the node was not found.
str: Indicates whether the match was on "NAME" or "ALIAS". Returns ``None`` if the node was not found.
"""
if reference in self._byname:
return self._byname[reference], 'NAME'
elif reference in self._byalias: